From efc947fb3eea38eeae257980e663de806f1e19d0 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Mon, 4 Oct 2021 18:37:05 +0000 Subject: [PATCH 0001/2552] [Bilibili] Add subtitle converter (#1144) Closes #1015 Based on https://github.com/y2361547758/bcc2ass Authored by: u-spec-png --- yt_dlp/extractor/bilibili.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 0a81452c3..daa224b17 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -22,6 +22,7 @@ from ..utils import ( parse_iso8601, try_get, smuggle_url, + srt_subtitles_timecode, str_or_none, str_to_int, strip_jsonp, @@ -623,7 +624,7 @@ class BiliBiliSearchIE(SearchInfoExtractor): while True: pageNumber += 1 # FIXME - api_url = "https://api.bilibili.com/x/web-interface/search/type?context=&page=%s&order=pubdate&keyword=%s&duration=0&tids_2=&__refresh__=true&search_type=video&tids=0&highlight=1" % (pageNumber, query) + api_url = 'https://api.bilibili.com/x/web-interface/search/type?context=&page=%s&order=pubdate&keyword=%s&duration=0&tids_2=&__refresh__=true&search_type=video&tids=0&highlight=1' % (pageNumber, query) json_str = self._download_webpage( api_url, "None", query={"Search_key": query}, note='Extracting results from page %s' % pageNumber) @@ -783,6 +784,12 @@ class BiliIntlBaseIE(InfoExtractor): def _call_api(self, type, endpoint, id): return self._download_json(self._API_URL.format(type, endpoint), id)['data'] + def json2srt(self, json): + data = '\n\n'.join( + f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}' + for i, line in enumerate(json['body'])) + return data + def _get_subtitles(self, type, ep_id): sub_json = self._call_api(type, f'/m/subtitle?ep_id={ep_id}&platform=web', ep_id) subtitles = {} @@ -790,8 +797,13 @@ class BiliIntlBaseIE(InfoExtractor): sub_url = sub.get('url') if not sub_url: continue + sub_data = self._download_json(sub_url, ep_id, fatal=False) + if not sub_data: + continue + sub_data = self._parse_json(sub_data) subtitles.setdefault(sub.get('key', 'en'), []).append({ - 'url': sub_url, + 'ext': 'srt', + 'data': self.json2srt(sub_data) }) return subtitles From ebf2fb4d619b7d65b40ae6bacc79bd9f3d3ceab8 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Mon, 4 Oct 2021 18:42:24 +0000 Subject: [PATCH 0002/2552] [Vupload] Add extractor (#1146) Fixes: https://github.com/ytdl-org/youtube-dl/issues/29877 Authored by: u-spec-png --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/vupload.py | 51 ++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 yt_dlp/extractor/vupload.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 8e8d269ce..b90110c7f 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1713,6 +1713,7 @@ from .vtm import VTMIE from .medialaan import MedialaanIE from .vube import VubeIE from .vuclip import VuClipIE +from .vupload import VuploadIE from .vvvvid import ( VVVVIDIE, VVVVIDShowIE, diff --git a/yt_dlp/extractor/vupload.py b/yt_dlp/extractor/vupload.py new file mode 100644 index 000000000..9846ababc --- /dev/null +++ b/yt_dlp/extractor/vupload.py @@ -0,0 +1,51 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + parse_duration, + parse_filesize, + 
extract_attributes, + int_or_none, +) + + +class VuploadIE(InfoExtractor): + _VALID_URL = r'https://vupload\.com/v/(?P<id>[a-z0-9]+)' + _TESTS = [{ + 'url': 'https://vupload.com/v/u28d0pl2tphy', + 'md5': '9b42a4a193cca64d80248e58527d83c8', + 'info_dict': { + 'id': 'u28d0pl2tphy', + 'ext': 'mp4', + 'description': 'md5:e9e6c0045c78cbf0d5bb19a55ce199fb', + 'title': 'md5:e9e6c0045c78cbf0d5bb19a55ce199fb', + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title') + video_e = self._html_search_regex(r'\|([a-z0-9]{60})\|', webpage, 'video') + video_url = f'https://wurize.megaupload.to/{video_e}/v.mp4' + duration = parse_duration(self._html_search_regex( + r'<i\s*class=["\']fad\s*fa-clock["\']></i>\s*([\d:]+)\s*</div>', webpage, 'duration', fatal=False)) + filesize_approx = parse_filesize(self._html_search_regex( + r'<i\s*class=["\']fad\s*fa-save["\']></i>\s*([^<]+)\s*</div>', webpage, 'filesize', fatal=False)) + extra_video_info = extract_attributes(self._html_search_regex( + r'(<video[^>]+>)', webpage, 'video_info', fatal=False)) + description = self._html_search_meta('description', webpage) + + return { + 'id': video_id, + 'url': video_url, + 'duration': duration, + 'filesize_approx': filesize_approx, + 'width': int_or_none(extra_video_info.get('width')), + 'height': int_or_none(extra_video_info.get('height')), + 'format_id': extra_video_info.get('height', '') + 'p', + 'title': title, + 'description': description, + } From 3001a84dca08612e72aa2116941868636e800f32 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Mon, 4 Oct 2021 18:58:02 +0000 Subject: [PATCH 0003/2552] [Newgrounds] Add age_limit and fix duration (#1156) Authored by: u-spec-png --- yt_dlp/extractor/newgrounds.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/newgrounds.py b/yt_dlp/extractor/newgrounds.py index 3c49008a0..bbbd9e8ee 100644 --- a/yt_dlp/extractor/newgrounds.py +++ b/yt_dlp/extractor/newgrounds.py @@ -42,6 +42,7 @@ class NewgroundsIE(InfoExtractor): 'timestamp': 955064100, 'upload_date': '20000406', 'description': 'Scrotum plays "catch."', + 'age_limit': 17, }, }, { # source format unavailable, additional mp4 formats @@ -54,6 +55,7 @@ class NewgroundsIE(InfoExtractor): 'timestamp': 1487965140, 'upload_date': '20170224', 'description': 'ZTV News Episode 8 (February 2017)', + 'age_limit': 17, }, 'params': { 'skip_download': True, @@ -69,6 +71,7 @@ class NewgroundsIE(InfoExtractor): 'timestamp': 1140663240, 'upload_date': '20060223', 'description': 'Metal Gear is awesome is so is this movie.', + 'age_limit': 13, } }, { 'url': 'https://www.newgrounds.com/portal/view/297383/format/flash', @@ -81,8 +84,15 @@ class NewgroundsIE(InfoExtractor): 'uploader': 'Egoraptor', 'upload_date': '20060223', 'timestamp': 1140663240, + 'age_limit': 13, } }] + _AGE_LIMIT = { + 'e': 0, + 't': 13, + 'm': 17, + 'a': 18, + } def _real_extract(self, url): media_id = self._match_id(url) @@ -127,12 +137,16 @@ class NewgroundsIE(InfoExtractor): r'(?:Author|Writer)\s*<a[^>]+>([^<]+)'), webpage, 'uploader', fatal=False) + age_limit = self._html_search_regex( + r'<h2\s*class=["\']rated-([^"\'])["\'][^>]+>', webpage, 'age_limit', default='e') + age_limit = self._AGE_LIMIT.get(age_limit) + timestamp = unified_timestamp(self._html_search_regex( (r'
<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+</dd>\s*<dd>[^<]+)</dd>', r'<dt>\s*Uploaded\s*</dt>\s*<dd>
([^<]+)</dd>'), webpage, 'timestamp', default=None)) duration = parse_duration(self._html_search_regex( - r'"duration"\s*:\s*["\']?([\d]+)["\']?,', webpage, + r'"duration"\s*:\s*["\']?(\d+)["\']?', webpage, 'duration', default=None)) view_count = parse_count(self._html_search_regex( @@ -164,6 +178,7 @@ class NewgroundsIE(InfoExtractor): 'formats': formats, 'thumbnail': self._og_search_thumbnail(webpage), 'description': self._og_search_description(webpage), + 'age_limit': age_limit, 'view_count': view_count, } From 943d5ab13305b6a37424e6572d10f562384ada9a Mon Sep 17 00:00:00 2001 From: MinePlayersPE Date: Tue, 5 Oct 2021 02:01:33 +0700 Subject: [PATCH 0004/2552] [Douyin] Rewrite extractor (#1157) Closes #1121 Authored by: MinePlayersPE --- yt_dlp/extractor/douyin.py | 145 ------------------ yt_dlp/extractor/extractors.py | 2 +- yt_dlp/extractor/tiktok.py | 264 +++++++++++++++++++++++++-------- 3 files changed, 205 insertions(+), 206 deletions(-) delete mode 100644 yt_dlp/extractor/douyin.py diff --git a/yt_dlp/extractor/douyin.py b/yt_dlp/extractor/douyin.py deleted file mode 100644 index 7f3176be7..000000000 --- a/yt_dlp/extractor/douyin.py +++ /dev/null @@ -1,145 +0,0 @@ -# coding: utf-8 - -from ..utils import ( - int_or_none, - traverse_obj, - url_or_none, -) -from .common import ( - InfoExtractor, - compat_urllib_parse_unquote, -) - - -class DouyinIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?douyin\.com/video/(?P<id>[0-9]+)' - _TESTS = [{ - 'url': 'https://www.douyin.com/video/6961737553342991651', - 'md5': '10523312c8b8100f353620ac9dc8f067', - 'info_dict': { - 'id': '6961737553342991651', - 'ext': 'mp4', - 'title': '#杨超越 小小水手带你去远航❤️', - 'uploader': '杨超越', - 'upload_date': '20210513', - 'timestamp': 1620905839, - 'uploader_id': '110403406559', - 'view_count': int, - 'like_count': int, - 'repost_count': int, - 'comment_count': int, - } - }, { - 'url': 'https://www.douyin.com/video/6982497745948921092', - 'md5': 'd78408c984b9b5102904cf6b6bc2d712', - 'info_dict': { - 'id': '6982497745948921092', - 'ext': 'mp4', - 'title': '这个夏日和小羊@杨超越 一起遇见白色幻想', - 'uploader': '杨超越工作室', - 'upload_date': '20210708', - 'timestamp': 1625739481, - 'uploader_id': '408654318141572', - 'view_count': int, - 'like_count': int, - 'repost_count': int, - 'comment_count': int, - } - }, { - 'url': 'https://www.douyin.com/video/6953975910773099811', - 'md5': '72e882e24f75064c218b76c8b713c185', - 'info_dict': { - 'id': '6953975910773099811', - 'ext': 'mp4', - 'title': '#一起看海 出现在你的夏日里', - 'uploader': '杨超越', - 'upload_date': '20210422', - 'timestamp': 1619098692, - 'uploader_id': '110403406559', - 'view_count': int, - 'like_count': int, - 'repost_count': int, - 'comment_count': int, - } - }, { - 'url': 'https://www.douyin.com/video/6950251282489675042', - 'md5': 'b4db86aec367ef810ddd38b1737d2fed', - 'info_dict': { - 'id': '6950251282489675042', - 'ext': 'mp4', - 'title': '哈哈哈,成功了哈哈哈哈哈哈', - 'uploader': '杨超越', - 'upload_date': '20210412', - 'timestamp': 1618231483, - 'uploader_id': '110403406559', - 'view_count': int, - 'like_count': int, - 'repost_count': int, - 'comment_count': int, - } - }, { - 'url': 'https://www.douyin.com/video/6963263655114722595', - 'md5': '1abe1c477d05ee62efb40bf2329957cf', - 'info_dict': { - 'id': '6963263655114722595', - 'ext': 'mp4', - 'title': '#哪个爱豆的105度最甜 换个角度看看我哈哈', - 'uploader': '杨超越', - 'upload_date': '20210517', - 'timestamp': 1621261163, - 'uploader_id': '110403406559', - 'view_count': int, - 'like_count': int, - 'repost_count': int, - 'comment_count': int, - } - }] - - def 
_real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - render_data = self._parse_json( - self._search_regex( - r'', - webpage, 'render data'), - video_id, transform_source=compat_urllib_parse_unquote) - details = traverse_obj(render_data, (..., 'aweme', 'detail'), get_all=False) - - thumbnails = [{'url': self._proto_relative_url(url)} for url in traverse_obj( - details, ('video', ('cover', 'dynamicCover', 'originCover')), expected_type=url_or_none, default=[])] - - common = { - 'width': traverse_obj(details, ('video', 'width'), expected_type=int), - 'height': traverse_obj(details, ('video', 'height'), expected_type=int), - 'ext': 'mp4', - } - formats = [{**common, 'url': self._proto_relative_url(url)} for url in traverse_obj( - details, ('video', 'playAddr', ..., 'src'), expected_type=url_or_none, default=[]) if url] - self._remove_duplicate_formats(formats) - - download_url = traverse_obj(details, ('download', 'url'), expected_type=url_or_none) - if download_url: - formats.append({ - **common, - 'format_id': 'download', - 'url': self._proto_relative_url(download_url), - 'quality': 1, - }) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': details.get('desc') or self._html_search_meta('title', webpage), - 'formats': formats, - 'thumbnails': thumbnails, - 'uploader': traverse_obj(details, ('authorInfo', 'nickname'), expected_type=str), - 'uploader_id': traverse_obj(details, ('authorInfo', 'uid'), expected_type=str), - 'uploader_url': 'https://www.douyin.com/user/%s' % traverse_obj( - details, ('authorInfo', 'secUid'), expected_type=str), - 'timestamp': int_or_none(details.get('createTime')), - 'duration': traverse_obj(details, ('video', 'duration'), expected_type=int), - 'view_count': traverse_obj(details, ('stats', 'playCount'), expected_type=int), - 'like_count': traverse_obj(details, ('stats', 'diggCount'), expected_type=int), - 'repost_count': traverse_obj(details, ('stats', 'shareCount'), expected_type=int), - 'comment_count': traverse_obj(details, ('stats', 'commentCount'), expected_type=int), - } diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index b90110c7f..71e4cd4cf 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -340,7 +340,6 @@ from .discoveryplusindia import ( DiscoveryPlusIndiaShowIE, ) from .dotsub import DotsubIE -from .douyin import DouyinIE from .douyutv import ( DouyuShowIE, DouyuTVIE, @@ -1445,6 +1444,7 @@ from .threeqsdn import ThreeQSDNIE from .tiktok import ( TikTokIE, TikTokUserIE, + DouyinIE, ) from .tinypic import TinyPicIE from .tmz import TMZIE diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 4b0efd4a3..fc0915fb0 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -8,12 +8,14 @@ import time import json from .common import InfoExtractor +from ..compat import compat_urllib_parse_unquote from ..utils import ( ExtractorError, int_or_none, str_or_none, traverse_obj, try_get, + url_or_none, qualities, ) @@ -21,6 +23,10 @@ from ..utils import ( class TikTokBaseIE(InfoExtractor): _APP_VERSION = '20.9.3' _MANIFEST_APP_VERSION = '291' + _APP_NAME = 'trill' + _AID = 1180 + _API_HOSTNAME = 'api-t2.tiktokv.com' + _UPLOADER_URL_FORMAT = 'https://www.tiktok.com/@%s' QUALITIES = ('360p', '540p', '720p') def _call_api(self, ep, query, video_id, fatal=True, @@ -46,7 +52,7 @@ class TikTokBaseIE(InfoExtractor): 'carrier_region': 'US', 'sys_region': 'US', 'region': 'US', - 'app_name': 'trill', 
+ 'app_name': self._APP_NAME, 'app_language': 'en', 'language': 'en', 'timezone_name': 'America/New_York', @@ -55,20 +61,20 @@ class TikTokBaseIE(InfoExtractor): 'ac': 'wifi', 'mcc_mnc': '310260', 'is_my_cn': 0, - 'aid': 1180, + 'aid': self._AID, 'ssmix': 'a', 'as': 'a1qwert123', 'cp': 'cbfhckdckkde1', } - self._set_cookie('.tiktokv.com', 'odin_tt', ''.join(random.choice('0123456789abcdef') for i in range(160))) + self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choice('0123456789abcdef') for i in range(160))) return self._download_json( - 'https://api-t2.tiktokv.com/aweme/v1/%s/' % ep, video_id=video_id, + 'https://%s/aweme/v1/%s/' % (self._API_HOSTNAME, ep), video_id=video_id, fatal=fatal, note=note, errnote=errnote, headers={ 'User-Agent': f'com.ss.android.ugc.trill/{self._MANIFEST_APP_VERSION} (Linux; U; Android 10; en_US; Pixel 4; Build/QQ3A.200805.001; Cronet/58.0.2991.0)', 'Accept': 'application/json', }, query=real_query) - def _parse_aweme_video(self, aweme_detail): + def _parse_aweme_video_app(self, aweme_detail): aweme_id = aweme_detail['aweme_id'] video_info = aweme_detail['video'] @@ -146,6 +152,7 @@ class TikTokBaseIE(InfoExtractor): 'tbr': try_get(bitrate, lambda x: x['bit_rate'] / 1000), 'vcodec': 'h265' if traverse_obj( bitrate, 'is_bytevc1', 'is_h265') else 'h264', + 'fps': bitrate.get('FPS'), })) self._remove_duplicate_formats(formats) @@ -165,7 +172,9 @@ class TikTokBaseIE(InfoExtractor): stats_info = aweme_detail.get('statistics', {}) author_info = aweme_detail.get('author', {}) music_info = aweme_detail.get('music', {}) - user_id = str_or_none(author_info.get('nickname')) + user_url = self._UPLOADER_URL_FORMAT % (traverse_obj(author_info, + 'sec_uid', 'id', 'uid', 'unique_id', + expected_type=str_or_none, get_all=False)) contained_music_track = traverse_obj( music_info, ('matched_song', 'title'), ('matched_pgc_sound', 'title'), expected_type=str) @@ -187,9 +196,9 @@ class TikTokBaseIE(InfoExtractor): 'repost_count': int_or_none(stats_info.get('share_count')), 'comment_count': int_or_none(stats_info.get('comment_count')), 'uploader': str_or_none(author_info.get('unique_id')), - 'creator': user_id, + 'creator': str_or_none(author_info.get('nickname')), 'uploader_id': str_or_none(author_info.get('uid')), - 'uploader_url': f'https://www.tiktok.com/@{user_id}' if user_id else None, + 'uploader_url': user_url, 'track': music_track, 'album': str_or_none(music_info.get('album')) or None, 'artist': music_author, @@ -199,6 +208,79 @@ class TikTokBaseIE(InfoExtractor): 'duration': int_or_none(traverse_obj(video_info, 'duration', ('download_addr', 'duration')), scale=1000) } + def _parse_aweme_video_web(self, aweme_detail, webpage, url): + video_info = aweme_detail['video'] + author_info = traverse_obj(aweme_detail, 'author', 'authorInfo', default={}) + music_info = aweme_detail.get('music') or {} + stats_info = aweme_detail.get('stats') or {} + user_url = self._UPLOADER_URL_FORMAT % (traverse_obj(author_info, + 'secUid', 'id', 'uid', 'uniqueId', + expected_type=str_or_none, get_all=False)) + + formats = [] + play_url = video_info.get('playAddr') + width = video_info.get('width') + height = video_info.get('height') + if isinstance(play_url, str): + formats = [{ + 'url': self._proto_relative_url(play_url), + 'ext': 'mp4', + 'width': width, + 'height': height, + }] + elif isinstance(play_url, list): + formats = [{ + 'url': self._proto_relative_url(url), + 'ext': 'mp4', + 'width': width, + 'height': height, + } for url in traverse_obj(play_url, (..., 'src'), 
expected_type=url_or_none, default=[]) if url] + + download_url = url_or_none(video_info.get('downloadAddr')) or traverse_obj(video_info, ('download', 'url'), expected_type=url_or_none) + if download_url: + formats.append({ + 'format_id': 'download', + 'url': self._proto_relative_url(download_url), + 'ext': 'mp4', + 'width': width, + 'height': height, + }) + self._remove_duplicate_formats(formats) + self._sort_formats(formats) + + thumbnails = [] + for thumbnail_name in ('thumbnail', 'cover', 'dynamicCover', 'originCover'): + if aweme_detail.get(thumbnail_name): + thumbnails = [{ + 'url': self._proto_relative_url(aweme_detail[thumbnail_name]), + 'width': width, + 'height': height + }] + + return { + 'id': traverse_obj(aweme_detail, 'id', 'awemeId', expected_type=str_or_none), + 'title': aweme_detail.get('desc'), + 'duration': try_get(aweme_detail, lambda x: x['video']['duration'], int), + 'view_count': int_or_none(stats_info.get('playCount')), + 'like_count': int_or_none(stats_info.get('diggCount')), + 'repost_count': int_or_none(stats_info.get('shareCount')), + 'comment_count': int_or_none(stats_info.get('commentCount')), + 'timestamp': int_or_none(aweme_detail.get('createTime')), + 'creator': str_or_none(author_info.get('nickname')), + 'uploader': str_or_none(author_info.get('uniqueId')), + 'uploader_id': str_or_none(author_info.get('id')), + 'uploader_url': user_url, + 'track': str_or_none(music_info.get('title')), + 'album': str_or_none(music_info.get('album')) or None, + 'artist': str_or_none(music_info.get('authorName')), + 'formats': formats, + 'thumbnails': thumbnails, + 'description': str_or_none(aweme_detail.get('desc')), + 'http_headers': { + 'Referer': url + } + } + class TikTokIE(TikTokBaseIE): _VALID_URL = r'https?://www\.tiktok\.com/@[\w\.-]+/video/(?P\d+)' @@ -255,60 +337,10 @@ class TikTokIE(TikTokBaseIE): 'only_matching': True, }] - def _extract_aweme(self, props_data, webpage, url): - video_info = try_get( - props_data, lambda x: x['pageProps']['itemInfo']['itemStruct'], dict) - author_info = try_get( - props_data, lambda x: x['pageProps']['itemInfo']['itemStruct']['author'], dict) or {} - music_info = try_get( - props_data, lambda x: x['pageProps']['itemInfo']['itemStruct']['music'], dict) or {} - stats_info = try_get(props_data, lambda x: x['pageProps']['itemInfo']['itemStruct']['stats'], dict) or {} - - user_id = str_or_none(author_info.get('uniqueId')) - download_url = try_get(video_info, (lambda x: x['video']['playAddr'], - lambda x: x['video']['downloadAddr'])) - height = try_get(video_info, lambda x: x['video']['height'], int) - width = try_get(video_info, lambda x: x['video']['width'], int) - thumbnails = [{ - 'url': video_info.get('thumbnail') or self._og_search_thumbnail(webpage), - 'width': width, - 'height': height - }] - tracker = try_get(props_data, lambda x: x['initialProps']['$wid']) - - return { - 'id': str_or_none(video_info.get('id')), - 'url': download_url, - 'ext': 'mp4', - 'height': height, - 'width': width, - 'title': video_info.get('desc') or self._og_search_title(webpage), - 'duration': try_get(video_info, lambda x: x['video']['duration'], int), - 'view_count': int_or_none(stats_info.get('playCount')), - 'like_count': int_or_none(stats_info.get('diggCount')), - 'repost_count': int_or_none(stats_info.get('shareCount')), - 'comment_count': int_or_none(stats_info.get('commentCount')), - 'timestamp': try_get(video_info, lambda x: int(x['createTime']), int), - 'creator': str_or_none(author_info.get('nickname')), - 'uploader': user_id, - 'uploader_id': 
str_or_none(author_info.get('id')), - 'uploader_url': f'https://www.tiktok.com/@{user_id}', - 'track': str_or_none(music_info.get('title')), - 'album': str_or_none(music_info.get('album')) or None, - 'artist': str_or_none(music_info.get('authorName')), - 'thumbnails': thumbnails, - 'description': str_or_none(video_info.get('desc')), - 'webpage_url': self._og_search_url(webpage), - 'http_headers': { - 'Referer': url, - 'Cookie': 'tt_webid=%s; tt_webid_v2=%s' % (tracker, tracker), - } - } - def _extract_aweme_app(self, aweme_id): aweme_detail = self._call_api('aweme/detail', {'aweme_id': aweme_id}, aweme_id, note='Downloading video details', errnote='Unable to download video details')['aweme_detail'] - return self._parse_aweme_video(aweme_detail) + return self._parse_aweme_video_app(aweme_detail) def _real_extract(self, url): video_id = self._match_id(url) @@ -330,7 +362,7 @@ class TikTokIE(TikTokBaseIE): # Chech statusCode for success status = props_data.get('pageProps').get('statusCode') if status == 0: - return self._extract_aweme(props_data, webpage, url) + return self._parse_aweme_video_web(props_data['pageProps']['itemInfo']['itemStruct'], webpage, url) elif status == 10216: raise ExtractorError('This video is private', expected=True) @@ -413,3 +445,115 @@ class TikTokUserIE(TikTokBaseIE): }) own_id = self._html_search_regex(r'snssdk\d*://user/profile/(\d+)', webpage, 'user ID') return self.playlist_result(self._entries_api(webpage, own_id, user_id), user_id) + + +class DouyinIE(TikTokIE): + _VALID_URL = r'https?://(?:www\.)?douyin\.com/video/(?P[0-9]+)' + _TESTS = [{ + 'url': 'https://www.douyin.com/video/6961737553342991651', + 'md5': '10523312c8b8100f353620ac9dc8f067', + 'info_dict': { + 'id': '6961737553342991651', + 'ext': 'mp4', + 'title': '#杨超越 小小水手带你去远航❤️', + 'uploader': '杨超越', + 'upload_date': '20210513', + 'timestamp': 1620905839, + 'uploader_id': '110403406559', + 'view_count': int, + 'like_count': int, + 'repost_count': int, + 'comment_count': int, + } + }, { + 'url': 'https://www.douyin.com/video/6982497745948921092', + 'md5': 'd78408c984b9b5102904cf6b6bc2d712', + 'info_dict': { + 'id': '6982497745948921092', + 'ext': 'mp4', + 'title': '这个夏日和小羊@杨超越 一起遇见白色幻想', + 'uploader': '杨超越工作室', + 'upload_date': '20210708', + 'timestamp': 1625739481, + 'uploader_id': '408654318141572', + 'view_count': int, + 'like_count': int, + 'repost_count': int, + 'comment_count': int, + } + }, { + 'url': 'https://www.douyin.com/video/6953975910773099811', + 'md5': '72e882e24f75064c218b76c8b713c185', + 'info_dict': { + 'id': '6953975910773099811', + 'ext': 'mp4', + 'title': '#一起看海 出现在你的夏日里', + 'uploader': '杨超越', + 'upload_date': '20210422', + 'timestamp': 1619098692, + 'uploader_id': '110403406559', + 'view_count': int, + 'like_count': int, + 'repost_count': int, + 'comment_count': int, + } + }, { + 'url': 'https://www.douyin.com/video/6950251282489675042', + 'md5': 'b4db86aec367ef810ddd38b1737d2fed', + 'info_dict': { + 'id': '6950251282489675042', + 'ext': 'mp4', + 'title': '哈哈哈,成功了哈哈哈哈哈哈', + 'uploader': '杨超越', + 'upload_date': '20210412', + 'timestamp': 1618231483, + 'uploader_id': '110403406559', + 'view_count': int, + 'like_count': int, + 'repost_count': int, + 'comment_count': int, + } + }, { + 'url': 'https://www.douyin.com/video/6963263655114722595', + 'md5': '1abe1c477d05ee62efb40bf2329957cf', + 'info_dict': { + 'id': '6963263655114722595', + 'ext': 'mp4', + 'title': '#哪个爱豆的105度最甜 换个角度看看我哈哈', + 'uploader': '杨超越', + 'upload_date': '20210517', + 'timestamp': 1621261163, + 'uploader_id': 
'110403406559', + 'view_count': int, + 'like_count': int, + 'repost_count': int, + 'comment_count': int, + } + }] + _APP_VERSION = '9.6.0' + _MANIFEST_APP_VERSION = '960' + _APP_NAME = 'aweme' + _AID = 1128 + _API_HOSTNAME = 'aweme.snssdk.com' + _UPLOADER_URL_FORMAT = 'https://www.douyin.com/user/%s' + + def _real_extract(self, url): + video_id = self._match_id(url) + + try: + return self._extract_aweme_app(video_id) + except ExtractorError as e: + self.report_warning(f'{e}; Retrying with webpage') + + webpage = self._download_webpage(url, video_id) + render_data_json = self._search_regex( + r'<script [^>]*\bid=[\'"]RENDER_DATA[\'"][^>]*>(%7B.+%7D)</script>', + webpage, 'render data', default=None) + if not render_data_json: + # TODO: Run verification challenge code to generate signature cookies + raise ExtractorError('Fresh cookies (not necessarily logged in) are needed') + + render_data = self._parse_json( + render_data_json, video_id, transform_source=compat_urllib_parse_unquote) + return self._parse_aweme_video_web( + traverse_obj(render_data, (..., 'aweme', 'detail'), get_all=False), webpage, url) From 755203fc3fd33c257e582377c67790e1d4e0bfb6 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Mon, 4 Oct 2021 19:09:00 +0000 Subject: [PATCH 0005/2552] [parliamentlive.tv] Fix extractor (#1153) Closes #1139 Authored by: u-spec-png --- yt_dlp/extractor/parliamentliveuk.py | 76 +++++++++++++++++++++------- 1 file changed, 58 insertions(+), 18 deletions(-) diff --git a/yt_dlp/extractor/parliamentliveuk.py b/yt_dlp/extractor/parliamentliveuk.py index bdd5ff565..869ebd865 100644 --- a/yt_dlp/extractor/parliamentliveuk.py +++ b/yt_dlp/extractor/parliamentliveuk.py @@ -1,6 +1,14 @@ +# coding: utf-8 from __future__ import unicode_literals +import json +import uuid + from .common import InfoExtractor +from ..utils import ( + unified_timestamp, + try_get, +) class ParliamentLiveUKIE(InfoExtractor): @@ -11,12 +19,14 @@ class ParliamentLiveUKIE(InfoExtractor): _TESTS = [{ 'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b', 'info_dict': { - 'id': '1_af9nv9ym', + 'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b', 'ext': 'mp4', 'title': 'Home Affairs Committee', - 'uploader_id': 'FFMPEG-01', - 'timestamp': 1422696664, - 'upload_date': '20150131', + 'timestamp': 1395153872, + 'upload_date': '20140318', + }, + 'params': { + 'format': 'bestvideo', + }, }, { 'url': 'http://parliamentlive.tv/event/index/3f24936f-130f-40bf-9a5d-b3d6479da6a4', 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage( - 'http://vodplayer.parliamentlive.tv/?mid=' + video_id, video_id) - widget_config = self._parse_json(self._search_regex( - r'(?s)kWidgetConfig\s*=\s*({.+});', - webpage, 'kaltura widget config'), video_id) - kaltura_url = 'kaltura:%s:%s' % ( - widget_config['wid'][1:], widget_config['entry_id']) - event_title = self._download_json( - 'http://parliamentlive.tv/Event/GetShareVideo/' + video_id, video_id)['event']['title'] + video_info = self._download_json(f'https://www.parliamentlive.tv/Event/GetShareVideo/{video_id}', video_id) + _DEVICE_ID = str(uuid.uuid4()) + auth = 'Bearer ' + self._download_json( + 'https://exposure.api.redbee.live/v2/customer/UKParliament/businessunit/ParliamentLive/auth/anonymous', + video_id, headers={ + 'Origin': 'https://videoplayback.parliamentlive.tv', + 'Accept': 'application/json, text/plain, */*', + 'Content-Type': 'application/json;charset=utf-8' + }, data=json.dumps({ + 
'deviceId': _DEVICE_ID, + 'device': { + 'deviceId': _DEVICE_ID, + 'width': 653, + 'height': 368, + 'type': 'WEB', + 'name': ' Mozilla Firefox 91' + } + }).encode('utf-8'))['sessionToken'] + + video_urls = self._download_json( + f'https://exposure.api.redbee.live/v2/customer/UKParliament/businessunit/ParliamentLive/entitlement/{video_id}/play', + video_id, headers={'Authorization': auth, 'Accept': 'application/json, text/plain, */*'})['formats'] + + formats = [] + for format in video_urls: + if not format.get('mediaLocator'): + continue + if format.get('format') == 'DASH': + formats.extend(self._extract_mpd_formats( + format['mediaLocator'], video_id, mpd_id='dash', fatal=False)) + elif format.get('format') == 'SMOOTHSTREAMING': + formats.extend(self._extract_ism_formats( + format['mediaLocator'], video_id, ism_id='ism', fatal=False)) + elif format.get('format') == 'HLS': + formats.extend(self._extract_m3u8_formats( + format['mediaLocator'], video_id, m3u8_id='hls', fatal=False)) + + self._sort_formats(formats) + return { - '_type': 'url_transparent', - 'title': event_title, - 'description': '', - 'url': kaltura_url, - 'ie_key': 'Kaltura', + 'id': video_id, + 'formats': formats, + 'title': video_info['event']['title'], + 'timestamp': unified_timestamp(try_get(video_info, lambda x: x['event']['publishedStartTime'])), + 'thumbnail': video_info.get('thumbnailUrl'), } From 0f0ac87be3fc55cab8fec767c446431a8ce085f3 Mon Sep 17 00:00:00 2001 From: makeworld <25111343+makeworld-the-better-one@users.noreply.github.com> Date: Mon, 4 Oct 2021 15:11:00 -0400 Subject: [PATCH 0006/2552] [CBC] Cleanup tests (#1162) Related: #1013 Authored by: makeworld-the-better-one --- yt_dlp/extractor/cbc.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py index 061b09908..5e4526c53 100644 --- a/yt_dlp/extractor/cbc.py +++ b/yt_dlp/extractor/cbc.py @@ -202,7 +202,6 @@ class CBCGemIE(InfoExtractor): IE_NAME = 'gem.cbc.ca' _VALID_URL = r'https?://gem\.cbc\.ca/media/(?P<id>[0-9a-z-]+/s[0-9]+[a-z][0-9]+)' _TESTS = [{ - # geo-restricted to Canada, bypassable # This is a normal, public, TV show video 'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01', 'md5': '93dbb31c74a8e45b378cf13bd3f6f11e', @@ -224,7 +223,6 @@ class CBCGemIE(InfoExtractor): 'params': {'format': 'bv'}, 'skip': 'Geo-restricted to Canada', }, { - # geo-restricted to Canada, bypassable # This video requires an account in the browser, but works fine in yt-dlp 'url': 'https://gem.cbc.ca/media/schitts-creek/s01e01', 'md5': '297a9600f554f2258aed01514226a697', @@ -313,7 +311,6 @@ class CBCGemPlaylistIE(InfoExtractor): IE_NAME = 'gem.cbc.ca:playlist' _VALID_URL = r'https?://gem\.cbc\.ca/media/(?P<id>(?P<show>[0-9a-z-]+)/s(?P<season>[0-9]+))/?(?:[?#]|$)' _TESTS = [{ - # geo-restricted to Canada, bypassable # TV show playlist, all public videos 'url': 'https://gem.cbc.ca/media/schitts-creek/s06', 'playlist_count': 16, @@ -322,7 +319,6 @@ class CBCGemPlaylistIE(InfoExtractor): 'title': 'Season 6', 'description': 'md5:6a92104a56cbeb5818cc47884d4326a2', }, - 'skip': 'Geo-restricted to Canada', }] _API_BASE = 'https://services.radio-canada.ca/ott/cbc-api/v2/shows/' From d92125aeba4eefe8ef2c4f9ead8af99dd33ff0d4 Mon Sep 17 00:00:00 2001 From: i6t <62123048+i6t@users.noreply.github.com> Date: Tue, 5 Oct 2021 04:23:37 +0900 Subject: [PATCH 0007/2552] [GoPro] Add extractor (#1167) Fixes: https://github.com/ytdl-org/youtube-dl/issues/30044 Authored by: i6t --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/gopro.py | 110 
+++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+) create mode 100644 yt_dlp/extractor/gopro.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 71e4cd4cf..8c5b8b160 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -524,6 +524,7 @@ from .googlepodcasts import ( GooglePodcastsFeedIE, ) from .googlesearch import GoogleSearchIE +from .gopro import GoProIE from .goshgay import GoshgayIE from .gotostage import GoToStageIE from .gputechconf import GPUTechConfIE diff --git a/yt_dlp/extractor/gopro.py b/yt_dlp/extractor/gopro.py new file mode 100644 index 000000000..10cc1aec1 --- /dev/null +++ b/yt_dlp/extractor/gopro.py @@ -0,0 +1,110 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + remove_end, + str_or_none, + try_get, + unified_timestamp, + url_or_none, +) + + +class GoProIE(InfoExtractor): + _VALID_URL = r'https?://(www\.)?gopro\.com/v/(?P<id>[A-Za-z0-9]+)' + + _TESTS = [{ + 'url': 'https://gopro.com/v/ZNVvED8QDzR5V', + 'info_dict': { + 'id': 'ZNVvED8QDzR5V', + 'title': 'My GoPro Adventure - 9/19/21', + 'thumbnail': r're:https?://.+', + 'ext': 'mp4', + 'timestamp': 1632072947, + 'upload_date': '20210919', + 'uploader_id': 'fireydive30018', + 'duration': 396062, + } + }, { + 'url': 'https://gopro.com/v/KRm6Vgp2peg4e', + 'info_dict': { + 'id': 'KRm6Vgp2peg4e', + 'title': 'じゃがいも カリカリ オーブン焼き', + 'thumbnail': r're:https?://.+', + 'ext': 'mp4', + 'timestamp': 1607231125, + 'upload_date': '20201206', + 'uploader_id': 'dc9bcb8b-47d2-47c6-afbc-4c48f9a3769e', + 'duration': 45187, + 'track': 'The Sky Machine', + } + }, { + 'url': 'https://gopro.com/v/kVrK9wlJvBMwn', + 'info_dict': { + 'id': 'kVrK9wlJvBMwn', + 'title': 'DARKNESS', + 'thumbnail': r're:https?://.+', + 'ext': 'mp4', + 'timestamp': 1594183735, + 'upload_date': '20200708', + 'uploader_id': '闇夜乃皇帝', + 'duration': 313075, + 'track': 'Battery (Live)', + 'artist': 'Metallica', + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + metadata = self._parse_json( + self._html_search_regex(r'window\.__reflectData\s*=\s*([^;]+)', webpage, 'metadata'), video_id) + + video_info = metadata['collectionMedia'][0] + media_data = self._download_json( + 'https://api.gopro.com/media/%s/download' % video_info['id'], video_id) + + formats = [] + for fmt in try_get(media_data, lambda x: x['_embedded']['variations']) or []: + format_url = url_or_none(fmt.get('url')) + if not format_url: + continue + formats.append({ + 'url': format_url, + 'format_id': str_or_none(fmt.get('quality')), + 'format_note': str_or_none(fmt.get('label')), + 'ext': str_or_none(fmt.get('type')), + 'width': int_or_none(fmt.get('width')), + 'height': int_or_none(fmt.get('height')), + }) + + self._sort_formats(formats) + + title = str_or_none( + try_get(metadata, lambda x: x['collection']['title']) + or self._html_search_meta(['og:title', 'twitter:title'], webpage) + or remove_end(self._html_search_regex( + r'<title[^>]*>([^<]+)</title>', webpage, 'title', fatal=False), ' | GoPro')) + if title: + title = title.replace('\n', ' ') + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': url_or_none( + self._html_search_meta(['og:image', 'twitter:image'], webpage)), + 'timestamp': unified_timestamp( + try_get(metadata, lambda x: x['collection']['created_at'])), + 'uploader_id': str_or_none( + try_get(metadata, lambda x: 
x['account']['nickname'])), + 'duration': int_or_none( + video_info.get('source_duration')), + 'artist': str_or_none( + video_info.get('music_track_artist')), + 'track': str_or_none( + video_info.get('music_track_name')), + } From 762e509d91be50546f62fc5c717280839b83c1e2 Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Tue, 5 Oct 2021 08:30:57 +1300 Subject: [PATCH 0008/2552] [Mediaite] Relax valid url (#1158) Closes #1131 Authored by: coletdjnz --- yt_dlp/extractor/mediaite.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/mediaite.py b/yt_dlp/extractor/mediaite.py index 646c92223..b670f0d61 100644 --- a/yt_dlp/extractor/mediaite.py +++ b/yt_dlp/extractor/mediaite.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class MediaiteIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?mediaite.com/(?:tv|sports|politics|podcasts|opinion)/[\w-]+/' + _VALID_URL = r'https?://(?:www\.)?mediaite.com(?!/category)(?:/[\w-]+){2}' _TESTS = [{ 'url': 'https://www.mediaite.com/sports/bill-burr-roasts-nfl-for-promoting-black-lives-matter-while-scheduling-more-games-after-all-the-sht-they-know-about-cte/', 'info_dict': { @@ -71,6 +71,19 @@ class MediaiteIE(InfoExtractor): 'upload_date': '20210913', }, 'params': {'skip_download': True} + }, { + 'url': 'https://www.mediaite.com/news/watch-cnbcs-jim-cramer-says-nobody-wants-to-die-getting-infected-by-unvaccinated-coworker-even-for-22-an-hour/', + 'info_dict': { + 'id': 'nwpt1elX', + 'ext': 'mp4', + 'title': "CNBC's Jim Cramer Says Nobody Wants to Die Getting Infected by Unvaccinated Coworker 'Even for $22 an Hour'.mp4", + 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', + 'thumbnail': 'https://cdn.jwplayer.com/v2/media/nwpt1elX/poster.jpg?width=720', + 'duration': 60, + 'timestamp': 1633014214, + 'upload_date': '20210930', + }, + 'params': {'skip_download': True} }] def _real_extract(self, url): From f85e6be42ec5e65c07a3f99927ca9dfe81d683f0 Mon Sep 17 00:00:00 2001 From: shirt <2660574+shirt-dev@users.noreply.github.com> Date: Tue, 5 Oct 2021 13:37:58 -0400 Subject: [PATCH 0009/2552] [build] Use pycryptodomex for PyInstaller (#1179) --- .github/workflows/build.yml | 4 ++-- .github/workflows/quick-test.yml | 2 +- README.md | 6 +++--- pyinst.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4f983f2c1..324cf7eb6 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -154,7 +154,7 @@ jobs: run: python -m pip install --upgrade pip setuptools wheel - name: Install Requirements # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds - run: pip install "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-4.5.1-py3-none-any.whl" mutagen pycryptodome websockets + run: pip install "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-4.5.1-py3-none-any.whl" mutagen pycryptodomex websockets - name: Bump version id: bump_version run: python devscripts/update-version.py @@ -220,7 +220,7 @@ jobs: - name: Upgrade pip and enable wheel support run: python -m pip install --upgrade pip setuptools wheel - name: Install Requirements - run: pip install "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-4.5.1-py3-none-any.whl" mutagen pycryptodome websockets + run: pip install "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-4.5.1-py3-none-any.whl" mutagen pycryptodomex websockets - name: Bump version id: bump_version run: python 
devscripts/update-version.py diff --git a/.github/workflows/quick-test.yml b/.github/workflows/quick-test.yml index 500a504a4..bbad209b3 100644 --- a/.github/workflows/quick-test.yml +++ b/.github/workflows/quick-test.yml @@ -12,7 +12,7 @@ jobs: with: python-version: 3.9 - name: Install test requirements - run: pip install pytest pycryptodome + run: pip install pytest pycryptodomex - name: Run tests run: ./devscripts/run_tests.sh core flake8: diff --git a/README.md b/README.md index d219b28d3..cf46360a9 100644 --- a/README.md +++ b/README.md @@ -207,7 +207,7 @@ While all the other dependancies are optional, `ffmpeg` and `ffprobe` are highly To use or redistribute the dependencies, you must agree to their respective licensing terms. -The windows releases are already built with the python interpreter, mutagen, pycryptodome and websockets included. +The windows releases are already built with the python interpreter, mutagen, pycryptodomex and websockets included. **Note**: There are some regressions in newer ffmpeg versions that causes various issues when used alongside yt-dlp. Since ffmpeg is such an important dependancy, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds/wiki/Latest#latest-autobuilds) with patches for these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specifc issues solved by these builds @@ -215,9 +215,9 @@ The windows releases are already built with the python interpreter, mutagen, pyc ### COMPILE **For Windows**: -To build the Windows executable, you must have pyinstaller (and optionally mutagen, pycryptodome, websockets) +To build the Windows executable, you must have pyinstaller (and optionally mutagen, pycryptodomex, websockets) - python3 -m pip install --upgrade pyinstaller mutagen pycryptodome websockets + python3 -m pip install --upgrade pyinstaller mutagen pycryptodomex websockets Once you have all the necessary dependencies installed, just run `py pyinst.py`. The executable will be built for the same architecture (32/64 bit) as the python used to build it. diff --git a/pyinst.py b/pyinst.py index 7e040647c..be1e00caa 100644 --- a/pyinst.py +++ b/pyinst.py @@ -76,7 +76,7 @@ VERSION_FILE = VSVersionInfo( ] ) -dependancies = ['Crypto', 'mutagen'] + collect_submodules('websockets') +dependancies = ['Cryptodome', 'mutagen'] + collect_submodules('websockets') excluded_modules = ['test', 'ytdlp_plugins', 'youtube-dl', 'youtube-dlc'] PyInstaller.__main__.run([ From 4e3d1898a802b3729a56fabecbcd5a641a6ab19c Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 5 Oct 2021 08:32:05 +0530 Subject: [PATCH 0010/2552] Workaround ssl errors in mingw python Closes #1151 --- yt_dlp/utils.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index b79b79688..8b5b15103 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2373,13 +2373,20 @@ def make_HTTPS_handler(params, **kwargs): context.check_hostname = opts_check_certificate context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE if opts_check_certificate: - # Work around the issue in load_default_certs when there are bad certificates. 
See: - # https://github.com/yt-dlp/yt-dlp/issues/1060, - # https://bugs.python.org/issue35665, https://bugs.python.org/issue4531 - if sys.platform == 'win32': - for storename in ('CA', 'ROOT'): - _ssl_load_windows_store_certs(context, storename) - context.set_default_verify_paths() + try: + context.load_default_certs() + # Work around the issue in load_default_certs when there are bad certificates. See: + # https://github.com/yt-dlp/yt-dlp/issues/1060, + # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312 + except ssl.SSLError: + # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151 + if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'): + # Create a new context to discard any certificates that were already loaded + context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) + context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED + for storename in ('CA', 'ROOT'): + _ssl_load_windows_store_certs(context, storename) + context.set_default_verify_paths() return YoutubeDLHTTPSHandler(params, context=context, **kwargs) From 644149afec99b2db4c1cc1286eb5c753ac187c44 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 5 Oct 2021 08:33:36 +0530 Subject: [PATCH 0011/2552] [soundcloud:playlist] Detect last page correctly Closes #1168 --- yt_dlp/extractor/soundcloud.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index 1503ae586..ad3a32a02 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -707,6 +707,8 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudIE): yield resolve_entry(e, e.get('track'), e.get('playlist')) url = response.get('next_href') + if not url: + break query.pop('offset', None) From 1b6bb4a85a74028111597e1a683914bb33615ef8 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 5 Oct 2021 08:34:05 +0530 Subject: [PATCH 0012/2552] [reddit] bugfix for 8e3fd7e034cdd54972d13394821cd9e55e1c3735 --- yt_dlp/extractor/reddit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py index e5a1f6920..c75d95a8e 100644 --- a/yt_dlp/extractor/reddit.py +++ b/yt_dlp/extractor/reddit.py @@ -109,7 +109,7 @@ class RedditRIE(InfoExtractor): self._set_cookie('.reddit.com', 'reddit_session', self._gen_session_id()) self._set_cookie('.reddit.com', '_options', '%7B%22pref_quarantine_optin%22%3A%20true%7D') - data = self._download_json(f'https://{subdomain}.reddit.com/r/{slug}/.json', video_id, fatal=False) + data = self._download_json(f'https://{subdomain}reddit.com/r/{slug}/.json', video_id, fatal=False) if not data: # Fall back to old.reddit.com in case the requested subdomain fails data = self._download_json(f'https://old.reddit.com/r/{slug}/.json', video_id) From 519804a92fbc065e35b752ca160dcef3f3656ef7 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 5 Oct 2021 09:45:46 +0530 Subject: [PATCH 0013/2552] bugfix for 80c03fa98fdd54410bd36684ef453f6976a9c0bf --- yt_dlp/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index f009e9e19..3abb43000 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3434,7 +3434,7 @@ class YoutubeDL(object): except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err: self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}') continue - return ret + return ret def _write_thumbnails(self, label, 
info_dict, filename, thumb_filename_base=None): ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) ''' From 1276a43a77144567fc575d6aaec5b5f8468b7d56 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 4 Oct 2021 02:44:55 +0530 Subject: [PATCH 0014/2552] [youtube] Fix non-fatal errors in fetching player --- yt_dlp/extractor/youtube.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 159b0a3b9..56de2ef59 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -1911,10 +1911,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _load_player(self, video_id, player_url, fatal=True) -> bool: player_id = self._extract_player_info(player_url) if player_id not in self._code_cache: - self._code_cache[player_id] = self._download_webpage( + code = self._download_webpage( player_url, video_id, fatal=fatal, note='Downloading player ' + player_id, errnote='Download of %s failed' % player_url) + if code: + self._code_cache[player_id] = code return player_id in self._code_cache def _extract_signature_function(self, video_id, player_url, example_sig): From 84726743993295f6105ed9ef5412040b8842e4c6 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 6 Oct 2021 05:43:22 +0530 Subject: [PATCH 0015/2552] [FixupM3u8] Do not run if merge is needed We pass the relevant arguments to the merger, so separate fixup in redundant --- yt_dlp/YoutubeDL.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 3abb43000..770f62734 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2820,7 +2820,8 @@ class YoutubeDL(object): downloader = (get_suitable_downloader(info_dict, self.params).__name__ if 'protocol' in info_dict else None) - ffmpeg_fixup(downloader == 'HlsFD', 'malformed AAC bitstream detected', FFmpegFixupM3u8PP) + ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD', + 'malformed AAC bitstream detected', FFmpegFixupM3u8PP) ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP) ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP) From 49e7e9c3ce9d5294f024757cbbfedd6c9d0623be Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 6 Oct 2021 06:34:10 +0530 Subject: [PATCH 0016/2552] [docs,build] Change all pycryptodome references to pycryptodomex --- README.md | 4 ++-- pyinst.py | 22 ++++++++++++++++------ requirements.txt | 2 +- setup.py | 2 +- yt_dlp/downloader/hls.py | 4 ++-- yt_dlp/extractor/ivi.py | 2 +- 6 files changed, 23 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index cf46360a9..3c73d3aac 100644 --- a/README.md +++ b/README.md @@ -195,7 +195,7 @@ On windows, [Microsoft Visual C++ 2010 SP1 Redistributable Package (x86)](https: While all the other dependancies are optional, `ffmpeg` and `ffprobe` are highly recommended * [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging seperate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. Licence [depends on the build](https://www.ffmpeg.org/legal.html) * [**mutagen**](https://github.com/quodlibet/mutagen) - For embedding thumbnail in certain formats. 
Licenced under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING) -* [**pycryptodome**](https://github.com/Legrandin/pycryptodome) - For decrypting AES-128 HLS streams and various other data. Licenced under [BSD2](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst) +* [**pycryptodomex**](https://github.com/Legrandin/pycryptodomex) - For decrypting AES-128 HLS streams and various other data. Licenced under [BSD2](https://github.com/Legrandin/pycryptodomex/blob/master/LICENSE.rst) * [**websockets**](https://github.com/aaugustin/websockets) - For downloading over websocket. Licenced under [BSD3](https://github.com/aaugustin/websockets/blob/main/LICENSE) * [**keyring**](https://github.com/jaraco/keyring) - For decrypting cookies of chromium-based browsers on Linux. Licenced under [MIT](https://github.com/jaraco/keyring/blob/main/LICENSE) * [**AtomicParsley**](https://github.com/wez/atomicparsley) - For embedding thumbnail in mp4/m4a if mutagen is not present. Licenced under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING) @@ -217,7 +217,7 @@ The windows releases are already built with the python interpreter, mutagen, pyc **For Windows**: To build the Windows executable, you must have pyinstaller (and optionally mutagen, pycryptodomex, websockets) - python3 -m pip install --upgrade pyinstaller mutagen pycryptodomex websockets + python3 -m pip install -U -r requirements.txt Once you have all the necessary dependencies installed, just run `py pyinst.py`. The executable will be built for the same architecture (32/64 bit) as the python used to build it. diff --git a/pyinst.py b/pyinst.py index be1e00caa..ed410e0f2 100644 --- a/pyinst.py +++ b/pyinst.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals import sys -# import os import platform from PyInstaller.utils.hooks import collect_submodules @@ -29,10 +28,6 @@ print(f'Building {arch}bit version with options {opts}') FILE_DESCRIPTION = 'yt-dlp%s' % (' (32 Bit)' if _x86 else '') -# root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) -# print('Changing working directory to %s' % root_dir) -# os.chdir(root_dir) - exec(compile(open('yt_dlp/version.py').read(), 'yt_dlp/version.py', 'exec')) VERSION = locals()['__version__'] @@ -76,7 +71,22 @@ VERSION_FILE = VSVersionInfo( ] ) -dependancies = ['Cryptodome', 'mutagen'] + collect_submodules('websockets') + +def pycryptodome_module(): + try: + import Cryptodome # noqa: F401 + except ImportError: + try: + import Crypto # noqa: F401 + print('WARNING: Using Crypto since Cryptodome is not available. 
' + 'Install with: pip install pycryptodomex', file=sys.stderr) + return 'Crypto' + except ImportError: + pass + return 'Cryptodome' + + +dependancies = [pycryptodome_module(), 'mutagen'] + collect_submodules('websockets') excluded_modules = ['test', 'ytdlp_plugins', 'youtube-dl', 'youtube-dlc'] PyInstaller.__main__.run([ diff --git a/requirements.txt b/requirements.txt index 6a982fa36..cecd08eae 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ mutagen -pycryptodome +pycryptodomex websockets diff --git a/setup.py b/setup.py index b5eb81c30..ff23877dc 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ LONG_DESCRIPTION = '\n\n'.join(( '**PS**: Some links in this document will not work since this is a copy of the README.md from Github', open('README.md', 'r', encoding='utf-8').read())) -REQUIREMENTS = ['mutagen', 'pycryptodome', 'websockets'] +REQUIREMENTS = ['mutagen', 'pycryptodomex', 'websockets'] if sys.argv[1:2] == ['py2exe']: diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index 751d874d4..3c5a2617d 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -72,9 +72,9 @@ class HlsFD(FragmentFD): can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None if can_download and not compat_pycrypto_AES and '#EXT-X-KEY:METHOD=AES-128' in s: if FFmpegFD.available(): - can_download, message = False, 'The stream has AES-128 encryption and pycryptodome is not available' + can_download, message = False, 'The stream has AES-128 encryption and pycryptodomex is not available' else: - message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodome are available; ' + message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodomex are available; ' 'Decryption will be performed natively, but will be extremely slow') if not can_download: message = message or 'Unsupported features have been detected' diff --git a/yt_dlp/extractor/ivi.py b/yt_dlp/extractor/ivi.py index 098ab6665..5f8a046e0 100644 --- a/yt_dlp/extractor/ivi.py +++ b/yt_dlp/extractor/ivi.py @@ -141,7 +141,7 @@ class IviIE(InfoExtractor): elif site == 353: continue elif not pycryptodome_found: - raise ExtractorError('pycryptodome not found. Please install', expected=True) + raise ExtractorError('pycryptodomex not found. 
Please install', expected=True) elif message: extractor_msg += ': ' + message raise ExtractorError(extractor_msg % video_id, expected=True) From 705e7c2005dfe67a905e18736c9f6345ee9d386b Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 6 Oct 2021 10:53:22 +0530 Subject: [PATCH 0017/2552] [Hidive] Fix duplicate and incorrect formats --- yt_dlp/extractor/hidive.py | 85 +++++++++++++++----------------------- 1 file changed, 34 insertions(+), 51 deletions(-) diff --git a/yt_dlp/extractor/hidive.py b/yt_dlp/extractor/hidive.py index 90457b77e..909d1fbc1 100644 --- a/yt_dlp/extractor/hidive.py +++ b/yt_dlp/extractor/hidive.py @@ -1,8 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( ExtractorError, @@ -14,7 +12,7 @@ from ..utils import ( class HiDiveIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?hidive\.com/stream/(?P[^/]+)/(?P<key>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?hidive\.com/stream/(?P<id>(?P<title>[^/]+)/(?P<key>[^/?#&]+))' # Using X-Forwarded-For results in 403 HTTP error for HLS fragments, # so disabling geo bypass completely _GEO_BYPASS = False @@ -55,68 +53,53 @@ class HiDiveIE(InfoExtractor): self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(data)) def _real_extract(self, url): - mobj = self._match_valid_url(url) - title, key = mobj.group('title', 'key') - video_id = '%s/%s' % (title, key) - webpage = self._download_webpage(url, video_id, fatal=False) - data_videos = re.findall(r'data-video=\"([^\"]+)\"\s?data-captions=\"([^\"]+)\"', webpage) - formats = [] - subtitles = {} - for data_video in data_videos: - _, _, _, version, audio, _, extra = data_video[0].split('_') - caption = data_video[1] - - settings = self._download_json( - 'https://www.hidive.com/play/settings', video_id, - data=urlencode_postdata({ - 'Title': title, - 'Key': key, - 'PlayerId': 'f4f895ce1ca713ba263b91caeb1daa2d08904783', - 'Version': version, - 'Audio': audio, - 'Captions': caption, - 'Extra': extra, - })) + video_id, title, key = self._match_valid_url(url).group('id', 'title', 'key') + settings = self._download_json( + 'https://www.hidive.com/play/settings', video_id, + data=urlencode_postdata({ + 'Title': title, + 'Key': key, + 'PlayerId': 'f4f895ce1ca713ba263b91caeb1daa2d08904783', + })) - restriction = settings.get('restrictionReason') - if restriction == 'RegionRestricted': - self.raise_geo_restricted() + restriction = settings.get('restrictionReason') + if restriction == 'RegionRestricted': + self.raise_geo_restricted() + if restriction and restriction != 'None': + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, restriction), expected=True) - if restriction and restriction != 'None': - raise ExtractorError( - '%s said: %s' % (self.IE_NAME, restriction), expected=True) - - for rendition_id, rendition in settings['renditions'].items(): - m3u8_url = url_or_none(try_get(rendition, lambda x: x['bitrates']['hls'])) - if not m3u8_url: - continue + formats, subtitles, urls = [], {}, {None} + for rendition_id, rendition in settings['renditions'].items(): + audio, version, extra = rendition_id.split('_') + m3u8_url = url_or_none(try_get(rendition, lambda x: x['bitrates']['hls'])) + if m3u8_url not in urls: + urls.add(m3u8_url) frmt = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='%s-%s-%s-%s' % (version, audio, extra, caption), fatal=False) + m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id=rendition_id, fatal=False) for f in 
frmt: f['language'] = audio + f['format_note'] = f'{version}, {extra}' formats.extend(frmt) - for cc_file in rendition.get('ccFiles', []): - cc_url = url_or_none(try_get(cc_file, lambda x: x[2])) - # name is used since we cant distinguish subs with same language code - cc_lang = try_get(cc_file, (lambda x: x[1].replace(' ', '-').lower(), lambda x: x[0]), str) - if cc_url and cc_lang: - subtitles.setdefault(cc_lang, []).append({'url': cc_url}) + for cc_file in rendition.get('ccFiles', []): + cc_url = url_or_none(try_get(cc_file, lambda x: x[2])) + # name is used since we cant distinguish subs with same language code + cc_lang = try_get(cc_file, (lambda x: x[1].replace(' ', '-').lower(), lambda x: x[0]), str) + if cc_url not in urls and cc_lang: + urls.add(cc_url) + subtitles.setdefault(cc_lang, []).append({'url': cc_url}) self._sort_formats(formats) - season_number = int_or_none(self._search_regex( - r's(\d+)', key, 'season number', default=None)) - episode_number = int_or_none(self._search_regex( - r'e(\d+)', key, 'episode number', default=None)) - return { 'id': video_id, 'title': video_id, 'subtitles': subtitles, 'formats': formats, 'series': title, - 'season_number': season_number, - 'episode_number': episode_number, + 'season_number': int_or_none( + self._search_regex(r's(\d+)', key, 'season number', default=None)), + 'episode_number': int_or_none( + self._search_regex(r'e(\d+)', key, 'episode number', default=None)), 'http_headers': {'Referer': url} } From fee3f44f5f58274c637499f077aa0312e650f493 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Thu, 7 Oct 2021 14:32:42 +0000 Subject: [PATCH 0018/2552] [Streamable] Add codecs (#1189) Authored by: u-spec-png --- yt_dlp/extractor/streamable.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/streamable.py b/yt_dlp/extractor/streamable.py index 34725274e..808129649 100644 --- a/yt_dlp/extractor/streamable.py +++ b/yt_dlp/extractor/streamable.py @@ -8,6 +8,8 @@ from ..utils import ( ExtractorError, float_or_none, int_or_none, + try_get, + parse_codecs, ) @@ -29,7 +31,7 @@ class StreamableIE(InfoExtractor): 'view_count': int, } }, - # older video without bitrate, width/height, etc. info + # older video without bitrate, width/height, codecs, etc. 
info { 'url': 'https://streamable.com/moo', 'md5': '2cf6923639b87fba3279ad0df3a64e73', @@ -95,7 +97,9 @@ class StreamableIE(InfoExtractor): 'height': int_or_none(info.get('height')), 'filesize': int_or_none(info.get('size')), 'fps': int_or_none(info.get('framerate')), - 'vbr': float_or_none(info.get('bitrate'), 1000) + 'vbr': float_or_none(info.get('bitrate'), 1000), + 'vcodec': parse_codecs(try_get(info, lambda x: x['input_metadata']['video_codec_name'])).get('vcodec'), + 'acodec': parse_codecs(try_get(info, lambda x: x['input_metadata']['audio_codec_name'])).get('acodec'), }) self._sort_formats(formats) From 819e05319baff2d896df026f1ef905e1f21be942 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Sat, 9 Oct 2021 00:41:59 +0530 Subject: [PATCH 0019/2552] Improved progress reporting (See desc) (#1125) * Separate `--console-title` and `--no-progress` * Add option `--progress` to show progress-bar even in quiet mode * Fix and refactor `minicurses` * Use `minicurses` for all progress reporting * Standardize use of terminal sequences and enable color support for windows 10 * Add option `--progress-template` to customize progress-bar and console-title * Add postprocessor hooks and progress reporting Closes: #906, #901, #1085, #1170 --- README.md | 11 ++ test/test_YoutubeDL.py | 3 +- yt_dlp/YoutubeDL.py | 77 ++++++---- yt_dlp/__init__.py | 8 +- yt_dlp/compat.py | 7 + yt_dlp/downloader/common.py | 86 ++++++------ yt_dlp/downloader/fragment.py | 4 +- yt_dlp/extractor/common.py | 5 +- yt_dlp/minicurses.py | 178 ++++++++++-------------- yt_dlp/options.py | 20 ++- yt_dlp/postprocessor/common.py | 63 ++++++++- yt_dlp/postprocessor/metadataparser.py | 3 +- yt_dlp/postprocessor/modify_chapters.py | 3 +- yt_dlp/utils.py | 23 +++ 14 files changed, 293 insertions(+), 198 deletions(-) diff --git a/README.md b/README.md index 3c73d3aac..172386553 100644 --- a/README.md +++ b/README.md @@ -604,7 +604,18 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t (Alias: --force-download-archive) --newline Output progress bar as new lines --no-progress Do not print progress bar + --progress Show progress bar, even if in quiet mode --console-title Display progress in console titlebar + --progress-template [TYPES:]TEMPLATE + Template for progress outputs, optionally + prefixed with one of "download:" (default), + "download-title:" (the console title), + "postprocess:", or "postprocess-title:". + The video's fields are accessible under the + "info" key and the progress attributes are + accessible under "progress" key. 
Eg: + --console-title --progress-template + "download-title:%(info.id)s-%(progress.eta)s" -v, --verbose Print various debugging information --dump-pages Print downloaded pages encoded using base64 to debug problems (very verbose) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 450f25493..06963f7a8 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -666,8 +666,7 @@ class TestYoutubeDL(unittest.TestCase): ydl._num_downloads = 1 self.assertEqual(ydl.validate_outtmpl(tmpl), None) - outtmpl, tmpl_dict = ydl.prepare_outtmpl(tmpl, info or self.outtmpl_info) - out = ydl.escape_outtmpl(outtmpl) % tmpl_dict + out = ydl.evaluate_outtmpl(tmpl, info or self.outtmpl_info) fname = ydl.prepare_filename(info or self.outtmpl_info) if not isinstance(expected, (list, tuple)): diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 770f62734..1d865161a 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -42,6 +42,7 @@ from .compat import ( compat_urllib_error, compat_urllib_request, compat_urllib_request_DataHandler, + windows_enable_vt_mode, ) from .cookies import load_cookies from .utils import ( @@ -67,8 +68,6 @@ from .utils import ( float_or_none, format_bytes, format_field, - STR_FORMAT_RE_TMPL, - STR_FORMAT_TYPES, formatSeconds, GeoRestrictedError, HEADRequest, @@ -101,9 +100,13 @@ from .utils import ( sanitize_url, sanitized_Request, std_headers, + STR_FORMAT_RE_TMPL, + STR_FORMAT_TYPES, str_or_none, strftime_or_none, subtitles_filename, + supports_terminal_sequences, + TERMINAL_SEQUENCES, ThrottledDownload, to_high_limit_path, traverse_obj, @@ -248,6 +251,7 @@ class YoutubeDL(object): rejecttitle: Reject downloads for matching titles. logger: Log messages to a logging.Logger instance. logtostderr: Log messages to stderr instead of stdout. + consoletitle: Display progress in console window's titlebar. writedescription: Write the video description to a .description file writeinfojson: Write the video description to a .info.json file clean_infojson: Remove private fields from the infojson @@ -353,6 +357,15 @@ class YoutubeDL(object): Progress hooks are guaranteed to be called at least once (with status "finished") if the download is successful. + postprocessor_hooks: A list of functions that get called on postprocessing + progress, with a dictionary with the entries + * status: One of "started", "processing", or "finished". + Check this first and ignore unknown values. + * postprocessor: Name of the postprocessor + * info_dict: The extracted info_dict + + Progress hooks are guaranteed to be called at least twice + (with status "started" and "finished") if the processing is successful. merge_output_format: Extension to use when merging formats. final_ext: Expected final extension; used to detect when the file was already downloaded and converted. "merge_output_format" is @@ -412,11 +425,15 @@ class YoutubeDL(object): filename, abort-on-error, multistreams, no-live-chat, no-clean-infojson, no-playlist-metafiles, no-keep-subs. Refer __init__.py for their implementation + progress_template: Dictionary of templates for progress outputs. + Allowed keys are 'download', 'postprocess', + 'download-title' (console title) and 'postprocess-title'. 
+ The template is mapped on a dictionary with keys 'progress' and 'info' The following parameters are not used by YoutubeDL itself, they are used by the downloader (see yt_dlp/downloader/common.py): nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize, - max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle, + max_filesize, test, noresizebuffer, retries, continuedl, noprogress, xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size. The following options are used by the post processors: @@ -484,26 +501,27 @@ class YoutubeDL(object): self._first_webpage_request = True self._post_hooks = [] self._progress_hooks = [] + self._postprocessor_hooks = [] self._download_retcode = 0 self._num_downloads = 0 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] self._err_file = sys.stderr - self.params = { - # Default parameters - 'nocheckcertificate': False, - } - self.params.update(params) + self.params = params self.cache = Cache(self) + windows_enable_vt_mode() + self.params['no_color'] = self.params.get('no_color') or not supports_terminal_sequences(self._err_file) + if sys.version_info < (3, 6): self.report_warning( 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2]) if self.params.get('allow_unplayable_formats'): self.report_warning( - 'You have asked for unplayable formats to be listed/downloaded. ' - 'This is a developer option intended for debugging. ' - 'If you experience any issues while using this option, DO NOT open a bug report') + f'You have asked for {self._color_text("unplayable formats", "blue")} to be listed/downloaded. ' + 'This is a developer option intended for debugging. \n' + ' If you experience any issues while using this option, ' + f'{self._color_text("DO NOT", "red")} open a bug report') def check_deprecated(param, option, suggestion): if self.params.get(param) is not None: @@ -675,9 +693,13 @@ class YoutubeDL(object): self._post_hooks.append(ph) def add_progress_hook(self, ph): - """Add the progress hook (currently only for the file downloader)""" + """Add the download progress hook""" self._progress_hooks.append(ph) + def add_postprocessor_hook(self, ph): + """Add the postprocessing progress hook""" + self._postprocessor_hooks.append(ph) + def _bidi_workaround(self, message): if not hasattr(self, '_output_channel'): return message @@ -790,6 +812,11 @@ class YoutubeDL(object): self.to_stdout( message, skip_eol, quiet=self.params.get('quiet', False)) + def _color_text(self, text, color): + if self.params.get('no_color'): + return text + return f'{TERMINAL_SEQUENCES[color.upper()]}{text}{TERMINAL_SEQUENCES["RESET_STYLE"]}' + def report_warning(self, message, only_once=False): ''' Print the message to stderr, it will be prefixed with 'WARNING:' @@ -800,24 +827,14 @@ class YoutubeDL(object): else: if self.params.get('no_warnings'): return - if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt': - _msg_header = '\033[0;33mWARNING:\033[0m' - else: - _msg_header = 'WARNING:' - warning_message = '%s %s' % (_msg_header, message) - self.to_stderr(warning_message, only_once) + self.to_stderr(f'{self._color_text("WARNING:", "yellow")} {message}', only_once) def report_error(self, message, tb=None): ''' Do the same as trouble, but prefixes the message with 'ERROR:', colored in red if stderr is a tty file. 
''' - if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt': - _msg_header = '\033[0;31mERROR:\033[0m' - else: - _msg_header = 'ERROR:' - error_message = '%s %s' % (_msg_header, message) - self.trouble(error_message, tb) + self.trouble(f'{self._color_text("ERROR:", "red")} {message}', tb) def write_debug(self, message, only_once=False): '''Log debug message or Print message to stderr''' @@ -919,7 +936,7 @@ class YoutubeDL(object): return err def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None): - """ Make the template and info_dict suitable for substitution : ydl.outtmpl_escape(outtmpl) % info_dict """ + """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict """ info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set info_dict = dict(info_dict) # Do not sanitize so as not to consume LazyList @@ -1073,6 +1090,10 @@ class YoutubeDL(object): return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT + def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs): + outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs) + return self.escape_outtmpl(outtmpl) % info_dict + def _prepare_filename(self, info_dict, tmpl_type='default'): try: sanitize = lambda k, v: sanitize_filename( @@ -2431,10 +2452,8 @@ class YoutubeDL(object): if self.params.get('forceprint') or self.params.get('forcejson'): self.post_extract(info_dict) for tmpl in self.params.get('forceprint', []): - if re.match(r'\w+$', tmpl): - tmpl = '%({})s'.format(tmpl) - tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict) - self.to_stdout(self.escape_outtmpl(tmpl) % info_copy) + self.to_stdout(self.evaluate_outtmpl( + f'%({tmpl})s' if re.match(r'\w+$', tmpl) else tmpl, info_dict)) print_mandatory('title') print_mandatory('id') diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 38e1d0ec6..ade822299 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -302,11 +302,14 @@ def _real_main(argv=None): parser.error('invalid %s %r: %s' % (msg, tmpl, error_to_compat_str(err))) for k, tmpl in opts.outtmpl.items(): - validate_outtmpl(tmpl, '%s output template' % k) + validate_outtmpl(tmpl, f'{k} output template') opts.forceprint = opts.forceprint or [] for tmpl in opts.forceprint or []: validate_outtmpl(tmpl, 'print template') validate_outtmpl(opts.sponsorblock_chapter_title, 'SponsorBlock chapter title') + for k, tmpl in opts.progress_template.items(): + k = f'{k[:-6]} console title' if '-title' in k else f'{k} progress' + validate_outtmpl(tmpl, f'{k} template') if opts.extractaudio and not opts.keepvideo and opts.format is None: opts.format = 'bestaudio/best' @@ -633,8 +636,9 @@ def _real_main(argv=None): 'noresizebuffer': opts.noresizebuffer, 'http_chunk_size': opts.http_chunk_size, 'continuedl': opts.continue_dl, - 'noprogress': opts.noprogress, + 'noprogress': opts.quiet if opts.noprogress is None else opts.noprogress, 'progress_with_newline': opts.progress_with_newline, + 'progress_template': opts.progress_template, 'playliststart': opts.playliststart, 'playlistend': opts.playlistend, 'playlistreverse': opts.playlist_reverse, diff --git a/yt_dlp/compat.py b/yt_dlp/compat.py index 9bf05c737..b107b2114 100644 --- a/yt_dlp/compat.py +++ b/yt_dlp/compat.py @@ -159,6 +159,12 @@ except ImportError: compat_pycrypto_AES = None +def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.python.org/issue30075 + if compat_os_name != 'nt': + return + os.system('') + + # Deprecated 
compat_basestring = str @@ -281,5 +287,6 @@ __all__ = [ 'compat_xml_parse_error', 'compat_xpath', 'compat_zip', + 'windows_enable_vt_mode', 'workaround_optparse_bug9161', ] diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index bb0614037..50e674829 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -7,7 +7,6 @@ import sys import time import random -from ..compat import compat_os_name from ..utils import ( decodeArgument, encodeFilename, @@ -17,6 +16,7 @@ from ..utils import ( timeconvert, ) from ..minicurses import ( + MultilineLogger, MultilinePrinter, QuietMultilinePrinter, BreaklineStatusPrinter @@ -44,8 +44,6 @@ class FileDownloader(object): noresizebuffer: Do not automatically resize the download buffer. continuedl: Try to continue downloads if possible. noprogress: Do not print the progress bar. - logtostderr: Log messages to stderr instead of stdout. - consoletitle: Display progress in console window's titlebar. nopart: Do not use temporary .part files. updatetime: Use the Last-modified header to set output file timestamps. test: Download only first bytes to test the downloader. @@ -61,6 +59,7 @@ class FileDownloader(object): http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be useful for bypassing bandwidth throttling imposed by a webserver (experimental) + progress_template: See YoutubeDL.py Subclasses of this one must re-define the real_download method. """ @@ -73,7 +72,7 @@ class FileDownloader(object): self.ydl = ydl self._progress_hooks = [] self.params = params - self._multiline = None + self._prepare_multiline_status() self.add_progress_hook(self.report_progress) @staticmethod @@ -242,55 +241,46 @@ class FileDownloader(object): """Report destination filename.""" self.to_screen('[download] Destination: ' + filename) - def _prepare_multiline_status(self, lines): - if self.params.get('quiet'): + def _prepare_multiline_status(self, lines=1): + if self.params.get('noprogress'): self._multiline = QuietMultilinePrinter() - elif self.params.get('progress_with_newline', False): + elif self.ydl.params.get('logger'): + self._multiline = MultilineLogger(self.ydl.params['logger'], lines) + elif self.params.get('progress_with_newline'): self._multiline = BreaklineStatusPrinter(sys.stderr, lines) - elif self.params.get('noprogress', False): - self._multiline = None else: - self._multiline = MultilinePrinter(sys.stderr, lines) + self._multiline = MultilinePrinter(sys.stderr, lines, not self.params.get('quiet')) def _finish_multiline_status(self): - if self._multiline is not None: - self._multiline.end() - - def _report_progress_status(self, msg, is_last_line=False, progress_line=None): - fullmsg = '[download] ' + msg - if self.params.get('progress_with_newline', False): - self.to_screen(fullmsg) - elif progress_line is not None and self._multiline is not None: - self._multiline.print_at_line(fullmsg, progress_line) - else: - if compat_os_name == 'nt' or not sys.stderr.isatty(): - prev_len = getattr(self, '_report_progress_prev_line_length', 0) - if prev_len > len(fullmsg): - fullmsg += ' ' * (prev_len - len(fullmsg)) - self._report_progress_prev_line_length = len(fullmsg) - clear_line = '\r' - else: - clear_line = '\r\x1b[K' - self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line) - self.to_console_title('yt-dlp ' + msg) + self._multiline.end() + + def _report_progress_status(self, s): + progress_dict = s.copy() + progress_dict.pop('info_dict') + progress_dict = {'info': s['info_dict'], 'progress': progress_dict} 
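+        # Note: both the default templates below and any user-supplied
+        # '--progress-template' are evaluated against this mapping, which is
+        # why fields are addressed as '%(info.id)s' or '%(progress.eta)s'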
+ + progress_template = self.params.get('progress_template', {}) + self._multiline.print_at_line(self.ydl.evaluate_outtmpl( + progress_template.get('download') or '[download] %(progress._default_template)s', + progress_dict), s.get('progress_idx') or 0) + self.to_console_title(self.ydl.evaluate_outtmpl( + progress_template.get('download-title') or 'yt-dlp %(progress._default_template)s', + progress_dict)) def report_progress(self, s): if s['status'] == 'finished': - if self.params.get('noprogress', False): + if self.params.get('noprogress'): self.to_screen('[download] Download completed') - else: - msg_template = '100%%' - if s.get('total_bytes') is not None: - s['_total_bytes_str'] = format_bytes(s['total_bytes']) - msg_template += ' of %(_total_bytes_str)s' - if s.get('elapsed') is not None: - s['_elapsed_str'] = self.format_seconds(s['elapsed']) - msg_template += ' in %(_elapsed_str)s' - self._report_progress_status( - msg_template % s, is_last_line=True, progress_line=s.get('progress_idx')) - return - - if self.params.get('noprogress'): + msg_template = '100%%' + if s.get('total_bytes') is not None: + s['_total_bytes_str'] = format_bytes(s['total_bytes']) + msg_template += ' of %(_total_bytes_str)s' + if s.get('elapsed') is not None: + s['_elapsed_str'] = self.format_seconds(s['elapsed']) + msg_template += ' in %(_elapsed_str)s' + s['_percent_str'] = self.format_percent(100) + s['_default_template'] = msg_template % s + self._report_progress_status(s) return if s['status'] != 'downloading': @@ -332,8 +322,8 @@ class FileDownloader(object): msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s' else: msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s' - - self._report_progress_status(msg_template % s, progress_line=s.get('progress_idx')) + s['_default_template'] = msg_template % s + self._report_progress_status(s) def report_resuming_byte(self, resume_len): """Report attempt to resume at given byte.""" @@ -405,7 +395,9 @@ class FileDownloader(object): '[download] Sleeping %s seconds ...' % ( sleep_interval_sub)) time.sleep(sleep_interval_sub) - return self.real_download(filename, info_dict), True + ret = self.real_download(filename, info_dict) + self._finish_multiline_status() + return ret, True def real_download(self, filename, info_dict): """Real download process. 
Redefine in subclasses.""" diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index 22134f3b6..6a490131b 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -393,9 +393,7 @@ class FragmentFD(FileDownloader): result = result and job.result() finally: tpe.shutdown(wait=True) - - self._finish_multiline_status() - return True + return result def download_and_append_fragments(self, ctx, fragments, info_dict, *, pack_func=None, finish_func=None, tpe=None): fragment_retries = self.params.get('fragment_retries', 0) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index f65a098d7..4f940730a 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1134,10 +1134,7 @@ class InfoExtractor(object): if mobj: break - if not self.get_param('no_color') and compat_os_name != 'nt' and sys.stderr.isatty(): - _name = '\033[0;34m%s\033[0m' % name - else: - _name = name + _name = self._downloader._color_text(name, 'blue') if mobj: if group is None: diff --git a/yt_dlp/minicurses.py b/yt_dlp/minicurses.py index 74ad891c9..a466fb4b0 100644 --- a/yt_dlp/minicurses.py +++ b/yt_dlp/minicurses.py @@ -1,10 +1,12 @@ -import os - from threading import Lock -from .utils import compat_os_name, get_windows_version +from .utils import supports_terminal_sequences, TERMINAL_SEQUENCES + +class MultilinePrinterBase: + def __init__(self, stream=None, lines=1): + self.stream = stream + self.maximum = lines - 1 -class MultilinePrinterBase(): def __enter__(self): return self @@ -17,119 +19,87 @@ class MultilinePrinterBase(): def end(self): pass + def _add_line_number(self, text, line): + if self.maximum: + return f'{line + 1}: {text}' + return text -class MultilinePrinter(MultilinePrinterBase): - def __init__(self, stream, lines): - """ - @param stream stream to write to - @lines number of lines to be written - """ - self.stream = stream +class QuietMultilinePrinter(MultilinePrinterBase): + pass - is_win10 = compat_os_name == 'nt' and get_windows_version() >= (10, ) - self.CARRIAGE_RETURN = '\r' - if os.getenv('TERM') and self._isatty() or is_win10: - # reason not to use curses https://github.com/yt-dlp/yt-dlp/pull/1036#discussion_r713851492 - # escape sequences for Win10 https://docs.microsoft.com/en-us/windows/console/console-virtual-terminal-sequences - self.UP = '\x1b[A' - self.DOWN = '\n' - self.ERASE_LINE = '\x1b[K' - self._HAVE_FULLCAP = self._isatty() or is_win10 - else: - self.UP = self.DOWN = self.ERASE_LINE = None - self._HAVE_FULLCAP = False - # lines are numbered from top to bottom, counting from 0 to self.maximum - self.maximum = lines - 1 - self.lastline = 0 - self.lastlength = 0 +class MultilineLogger(MultilinePrinterBase): + def print_at_line(self, text, pos): + # stream is the logger object, not an actual stream + self.stream.debug(self._add_line_number(text, pos)) - self.movelock = Lock() - @property - def have_fullcap(self): - """ - True if the TTY is allowing to control cursor, - so that multiline progress works - """ - return self._HAVE_FULLCAP +class BreaklineStatusPrinter(MultilinePrinterBase): + def print_at_line(self, text, pos): + self.stream.write(self._add_line_number(text, pos) + '\n') - def _isatty(self): - try: - return self.stream.isatty() - except BaseException: - return False + +class MultilinePrinter(MultilinePrinterBase): + def __init__(self, stream=None, lines=1, preserve_output=True): + super().__init__(stream, lines) + self.preserve_output = preserve_output + self._lastline = self._lastlength = 0 + 
self._movelock = Lock() + self._HAVE_FULLCAP = supports_terminal_sequences(self.stream) + + def lock(func): + def wrapper(self, *args, **kwargs): + with self._movelock: + return func(self, *args, **kwargs) + return wrapper def _move_cursor(self, dest): - current = min(self.lastline, self.maximum) - self.stream.write(self.CARRIAGE_RETURN) - if current == dest: - # current and dest are at same position, no need to move cursor + current = min(self._lastline, self.maximum) + self.stream.write('\r') + distance = dest - current + if distance < 0: + self.stream.write(TERMINAL_SEQUENCES['UP'] * -distance) + elif distance > 0: + self.stream.write(TERMINAL_SEQUENCES['DOWN'] * distance) + self._lastline = dest + + @lock + def print_at_line(self, text, pos): + if self._HAVE_FULLCAP: + self._move_cursor(pos) + self.stream.write(TERMINAL_SEQUENCES['ERASE_LINE']) + self.stream.write(text) return - elif current > dest: - # when maximum == 2, - # 0. dest - # 1. - # 2. current - self.stream.write(self.UP * (current - dest)) - elif current < dest: - # when maximum == 2, - # 0. current - # 1. - # 2. dest - self.stream.write(self.DOWN * (dest - current)) - self.lastline = dest - def print_at_line(self, text, pos): - with self.movelock: - if self.have_fullcap: - self._move_cursor(pos) - self.stream.write(self.ERASE_LINE) - self.stream.write(text) - else: - if self.maximum != 0: - # let user know about which line is updating the status - text = f'{pos + 1}: {text}' - textlen = len(text) - if self.lastline == pos: - # move cursor at the start of progress when writing to same line - self.stream.write(self.CARRIAGE_RETURN) - if self.lastlength > textlen: - text += ' ' * (self.lastlength - textlen) - self.lastlength = textlen - else: - # otherwise, break the line - self.stream.write('\n') - self.lastlength = 0 - self.stream.write(text) - self.lastline = pos + text = self._add_line_number(text, pos) + textlen = len(text) + if self._lastline == pos: + # move cursor at the start of progress when writing to same line + self.stream.write('\r') + if self._lastlength > textlen: + text += ' ' * (self._lastlength - textlen) + self._lastlength = textlen + else: + # otherwise, break the line + self.stream.write('\n') + self._lastlength = textlen + self.stream.write(text) + self._lastline = pos + @lock def end(self): - with self.movelock: - # move cursor to the end of the last line, and write line break - # so that other to_screen calls can precede + # move cursor to the end of the last line, and write line break + # so that other to_screen calls can precede + if self._HAVE_FULLCAP: self._move_cursor(self.maximum) + if self.preserve_output: self.stream.write('\n') + return - -class QuietMultilinePrinter(MultilinePrinterBase): - def __init__(self): - self.have_fullcap = True - - -class BreaklineStatusPrinter(MultilinePrinterBase): - - def __init__(self, stream, lines): - """ - @param stream stream to write to - """ - self.stream = stream - self.maximum = lines - self.have_fullcap = True - - def print_at_line(self, text, pos): - if self.maximum != 0: - # let user know about which line is updating the status - text = f'{pos + 1}: {text}' - self.stream.write(text + '\n') + if self._HAVE_FULLCAP: + self.stream.write( + TERMINAL_SEQUENCES['ERASE_LINE'] + + f'{TERMINAL_SEQUENCES["UP"]}{TERMINAL_SEQUENCES["ERASE_LINE"]}' * self.maximum) + else: + self.stream.write(' ' * self._lastlength) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index be43f37ee..4652e8c58 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -910,12 
+910,30 @@ def parseOpts(overrideArguments=None): help='Output progress bar as new lines') verbosity.add_option( '--no-progress', - action='store_true', dest='noprogress', default=False, + action='store_true', dest='noprogress', default=None, help='Do not print progress bar') + verbosity.add_option( + '--progress', + action='store_false', dest='noprogress', + help='Show progress bar, even if in quiet mode') verbosity.add_option( '--console-title', action='store_true', dest='consoletitle', default=False, help='Display progress in console titlebar') + verbosity.add_option( + '--progress-template', + metavar='[TYPES:]TEMPLATE', dest='progress_template', default={}, type='str', + action='callback', callback=_dict_from_options_callback, + callback_kwargs={ + 'allowed_keys': '(download|postprocess)(-title)?', + 'default_key': 'download' + }, help=( + 'Template for progress outputs, optionally prefixed with one of "download:" (default), ' + '"download-title:" (the console title), "postprocess:", or "postprocess-title:". ' + 'The video\'s fields are accessible under the "info" key and ' + 'the progress attributes are accessible under "progress" key. Eg: ' + # TODO: Document the fields inside "progress" + '--console-title --progress-template "download-title:%(info.id)s-%(progress.eta)s"')) verbosity.add_option( '-v', '--verbose', action='store_true', dest='verbose', default=False, diff --git a/yt_dlp/postprocessor/common.py b/yt_dlp/postprocessor/common.py index d8ec997d9..376a1c95e 100644 --- a/yt_dlp/postprocessor/common.py +++ b/yt_dlp/postprocessor/common.py @@ -1,5 +1,6 @@ from __future__ import unicode_literals +import copy import functools import os @@ -11,7 +12,26 @@ from ..utils import ( ) -class PostProcessor(object): +class PostProcessorMetaClass(type): + @staticmethod + def run_wrapper(func): + @functools.wraps(func) + def run(self, info, *args, **kwargs): + self._hook_progress({'status': 'started'}, info) + ret = func(self, info, *args, **kwargs) + if ret is not None: + _, info = ret + self._hook_progress({'status': 'finished'}, info) + return ret + return run + + def __new__(cls, name, bases, attrs): + if 'run' in attrs: + attrs['run'] = cls.run_wrapper(attrs['run']) + return type.__new__(cls, name, bases, attrs) + + +class PostProcessor(metaclass=PostProcessorMetaClass): """Post Processor class. 
PostProcessor objects can be added to downloaders with their @@ -34,7 +54,9 @@ class PostProcessor(object): _downloader = None def __init__(self, downloader=None): - self._downloader = downloader + self._progress_hooks = [] + self.add_progress_hook(self.report_progress) + self.set_downloader(downloader) self.PP_NAME = self.pp_key() @classmethod @@ -68,6 +90,10 @@ class PostProcessor(object): def set_downloader(self, downloader): """Sets the downloader for this PP.""" self._downloader = downloader + if not downloader: + return + for ph in downloader._postprocessor_hooks: + self.add_progress_hook(ph) @staticmethod def _restrict_to(*, video=True, audio=True, images=True): @@ -115,6 +141,39 @@ class PostProcessor(object): return _configuration_args( self.pp_key(), self.get_param('postprocessor_args'), exe, *args, **kwargs) + def _hook_progress(self, status, info_dict): + if not self._progress_hooks: + return + info_dict = dict(info_dict) + for key in ('__original_infodict', '__postprocessors'): + info_dict.pop(key, None) + status.update({ + 'info_dict': copy.deepcopy(info_dict), + 'postprocessor': self.pp_key(), + }) + for ph in self._progress_hooks: + ph(status) + + def add_progress_hook(self, ph): + # See YoutubeDl.py (search for postprocessor_hooks) for a description of this interface + self._progress_hooks.append(ph) + + def report_progress(self, s): + s['_default_template'] = '%(postprocessor)s %(status)s' % s + + progress_dict = s.copy() + progress_dict.pop('info_dict') + progress_dict = {'info': s['info_dict'], 'progress': progress_dict} + + progress_template = self.get_param('progress_template', {}) + tmpl = progress_template.get('postprocess') + if tmpl: + self._downloader.to_stdout(self._downloader.evaluate_outtmpl(tmpl, progress_dict)) + + self._downloader.to_console_title(self._downloader.evaluate_outtmpl( + progress_template.get('postprocess-title') or 'yt-dlp %(progress._default_template)s', + progress_dict)) + class AudioConversionError(PostProcessingError): pass diff --git a/yt_dlp/postprocessor/metadataparser.py b/yt_dlp/postprocessor/metadataparser.py index f7b0d8bde..96aac9beb 100644 --- a/yt_dlp/postprocessor/metadataparser.py +++ b/yt_dlp/postprocessor/metadataparser.py @@ -62,8 +62,7 @@ class MetadataParserPP(PostProcessor): def interpretter(self, inp, out): def f(info): - outtmpl, tmpl_dict = self._downloader.prepare_outtmpl(template, info) - data_to_parse = self._downloader.escape_outtmpl(outtmpl) % tmpl_dict + data_to_parse = self._downloader.evaluate_outtmpl(template, info) self.write_debug(f'Searching for {out_re.pattern!r} in {template!r}') match = out_re.search(data_to_parse) if match is None: diff --git a/yt_dlp/postprocessor/modify_chapters.py b/yt_dlp/postprocessor/modify_chapters.py index 2871e16d5..72a705fc5 100644 --- a/yt_dlp/postprocessor/modify_chapters.py +++ b/yt_dlp/postprocessor/modify_chapters.py @@ -292,8 +292,7 @@ class ModifyChaptersPP(FFmpegPostProcessor): 'name': SponsorBlockPP.CATEGORIES[category], 'category_names': [SponsorBlockPP.CATEGORIES[c] for c in cats] }) - outtmpl, tmpl_dict = self._downloader.prepare_outtmpl(self._sponsorblock_chapter_title, c) - c['title'] = self._downloader.escape_outtmpl(outtmpl) % tmpl_dict + c['title'] = self._downloader.evaluate_outtmpl(self._sponsorblock_chapter_title, c) # Merge identically named sponsors. 
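             # (i.e. consecutive chapters whose rendered titles compare equal)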
if (new_chapters and 'categories' in new_chapters[-1] and new_chapters[-1]['title'] == c['title']): diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 8b5b15103..027387897 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -6440,3 +6440,26 @@ def jwt_encode_hs256(payload_data, key, headers={}): signature_b64 = base64.b64encode(h.digest()) token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64 return token + + +def supports_terminal_sequences(stream): + if compat_os_name == 'nt': + if get_windows_version() < (10, ): + return False + elif not os.getenv('TERM'): + return False + try: + return stream.isatty() + except BaseException: + return False + + +TERMINAL_SEQUENCES = { + 'DOWN': '\n', + 'UP': '\x1b[A', + 'ERASE_LINE': '\x1b[K', + 'RED': '\033[0;31m', + 'YELLOW': '\033[0;33m', + 'BLUE': '\033[0;34m', + 'RESET_STYLE': '\033[0m', +} From c08b8873eae857b0bbd2a3cfee402253c02a3180 Mon Sep 17 00:00:00 2001 From: Damiano Amatruda <damiano.amatruda@outlook.com> Date: Fri, 8 Oct 2021 21:36:27 +0200 Subject: [PATCH 0020/2552] [ciscowebex] Add extractor (#1199) Authored by: damianoamatruda --- yt_dlp/extractor/ciscowebex.py | 90 ++++++++++++++++++++++++++++++++++ yt_dlp/extractor/extractors.py | 1 + 2 files changed, 91 insertions(+) create mode 100644 yt_dlp/extractor/ciscowebex.py diff --git a/yt_dlp/extractor/ciscowebex.py b/yt_dlp/extractor/ciscowebex.py new file mode 100644 index 000000000..882dae91b --- /dev/null +++ b/yt_dlp/extractor/ciscowebex.py @@ -0,0 +1,90 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + try_get, + unified_timestamp, +) + + +class CiscoWebexIE(InfoExtractor): + IE_NAME = 'ciscowebex' + IE_DESC = 'Cisco Webex' + _VALID_URL = r'''(?x) + (?P<url>https?://(?P<subdomain>[^/#?]*)\.webex\.com/(?: + (?P<siteurl_1>[^/#?]*)/(?:ldr|lsr).php\?(?:[^#]*&)*RCID=(?P<rcid>[0-9a-f]{32})| + (?:recordingservice|webappng)/sites/(?P<siteurl_2>[^/#?]*)/recording/(?:playback/|play/)?(?P<id>[0-9a-f]{32}) + ))''' + + _TESTS = [{ + 'url': 'https://demosubdomain.webex.com/demositeurl/ldr.php?RCID=e58e803bc0f766bb5f6376d2e86adb5b', + 'only_matching': True, + }, { + 'url': 'http://demosubdomain.webex.com/demositeurl/lsr.php?RCID=bc04b4a7b5ea2cc3a493d5ae6aaff5d7', + 'only_matching': True, + }, { + 'url': 'https://demosubdomain.webex.com/recordingservice/sites/demositeurl/recording/88e7a42f7b19f5b423c54754aecc2ce9/playback', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + rcid = mobj.group('rcid') + if rcid: + webpage = self._download_webpage(url, None, note='Getting video ID') + url = self._search_regex(self._VALID_URL, webpage, 'redirection url', group='url') + url = self._request_webpage(url, None, note='Resolving final URL').geturl() + mobj = self._match_valid_url(url) + subdomain = mobj.group('subdomain') + siteurl = mobj.group('siteurl_1') or mobj.group('siteurl_2') + video_id = mobj.group('id') + + stream = self._download_json( + 'https://%s.webex.com/webappng/api/v1/recordings/%s/stream' % (subdomain, video_id), + video_id, fatal=False, query={'siteurl': siteurl}) + if not stream: + self.raise_login_required(method='cookies') + + video_id = stream.get('recordUUID') or video_id + + formats = [{ + 'format_id': 'video', + 'url': stream['fallbackPlaySrc'], + 'ext': 'mp4', + 'vcodec': 'avc1.640028', + 'acodec': 'mp4a.40.2', + }] + if stream.get('preventDownload') is False: + mp4url = try_get(stream, lambda x: 
x['downloadRecordingInfo']['downloadInfo']['mp4URL']) + if mp4url: + formats.append({ + 'format_id': 'video', + 'url': mp4url, + 'ext': 'mp4', + 'vcodec': 'avc1.640028', + 'acodec': 'mp4a.40.2', + }) + audiourl = try_get(stream, lambda x: x['downloadRecordingInfo']['downloadInfo']['audioURL']) + if audiourl: + formats.append({ + 'format_id': 'audio', + 'url': audiourl, + 'ext': 'mp3', + 'vcodec': 'none', + 'acodec': 'mp3', + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': stream['recordName'], + 'description': stream.get('description'), + 'uploader': stream.get('ownerDisplayName'), + 'uploader_id': stream.get('ownerUserName') or stream.get('ownerId'), # mail or id + 'timestamp': unified_timestamp(stream.get('createTime')), + 'duration': int_or_none(stream.get('duration'), 1000), + 'webpage_url': 'https://%s.webex.com/recordingservice/sites/%s/recording/playback/%s' % (subdomain, siteurl, video_id), + 'formats': formats, + } diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 8c5b8b160..a224c4f9a 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -258,6 +258,7 @@ from .ciscolive import ( CiscoLiveSessionIE, CiscoLiveSearchIE, ) +from .ciscowebex import CiscoWebexIE from .cjsw import CJSWIE from .cliphunter import CliphunterIE from .clippit import ClippitIE From ac56cf38a463f0e21e3a3ec89572fcd1cade1563 Mon Sep 17 00:00:00 2001 From: coletdjnz <colethedj@protonmail.com> Date: Sat, 9 Oct 2021 10:19:25 +1300 Subject: [PATCH 0021/2552] [youtube:tab] Fallback to API when webpage fails to download (#1122) and add some extractor_args to force this mode Authored by: coletdjnz --- README.md | 3 + yt_dlp/extractor/youtube.py | 227 ++++++++++++++++++++++++++---------- 2 files changed, 171 insertions(+), 59 deletions(-) diff --git a/README.md b/README.md index 172386553..ff117663a 100644 --- a/README.md +++ b/README.md @@ -1483,6 +1483,9 @@ The following extractors use this feature: * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side). * `max_comments`: Maximum amount of comments to download (default all). * `max_comment_depth`: Maximum depth for nested comments. YouTube supports depths 1 or 2 (default). +* **youtubetab** + (YouTube playlists, channels, feeds, etc.) + * `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details) * **funimation** * `language`: Languages to extract. 
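A minimal sketch of driving the new `skip` argument from the Python API rather than the command line (the option shape mirrors the `extractor_args` used in the test cases below; the feed URL is only an example):

```python
import yt_dlp

# Skip the initial webpage download so extraction goes through the
# innertube API fallback added in this patch
ydl_opts = {'extractor_args': {'youtubetab': {'skip': ['webpage']}}}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/feed/recommended'])
```

The command-line equivalent is `--extractor-args "youtubetab:skip=webpage"`.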
Eg: `funimation:language=english,japanese` diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 56de2ef59..97d02dc0b 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -579,12 +579,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor): data=json.dumps(data).encode('utf8'), headers=real_headers, query={'key': api_key or self._extract_api_key()}) - def extract_yt_initial_data(self, video_id, webpage): - return self._parse_json( - self._search_regex( - (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE), - self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'), - video_id) + def extract_yt_initial_data(self, item_id, webpage, fatal=True): + data = self._search_regex( + (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE), + self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal) + if data: + return self._parse_json(data, item_id, fatal=fatal) @staticmethod def _extract_session_index(*data): @@ -627,6 +627,16 @@ class YoutubeBaseInfoExtractor(InfoExtractor): # and just "user_syncid||" for primary channel. We only want the channel_syncid return sync_ids[0] + @staticmethod + def _extract_visitor_data(*args): + """ + Extracts visitorData from an API response or ytcfg + Appears to be used to track session state + """ + return traverse_obj( + args, (..., ('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))), + expected_type=compat_str, get_all=False) + @property def is_authenticated(self): return bool(self._generate_sapisidhash_header()) @@ -651,8 +661,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'Origin': origin, 'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg), 'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg), - 'X-Goog-Visitor-Id': visitor_data or try_get( - self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str) + 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg) } if session_index is None: session_index = self._extract_session_index(ytcfg) @@ -826,9 +835,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor): return else: - # Youtube may send alerts if there was an issue with the continuation page try: - self._extract_and_report_alerts(response, expected=False, only_once=True) + self._extract_and_report_alerts(response, only_once=True) except ExtractorError as e: # YouTube servers may return errors we want to retry on in a 200 OK response # See: https://github.com/yt-dlp/yt-dlp/issues/839 @@ -3549,7 +3557,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): 'url': 'https://www.youtube.com/feed/watch_later', 'only_matching': True, }, { - 'note': 'Recommended - redirects to home page', + 'note': 'Recommended - redirects to home page.', 'url': 'https://www.youtube.com/feed/recommended', 'only_matching': True, }, { @@ -3646,6 +3654,51 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): 'availability': 'unlisted' }, 'playlist_count': 1, + }, { + 'note': 'API Fallback: Recommended - redirects to home page. 
Requires visitorData', + 'url': 'https://www.youtube.com/feed/recommended', + 'info_dict': { + 'id': 'recommended', + 'title': 'recommended', + }, + 'playlist_mincount': 50, + 'params': { + 'skip_download': True, + 'extractor_args': {'youtubetab': {'skip': ['webpage']}} + }, + }, { + 'note': 'API Fallback: /videos tab, sorted by oldest first', + 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid', + 'info_dict': { + 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw', + 'title': 'Cody\'sLab - Videos', + 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa', + 'uploader': 'Cody\'sLab', + 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw', + }, + 'playlist_mincount': 650, + 'params': { + 'skip_download': True, + 'extractor_args': {'youtubetab': {'skip': ['webpage']}} + }, + }, { + 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...', + 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw', + 'info_dict': { + 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw', + 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw', + 'title': 'Uploads from Royalty Free Music - Topic', + 'uploader': 'Royalty Free Music - Topic', + }, + 'expected_warnings': [ + 'A channel/user page was given', + 'The URL does not have a videos tab', + ], + 'playlist_mincount': 101, + 'params': { + 'skip_download': True, + 'extractor_args': {'youtubetab': {'skip': ['webpage']}} + }, }] @classmethod @@ -3834,7 +3887,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): if entry: yield entry ''' - def _entries(self, tab, item_id, account_syncid, ytcfg): + def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data): def extract_entries(parent_renderer): # this needs to called again for continuation to work with feeds contents = try_get(parent_renderer, lambda x: x['contents'], list) or [] @@ -3876,7 +3929,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): if not continuation_list[0]: continuation_list[0] = self._extract_continuation(parent_renderer) - continuation_list = [None] # Python 2 doesnot support nonlocal + continuation_list = [None] # Python 2 does not support nonlocal tab_content = try_get(tab, lambda x: x['content'], dict) if not tab_content: return @@ -3886,7 +3939,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): for entry in extract_entries(parent_renderer): yield entry continuation = continuation_list[0] - visitor_data = None for page_num in itertools.count(1): if not continuation: @@ -3900,8 +3952,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): if not response: break - visitor_data = try_get( - response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data + # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases + # See: https://github.com/ytdl-org/youtube-dl/issues/28702 + visitor_data = self._extract_visitor_data(response) or visitor_data known_continuation_renderers = { 'playlistVideoListContinuation': self._playlist_entries, @@ -3975,9 +4028,10 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)) return {k: v for k, v in uploader.items() if v is not None} - def _extract_from_tabs(self, item_id, webpage, data, tabs): + def _extract_from_tabs(self, item_id, ytcfg, data, tabs): playlist_id = title = description = channel_url = channel_name = channel_id = None - thumbnails_list = tags = [] + thumbnails_list = [] + tags = [] selected_tab = self._extract_selected_tab(tabs) renderer = try_get( @@ -4042,18 +4096,15 @@ class 
YoutubeTabIE(YoutubeBaseInfoExtractor): 'channel': metadata['uploader'], 'channel_id': metadata['uploader_id'], 'channel_url': metadata['uploader_url']}) - ytcfg = self.extract_ytcfg(item_id, webpage) return self.playlist_result( self._entries( - selected_tab, playlist_id, - self._extract_account_syncid(ytcfg, data), ytcfg), + selected_tab, playlist_id, ytcfg, + self._extract_account_syncid(ytcfg, data), + self._extract_visitor_data(data, ytcfg)), **metadata) - def _extract_mix_playlist(self, playlist, playlist_id, data, webpage): - first_id = last_id = None - ytcfg = self.extract_ytcfg(playlist_id, webpage) - headers = self.generate_api_headers( - ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data)) + def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg): + first_id = last_id = response = None for page_num in itertools.count(1): videos = list(self._playlist_entries(playlist)) if not videos: @@ -4070,6 +4121,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): last_id = videos[-1]['id'] watch_endpoint = try_get( playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint']) + headers = self.generate_api_headers( + ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data), + visitor_data=self._extract_visitor_data(response, data, ytcfg)) query = { 'playlistId': playlist_id, 'videoId': watch_endpoint.get('videoId') or last_id, @@ -4084,7 +4138,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): playlist = try_get( response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict) - def _extract_from_playlist(self, item_id, url, data, playlist, webpage): + def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg): title = playlist.get('title') or try_get( data, lambda x: x['titleText']['simpleText'], compat_str) playlist_id = playlist.get('playlistId') or item_id @@ -4099,7 +4153,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): video_title=title) return self.playlist_result( - self._extract_mix_playlist(playlist, playlist_id, data, webpage), + self._extract_mix_playlist(playlist, playlist_id, data, ytcfg), playlist_id=playlist_id, playlist_title=title) def _extract_availability(self, data): @@ -4143,7 +4197,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): if renderer: return renderer - def _reload_with_unavailable_videos(self, item_id, data, webpage): + def _reload_with_unavailable_videos(self, item_id, data, ytcfg): """ Get playlist with unavailable videos if the 'show unavailable videos' button exists. 
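         (re-requests the browse endpoint; the query built below falls back to params 'wgYCCAA=')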
""" @@ -4167,10 +4221,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): params = browse_endpoint.get('params') break - ytcfg = self.extract_ytcfg(item_id, webpage) headers = self.generate_api_headers( ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data), - visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str)) + visitor_data=self._extract_visitor_data(data, ytcfg)) query = { 'params': params or 'wgYCCAA=', 'browseId': browse_id or 'VL%s' % item_id @@ -4180,28 +4233,87 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): check_get_keys='contents', fatal=False, ytcfg=ytcfg, note='Downloading API JSON with unavailable videos') - def _extract_webpage(self, url, item_id): + def _extract_webpage(self, url, item_id, fatal=True): retries = self.get_param('extractor_retries', 3) count = -1 - last_error = 'Incomplete yt initial data recieved' + webpage = data = last_error = None while count < retries: count += 1 # Sometimes youtube returns a webpage with incomplete ytInitialData # See: https://github.com/yt-dlp/yt-dlp/issues/116 - if count: + if last_error: self.report_warning('%s. Retrying ...' % last_error) - webpage = self._download_webpage( - url, item_id, - 'Downloading webpage%s' % (' (retry #%d)' % count if count else '')) - data = self.extract_yt_initial_data(item_id, webpage) - if data.get('contents') or data.get('currentVideoEndpoint'): + try: + webpage = self._download_webpage( + url, item_id, + note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',)) + data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {} + except ExtractorError as e: + if isinstance(e.cause, network_exceptions): + if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429): + last_error = error_to_compat_str(e.cause or e.msg) + if count < retries: + continue + if fatal: + raise + self.report_warning(error_to_compat_str(e)) break - # Extract alerts here only when there is error - self._extract_and_report_alerts(data) - if count >= retries: - raise ExtractorError(last_error) + else: + try: + self._extract_and_report_alerts(data) + except ExtractorError as e: + if fatal: + raise + self.report_warning(error_to_compat_str(e)) + break + + if dict_get(data, ('contents', 'currentVideoEndpoint')): + break + + last_error = 'Incomplete yt initial data received' + if count >= retries: + if fatal: + raise ExtractorError(last_error) + self.report_warning(last_error) + break + return webpage, data + def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'): + data = None + if 'webpage' not in self._configuration_arg('skip'): + webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal) + ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage) + if not data: + if not ytcfg and self.is_authenticated: + msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.' 
+ if 'authcheck' not in self._configuration_arg('skip') and fatal: + raise ExtractorError( + msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,' + ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check', + expected=True) + self.report_warning(msg, only_once=True) + data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client) + return data, ytcfg + + def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'): + headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client) + resolve_response = self._extract_response( + item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal, + ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client) + endpoints = {'browseEndpoint': 'browse', 'watchEndpoint': 'next'} + for ep_key, ep in endpoints.items(): + params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict) + if params: + return self._extract_response( + item_id=item_id, query=params, ep=ep, headers=headers, + ytcfg=ytcfg, fatal=fatal, default_client=default_client, + check_get_keys=('contents', 'currentVideoEndpoint')) + err_note = 'Failed to resolve url (does the playlist exist?)' + if fatal: + raise ExtractorError(err_note, expected=True) + self.report_warning(err_note, item_id) + @staticmethod def _smuggle_data(entries, data): for entry in entries: @@ -4234,7 +4346,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): mobj = get_mobj(url) # Youtube returns incomplete data if tabname is not lower case pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel'] - if is_channel: if smuggled_data.get('is_music_url'): if item_id[:2] == 'VL': @@ -4242,12 +4353,14 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): item_id = item_id[2:] pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False elif item_id[:2] == 'MP': - # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage - item_id = self._search_regex( - r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22', - self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id), - 'playlist id') - pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False + # Resolve albums (/[channel/browse]/MP...) 
to their equivalent playlist + mdata = self._extract_tab_endpoint( + 'https://music.youtube.com/channel/%s' % item_id, item_id, default_client='web_music') + murl = traverse_obj( + mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'), get_all=False, expected_type=compat_str) + if not murl: + raise ExtractorError('Failed to resolve album to playlist.') + return self.url_result(murl, ie=YoutubeTabIE.ie_key()) elif mobj['channel_type'] == 'browse': # Youtube music /browse/ should be changed to /channel/ pre = 'https://www.youtube.com/channel/%s' % item_id @@ -4281,7 +4394,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id) self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id)) - webpage, data = self._extract_webpage(url, item_id) + data, ytcfg = self._extract_data(url, item_id) tabs = try_get( data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list) @@ -4299,11 +4412,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): pl_id = 'UU%s' % item_id[2:] pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post']) try: - pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id) - for alert_type, alert_message in self._extract_alerts(pl_data): - if alert_type == 'error': - raise ExtractorError('Youtube said: %s' % alert_message) - item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data + data, ytcfg, item_id, url = *self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True), pl_id, pl_url except ExtractorError: self.report_warning('The playlist gave error. Falling back to channel URL') else: @@ -4313,17 +4422,17 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): # YouTube sometimes provides a button to reload playlist with unavailable videos. 
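         # (when allowed, _reload_with_unavailable_videos re-fetches the playlist data)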
if 'no-youtube-unavailable-videos' not in compat_opts: - data = self._reload_with_unavailable_videos(item_id, data, webpage) or data + data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data self._extract_and_report_alerts(data, only_once=True) tabs = try_get( data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list) if tabs: - return self._extract_from_tabs(item_id, webpage, data, tabs) + return self._extract_from_tabs(item_id, ytcfg, data, tabs) playlist = try_get( data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict) if playlist: - return self._extract_from_playlist(item_id, url, data, playlist, webpage) + return self._extract_from_playlist(item_id, url, data, playlist, ytcfg) video_id = try_get( data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'], From 8c6f4daa4c7c54df600bf4990bd91ca381fbd8f3 Mon Sep 17 00:00:00 2001 From: timethrow <39486242+timethrow@users.noreply.github.com> Date: Sat, 9 Oct 2021 02:08:01 +0100 Subject: [PATCH 0022/2552] [docs] Write embedding and contributing documentation (#528) Authored by: pukkandan, timethrow --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- CONTRIBUTING.md | 271 ++++++++++++++++++++++--------- README.md | 86 +++++++++- devscripts/make_contributing.py | 21 +-- 4 files changed, 292 insertions(+), 88 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 1bcac69da..7ef08d68a 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -7,7 +7,7 @@ --- ### Before submitting a *pull request* make sure you have: -- [ ] At least skimmed through [adding new extractor tutorial](https://github.com/ytdl-org/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/ytdl-org/youtube-dl#youtube-dl-coding-conventions) sections +- [ ] At least skimmed through [adding new extractor tutorial](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#adding-support-for-a-new-site) and [yt-dlp coding conventions](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#yt-dlp-coding-conventions) sections - [ ] [Searched](https://github.com/yt-dlp/yt-dlp/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests - [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5faf97b10..7aaf6a52b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,26 +1,59 @@ -**Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. 
It should look similar to this:
-```
-$ youtube-dl -v <your command line>
-[debug] System config: []
-[debug] User config: []
-[debug] Command-line args: [u'-v', u'https://www.youtube.com/watch?v=BaW_jenozKc']
-[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2015.12.06
-[debug] Git HEAD: 135392e
-[debug] Python version 2.6.6 - Windows-2003Server-5.2.3790-SP2
-[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
+# CONTRIBUTING TO YT-DLP
+
+- [OPENING AN ISSUE](#opening-an-issue)
+    - [Is the description of the issue itself sufficient?](#is-the-description-of-the-issue-itself-sufficient)
+    - [Are you using the latest version?](#are-you-using-the-latest-version)
+    - [Is the issue already documented?](#is-the-issue-already-documented)
+    - [Why are existing options not enough?](#why-are-existing-options-not-enough)
+    - [Have you read and understood the changes between youtube-dl and yt-dlp](#have-you-read-and-understood-the-changes-between-youtube-dl-and-yt-dlp)
+    - [Is there enough context in your bug report?](#is-there-enough-context-in-your-bug-report)
+    - [Does the issue involve one problem, and one problem only?](#does-the-issue-involve-one-problem-and-one-problem-only)
+    - [Is anyone going to need the feature?](#is-anyone-going-to-need-the-feature)
+    - [Is your question about yt-dlp?](#is-your-question-about-yt-dlp)
+- [DEVELOPER INSTRUCTIONS](#developer-instructions)
+    - [Adding new feature or making overarching changes](#adding-new-feature-or-making-overarching-changes)
+    - [Adding support for a new site](#adding-support-for-a-new-site)
+    - [yt-dlp coding conventions](#yt-dlp-coding-conventions)
+        - [Mandatory and optional metafields](#mandatory-and-optional-metafields)
+        - [Provide fallbacks](#provide-fallbacks)
+        - [Regular expressions](#regular-expressions)
+        - [Long lines policy](#long-lines-policy)
+        - [Inline values](#inline-values)
+        - [Collapse fallbacks](#collapse-fallbacks)
+        - [Trailing parentheses](#trailing-parentheses)
+        - [Use convenience conversion and parsing functions](#use-convenience-conversion-and-parsing-functions)
+- [EMBEDDING YT-DLP](README.md#embedding-yt-dlp)
+
+
+
+# OPENING AN ISSUE
+
+Bugs and suggestions should be reported at: [yt-dlp/yt-dlp/issues](https://github.com/yt-dlp/yt-dlp/issues). Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in our [discord server](https://discord.gg/H5MNcFW63r).
+
+**Please include the full output of yt-dlp when run with `-Uv`**, i.e. **add** `-Uv` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
+```
+$ yt-dlp -Uv <your command line>
+[debug] Command-line config: ['-v', 'demo.com']
+[debug] Encodings: locale UTF-8, fs utf-8, out utf-8, pref UTF-8
+[debug] yt-dlp version 2021.09.25 (zip)
+[debug] Python version 3.8.10 (CPython 64bit) - Linux-5.4.0-74-generic-x86_64-with-glibc2.29
+[debug] exe versions: ffmpeg 4.2.4, ffprobe 4.2.4
 [debug] Proxy map: {}
+Current Build Hash 25cc412d1d3c0725a1f2f5b7e4682f6fb40e6d15f7024e96f7afd572e9919535
+yt-dlp is up to date (2021.09.25)
 ...
 ```
 **Do not post screenshots of verbose logs; only plain text is acceptable.**
 
-The output (including the first lines) contains important debugging information.
Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
+The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore will be closed as `incomplete`.
+
+The templates provided for the issues should be completed and **not removed**; this helps aid the resolution of the issue.
 
 Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist):
 
 ### Is the description of the issue itself sufficient?
 
-We often get issue reports that we cannot really decipher. While in most cases we eventually get the required information after asking back multiple times, this poses an unnecessary drain on our resources. Many contributors, including myself, are also not native speakers, so we may misread some parts.
+We often get issue reports that we cannot really decipher. While in most cases we eventually get the required information after asking back multiple times, this poses an unnecessary drain on our resources.
 
 So please elaborate on what feature you are requesting, or what bug you want to be fixed. Make sure that it's obvious
 
@@ -28,25 +61,31 @@ So please elaborate on what feature you are requesting, or what bug you want to
 - How it could be fixed
 - How your proposed solution would look like
 
-If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a committer myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over.
+If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. We often get frustrated by these issues, since the only possible way for us to move forward on them is to ask for clarification over and over.
 
-For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the `-v` flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
+For bug reports, this means that your report should contain the **complete** output of yt-dlp when called with the `-Uv` flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
 
-If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
+If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--write-pages` and upload the `.dump` files you get [somewhere](https://gist.github.com).
 
 **Site support requests must contain an example URL**.
An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL.
 
 ### Are you using the latest version?
 
-Before reporting any issue, type `youtube-dl -U`. This should report that you're up-to-date. About 20% of the reports we receive are already fixed, but people are using outdated versions. This goes for feature requests as well.
+Before reporting any issue, type `yt-dlp -U`. This should report that you're up-to-date. This goes for feature requests as well.
 
 ### Is the issue already documented?
 
-Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/ytdl-org/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
+Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/yt-dlp/yt-dlp/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2021.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
+
+Additionally, it is helpful to see if the issue has already been documented in the [youtube-dl issue tracker](https://github.com/ytdl-org/youtube-dl/issues). If similar issues have already been reported in youtube-dl (but not in our issue tracker), links to them can be included in your issue report here.
 
 ### Why are existing options not enough?
 
-Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
+Before requesting a new feature, please have a quick peek at [the list of supported options](README.md#usage-and-options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
+
+### Have you read and understood the changes between youtube-dl and yt-dlp
+
+There are many changes between youtube-dl and yt-dlp [(changes to default behavior)](README.md#differences-in-default-behavior), and some of the options available have a different behaviour in yt-dlp, or have been removed altogether [(list of changes to options)](README.md#deprecated-options). Make sure you have read and understood the differences in the options and how this may impact your downloads before opening an issue.
 
 ### Is there enough context in your bug report?
 
@@ -58,23 +97,28 @@ We are then presented with a very complicated request when the original problem
 
 Some of our users seem to think there is a limit of issues they can or should open. There is no limit of issues they can or should open.
While it may seem appealing to be able to dump all your issues into one ticket, that means that someone who solves one of your issues cannot mark the issue as closed. Typically, reporting a bunch of issues leads to the ticket lingering since nobody wants to attack that behemoth, until someone mercifully splits the issue into multiple ones.
 
-In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, White house podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of youtube-dl that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service.
+In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, White house podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of yt-dlp that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service.
 
 ### Is anyone going to need the feature?
 
 Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
 
-### Is your question about youtube-dl?
+### Is your question about yt-dlp?
+
+Some bug reports are completely unrelated to yt-dlp and relate to a different, or even the reporter's own, application. Please make sure that you are actually using yt-dlp. If you are using a UI for yt-dlp, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for yt-dlp fails in some way you believe is related to yt-dlp, by all means, go ahead and report the bug.
+
+If the issue is with `youtube-dl` (the project yt-dlp is forked from) and not with yt-dlp, the issue should be raised in the youtube-dl project.
+
+
 
-It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different, or even the reporter's own, application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug.
 
 # DEVELOPER INSTRUCTIONS
 
-Most users do not need to build youtube-dl and can [download the builds](https://ytdl-org.github.io/youtube-dl/download.html) or get them from their distribution.
+Most users do not need to build yt-dlp and can [download the builds](https://github.com/yt-dlp/yt-dlp/releases) or get them via [the other installation methods](README.md#installation).
 
-To run youtube-dl as a developer, you don't need to build anything either. Simply execute
+To run yt-dlp as a developer, you don't need to build anything either.
Simply execute
 
-    python -m youtube_dl
+    python -m yt_dlp
 
 To run the test, simply invoke your favorite test runner, or execute a test file directly; any of the following work:
 
@@ -85,42 +129,42 @@ To run the test, simply invoke your favorite test runner, or execute a test file
 
 See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.
 
-If you want to create a build of youtube-dl yourself, you'll need
+If you want to create a build of yt-dlp yourself, you can follow the instructions [here](README.md#compile).
 
-* python3
-* make (only GNU make is supported)
-* pandoc
-* zip
-* pytest
 
-### Adding support for a new site
+## Adding new feature or making overarching changes
 
-If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](README.md#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**.
+Before you start writing code for implementing a new feature, open an issue explaining your feature request and at least one use case. This allows the maintainers to decide whether such a feature is desired for the project in the first place, and will provide an avenue to discuss some implementation details. If you open a pull request for a new feature without discussing with us first, do not be surprised when we ask for large changes to the code, or even reject it outright.
+
+The same applies for overarching changes to the architecture, documentation or code style.
+
+
+## Adding support for a new site
+
+If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](https://www.github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. yt-dlp does **not support** such sites thus pull requests adding support for them **will be rejected**.
 
 After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):
 
-1. [Fork this repository](https://github.com/ytdl-org/youtube-dl/fork)
-2. Check out the source code with:
+1. [Fork this repository](https://github.com/yt-dlp/yt-dlp/fork)
+1. Check out the source code with:
 
-    git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git
+    git clone git@github.com:YOUR_GITHUB_USERNAME/yt-dlp.git
 
-3. Start a new git branch with
+1. Start a new git branch with
 
-    cd youtube-dl
+    cd yt-dlp
     git checkout -b yourextractor
 
-4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`:
+1.
Start with this simple template and save it to `yt_dlp/extractor/yourextractor.py`:
 
     ```python
     # coding: utf-8
-    from __future__ import unicode_literals
-
     from .common import InfoExtractor
-
-
+    
+    
     class YourExtractorIE(InfoExtractor):
         _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
-        _TEST = {
+        _TESTS = [{
             'url': 'https://yourextractor.com/watch/42',
             'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
             'info_dict': {
@@ -134,12 +178,12 @@ After you have ensured this site is distributing its content legally, you can fo
             # * A regular expression; start the string with re:
             # * Any Python type (for example int or float)
         }
-    }
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-
+    
         # TODO more code goes here, for example ...
         title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
 
@@ -148,45 +192,48 @@ After you have ensured this site is distributing its content legally, you can fo
             'title': title,
             'description': self._og_search_description(webpage),
             'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
-            # TODO more properties (see youtube_dl/extractor/common.py)
+            # TODO more properties (see yt_dlp/extractor/common.py)
         }
     ```
-5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
-6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
-7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
-8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
+1. Add an import in [`yt_dlp/extractor/extractors.py`](yt_dlp/extractor/extractors.py).
+1. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, the tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. You can also run all the tests in one go with `TestDownload.test_YourExtractor_all`.
+1. Make sure you have at least one test for your extractor. Even if all videos covered by the extractor are expected to be inaccessible for automated testing, tests should still be added with a `skip` parameter indicating why the particular test is disabled from running.
+1.
Have a look at [`yt_dlp/extractor/common.py`](yt_dlp/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](yt_dlp/extractor/common.py#L91-L426). Add tests and code for as many as you want.
+1. Make sure your code follows [yt-dlp coding conventions](#yt-dlp-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
 
-    $ flake8 youtube_dl/extractor/yourextractor.py
+    $ flake8 yt_dlp/extractor/yourextractor.py
 
-9. Make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
-10. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
+1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython and PyPy for Python 3.6 and above. Backward compatibility with even older versions of Python is not required.
+1. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files, [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
 
-    $ git add youtube_dl/extractor/extractors.py
-    $ git add youtube_dl/extractor/yourextractor.py
-    $ git commit -m '[yourextractor] Add new extractor'
+    $ git add yt_dlp/extractor/extractors.py
+    $ git add yt_dlp/extractor/yourextractor.py
+    $ git commit -m '[yourextractor] Add extractor'
     $ git push origin yourextractor
 
-11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
+1. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
 
 In any case, thank you very much for your contributions!
 
-## youtube-dl coding conventions
+
+## yt-dlp coding conventions
 
 This section introduces guidelines for writing idiomatic, robust and future-proof extractor code.
 
 Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters out of your control and this layout tends to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize dependency on the source's layout and even to make the code foresee potential future changes and be ready for that.
This is important because it will allow the extractor not to break on minor layout changes thus keeping old yt-dlp versions working. Even though this breakage issue may be easily fixed by a new version of yt-dlp, this could take some time, during which the extractor will remain broken.
+
 
 ### Mandatory and optional metafields
 
-For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
+For extraction to work yt-dlp relies on metadata your extractor extracts and provides to yt-dlp expressed by an [information dictionary](yt_dlp/extractor/common.py#L91-L426) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by yt-dlp:
 
 - `id` (media identifier)
 - `title` (media title)
 - `url` (media download URL) or `formats`
 
-In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken.
+The aforementioned metafields are the critical data that the extraction does not make any sense without, and if any of them fail to be extracted, then the extractor is considered completely broken. While, in fact, only `id` is technically mandatory, due to compatibility reasons, yt-dlp also treats `title` as mandatory. The extractor is allowed to return the info dict without url or formats in some special cases if it allows the user to extract useful information with `--ignore-no-formats-error`, e.g. when the video is a live stream that has not started yet.
 
-[Any field](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
+[Any field](yt_dlp/extractor/common.py#L219-L426) apart from the aforementioned ones is considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
 
 #### Example
 
@@ -200,8 +247,10 @@ Assume at this point `meta`'s layout is:
 
 ```python
 {
-    ...
     "summary": "some fancy summary text",
+    "user": {
+        "name": "uploader name"
+    },
     ...
 }
 ```
 
@@ -220,6 +269,30 @@ description = meta['summary']  # incorrect
 
 The latter will break extraction process with `KeyError` if `summary` disappears from `meta` at some later time but with the former approach extraction will just go ahead with `description` set to `None` which is perfectly fine (remember `None` is equivalent to the absence of data).
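+
+When the same data may live under several alternate keys, `dict_get` can be used to try them in order — a minimal sketch (the `description` key here is only illustrative, alongside the `summary` key from the example above):
+
+```python
+description = dict_get(meta, ('summary', 'description'))  # the first key with a non-empty value wins
+```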
+
+If the data is nested, do not use `.get` chains, but instead make use of the utility functions `try_get` or `traverse_obj`.
+
+Considering the above `meta` again, assume you want to extract `["user"]["name"]` and put it in the resulting info dict as `uploader`:
+
+```python
+uploader = try_get(meta, lambda x: x['user']['name'])  # correct
+```
+or
+```python
+uploader = traverse_obj(meta, ('user', 'name'))  # correct
+```
+
+and not like:
+
+```python
+uploader = meta['user']['name']  # incorrect
+```
+or
+```python
+uploader = meta.get('user', {}).get('name')  # incorrect
+```
+
+
 Similarly, you should pass `fatal=False` when extracting optional data from a webpage with `_search_regex`, `_html_search_regex` or similar methods, for instance:
 
 ```python
@@ -239,11 +312,36 @@ description = self._search_regex(
 ```
 
 On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that may or may not be present.
-
+
+
+Another thing to remember is not to try to iterate over `None`.
+
+Say you extracted a list of thumbnails into `thumbnail_data` using `try_get` and now want to iterate over them:
+
+```python
+thumbnail_data = try_get(...)
+thumbnails = [{
+    'url': item['url']
+} for item in thumbnail_data or []]  # correct
+```
+
+and not like:
+
+```python
+thumbnail_data = try_get(...)
+thumbnails = [{
+    'url': item['url']
+} for item in thumbnail_data]  # incorrect
+```
+
+In the latter case, `thumbnail_data` will be `None` if the field was not found and this will cause the loop `for item in thumbnail_data` to raise a fatal error. Using `for item in thumbnail_data or []` avoids this error and results in setting an empty list in `thumbnails` instead.
+
+
 ### Provide fallbacks
 
 When extracting metadata try to do so from multiple sources. For example if `title` is present in several places, try extracting from at least some of them. This makes it more future-proof in case some of the sources become unavailable.
 
+
 #### Example
 
 Say `meta` from the previous example has a `title` and you are about to extract it. Since `title` is a mandatory meta field you should end up with something like:
 
 ```python
 title = meta['title']
 ```
 
 If `title` disappears from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected.
 
 Assume that you have another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario:
 
 ```python
 title = meta.get('title') or self._og_search_title(webpage)
 ```
 
 This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`.
 
+
 ### Regular expressions
 
 #### Don't capture groups you don't use
 
@@ -283,11 +382,10 @@ Incorrect:
 r'(id|ID)=(?P<id>\d+)'
 ```
 
-
 #### Make regular expressions relaxed and flexible
 
 When using regular expressions, try to write them fuzzy, relaxed and flexible, skipping insignificant parts that are more likely to change, allowing both single and double quotes for quoted values and so on.
-
+
 ##### Example
 
 Say you need to extract `title` from the following HTML code:
 
 ```html
 <span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">some fancy title</span>
 ```
 
 The code for that task should look similar to:
 
 ```python
-title = self._search_regex(
+title = self._search_regex(  # correct
     r'<span[^>]+class="title"[^>]*>([^<]+)', webpage, 'title')
 ```
 
 Or even better:
 
 ```python
-title = self._search_regex(
+title = self._search_regex(  # correct
     r'<span[^>]+class=(["\'])title\1[^>]*>(?P<title>[^<]+)',
     webpage, 'title', group='title')
 ```
 
 Note how you tolerate potential changes in the `style` attribute's value or switching from `class` to `id` attribute and allow both single and double quotes for attribute values.
 
 The code definitely should not look like:
 
 ```python
-title = self._search_regex(
+title = self._search_regex(  # incorrect
     r'<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">(.*?)</span>',
     webpage, 'title', group='title')
 ```
+
+or even
+
+```python
+title = self._search_regex(  # incorrect
+    r'<span style=".*?" class="title">(.*?)</span>',
+    webpage, 'title', group='title')
+```
+
+Here the presence or absence of other attributes including `style` is irrelevant for the data we need, and so the regex must not depend on it.
+
 
 ### Long lines policy
 
-There is a soft limit to keep lines of code under 80 characters long. This means it should be respected if possible and if it does not make readability and code maintenance worse.
+There is a soft limit to keep lines of code under 100 characters long. This means it should be respected if possible and if it does not make readability and code maintenance worse. Sometimes, it may be reasonable to go up to 120 characters and sometimes even 80 can be unreadable. Keep in mind that this is not a hard limit and is just one of many tools to make the code more readable.
 
 For example, you should **never** split long string literals like URLs or some other often copied entities over multiple lines to fit this limit:
 
@@ -360,6 +469,7 @@ TITLE_RE = r'<title>([^<]+)'
 title = self._html_search_regex(TITLE_RE, webpage, 'title')
 ```
 
+
 ### Collapse fallbacks
 
 Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression via a list of patterns.
 
@@ -385,10 +495,13 @@ description = (
 
 Methods supporting list of patterns are: `_search_regex`, `_html_search_regex`, `_og_search_property`, `_html_search_meta`.
 
+
 ### Trailing parentheses
 
 Always move trailing parentheses after the last argument.
 
+Note that this *does not* apply to braces `}` or square brackets `]`, both of which should be closed on a new line.
+
 #### Example
 
 Correct:
 
@@ -406,30 +519,36 @@ Incorrect:
 )
 ```
 
+
 ### Use convenience conversion and parsing functions
 
-Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
+Wrap all extracted numeric data into safe functions from [`yt_dlp/utils.py`](yt_dlp/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
 
 Use `url_or_none` for safe URL processing.
 
-Use `try_get` for safe metadata extraction from parsed JSON.
+Use `try_get`, `dict_get` and `traverse_obj` for safe metadata extraction from parsed JSON.
Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution` for `resolution` extraction, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction.
 
-Explore [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
+Explore [`yt_dlp/utils.py`](yt_dlp/utils.py) for more useful convenience functions.
 
 #### More examples
 
 ##### Safely extract optional description from parsed JSON
 
 ```python
-description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
+description = traverse_obj(response, ('result', 'video', 'summary'), expected_type=str)
 ```
 
 ##### Safely extract more optional metadata
 
 ```python
-video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
+video = traverse_obj(response, ('result', 'video', 0), default={}, expected_type=dict)
 description = video.get('summary')
 duration = float_or_none(video.get('durationMs'), scale=1000)
 view_count = int_or_none(video.get('views'))
 ```
+
+
+
+# EMBEDDING YT-DLP
+See [README.md#embedding-yt-dlp](README.md#embedding-yt-dlp) for instructions on how to embed yt-dlp in another Python program.
diff --git a/README.md b/README.md
index ff117663a..f98fe98b6 100644
--- a/README.md
+++ b/README.md
@@ -54,7 +54,11 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t
 * [Modifying metadata examples](#modifying-metadata-examples)
 * [EXTRACTOR ARGUMENTS](#extractor-arguments)
 * [PLUGINS](#plugins)
+* [EMBEDDING YT-DLP](#embedding-yt-dlp)
 * [DEPRECATED OPTIONS](#deprecated-options)
+* [CONTRIBUTING](CONTRIBUTING.md#contributing-to-yt-dlp)
+    * [Opening an Issue](CONTRIBUTING.md#opening-an-issue)
+    * [Developer Instructions](CONTRIBUTING.md#developer-instructions)
 * [MORE](#more)
 
 
@@ -1510,6 +1514,84 @@ Note that **all** plugins are imported even if not invoked, and that **there are
 
 If you are a plugin author, add [ytdlp-plugins](https://github.com/topics/ytdlp-plugins) as a topic to your repository for discoverability
 
+
+# EMBEDDING YT-DLP
+
+yt-dlp makes the best effort to be a good command-line program, and thus should be callable from any programming language.
+
+Your program should avoid parsing the normal stdout since it may change in future versions. Instead, it should use options such as `-J`, `--print`, `--progress-template`, `--exec` etc. to create console output that you can reliably reproduce and parse.
+
+From a Python program, you can embed yt-dlp in a more powerful fashion, like this:
+
+```python
+import yt_dlp
+
+ydl_opts = {}
+with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
+```
+
+Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L154-L452).
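+
+For instance, a minimal sketch passing two commonly used options — `format` and `outtmpl` take the same syntax as the `--format` and `--output` command-line options:
+
+```python
+import yt_dlp
+
+ydl_opts = {
+    'format': 'bestvideo+bestaudio/best',  # same syntax as --format
+    'outtmpl': '%(title)s [%(id)s].%(ext)s',  # same syntax as --output
+}
+with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
+```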
+
+Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), converts the video to an mp3 file, implements a custom postprocessor and prints the final info_dict as json:
+
+```python
+import json
+
+import yt_dlp
+from yt_dlp.postprocessor.common import PostProcessor
+
+
+class MyLogger:
+    def debug(self, msg):
+        # For compatibility with youtube-dl, both debug and info are passed into debug
+        # You can distinguish them by the prefix '[debug] '
+        if msg.startswith('[debug] '):
+            pass
+        else:
+            self.info(msg)
+
+    def info(self, msg):
+        pass
+
+    def warning(self, msg):
+        pass
+
+    def error(self, msg):
+        print(msg)
+
+
+class MyCustomPP(PostProcessor):
+    def run(self, info):
+        self.to_screen('Doing stuff')
+        return [], info
+
+
+def my_hook(d):
+    if d['status'] == 'finished':
+        print('Done downloading, now converting ...')
+
+
+ydl_opts = {
+    'format': 'bestaudio/best',
+    'postprocessors': [{
+        'key': 'FFmpegExtractAudio',
+        'preferredcodec': 'mp3',
+        'preferredquality': '192',
+    }],
+    'logger': MyLogger(),
+    'progress_hooks': [my_hook],
+}
+
+with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+    ydl.add_post_processor(MyCustomPP())
+    info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc')
+    print(json.dumps(ydl.sanitize_info(info)))
+```
+
+See the public functions in [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py) for other available functions, e.g. `ydl.download`, `ydl.download_with_info_file`.
+
+
 # DEPRECATED OPTIONS
 
 These are all the deprecated options and the current alternative to achieve the same effect
 
@@ -1611,6 +1693,8 @@ These options were deprecated since 2014 and have now been entirely removed
 
     -t, --title                      -o "%(title)s-%(id)s.%(ext)s"
     -l, --literal                    -o accepts literal names
 
+# CONTRIBUTING
+See [CONTRIBUTING.md](CONTRIBUTING.md#contributing-to-yt-dlp) for instructions on [Opening an Issue](CONTRIBUTING.md#opening-an-issue) and [Contributing code to the project](CONTRIBUTING.md#developer-instructions)
 
 # MORE
-For FAQ, Developer Instructions etc., see the [original README](https://github.com/ytdl-org/youtube-dl#faq)
+For FAQ, see the [youtube-dl README](https://github.com/ytdl-org/youtube-dl#faq)
diff --git a/devscripts/make_contributing.py b/devscripts/make_contributing.py
index c7f3eef76..6b1b8219c 100755
--- a/devscripts/make_contributing.py
+++ b/devscripts/make_contributing.py
@@ -1,33 +1,34 @@
 #!/usr/bin/env python3
 from __future__ import unicode_literals
 
-# import io
+import io
 import optparse
-# import re
+import re
 
 
 def main():
+    return  # This is unused in yt-dlp
+
     parser = optparse.OptionParser(usage='%prog INFILE OUTFILE')
     options, args = parser.parse_args()
     if len(args) != 2:
         parser.error('Expected an input and an output filename')
-
-""" infile, outfile = args
+    infile, outfile = args
 
     with io.open(infile, encoding='utf-8') as inf:
         readme = inf.read()
 
-    bug_text = re.search( """
-# r'(?s)#\s*BUGS\s*[^\n]*\s*(.*?)#\s*COPYRIGHT', readme).group(1)
-# dev_text = re.search(
-# r'(?s)(#\s*DEVELOPER INSTRUCTIONS.*?)#\s*EMBEDDING yt-dlp',
-""" readme).group(1)
+    bug_text = re.search(
+        r'(?s)#\s*BUGS\s*[^\n]*\s*(.*?)#\s*COPYRIGHT', readme).group(1)
+    dev_text = re.search(
+        r'(?s)(#\s*DEVELOPER INSTRUCTIONS.*?)#\s*EMBEDDING yt-dlp', readme).group(1)
 
     out = bug_text + dev_text
 
     with io.open(outfile, 'w', encoding='utf-8') as outf:
-        outf.write(out) """
+        outf.write(out)
+
 
 if __name__ == '__main__':
     main()

From d6124e191e17f03fd48acf78db536400607c49d1 Mon Sep 17 00:00:00 2001
From: u-spec-png
<54671367+u-spec-png@users.noreply.github.com>
Date: Sat, 9 Oct 2021 02:04:02 +0000
Subject: [PATCH 0023/2552] [bilibili] Fix bug in
 efc947fb3eea38eeae257980e663de806f1e19d0

Authored by: u-spec-png
---
 yt_dlp/extractor/bilibili.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index daa224b17..a1be7e04b 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -800,7 +800,6 @@ class BiliIntlBaseIE(InfoExtractor):
             sub_data = self._download_json(sub_url, ep_id, fatal=False)
             if not sub_data:
                 continue
-            sub_data = self._parse_json(sub_data)
             subtitles.setdefault(sub.get('key', 'en'), []).append({
                 'ext': 'srt',
                 'data': self.json2srt(sub_data)

From f2cad2e496843889274b79deb3f7f6e1c8c3f948 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Fri, 8 Oct 2021 20:37:24 +0530
Subject: [PATCH 0024/2552] [Hidive] Fix subtitles broken by
 705e7c2005dfe67a905e18736c9f6345ee9d386b

---
 yt_dlp/extractor/hidive.py | 56 +++++++++++++++++++++++++-------------
 1 file changed, 37 insertions(+), 19 deletions(-)

diff --git a/yt_dlp/extractor/hidive.py b/yt_dlp/extractor/hidive.py
index 909d1fbc1..18ae4d379 100644
--- a/yt_dlp/extractor/hidive.py
+++ b/yt_dlp/extractor/hidive.py
@@ -1,5 +1,5 @@
 # coding: utf-8
-from __future__ import unicode_literals
+import re
 
 from .common import InfoExtractor
 from ..utils import (
@@ -52,15 +52,39 @@ class HiDiveIE(InfoExtractor):
         self._download_webpage(
             self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(data))
 
+    def _call_api(self, video_id, title, key, data={}, **kwargs):
+        data = {
+            **data,
+            'Title': title,
+            'Key': key,
+            'PlayerId': 'f4f895ce1ca713ba263b91caeb1daa2d08904783',
+        }
+        return self._download_json(
+            'https://www.hidive.com/play/settings', video_id,
+            data=urlencode_postdata(data), **kwargs) or {}
+
+    def _extract_subtitles_from_rendition(self, rendition, subtitles, parsed_urls):
+        for cc_file in rendition.get('ccFiles', []):
+            cc_url = url_or_none(try_get(cc_file, lambda x: x[2]))
+            # name is used since we can't distinguish subs with same language code
+            cc_lang = try_get(cc_file, (lambda x: x[1].replace(' ', '-').lower(), lambda x: x[0]), str)
+            if cc_url not in parsed_urls and cc_lang:
+                parsed_urls.add(cc_url)
+                subtitles.setdefault(cc_lang, []).append({'url': cc_url})
+
+    def _get_subtitles(self, url, video_id, title, key, subtitles, parsed_urls):
+        webpage = self._download_webpage(url, video_id, fatal=False) or ''
+        for caption in set(re.findall(r'data-captions=\"([^\"]+)\"', webpage)):
+            renditions = self._call_api(
+                video_id, title, key, {'Captions': caption}, fatal=False,
+                note=f'Downloading {caption} subtitle information').get('renditions') or {}
+            for rendition_id, rendition in renditions.items():
+                self._extract_subtitles_from_rendition(rendition, subtitles, parsed_urls)
+        return subtitles
+
     def _real_extract(self, url):
         video_id, title, key = self._match_valid_url(url).group('id', 'title', 'key')
-        settings = self._download_json(
-            'https://www.hidive.com/play/settings', video_id,
-            data=urlencode_postdata({
-                'Title': title,
-                'Key': key,
-                'PlayerId': 'f4f895ce1ca713ba263b91caeb1daa2d08904783',
-            }))
+        settings = self._call_api(video_id, title, key)
 
         restriction = settings.get('restrictionReason')
         if restriction == 'RegionRestricted':
@@ -69,12 +93,12 @@ class HiDiveIE(InfoExtractor):
             raise ExtractorError(
                 '%s said: %s' % (self.IE_NAME, restriction), expected=True)
 
-        formats, subtitles, urls = [], {}, {None}
+        formats, subtitles, parsed_urls = [], {}, {None}
         for
rendition_id, rendition in settings['renditions'].items():
             audio, version, extra = rendition_id.split('_')
             m3u8_url = url_or_none(try_get(rendition, lambda x: x['bitrates']['hls']))
-            if m3u8_url not in urls:
-                urls.add(m3u8_url)
+            if m3u8_url not in parsed_urls:
+                parsed_urls.add(m3u8_url)
                 frmt = self._extract_m3u8_formats(
                     m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id=rendition_id, fatal=False)
                 for f in frmt:
@@ -82,19 +106,13 @@ class HiDiveIE(InfoExtractor):
                     f['format_note'] = f'{version}, {extra}'
                 formats.extend(frmt)
 
-            for cc_file in rendition.get('ccFiles', []):
-                cc_url = url_or_none(try_get(cc_file, lambda x: x[2]))
-                # name is used since we cant distinguish subs with same language code
-                cc_lang = try_get(cc_file, (lambda x: x[1].replace(' ', '-').lower(), lambda x: x[0]), str)
-                if cc_url not in urls and cc_lang:
-                    urls.add(cc_url)
-                    subtitles.setdefault(cc_lang, []).append({'url': cc_url})
+            self._extract_subtitles_from_rendition(rendition, subtitles, parsed_urls)
         self._sort_formats(formats)
 
         return {
             'id': video_id,
             'title': video_id,
-            'subtitles': subtitles,
+            'subtitles': self.extract_subtitles(url, video_id, title, key, subtitles, parsed_urls),
             'formats': formats,
             'series': title,
             'season_number': int_or_none(

From b922db9fe58f73aacd5dab4fe5ba1001d803a798 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Sat, 9 Oct 2021 02:06:30 +0530
Subject: [PATCH 0025/2552] [http] Respect user-provided chunk size over
 extractor's

---
 yt_dlp/downloader/http.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py
index 9e79051ad..5d7c988c7 100644
--- a/yt_dlp/downloader/http.py
+++ b/yt_dlp/downloader/http.py
@@ -48,8 +48,9 @@ class HttpFD(FileDownloader):
 
         is_test = self.params.get('test', False)
         chunk_size = self._TEST_FILE_SIZE if is_test else (
+            self.params.get('http_chunk_size')
             or info_dict.get('downloader_options', {}).get('http_chunk_size')
-            or self.params.get('http_chunk_size') or 0)
+            or 0)
 
         ctx.open_mode = 'wb'
         ctx.resume_len = 0

From 2614f64600f9249682897786f5345a61d98dafeb Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Sat, 9 Oct 2021 08:14:41 +0530
Subject: [PATCH 0026/2552] [utils] Let traverse_obj accept functions as keys

---
 yt_dlp/utils.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 027387897..db9b9de94 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -6335,7 +6335,9 @@ def traverse_obj(
     ''' Traverse nested list/dict/tuple
     @param path_list        A list of paths which are checked one by one.
                             Each path is a list of keys where each key is a string,
-                            a tuple of strings or "...". When a tuple is given,
+                            a function, a tuple of strings or "...".
+                            When a function is given, it takes the key as argument and
+                            returns whether the key matches or not. When a tuple is given,
                             all the keys given in the tuple are traversed, and
                            "..."
traverses all the keys in the object @param default Default value to return @@ -6368,6 +6370,18 @@ def traverse_obj( _current_depth += 1 depth = max(depth, _current_depth) return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj] + elif callable(key): + if isinstance(obj, (list, tuple, LazyList)): + obj = enumerate(obj) + elif isinstance(obj, dict): + obj = obj.items() + else: + if not traverse_string: + return None + obj = str(obj) + _current_depth += 1 + depth = max(depth, _current_depth) + return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if key(k)] elif isinstance(obj, dict) and not (is_user_input and key == ':'): obj = (obj.get(key) if casesense or (key in obj) else next((v for k, v in obj.items() if _lower(k) == key), None)) From 8cd69fc40786d081b5523f9dc20861c130a2843d Mon Sep 17 00:00:00 2001 From: Jules-A Date: Sat, 9 Oct 2021 23:21:41 +0800 Subject: [PATCH 0027/2552] [Funimation] Fix for /v/ urls (#1196) Closes #993 Authored by: pukkandan, Jules-A --- yt_dlp/extractor/funimation.py | 135 ++++++++++++++++++--------------- 1 file changed, 73 insertions(+), 62 deletions(-) diff --git a/yt_dlp/extractor/funimation.py b/yt_dlp/extractor/funimation.py index ede53b326..382cbe159 100644 --- a/yt_dlp/extractor/funimation.py +++ b/yt_dlp/extractor/funimation.py @@ -2,26 +2,61 @@ from __future__ import unicode_literals import random +import re import string from .common import InfoExtractor from ..compat import compat_HTTPError from ..utils import ( determine_ext, - dict_get, int_or_none, js_to_json, + orderedSet, + qualities, str_or_none, + traverse_obj, try_get, - qualities, urlencode_postdata, ExtractorError, ) -class FunimationPageIE(InfoExtractor): +class FunimationBaseIE(InfoExtractor): + _NETRC_MACHINE = 'funimation' + _REGION = None + _TOKEN = None + + def _get_region(self): + region_cookie = self._get_cookies('https://www.funimation.com').get('region') + region = region_cookie.value if region_cookie else self.get_param('geo_bypass_country') + return region or traverse_obj( + self._download_json( + 'https://geo-service.prd.funimationsvc.com/geo/v1/region/check', None, fatal=False, + note='Checking geo-location', errnote='Unable to fetch geo-location information'), + 'region') or 'US' + + def _login(self): + username, password = self._get_login_info() + if username is None: + return + try: + data = self._download_json( + 'https://prod-api-funimationnow.dadcdigital.com/api/auth/login/', + None, 'Logging in', data=urlencode_postdata({ + 'username': username, + 'password': password, + })) + return data['token'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + error = self._parse_json(e.cause.read().decode(), None)['error'] + raise ExtractorError(error, expected=True) + raise + + +class FunimationPageIE(FunimationBaseIE): IE_NAME = 'funimation:page' - _VALID_URL = r'(?Phttps?://(?:www\.)?funimation(?:\.com|now\.uk))/(?P[^/]+/)?(?Pshows/(?P[^/]+/[^/?#&]+).*$)' + _VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/(?:(?P[^/]+)/)?(?:shows|v)/(?P[^/]+)/(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://www.funimation.com/shows/attack-on-titan-junior-high/broadcast-dub-preview/', @@ -46,38 +81,34 @@ class FunimationPageIE(InfoExtractor): }, { 'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/', 'only_matching': True, + }, { + 'url': 'https://www.funimation.com/v/a-certain-scientific-railgun/super-powered-level-5', + 'only_matching': True, }] + def 
_real_initialize(self): + if not self._REGION: + FunimationBaseIE._REGION = self._get_region() + if not self._TOKEN: + FunimationBaseIE._TOKEN = self._login() + def _real_extract(self, url): - mobj = self._match_valid_url(url) - display_id = mobj.group('id').replace('/', '_') - if not mobj.group('lang'): - url = '%s/en/%s' % (mobj.group('origin'), mobj.group('path')) - - webpage = self._download_webpage(url, display_id) - title_data = self._parse_json(self._search_regex( - r'TITLE_DATA\s*=\s*({[^}]+})', - webpage, 'title data', default=''), - display_id, js_to_json, fatal=False) or {} - - video_id = ( - title_data.get('id') - or self._search_regex( - (r"KANE_customdimensions.videoID\s*=\s*'(\d+)';", r']+src="/player/(\d+)'), - webpage, 'video_id', default=None) - or self._search_regex( - r'/player/(\d+)', - self._html_search_meta(['al:web:url', 'og:video:url', 'og:video:secure_url'], webpage, fatal=True), - 'video id')) + locale, show, episode = self._match_valid_url(url).group('lang', 'show', 'episode') + + video_id = traverse_obj(self._download_json( + f'https://title-api.prd.funimationsvc.com/v1/shows/{show}/episodes/{episode}', + f'{show}_{episode}', query={ + 'deviceType': 'web', + 'region': self._REGION, + 'locale': locale or 'en' + }), ('videoList', ..., 'id'), get_all=False) + return self.url_result(f'https://www.funimation.com/player/{video_id}', FunimationIE.ie_key(), video_id) -class FunimationIE(InfoExtractor): +class FunimationIE(FunimationBaseIE): _VALID_URL = r'https?://(?:www\.)?funimation\.com/player/(?P\d+)' - _NETRC_MACHINE = 'funimation' - _TOKEN = None - _TESTS = [{ 'url': 'https://www.funimation.com/player/210051', 'info_dict': { @@ -93,7 +124,7 @@ class FunimationIE(InfoExtractor): 'season_number': 99, 'series': 'Attack on Titan: Junior High', 'description': '', - 'duration': 154, + 'duration': 155, }, 'params': { 'skip_download': 'm3u8', @@ -114,7 +145,7 @@ class FunimationIE(InfoExtractor): 'season_number': 99, 'series': 'Attack on Titan: Junior High', 'description': '', - 'duration': 154, + 'duration': 155, }, 'params': { 'skip_download': 'm3u8', @@ -122,26 +153,9 @@ class FunimationIE(InfoExtractor): }, }] - def _login(self): - username, password = self._get_login_info() - if username is None: - return - try: - data = self._download_json( - 'https://prod-api-funimationnow.dadcdigital.com/api/auth/login/', - None, 'Logging in', data=urlencode_postdata({ - 'username': username, - 'password': password, - })) - self._TOKEN = data['token'] - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: - error = self._parse_json(e.cause.read().decode(), None)['error'] - raise ExtractorError(error, expected=True) - raise - def _real_initialize(self): - self._login() + if not self._TOKEN: + FunimationBaseIE._TOKEN = self._login() @staticmethod def _get_experiences(episode): @@ -283,7 +297,7 @@ class FunimationIE(InfoExtractor): return subtitles -class FunimationShowIE(FunimationIE): +class FunimationShowIE(FunimationBaseIE): IE_NAME = 'funimation:show' _VALID_URL = r'(?Phttps?://(?:www\.)?funimation(?:\.com|now\.uk)/(?P[^/]+)?/?shows/(?P[^/?#&]+))/?(?:[?#]|$)' @@ -311,31 +325,28 @@ class FunimationShowIE(FunimationIE): }] def _real_initialize(self): - region = self._get_cookies('https://www.funimation.com').get('region') - self._region = region.value if region else try_get( - self._download_json( - 'https://geo-service.prd.funimationsvc.com/geo/v1/region/check', None, fatal=False, - note='Checking geo-location', errnote='Unable to fetch 
geo-location information'),
-            lambda x: x['region']) or 'US'
+        if not self._REGION:
+            FunimationBaseIE._REGION = self._get_region()
 
     def _real_extract(self, url):
         base_url, locale, display_id = self._match_valid_url(url).groups()
 
         show_info = self._download_json(
             'https://title-api.prd.funimationsvc.com/v2/shows/%s?region=%s&deviceType=web&locale=%s'
-            % (display_id, self._region, locale or 'en'), display_id)
-        items = self._download_json(
+            % (display_id, self._REGION, locale or 'en'), display_id)
+        items_info = self._download_json(
             'https://prod-api-funimationnow.dadcdigital.com/api/funimation/episodes/?limit=99999&title_id=%s'
-            % show_info.get('id'), display_id).get('items')
-        vod_items = map(lambda k: dict_get(k, ('mostRecentSvod', 'mostRecentAvod')).get('item'), items)
+            % show_info.get('id'), display_id)
+
+        vod_items = traverse_obj(items_info, ('items', ..., re.compile('(?i)mostRecent[AS]vod').match, 'item'))
 
         return {
             '_type': 'playlist',
             'id': show_info['id'],
             'title': show_info['name'],
-            'entries': [
+            'entries': orderedSet(
                 self.url_result(
                     '%s/%s' % (base_url, vod_item.get('episodeSlug')), FunimationPageIE.ie_key(),
                     vod_item.get('episodeId'), vod_item.get('episodeName'))
-                for vod_item in sorted(vod_items, key=lambda x: x.get('episodeOrder'))],
+                for vod_item in sorted(vod_items, key=lambda x: x.get('episodeOrder', -1))),
         }

From 4e3b637d5be70b92ee511743405f3c907fed20f6 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Sat, 9 Oct 2021 21:48:46 +0530
Subject: [PATCH 0028/2552] Merge webm formats into mkv if thumbnails are to be
 embedded

This was originally implemented in 4d971a16b831a45147b6ae7ce53b3e105d204da7 (#173)
by @damianoamatruda but was reverted in 3b297919e046082cc4ab26ecb959d9f4f584102b
since it was unintentionally being triggered for `write_thumbnail` (See #500)
---
 yt_dlp/YoutubeDL.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 1d865161a..398fb67af 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -137,6 +137,7 @@ from .downloader import (
 from .downloader.rtmp import rtmpdump_version
 from .postprocessor import (
     get_postprocessor,
+    EmbedThumbnailPP,
     FFmpegFixupDurationPP,
     FFmpegFixupM3u8PP,
     FFmpegFixupM4aPP,
@@ -2696,10 +2697,19 @@ class YoutubeDL(object):
 
             requested_formats = info_dict['requested_formats']
             old_ext = info_dict['ext']
-            if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
-                info_dict['ext'] = 'mkv'
-                self.report_warning(
-                    'Requested formats are incompatible for merge and will be merged into mkv.')
+            if self.params.get('merge_output_format') is None:
+                if not compatible_formats(requested_formats):
+                    info_dict['ext'] = 'mkv'
+                    self.report_warning(
+                        'Requested formats are incompatible for merge and will be merged into mkv')
+                if (info_dict['ext'] == 'webm'
+                        and info_dict.get('thumbnails')
+                        # check with type instead of pp_key, __name__, or isinstance
+                        # since we don't want any custom PPs to trigger this
+                        and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
+                    info_dict['ext'] = 'mkv'
+                    self.report_warning(
+                        'webm doesn\'t support embedding a thumbnail, mkv will be used')
             new_ext = info_dict['ext']
 
             def correct_ext(filename, ext=new_ext):

From b5ae35ee6d3f913898770b8c74ee5f5e5cc33560 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Sat, 9 Oct 2021 05:53:15 +0530
Subject: [PATCH 0029/2552] [cleanup] Misc cleanup

---
 .github/ISSUE_TEMPLATE/1_broken_site.md       |  15 ++++---
.../ISSUE_TEMPLATE/2_site_support_request.md | 15 ++++--- .../ISSUE_TEMPLATE/3_site_feature_request.md | 13 +++--- .github/ISSUE_TEMPLATE/4_bug_report.md | 17 ++++---- .github/ISSUE_TEMPLATE/5_feature_request.md | 11 +++-- .github/ISSUE_TEMPLATE/6_question.md | 11 +++-- .github/ISSUE_TEMPLATE_tmpl/1_broken_site.md | 15 ++++--- .../2_site_support_request.md | 15 ++++--- .../3_site_feature_request.md | 13 +++--- .github/ISSUE_TEMPLATE_tmpl/4_bug_report.md | 17 ++++---- .../ISSUE_TEMPLATE_tmpl/5_feature_request.md | 11 +++-- .github/PULL_REQUEST_TEMPLATE.md | 2 +- README.md | 5 ++- setup.py | 2 +- test/helper.py | 6 +-- yt_dlp/YoutubeDL.py | 29 ++++++++----- yt_dlp/__init__.py | 4 -- yt_dlp/downloader/http.py | 4 +- yt_dlp/extractor/common.py | 42 +++++++------------ yt_dlp/extractor/hidive.py | 6 +-- yt_dlp/extractor/minoto.py | 2 +- yt_dlp/extractor/palcomp3.py | 2 +- yt_dlp/minicurses.py | 2 + yt_dlp/options.py | 17 +------- ytdlp_plugins/extractor/sample.py | 2 +- 25 files changed, 142 insertions(+), 136 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 53ca71219..8a5503510 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -1,8 +1,8 @@ --- name: Broken site support about: Report broken or misfunctioning site -title: "[Broken]" -labels: Broken +title: "[Broken] Website Name: A short description of the issue" +labels: ['triage', 'extractor-bug'] assignees: '' --- @@ -21,11 +21,12 @@ assignees: '' - [ ] I'm reporting a broken site support @@ -33,6 +34,8 @@ Carefully read and work through this check list in order to prevent the most com - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones +- [ ] I've read the opening an issue section in CONTRIBUTING.md +- [ ] I have given an appropriate title to the issue ## Verbose log diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index a9e2a9c53..7f58fc8a7 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -1,8 +1,8 @@ --- name: Site support request about: Request support for a new site -title: "[Site Request]" -labels: Request +title: "[Site Request] Website Name" +labels: ['triage', 'site-request'] assignees: '' --- @@ -21,11 +21,12 @@ assignees: '' - [ ] I'm reporting a new site support request @@ -34,6 +35,8 @@ Carefully read and work through this check list in order to prevent the most com - [ ] I've checked that none of provided URLs violate any copyrights - [ ] The provided URLs do not contain any DRM to the best of my knowledge - [ ] I've searched the bugtracker for similar site support requests including closed ones +- [ ] I've read the opening an issue section in CONTRIBUTING.md +- [ ] I have given an appropriate title to the issue ## Example URLs diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 6cd8b8ba0..38b38c803 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -1,8 +1,8 @@ --- name: Site feature request about: Request a new functionality for a site -title: "[Site Request]" -labels: Request +title: "[Site Feature] Website Name: A short description of the 
feature" +labels: ['triage', 'site-enhancement'] assignees: '' --- @@ -21,14 +21,17 @@ assignees: '' - [ ] I'm reporting a site feature request - [ ] I've verified that I'm running yt-dlp version **2021.09.25** - [ ] I've searched the bugtracker for similar site feature requests including closed ones +- [ ] I've read the opening an issue section in CONTRIBUTING.md +- [ ] I have given an appropriate title to the issue ## Description diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index a302daab6..b2f7efcda 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -1,8 +1,8 @@ --- name: Bug report about: Report a bug unrelated to any particular site or extractor -title: '' -labels: '' +title: '[Bug] A short description of the issue' +labels: ['triage', 'bug'] assignees: '' --- @@ -21,12 +21,12 @@ assignees: '' - [ ] I'm reporting a bug unrelated to a specific site @@ -35,7 +35,8 @@ Carefully read and work through this check list in order to prevent the most com - [ ] The provided URLs do not contain any DRM to the best of my knowledge - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones -- [ ] I've read bugs section in FAQ +- [ ] I've read the opening an issue section in CONTRIBUTING.md +- [ ] I have given an appropriate title to the issue ## Verbose log diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index c40a5ad35..4aad8ab18 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -1,8 +1,8 @@ --- name: Feature request about: Request a new functionality unrelated to any particular site or extractor -title: "[Feature Request]" -labels: Request +title: "[Feature Request] A short description of your feature" +labels: ['triage', 'enhancement'] assignees: '' --- @@ -21,14 +21,17 @@ assignees: '' - [ ] I'm reporting a feature request - [ ] I've verified that I'm running yt-dlp version **2021.09.25** - [ ] I've searched the bugtracker for similar feature requests including closed ones +- [ ] I've read the opening an issue section in CONTRIBUTING.md +- [ ] I have given an appropriate title to the issue ## Description diff --git a/.github/ISSUE_TEMPLATE/6_question.md b/.github/ISSUE_TEMPLATE/6_question.md index 9f052090a..5ab17802a 100644 --- a/.github/ISSUE_TEMPLATE/6_question.md +++ b/.github/ISSUE_TEMPLATE/6_question.md @@ -1,7 +1,7 @@ --- name: Ask question about: Ask yt-dlp related question -title: "[Question]" +title: "[Question] A short description of your question" labels: question assignees: '' @@ -21,14 +21,17 @@ assignees: '' - [ ] I'm asking a question -- [ ] I've looked through the README and FAQ for similar questions +- [ ] I've looked through the README +- [ ] I've read the opening an issue section in CONTRIBUTING.md - [ ] I've searched the bugtracker for similar questions including closed ones +- [ ] I have given an appropriate title to the issue ## Question diff --git a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md index 6da13a7b5..9ee002296 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md @@ -1,8 +1,8 @@ --- name: Broken site support about: Report broken or misfunctioning site -title: "[Broken]" -labels: Broken +title: "[Broken] Website Name: A short 
description of the issue" +labels: ['triage', 'extractor-bug'] assignees: '' --- @@ -21,11 +21,12 @@ assignees: '' - [ ] I'm reporting a broken site support @@ -33,6 +34,8 @@ Carefully read and work through this check list in order to prevent the most com - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones +- [ ] I've read the opening an issue section in CONTRIBUTING.md +- [ ] I have given an appropriate title to the issue ## Verbose log diff --git a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md index 79adb709c..e71abbab2 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md @@ -1,8 +1,8 @@ --- name: Site support request about: Request support for a new site -title: "[Site Request]" -labels: Request +title: "[Site Request] Website Name" +labels: ['triage', 'site-request'] assignees: '' --- @@ -21,11 +21,12 @@ assignees: '' - [ ] I'm reporting a new site support request @@ -34,6 +35,8 @@ Carefully read and work through this check list in order to prevent the most com - [ ] I've checked that none of provided URLs violate any copyrights - [ ] The provided URLs do not contain any DRM to the best of my knowledge - [ ] I've searched the bugtracker for similar site support requests including closed ones +- [ ] I've read the opening an issue section in CONTRIBUTING.md +- [ ] I have given an appropriate title to the issue ## Example URLs diff --git a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md index d74b6e279..e0ccd5416 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md @@ -1,8 +1,8 @@ --- name: Site feature request about: Request a new functionality for a site -title: "[Site Request]" -labels: Request +title: "[Site Feature] Website Name: A short description of the feature" +labels: ['triage', 'site-enhancement'] assignees: '' --- @@ -21,14 +21,17 @@ assignees: '' - [ ] I'm reporting a site feature request - [ ] I've verified that I'm running yt-dlp version **%(version)s** - [ ] I've searched the bugtracker for similar site feature requests including closed ones +- [ ] I've read the opening an issue section in CONTRIBUTING.md +- [ ] I have given an appropriate title to the issue ## Description diff --git a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md index 13b577f86..43e91b052 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md @@ -1,8 +1,8 @@ --- name: Bug report about: Report a bug unrelated to any particular site or extractor -title: '' -labels: '' +title: '[Bug] A short description of the issue' +labels: ['triage', 'bug'] assignees: '' --- @@ -21,12 +21,12 @@ assignees: '' - [ ] I'm reporting a bug unrelated to a specific site @@ -35,7 +35,8 @@ Carefully read and work through this check list in order to prevent the most com - [ ] The provided URLs do not contain any DRM to the best of my knowledge - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones -- [ ] I've read bugs section in FAQ +- [ ] 
I've read the opening an issue section in CONTRIBUTING.md +- [ ] I have given an appropriate title to the issue ## Verbose log diff --git a/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md index 4a0209db1..075e0b1b3 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md @@ -1,8 +1,8 @@ --- name: Feature request about: Request a new functionality unrelated to any particular site or extractor -title: "[Feature Request]" -labels: Request +title: "[Feature Request] A short description of your feature" +labels: ['triage', 'enhancement'] assignees: '' --- @@ -21,14 +21,17 @@ assignees: '' - [ ] I'm reporting a feature request - [ ] I've verified that I'm running yt-dlp version **%(version)s** - [ ] I've searched the bugtracker for similar feature requests including closed ones +- [ ] I've read the opening an issue section in CONTRIBUTING.md +- [ ] I have given an appropriate title to the issue ## Description diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 7ef08d68a..684bf59e9 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -7,7 +7,7 @@ --- ### Before submitting a *pull request* make sure you have: -- [ ] At least skimmed through [adding new extractor tutorial](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#adding-support-for-a-new-site) and [yt-dlp coding conventions](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#yt-dlp-coding-conventions) sections +- [ ] At least skimmed through [contributing guidelines](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions) including [yt-dlp coding conventions](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#yt-dlp-coding-conventions) - [ ] [Searched](https://github.com/yt-dlp/yt-dlp/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests - [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8) diff --git a/README.md b/README.md index f98fe98b6..c0f84fcac 100644 --- a/README.md +++ b/README.md @@ -199,7 +199,7 @@ On windows, [Microsoft Visual C++ 2010 SP1 Redistributable Package (x86)](https: While all the other dependancies are optional, `ffmpeg` and `ffprobe` are highly recommended * [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging seperate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. Licence [depends on the build](https://www.ffmpeg.org/legal.html) * [**mutagen**](https://github.com/quodlibet/mutagen) - For embedding thumbnail in certain formats. Licenced under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING) -* [**pycryptodomex**](https://github.com/Legrandin/pycryptodomex) - For decrypting AES-128 HLS streams and various other data. Licenced under [BSD2](https://github.com/Legrandin/pycryptodomex/blob/master/LICENSE.rst) +* [**pycryptodomex**](https://github.com/Legrandin/pycryptodome) - For decrypting AES-128 HLS streams and various other data. Licenced under [BSD2](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst) * [**websockets**](https://github.com/aaugustin/websockets) - For downloading over websocket. Licenced under [BSD3](https://github.com/aaugustin/websockets/blob/main/LICENSE) * [**keyring**](https://github.com/jaraco/keyring) - For decrypting cookies of chromium-based browsers on Linux. 
Licenced under [MIT](https://github.com/jaraco/keyring/blob/main/LICENSE) * [**AtomicParsley**](https://github.com/wez/atomicparsley) - For embedding thumbnail in mp4/m4a if mutagen is not present. Licenced under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING) @@ -287,7 +287,8 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t --flat-playlist Do not extract the videos of a playlist, only list them --no-flat-playlist Extract the videos of a playlist - --mark-watched Mark videos watched (YouTube only) + --mark-watched Mark videos watched (even with --simulate). + Currently only supported for YouTube --no-mark-watched Do not mark videos watched (default) --no-colors Do not emit color codes in output --compat-options OPTS Options that can help keep compatibility diff --git a/setup.py b/setup.py index ff23877dc..fbd2be0ae 100644 --- a/setup.py +++ b/setup.py @@ -119,7 +119,7 @@ setup( 'Documentation': 'https://yt-dlp.readthedocs.io', 'Source': 'https://github.com/yt-dlp/yt-dlp', 'Tracker': 'https://github.com/yt-dlp/yt-dlp/issues', - #'Funding': 'https://donate.pypi.org', + 'Funding': 'https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators', }, classifiers=[ 'Topic :: Multimedia :: Video', diff --git a/test/helper.py b/test/helper.py index 9599eab8e..5c0e645f9 100644 --- a/test/helper.py +++ b/test/helper.py @@ -22,7 +22,7 @@ from yt_dlp.utils import ( ) -if "pytest" in sys.modules: +if 'pytest' in sys.modules: import pytest is_download_test = pytest.mark.download else: @@ -32,9 +32,9 @@ else: def get_params(override=None): PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), - "parameters.json") + 'parameters.json') LOCAL_PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), - "local_parameters.json") + 'local_parameters.json') with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: parameters = json.load(pf) if os.path.exists(LOCAL_PARAMETERS_FILE): diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 398fb67af..2b3c33ce5 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -9,6 +9,7 @@ import copy import datetime import errno import fileinput +import functools import io import itertools import json @@ -330,7 +331,8 @@ class YoutubeDL(object): * when: When to run the postprocessor. Can be one of pre_process|before_dl|post_process|after_move. Assumed to be 'post_process' if not given - post_hooks: A list of functions that get called as the final step + post_hooks: Deprecated - Register a custom postprocessor instead + A list of functions that get called as the final step for each video file, after all postprocessors have been called. The filename will be passed as the only argument. progress_hooks: A list of functions that get called on download @@ -423,7 +425,7 @@ class YoutubeDL(object): use downloader suggested by extractor if None. compat_opts: Compatibility options. See "Differences in default behavior". The following options do not work when used through the API: - filename, abort-on-error, multistreams, no-live-chat, + filename, abort-on-error, multistreams, no-live-chat, format-sort no-clean-infojson, no-playlist-metafiles, no-keep-subs. Refer __init__.py for their implementation progress_template: Dictionary of templates for progress outputs. 
@@ -434,8 +436,9 @@ class YoutubeDL(object): The following parameters are not used by YoutubeDL itself, they are used by the downloader (see yt_dlp/downloader/common.py): nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize, - max_filesize, test, noresizebuffer, retries, continuedl, noprogress, - xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size. + max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl, + noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size, + external_downloader_args. The following options are used by the post processors: prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available, @@ -541,13 +544,13 @@ class YoutubeDL(object): for msg in self.params.get('warnings', []): self.report_warning(msg) - if self.params.get('overwrites') is None: - self.params.pop('overwrites', None) - elif self.params.get('nooverwrites') is not None: + if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None: # nooverwrites was unnecessarily changed to overwrites # in 0c3d0f51778b153f65c21906031c2e091fcfb641 # This ensures compatibility with both keys self.params['overwrites'] = not self.params['nooverwrites'] + elif self.params.get('overwrites') is None: + self.params.pop('overwrites', None) else: self.params['nooverwrites'] = not self.params['overwrites'] @@ -1253,7 +1256,7 @@ class YoutubeDL(object): self.report_error('no suitable InfoExtractor for URL %s' % url) def __handle_extraction_exceptions(func): - + @functools.wraps(func) def wrapper(self, *args, **kwargs): try: return func(self, *args, **kwargs) @@ -1973,7 +1976,7 @@ class YoutubeDL(object): elif format_spec in ('mhtml', ): # storyboards extension filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none' else: - filter_f = (lambda f: f.get('format_id') == format_spec) # id + filter_f = lambda f: f.get('format_id') == format_spec # id def selector_function(ctx): formats = list(ctx['formats']) @@ -2453,8 +2456,12 @@ class YoutubeDL(object): if self.params.get('forceprint') or self.params.get('forcejson'): self.post_extract(info_dict) for tmpl in self.params.get('forceprint', []): - self.to_stdout(self.evaluate_outtmpl( - f'%({tmpl})s' if re.match(r'\w+$', tmpl) else tmpl, info_dict)) + mobj = re.match(r'\w+(=?)$', tmpl) + if mobj and mobj.group(1): + tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s' + elif mobj: + tmpl = '%({})s'.format(tmpl) + self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict)) print_mandatory('title') print_mandatory('id') diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index ade822299..4b82efea7 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -735,10 +735,6 @@ def _real_main(argv=None): 'geo_bypass_ip_block': opts.geo_bypass_ip_block, 'warnings': warnings, 'compat_opts': compat_opts, - # just for deprecation check - 'autonumber': opts.autonumber or None, - 'usetitle': opts.usetitle or None, - 'useid': opts.useid or None, } with YoutubeDL(ydl_opts) as ydl: diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index 5d7c988c7..704ae6f5a 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -48,8 +48,8 @@ class HttpFD(FileDownloader): is_test = self.params.get('test', False) chunk_size = self._TEST_FILE_SIZE if is_test else ( - self.params.get('http_chunk_size') or - info_dict.get('downloader_options', {}).get('http_chunk_size') + self.params.get('http_chunk_size') + or info_dict.get('downloader_options', 
{}).get('http_chunk_size') or 0) ctx.open_mode = 'wb' diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 4f940730a..65444d3bf 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1678,7 +1678,7 @@ class InfoExtractor(object): has_multiple_limits = has_limit and has_multiple_fields and not self._get_field_setting(field, 'same_limit') fields = self._get_field_setting(field, 'field') if has_multiple_fields else (field,) - limits = limit_text.split(":") if has_multiple_limits else (limit_text,) if has_limit else tuple() + limits = limit_text.split(':') if has_multiple_limits else (limit_text,) if has_limit else tuple() limit_count = len(limits) for (i, f) in enumerate(fields): add_item(f, reverse, closest, @@ -1762,9 +1762,9 @@ class InfoExtractor(object): if format.get('vbr') is not None and format.get('abr') is not None: format['tbr'] = format.get('vbr', 0) + format.get('abr', 0) else: - if format.get('vcodec') != "none" and format.get('vbr') is None: + if format.get('vcodec') != 'none' and format.get('vbr') is None: format['vbr'] = format.get('tbr') - format.get('abr', 0) - if format.get('acodec') != "none" and format.get('abr') is None: + if format.get('acodec') != 'none' and format.get('abr') is None: format['abr'] = format.get('tbr') - format.get('vbr', 0) return tuple(self._calculate_field_preference(format, field) for field in self._order) @@ -1966,13 +1966,16 @@ class InfoExtractor(object): 'format_note': 'Quality selection URL', } + def _report_ignoring_subs(self, name): + self.report_warning(bug_reports_message( + f'Ignoring subtitle tracks found in the {name} manifest; ' + 'if any subtitle tracks are missing,' + ), only_once=True) + def _extract_m3u8_formats(self, *args, **kwargs): fmts, subs = self._extract_m3u8_formats_and_subtitles(*args, **kwargs) if subs: - self.report_warning(bug_reports_message( - "Ignoring subtitle tracks found in the HLS manifest; " - "if any subtitle tracks are missing," - ), only_once=True) + self._report_ignoring_subs('HLS') return fmts def _extract_m3u8_formats_and_subtitles( @@ -2270,10 +2273,7 @@ class InfoExtractor(object): def _extract_smil_formats(self, *args, **kwargs): fmts, subs = self._extract_smil_formats_and_subtitles(*args, **kwargs) if subs: - self.report_warning(bug_reports_message( - "Ignoring subtitle tracks found in the SMIL manifest; " - "if any subtitle tracks are missing," - ), only_once=True) + self._report_ignoring_subs('SMIL') return fmts def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None): @@ -2515,10 +2515,7 @@ class InfoExtractor(object): def _extract_mpd_formats(self, *args, **kwargs): fmts, subs = self._extract_mpd_formats_and_subtitles(*args, **kwargs) if subs: - self.report_warning(bug_reports_message( - "Ignoring subtitle tracks found in the DASH manifest; " - "if any subtitle tracks are missing," - ), only_once=True) + self._report_ignoring_subs('DASH') return fmts def _extract_mpd_formats_and_subtitles( @@ -2542,10 +2539,7 @@ class InfoExtractor(object): def _parse_mpd_formats(self, *args, **kwargs): fmts, subs = self._parse_mpd_formats_and_subtitles(*args, **kwargs) if subs: - self.report_warning(bug_reports_message( - "Ignoring subtitle tracks found in the DASH manifest; " - "if any subtitle tracks are missing," - ), only_once=True) + self._report_ignoring_subs('DASH') return fmts def _parse_mpd_formats_and_subtitles( @@ -2873,10 +2867,7 @@ class InfoExtractor(object): def _extract_ism_formats(self, *args, **kwargs): fmts, subs = 
         self._extract_ism_formats_and_subtitles(*args, **kwargs)
         if subs:
-            self.report_warning(bug_reports_message(
-                "Ignoring subtitle tracks found in the ISM manifest; "
-                "if any subtitle tracks are missing,"
-            ))
+            self._report_ignoring_subs('ISM')
         return fmts
 
     def _extract_ism_formats_and_subtitles(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
@@ -3136,10 +3127,7 @@ class InfoExtractor(object):
     def _extract_akamai_formats(self, *args, **kwargs):
         fmts, subs = self._extract_akamai_formats_and_subtitles(*args, **kwargs)
         if subs:
-            self.report_warning(bug_reports_message(
-                "Ignoring subtitle tracks found in the manifests; "
-                "if any subtitle tracks are missing,"
-            ))
+            self._report_ignoring_subs('akamai')
         return fmts
 
     def _extract_akamai_formats_and_subtitles(self, manifest_url, video_id, hosts={}):
diff --git a/yt_dlp/extractor/hidive.py b/yt_dlp/extractor/hidive.py
index 18ae4d379..ef1ca197e 100644
--- a/yt_dlp/extractor/hidive.py
+++ b/yt_dlp/extractor/hidive.py
@@ -93,7 +93,7 @@ class HiDiveIE(InfoExtractor):
             raise ExtractorError(
                 '%s said: %s' % (self.IE_NAME, restriction), expected=True)
 
-        formats, subtitles, parsed_urls = [], {}, {None}
+        formats, parsed_urls = [], {None}
         for rendition_id, rendition in settings['renditions'].items():
             audio, version, extra = rendition_id.split('_')
             m3u8_url = url_or_none(try_get(rendition, lambda x: x['bitrates']['hls']))
@@ -105,14 +105,12 @@ class HiDiveIE(InfoExtractor):
                 f['language'] = audio
                 f['format_note'] = f'{version}, {extra}'
             formats.extend(frmt)
-
-            self._extract_subtitles_from_rendition(rendition, subtitles, parsed_urls)
         self._sort_formats(formats)
 
         return {
             'id': video_id,
             'title': video_id,
-            'subtitles': self.extract_subtitles(url, video_id, title, key, subtitles, parsed_urls),
+            'subtitles': self.extract_subtitles(url, video_id, title, key, parsed_urls),
             'formats': formats,
             'series': title,
             'season_number': int_or_none(
diff --git a/yt_dlp/extractor/minoto.py b/yt_dlp/extractor/minoto.py
index dba82db5f..603ce940b 100644
--- a/yt_dlp/extractor/minoto.py
+++ b/yt_dlp/extractor/minoto.py
@@ -37,7 +37,7 @@ class MinotoIE(InfoExtractor):
                 'filesize': int_or_none(fmt.get('filesize')),
                 'width': int_or_none(fmt.get('width')),
                 'height': int_or_none(fmt.get('height')),
-                'codecs': parse_codecs(fmt.get('codecs')),
+                **parse_codecs(fmt.get('codecs')),
             })
         self._sort_formats(formats)
 
diff --git a/yt_dlp/extractor/palcomp3.py b/yt_dlp/extractor/palcomp3.py
index 269e67a57..d0a62fb17 100644
--- a/yt_dlp/extractor/palcomp3.py
+++ b/yt_dlp/extractor/palcomp3.py
@@ -108,7 +108,7 @@ class PalcoMP3ArtistIE(PalcoMP3BaseIE):
         }
       name'''
 
-    @ classmethod
+    @classmethod
     def suitable(cls, url):
         return False if PalcoMP3IE._match_valid_url(url) else super(PalcoMP3ArtistIE, cls).suitable(url)
 
diff --git a/yt_dlp/minicurses.py b/yt_dlp/minicurses.py
index a466fb4b0..0e37ed818 100644
--- a/yt_dlp/minicurses.py
+++ b/yt_dlp/minicurses.py
@@ -1,3 +1,4 @@
+import functools
 from threading import Lock
 from .utils import supports_terminal_sequences, TERMINAL_SEQUENCES
 
@@ -49,6 +50,7 @@ class MultilinePrinter(MultilinePrinterBase):
         self._HAVE_FULLCAP = supports_terminal_sequences(self.stream)
 
     def lock(func):
+        @functools.wraps(func)
         def wrapper(self, *args, **kwargs):
             with self._movelock:
                 return func(self, *args, **kwargs)
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index 4652e8c58..f45c548f2 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -971,9 +971,6 @@ def parseOpts(overrideArguments=None):
         dest='batchfile', metavar='FILE',
         help="File containing URLs to download ('-' for stdin), one URL per line. "
              "Lines starting with '#', ';' or ']' are considered as comments and ignored")
-    filesystem.add_option(
-        '--id', default=False,
-        action='store_true', dest='useid', help=optparse.SUPPRESS_HELP)
     filesystem.add_option(
         '-P', '--paths',
         metavar='[TYPES:]PATH', dest='paths', default={}, type='str',
@@ -1029,18 +1026,6 @@ def parseOpts(overrideArguments=None):
         '--trim-filenames', '--trim-file-names', metavar='LENGTH',
         dest='trim_file_name', default=0, type=int,
         help='Limit the filename length (excluding extension) to the specified number of characters')
-    filesystem.add_option(
-        '--auto-number',
-        action='store_true', dest='autonumber', default=False,
-        help=optparse.SUPPRESS_HELP)
-    filesystem.add_option(
-        '--title',
-        action='store_true', dest='usetitle', default=False,
-        help=optparse.SUPPRESS_HELP)
-    filesystem.add_option(
-        '--literal', default=False,
-        action='store_true', dest='usetitle',
-        help=optparse.SUPPRESS_HELP)
     filesystem.add_option(
         '-w', '--no-overwrites',
         action='store_false', dest='overwrites', default=None,
@@ -1625,7 +1610,7 @@ def parseOpts(overrideArguments=None):
     argv = configs['system'] + configs['user'] + configs['home'] + configs['portable'] + configs['custom'] + configs['command-line']
     opts, args = parser.parse_args(argv)
     if opts.verbose:
-        for label in ('System', 'User', 'Portable', 'Home', 'Custom', 'Command-line'):
+        for label in ('Command-line', 'Custom', 'Portable', 'Home', 'User', 'System'):
             key = label.lower()
             if paths.get(key):
                 write_string(f'[debug] {label} config file: {paths[key]}\n')
diff --git a/ytdlp_plugins/extractor/sample.py b/ytdlp_plugins/extractor/sample.py
index 986e5bb22..d99b7ca33 100644
--- a/ytdlp_plugins/extractor/sample.py
+++ b/ytdlp_plugins/extractor/sample.py
@@ -5,7 +5,7 @@ from yt_dlp.extractor.common import InfoExtractor
 
 
 # ℹ️ Instructions on making extractors can be found at:
-# 🔗 https://github.com/ytdl-org/youtube-dl#adding-support-for-a-new-site
+# 🔗 https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#adding-support-for-a-new-site
 
 class SamplePluginIE(InfoExtractor):
     _WORKING = False
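Editor's note on the `functools.wraps` additions in the cleanup patch above (`minicurses.lock` and `YoutubeDL.__handle_extraction_exceptions`): without it, a decorated function is replaced by an anonymous `wrapper`, which makes tracebacks and debug output harder to read. A minimal self-contained sketch — the `Demo` class is invented for illustration and is not part of the patch:

```python
import functools


def lock(func):
    @functools.wraps(func)  # copies __name__, __doc__, etc. from func onto the wrapper
    def wrapper(self, *args, **kwargs):
        return func(self, *args, **kwargs)
    return wrapper


class Demo:
    @lock
    def print_at(self):
        """Write text at a fixed screen position."""


# Without functools.wraps, the decorated method would identify itself as 'wrapper'
assert Demo.print_at.__name__ == 'print_at'
assert 'screen position' in Demo.print_at.__doc__
```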
From 81bcd43a033e62a2663d91ac1f7f1be6a785c182 Mon Sep 17 00:00:00 2001
From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com>
Date: Sat, 9 Oct 2021 23:57:08 +0530
Subject: [PATCH 0030/2552] [HotStarSeries] Fix cookies (#1187)

Authored by: Ashish0804
---
 yt_dlp/extractor/hotstar.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py
index 8f0c67303..f66d3e433 100644
--- a/yt_dlp/extractor/hotstar.py
+++ b/yt_dlp/extractor/hotstar.py
@@ -290,7 +290,7 @@ class HotStarPlaylistIE(HotStarBaseIE):
 
 class HotStarSeriesIE(HotStarBaseIE):
     IE_NAME = 'hotstar:series'
-    _VALID_URL = r'(?:https?://)(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/(?P<id>\d+)'
+    _VALID_URL = r'(?P<url>(?:https?://)(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/(?P<id>\d+))'
     _TESTS = [{
         'url': 'https://www.hotstar.com/in/tv/radhakrishn/1260000646',
         'info_dict': {
@@ -312,7 +312,7 @@ class HotStarSeriesIE(HotStarBaseIE):
     }]
 
     def _real_extract(self, url):
-        series_id = self._match_id(url)
+        url, series_id = self._match_valid_url(url).groups()
         headers = {
             'x-country-code': 'IN',
             'x-platform-code': 'PCTV',
@@ -324,7 +324,7 @@ class HotStarSeriesIE(HotStarBaseIE):
             video_id=series_id, headers=headers)
         entries = [
             self.url_result(
-                'hotstar:episode:%d' % video['contentId'],
+                '%s/ignoreme/%d' % (url,
video['contentId']), ie=HotStarIE.ie_key(), video_id=video['contentId']) for video in item_json['body']['results']['items'] if video.get('contentId')] From 90d55df3304b13ffbc1dbf2db5bcb4c03c086d4f Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 10 Oct 2021 00:39:23 +0530 Subject: [PATCH 0031/2552] Release 2021.10.09 --- CONTRIBUTORS | 5 ++++ Changelog.md | 67 +++++++++++++++++++++++++++++++++++++++++++++++ README.md | 4 +-- supportedsites.md | 14 +++++++--- 4 files changed, 84 insertions(+), 6 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index e44302d57..a535411c6 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -118,3 +118,8 @@ sleaux-meaux sulyi tmarki Vangelis66 +AjaxGb +ajj8 +jakubadamw +jfogelman +timethrow diff --git a/Changelog.md b/Changelog.md index 7334f87c5..2350f67ad 100644 --- a/Changelog.md +++ b/Changelog.md @@ -14,6 +14,73 @@ --> +### 2021.10.09 + +* Improved progress reporting + * Separate `--console-title` and `--no-progress` + * Add option `--progress` to show progress-bar even in quiet mode + * Fix and refactor `minicurses` and use it for all progress reporting + * Standardize use of terminal sequences and enable color support for windows 10 + * Add option `--progress-template` to customize progress-bar and console-title + * Add postprocessor hooks and progress reporting +* [postprocessor] Add plugin support with option `--use-postprocessor` +* [extractor] Extract storyboards from SMIL manifests by [fstirlitz](https://github.com/fstirlitz) +* [outtmpl] Alternate form of format type `l` for `\n` delimited list +* [outtmpl] Format type `U` for unicode normalization +* [outtmpl] Allow empty output template to skip a type of file +* Merge webm formats into mkv if thumbnails are to be embedded +* [adobepass] Add RCN as MSO by [jfogelman](https://github.com/jfogelman) +* [ciscowebex] Add extractor by [damianoamatruda](https://github.com/damianoamatruda) +* [Gettr] Add extractor by [i6t](https://github.com/i6t) +* [GoPro] Add extractor by [i6t](https://github.com/i6t) +* [N1] Add extractor by [u-spec-png](https://github.com/u-spec-png) +* [Theta] Add video extractor by [alerikaisattera](https://github.com/alerikaisattera) +* [Veo] Add extractor by [i6t](https://github.com/i6t) +* [Vupload] Add extractor by [u-spec-png](https://github.com/u-spec-png) +* [bbc] Extract better quality videos by [ajj8](https://github.com/ajj8) +* [Bilibili] Add subtitle converter by [u-spec-png](https://github.com/u-spec-png) +* [CBC] Cleanup tests by [makeworld-the-better-one](https://github.com/makeworld-the-better-one) +* [Douyin] Rewrite extractor by [MinePlayersPE](https://github.com/MinePlayersPE) +* [Funimation] Fix for /v/ urls by [pukkandan](https://github.com/pukkandan), [Jules-A](https://github.com/Jules-A) +* [Funimation] Sort formats according to the relevant extractor-args +* [Hidive] Fix duplicate and incorrect formats +* [HotStarSeries] Fix cookies by [Ashish0804](https://github.com/Ashish0804) +* [LinkedInLearning] Add subtitles by [Ashish0804](https://github.com/Ashish0804) +* [Mediaite] Relax valid url by [coletdjnz](https://github.com/coletdjnz) +* [Newgrounds] Add age_limit and fix duration by [u-spec-png](https://github.com/u-spec-png) +* [Newgrounds] Fix view count on songs by [u-spec-png](https://github.com/u-spec-png) +* [parliamentlive.tv] Fix extractor by [u-spec-png](https://github.com/u-spec-png) +* [PolskieRadio] Fix extractors by [jakubadamw](https://github.com/jakubadamw), [u-spec-png](https://github.com/u-spec-png) +* [reddit] Add embedded url by 
[u-spec-png](https://github.com/u-spec-png) +* [reddit] Fix 429 by generating a random `reddit_session` by [AjaxGb](https://github.com/AjaxGb) +* [Rumble] Add RumbleChannelIE by [Ashish0804](https://github.com/Ashish0804) +* [soundcloud:playlist] Detect last page correctly +* [SovietsCloset] Add duration from m3u8 by [ChillingPepper](https://github.com/ChillingPepper) +* [Streamable] Add codecs by [u-spec-png](https://github.com/u-spec-png) +* [vidme] Remove extractor by [alerikaisattera](https://github.com/alerikaisattera) +* [youtube:tab] Fallback to API when webpage fails to download by [coletdjnz](https://github.com/coletdjnz) +* [youtube] Fix non-fatal errors in fetching player +* Fix `--flat-playlist` when neither IE nor id is known +* Fix `-f mp4` behaving differently from youtube-dl +* Workaround for bug in `ssl.SSLContext.load_default_certs` +* [aes] Improve performance slightly by [sulyi](https://github.com/sulyi) +* [cookies] Fix keyring fallback by [mbway](https://github.com/mbway) +* [embedsubtitle] Fix error when duration is unknown +* [ffmpeg] Fix error when subtitle file is missing +* [ffmpeg] Set max probesize to workaround AAC HLS stream issues by [shirt](https://github.com/shirt-dev) +* [FixupM3u8] Remove redundant run if merged is needed +* [hls] Fix decryption issues by [shirt](https://github.com/shirt-dev), [pukkandan](https://github.com/pukkandan) +* [http] Respect user-provided chunk size over extractor's +* [utils] Let traverse_obj accept functions as keys +* [docs] Add note about our custom ffmpeg builds +* [docs] Write embedding and contributing documentation by [pukkandan](https://github.com/pukkandan), [timethrow](https://github.com/timethrow) +* [update] Check for new version even if not updateable +* [build] Add more files to the tarball +* [build] Allow building with py2exe (and misc fixes) +* [build] Use pycryptodomex by [shirt](https://github.com/shirt-dev), [pukkandan](https://github.com/pukkandan) +* [cleanup] Some minor refactoring, improve docs and misc cleanup + + ### 2021.09.25 * Add new option `--netrc-location` diff --git a/README.md b/README.md index c0f84fcac..56755f00b 100644 --- a/README.md +++ b/README.md @@ -92,9 +92,9 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/ * **Aria2c with HLS/DASH**: You can use `aria2c` as the external downloader for DASH(mpd) and HLS(m3u8) formats -* **New extractors**: AnimeLab, Philo MSO, Spectrum MSO, SlingTV MSO, Cablevision MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5, mtv.it, wimtv, pluto.tv, niconico users, discoveryplus.in, mediathek, NFHSNetwork, nebula, ukcolumn, whowatch, MxplayerShow, parlview (au), YoutubeWebArchive, fancode, Saitosan, ShemarooMe, telemundo, VootSeries, SonyLIVSeries, HotstarSeries, VidioPremier, VidioLive, RCTIPlus, TBS Live, douyin, pornflip, ParamountPlusSeries, ScienceChannel, Utreon, OpenRec, BandcampMusic, blackboardcollaborate, eroprofile albums, mirrativ, BannedVideo, bilibili categories, Epicon, filmmodu, GabTV, HungamaAlbum, ManotoTV, Niconico search, Patreon User, peloton, ProjectVeritas, radiko, StarTV, tiktok user, Tokentube, voicy, TV2HuSeries, biliintl, 17live, NewgroundsUser, peertube channel/playlist, ZenYandex, CAM4, CGTN, damtomo, gotostage, Koo, Mediaite, Mediaklikk, MuseScore, nzherald, Olympics replay, radlive, SovietsCloset, Streamanity, Theta, Chingari +* **New extractors**: AnimeLab, Philo MSO, Spectrum MSO, SlingTV MSO, Cablevision MSO, RCN MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5, mtv.it, wimtv, pluto.tv, 
niconico users, discoveryplus.in, mediathek, NFHSNetwork, nebula, ukcolumn, whowatch, MxplayerShow, parlview (au), YoutubeWebArchive, fancode, Saitosan, ShemarooMe, telemundo, VootSeries, SonyLIVSeries, HotstarSeries, VidioPremier, VidioLive, RCTIPlus, TBS Live, douyin, pornflip, ParamountPlusSeries, ScienceChannel, Utreon, OpenRec, BandcampMusic, blackboardcollaborate, eroprofile albums, mirrativ, BannedVideo, bilibili categories, Epicon, filmmodu, GabTV, HungamaAlbum, ManotoTV, Niconico search, Patreon User, peloton, ProjectVeritas, radiko, StarTV, tiktok user, Tokentube, voicy, TV2HuSeries, biliintl, 17live, NewgroundsUser, peertube channel/playlist, ZenYandex, CAM4, CGTN, damtomo, gotostage, Koo, Mediaite, Mediaklikk, MuseScore, nzherald, Olympics replay, radlive, SovietsCloset, Streamanity, Theta, Chingari, ciscowebex, Gettr, GoPro, N1, Theta, Veo, Vupload -* **Fixed/improved extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, akamai, ina, rumble, tennistv, amcnetworks, la7 podcasts, linuxacadamy, nitter, twitcasting, viu, crackle, curiositystream, mediasite, rmcdecouverte, sonyliv, tubi, tenplay, patreon, videa, yahoo, BravoTV, crunchyroll playlist, RTP, viki, Hotstar, vidio, vimeo, mediaset, Mxplayer, nbcolympics, ParamountPlus, Newgrounds, SAML Verizon login, Hungama, afreecatv, aljazeera, ATV, bitchute, camtube, CDA, eroprofile, facebook, HearThisAtIE, iwara, kakao, Motherless, Nova, peertube, pornhub, reddit, tiktok, TV2, TV2Hu, tv5mondeplus, VH1, Viafree, XHamster, 9Now, AnimalPlanet, Arte, CBC, Chingari, comedycentral, DIYNetwork, niconico, dw, funimation, globo, HiDive, NDR, Nuvid, Oreilly, pbs, plutotv, reddit, redtube, soundcloud, SpankBang, VrtNU +* **Fixed/improved extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, akamai, ina, rumble, tennistv, amcnetworks, la7 podcasts, linuxacadamy, nitter, twitcasting, viu, crackle, curiositystream, mediasite, rmcdecouverte, sonyliv, tubi, tenplay, patreon, videa, yahoo, BravoTV, crunchyroll playlist, RTP, viki, Hotstar, vidio, vimeo, mediaset, Mxplayer, nbcolympics, ParamountPlus, Newgrounds, SAML Verizon login, Hungama, afreecatv, aljazeera, ATV, bitchute, camtube, CDA, eroprofile, facebook, HearThisAtIE, iwara, kakao, Motherless, Nova, peertube, pornhub, reddit, tiktok, TV2, TV2Hu, tv5mondeplus, VH1, Viafree, XHamster, 9Now, AnimalPlanet, Arte, CBC, Chingari, comedycentral, DIYNetwork, niconico, dw, funimation, globo, HiDive, NDR, Nuvid, Oreilly, pbs, plutotv, reddit, redtube, soundcloud, SpankBang, VrtNU, bbc, Bilibili, LinkedInLearning, parliamentlive, PolskieRadio, Streamable, vidme * **Subtitle extraction from manifests**: Subtitles can be extracted from streaming media manifests. 
See [commit/be6202f](https://github.com/yt-dlp/yt-dlp/commit/be6202f12b97858b9d716e608394b51065d0419f) for details diff --git a/supportedsites.md b/supportedsites.md index e883351a9..3fe79683a 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -199,6 +199,7 @@ - **Cinemax** - **CiscoLiveSearch** - **CiscoLiveSession** + - **ciscowebex**: Cisco Webex - **CJSW** - **cliphunter** - **Clippit** @@ -379,6 +380,7 @@ - **gem.cbc.ca:live** - **gem.cbc.ca:playlist** - **generic**: Generic downloader that works on some sites + - **Gettr** - **Gfycat** - **GiantBomb** - **Giga** @@ -392,6 +394,7 @@ - **google:podcasts** - **google:podcasts:feed** - **GoogleDrive** + - **GoPro** - **Goshgay** - **GoToStage** - **GPUTechConf** @@ -630,6 +633,8 @@ - **MyviEmbed** - **MyVisionTV** - **n-tv.de** + - **N1Info:article** + - **N1InfoAsset** - **natgeo:video** - **NationalGeographicTV** - **Naver** @@ -905,6 +910,7 @@ - **RTVNH** - **RTVS** - **RUHD** + - **RumbleChannel** - **RumbleEmbed** - **rutube**: Rutube videos - **rutube:channel**: Rutube channels @@ -1065,7 +1071,8 @@ - **TheScene** - **TheStar** - **TheSun** - - **Theta** + - **ThetaStream** + - **ThetaVideo** - **TheWeatherChannel** - **ThisAmericanLife** - **ThisAV** @@ -1174,6 +1181,7 @@ - **Varzesh3** - **Vbox7** - **VeeHD** + - **Veo** - **Veoh** - **Vesti**: Вести.Ru - **Vevo** @@ -1202,9 +1210,6 @@ - **VidioLive** - **VidioPremier** - **VidLii** - - **vidme** - - **vidme:user** - - **vidme:user:likes** - **vier**: vier.be and vijf.be - **vier:videos** - **viewlift** @@ -1256,6 +1261,7 @@ - **VTXTV** - **vube**: Vube.com - **VuClip** + - **Vupload** - **VVVVID** - **VVVVIDShow** - **VyboryMos** From a170527e1fc382dd7be214c5134f5013a5f0747f Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 9 Oct 2021 19:11:24 +0000 Subject: [PATCH 0032/2552] [version] update :ci skip all --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- yt_dlp/version.py | 2 +- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 8a5503510..d07c33e11 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -21,7 +21,7 @@ assignees: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running yt-dlp version **2021.09.25** +- [ ] I've verified that I'm running yt-dlp version **2021.10.09** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -47,7 +47,7 @@ Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running yt-dlp version **2021.09.25** +- [ ] I've verified that I'm running yt-dlp version **2021.10.09** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] The provided URLs do not contain any DRM to the best of my knowledge diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 38b38c803..dd9bc1faa 100644 --- 
a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -21,14 +21,14 @@ assignees: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running yt-dlp version **2021.09.25** +- [ ] I've verified that I'm running yt-dlp version **2021.10.09** - [ ] I've searched the bugtracker for similar site feature requests including closed ones - [ ] I've read the opening an issue section in CONTRIBUTING.md - [ ] I have given an appropriate title to the issue diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index b2f7efcda..8981eca0e 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -21,7 +21,7 @@ assignees: '' - [ ] I'm reporting a bug unrelated to a specific site -- [ ] I've verified that I'm running yt-dlp version **2021.09.25** +- [ ] I've verified that I'm running yt-dlp version **2021.10.09** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] The provided URLs do not contain any DRM to the best of my knowledge - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped @@ -48,7 +48,7 @@ Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running yt-dlp version **2021.09.25** +- [ ] I've verified that I'm running yt-dlp version **2021.10.09** - [ ] I've searched the bugtracker for similar feature requests including closed ones - [ ] I've read the opening an issue section in CONTRIBUTING.md - [ ] I have given an appropriate title to the issue diff --git a/yt_dlp/version.py b/yt_dlp/version.py index 965a89b88..34b6e9a5d 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2021.09.25' +__version__ = '2021.10.09' From aa9a92fdbbca172689495f2990af6a135bae90d5 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 10 Oct 2021 02:23:42 +0530 Subject: [PATCH 0033/2552] [downloader/ffmpeg] Fix bug in initializing `FFmpegPostProcessor` When `FFmpegFD` initializes the PP, it passes `self` as the `downloader` But it does not have a `_postprocessor_hooks` attribute Closes #1211 --- yt_dlp/postprocessor/common.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/yt_dlp/postprocessor/common.py b/yt_dlp/postprocessor/common.py index 376a1c95e..d2daeb0fb 100644 --- a/yt_dlp/postprocessor/common.py +++ b/yt_dlp/postprocessor/common.py @@ -90,9 +90,7 @@ class PostProcessor(metaclass=PostProcessorMetaClass): def set_downloader(self, downloader): """Sets the downloader for this PP.""" self._downloader = downloader - if not downloader: - return - for ph in downloader._postprocessor_hooks: + for ph in getattr(downloader, '_postprocessor_hooks', []): self.add_progress_hook(ph) @staticmethod From 28fe35b4e3da41ec78a092d06ad76f5ff67c12e8 Mon Sep 17 00:00:00 2001 From: Felix S Date: Sat, 9 Oct 2021 21:50:17 +0000 Subject: [PATCH 0034/2552] [francetv] Update extractor (#1096) Original PR: https://github.com/ytdl-org/youtube-dl/pull/29996 Closes: https://github.com/yt-dlp/yt-dlp/issues/970, https://github.com/ytdl-org/youtube-dl/issues/29956, https://github.com/ytdl-org/youtube-dl/issues/29957, https://github.com/ytdl-org/youtube-dl/issues/29969, https://github.com/ytdl-org/youtube-dl/issues/29990, https://github.com/ytdl-org/youtube-dl/issues/30010 Authored by: fstirlitz, sarnoud --- 
yt_dlp/extractor/extractors.py | 5 - yt_dlp/extractor/francetv.py | 351 +++++++++------------------------ 2 files changed, 89 insertions(+), 267 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index a224c4f9a..d8e3cd738 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -474,12 +474,7 @@ from .franceinter import FranceInterIE from .francetv import ( FranceTVIE, FranceTVSiteIE, - FranceTVEmbedIE, FranceTVInfoIE, - FranceTVInfoSportIE, - FranceTVJeunesseIE, - GenerationWhatIE, - CultureboxIE, ) from .freesound import FreesoundIE from .freespeech import FreespeechIE diff --git a/yt_dlp/extractor/francetv.py b/yt_dlp/extractor/francetv.py index 41910cefb..3bbab69e6 100644 --- a/yt_dlp/extractor/francetv.py +++ b/yt_dlp/extractor/francetv.py @@ -4,19 +4,12 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import ( - compat_str, -) from ..utils import ( - clean_html, determine_ext, ExtractorError, - int_or_none, - parse_duration, + format_field, + parse_iso8601, parse_qs, - try_get, - url_or_none, - urljoin, ) from .dailymotion import DailymotionIE @@ -89,97 +82,81 @@ class FranceTVIE(InfoExtractor): # Videos are identified by idDiffusion so catalogue part is optional. # However when provided, some extra formats may be returned so we pass # it if available. - info = self._download_json( - 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/', - video_id, 'Downloading video JSON', query={ - 'idDiffusion': video_id, - 'catalogue': catalogue or '', - }) - - if info.get('status') == 'NOK': - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, info['message']), - expected=True) - allowed_countries = info['videos'][0].get('geoblocage') - if allowed_countries: - georestricted = True - geo_info = self._download_json( - 'http://geo.francetv.fr/ws/edgescape.json', video_id, - 'Downloading geo restriction info') - country = geo_info['reponse']['geo_info']['country_code'] - if country not in allowed_countries: - raise ExtractorError( - 'The video is not available from your location', - expected=True) - else: - georestricted = False - - def sign(manifest_url, manifest_id): - for host in ('hdfauthftv-a.akamaihd.net', 'hdfauth.francetv.fr'): - signed_url = url_or_none(self._download_webpage( - 'https://%s/esi/TA' % host, video_id, - 'Downloading signed %s manifest URL' % manifest_id, - fatal=False, query={ - 'url': manifest_url, - })) - if signed_url: - return signed_url - return manifest_url - is_live = None - videos = [] - - for video in (info.get('videos') or []): - if video.get('statut') != 'ONLINE': - continue - if not video.get('url'): + title = None + subtitle = None + image = None + duration = None + timestamp = None + spritesheets = None + + for device_type in ('desktop', 'mobile'): + dinfo = self._download_json( + 'https://player.webservices.francetelevisions.fr/v1/videos/%s' % video_id, + video_id, 'Downloading %s video JSON' % device_type, query={ + 'device_type': device_type, + 'browser': 'chrome', + }, fatal=False) + + if not dinfo: continue - videos.append(video) - if not videos: - for device_type in ['desktop', 'mobile']: - fallback_info = self._download_json( - 'https://player.webservices.francetelevisions.fr/v1/videos/%s' % video_id, - video_id, 'Downloading fallback %s video JSON' % device_type, query={ - 'device_type': device_type, - 'browser': 'chrome', - }, fatal=False) - - if fallback_info and fallback_info.get('video'): - 
videos.append(fallback_info['video']) + video = dinfo.get('video') + if video: + videos.append(video) + if duration is None: + duration = video.get('duration') + if is_live is None: + is_live = video.get('is_live') + if spritesheets is None: + spritesheets = video.get('spritesheets') + + meta = dinfo.get('meta') + if meta: + if title is None: + title = meta.get('title') + # XXX: what is meta['pre_title']? + if subtitle is None: + subtitle = meta.get('additional_title') + if image is None: + image = meta.get('image_url') + if timestamp is None: + timestamp = parse_iso8601(meta.get('broadcasted_at')) formats = [] subtitles = {} for video in videos: - video_url = video.get('url') - if not video_url: - continue - if is_live is None: - is_live = (try_get( - video, lambda x: x['plages_ouverture'][0]['direct'], bool) is True - or video.get('is_live') is True - or '/live.francetv.fr/' in video_url) format_id = video.get('format') + + video_url = None + if video.get('workflow') == 'token-akamai': + token_url = video.get('token') + if token_url: + token_json = self._download_json( + token_url, video_id, + 'Downloading signed %s manifest URL' % format_id) + if token_json: + video_url = token_json.get('url') + if not video_url: + video_url = video.get('url') + ext = determine_ext(video_url) if ext == 'f4m': - if georestricted: - # See https://github.com/ytdl-org/youtube-dl/issues/3963 - # m3u8 urls work fine - continue formats.extend(self._extract_f4m_formats( - sign(video_url, format_id) + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', - video_id, f4m_id=format_id, fatal=False)) + video_url, video_id, f4m_id=format_id, fatal=False)) elif ext == 'm3u8': - m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles( - sign(video_url, format_id), video_id, 'mp4', + fmts, subs = self._extract_m3u8_formats_and_subtitles( + video_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id=format_id, fatal=False) - formats.extend(m3u8_fmts) - subtitles = self._merge_subtitles(subtitles, m3u8_subs) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) elif ext == 'mpd': - formats.extend(self._extract_mpd_formats( - sign(video_url, format_id), video_id, mpd_id=format_id, fatal=False)) + fmts, subs = self._extract_mpd_formats_and_subtitles( + video_url, video_id, mpd_id=format_id, fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) elif video_url.startswith('rtmp'): formats.append({ 'url': video_url, @@ -193,28 +170,43 @@ class FranceTVIE(InfoExtractor): 'format_id': format_id, }) + # XXX: what is video['captions']? 
+
+        for f in formats:
+            if f.get('acodec') != 'none' and f.get('language') in ('qtz', 'qad'):
+                f['language_preference'] = -10
+                f['format_note'] = 'audio description%s' % format_field(f, 'format_note', ', %s')
+
+        if spritesheets:
+            formats.append({
+                'format_id': 'spritesheets',
+                'format_note': 'storyboard',
+                'acodec': 'none',
+                'vcodec': 'none',
+                'ext': 'mhtml',
+                'protocol': 'mhtml',
+                'url': 'about:dummy',
+                'fragments': [{
+                    'path': sheet,
+                    # XXX: not entirely accurate; each spritesheet seems to be
+                    # a 10×10 grid of thumbnails corresponding to approximately
+                    # 2 seconds of the video; the last spritesheet may be shorter
+                    'duration': 200,
+                } for sheet in spritesheets]
+            })
+
         self._sort_formats(formats)
 
-        title = info['titre']
-        subtitle = info.get('sous_titre')
         if subtitle:
             title += ' - %s' % subtitle
         title = title.strip()
 
-        subtitles.setdefault('fr', []).extend(
-            [{
-                'url': subformat['url'],
-                'ext': subformat.get('format'),
-            } for subformat in info.get('subtitles', []) if subformat.get('url')]
-        )
-
         return {
             'id': video_id,
             'title': self._live_title(title) if is_live else title,
-            'description': clean_html(info.get('synopsis')),
-            'thumbnail': urljoin('https://sivideo.webservices.francetelevisions.fr', info.get('image')),
-            'duration': int_or_none(info.get('real_duration')) or parse_duration(info.get('duree')),
-            'timestamp': int_or_none(try_get(info, lambda x: x['diffusion']['timestamp'])),
+            'thumbnail': image,
+            'duration': duration,
+            'timestamp': timestamp,
             'is_live': is_live,
             'formats': formats,
             'subtitles': subtitles,
@@ -308,35 +300,6 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
 
         return self._make_url_result(video_id, catalogue)
 
 
-class FranceTVEmbedIE(FranceTVBaseInfoExtractor):
-    _VALID_URL = r'https?://embed\.francetv\.fr/*\?.*?\bue=(?P<id>[^&]+)'
-
-    _TESTS = [{
-        'url': 'http://embed.francetv.fr/?ue=7fd581a2ccf59d2fc5719c5c13cf6961',
-        'info_dict': {
-            'id': 'NI_983319',
-            'ext': 'mp4',
-            'title': 'Le Pen Reims',
-            'upload_date': '20170505',
-            'timestamp': 1493981780,
-            'duration': 16,
-        },
-        'params': {
-            'skip_download': True,
-        },
-        'add_ie': [FranceTVIE.ie_key()],
-    }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        video = self._download_json(
-            'http://api-embed.webservices.francetelevisions.fr/key/%s' % video_id,
-            video_id)
-
-        return self._make_url_result(video['video_id'], video.get('catalog'))
-
-
 class FranceTVInfoIE(FranceTVBaseInfoExtractor):
     IE_NAME = 'francetvinfo.fr'
     _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&.]+)'
@@ -426,139 +389,3 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
             webpage, 'video id')
 
         return self._make_url_result(video_id)
-
-
-class FranceTVInfoSportIE(FranceTVBaseInfoExtractor):
-    IE_NAME = 'sport.francetvinfo.fr'
-    _VALID_URL = r'https?://sport\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&]+)'
-    _TESTS = [{
-        'url': 'https://sport.francetvinfo.fr/les-jeux-olympiques/retour-sur-les-meilleurs-moments-de-pyeongchang-2018',
-        'info_dict': {
-            'id': '6e49080e-3f45-11e8-b459-000d3a2439ea',
-            'ext': 'mp4',
-            'title': 'Retour sur les meilleurs moments de Pyeongchang 2018',
-            'timestamp': 1523639962,
-            'upload_date': '20180413',
-        },
-        'params': {
-            'skip_download': True,
-        },
-        'add_ie': [FranceTVIE.ie_key()],
-    }]
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-        video_id = self._search_regex(r'data-video="([^"]+)"', webpage, 'video_id')
-        return self._make_url_result(video_id, 'Sport-web')
-
-
-class GenerationWhatIE(InfoExtractor):
-    IE_NAME = 'france2.fr:generation-what'
-    _VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P<id>[^/?#&]+)'
-
-    _TESTS = [{
-        'url': 'http://generation-what.francetv.fr/portrait/video/present-arms',
-        'info_dict': {
-            'id': 'wtvKYUG45iw',
-            'ext': 'mp4',
-            'title': 'Generation What - Garde à vous - FRA',
-            'uploader': 'Generation What',
-            'uploader_id': 'UCHH9p1eetWCgt4kXBYCb3_w',
-            'upload_date': '20160411',
-        },
-        'params': {
-            'skip_download': True,
-        },
-        'add_ie': ['Youtube'],
-    }, {
-        'url': 'http://generation-what.francetv.fr/europe/video/present-arms',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, display_id)
-
-        youtube_id = self._search_regex(
-            r"window\.videoURL\s*=\s*'([0-9A-Za-z_-]{11})';",
-            webpage, 'youtube id')
-
-        return self.url_result(youtube_id, ie='Youtube', video_id=youtube_id)
-
-
-class CultureboxIE(FranceTVBaseInfoExtractor):
-    _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&]+)'
-
-    _TESTS = [{
-        'url': 'https://culturebox.francetvinfo.fr/opera-classique/musique-classique/c-est-baroque/concerts/cantates-bwv-4-106-et-131-de-bach-par-raphael-pichon-57-268689',
-        'info_dict': {
-            'id': 'EV_134885',
-            'ext': 'mp4',
-            'title': 'Cantates BWV 4, 106 et 131 de Bach par Raphaël Pichon 5/7',
-            'description': 'md5:19c44af004b88219f4daa50fa9a351d4',
-            'upload_date': '20180206',
-            'timestamp': 1517945220,
-            'duration': 5981,
-        },
-        'params': {
-            'skip_download': True,
-        },
-        'add_ie': [FranceTVIE.ie_key()],
-    }]
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, display_id)
-
-        if ">Ce live n'est plus disponible en replay<" in webpage:
-            raise ExtractorError(
-                'Video %s is not available' % display_id, expected=True)
-
-        video_id, catalogue = self._search_regex(
-            r'["\'>]https?://videos\.francetv\.fr/video/([^@]+@.+?)["\'<]',
-            webpage, 'video id').split('@')
-
-        return self._make_url_result(video_id, catalogue)
-
-
-class FranceTVJeunesseIE(FranceTVBaseInfoExtractor):
-    _VALID_URL = r'(?P<url>https?://(?:www\.)?(?:zouzous|ludo)\.fr/heros/(?P<id>[^/?#&]+))'
-
-    _TESTS = [{
-        'url': 'https://www.zouzous.fr/heros/simon',
-        'info_dict': {
-            'id': 'simon',
-        },
-        'playlist_count': 9,
-    }, {
-        'url': 'https://www.ludo.fr/heros/ninjago',
-        'info_dict': {
-            'id': 'ninjago',
-        },
-        'playlist_count': 10,
-    }, {
-        'url': 'https://www.zouzous.fr/heros/simon?abc',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        mobj = self._match_valid_url(url)
-        playlist_id = mobj.group('id')
-
-        playlist = self._download_json(
-            '%s/%s' % (mobj.group('url'), 'playlist'), playlist_id)
-
-        if not playlist.get('count'):
-            raise ExtractorError(
-                '%s is not available' % playlist_id, expected=True)
-
-        entries = []
-        for item in playlist['items']:
-            identity = item.get('identity')
-            if identity and isinstance(identity, compat_str):
-                entries.append(self._make_url_result(identity))
-
-        return self.playlist_result(entries, playlist_id)
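Editor's note on the rewritten `FranceTVIE` above: the extractor now requests the same video once per device type and keeps the first non-empty value of each metadata field across the responses. A self-contained sketch of that merge logic — the `responses` data is invented for illustration; the real values come from the player web service:

```python
# Hypothetical per-device-type JSON responses standing in for the API downloads
responses = [
    {'video': {'duration': 1337}},                                   # desktop
    {'video': {'duration': 1337, 'is_live': False},                  # mobile
     'meta': {'title': 'Example', 'additional_title': 'Part 1'}},
]

title = subtitle = duration = is_live = None
for dinfo in responses:
    video = dinfo.get('video') or {}
    meta = dinfo.get('meta') or {}
    if duration is None:
        duration = video.get('duration')
    if is_live is None:
        is_live = video.get('is_live')
    if title is None:
        title = meta.get('title')
    if subtitle is None:
        subtitle = meta.get('additional_title')

assert (title, subtitle, duration, is_live) == ('Example', 'Part 1', 1337, False)
```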
From 91b6c884c9c02a2a8ffe247131d05e8e8a6021a4 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Sun, 10 Oct 2021 02:56:30 +0530
Subject: [PATCH 0035/2552] Revert "[ffmpeg] Set max probesize to workaround
 AAC HLS stream issues (#1109)"

This reverts commit 250a938de82fb6b023c09ce3d89471c5871ff830.

This is no longer necessary since 7687c8ac6e223a725b3ef8f56f04779bebdc86c5
---
 yt_dlp/postprocessor/ffmpeg.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py
index 6bb66569a..5f6861f93 100644
--- a/yt_dlp/postprocessor/ffmpeg.py
+++ b/yt_dlp/postprocessor/ffmpeg.py
@@ -262,7 +262,7 @@ class FFmpegPostProcessor(PostProcessor):
         oldest_mtime = min(
             os.stat(encodeFilename(path)).st_mtime for path, _ in input_path_opts if path)
 
-        cmd = [encodeFilename(self.executable, True), encodeArgument('-y'), encodeArgument('-probesize'), encodeArgument('max')]
+        cmd = [encodeFilename(self.executable, True), encodeArgument('-y')]
         # avconv does not have repeat option
         if self.basename == 'ffmpeg':
             cmd += [encodeArgument('-loglevel'), encodeArgument('repeat+info')]
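Editor's note: the net effect of the revert above is limited to the global argument prefix that `FFmpegPostProcessor` prepends to every ffmpeg invocation; probing behaviour for broken AAC HLS streams is instead handled by the earlier fix the commit message references. A minimal sketch of the prefix after the revert, assuming the `ffmpeg` binary rather than `avconv`:

```python
basename = 'ffmpeg'  # assumption: ffmpeg is in use, not avconv

# Before this revert, every invocation began with:
#   ['ffmpeg', '-y', '-probesize', 'max', '-loglevel', 'repeat+info', ...]
cmd = ['ffmpeg', '-y']
if basename == 'ffmpeg':  # avconv does not have the repeat option
    cmd += ['-loglevel', 'repeat+info']

assert cmd == ['ffmpeg', '-y', '-loglevel', 'repeat+info']
```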
This is no longer necessary since 7687c8ac6e223a725b3ef8f56f04779bebdc86c5 --- yt_dlp/postprocessor/ffmpeg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 6bb66569a..5f6861f93 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -262,7 +262,7 @@ class FFmpegPostProcessor(PostProcessor): oldest_mtime = min( os.stat(encodeFilename(path)).st_mtime for path, _ in input_path_opts if path) - cmd = [encodeFilename(self.executable, True), encodeArgument('-y'), encodeArgument('-probesize'), encodeArgument('max')] + cmd = [encodeFilename(self.executable, True), encodeArgument('-y')] # avconv does not have repeat option if self.basename == 'ffmpeg': cmd += [encodeArgument('-loglevel'), encodeArgument('repeat+info')] From c9652aa4185afa1c93aeba4e0b06a14b9bb78b5c Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 10 Oct 2021 03:23:47 +0530 Subject: [PATCH 0036/2552] [docs] Remove incorrect dependency on VC++10 Closes #1163 --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 56755f00b..8acb57484 100644 --- a/README.md +++ b/README.md @@ -193,8 +193,10 @@ If you have installed using Homebrew, run `brew upgrade yt-dlp/taps/yt-dlp` ### DEPENDENCIES Python versions 3.6+ (CPython and PyPy) are supported. Other versions and implementations may or may not work correctly. - + While all the other dependancies are optional, `ffmpeg` and `ffprobe` are highly recommended * [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging seperate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. Licence [depends on the build](https://www.ffmpeg.org/legal.html) From 2e01ba62181fee12bf44b8f3f6cb0f46cd591e61 Mon Sep 17 00:00:00 2001 From: Bojidar Qnkov <41879217+Bojidarist@users.noreply.github.com> Date: Sun, 10 Oct 2021 03:11:10 +0300 Subject: [PATCH 0037/2552] [NovaPlay] Add extractor (#1209) Authored by: Bojidarist --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/novaplay.py | 63 ++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 yt_dlp/extractor/novaplay.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index d8e3cd738..0a761135e 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -933,6 +933,7 @@ from .nova import ( NovaEmbedIE, NovaIE, ) +from .novaplay import NovaPlayIE from .nowness import ( NownessIE, NownessPlaylistIE, diff --git a/yt_dlp/extractor/novaplay.py b/yt_dlp/extractor/novaplay.py new file mode 100644 index 000000000..724986a06 --- /dev/null +++ b/yt_dlp/extractor/novaplay.py @@ -0,0 +1,63 @@ +# coding: utf-8 +from .common import InfoExtractor +from ..utils import int_or_none, parse_duration, parse_iso8601 + + +class NovaPlayIE(InfoExtractor): + _VALID_URL = r'https://play.nova\.bg/video/.*/(?P\d+)' + _TESTS = [ + { + 'url': 'https://play.nova.bg/video/bratya/season-3/bratq-2021-10-08/548677', + 'md5': 'b1127a84e61bed1632b7c2ca9cbb4153', + 'info_dict': { + 'id': '548677', + 'ext': 'mp4', + 'title': 'Братя', + 'alt_title': 'bratya/season-3/bratq-2021-10-08', + 'duration': 1603.0, + 'timestamp': 1633724150, + 'upload_date': '20211008', + 'thumbnail': 'https://nbg-img.fite.tv/img/548677_460x260.jpg', + 'description': 'Сезон 3 Епизод 25' + }, + }, + { + 'url': 
'https://play.nova.bg/video/igri-na-volqta/season-3/igri-na-volqta-2021-09-20-1/548227', + 'md5': '5fd61b8ecbe582fc021019d570965d58', + 'info_dict': { + 'id': '548227', + 'ext': 'mp4', + 'title': 'Игри на волята: България (20.09.2021) - част 1', + 'alt_title': 'gri-na-volqta/season-3/igri-na-volqta-2021-09-20-1', + 'duration': 4060.0, + 'timestamp': 1632167564, + 'upload_date': '20210920', + 'thumbnail': 'https://nbg-img.fite.tv/img/548227_460x260.jpg', + 'description': 'Сезон 3 Епизод 13' + }, + } + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + video_props = self._parse_json(self._search_regex( + r'({.+})', + webpage, 'video_props'), video_id)['props']['pageProps']['video'] + m3u8_url = self._download_json( + f'https://nbg-api.fite.tv/api/v2/videos/{video_id}/streams', + video_id, headers={'x-flipps-user-agent': 'Flipps/75/9.7'})[0]['url'] + formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls') + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': video_props['title'], + 'alt_title': video_props.get('slug'), + 'thumbnail': self._og_search_thumbnail(webpage), + 'description': self._og_search_description(webpage), + 'formats': formats, + 'duration': parse_duration(video_props['duration']), + 'timestamp': parse_iso8601(video_props['published_at']), + 'view_count': int_or_none(video_props['view_count']), + } From d1d5c08f29b3b1d60d8b11b812029757fe3fd90a Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 10 Oct 2021 07:08:22 +0530 Subject: [PATCH 0038/2552] [minicurses] Fix when printing to file Closes #1215 --- yt_dlp/YoutubeDL.py | 4 ++++ yt_dlp/downloader/common.py | 5 ++--- yt_dlp/minicurses.py | 42 +++++++++++++++++++------------------ yt_dlp/utils.py | 2 +- 4 files changed, 29 insertions(+), 24 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 2b3c33ce5..49d6b3779 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -514,6 +514,7 @@ class YoutubeDL(object): self.cache = Cache(self) windows_enable_vt_mode() + # FIXME: This will break if we ever print color to stdout self.params['no_color'] = self.params.get('no_color') or not supports_terminal_sequences(self._err_file) if sys.version_info < (3, 6): @@ -3298,6 +3299,9 @@ class YoutubeDL(object): KEYRING_AVAILABLE and 'keyring', )))) or 'none' self._write_string('[debug] Optional libraries: %s\n' % lib_str) + self._write_string('[debug] ANSI escape support: stdout = %s, stderr = %s\n' % ( + supports_terminal_sequences(self._screen_file), + supports_terminal_sequences(self._err_file))) proxy_map = {} for handler in self._opener.handlers: diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index 50e674829..89cdffd24 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -3,7 +3,6 @@ from __future__ import division, unicode_literals import copy import os import re -import sys import time import random @@ -247,9 +246,9 @@ class FileDownloader(object): elif self.ydl.params.get('logger'): self._multiline = MultilineLogger(self.ydl.params['logger'], lines) elif self.params.get('progress_with_newline'): - self._multiline = BreaklineStatusPrinter(sys.stderr, lines) + self._multiline = BreaklineStatusPrinter(self.ydl._screen_file, lines) else: - self._multiline = MultilinePrinter(sys.stderr, lines, not self.params.get('quiet')) + self._multiline = MultilinePrinter(self.ydl._screen_file, lines, not self.params.get('quiet')) def _finish_multiline_status(self): 
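        # self._multiline is whichever printer the chain above selected; after
        # this patch they all funnel output through a common write() helper
        # (see the minicurses changes below), so the target can be a plain
        # file as easily as a terminal. A rough sketch of the shared pattern
        # (illustrative only, not the actual class hierarchy):
        #
        #     class _PrinterSketch:
        #         def __init__(self, stream):
        #             self.stream = stream          # e.g. ydl._screen_file
        #         def write(self, *text):
        #             write_string(''.join(text), self.stream)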
self._multiline.end() diff --git a/yt_dlp/minicurses.py b/yt_dlp/minicurses.py index 0e37ed818..a6e159a14 100644 --- a/yt_dlp/minicurses.py +++ b/yt_dlp/minicurses.py @@ -1,6 +1,6 @@ import functools from threading import Lock -from .utils import supports_terminal_sequences, TERMINAL_SEQUENCES +from .utils import supports_terminal_sequences, TERMINAL_SEQUENCES, write_string class MultilinePrinterBase: @@ -25,20 +25,26 @@ class MultilinePrinterBase: return f'{line + 1}: {text}' return text + def write(self, *text): + write_string(''.join(text), self.stream) + class QuietMultilinePrinter(MultilinePrinterBase): pass class MultilineLogger(MultilinePrinterBase): + def write(self, *text): + self.stream.debug(''.join(text)) + def print_at_line(self, text, pos): # stream is the logger object, not an actual stream - self.stream.debug(self._add_line_number(text, pos)) + self.write(self._add_line_number(text, pos)) class BreaklineStatusPrinter(MultilinePrinterBase): def print_at_line(self, text, pos): - self.stream.write(self._add_line_number(text, pos) + '\n') + self.write(self._add_line_number(text, pos), '\n') class MultilinePrinter(MultilinePrinterBase): @@ -58,50 +64,46 @@ class MultilinePrinter(MultilinePrinterBase): def _move_cursor(self, dest): current = min(self._lastline, self.maximum) - self.stream.write('\r') + yield '\r' distance = dest - current if distance < 0: - self.stream.write(TERMINAL_SEQUENCES['UP'] * -distance) + yield TERMINAL_SEQUENCES['UP'] * -distance elif distance > 0: - self.stream.write(TERMINAL_SEQUENCES['DOWN'] * distance) + yield TERMINAL_SEQUENCES['DOWN'] * distance self._lastline = dest @lock def print_at_line(self, text, pos): if self._HAVE_FULLCAP: - self._move_cursor(pos) - self.stream.write(TERMINAL_SEQUENCES['ERASE_LINE']) - self.stream.write(text) - return + self.write(*self._move_cursor(pos), TERMINAL_SEQUENCES['ERASE_LINE'], text) text = self._add_line_number(text, pos) textlen = len(text) if self._lastline == pos: # move cursor at the start of progress when writing to same line - self.stream.write('\r') + prefix = '\r' if self._lastlength > textlen: text += ' ' * (self._lastlength - textlen) self._lastlength = textlen else: # otherwise, break the line - self.stream.write('\n') + prefix = '\n' self._lastlength = textlen - self.stream.write(text) + self.write(prefix, text) self._lastline = pos @lock def end(self): # move cursor to the end of the last line, and write line break # so that other to_screen calls can precede - if self._HAVE_FULLCAP: - self._move_cursor(self.maximum) + text = self._move_cursor(self.maximum) if self._HAVE_FULLCAP else [] if self.preserve_output: - self.stream.write('\n') + self.write(*text, '\n') return if self._HAVE_FULLCAP: - self.stream.write( - TERMINAL_SEQUENCES['ERASE_LINE'] - + f'{TERMINAL_SEQUENCES["UP"]}{TERMINAL_SEQUENCES["ERASE_LINE"]}' * self.maximum) + self.write( + *text, TERMINAL_SEQUENCES['ERASE_LINE'], + f'{TERMINAL_SEQUENCES["UP"]}{TERMINAL_SEQUENCES["ERASE_LINE"]}' * self.maximum) else: - self.stream.write(' ' * self._lastlength) + self.write(*text, ' ' * self._lastlength) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index db9b9de94..8e5c08ce5 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -6458,7 +6458,7 @@ def jwt_encode_hs256(payload_data, key, headers={}): def supports_terminal_sequences(stream): if compat_os_name == 'nt': - if get_windows_version() < (10, ): + if get_windows_version() < (10, 0, 10586): return False elif not os.getenv('TERM'): return False From 84999521c89a1146feaa0e58d735155df06a6fe5 
Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 10 Oct 2021 07:19:06 +0530 Subject: [PATCH 0039/2552] [build] Allow to release without changelog so that forks can build using GHA easily --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 324cf7eb6..5717ce8ee 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -47,7 +47,7 @@ jobs: - name: Get Changelog id: get_changelog run: | - changelog=$(cat Changelog.md | grep -oPz '(?s)(?<=### ${{ steps.bump_version.outputs.ytdlp_version }}\n{2}).+?(?=\n{2,3}###)') + changelog=$(cat Changelog.md | grep -oPz '(?s)(?<=### ${{ steps.bump_version.outputs.ytdlp_version }}\n{2}).+?(?=\n{2,3}###)') || true echo "changelog<> $GITHUB_ENV echo "$changelog" >> $GITHUB_ENV echo "EOF" >> $GITHUB_ENV From 21186af70a8809f59ad39d1d01f63203ce74da3b Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 10 Oct 2021 09:28:43 +0530 Subject: [PATCH 0040/2552] [downloader] Fix throttledratelimit The timer should not reset at start of each block --- yt_dlp/downloader/http.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index 704ae6f5a..3bc41e5b2 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -58,6 +58,7 @@ class HttpFD(FileDownloader): ctx.block_size = self.params.get('buffersize', 1024) ctx.start_time = time.time() ctx.chunk_size = None + throttle_start = None if self.params.get('continuedl', True): # Establish possible resume length @@ -197,6 +198,7 @@ class HttpFD(FileDownloader): raise RetryDownload(err) def download(): + nonlocal throttle_start data_len = ctx.data.info().get('Content-length', None) # Range HTTP header may be ignored/unsupported by a webserver @@ -225,7 +227,6 @@ class HttpFD(FileDownloader): # measure time over whole while-loop, so slow_down() and best_block_size() work together properly now = None # needed for slow_down() in the first loop run before = start # start measuring - throttle_start = None def retry(e): to_stdout = ctx.tmpfilename == '-' @@ -326,7 +327,7 @@ class HttpFD(FileDownloader): if ctx.stream is not None and ctx.tmpfilename != '-': ctx.stream.close() raise ThrottledDownload() - else: + elif speed: throttle_start = None if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len: From dec0d56fa9bee6a9c10ed33184a1a852e3d6180b Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 10 Oct 2021 04:59:54 +0530 Subject: [PATCH 0041/2552] Release 2021.10.10 --- CONTRIBUTORS | 2 ++ Changelog.md | 11 +++++++++++ README.md | 4 ++-- supportedsites.md | 6 +----- yt_dlp/extractor/trovolive.py | 0 5 files changed, 16 insertions(+), 7 deletions(-) delete mode 100644 yt_dlp/extractor/trovolive.py diff --git a/CONTRIBUTORS b/CONTRIBUTORS index a535411c6..048d98852 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -123,3 +123,5 @@ ajj8 jakubadamw jfogelman timethrow +sarnoud +Bojidarist diff --git a/Changelog.md b/Changelog.md index 2350f67ad..2e6da33fb 100644 --- a/Changelog.md +++ b/Changelog.md @@ -14,6 +14,17 @@ --> +### 2021.10.10 + +* [downloader/ffmpeg] Fix bug in initializing `FFmpegPostProcessor` +* [minicurses] Fix when printing to file +* [downloader] Fix throttledratelimit +* [francetv] Fix extractor by [fstirlitz](https://github.com/fstirlitz), [sarnoud](https://github.com/sarnoud) +* [NovaPlay] Add extractor by [Bojidarist](https://github.com/Bojidarist) +* [ffmpeg] 
Revert "Set max probesize" - No longer needed +* [docs] Remove incorrect dependency on VC++10 +* [build] Allow to release without changelog + ### 2021.10.09 * Improved progress reporting diff --git a/README.md b/README.md index 8acb57484..1a46b25f4 100644 --- a/README.md +++ b/README.md @@ -92,9 +92,9 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/ * **Aria2c with HLS/DASH**: You can use `aria2c` as the external downloader for DASH(mpd) and HLS(m3u8) formats -* **New extractors**: AnimeLab, Philo MSO, Spectrum MSO, SlingTV MSO, Cablevision MSO, RCN MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5, mtv.it, wimtv, pluto.tv, niconico users, discoveryplus.in, mediathek, NFHSNetwork, nebula, ukcolumn, whowatch, MxplayerShow, parlview (au), YoutubeWebArchive, fancode, Saitosan, ShemarooMe, telemundo, VootSeries, SonyLIVSeries, HotstarSeries, VidioPremier, VidioLive, RCTIPlus, TBS Live, douyin, pornflip, ParamountPlusSeries, ScienceChannel, Utreon, OpenRec, BandcampMusic, blackboardcollaborate, eroprofile albums, mirrativ, BannedVideo, bilibili categories, Epicon, filmmodu, GabTV, HungamaAlbum, ManotoTV, Niconico search, Patreon User, peloton, ProjectVeritas, radiko, StarTV, tiktok user, Tokentube, voicy, TV2HuSeries, biliintl, 17live, NewgroundsUser, peertube channel/playlist, ZenYandex, CAM4, CGTN, damtomo, gotostage, Koo, Mediaite, Mediaklikk, MuseScore, nzherald, Olympics replay, radlive, SovietsCloset, Streamanity, Theta, Chingari, ciscowebex, Gettr, GoPro, N1, Theta, Veo, Vupload +* **New extractors**: AnimeLab, Philo MSO, Spectrum MSO, SlingTV MSO, Cablevision MSO, RCN MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5, mtv.it, wimtv, pluto.tv, niconico users, discoveryplus.in, mediathek, NFHSNetwork, nebula, ukcolumn, whowatch, MxplayerShow, parlview (au), YoutubeWebArchive, fancode, Saitosan, ShemarooMe, telemundo, VootSeries, SonyLIVSeries, HotstarSeries, VidioPremier, VidioLive, RCTIPlus, TBS Live, douyin, pornflip, ParamountPlusSeries, ScienceChannel, Utreon, OpenRec, BandcampMusic, blackboardcollaborate, eroprofile albums, mirrativ, BannedVideo, bilibili categories, Epicon, filmmodu, GabTV, HungamaAlbum, ManotoTV, Niconico search, Patreon User, peloton, ProjectVeritas, radiko, StarTV, tiktok user, Tokentube, voicy, TV2HuSeries, biliintl, 17live, NewgroundsUser, peertube channel/playlist, ZenYandex, CAM4, CGTN, damtomo, gotostage, Koo, Mediaite, Mediaklikk, MuseScore, nzherald, Olympics replay, radlive, SovietsCloset, Streamanity, Theta, Chingari, ciscowebex, Gettr, GoPro, N1, Theta, Veo, Vupload, NovaPlay -* **Fixed/improved extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, akamai, ina, rumble, tennistv, amcnetworks, la7 podcasts, linuxacadamy, nitter, twitcasting, viu, crackle, curiositystream, mediasite, rmcdecouverte, sonyliv, tubi, tenplay, patreon, videa, yahoo, BravoTV, crunchyroll playlist, RTP, viki, Hotstar, vidio, vimeo, mediaset, Mxplayer, nbcolympics, ParamountPlus, Newgrounds, SAML Verizon login, Hungama, afreecatv, aljazeera, ATV, bitchute, camtube, CDA, eroprofile, facebook, HearThisAtIE, iwara, kakao, Motherless, Nova, peertube, pornhub, reddit, tiktok, TV2, TV2Hu, tv5mondeplus, VH1, Viafree, XHamster, 9Now, AnimalPlanet, Arte, CBC, Chingari, comedycentral, DIYNetwork, niconico, dw, funimation, globo, HiDive, NDR, Nuvid, Oreilly, pbs, plutotv, reddit, redtube, soundcloud, SpankBang, VrtNU, bbc, Bilibili, LinkedInLearning, parliamentlive, PolskieRadio, Streamable, vidme +* 
**Fixed/improved extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, akamai, ina, rumble, tennistv, amcnetworks, la7 podcasts, linuxacadamy, nitter, twitcasting, viu, crackle, curiositystream, mediasite, rmcdecouverte, sonyliv, tubi, tenplay, patreon, videa, yahoo, BravoTV, crunchyroll playlist, RTP, viki, Hotstar, vidio, vimeo, mediaset, Mxplayer, nbcolympics, ParamountPlus, Newgrounds, SAML Verizon login, Hungama, afreecatv, aljazeera, ATV, bitchute, camtube, CDA, eroprofile, facebook, HearThisAtIE, iwara, kakao, Motherless, Nova, peertube, pornhub, reddit, tiktok, TV2, TV2Hu, tv5mondeplus, VH1, Viafree, XHamster, 9Now, AnimalPlanet, Arte, CBC, Chingari, comedycentral, DIYNetwork, niconico, dw, funimation, globo, HiDive, NDR, Nuvid, Oreilly, pbs, plutotv, reddit, redtube, soundcloud, SpankBang, VrtNU, bbc, Bilibili, LinkedInLearning, parliamentlive, PolskieRadio, Streamable, vidme, francetv * **Subtitle extraction from manifests**: Subtitles can be extracted from streaming media manifests. See [commit/be6202f](https://github.com/yt-dlp/yt-dlp/commit/be6202f12b97858b9d716e608394b51065d0419f) for details diff --git a/supportedsites.md b/supportedsites.md index 3fe79683a..02be6b918 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -232,7 +232,6 @@ - **CTV** - **CTVNews** - **cu.ntv.co.jp**: Nippon Television Network - - **Culturebox** - **CultureUnplugged** - **curiositystream** - **curiositystream:collection** @@ -346,13 +345,10 @@ - **foxnews**: Fox News and Fox Business Video - **foxnews:article** - **FoxSports** - - **france2.fr:generation-what** - **FranceCulture** - **FranceInter** - **FranceTV** - - **FranceTVEmbed** - **francetvinfo.fr** - - **FranceTVJeunesse** - **FranceTVSite** - **Freesound** - **freespeech.org** @@ -705,6 +701,7 @@ - **NosVideo** - **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz - **NovaEmbed** + - **NovaPlay** - **nowness** - **nowness:playlist** - **nowness:series** @@ -991,7 +988,6 @@ - **SpankBangPlaylist** - **Spankwire** - **Spiegel** - - **sport.francetvinfo.fr** - **Sport5** - **SportBox** - **SportDeutschland** diff --git a/yt_dlp/extractor/trovolive.py b/yt_dlp/extractor/trovolive.py deleted file mode 100644 index e69de29bb..000000000 From 8063de51097a7af719c7a8c95b8f7c097573046f Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 10 Oct 2021 04:03:13 +0000 Subject: [PATCH 0042/2552] [version] update :ci skip all --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- yt_dlp/version.py | 2 +- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index d07c33e11..157eca91b 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -21,7 +21,7 @@ assignees: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running yt-dlp version **2021.10.09** +- [ ] I've verified that I'm running yt-dlp version **2021.10.10** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -47,7 +47,7 @@ Add the `-v` flag 
to your command line you run yt-dlp with (`yt-dlp -v - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running yt-dlp version **2021.10.09** +- [ ] I've verified that I'm running yt-dlp version **2021.10.10** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] The provided URLs do not contain any DRM to the best of my knowledge diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index dd9bc1faa..54536fce6 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -21,14 +21,14 @@ assignees: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running yt-dlp version **2021.10.09** +- [ ] I've verified that I'm running yt-dlp version **2021.10.10** - [ ] I've searched the bugtracker for similar site feature requests including closed ones - [ ] I've read the opening an issue section in CONTRIBUTING.md - [ ] I have given an appropriate title to the issue diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 8981eca0e..6413e8b7e 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -21,7 +21,7 @@ assignees: '' - [ ] I'm reporting a bug unrelated to a specific site -- [ ] I've verified that I'm running yt-dlp version **2021.10.09** +- [ ] I've verified that I'm running yt-dlp version **2021.10.10** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] The provided URLs do not contain any DRM to the best of my knowledge - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped @@ -48,7 +48,7 @@ Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running yt-dlp version **2021.10.09** +- [ ] I've verified that I'm running yt-dlp version **2021.10.10** - [ ] I've searched the bugtracker for similar feature requests including closed ones - [ ] I've read the opening an issue section in CONTRIBUTING.md - [ ] I have given an appropriate title to the issue diff --git a/yt_dlp/version.py b/yt_dlp/version.py index 34b6e9a5d..83b6fea9f 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2021.10.09' +__version__ = '2021.10.10' From e8f726a57fe144cb5a6f548e4654944ac2b2aa50 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 10 Oct 2021 11:06:23 +0530 Subject: [PATCH 0043/2552] [hidive] Fix typo in b5ae35ee6d3f913898770b8c74ee5f5e5cc33560 --- yt_dlp/extractor/hidive.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/hidive.py b/yt_dlp/extractor/hidive.py index ef1ca197e..15bd444f9 100644 --- a/yt_dlp/extractor/hidive.py +++ b/yt_dlp/extractor/hidive.py @@ -72,8 +72,9 @@ class HiDiveIE(InfoExtractor): parsed_urls.add(cc_url) subtitles.setdefault(cc_lang, []).append({'url': cc_url}) - def _get_subtitles(self, url, video_id, title, key, subtitles, parsed_urls): + def _get_subtitles(self, url, video_id, title, key, parsed_urls): webpage = self._download_webpage(url, video_id, fatal=False) or '' + subtitles = {} for caption in set(re.findall(r'data-captions=\"([^\"]+)\"', webpage)): renditions = self._call_api( video_id, title, key, {'Captions': caption}, fatal=False, @@ -93,7 +94,7 @@ 
class HiDiveIE(InfoExtractor): raise ExtractorError( '%s said: %s' % (self.IE_NAME, restriction), expected=True) - formats, parsed_urls = [], {}, {None} + formats, parsed_urls = [], {None} for rendition_id, rendition in settings['renditions'].items(): audio, version, extra = rendition_id.split('_') m3u8_url = url_or_none(try_get(rendition, lambda x: x['bitrates']['hls'])) From 2c4bba96acb64e23470ccae804c659b56ebb93b5 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Mon, 11 Oct 2021 03:36:27 +0530 Subject: [PATCH 0044/2552] [EUScreen] Add Extractor (#1219) Closes #1207 Authored by: Ashish0804 --- yt_dlp/extractor/euscreen.py | 64 ++++++++++++++++++++++++++++++++++ yt_dlp/extractor/extractors.py | 1 + 2 files changed, 65 insertions(+) create mode 100644 yt_dlp/extractor/euscreen.py diff --git a/yt_dlp/extractor/euscreen.py b/yt_dlp/extractor/euscreen.py new file mode 100644 index 000000000..3980c2349 --- /dev/null +++ b/yt_dlp/extractor/euscreen.py @@ -0,0 +1,64 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +from ..utils import ( + parse_duration, + js_to_json, +) + + +class EUScreenIE(InfoExtractor): + _VALID_URL = r'(?:https?://)(?:www\.)?euscreen\.eu/item.html\?id=(?P[^&?$/]+)' + + _TESTS = [{ + 'url': 'https://euscreen.eu/item.html?id=EUS_0EBCBF356BFC4E12A014023BA41BD98C', + 'info_dict': { + 'id': 'EUS_0EBCBF356BFC4E12A014023BA41BD98C', + 'ext': 'mp4', + 'title': "L'effondrement du stade du Heysel", + 'alt_title': 'Collapse of the Heysel Stadium', + 'duration': 318.0, + 'description': 'md5:f0ffffdfce6821139357a1b8359d6152', + 'series': 'JA2 DERNIERE', + 'episode': '-', + 'uploader': 'INA / France', + 'thumbnail': 'http://images3.noterik.com/domain/euscreenxl/user/eu_ina/video/EUS_0EBCBF356BFC4E12A014023BA41BD98C/image.jpg' + }, + 'params': {'skip_download': True} + }] + + _payload = b'-1Win32MozillaNetscape5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36true784758undefinedSat, 07 Oct 2021 08:56:50 GMT1633769810758' + + def _real_extract(self, url): + id = self._match_id(url) + args_for_js_request = self._download_webpage( + 'https://euscreen.eu/lou/LouServlet/domain/euscreenxl/html5application/euscreenxlitem', + id, data=self._payload, query={'actionlist': 'itempage', 'id': id}) + info_js = self._download_webpage( + 'https://euscreen.eu/lou/LouServlet/domain/euscreenxl/html5application/euscreenxlitem', + id, data=args_for_js_request.replace('screenid', 'screenId').encode()) + video_json = self._parse_json( + self._search_regex(r'setVideo\(({.+})\)\(\$end\$\)put', info_js, 'Video JSON'), + id, transform_source=js_to_json) + meta_json = self._parse_json( + self._search_regex(r'setData\(({.+})\)\(\$end\$\)', info_js, 'Metadata JSON'), + id, transform_source=js_to_json) + formats = [{ + 'url': source['src'], + } for source in video_json.get('sources', [])] + self._sort_formats(formats) + + return { + 'id': id, + 'title': meta_json.get('originalTitle'), + 'alt_title': meta_json.get('title'), + 'duration': parse_duration(meta_json.get('duration')), + 'description': '%s\n%s' % (meta_json.get('summaryOriginal', ''), meta_json.get('summaryEnglish', '')), + 'series': meta_json.get('series') or meta_json.get('seriesEnglish'), + 'episode': meta_json.get('episodeNumber'), + 'uploader': meta_json.get('provider'), + 'thumbnail': 
meta_json.get('screenshot') or video_json.get('screenshot'), + 'formats': formats, + } diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 0a761135e..adf54ca7e 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -420,6 +420,7 @@ from .espn import ( ) from .esri import EsriVideoIE from .europa import EuropaIE +from .euscreen import EUScreenIE from .expotv import ExpoTVIE from .expressen import ExpressenIE from .extremetube import ExtremeTubeIE From 0481e266f590d835a010019a63b1821c24c8e178 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 11 Oct 2021 09:49:51 +0530 Subject: [PATCH 0045/2552] [tiktok] Fix typo in 943d5ab13305b6a37424e6572d10f562384ada9a and update tests Closes #1226 --- yt_dlp/extractor/tiktok.py | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index fc0915fb0..1db6327e2 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -208,7 +208,7 @@ class TikTokBaseIE(InfoExtractor): 'duration': int_or_none(traverse_obj(video_info, 'duration', ('download_addr', 'duration')), scale=1000) } - def _parse_aweme_video_web(self, aweme_detail, webpage, url): + def _parse_aweme_video_web(self, aweme_detail, webpage_url): video_info = aweme_detail['video'] author_info = traverse_obj(aweme_detail, 'author', 'authorInfo', default={}) music_info = aweme_detail.get('music') or {} @@ -277,7 +277,7 @@ class TikTokBaseIE(InfoExtractor): 'thumbnails': thumbnails, 'description': str_or_none(aweme_detail.get('desc')), 'http_headers': { - 'Referer': url + 'Referer': webpage_url } } @@ -287,18 +287,18 @@ class TikTokIE(TikTokBaseIE): _TESTS = [{ 'url': 'https://www.tiktok.com/@leenabhushan/video/6748451240264420610', - 'md5': '34a7543afd5a151b0840ba6736fb633b', + 'md5': '736bb7a466c6f0a6afeb597da1e6f5b7', 'info_dict': { 'id': '6748451240264420610', 'ext': 'mp4', 'title': '#jassmanak #lehanga #leenabhushan', 'description': '#jassmanak #lehanga #leenabhushan', 'duration': 13, - 'height': 1280, - 'width': 720, + 'height': 1024, + 'width': 576, 'uploader': 'leenabhushan', 'uploader_id': '6691488002098119685', - 'uploader_url': 'https://www.tiktok.com/@leenabhushan', + 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA_Eb4t1vodM1IuTy_cvp9CY22RAb59xqrO0Xtz9CYQJvgXaDvZxYnZYRzDWhhgJmy', 'creator': 'facestoriesbyleenabh', 'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?', 'upload_date': '20191016', @@ -310,7 +310,7 @@ class TikTokIE(TikTokBaseIE): } }, { 'url': 'https://www.tiktok.com/@patroxofficial/video/6742501081818877190?langCountry=en', - 'md5': '06b9800d47d5fe51a19e322dd86e61c9', + 'md5': '6f3cf8cdd9b28cb8363fe0a9a160695b', 'info_dict': { 'id': '6742501081818877190', 'ext': 'mp4', @@ -321,7 +321,7 @@ class TikTokIE(TikTokBaseIE): 'width': 540, 'uploader': 'patrox', 'uploader_id': '18702747', - 'uploader_url': 'https://www.tiktok.com/@patrox', + 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws', 'creator': 'patroX', 'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?', 'upload_date': '20190930', @@ -362,7 +362,7 @@ class TikTokIE(TikTokBaseIE): # Chech statusCode for success status = props_data.get('pageProps').get('statusCode') if status == 0: - return self._parse_aweme_video_web(props_data['pageProps']['itemInfo']['itemStruct'], webpage, url) + return self._parse_aweme_video_web(props_data['pageProps']['itemInfo']['itemStruct'], url) elif status == 
10216: raise ExtractorError('This video is private', expected=True) @@ -377,13 +377,17 @@ class TikTokUserIE(TikTokBaseIE): 'playlist_mincount': 45, 'info_dict': { 'id': '6935371178089399301', + 'title': 'corgibobaa', }, + 'expected_warnings': ['Retrying'] }, { 'url': 'https://www.tiktok.com/@meme', 'playlist_mincount': 593, 'info_dict': { 'id': '79005827461758976', + 'title': 'meme', }, + 'expected_warnings': ['Retrying'] }] r''' # TODO: Fix by adding _signature to api_url @@ -430,7 +434,7 @@ class TikTokUserIE(TikTokBaseIE): break for video in post_list.get('aweme_list', []): yield { - **self._parse_aweme_video(video), + **self._parse_aweme_video_app(video), 'ie_key': TikTokIE.ie_key(), 'extractor': 'TikTok', } @@ -439,12 +443,12 @@ class TikTokUserIE(TikTokBaseIE): query['max_cursor'] = post_list['max_cursor'] def _real_extract(self, url): - user_id = self._match_id(url) - webpage = self._download_webpage(url, user_id, headers={ + user_name = self._match_id(url) + webpage = self._download_webpage(url, user_name, headers={ 'User-Agent': 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)' }) - own_id = self._html_search_regex(r'snssdk\d*://user/profile/(\d+)', webpage, 'user ID') - return self.playlist_result(self._entries_api(webpage, own_id, user_id), user_id) + user_id = self._html_search_regex(r'snssdk\d*://user/profile/(\d+)', webpage, 'user ID') + return self.playlist_result(self._entries_api(webpage, user_id, user_name), user_id, user_name) class DouyinIE(TikTokIE): @@ -556,4 +560,4 @@ class DouyinIE(TikTokIE): render_data = self._parse_json( render_data_json, video_id, transform_source=compat_urllib_parse_unquote) return self._parse_aweme_video_web( - traverse_obj(render_data, (..., 'aweme', 'detail'), get_all=False), webpage, url) + traverse_obj(render_data, (..., 'aweme', 'detail'), get_all=False), url) From a169858f2409eefb66ac30085fddba81123f63b7 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 11 Oct 2021 03:59:55 +0530 Subject: [PATCH 0046/2552] Fix `check_formats` output being written to stdout when `-qv` Closes #1229 --- yt_dlp/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 49d6b3779..2730d2e19 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2485,7 +2485,7 @@ class YoutubeDL(object): verbose = self.params.get('verbose') params = { 'test': True, - 'quiet': not verbose, + 'quiet': self.params.get('quiet') or not verbose, 'verbose': verbose, 'noprogress': not verbose, 'nopart': True, From ed39cac53d0dcb51623918a9c8abdbe18b653459 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 11 Oct 2021 04:00:52 +0530 Subject: [PATCH 0047/2552] Load archive only after printing verbose head If there is some issue in loading archive, the verbose head should be visible in the logs --- yt_dlp/YoutubeDL.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 2730d2e19..59a3e3df1 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -601,24 +601,6 @@ class YoutubeDL(object): self._setup_opener() - def preload_download_archive(fn): - """Preload the archive, if any is specified""" - if fn is None: - return False - self.write_debug('Loading archive file %r\n' % fn) - try: - with locked_file(fn, 'r', encoding='utf-8') as archive_file: - for line in archive_file: - self.archive.add(line.strip()) - except IOError as ioe: - if ioe.errno != errno.ENOENT: - raise - return 
False - return True - - self.archive = set() - preload_download_archive(self.params.get('download_archive')) - if auto_init: self.print_debug_header() self.add_default_info_extractors() @@ -638,6 +620,24 @@ class YoutubeDL(object): register_socks_protocols() + def preload_download_archive(fn): + """Preload the archive, if any is specified""" + if fn is None: + return False + self.write_debug('Loading archive file %r\n' % fn) + try: + with locked_file(fn, 'r', encoding='utf-8') as archive_file: + for line in archive_file: + self.archive.add(line.strip()) + except IOError as ioe: + if ioe.errno != errno.ENOENT: + raise + return False + return True + + self.archive = set() + preload_download_archive(self.params.get('download_archive')) + def warn_if_short_id(self, argv): # short YouTube ID starting with dash? idxs = [ From e6faf2be366fcebb6147739363ebd1f690b961bf Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 11 Oct 2021 09:55:30 +0530 Subject: [PATCH 0048/2552] [update] Clean up error reporting Closes #1224 --- yt_dlp/update.py | 135 ++++++++++++++++++++++++++--------------------- 1 file changed, 74 insertions(+), 61 deletions(-) diff --git a/yt_dlp/update.py b/yt_dlp/update.py index 4fbe7bd7e..26f18bdda 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -48,10 +48,10 @@ def detect_variant(): _NON_UPDATEABLE_REASONS = { 'exe': None, 'zip': None, - 'dir': 'Auto-update is not supported for unpackaged windows executable. Re-download the latest release', - 'py2exe': 'There is no official release for py2exe executable. Build it again with the latest source code', - 'source': 'You cannot update when running from source code', - 'unknown': 'It looks like you installed yt-dlp with a package manager, pip, setup.py or a tarball. Use that to update', + 'dir': 'Auto-update is not supported for unpackaged windows executable; Re-download the latest release', + 'py2exe': 'There is no official release for py2exe executable; Build it again with the latest source code', + 'source': 'You cannot update when running from source code; Use git to pull the latest changes', + 'unknown': 'It looks like you installed yt-dlp with a package manager, pip, setup.py or a tarball; Use that to update', } @@ -59,40 +59,6 @@ def is_non_updateable(): return _NON_UPDATEABLE_REASONS.get(detect_variant(), _NON_UPDATEABLE_REASONS['unknown']) -def update_self(to_screen, verbose, opener): - ''' Exists for backward compatibility. 
Use run_update(ydl) instead ''' - - printfn = to_screen - - class FakeYDL(): - _opener = opener - to_screen = printfn - - @staticmethod - def report_warning(msg, *args, **kwargs): - return printfn('WARNING: %s' % msg, *args, **kwargs) - - @staticmethod - def report_error(msg, tb=None): - printfn('ERROR: %s' % msg) - if not verbose: - return - if tb is None: - # Copied from YoutubeDl.trouble - if sys.exc_info()[0]: - tb = '' - if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: - tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info)) - tb += encode_compat_str(traceback.format_exc()) - else: - tb_data = traceback.format_list(traceback.extract_stack()) - tb = ''.join(tb_data) - if tb: - printfn(tb) - - return run_update(FakeYDL()) - - def run_update(ydl): """ Update the program file with the latest version from the repository @@ -101,10 +67,17 @@ def run_update(ydl): JSON_URL = 'https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest' - def report_error(msg, network=False, expected=False, delim=';'): - if network: - msg += '%s Visit https://github.com/yt-dlp/yt-dlp/releases/latest' % delim - ydl.report_error(msg, tb='' if network or expected else None) + def report_error(msg, expected=False): + ydl.report_error(msg, tb='' if expected else None) + + def report_unable(action, expected=False): + report_error(f'Unable to {action}', expected) + + def report_permission_error(file): + report_unable(f'write to {file}; Try running as administrator', True) + + def report_network_error(action, delim=';'): + report_unable(f'{action}{delim} Visit https://github.com/yt-dlp/yt-dlp/releases/latest', True) def calc_sha256sum(path): h = hashlib.sha256() @@ -120,7 +93,7 @@ def run_update(ydl): version_info = ydl._opener.open(JSON_URL).read().decode('utf-8') version_info = json.loads(version_info) except Exception: - return report_error('can\'t obtain versions info. 
Please try again later ', True, delim='or') + return report_network_error('obtain version info', delim='; Please try again later or') def version_tuple(version_str): return tuple(map(int, version_str.split('.'))) @@ -133,7 +106,7 @@ def run_update(ydl): err = is_non_updateable() if err: ydl.to_screen(f'Latest version: {version_id}, Current version: {__version__}') - return report_error(err, expected=True) + return report_error(err, True) # sys.executable is set to the full pathname of the exe-file for py2exe # though symlinks are not followed so that we need to do this manually @@ -163,55 +136,57 @@ def run_update(ydl): return dict(ln.split()[::-1] for ln in hash_data.splitlines()).get(filename) if not os.access(filename, os.W_OK): - return report_error('no write permissions on %s' % filename, expected=True) + return report_permission_error(filename) # PyInstaller if hasattr(sys, 'frozen'): exe = filename directory = os.path.dirname(exe) if not os.access(directory, os.W_OK): - return report_error('no write permissions on %s' % directory, expected=True) + return report_permission_error(directory) try: if os.path.exists(filename + '.old'): os.remove(filename + '.old') except (IOError, OSError): - return report_error('unable to remove the old version') + return report_unable('remove the old version') try: arch = platform.architecture()[0][:2] url = get_bin_info('exe', arch).get('browser_download_url') if not url: - return report_error('unable to fetch updates', True) + return report_network_error('fetch updates') urlh = ydl._opener.open(url) newcontent = urlh.read() urlh.close() - except (IOError, OSError, StopIteration): - return report_error('unable to download latest version', True) + except (IOError, OSError): + return report_network_error('download latest version') + if not os.access(exe + '.new', os.W_OK): + return report_permission_error(f'{exe}.new') try: with open(exe + '.new', 'wb') as outf: outf.write(newcontent) except (IOError, OSError): - return report_error('unable to write the new version') + return report_unable('write the new version') expected_sum = get_sha256sum('exe', arch) if not expected_sum: ydl.report_warning('no hash information found for the release') elif calc_sha256sum(exe + '.new') != expected_sum: - report_error('unable to verify the new executable', True) + report_network_error('verify the new executable') try: os.remove(exe + '.new') except OSError: - return report_error('unable to remove corrupt download') + return report_unable('remove corrupt download') try: os.rename(exe, exe + '.old') except (IOError, OSError): - return report_error('unable to move current version') + return report_unable('move current version') try: os.rename(exe + '.new', exe) except (IOError, OSError): - report_error('unable to overwrite current version') + report_unable('overwrite current version') os.rename(exe + '.old', exe) return try: @@ -222,31 +197,31 @@ def run_update(ydl): ydl.to_screen('Updated yt-dlp to version %s' % version_id) return True # Exit app except OSError: - report_error('unable to delete old version') + report_unable('delete the old version') # Zip unix package elif isinstance(globals().get('__loader__'), zipimporter): try: url = get_bin_info('zip', '3').get('browser_download_url') if not url: - return report_error('unable to fetch updates', True) + return report_network_error('fetch updates') urlh = ydl._opener.open(url) newcontent = urlh.read() urlh.close() - except (IOError, OSError, StopIteration): - return report_error('unable to download latest version', 
True) + except (IOError, OSError): + return report_network_error('download the latest version') expected_sum = get_sha256sum('zip', '3') if not expected_sum: ydl.report_warning('no hash information found for the release') elif hashlib.sha256(newcontent).hexdigest() != expected_sum: - return report_error('unable to verify the new zip', True) + return report_network_error('verify the new zip') try: with open(filename, 'wb') as outf: outf.write(newcontent) except (IOError, OSError): - return report_error('unable to overwrite current version') + return report_unable('overwrite current version') ydl.to_screen('Updated yt-dlp to version %s; Restart yt-dlp to use the new version' % version_id) @@ -267,3 +242,41 @@ def print_notes(to_screen, versions, fromVersion=__version__): for note in notes: to_screen(note) ''' + + +def update_self(to_screen, verbose, opener): + ''' Exists for backward compatibility ''' + + printfn = to_screen + + printfn( + 'WARNING: "yt_dlp.update.update_self" is deprecated and may be removed in a future version. ' + 'Use "yt_dlp.update.run_update(ydl)" instead') + + class FakeYDL(): + _opener = opener + to_screen = printfn + + @staticmethod + def report_warning(msg, *args, **kwargs): + return printfn('WARNING: %s' % msg, *args, **kwargs) + + @staticmethod + def report_error(msg, tb=None): + printfn('ERROR: %s' % msg) + if not verbose: + return + if tb is None: + # Copied from YoutubeDl.trouble + if sys.exc_info()[0]: + tb = '' + if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: + tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info)) + tb += encode_compat_str(traceback.format_exc()) + else: + tb_data = traceback.format_list(traceback.extract_stack()) + tb = ''.join(tb_data) + if tb: + printfn(tb) + + return run_update(FakeYDL()) From ba107574128aa2bf9769819658931053449fecf9 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 11 Oct 2021 15:21:37 +0530 Subject: [PATCH 0049/2552] [extractor] Detect `EXT-X-KEY` Apple FairPlay --- yt_dlp/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 65444d3bf..af0f01f37 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -2012,7 +2012,7 @@ class InfoExtractor(object): if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access return formats, subtitles - has_drm = re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc) + has_drm = re.search(r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', m3u8_doc) def format_url(url): return url if re.match(r'^https?://', url) else compat_urlparse.urljoin(m3u8_url, url) From 9dda99f2fca7342c8f19150ac8730d67fceed42d Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 11 Oct 2021 15:27:00 +0530 Subject: [PATCH 0050/2552] [Merger] Do not add `aac_adtstoasc` to non-hls audio --- yt_dlp/postprocessor/ffmpeg.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 5f6861f93..e6aa2940a 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -732,7 +732,8 @@ class FFmpegMergerPP(FFmpegPostProcessor): for (i, fmt) in enumerate(info['requested_formats']): if fmt.get('acodec') != 'none': args.extend(['-map', f'{i}:a:0']) - if self.get_audio_codec(fmt['filepath']) == 'aac': + aac_fixup = fmt['protocol'].startswith('m3u8') and self.get_audio_codec(fmt['filepath']) == 'aac' + if aac_fixup: args.extend([f'-bsf:a:{audio_streams}', 'aac_adtstoasc']) 
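                # The aac_adtstoasc bitstream filter converts ADTS-framed
                # AAC (the framing carried by HLS/MPEG-TS downloads) into
                # the raw form an MP4 container expects; AAC from other
                # protocols normally has no ADTS headers, and running the
                # filter on it can make ffmpeg error out, hence the added
                # protocol check.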
audio_streams += 1 if fmt.get('vcodec') != 'none': From a903d8285c96b2c7ac7915f228a17e84cbfe3ba4 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 11 Oct 2021 17:25:37 +0530 Subject: [PATCH 0051/2552] Fix bug in storyboards Caused by 9359f3d4f02856128f5626e754c7f64e2232b02f --- yt_dlp/YoutubeDL.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 59a3e3df1..8878d710f 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3072,6 +3072,7 @@ class YoutubeDL(object): @staticmethod def format_resolution(format, default='unknown'): + is_images = format.get('vcodec') == 'none' and format.get('acodec') == 'none' if format.get('vcodec') == 'none' and format.get('acodec') != 'none': return 'audio only' if format.get('resolution') is not None: @@ -3082,11 +3083,11 @@ class YoutubeDL(object): res = '%sp' % format['height'] elif format.get('width'): res = '%dx?' % format['width'] + elif is_images: + return 'images' else: - res = default - if format.get('vcodec') == 'none' and format.get('acodec') == 'none': - res += ' (images)' - return res + return default + return f'{res} images' if is_images else res def _format_note(self, fdict): res = '' From cc16383ff36b3971064bae8106a45d38dbddc31b Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 9 Oct 2021 02:09:55 +0530 Subject: [PATCH 0052/2552] [extractor] Simplify search extractors --- yt_dlp/extractor/common.py | 10 +++++++++- yt_dlp/extractor/googlesearch.py | 28 ++++++---------------------- yt_dlp/extractor/niconico.py | 8 +++----- yt_dlp/extractor/soundcloud.py | 21 +++++---------------- yt_dlp/extractor/yahoo.py | 22 +++------------------- yt_dlp/extractor/youtube.py | 10 +--------- 6 files changed, 27 insertions(+), 72 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index af0f01f37..d02a808b6 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import base64 import datetime import hashlib +import itertools import json import netrc import os @@ -3617,7 +3618,14 @@ class SearchInfoExtractor(InfoExtractor): return self._get_n_results(query, n) def _get_n_results(self, query, n): - """Get a specified number of results for a query""" + """Get a specified number of results for a query. 
+ Either this function or _search_results must be overridden by subclasses """ + return self.playlist_result( + itertools.islice(self._search_results(query), 0, None if n == float('inf') else n), + query, query) + + def _search_results(self, query): + """Returns an iterator of search results""" raise NotImplementedError('This method must be implemented by subclasses') @property diff --git a/yt_dlp/extractor/googlesearch.py b/yt_dlp/extractor/googlesearch.py index 5279fa807..f605c0c35 100644 --- a/yt_dlp/extractor/googlesearch.py +++ b/yt_dlp/extractor/googlesearch.py @@ -11,6 +11,7 @@ class GoogleSearchIE(SearchInfoExtractor): _MAX_RESULTS = 1000 IE_NAME = 'video.google:search' _SEARCH_KEY = 'gvsearch' + _WORKING = False _TEST = { 'url': 'gvsearch15:python language', 'info_dict': { @@ -20,16 +21,7 @@ class GoogleSearchIE(SearchInfoExtractor): 'playlist_count': 15, } - def _get_n_results(self, query, n): - """Get a specified number of results for a query""" - - entries = [] - res = { - '_type': 'playlist', - 'id': query, - 'title': query, - } - + def _search_results(self, query): for pagenum in itertools.count(): webpage = self._download_webpage( 'http://www.google.com/search', @@ -44,16 +36,8 @@ class GoogleSearchIE(SearchInfoExtractor): for hit_idx, mobj in enumerate(re.finditer( r'
<h3 class="r"><a href="([^"]+)"', webpage)):
 
                 # Skip playlists
                 if not re.search(r'id="vidthumb%d"' % (hit_idx + 1), webpage):
                     continue
 
-                entries.append({
-                    '_type': 'url',
-                    'url': mobj.group(1)
-                })
+                yield self.url_result(mobj.group(1))
 
-            if (len(entries) >
= n) or not re.search(r'id="pnnext"', webpage): - res['entries'] = entries[:n] - return res + if not re.search(r'id="pnnext"', webpage): + return diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py index f19afa485..76f087057 100644 --- a/yt_dlp/extractor/niconico.py +++ b/yt_dlp/extractor/niconico.py @@ -709,11 +709,9 @@ class NicovideoSearchIE(SearchInfoExtractor, NicovideoSearchURLIE): _SEARCH_KEY = 'nicosearch' _TESTS = [] - def _get_n_results(self, query, n): - entries = self._entries(self._proto_relative_url(f'//www.nicovideo.jp/search/{query}'), query) - if n < float('inf'): - entries = itertools.islice(entries, 0, n) - return self.playlist_result(entries, query, query) + def _search_results(self, query): + return self._entries( + self._proto_relative_url(f'//www.nicovideo.jp/search/{query}'), query) class NicovideoSearchDateIE(NicovideoSearchIE): diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index ad3a32a02..e89383ff1 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -880,25 +880,14 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): }) next_url = update_url_query(self._API_V2_BASE + endpoint, query) - collected_results = 0 - for i in itertools.count(1): response = self._download_json( - next_url, collection_id, 'Downloading page {0}'.format(i), + next_url, collection_id, f'Downloading page {i}', 'Unable to download API page', headers=self._HEADERS) - collection = response.get('collection', []) - if not collection: - break - - collection = list(filter(bool, collection)) - collected_results += len(collection) - - for item in collection: - yield self.url_result(item['uri'], SoundcloudIE.ie_key()) - - if not collection or collected_results >= limit: - break + for item in response.get('collection') or []: + if item: + yield self.url_result(item['uri'], SoundcloudIE.ie_key()) next_url = response.get('next_href') if not next_url: @@ -906,4 +895,4 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): def _get_n_results(self, query, n): tracks = self._get_collection('search/tracks', query, limit=n, q=query) - return self.playlist_result(tracks, playlist_title=query) + return self.playlist_result(tracks, query, query) diff --git a/yt_dlp/extractor/yahoo.py b/yt_dlp/extractor/yahoo.py index 741efefc8..53556de00 100644 --- a/yt_dlp/extractor/yahoo.py +++ b/yt_dlp/extractor/yahoo.py @@ -334,31 +334,15 @@ class YahooSearchIE(SearchInfoExtractor): IE_NAME = 'screen.yahoo:search' _SEARCH_KEY = 'yvsearch' - def _get_n_results(self, query, n): - """Get a specified number of results for a query""" - entries = [] + def _search_results(self, query): for pagenum in itertools.count(0): result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30) info = self._download_json(result_url, query, note='Downloading results page ' + str(pagenum + 1)) - m = info['m'] - results = info['results'] - - for (i, r) in enumerate(results): - if (pagenum * 30) + i >= n: - break - mobj = re.search(r'(?Pscreen\.yahoo\.com/.*?-\d*?\.html)"', r) - e = self.url_result('http://' + mobj.group('url'), 'Yahoo') - entries.append(e) - if (pagenum * 30 + i >= n) or (m['last'] >= (m['total'] - 1)): + yield from (self.url_result(result['rurl']) for result in info['results']) + if info['m']['last'] >= info['m']['total'] - 1: break - return { - '_type': 'playlist', - 'id': query, - 'entries': entries, - } - class YahooGyaOPlayerIE(InfoExtractor): IE_NAME 
= 'yahoo:gyao:player' diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 97d02dc0b..41fd0aef7 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -4615,11 +4615,10 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE): _SEARCH_PARAMS = None _TESTS = [] - def _entries(self, query, n): + def _search_results(self, query): data = {'query': query} if self._SEARCH_PARAMS: data['params'] = self._SEARCH_PARAMS - total = 0 continuation = {} for page_num in itertools.count(1): data.update(continuation) @@ -4662,17 +4661,10 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE): continue yield self._extract_video(video) - total += 1 - if total == n: - return if not continuation: break - def _get_n_results(self, query, n): - """Get a specified number of results for a query""" - return self.playlist_result(self._entries(query, n), query, query) - class YoutubeSearchDateIE(YoutubeSearchIE): IE_NAME = YoutubeSearchIE.IE_NAME + ':date' From a2160aa45f4019e02ced01c9030aa9519b40b24f Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 12 Oct 2021 15:20:50 +0530 Subject: [PATCH 0053/2552] [extractor] Generalize `getcomments` implementation --- yt_dlp/extractor/bannedvideo.py | 17 +++++---------- yt_dlp/extractor/common.py | 26 +++++++++++++++++++++++ yt_dlp/extractor/youtube.py | 37 +++++++-------------------------- 3 files changed, 38 insertions(+), 42 deletions(-) diff --git a/yt_dlp/extractor/bannedvideo.py b/yt_dlp/extractor/bannedvideo.py index 8f8f5ef5f..3db1151f6 100644 --- a/yt_dlp/extractor/bannedvideo.py +++ b/yt_dlp/extractor/bannedvideo.py @@ -97,21 +97,16 @@ query GetCommentReplies($id: String!) { 'query': self._GRAPHQL_QUERIES[operation] }).encode('utf8')).get('data') - def _extract_comments(self, video_id, comments, comment_data): + def _get_comments(self, video_id, comments, comment_data): + yield from comments for comment in comment_data.copy(): comment_id = comment.get('_id') if comment.get('replyCount') > 0: reply_json = self._call_api( video_id, comment_id, 'GetCommentReplies', f'Downloading replies for comment {comment_id}') - comments.extend( - self._parse_comment(reply, comment_id) - for reply in reply_json.get('getCommentReplies')) - - return { - 'comments': comments, - 'comment_count': len(comments), - } + for reply in reply_json.get('getCommentReplies'): + yield self._parse_comment(reply, comment_id) @staticmethod def _parse_comment(comment_data, parent): @@ -159,7 +154,5 @@ query GetCommentReplies($id: String!) 
{ 'tags': [tag.get('name') for tag in video_info.get('tags')], 'availability': self._availability(is_unlisted=video_info.get('unlisted')), 'comments': comments, - '__post_extractor': ( - (lambda: self._extract_comments(video_id, comments, video_json.get('getVideoComments'))) - if self.get_param('getcomments') else None) + '__post_extractor': self.extract_comments(video_id, comments, video_json.get('getVideoComments')) } diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index d02a808b6..5b7b8891a 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -3502,6 +3502,32 @@ class InfoExtractor(object): def _get_subtitles(self, *args, **kwargs): raise NotImplementedError('This method must be implemented by subclasses') + def extract_comments(self, *args, **kwargs): + if not self.get_param('getcomments'): + return None + generator = self._get_comments(*args, **kwargs) + + def extractor(): + comments = [] + try: + while True: + comments.append(next(generator)) + except KeyboardInterrupt: + interrupted = True + self.to_screen('Interrupted by user') + except StopIteration: + interrupted = False + comment_count = len(comments) + self.to_screen(f'Extracted {comment_count} comments') + return { + 'comments': comments, + 'comment_count': None if interrupted else comment_count + } + return extractor + + def _get_comments(self, *args, **kwargs): + raise NotImplementedError('This method must be implemented by subclasses') + @staticmethod def _merge_subtitle_items(subtitle_list1, subtitle_list2): """ Merge subtitle items for one language. Items with duplicated URLs diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 41fd0aef7..3e93c9934 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2241,7 +2241,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, comment_counts=None): def extract_header(contents): - _total_comments = 0 _continuation = None for content in contents: comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer']) @@ -2251,7 +2250,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if expected_comment_count: comment_counts[1] = expected_comment_count self.to_screen('Downloading ~%d comments' % expected_comment_count) - _total_comments = comment_counts[1] sort_mode_str = self._configuration_arg('comment_sort', [''])[0] comment_sort_index = int(sort_mode_str != 'top') # 1 = new, 0 = top @@ -2271,7 +2269,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): sort_text = 'top comments' if comment_sort_index == 0 else 'newest first' self.to_screen('Sorting comments by %s' % sort_text) break - return _total_comments, _continuation + return _continuation def extract_thread(contents): if not parent: @@ -2359,9 +2357,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): lambda x: x['appendContinuationItemsAction']['continuationItems']), list) or [] if is_first_continuation: - total_comments, continuation = extract_header(continuation_items) - if total_comments: - yield total_comments + continuation = extract_header(continuation_items) is_first_continuation = False if continuation: break @@ -2389,9 +2385,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): continue if is_first_continuation: header_continuation_items = [continuation_renderer.get('header') or {}] - total_comments, continuation = extract_header(header_continuation_items) - if total_comments: - yield total_comments + continuation = extract_header(header_continuation_items) 
is_first_continuation = False if continuation: break @@ -2419,35 +2413,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor): [bytes_to_intlist(base64.b64decode(part)) for part in parts])) return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8') - def _extract_comments(self, ytcfg, video_id, contents, webpage): + def _get_comments(self, ytcfg, video_id, contents, webpage): """Entry for comment extraction""" def _real_comment_extract(contents): yield from self._comment_entries( traverse_obj(contents, (..., 'itemSectionRenderer'), get_all=False), ytcfg, video_id) - comments = [] - estimated_total = 0 - max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf') + max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) # Force English regardless of account setting to prevent parsing issues # See: https://github.com/yt-dlp/yt-dlp/issues/532 ytcfg = copy.deepcopy(ytcfg) traverse_obj( ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})['hl'] = 'en' - try: - for comment in _real_comment_extract(contents): - if len(comments) >= max_comments: - break - if isinstance(comment, int): - estimated_total = comment - continue - comments.append(comment) - except KeyboardInterrupt: - self.to_screen('Interrupted by user') - self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total)) - return { - 'comments': comments, - 'comment_count': len(comments), - } + return itertools.islice(_real_comment_extract(contents), 0, max_comments) @staticmethod def _get_checkok_params(): @@ -3209,8 +3187,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): needs_auth=info['age_limit'] >= 18, is_unlisted=None if is_private is None else is_unlisted) - if self.get_param('getcomments', False): - info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage) + info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage) self.mark_watched(video_id, player_responses) From e88d44c6ee66e2a1b814c2fe89fc53b3c3e029ef Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 9 Oct 2021 02:14:23 +0530 Subject: [PATCH 0054/2552] [cleanup] Cleanup bilibili code Closes #1169 Authored by pukkandan, u-spec-png --- yt_dlp/extractor/bilibili.py | 224 +++++++++++++---------------------- 1 file changed, 82 insertions(+), 142 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index a1be7e04b..d6c77e418 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -1,16 +1,13 @@ # coding: utf-8 -from __future__ import unicode_literals import hashlib import itertools -import json import functools import re import math from .common import InfoExtractor, SearchInfoExtractor from ..compat import ( - compat_str, compat_parse_qs, compat_urlparse, compat_urllib_parse_urlparse @@ -20,6 +17,7 @@ from ..utils import ( int_or_none, float_or_none, parse_iso8601, + traverse_obj, try_get, smuggle_url, srt_subtitles_timecode, @@ -101,7 +99,7 @@ class BiliBiliIE(InfoExtractor): 'upload_date': '20170301', }, 'params': { - 'skip_download': True, # Test metadata only + 'skip_download': True, }, }, { 'info_dict': { @@ -115,7 +113,7 @@ class BiliBiliIE(InfoExtractor): 'upload_date': '20170301', }, 'params': { - 'skip_download': True, # Test metadata only + 'skip_download': True, }, }] }, { @@ -169,7 +167,7 @@ class BiliBiliIE(InfoExtractor): if 'anime/' not in url: cid = self._search_regex( - r'\bcid(?:["\']:|=)(\d+),["\']page(?:["\']:|=)' + 
compat_str(page_id), webpage, 'cid', + r'\bcid(?:["\']:|=)(\d+),["\']page(?:["\']:|=)' + str(page_id), webpage, 'cid', default=None ) or self._search_regex( r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid', @@ -259,7 +257,7 @@ class BiliBiliIE(InfoExtractor): # TODO: The json is already downloaded by _extract_anthology_entries. Don't redownload for each video part_title = try_get( self._download_json( - "https://api.bilibili.com/x/player/pagelist?bvid=%s&jsonp=jsonp" % bv_id, + f'https://api.bilibili.com/x/player/pagelist?bvid={bv_id}&jsonp=jsonp', video_id, note='Extracting videos in anthology'), lambda x: x['data'][int(page_id) - 1]['part']) title = part_title or title @@ -273,7 +271,7 @@ class BiliBiliIE(InfoExtractor): # TODO 'view_count' requires deobfuscating Javascript info = { - 'id': compat_str(video_id) if page_id is None else '%s_p%s' % (video_id, page_id), + 'id': str(video_id) if page_id is None else '%s_part%s' % (video_id, page_id), 'cid': cid, 'title': title, 'description': description, @@ -295,29 +293,25 @@ class BiliBiliIE(InfoExtractor): info['uploader'] = self._html_search_meta( 'author', webpage, 'uploader', default=None) - raw_danmaku = self._get_raw_danmaku(video_id, cid) - - raw_tags = self._get_tags(video_id) - tags = list(map(lambda x: x['tag_name'], raw_tags)) - top_level_info = { - 'raw_danmaku': raw_danmaku, - 'tags': tags, - 'raw_tags': raw_tags, + 'tags': traverse_obj(self._download_json( + f'https://api.bilibili.com/x/tag/archive/tags?aid={video_id}', + video_id, fatal=False, note='Downloading tags'), ('data', ..., 'tag_name')), } - if self.get_param('getcomments', False): - def get_comments(): - comments = self._get_all_comment_pages(video_id) - return { - 'comments': comments, - 'comment_count': len(comments) - } - top_level_info['__post_extractor'] = get_comments + entries[0]['subtitles'] = { + 'danmaku': [{ + 'ext': 'xml', + 'url': f'https://comment.bilibili.com/{cid}.xml', + }] + } - ''' + r''' # Requires https://github.com/m13253/danmaku2ass which is licenced under GPL3 # See https://github.com/animelover1984/youtube-dl + + raw_danmaku = self._download_webpage( + f'https://comment.bilibili.com/{cid}.xml', video_id, fatal=False, note='Downloading danmaku comments') danmaku = NiconicoIE.CreateDanmaku(raw_danmaku, commentType='Bilibili', x=1024, y=576) entries[0]['subtitles'] = { 'danmaku': [{ @@ -327,29 +321,27 @@ class BiliBiliIE(InfoExtractor): } ''' + top_level_info['__post_extractor'] = self.extract_comments(video_id) + for entry in entries: entry.update(info) if len(entries) == 1: entries[0].update(top_level_info) return entries[0] - else: - for idx, entry in enumerate(entries): - entry['id'] = '%s_part%d' % (video_id, (idx + 1)) - - global_info = { - '_type': 'multi_video', - 'id': compat_str(video_id), - 'bv_id': bv_id, - 'title': title, - 'description': description, - 'entries': entries, - } - global_info.update(info) - global_info.update(top_level_info) + for idx, entry in enumerate(entries): + entry['id'] = '%s_part%d' % (video_id, (idx + 1)) - return global_info + return { + '_type': 'multi_video', + 'id': str(video_id), + 'bv_id': bv_id, + 'title': title, + 'description': description, + 'entries': entries, + **info, **top_level_info + } def _extract_anthology_entries(self, bv_id, video_id, webpage): title = self._html_search_regex( @@ -357,10 +349,10 @@ class BiliBiliIE(InfoExtractor): r'(?s)]*>(?P.+?)</h1>'), webpage, 'title', group='title') json_data = self._download_json( - "https://api.bilibili.com/x/player/pagelist?bvid=%s&jsonp=jsonp" % bv_id, 
+ f'https://api.bilibili.com/x/player/pagelist?bvid={bv_id}&jsonp=jsonp', video_id, note='Extracting videos in anthology') - if len(json_data['data']) > 1: + if json_data['data']: return self.playlist_from_matches( json_data['data'], bv_id, title, ie=BiliBiliIE.ie_key(), getter=lambda entry: 'https://www.bilibili.com/video/%s?p=%d' % (bv_id, entry['page'])) @@ -375,65 +367,31 @@ class BiliBiliIE(InfoExtractor): if response['code'] == -400: raise ExtractorError('Video ID does not exist', expected=True, video_id=id) elif response['code'] != 0: - raise ExtractorError('Unknown error occurred during API check (code %s)' % response['code'], expected=True, video_id=id) - return (response['data']['aid'], response['data']['bvid']) - - # recursive solution to getting every page of comments for the video - # we can stop when we reach a page without any comments - def _get_all_comment_pages(self, video_id, commentPageNumber=0): - comment_url = "https://api.bilibili.com/x/v2/reply?jsonp=jsonp&pn=%s&type=1&oid=%s&sort=2&_=1567227301685" % (commentPageNumber, video_id) - json_str = self._download_webpage( - comment_url, video_id, - note='Extracting comments from page %s' % (commentPageNumber)) - replies = json.loads(json_str)['data']['replies'] - if replies is None: - return [] - return self._get_all_children(replies) + self._get_all_comment_pages(video_id, commentPageNumber + 1) - - # extracts all comments in the tree - def _get_all_children(self, replies): - if replies is None: - return [] - - ret = [] - for reply in replies: - author = reply['member']['uname'] - author_id = reply['member']['mid'] - id = reply['rpid'] - text = reply['content']['message'] - timestamp = reply['ctime'] - parent = reply['parent'] if reply['parent'] != 0 else 'root' - - comment = { - "author": author, - "author_id": author_id, - "id": id, - "text": text, - "timestamp": timestamp, - "parent": parent, - } - ret.append(comment) - - # from the JSON, the comment structure seems arbitrarily deep, but I could be wrong. - # Regardless, this should work. 
- ret += self._get_all_children(reply['replies']) - - return ret + raise ExtractorError(f'Unknown error occurred during API check (code {response["code"]})', + expected=True, video_id=id) + return response['data']['aid'], response['data']['bvid'] - def _get_raw_danmaku(self, video_id, cid): - # This will be useful if I decide to scrape all pages instead of doing them individually - # cid_url = "https://www.bilibili.com/widget/getPageList?aid=%s" % (video_id) - # cid_str = self._download_webpage(cid_url, video_id, note=False) - # cid = json.loads(cid_str)[0]['cid'] - - danmaku_url = "https://comment.bilibili.com/%s.xml" % (cid) - danmaku = self._download_webpage(danmaku_url, video_id, note='Downloading danmaku comments') - return danmaku - - def _get_tags(self, video_id): - tags_url = "https://api.bilibili.com/x/tag/archive/tags?aid=%s" % (video_id) - tags_json = self._download_json(tags_url, video_id, note='Downloading tags') - return tags_json['data'] + def _get_comments(self, video_id, commentPageNumber=0): + for idx in itertools.count(1): + replies = traverse_obj( + self._download_json( + f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={video_id}&type=1&jsonp=jsonp&sort=2&_=1567227301685', + video_id, note=f'Extracting comments from page {idx}'), + ('data', 'replies')) or [] + for children in map(self._get_all_children, replies): + yield from children + + def _get_all_children(self, reply): + yield { + 'author': traverse_obj(reply, ('member', 'uname')), + 'author_id': traverse_obj(reply, ('member', 'mid')), + 'id': reply.get('rpid'), + 'text': traverse_obj(reply, ('content', 'message')), + 'timestamp': reply.get('ctime'), + 'parent': reply.get('parent') or 'root', + } + for children in map(self._get_all_children, reply.get('replies') or []): + yield from children class BiliBiliBangumiIE(InfoExtractor): @@ -516,11 +474,8 @@ class BilibiliChannelIE(InfoExtractor): count, max_count = 0, None for page_num in itertools.count(1): - data = self._parse_json( - self._download_webpage( - self._API_URL % (list_id, page_num), list_id, - note='Downloading page %d' % page_num), - list_id)['data'] + data = self._download_json( + self._API_URL % (list_id, page_num), list_id, note=f'Downloading page {page_num}')['data'] max_count = max_count or try_get(data, lambda x: x['page']['count']) @@ -583,11 +538,11 @@ class BilibiliCategoryIE(InfoExtractor): } if category not in rid_map: - raise ExtractorError('The supplied category, %s, is not supported. List of supported categories: %s' % (category, list(rid_map.keys()))) - + raise ExtractorError( + f'The category {category} isn\'t supported. Supported categories: {list(rid_map.keys())}') if subcategory not in rid_map[category]: - raise ExtractorError('The subcategory, %s, isn\'t supported for this category. Supported subcategories: %s' % (subcategory, list(rid_map[category].keys()))) - + raise ExtractorError( + f'The subcategory {subcategory} isn\'t supported for this category. 
Supported subcategories: {list(rid_map[category].keys())}')
         rid_value = rid_map[category][subcategory]
 
         api_url = 'https://api.bilibili.com/x/web-interface/newlist?rid=%d&type=1&ps=20&jsonp=jsonp' % rid_value
@@ -614,41 +569,26 @@ class BiliBiliSearchIE(SearchInfoExtractor):
     IE_DESC = 'Bilibili video search, "bilisearch" keyword'
     _MAX_RESULTS = 100000
     _SEARCH_KEY = 'bilisearch'
-    MAX_NUMBER_OF_RESULTS = 1000
 
-    def _get_n_results(self, query, n):
-        """Get a specified number of results for a query"""
-
-        entries = []
-        pageNumber = 0
-        while True:
-            pageNumber += 1
-            # FIXME
-            api_url = 'https://api.bilibili.com/x/web-interface/search/type?context=&page=%s&order=pubdate&keyword=%s&duration=0&tids_2=&__refresh__=true&search_type=video&tids=0&highlight=1' % (pageNumber, query)
-            json_str = self._download_webpage(
-                api_url, "None", query={"Search_key": query},
-                note='Extracting results from page %s' % pageNumber)
-            data = json.loads(json_str)['data']
-
-            # FIXME: this is hideous
-            if "result" not in data:
-                return {
-                    '_type': 'playlist',
-                    'id': query,
-                    'entries': entries[:n]
-                }
-
-            videos = data['result']
+    def _search_results(self, query):
+        for page_num in itertools.count(1):
+            videos = self._download_json(
+                'https://api.bilibili.com/x/web-interface/search/type', query,
+                note=f'Extracting results from page {page_num}', query={
+                    'Search_key': query,
+                    'keyword': query,
+                    'page': page_num,
+                    'context': '',
+                    'order': 'pubdate',
+                    'duration': 0,
+                    'tids_2': '',
+                    '__refresh__': 'true',
+                    'search_type': 'video',
+                    'tids': 0,
+                    'highlight': 1,
+                })['data'].get('result') or []
             for video in videos:
-                e = self.url_result(video['arcurl'], 'BiliBili', compat_str(video['aid']))
-                entries.append(e)
-
-            if(len(entries) >= n or len(videos) >= BiliBiliSearchIE.MAX_NUMBER_OF_RESULTS):
-                return {
-                    '_type': 'playlist',
-                    'id': query,
-                    'entries': entries[:n]
-                }
+                yield self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
 
 
 class BilibiliAudioBaseIE(InfoExtractor):

From 7b38649845c1516e4ab4e29b6bb84b2302269663 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Mon, 11 Oct 2021 20:21:04 +0530
Subject: [PATCH 0055/2552] Fix verbose header not showing custom configs

---
 yt_dlp/options.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index f45c548f2..d2dc7687b 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -1590,7 +1590,7 @@ def parseOpts(overrideArguments=None):
             parser.error('config-location %s does not exist.' 
% location) config = _readOptions(location, default=None) if config: - configs['custom'], paths['config'] = config, location + configs['custom'], paths['custom'] = config, location if opts.ignoreconfig: return From ecdc9049c0d8c00ad9ea5218126eefb1e7049385 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 12 Oct 2021 12:03:56 +0530 Subject: [PATCH 0056/2552] [YouTube] Add auto-translated subtitles Closes #1245 --- yt_dlp/extractor/youtube.py | 49 ++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 3e93c9934..1ef80445e 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2964,15 +2964,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor): } pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict) - # Converted into dicts to remove duplicates - captions = { - sub.get('baseUrl'): sub - for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])} - translation_languages = { - lang.get('languageCode'): lang.get('languageName') - for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])} - subtitles = {} if pctr: + def get_lang_code(track): + return (remove_start(track.get('vssId') or '', '.').replace('.', '-') + or track.get('languageCode')) + + # Converted into dicts to remove duplicates + captions = { + get_lang_code(sub): sub + for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])} + translation_languages = { + lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1) + for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])} + def process_language(container, base_url, lang_code, sub_name, query): lang_subs = container.setdefault(lang_code, []) for fmt in self._SUBTITLE_FORMATS: @@ -2985,30 +2989,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'name': sub_name, }) - for base_url, caption_track in captions.items(): + subtitles, automatic_captions = {}, {} + for lang_code, caption_track in captions.items(): + base_url = caption_track.get('baseUrl') if not base_url: continue + lang_name = self._get_text(caption_track, 'name', max_runs=1) if caption_track.get('kind') != 'asr': - lang_code = ( - remove_start(caption_track.get('vssId') or '', '.').replace('.', '-') - or caption_track.get('languageCode')) if not lang_code: continue process_language( - subtitles, base_url, lang_code, - traverse_obj(caption_track, ('name', 'simpleText'), ('name', 'runs', ..., 'text'), get_all=False), - {}) - continue - automatic_captions = {} + subtitles, base_url, lang_code, lang_name, {}) + if not caption_track.get('isTranslatable'): + continue for trans_code, trans_name in translation_languages.items(): if not trans_code: continue + if caption_track.get('kind') != 'asr': + trans_code += f'-{lang_code}' + trans_name += format_field(lang_name, template=' from %s') process_language( - automatic_captions, base_url, trans_code, - self._get_text(trans_name, max_runs=1), - {'tlang': trans_code}) - info['automatic_captions'] = automatic_captions - info['subtitles'] = subtitles + automatic_captions, base_url, trans_code, trans_name, {'tlang': trans_code}) + info['automatic_captions'] = automatic_captions + info['subtitles'] = subtitles parsed_url = compat_urllib_parse_urlparse(url) for component in [parsed_url.fragment, parsed_url.query]: @@ -3054,7 +3057,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): try: # This 
will error if there is no livechat initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'] - info['subtitles']['live_chat'] = [{ + info.setdefault('subtitles', {})['live_chat'] = [{ 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies 'video_id': video_id, 'ext': 'json', From a387b69a7cb55afb160d8f59df2593cb337a9db7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81kos=20S=C3=BClyi?= <sulyi.gbox@gmail.com> Date: Tue, 12 Oct 2021 20:54:27 +0200 Subject: [PATCH 0057/2552] [devscripts/run_tests] Use markers to filter tests (#1258) `-k` filters using a substring match on test name. `-m` checks markers for an exact match. Authored by: sulyi --- devscripts/run_tests.bat | 6 +++--- devscripts/run_tests.sh | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/devscripts/run_tests.bat b/devscripts/run_tests.bat index f12ae1c1b..b8bb393d9 100644 --- a/devscripts/run_tests.bat +++ b/devscripts/run_tests.bat @@ -3,11 +3,11 @@ cd /d %~dp0.. if ["%~1"]==[""] ( - set "test_set=" + set "test_set="test"" ) else if ["%~1"]==["core"] ( - set "test_set=-k "not download"" + set "test_set="-m not download"" ) else if ["%~1"]==["download"] ( - set "test_set=-k download" + set "test_set="-m "download"" ) else ( echo.Invalid test type "%~1". Use "core" ^| "download" exit /b 1 diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh index fb405b569..c9a75ba00 100755 --- a/devscripts/run_tests.sh +++ b/devscripts/run_tests.sh @@ -3,12 +3,12 @@ if [ -z $1 ]; then test_set='test' elif [ $1 = 'core' ]; then - test_set='not download' + test_set="-m not download" elif [ $1 = 'download' ]; then - test_set='download' + test_set="-m download" else echo 'Invalid test type "'$1'". 
Use "core" | "download"' exit 1 fi -python3 -m pytest -k "$test_set" +python3 -m pytest "$test_set" From 975a0d0df98a68d936c86a77175f2b0e86b576f5 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 12 Oct 2021 16:47:18 +0530 Subject: [PATCH 0058/2552] Calculate more fields for merged formats Closes #947 --- yt_dlp/YoutubeDL.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 8878d710f..d9b3ce98d 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1848,11 +1848,18 @@ class YoutubeDL(object): else: output_ext = 'mkv' + filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info)) + new_dict = { 'requested_formats': formats_info, - 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info), - 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info), + 'format': '+'.join(filtered('format')), + 'format_id': '+'.join(filtered('format_id')), 'ext': output_ext, + 'protocol': '+'.join(map(determine_protocol, formats_info)), + 'language': '+'.join(orderedSet(filtered('language'))), + 'format_note': '+'.join(orderedSet(filtered('format_note'))), + 'filesize_approx': sum(filtered('filesize', 'filesize_approx')), + 'tbr': sum(filtered('tbr', 'vbr', 'abr')), } if the_only_video: @@ -1870,6 +1877,7 @@ class YoutubeDL(object): new_dict.update({ 'acodec': the_only_audio.get('acodec'), 'abr': the_only_audio.get('abr'), + 'asr': the_only_audio.get('asr'), }) return new_dict From c111cefa5de2337fc677367ee2d727b8a56e3fd0 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 12 Oct 2021 16:50:04 +0530 Subject: [PATCH 0059/2552] [downloader/ffmpeg] Improve simultaneous download and merge --- README.md | 2 ++ yt_dlp/YoutubeDL.py | 9 ++------- yt_dlp/downloader/__init__.py | 15 ++++++++++----- yt_dlp/downloader/external.py | 4 ++++ 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 1a46b25f4..dd9cbc7fc 100644 --- a/README.md +++ b/README.md @@ -1179,6 +1179,8 @@ $ yt-dlp -o - BaW_jenozKc By default, yt-dlp tries to download the best available quality if you **don't** pass any options. This is generally equivalent to using `-f bestvideo*+bestaudio/best`. However, if multiple audiostreams is enabled (`--audio-multistreams`), the default format changes to `-f bestvideo+bestaudio/best`. Similarly, if ffmpeg is unavailable, or if you use yt-dlp to stream to `stdout` (`-o -`), the default becomes `-f best/bestvideo+bestaudio`. +**Deprecation warning**: Latest versions of yt-dlp can stream multiple formats to the stdout simultaneously using ffmpeg. So, in future versions, the default for this will be set to `-f bv*+ba/b` similar to normal downloads. If you want to preserve the `-f b/bv+ba` setting, it is recommended to explicitly specify it in the configuration options. + The general syntax for format selection is `-f FORMAT` (or `--format FORMAT`) where `FORMAT` is a *selector expression*, i.e. an expression that describes format or formats you would like to download. **tl;dr:** [navigate me to examples](#format-selection-examples). 
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index d9b3ce98d..1afe17639 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2744,14 +2744,9 @@ class YoutubeDL(object): dl_filename = existing_file(full_filename, temp_filename) info_dict['__real_download'] = False - _protocols = set(determine_protocol(f) for f in requested_formats) - if len(_protocols) == 1: # All requested formats have same protocol - info_dict['protocol'] = _protocols.pop() - directly_mergable = FFmpegFD.can_merge_formats(info_dict, self.params) if dl_filename is not None: self.report_file_already_downloaded(dl_filename) - elif (directly_mergable and get_suitable_downloader( - info_dict, self.params, to_stdout=(temp_filename == '-')) == FFmpegFD): + elif get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-'): info_dict['url'] = '\n'.join(f['url'] for f in requested_formats) success, real_download = self.dl(temp_filename, info_dict) info_dict['__real_download'] = real_download @@ -2769,7 +2764,7 @@ class YoutubeDL(object): 'The formats won\'t be merged.') if temp_filename == '-': - reason = ('using a downloader other than ffmpeg' if directly_mergable + reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict) else 'but the formats are incompatible for simultaneous download' if merger.available else 'but ffmpeg is not installed') self.report_warning( diff --git a/yt_dlp/downloader/__init__.py b/yt_dlp/downloader/__init__.py index 739d98c2b..2449c7411 100644 --- a/yt_dlp/downloader/__init__.py +++ b/yt_dlp/downloader/__init__.py @@ -10,10 +10,15 @@ from ..utils import ( def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=None, to_stdout=False): info_dict['protocol'] = determine_protocol(info_dict) info_copy = info_dict.copy() - if protocol: - info_copy['protocol'] = protocol info_copy['to_stdout'] = to_stdout - return _get_suitable_downloader(info_copy, params, default) + + downloaders = [_get_suitable_downloader(info_copy, proto, params, default) + for proto in (protocol or info_copy['protocol']).split('+')] + if set(downloaders) == {FFmpegFD} and FFmpegFD.can_merge_formats(info_copy, params): + return FFmpegFD + elif len(downloaders) == 1: + return downloaders[0] + return None # Some of these require get_suitable_downloader @@ -72,7 +77,7 @@ def shorten_protocol_name(proto, simplify=False): return short_protocol_names.get(proto, proto) -def _get_suitable_downloader(info_dict, params, default): +def _get_suitable_downloader(info_dict, protocol, params, default): """Get the downloader class that can handle the info dict.""" if default is NO_DEFAULT: default = HttpFD @@ -80,7 +85,7 @@ def _get_suitable_downloader(info_dict, params, default): # if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict): # return FFmpegFD - protocol = info_dict['protocol'] + info_dict['protocol'] = protocol downloaders = params.get('external_downloader') external_downloader = ( downloaders if isinstance(downloaders, compat_str) or downloaders is None diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 9c1229cf6..3c0202f22 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -327,6 +327,10 @@ class FFmpegFD(ExternalFD): # Fixme: This may be wrong when --ffmpeg-location is used return FFmpegPostProcessor().available + @classmethod + def supports(cls, info_dict): + return all(proto in 
cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+'))
+
     def on_process_started(self, proc, stdin):
         """ Override this in subclasses """
         pass

From b836dc94f2ba0d9953f61ba6bcec2a4ced504beb Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Tue, 12 Oct 2021 17:34:24 +0530
Subject: [PATCH 0060/2552] [outtmpl] Fix bug in expanding environment variables

---
 test/test_YoutubeDL.py | 6 ++++++
 yt_dlp/YoutubeDL.py    | 8 +++-----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index 06963f7a8..bd2d752e2 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -817,6 +817,12 @@ class TestYoutubeDL(unittest.TestCase):
         compat_setenv('__yt_dlp_var', 'expanded')
         envvar = '%__yt_dlp_var%' if compat_os_name == 'nt' else '$__yt_dlp_var'
         test(envvar, (envvar, 'expanded'))
+        if compat_os_name == 'nt':
+            test('%s%', ('%s%', '%s%'))
+            compat_setenv('s', 'expanded')
+            test('%s%', ('%s%', 'expanded'))  # %s% should be expanded before escaping %s
+            compat_setenv('(test)s', 'expanded')
+            test('%(test)s%', ('NA%', 'expanded'))  # Environment should take priority over template
 
         # Path expansion and escaping
         test('Hello %(title1)s', 'Hello $PATH')
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 1afe17639..9d91d72ec 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -1034,7 +1034,7 @@ class YoutubeDL(object):
 
         def create_key(outer_mobj):
             if not outer_mobj.group('has_key'):
-                return f'%{outer_mobj.group(0)}'
+                return outer_mobj.group(0)
             key = outer_mobj.group('key')
             mobj = re.match(INTERNAL_FORMAT_RE, key)
             initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
@@ -1105,10 +1105,8 @@ class YoutubeDL(object):
                 compat_str(v),
                 restricted=self.params.get('restrictfilenames'),
                 is_id=(k == 'id' or k.endswith('_id')))
-        outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
-        outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
-        outtmpl = self.escape_outtmpl(self._outtmpl_expandpath(outtmpl))
-        filename = outtmpl % template_dict
+        outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
+        filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)
 
         force_ext = OUTTMPL_TYPES.get(tmpl_type)
         if filename and force_ext is not None:

From fc5c8b6492d0c269191a32d7836b8a94416b804e Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Wed, 13 Oct 2021 04:11:25 +0530
Subject: [PATCH 0061/2552] [aria2c] Fix --skip-unavailable-fragments

---
 yt_dlp/downloader/external.py | 89 ++++++++++++++++++-----------------
 1 file changed, 45 insertions(+), 44 deletions(-)

diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py
index 3c0202f22..40b9dcfe3 100644
--- a/yt_dlp/downloader/external.py
+++ b/yt_dlp/downloader/external.py
@@ -115,55 +115,56 @@ class ExternalFD(FragmentFD):
 
         self._debug_cmd(cmd)
 
-        if 'fragments' in info_dict:
-            fragment_retries = self.params.get('fragment_retries', 0)
-            skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
-
-            count = 0
-            while count <= fragment_retries:
-                p = subprocess.Popen(
-                    cmd, stderr=subprocess.PIPE)
-                _, stderr = process_communicate_or_kill(p)
-                if p.returncode == 0:
-                    break
-                # TODO: Decide whether to retry based on error code
-                # https://aria2.github.io/manual/en/html/aria2c.html#exit-status
-                self.to_stderr(stderr.decode('utf-8', 'replace'))
-                count += 1
-                if count <= fragment_retries:
-                    self.to_screen(
-                        '[%s] 
Got error. Retrying fragments (attempt %d of %s)...' - % (self.get_basename(), count, self.format_retries(fragment_retries))) - if count > fragment_retries: - if not skip_unavailable_fragments: - self.report_error('Giving up after %s fragment retries' % fragment_retries) - return -1 - - decrypt_fragment = self.decrypter(info_dict) - dest, _ = sanitize_open(tmpfilename, 'wb') - for frag_index, fragment in enumerate(info_dict['fragments']): - fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index) - try: - src, _ = sanitize_open(fragment_filename, 'rb') - except IOError: - if skip_unavailable_fragments and frag_index > 1: - self.to_screen('[%s] Skipping fragment %d ...' % (self.get_basename(), frag_index)) - continue - self.report_error('Unable to open fragment %d' % frag_index) - return -1 - dest.write(decrypt_fragment(fragment, src.read())) - src.close() - if not self.params.get('keep_fragments', False): - os.remove(encodeFilename(fragment_filename)) - dest.close() - os.remove(encodeFilename('%s.frag.urls' % tmpfilename)) - else: + if 'fragments' not in info_dict: p = subprocess.Popen( cmd, stderr=subprocess.PIPE) _, stderr = process_communicate_or_kill(p) if p.returncode != 0: self.to_stderr(stderr.decode('utf-8', 'replace')) - return p.returncode + return p.returncode + + fragment_retries = self.params.get('fragment_retries', 0) + skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) + + count = 0 + while count <= fragment_retries: + p = subprocess.Popen( + cmd, stderr=subprocess.PIPE) + _, stderr = process_communicate_or_kill(p) + if p.returncode == 0: + break + # TODO: Decide whether to retry based on error code + # https://aria2.github.io/manual/en/html/aria2c.html#exit-status + self.to_stderr(stderr.decode('utf-8', 'replace')) + count += 1 + if count <= fragment_retries: + self.to_screen( + '[%s] Got error. Retrying fragments (attempt %d of %s)...' + % (self.get_basename(), count, self.format_retries(fragment_retries))) + if count > fragment_retries: + if not skip_unavailable_fragments: + self.report_error('Giving up after %s fragment retries' % fragment_retries) + return -1 + + decrypt_fragment = self.decrypter(info_dict) + dest, _ = sanitize_open(tmpfilename, 'wb') + for frag_index, fragment in enumerate(info_dict['fragments']): + fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index) + try: + src, _ = sanitize_open(fragment_filename, 'rb') + except IOError: + if skip_unavailable_fragments and frag_index > 1: + self.to_screen('[%s] Skipping fragment %d ...' % (self.get_basename(), frag_index)) + continue + self.report_error('Unable to open fragment %d' % frag_index) + return -1 + dest.write(decrypt_fragment(fragment, src.read())) + src.close() + if not self.params.get('keep_fragments', False): + os.remove(encodeFilename(fragment_filename)) + dest.close() + os.remove(encodeFilename('%s.frag.urls' % tmpfilename)) + return 0 class CurlFD(ExternalFD): From 993191c0d5f711d4978c680d705ce09d957aa176 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Wed, 13 Oct 2021 04:42:31 +0530 Subject: [PATCH 0062/2552] Fix bug in c111cefa5de2337fc677367ee2d727b8a56e3fd0 --- yt_dlp/YoutubeDL.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 9d91d72ec..2a8c658eb 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2856,8 +2856,8 @@ class YoutubeDL(object): 'writing DASH m4a. 
Only some players support this container', FFmpegFixupM4aPP) - downloader = (get_suitable_downloader(info_dict, self.params).__name__ - if 'protocol' in info_dict else None) + downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None + downloader = downloader.__name__ if downloader else None ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD', 'malformed AAC bitstream detected', FFmpegFixupM3u8PP) ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP) From 6993f78d1bbb62b24dd77ac7fce3ead250fbe01f Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Wed, 13 Oct 2021 05:03:40 +0530 Subject: [PATCH 0063/2552] [extractor,utils] Detect more codecs/mimetypes Fixes: https://github.com/ytdl-org/youtube-dl/issues/29943 --- yt_dlp/extractor/common.py | 2 ++ yt_dlp/utils.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 5b7b8891a..14201c538 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -2646,6 +2646,8 @@ class InfoExtractor(object): content_type = mime_type elif codecs.split('.')[0] == 'stpp': content_type = 'text' + elif mimetype2ext(mime_type) in ('tt', 'dfxp', 'ttml', 'xml', 'json'): + content_type = 'text' else: self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) continue diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 8e5c08ce5..7a40258cf 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4621,7 +4621,7 @@ def parse_codecs(codecs_str): vcodec, acodec = None, None for full_codec in split_codecs: codec = full_codec.split('.')[0] - if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'): + if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora', 'dvh1', 'dvhe'): if not vcodec: vcodec = full_codec elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'): From a64907d0ac89102c9380361e385fc67167595661 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Thu, 14 Oct 2021 14:44:14 +0530 Subject: [PATCH 0064/2552] [Hotstar] Mention Dynamic Range in format id (#1265) Authored by: Ashish0804 --- yt_dlp/extractor/hotstar.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index f66d3e433..af679b906 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -70,7 +70,7 @@ class HotStarBaseIE(InfoExtractor): def _call_api_v2(self, path, video_id, st=None, cookies=None): return self._call_api_impl( '%s/content/%s' % (path, video_id), video_id, st=st, cookies=cookies, query={ - 'desired-config': 'audio_channel:stereo|dynamic_range:sdr|encryption:plain|ladder:tv|package:dash|resolution:hd|subs-tag:HotstarVIP|video_codec:vp9', + 'desired-config': 'audio_channel:stereo|container:fmp4|dynamic_range:hdr|encryption:plain|ladder:tv|package:dash|resolution:fhd|subs-tag:HotstarVIP|video_codec:h265', 'device-id': cookies.get('device_id').value if cookies.get('device_id') else compat_str(uuid.uuid4()), 'os-name': 'Windows', 'os-version': '10', @@ -196,6 +196,7 @@ class HotStarIE(HotStarBaseIE): for playback_set in playback_sets: if not isinstance(playback_set, dict): continue + dr = 
re.search(r'dynamic_range:(?P<dr>[a-z]+)', playback_set.get('tagsCombination')).group('dr') format_url = url_or_none(playback_set.get('playbackUrl')) if not format_url: continue @@ -210,12 +211,12 @@ class HotStarIE(HotStarBaseIE): hls_formats, hls_subs = self._extract_m3u8_formats_and_subtitles( format_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', headers=headers) + m3u8_id=f'{dr}-hls', headers=headers) formats.extend(hls_formats) subs = self._merge_subtitles(subs, hls_subs) elif 'package:dash' in tags or ext == 'mpd': dash_formats, dash_subs = self._extract_mpd_formats_and_subtitles( - format_url, video_id, mpd_id='dash', headers=headers) + format_url, video_id, mpd_id=f'{dr}-dash', headers=headers) formats.extend(dash_formats) subs = self._merge_subtitles(subs, dash_subs) elif ext == 'f4m': From d5a39f0badbf6155eeed5c03d14489227fc9dab2 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Thu, 14 Oct 2021 14:40:37 +0530 Subject: [PATCH 0065/2552] [http] Show the last encountered error Closes #1262 --- yt_dlp/downloader/http.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index 3bc41e5b2..2e95bb9d1 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -373,6 +373,8 @@ class HttpFD(FileDownloader): count += 1 if count <= retries: self.report_retry(e.source_error, count, retries) + else: + self.to_screen(f'[download] Got server HTTP error: {e.source_error}') continue except NextFragment: continue From a0c716bb618e525b3fbafd4ba19a8ea345db7afc Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Thu, 14 Oct 2021 14:35:10 +0530 Subject: [PATCH 0066/2552] [instagram] Show appropriate error when login is needed Closes #1264 --- yt_dlp/extractor/common.py | 4 ++-- yt_dlp/extractor/instagram.py | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 14201c538..4f358c53b 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -441,8 +441,8 @@ class InfoExtractor(object): _LOGIN_HINTS = { 'any': 'Use --cookies, --username and --password or --netrc to provide account credentials', 'cookies': ( - 'Use --cookies for the authentication. ' - 'See https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl for how to pass cookies'), + 'Use --cookies-from-browser or --cookies for the authentication. 
'
+            'See https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl for how to manually pass cookies'),
         'password': 'Use --username and --password or --netrc to provide account credentials',
     }
 
diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py
index 9aad804cf..3801c7af9 100644
--- a/yt_dlp/extractor/instagram.py
+++ b/yt_dlp/extractor/instagram.py
@@ -145,7 +145,9 @@ class InstagramIE(InfoExtractor):
         video_id = mobj.group('id')
         url = mobj.group('url')
 
-        webpage = self._download_webpage(url, video_id)
+        webpage, urlh = self._download_webpage_handle(url, video_id)
+        if 'www.instagram.com/accounts/login' in urlh.geturl().rstrip('/'):
+            self.raise_login_required('You need to log in to access this content', method='cookies')
 
         (media, video_url, description, thumbnail, timestamp, uploader,
          uploader_id, like_count, comment_count, comments, height,

From 883d4b1eecca98f069e3a75fb7667a2750d4a106 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Thu, 14 Oct 2021 09:58:29 +0530
Subject: [PATCH 0067/2552] [YoutubeDL] Write verbose header to logger

---
 yt_dlp/YoutubeDL.py | 48 ++++++++++++++++++++++++++-----------------
 1 file changed, 28 insertions(+), 20 deletions(-)

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 2a8c658eb..542a97794 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -495,7 +495,10 @@ class YoutubeDL(object):
     _screen_file = None
 
     def __init__(self, params=None, auto_init=True):
-        """Create a FileDownloader object with the given options."""
+        """Create a FileDownloader object with the given options.
+        @param auto_init    Whether to load the default extractors and print header (if verbose).
+                            Set to 'no_verbose_header' to not print the header
+        """
         if params is None:
             params = {}
         self._ies = {}
@@ -602,7 +605,8 @@ class YoutubeDL(object):
             self._setup_opener()
 
         if auto_init:
-            self.print_debug_header()
+            if auto_init != 'no_verbose_header':
+                self.print_debug_header()
             self.add_default_info_extractors()
 
         for pp_def_raw in self.params.get('postprocessors', []):
@@ -3232,28 +3236,32 @@ class YoutubeDL(object):
     def print_debug_header(self):
         if not self.params.get('verbose'):
             return
-
-        stdout_encoding = getattr(
-            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
+        get_encoding = lambda stream: getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
         encoding_str = (
-            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
+            '[debug] Encodings: locale %s, fs %s, stdout %s, stderr %s, pref %s\n' % (
                 locale.getpreferredencoding(),
                 sys.getfilesystemencoding(),
-                stdout_encoding,
+                get_encoding(self._screen_file), get_encoding(self._err_file),
                 self.get_encoding()))
-        write_string(encoding_str, encoding=None)
+
+        logger = self.params.get('logger')
+        if logger:
+            write_debug = lambda msg: logger.debug(f'[debug] {msg}')
+            write_debug(encoding_str)
+        else:
+            write_debug = lambda msg: self._write_string(f'[debug] {msg}')
+            write_string(encoding_str, encoding=None)
 
         source = detect_variant()
-        self._write_string('[debug] yt-dlp version %s%s\n' % (__version__, '' if source == 'unknown' else f' ({source})'))
+        write_debug('yt-dlp version %s%s\n' % (__version__, '' if source == 'unknown' else f' ({source})'))
         if _LAZY_LOADER:
-            self._write_string('[debug] Lazy loading extractors enabled\n')
+            write_debug('Lazy loading extractors enabled\n')
         if plugin_extractors or plugin_postprocessors:
-            self._write_string('[debug] Plugins: %s\n' % [
+            write_debug('Plugins: %s\n' % [
                 '%s%s' % 
(klass.__name__, '' if klass.__name__ == name else f' as {name}') for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())]) if self.params.get('compat_opts'): - self._write_string( - '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts'))) + write_debug('Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts'))) try: sp = subprocess.Popen( ['git', 'rev-parse', '--short', 'HEAD'], @@ -3262,7 +3270,7 @@ class YoutubeDL(object): out, err = process_communicate_or_kill(sp) out = out.decode().strip() if re.match('[0-9a-f]+', out): - self._write_string('[debug] Git HEAD: %s\n' % out) + write_debug('Git HEAD: %s\n' % out) except Exception: try: sys.exc_clear() @@ -3275,7 +3283,7 @@ class YoutubeDL(object): return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3] return impl_name - self._write_string('[debug] Python version %s (%s %s) - %s\n' % ( + write_debug('Python version %s (%s %s) - %s\n' % ( platform.python_version(), python_implementation(), platform.architecture()[0], @@ -3287,7 +3295,7 @@ class YoutubeDL(object): exe_str = ', '.join( f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v ) or 'none' - self._write_string('[debug] exe versions: %s\n' % exe_str) + write_debug('exe versions: %s\n' % exe_str) from .downloader.websocket import has_websockets from .postprocessor.embedthumbnail import has_mutagen @@ -3300,8 +3308,8 @@ class YoutubeDL(object): SQLITE_AVAILABLE and 'sqlite', KEYRING_AVAILABLE and 'keyring', )))) or 'none' - self._write_string('[debug] Optional libraries: %s\n' % lib_str) - self._write_string('[debug] ANSI escape support: stdout = %s, stderr = %s\n' % ( + write_debug('Optional libraries: %s\n' % lib_str) + write_debug('ANSI escape support: stdout = %s, stderr = %s\n' % ( supports_terminal_sequences(self._screen_file), supports_terminal_sequences(self._err_file))) @@ -3309,11 +3317,11 @@ class YoutubeDL(object): for handler in self._opener.handlers: if hasattr(handler, 'proxies'): proxy_map.update(handler.proxies) - self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n') + write_debug('Proxy map: ' + compat_str(proxy_map) + '\n') if self.params.get('call_home', False): ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8') - self._write_string('[debug] Public IP address: %s\n' % ipaddr) + write_debug('Public IP address: %s\n' % ipaddr) return latest_version = self.urlopen( 'https://yt-dl.org/latest/version').read().decode('utf-8') From 974208e15105b6bd467f1ab59ba7173ac3d0ede5 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Thu, 14 Oct 2021 17:32:48 +0530 Subject: [PATCH 0068/2552] [trovo] Support channel clips and VODs (#1246) Closes #229 Authored by: Ashish0804 --- yt_dlp/extractor/extractors.py | 2 + yt_dlp/extractor/trovo.py | 67 ++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index adf54ca7e..6bc9a2b1e 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1470,6 +1470,8 @@ from .trilulilu import TriluliluIE from .trovo import ( TrovoIE, TrovoVodIE, + TrovoChannelVodIE, + TrovoChannelClipIE, ) from .trunews import TruNewsIE from .trutv import TruTVIE diff --git a/yt_dlp/extractor/trovo.py b/yt_dlp/extractor/trovo.py index 7d6b2b88e..ec55f41f2 100644 --- a/yt_dlp/extractor/trovo.py +++ b/yt_dlp/extractor/trovo.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import 
unicode_literals +import itertools import json from .common import InfoExtractor @@ -194,3 +195,69 @@ class TrovoVodIE(TrovoBaseIE): } info.update(self._extract_streamer_info(vod_detail_info)) return info + + +class TrovoChannelBaseIE(InfoExtractor): + def _get_vod_json(self, page, uid): + raise NotImplementedError('This method must be implemented by subclasses') + + def _entries(self, uid): + for page in itertools.count(1): + vod_json = self._get_vod_json(page, uid) + vods = vod_json.get('vodInfos', []) + for vod in vods: + yield self.url_result( + 'https://trovo.live/%s/%s' % (self._TYPE, vod.get('vid')), + ie=TrovoVodIE.ie_key()) + has_more = vod_json['hasMore'] + if not has_more: + break + + def _real_extract(self, url): + id = self._match_id(url) + uid = str(self._download_json('https://gql.trovo.live/', id, query={ + 'query': '{getLiveInfo(params:{userName:"%s"}){streamerInfo{uid}}}' % id + })['data']['getLiveInfo']['streamerInfo']['uid']) + return self.playlist_result(self._entries(uid), playlist_id=uid) + + +class TrovoChannelVodIE(TrovoChannelBaseIE): + _VALID_URL = r'trovovod:(?P<id>[^\s]+)' + IE_DESC = 'All VODs of a trovo.live channel, "trovovod" keyword' + + _TESTS = [{ + 'url': 'trovovod:OneTappedYou', + 'playlist_mincount': 24, + 'info_dict': { + 'id': '100719456', + }, + }] + + _QUERY = '{getChannelLtvVideoInfos(params:{pageSize:99,currPage:%d,channelID:%s}){hasMore,vodInfos{vid}}}' + _TYPE = 'video' + + def _get_vod_json(self, page, uid): + return self._download_json('https://gql.trovo.live/', uid, query={ + 'query': self._QUERY % (page, uid) + })['data']['getChannelLtvVideoInfos'] + + +class TrovoChannelClipIE(TrovoChannelBaseIE): + _VALID_URL = r'trovoclip:(?P<id>[^\s]+)' + IE_DESC = 'All Clips of a trovo.live channel, "trovoclip" keyword' + + _TESTS = [{ + 'url': 'trovoclip:OneTappedYou', + 'playlist_mincount': 29, + 'info_dict': { + 'id': '100719456', + }, + }] + + _QUERY = '{getChannelClipVideoInfos(params:{pageSize:99,currPage:%d,channelID:%s,albumType:VOD_CLIP_ALBUM_TYPE_LATEST}){hasMore,vodInfos{vid}}}' + _TYPE = 'clip' + + def _get_vod_json(self, page, uid): + return self._download_json('https://gql.trovo.live/', uid, query={ + 'query': self._QUERY % (page, uid) + })['data']['getChannelClipVideoInfos'] From e3950399e4d471b987a2d693f8a6a476568e7c8a Mon Sep 17 00:00:00 2001 From: gustaf <86112802+18928172992817182@users.noreply.github.com> Date: Thu, 14 Oct 2021 14:04:40 +0200 Subject: [PATCH 0069/2552] [Viafree] add support for Finland (#1253) Authored by: 18928172992817182 (gustaf) --- yt_dlp/extractor/tvplay.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/tvplay.py b/yt_dlp/extractor/tvplay.py index 9b6d17f61..fbafb41f8 100644 --- a/yt_dlp/extractor/tvplay.py +++ b/yt_dlp/extractor/tvplay.py @@ -336,8 +336,8 @@ class ViafreeIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?:www\.)? 
- viafree\.(?P<country>dk|no|se) - /(?P<id>program(?:mer)?/(?:[^/]+/)+[^/?#&]+) + viafree\.(?P<country>dk|no|se|fi) + /(?P<id>(?:program(?:mer)?|ohjelmat)?/(?:[^/]+/)+[^/?#&]+) ''' _TESTS = [{ 'url': 'http://www.viafree.no/programmer/underholdning/det-beste-vorspielet/sesong-2/episode-1', @@ -389,6 +389,9 @@ class ViafreeIE(InfoExtractor): }, { 'url': 'http://www.viafree.se/program/underhallning/i-like-radio-live/sasong-1/676869', 'only_matching': True, + }, { + 'url': 'https://www.viafree.fi/ohjelmat/entertainment/amazing-makeovers/kausi-7/jakso-2', + 'only_matching': True, }] _GEO_BYPASS = False From 6ff34542d2ddfe3369f7e1b321891f155690ae80 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <Ashish08@protonmail.com> Date: Sat, 16 Oct 2021 13:21:59 +0530 Subject: [PATCH 0070/2552] [Hotstar] Raise appropriate error for DRM --- yt_dlp/extractor/hotstar.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index af679b906..12e6c53d4 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -203,35 +203,35 @@ class HotStarIE(HotStarBaseIE): format_url = re.sub( r'(?<=//staragvod)(\d)', r'web\1', format_url) tags = str_or_none(playback_set.get('tagsCombination')) or '' - if tags and 'encryption:plain' not in tags: - continue ext = determine_ext(format_url) + current_formats, current_subs = [], {} try: if 'package:hls' in tags or ext == 'm3u8': - hls_formats, hls_subs = self._extract_m3u8_formats_and_subtitles( + current_formats, current_subs = self._extract_m3u8_formats_and_subtitles( format_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id=f'{dr}-hls', headers=headers) - formats.extend(hls_formats) - subs = self._merge_subtitles(subs, hls_subs) elif 'package:dash' in tags or ext == 'mpd': - dash_formats, dash_subs = self._extract_mpd_formats_and_subtitles( + current_formats, current_subs = self._extract_mpd_formats_and_subtitles( format_url, video_id, mpd_id=f'{dr}-dash', headers=headers) - formats.extend(dash_formats) - subs = self._merge_subtitles(subs, dash_subs) elif ext == 'f4m': # produce broken files pass else: - formats.append({ + current_formats = [{ 'url': format_url, 'width': int_or_none(playback_set.get('width')), 'height': int_or_none(playback_set.get('height')), - }) + }] except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: geo_restricted = True continue + if tags and 'encryption:plain' not in tags: + for f in current_formats: + f['has_drm'] = True + formats.extend(current_formats) + subs = self._merge_subtitles(subs, current_subs) if not formats and geo_restricted: self.raise_geo_restricted(countries=['IN'], metadata_available=True) self._sort_formats(formats) From 48ee10ee8adcf61e1136a252462670ec230e9439 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Fri, 15 Oct 2021 18:50:28 +0530 Subject: [PATCH 0071/2552] Fix conflict b/w id and ext in format selection Closes #1282 --- yt_dlp/YoutubeDL.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 542a97794..aff7d6ddb 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -483,6 +483,12 @@ class YoutubeDL(object): 'track_number', 'disc_number', 'release_year', )) + _format_selection_exts = { + 'audio': {'m4a', 'mp3', 'ogg', 'aac'}, + 'video': {'mp4', 'flv', 'webm', '3gp'}, + 'storyboards': {'mhtml'}, + } + params = None _ies = {} _pps = 
{'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []} @@ -1980,11 +1986,11 @@ class YoutubeDL(object): filter_f = lambda f: _filter_f(f) and ( f.get('vcodec') != 'none' or f.get('acodec') != 'none') else: - if format_spec in ('m4a', 'mp3', 'ogg', 'aac'): # audio extension + if format_spec in self._format_selection_exts['audio']: filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' - elif format_spec in ('mp4', 'flv', 'webm', '3gp'): # video extension + elif format_spec in self._format_selection_exts['video']: filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none' - elif format_spec in ('mhtml', ): # storyboards extension + elif format_spec in self._format_selection_exts['storyboards']: filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none' else: filter_f = lambda f: f.get('format_id') == format_spec # id @@ -2259,10 +2265,18 @@ class YoutubeDL(object): formats_dict[format_id].append(format) # Make sure all formats have unique format_id + common_exts = set(ext for exts in self._format_selection_exts.values() for ext in exts) for format_id, ambiguous_formats in formats_dict.items(): - if len(ambiguous_formats) > 1: - for i, format in enumerate(ambiguous_formats): + ambigious_id = len(ambiguous_formats) > 1 + for i, format in enumerate(ambiguous_formats): + if ambigious_id: format['format_id'] = '%s-%d' % (format_id, i) + if format.get('ext') is None: + format['ext'] = determine_ext(format['url']).lower() + # Ensure there is no conflict between id and ext in format selection + # See https://github.com/yt-dlp/yt-dlp/issues/1282 + if format['format_id'] != format['ext'] and format['format_id'] in common_exts: + format['format_id'] = 'f%s' % format['format_id'] for i, format in enumerate(formats): if format.get('format') is None: @@ -2271,9 +2285,6 @@ class YoutubeDL(object): res=self.format_resolution(format), note=format_field(format, 'format_note', ' (%s)'), ) - # Automatically determine file extension if missing - if format.get('ext') is None: - format['ext'] = determine_ext(format['url']).lower() # Automatically determine protocol if missing (useful for format # selection purposes) if format.get('protocol') is None: From 03b4de722a6cf86dbcc6d17a63145ec59a573bf6 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Sat, 16 Oct 2021 18:31:00 +0530 Subject: [PATCH 0072/2552] [downloader] Fix slow progress hooks Closes #1301 --- yt_dlp/YoutubeDL.py | 16 +++++++++++----- yt_dlp/downloader/common.py | 5 +---- yt_dlp/downloader/dash.py | 5 ++--- yt_dlp/downloader/hls.py | 5 ++--- yt_dlp/postprocessor/common.py | 13 +++++++------ 5 files changed, 23 insertions(+), 21 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index aff7d6ddb..fd8ad0f98 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -950,13 +950,18 @@ class YoutubeDL(object): except ValueError as err: return err + @staticmethod + def _copy_infodict(info_dict): + info_dict = dict(info_dict) + for key in ('__original_infodict', '__postprocessors'): + info_dict.pop(key, None) + return info_dict + def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None): """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict """ info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set - info_dict = dict(info_dict) # Do not sanitize so as not to consume LazyList - for key in 
('__original_infodict', '__postprocessors'): - info_dict.pop(key, None) + info_dict = self._copy_infodict(info_dict) info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs formatSeconds(info_dict['duration'], '-' if sanitize else ':') if info_dict.get('duration', None) is not None @@ -2265,7 +2270,7 @@ class YoutubeDL(object): formats_dict[format_id].append(format) # Make sure all formats have unique format_id - common_exts = set(ext for exts in self._format_selection_exts.values() for ext in exts) + common_exts = set(itertools.chain(*self._format_selection_exts.values())) for format_id, ambiguous_formats in formats_dict.items(): ambigious_id = len(ambiguous_formats) > 1 for i, format in enumerate(ambiguous_formats): @@ -2523,7 +2528,8 @@ class YoutubeDL(object): fd.add_progress_hook(ph) urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']]) self.write_debug('Invoking downloader on "%s"' % urls) - new_info = dict(info) + + new_info = copy.deepcopy(self._copy_infodict(info)) if new_info.get('http_headers') is None: new_info['http_headers'] = self._calc_headers(new_info) return fd.download(name, new_info, subtitle) diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index 89cdffd24..96b78a968 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -405,13 +405,10 @@ class FileDownloader(object): def _hook_progress(self, status, info_dict): if not self._progress_hooks: return - info_dict = dict(info_dict) - for key in ('__original_infodict', '__postprocessors'): - info_dict.pop(key, None) + status['info_dict'] = info_dict # youtube-dl passes the same status object to all the hooks. # Some third party scripts seems to be relying on this. # So keep this behavior if possible - status['info_dict'] = copy.deepcopy(info_dict) for ph in self._progress_hooks: ph(status) diff --git a/yt_dlp/downloader/dash.py b/yt_dlp/downloader/dash.py index 734eab3ef..6444ad692 100644 --- a/yt_dlp/downloader/dash.py +++ b/yt_dlp/downloader/dash.py @@ -55,9 +55,8 @@ class DashSegmentsFD(FragmentFD): if real_downloader: self.to_screen( '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename())) - info_copy = info_dict.copy() - info_copy['fragments'] = fragments_to_download + info_dict['fragments'] = fragments_to_download fd = real_downloader(self.ydl, self.params) - return fd.real_download(filename, info_copy) + return fd.real_download(filename, info_dict) return self.download_and_append_fragments(ctx, fragments_to_download, info_dict) diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index 3c5a2617d..61312c5ba 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -245,13 +245,12 @@ class HlsFD(FragmentFD): fragments = [fragments[0] if fragments else None] if real_downloader: - info_copy = info_dict.copy() - info_copy['fragments'] = fragments + info_dict['fragments'] = fragments fd = real_downloader(self.ydl, self.params) # TODO: Make progress updates work without hooking twice # for ph in self._progress_hooks: # fd.add_progress_hook(ph) - return fd.real_download(filename, info_copy) + return fd.real_download(filename, info_dict) if is_webvtt: def pack_fragment(frag_content, frag_index): diff --git a/yt_dlp/postprocessor/common.py b/yt_dlp/postprocessor/common.py index d2daeb0fb..b36716743 100644 --- a/yt_dlp/postprocessor/common.py +++ b/yt_dlp/postprocessor/common.py @@ -17,11 +17,12 @@ class PostProcessorMetaClass(type): def 
run_wrapper(func): @functools.wraps(func) def run(self, info, *args, **kwargs): - self._hook_progress({'status': 'started'}, info) + info_copy = copy.deepcopy(self._copy_infodict(info)) + self._hook_progress({'status': 'started'}, info_copy) ret = func(self, info, *args, **kwargs) if ret is not None: _, info = ret - self._hook_progress({'status': 'finished'}, info) + self._hook_progress({'status': 'finished'}, info_copy) return ret return run @@ -93,6 +94,9 @@ class PostProcessor(metaclass=PostProcessorMetaClass): for ph in getattr(downloader, '_postprocessor_hooks', []): self.add_progress_hook(ph) + def _copy_infodict(self, info_dict): + return getattr(self._downloader, '_copy_infodict', dict)(info_dict) + @staticmethod def _restrict_to(*, video=True, audio=True, images=True): allowed = {'video': video, 'audio': audio, 'images': images} @@ -142,11 +146,8 @@ class PostProcessor(metaclass=PostProcessorMetaClass): def _hook_progress(self, status, info_dict): if not self._progress_hooks: return - info_dict = dict(info_dict) - for key in ('__original_infodict', '__postprocessors'): - info_dict.pop(key, None) status.update({ - 'info_dict': copy.deepcopy(info_dict), + 'info_dict': info_dict, 'postprocessor': self.pp_key(), }) for ph in self._progress_hooks: From 580d3274e50d9cca79189689ba53db7295ea267c Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Sat, 16 Oct 2021 20:13:23 +0530 Subject: [PATCH 0073/2552] [youtube] Expose different formats with same itag --- yt_dlp/downloader/common.py | 1 - yt_dlp/extractor/youtube.py | 9 +++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index 96b78a968..9081794db 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -1,6 +1,5 @@ from __future__ import division, unicode_literals -import copy import os import re import time diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 1ef80445e..dc9aa8ab7 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2692,7 +2692,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): itag = self._search_regex( r'/itag/(\d+)', f['url'], 'itag', default=None) if itag in itags: - continue + itag += '-hls' + if itag in itags: + continue if itag: f['format_id'] = itag itags.append(itag) @@ -2704,8 +2706,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False): itag = f['format_id'] if itag in itags: - continue + itag += '-dash' + if itag in itags: + continue if itag: + f['format_id'] = itag itags.append(itag) f['quality'] = guess_quality(f) filesize = int_or_none(self._search_regex( From 71ce444a3fece1f7de779b358943de4ac14aa0f4 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Sun, 17 Oct 2021 01:03:04 +0530 Subject: [PATCH 0074/2552] Fix --restrict-filename when used with default template --- yt_dlp/YoutubeDL.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index fd8ad0f98..8cfb18e03 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -889,8 +889,13 @@ class YoutubeDL(object): outtmpl_dict = self.params.get('outtmpl', {}) if not isinstance(outtmpl_dict, dict): outtmpl_dict = {'default': outtmpl_dict} + # Remove spaces in the default template + if self.params.get('restrictfilenames'): + sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-') + else: + sanitize = lambda x: x 
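
The two `sanitize` lambdas above rewrite the literal text of the built-in templates themselves, so the separators yt-dlp inserts (as opposed to spaces coming from metadata field values, which are restricted separately) also respect `--restrict-filenames`. A minimal standalone sketch of the effect — `DEFAULT_OUTTMPL` here is an assumed stand-in for the real table, and `sanitized_defaults` is just an illustrative helper, not yt-dlp API:

    DEFAULT_OUTTMPL = {'default': '%(title)s [%(id)s].%(ext)s'}

    def sanitized_defaults(restrictfilenames):
        # Same transformation as in the hunk above: ' - ' collapses to ' ',
        # then every remaining space in the template literal becomes '-'
        sanitize = (lambda x: x.replace(' - ', ' ').replace(' ', '-')) if restrictfilenames else (lambda x: x)
        return {k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()}

    assert sanitized_defaults(False)['default'] == '%(title)s [%(id)s].%(ext)s'
    assert sanitized_defaults(True)['default'] == '%(title)s-[%(id)s].%(ext)s'
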
outtmpl_dict.update({ - k: v for k, v in DEFAULT_OUTTMPL.items() + k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl_dict.get(k) is None}) for key, val in outtmpl_dict.items(): if isinstance(val, bytes): From dd078970ba1739cfd4fcc798a4b5026cb11c427a Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Sun, 17 Oct 2021 17:16:05 +0530 Subject: [PATCH 0075/2552] [crunchyroll] Add support for `beta.crunchyroll` URLs and fix series URLs with language code --- yt_dlp/extractor/crunchyroll.py | 56 ++++++++++++++++++++++++++++++++- yt_dlp/extractor/extractors.py | 4 ++- 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py index 256c6943f..fb05415fc 100644 --- a/yt_dlp/extractor/crunchyroll.py +++ b/yt_dlp/extractor/crunchyroll.py @@ -650,7 +650,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE): IE_NAME = 'crunchyroll:playlist' - _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)' + _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:\w{1,2}/)?(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)' _TESTS = [{ 'url': 'https://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi', @@ -672,6 +672,9 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE): # geo-restricted (US), 18+ maturity wall, non-premium will be available since 2015.11.14 'url': 'http://www.crunchyroll.com/ladies-versus-butlers?skip_wall=1', 'only_matching': True, + }, { + 'url': 'http://www.crunchyroll.com/fr/ladies-versus-butlers', + 'only_matching': True, }] def _real_extract(self, url): @@ -698,3 +701,54 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE): 'title': title, 'entries': entries, } + + +class CrunchyrollBetaIE(CrunchyrollBaseIE): + IE_NAME = 'crunchyroll:beta' + _VALID_URL = r'https?://beta\.crunchyroll\.com/(?P<lang>(?:\w{1,2}/)?)watch/(?P<internal_id>\w+)/(?P<id>[\w\-]+)/?(?:\?|$)' + _TESTS = [{ + 'url': 'https://beta.crunchyroll.com/watch/GY2P1Q98Y/to-the-future', + 'info_dict': { + 'id': '696363', + 'ext': 'mp4', + 'timestamp': 1459610100, + 'description': 'md5:a022fbec4fbb023d43631032c91ed64b', + 'uploader': 'Toei Animation', + 'title': 'World Trigger Episode 73 – To the Future', + 'upload_date': '20160402', + }, + 'params': {'skip_download': 'm3u8'}, + 'expected_warnings': ['Unable to download XML'] + }] + + def _real_extract(self, url): + lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'internal_id', 'id') + webpage = self._download_webpage(url, display_id) + episode_data = self._parse_json( + self._search_regex(r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'episode data'), + display_id)['content']['byId'][internal_id] + video_id = episode_data['external_id'].split('.')[1] + series_id = episode_data['episode_metadata']['series_slug_title'] + return self.url_result(f'https://www.crunchyroll.com/{lang}{series_id}/{display_id}-{video_id}', + CrunchyrollIE.ie_key(), video_id) + + +class CrunchyrollBetaShowIE(CrunchyrollBaseIE): + IE_NAME = 'crunchyroll:playlist:beta' + _VALID_URL = r'https?://beta\.crunchyroll\.com/(?P<lang>(?:\w{1,2}/)?)series/\w+/(?P<id>[\w\-]+)/?(?:\?|$)' + _TESTS = [{ + 'url': 
'https://beta.crunchyroll.com/series/GY19NQ2QR/Girl-Friend-BETA', + 'info_dict': { + 'id': 'girl-friend-beta', + 'title': 'Girl Friend BETA', + }, + 'playlist_mincount': 10, + }, { + 'url': 'https://beta.crunchyroll.com/it/series/GY19NQ2QR/Girl-Friend-BETA', + 'only_matching': True, + }] + + def _real_extract(self, url): + lang, series_id = self._match_valid_url(url).group('lang', 'id') + return self.url_result(f'https://www.crunchyroll.com/{lang}{series_id.lower()}', + CrunchyrollShowPlaylistIE.ie_key(), series_id) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 6bc9a2b1e..4c89c5a18 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -298,7 +298,9 @@ from .crackle import CrackleIE from .crooksandliars import CrooksAndLiarsIE from .crunchyroll import ( CrunchyrollIE, - CrunchyrollShowPlaylistIE + CrunchyrollShowPlaylistIE, + CrunchyrollBetaIE, + CrunchyrollBetaShowIE, ) from .cspan import CSpanIE from .ctsnews import CtsNewsIE From ec3f6640c1a5391380ff7d47769fb710cf817638 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Sun, 17 Oct 2021 17:34:03 +0530 Subject: [PATCH 0076/2552] [crunchyroll] Add season to flat-playlist Closes #1319 --- yt_dlp/extractor/common.py | 3 ++- yt_dlp/extractor/crunchyroll.py | 21 ++++++++++++--------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 4f358c53b..dbe7dfcbf 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1087,12 +1087,13 @@ class InfoExtractor(object): # Methods for following #608 @staticmethod - def url_result(url, ie=None, video_id=None, video_title=None): + def url_result(url, ie=None, video_id=None, video_title=None, **kwargs): """Returns a URL that points to a page that should be processed""" # TODO: ie should be the class used for getting the info video_info = {'_type': 'url', 'url': url, 'ie_key': ie} + video_info.update(kwargs) if video_id is not None: video_info['id'] = video_id if video_title is not None: diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py index fb05415fc..511ac1b2c 100644 --- a/yt_dlp/extractor/crunchyroll.py +++ b/yt_dlp/extractor/crunchyroll.py @@ -686,20 +686,23 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE): headers=self.geo_verification_headers()) title = self._html_search_meta('name', webpage, default=None) - episode_paths = re.findall( - r'(?s)<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"', - webpage) - entries = [ - self.url_result('http://www.crunchyroll.com' + ep, 'Crunchyroll', ep_id) - for ep_id, ep in episode_paths - ] - entries.reverse() + episode_re = r'<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"' + season_re = r'<a [^>]+season-dropdown[^>]+>([^<]+)' + paths = re.findall(f'(?s){episode_re}|{season_re}', webpage) + + entries, current_season = [], None + for ep_id, ep, season in paths: + if season: + current_season = season + continue + entries.append(self.url_result( + f'http://www.crunchyroll.com{ep}', CrunchyrollIE.ie_key(), ep_id, season=current_season)) return { '_type': 'playlist', 'id': show_id, 'title': title, - 'entries': entries, + 'entries': reversed(entries), } From 18f96d129b24200debf257153bcc762125d2a1f7 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Sun, 17 Oct 2021 01:04:00 +0530 Subject: [PATCH 0077/2552] [utils] Allow duration strings in filter Closes #1309 --- test/test_utils.py | 1 + yt_dlp/utils.py 
| 46 ++++++++++++++++++++-------------------------- 2 files changed, 21 insertions(+), 26 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index d20bca795..7fc431505 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1231,6 +1231,7 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') self.assertFalse(match_str('x>2K', {'x': 1200})) self.assertTrue(match_str('x>=1200 & x < 1300', {'x': 1200})) self.assertFalse(match_str('x>=1100 & x < 1200', {'x': 1200})) + self.assertTrue(match_str('x > 1:0:0', {'x': 3700})) # String self.assertFalse(match_str('y=a212', {'y': 'foobar42'})) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 7a40258cf..15bee0c47 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4756,7 +4756,6 @@ def _match_one(filter_part, dct, incomplete): (?P<key>[a-z_]+) \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s* (?: - (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)| (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)| (?P<strval>.+?) ) @@ -4764,40 +4763,35 @@ def _match_one(filter_part, dct, incomplete): ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys()))) m = operator_rex.search(filter_part) if m: - unnegated_op = COMPARISON_OPERATORS[m.group('op')] - if m.group('negation'): + m = m.groupdict() + unnegated_op = COMPARISON_OPERATORS[m['op']] + if m['negation']: op = lambda attr, value: not unnegated_op(attr, value) else: op = unnegated_op - actual_value = dct.get(m.group('key')) - if (m.group('quotedstrval') is not None - or m.group('strval') is not None + comparison_value = m['quotedstrval'] or m['strval'] or m['intval'] + if m['quote']: + comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote']) + actual_value = dct.get(m['key']) + numeric_comparison = None + if isinstance(actual_value, compat_numeric_types): # If the original field is a string and matching comparisonvalue is # a number we should respect the origin of the original field # and process comparison value as a string (see - # https://github.com/ytdl-org/youtube-dl/issues/11082). - or actual_value is not None and m.group('intval') is not None - and isinstance(actual_value, compat_str)): - comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval') - quote = m.group('quote') - if quote is not None: - comparison_value = comparison_value.replace(r'\%s' % quote, quote) - else: - if m.group('op') in STRING_OPERATORS: - raise ValueError('Operator %s only supports string values!' % m.group('op')) + # https://github.com/ytdl-org/youtube-dl/issues/11082) try: - comparison_value = int(m.group('intval')) + numeric_comparison = int(comparison_value) except ValueError: - comparison_value = parse_filesize(m.group('intval')) - if comparison_value is None: - comparison_value = parse_filesize(m.group('intval') + 'B') - if comparison_value is None: - raise ValueError( - 'Invalid integer value %r in filter part %r' % ( - m.group('intval'), filter_part)) + numeric_comparison = parse_filesize(comparison_value) + if numeric_comparison is None: + numeric_comparison = parse_filesize(f'{comparison_value}B') + if numeric_comparison is None: + numeric_comparison = parse_duration(comparison_value) + if numeric_comparison is not None and m['op'] in STRING_OPERATORS: + raise ValueError('Operator %s only supports string values!' 
% m['op']) if actual_value is None: - return incomplete or m.group('none_inclusive') - return op(actual_value, comparison_value) + return incomplete or m['none_inclusive'] + return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison) UNARY_OPERATORS = { '': lambda v: (v is True) if isinstance(v, bool) else (v is not None), From 239df021037447f71ac8b7cf3c58edc9c6abe3a6 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Sun, 17 Oct 2021 01:05:16 +0530 Subject: [PATCH 0078/2552] Make `duration_string` and `resolution` available in --match-filter Related: #1309 --- yt_dlp/YoutubeDL.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 8cfb18e03..4a7712cb6 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2177,6 +2177,9 @@ class YoutubeDL(object): if info_dict.get('display_id') is None and 'id' in info_dict: info_dict['display_id'] = info_dict['id'] + if info_dict.get('duration') is not None: + info_dict['duration_string'] = formatSeconds(info_dict['duration']) + for ts_key, date_key in ( ('timestamp', 'upload_date'), ('release_timestamp', 'release_date'), @@ -2295,10 +2298,10 @@ class YoutubeDL(object): res=self.format_resolution(format), note=format_field(format, 'format_note', ' (%s)'), ) - # Automatically determine protocol if missing (useful for format - # selection purposes) if format.get('protocol') is None: format['protocol'] = determine_protocol(format) + if format.get('resolution') is None: + format['resolution'] = self.format_resolution(format, default=None) # Add HTTP headers, so that external programs can use them from the # json output full_format_info = info_dict.copy() From 693ec74401fa8d42b0cfd5f1ef24aabade5cc275 Mon Sep 17 00:00:00 2001 From: Damiano Amatruda <damiano.amatruda@outlook.com> Date: Mon, 18 Oct 2021 03:32:46 +0200 Subject: [PATCH 0079/2552] [on24] Add extractor (#1200) Authored by: damianoamatruda --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/on24.py | 91 ++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 yt_dlp/extractor/on24.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 4c89c5a18..03d4a67f5 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -983,6 +983,7 @@ from .odatv import OdaTVIE from .odnoklassniki import OdnoklassnikiIE from .oktoberfesttv import OktoberfestTVIE from .olympics import OlympicsReplayIE +from .on24 import On24IE from .ondemandkorea import OnDemandKoreaIE from .onet import ( OnetIE, diff --git a/yt_dlp/extractor/on24.py b/yt_dlp/extractor/on24.py new file mode 100644 index 000000000..d4d824430 --- /dev/null +++ b/yt_dlp/extractor/on24.py @@ -0,0 +1,91 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + strip_or_none, + try_get, + urljoin, +) + + +class On24IE(InfoExtractor): + IE_NAME = 'on24' + IE_DESC = 'ON24' + + _VALID_URL = r'''(?x) + https?://event\.on24\.com/(?: + wcc/r/(?P<id_1>\d{7})/(?P<key_1>[0-9A-F]{32})| + eventRegistration/(?:console/EventConsoleApollo|EventLobbyServlet\?target=lobby30) + \.jsp\?(?:[^/#?]*&)?eventid=(?P<id_2>\d{7})[^/#?]*&key=(?P<key_2>[0-9A-F]{32}) + )''' + + _TESTS = [{ + 'url': 
'https://event.on24.com/eventRegistration/console/EventConsoleApollo.jsp?uimode=nextgeneration&eventid=2197467&sessionid=1&key=5DF57BE53237F36A43B478DD36277A84&contenttype=A&eventuserid=305999&playerwidth=1000&playerheight=650&caller=previewLobby&text_language_id=en&format=fhaudio&newConsole=false', + 'info_dict': { + 'id': '2197467', + 'ext': 'wav', + 'title': 'Pearson Test of English General/Pearson English International Certificate Teacher Training Guide', + 'upload_date': '20200219', + 'timestamp': 1582149600.0, + 'view_count': int, + } + }, { + 'url': 'https://event.on24.com/wcc/r/2639291/82829018E813065A122363877975752E?mode=login&email=johnsmith@gmail.com', + 'only_matching': True, + }, { + 'url': 'https://event.on24.com/eventRegistration/console/EventConsoleApollo.jsp?&eventid=2639291&sessionid=1&username=&partnerref=&format=fhvideo1&mobile=&flashsupportedmobiledevice=&helpcenter=&key=82829018E813065A122363877975752E&newConsole=true&nxChe=true&newTabCon=true&text_language_id=en&playerwidth=748&playerheight=526&eventuserid=338788762&contenttype=A&mediametricsessionid=384764716&mediametricid=3558192&usercd=369267058&mode=launch', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + event_id = mobj.group('id_1') or mobj.group('id_2') + event_key = mobj.group('key_1') or mobj.group('key_2') + + event_data = self._download_json( + 'https://event.on24.com/apic/utilApp/EventConsoleCachedServlet', + event_id, query={ + 'eventId': event_id, + 'displayProfile': 'player', + 'key': event_key, + 'contentType': 'A' + }) + event_id = str(try_get(event_data, lambda x: x['presentationLogInfo']['eventid'])) or event_id + language = event_data.get('localelanguagecode') + + formats = [] + for media in event_data.get('mediaUrlInfo', []): + media_url = urljoin('https://event.on24.com/media/news/corporatevideo/events/', str(media.get('url'))) + if not media_url: + continue + media_type = media.get('code') + if media_type == 'fhvideo1': + formats.append({ + 'format_id': 'video', + 'url': media_url, + 'language': language, + 'ext': 'mp4', + 'vcodec': 'avc1.640020', + 'acodec': 'mp4a.40.2', + }) + elif media_type == 'audio': + formats.append({ + 'format_id': 'audio', + 'url': media_url, + 'language': language, + 'ext': 'wav', + 'vcodec': 'none', + 'acodec': 'wav' + }) + self._sort_formats(formats) + + return { + 'id': event_id, + 'title': strip_or_none(event_data.get('description')), + 'timestamp': int_or_none(try_get(event_data, lambda x: x['session']['startdate']), 1000), + 'webpage_url': f'https://event.on24.com/wcc/r/{event_id}/{event_key}', + 'view_count': event_data.get('registrantcount'), + 'formats': formats, + } From e69585f8c620926d29477bc68ba9b97298646348 Mon Sep 17 00:00:00 2001 From: nyuszika7h <nyuszika7h@gmail.com> Date: Mon, 18 Oct 2021 03:34:56 +0200 Subject: [PATCH 0080/2552] [7plus] Add cookie based authentication (#1202) Closes #1103 Authored by: nyuszika7h --- yt_dlp/extractor/sevenplus.py | 46 ++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/sevenplus.py b/yt_dlp/extractor/sevenplus.py index 9f15bd7cc..210c44ab2 100644 --- a/yt_dlp/extractor/sevenplus.py +++ b/yt_dlp/extractor/sevenplus.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals +import json import re from .brightcove import BrightcoveNewIE @@ -42,9 +43,52 @@ class SevenPlusIE(BrightcoveNewIE): 'only_matching': True, }] + def _real_initialize(self): + self.token = None + + cookies = 
self._get_cookies('https://7plus.com.au') + api_key = next((x for x in cookies if x.startswith('glt_')), '')[4:] + if not api_key: # Cookies are signed out, skip login + return + + login_resp = self._download_json( + 'https://login.7plus.com.au/accounts.getJWT', None, 'Logging in', fatal=False, + query={ + 'APIKey': api_key, + 'sdk': 'js_latest', + 'login_token': cookies[f'glt_{api_key}'].value, + 'authMode': 'cookie', + 'pageURL': 'https://7plus.com.au/', + 'sdkBuild': '12471', + 'format': 'json', + }) or {} + + if 'errorMessage' in login_resp: + self.report_warning(f'Unable to login: 7plus said: {login_resp["errorMessage"]}') + return + id_token = login_resp.get('id_token') + if not id_token: + self.report_warning('Unable to login: Could not extract id token') + return + + token_resp = self._download_json( + 'https://7plus.com.au/auth/token', None, 'Getting auth token', fatal=False, + headers={'Content-Type': 'application/json'}, data=json.dumps({ + 'idToken': id_token, + 'platformId': 'web', + 'regSource': '7plus', + }).encode('utf-8')) or {} + self.token = token_resp.get('token') + if not self.token: + self.report_warning('Unable to log in: Could not extract auth token') + def _real_extract(self, url): path, episode_id = self._match_valid_url(url).groups() + headers = {} + if self.token: + headers['Authorization'] = f'Bearer {self.token}' + try: media = self._download_json( 'https://videoservice.swm.digital/playback', episode_id, query={ @@ -55,7 +99,7 @@ class SevenPlusIE(BrightcoveNewIE): 'referenceId': 'ref:' + episode_id, 'deliveryId': 'csai', 'videoType': 'vod', - })['media'] + }, headers=headers)['media'] except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: raise ExtractorError(self._parse_json( From 019a94f7d62cf9fb482ebf28e1c153486a49f319 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81kos=20S=C3=BClyi?= <sulyi.gbox@gmail.com> Date: Mon, 18 Oct 2021 03:46:49 +0200 Subject: [PATCH 0081/2552] [utils] Use `importlib` to load plugins (#1277) Authored by: sulyi --- yt_dlp/utils.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 15bee0c47..3ac2fbc4b 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -18,7 +18,7 @@ import functools import gzip import hashlib import hmac -import imp +import importlib.util import io import itertools import json @@ -6302,12 +6302,13 @@ def get_executable_path(): def load_plugins(name, suffix, namespace): - plugin_info = [None] classes = {} try: - plugin_info = imp.find_module( - name, [os.path.join(get_executable_path(), 'ytdlp_plugins')]) - plugins = imp.load_module(name, *plugin_info) + plugins_spec = importlib.util.spec_from_file_location( + name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py')) + plugins = importlib.util.module_from_spec(plugins_spec) + sys.modules[plugins_spec.name] = plugins + plugins_spec.loader.exec_module(plugins) for name in dir(plugins): if name in namespace: continue @@ -6315,11 +6316,8 @@ def load_plugins(name, suffix, namespace): continue klass = getattr(plugins, name) classes[name] = namespace[name] = klass - except ImportError: + except FileNotFoundError: pass - finally: - if plugin_info[0] is not None: - plugin_info[0].close() return classes From 01b052b2b19609a5b0f54db8fa2989562dedbdc4 Mon Sep 17 00:00:00 2001 From: LE <llacb47@users.noreply.github.com> Date: Sun, 17 Oct 2021 22:28:20 -0400 Subject: [PATCH 0082/2552] [tbs] Add tbs live streams (#1326) Authored by: llacb47 --- 
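
The `load_plugins` rewrite in the patch above replaces the long-deprecated `imp` module with `importlib`, which also does away with manually closing the file handle that `imp.find_module` used to return. The underlying pattern, as a standalone sketch — the helper name, module name, and path handling are illustrative only:

    import importlib.util
    import sys

    def load_module_from_file(name, path):
        # Build a spec for an arbitrary file path and execute the module;
        # exec_module() raises FileNotFoundError if the file is absent,
        # which is why the patch above catches exactly that exception
        spec = importlib.util.spec_from_file_location(name, path)
        module = importlib.util.module_from_spec(spec)
        sys.modules[spec.name] = module  # register before executing, as in the patch
        spec.loader.exec_module(module)
        return module
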
yt_dlp/extractor/tbs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/tbs.py b/yt_dlp/extractor/tbs.py index cad3f2f46..c7d62ff4e 100644 --- a/yt_dlp/extractor/tbs.py +++ b/yt_dlp/extractor/tbs.py @@ -16,7 +16,7 @@ from ..utils import ( class TBSIE(TurnerBaseIE): - _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com(?P<path>/(?:movies|watchtnt|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P<id>[^/?#]+))' + _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com(?P<path>/(?:movies|watchtnt|watchtbs|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P<id>[^/?#]+))' _TESTS = [{ 'url': 'http://www.tntdrama.com/shows/the-alienist/clips/monster', 'info_dict': { @@ -45,7 +45,7 @@ class TBSIE(TurnerBaseIE): drupal_settings = self._parse_json(self._search_regex( r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>', webpage, 'drupal setting'), display_id) - isLive = 'watchtnt' in path + isLive = 'watchtnt' in path or 'watchtbs' in path video_data = next(v for v in drupal_settings['turner_playlist'] if isLive or v.get('url') == path) media_id = video_data['mediaID'] From 72ab7687194f353079b4f6e6ac9a59f586c9a9ef Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Mon, 18 Oct 2021 08:09:50 +0530 Subject: [PATCH 0083/2552] [SkyNewsAU] Add extractor (#1308) Closes #1287 Authored by: Ashish0804 --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/skynewsau.py | 46 ++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 yt_dlp/extractor/skynewsau.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 03d4a67f5..ffd26ca0b 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1284,6 +1284,7 @@ from .skynewsarabia import ( SkyNewsArabiaIE, SkyNewsArabiaArticleIE, ) +from .skynewsau import SkyNewsAUIE from .sky import ( SkyNewsIE, SkySportsIE, diff --git a/yt_dlp/extractor/skynewsau.py b/yt_dlp/extractor/skynewsau.py new file mode 100644 index 000000000..b1d77951e --- /dev/null +++ b/yt_dlp/extractor/skynewsau.py @@ -0,0 +1,46 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + try_get, + unified_strdate, +) + + +class SkyNewsAUIE(InfoExtractor): + _VALID_URL = r'(?:https?://)(?:www\.)?skynews\.com\.au/[^/]+/[^/]+/[^/]+/video/(?P<id>[a-z0-9]+)' + + _TESTS = [{ + 'url': 'https://www.skynews.com.au/world-news/united-states/incredible-vision-shows-lava-overflowing-from-spains-la-palma-volcano/video/0f4c6243d6903502c01251f228b91a71', + 'info_dict': { + 'id': '6277184925001', + 'ext': 'mp4', + 'title': 'md5:60594f1ea6d5ae93e292900f4d34e9ae', + 'description': 'md5:60594f1ea6d5ae93e292900f4d34e9ae', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 76.394, + 'timestamp': 1634271300, + 'uploader_id': '5348771529001', + 'tags': ['fblink', 'msn', 'usa', 'world', 'yt'], + 'upload_date': '20211015', + }, + 'params': {'skip_download': True, 'format': 'bv'} + }] + + _API_KEY = '6krsj3w249nk779d8fukqx9f' + + def _real_extract(self, url): + id = self._match_id(url) + webpage = self._download_webpage(url, id) + embedcode = self._search_regex(r'embedcode\s?=\s?\"([^\"]+)\"', webpage, 'embedcode') + data_json = self._download_json( + f'https://content.api.news/v3/videos/brightcove/{embedcode}?api_key={self._API_KEY}', id)['content'] + return { + 'id': id, + '_type': 'url_transparent', + 'url': 
'https://players.brightcove.net/%s/default_default/index.html?videoId=%s' % tuple(embedcode.split('-')), + 'ie_key': 'BrightcoveNew', + 'title': data_json.get('caption'), + 'upload_date': unified_strdate(try_get(data_json, lambda x: x['date']['created'])), + } From 920134b2e526ccb39a368add5547788361c78fb3 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Mon, 18 Oct 2021 08:11:31 +0530 Subject: [PATCH 0084/2552] [Gronkh] Add extractor (#1299) Closes #1293 Authored by: Ashish0804 --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/gronkh.py | 43 ++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 yt_dlp/extractor/gronkh.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index ffd26ca0b..f4f817fcb 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -527,6 +527,7 @@ from .gopro import GoProIE from .goshgay import GoshgayIE from .gotostage import GoToStageIE from .gputechconf import GPUTechConfIE +from .gronkh import GronkhIE from .groupon import GrouponIE from .hbo import HBOIE from .hearthisat import HearThisAtIE diff --git a/yt_dlp/extractor/gronkh.py b/yt_dlp/extractor/gronkh.py new file mode 100644 index 000000000..a7792a5e0 --- /dev/null +++ b/yt_dlp/extractor/gronkh.py @@ -0,0 +1,43 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import unified_strdate + + +class GronkhIE(InfoExtractor): + _VALID_URL = r'(?:https?://)(?:www\.)?gronkh\.tv/stream/(?P<id>\d+)' + + _TESTS = [{ + 'url': 'https://gronkh.tv/stream/536', + 'info_dict': { + 'id': '536', + 'ext': 'mp4', + 'title': 'GTV0536, 2021-10-01 - MARTHA IS DEAD #FREiAB1830 !FF7 !horde !archiv', + 'view_count': 19491, + 'thumbnail': 'https://01.cdn.vod.farm/preview/6436746cce14e25f751260a692872b9b.jpg', + 'upload_date': '20211001' + }, + 'params': {'skip_download': True} + }] + + def _real_extract(self, url): + id = self._match_id(url) + data_json = self._download_json(f'https://api.gronkh.tv/v1/video/info?episode={id}', id) + m3u8_url = self._download_json(f'https://api.gronkh.tv/v1/video/playlist?episode={id}', id)['playlist_url'] + formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, id) + if data_json.get('vtt_url'): + subtitles.setdefault('en', []).append({ + 'url': data_json['vtt_url'], + 'ext': 'vtt', + }) + self._sort_formats(formats) + return { + 'id': id, + 'title': data_json.get('title'), + 'view_count': data_json.get('views'), + 'thumbnail': data_json.get('preview_url'), + 'upload_date': unified_strdate(data_json.get('created_at')), + 'formats': formats, + 'subtitles': subtitles, + } From 373475f03553a7fff2d20df878755bfad2fab8e5 Mon Sep 17 00:00:00 2001 From: shirt <2660574+shirt-dev@users.noreply.github.com> Date: Sun, 17 Oct 2021 22:44:20 -0400 Subject: [PATCH 0085/2552] [fragments] Pad fragments before decrypting (#1298) Closes #197, #1297, #1007 Authored by: shirt-dev --- yt_dlp/downloader/fragment.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index 6a490131b..d0eaede7e 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -355,7 +355,8 @@ class FragmentFD(FileDownloader): # not what it decrypts to. 
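
For context on the change that follows: AES-CBC can only process whole 16-byte blocks, so a fragment whose length is not block-aligned could fail to decrypt or produce corrupt output. The added lines below pad the input up to the block boundary before decrypting, then drop the PKCS#7 padding indicated by the last decrypted byte. A standalone sketch of the two halves, with illustrative helper names and an assumed 16-byte block size:

    BLOCK_SIZE = 16

    def pad_to_block(data):
        # PKCS#7-style: append 1..16 bytes, each holding the padding length
        padding_len = BLOCK_SIZE - (len(data) % BLOCK_SIZE)
        return data + bytes([padding_len] * padding_len)

    def strip_padding(plaintext):
        # The last byte tells how many trailing padding bytes to remove
        return plaintext[:-plaintext[-1]]

    assert len(pad_to_block(b'x' * 30)) % BLOCK_SIZE == 0
    assert strip_padding(pad_to_block(b'hello')) == b'hello'
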
if self.params.get('test', False): return frag_content - decrypted_data = aes_cbc_decrypt_bytes(frag_content, decrypt_info['KEY'], iv) + padding_len = 16 - (len(frag_content) % 16) + decrypted_data = aes_cbc_decrypt_bytes(frag_content + bytes([padding_len] * padding_len), decrypt_info['KEY'], iv) return decrypted_data[:-decrypted_data[-1]] return decrypt_fragment From aae16f6ed9ba1fc6943a8461d0a9aa8be6e5561d Mon Sep 17 00:00:00 2001 From: coletdjnz <colethedj@protonmail.com> Date: Mon, 18 Oct 2021 15:58:42 +1300 Subject: [PATCH 0086/2552] [youtube:comments] Fix comment section not being extracted in new layouts (#1324) Co-authored-by: coletdjnz, pukkandan --- yt_dlp/extractor/youtube.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index dc9aa8ab7..892993c9b 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2314,6 +2314,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): continuation_token = self._generate_comment_continuation(video_id) continuation = self._build_api_continuation_query(continuation_token, None) + message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1) + if message and not parent: + self.report_warning(message, video_id=video_id) + visitor_data = None is_first_continuation = parent is None @@ -2416,8 +2420,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _get_comments(self, ytcfg, video_id, contents, webpage): """Entry for comment extraction""" def _real_comment_extract(contents): - yield from self._comment_entries( - traverse_obj(contents, (..., 'itemSectionRenderer'), get_all=False), ytcfg, video_id) + renderer = next(( + item for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}) + if item.get('sectionIdentifier') == 'comment-item-section'), None) + yield from self._comment_entries(renderer, ytcfg, video_id) max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) # Force English regardless of account setting to prevent parsing issues From 24b0a72b302a8ba67eb7301911d8fedfa90f0ecc Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Mon, 18 Oct 2021 07:55:34 +0530 Subject: [PATCH 0087/2552] [cleanup] Remove broken youtube login code --- yt_dlp/extractor/youtube.py | 200 +----------------------------------- 1 file changed, 2 insertions(+), 198 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 892993c9b..b71cd4292 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -258,28 +258,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor): # If True it will raise an error if no login info is provided _LOGIN_REQUIRED = False - r''' # Unused since login is broken - _LOGIN_URL = 'https://accounts.google.com/ServiceLogin' - _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge' - - _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup' - _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge' - _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}' - ''' - def _login(self): """ Attempt to log in to YouTube. - True is returned if successful or skipped. - False is returned if login failed. - If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised. 
""" - def warn(message): - self.report_warning(message) - - # username+password login is broken if (self._LOGIN_REQUIRED and self.get_param('cookiefile') is None and self.get_param('cookiesfrombrowser') is None): @@ -287,184 +271,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'Login details are needed to download this content', method='cookies') username, password = self._get_login_info() if username: - warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies']) - return - - # Everything below this is broken! - r''' - # No authentication to be performed - if username is None: - if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None: - raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True) - # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them. - # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!') - return True - - login_page = self._download_webpage( - self._LOGIN_URL, None, - note='Downloading login page', - errnote='unable to fetch login page', fatal=False) - if login_page is False: - return - - login_form = self._hidden_inputs(login_page) - - def req(url, f_req, note, errnote): - data = login_form.copy() - data.update({ - 'pstMsg': 1, - 'checkConnection': 'youtube', - 'checkedDomains': 'youtube', - 'hl': 'en', - 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]', - 'f.req': json.dumps(f_req), - 'flowName': 'GlifWebSignIn', - 'flowEntry': 'ServiceLogin', - # TODO: reverse actual botguard identifier generation algo - 'bgRequest': '["identifier",""]', - }) - return self._download_json( - url, None, note=note, errnote=errnote, - transform_source=lambda s: re.sub(r'^[^[]*', '', s), - fatal=False, - data=urlencode_postdata(data), headers={ - 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8', - 'Google-Accounts-XSRF': 1, - }) - - lookup_req = [ - username, - None, [], None, 'US', None, None, 2, False, True, - [ - None, None, - [2, 1, None, 1, - 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', - None, [], 4], - 1, [None, None, []], None, None, None, True - ], - username, - ] - - lookup_results = req( - self._LOOKUP_URL, lookup_req, - 'Looking up account info', 'Unable to look up account info') - - if lookup_results is False: - return False - - user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str) - if not user_hash: - warn('Unable to extract user hash') - return False - - challenge_req = [ - user_hash, - None, 1, None, [1, None, None, None, [password, None, True]], - [ - None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4], - 1, [None, None, []], None, None, None, True - ]] - - challenge_results = req( - self._CHALLENGE_URL, challenge_req, - 'Logging in', 'Unable to log in') - - if challenge_results is False: - return - - login_res = try_get(challenge_results, lambda x: x[0][5], list) - if login_res: - 
login_msg = try_get(login_res, lambda x: x[5], compat_str) - warn( - 'Unable to login: %s' % 'Invalid password' - if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg) - return False - - res = try_get(challenge_results, lambda x: x[0][-1], list) - if not res: - warn('Unable to extract result entry') - return False - - login_challenge = try_get(res, lambda x: x[0][0], list) - if login_challenge: - challenge_str = try_get(login_challenge, lambda x: x[2], compat_str) - if challenge_str == 'TWO_STEP_VERIFICATION': - # SEND_SUCCESS - TFA code has been successfully sent to phone - # QUOTA_EXCEEDED - reached the limit of TFA codes - status = try_get(login_challenge, lambda x: x[5], compat_str) - if status == 'QUOTA_EXCEEDED': - warn('Exceeded the limit of TFA codes, try later') - return False - - tl = try_get(challenge_results, lambda x: x[1][2], compat_str) - if not tl: - warn('Unable to extract TL') - return False - - tfa_code = self._get_tfa_info('2-step verification code') - - if not tfa_code: - warn( - 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>' - '(Note that only TOTP (Google Authenticator App) codes work at this time.)') - return False - - tfa_code = remove_start(tfa_code, 'G-') - - tfa_req = [ - user_hash, None, 2, None, - [ - 9, None, None, None, None, None, None, None, - [None, tfa_code, True, 2] - ]] - - tfa_results = req( - self._TFA_URL.format(tl), tfa_req, - 'Submitting TFA code', 'Unable to submit TFA code') - - if tfa_results is False: - return False - - tfa_res = try_get(tfa_results, lambda x: x[0][5], list) - if tfa_res: - tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str) - warn( - 'Unable to finish TFA: %s' % 'Invalid TFA code' - if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg) - return False - - check_cookie_url = try_get( - tfa_results, lambda x: x[0][-1][2], compat_str) - else: - CHALLENGES = { - 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.", - 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.', - 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.", - } - challenge = CHALLENGES.get( - challenge_str, - '%s returned error %s.' % (self.IE_NAME, challenge_str)) - warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge) - return False - else: - check_cookie_url = try_get(res, lambda x: x[2], compat_str) - - if not check_cookie_url: - warn('Unable to extract CheckCookie URL') - return False - - check_cookie_results = self._download_webpage( - check_cookie_url, None, 'Checking cookie', fatal=False) - - if check_cookie_results is False: - return False - - if 'https://myaccount.google.com/' not in check_cookie_results: - warn('Unable to log in') - return False - - return True - ''' + self.report_warning(f'Cannot login to YouTube using username and password. 
{self._LOGIN_HINTS["cookies"]}') def _initialize_consent(self): cookies = self._get_cookies('https://www.youtube.com/') @@ -483,10 +290,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): def _real_initialize(self): self._initialize_consent() - if self._downloader is None: - return - if not self._login(): - return + self._login() _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;' _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;' From b11d210156f083f23e1bce284192314e54e4047a Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Mon, 18 Oct 2021 09:19:25 +0530 Subject: [PATCH 0088/2552] [EmbedMetadata] Allow overwriting all default metadata with `meta_default` key --- README.md | 2 +- yt_dlp/postprocessor/ffmpeg.py | 21 ++++++++++----------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index dd9cbc7fc..cbd3f337d 100644 --- a/README.md +++ b/README.md @@ -1433,7 +1433,7 @@ Note that any field created by this can be used in the [output template](#output This option also has a few special uses: * You can download an additional URL based on the metadata of the currently downloaded video. To do this, set the field `additional_urls` to the URL that you want to download. Eg: `--parse-metadata "description:(?P<additional_urls>https?://www\.vimeo\.com/\d+)` will download the first vimeo video found in the description -* You can use this to change the metadata that is embedded in the media file. To do this, set the value of the corresponding field with a `meta_` prefix. For example, any value you set to `meta_description` field will be added to the `description` field in the file. For example, you can use this to set a different "description" and "synopsis" +* You can use this to change the metadata that is embedded in the media file. To do this, set the value of the corresponding field with a `meta_` prefix. For example, any value you set to `meta_description` field will be added to the `description` field in the file. For example, you can use this to set a different "description" and "synopsis". Any value set to the `meta_` field will overwrite all default values. For reference, these are the fields yt-dlp adds by default to the file metadata: diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index e6aa2940a..e5595341d 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -10,7 +10,7 @@ import json from .common import AudioConversionError, PostProcessor -from ..compat import compat_str, compat_numeric_types +from ..compat import compat_str from ..utils import ( dfxp2srt, encodeArgument, @@ -664,15 +664,14 @@ class FFmpegMetadataPP(FFmpegPostProcessor): def _get_metadata_opts(self, info): metadata = {} + meta_prefix = 'meta_' def add(meta_list, info_list=None): - if not meta_list: - return - for info_f in variadic(info_list or meta_list): - if isinstance(info.get(info_f), (compat_str, compat_numeric_types)): - for meta_f in variadic(meta_list): - metadata[meta_f] = info[info_f] - break + value = next(( + str(info[key]) for key in [meta_prefix] + list(variadic(info_list or meta_list)) + if info.get(key) is not None), None) + if value not in ('', None): + metadata.update({meta_f: value for meta_f in variadic(meta_list)}) # See [1-4] for some info on media metadata/metadata supported # by ffmpeg. 
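
Between this hunk and the next, user-supplied fields end up with a fixed precedence: inside `add()` the literal `meta_` key is consulted before any of the default info keys, and the loop in the following hunk then lets a specific `meta_<name>` field (for example one created with `--parse-metadata`) win last. A toy model of that precedence — the `info` values are made up, and the `variadic()` handling of the real code is simplified away:

    meta_prefix = 'meta_'
    info = {'title': 'Video', 'meta_': 'Global override', 'meta_description': 'Custom blurb'}
    metadata = {}

    def add(meta_field, info_keys):
        value = next((str(info[k]) for k in [meta_prefix, *info_keys] if info.get(k) is not None), None)
        if value not in ('', None):
            metadata[meta_field] = value

    add('title', ['track', 'title'])  # 'meta_' is checked first -> 'Global override'
    for key, value in info.items():
        if value is not None and key != meta_prefix and key.startswith(meta_prefix):
            metadata[key[len(meta_prefix):]] = value

    assert metadata == {'title': 'Global override', 'description': 'Custom blurb'}
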
@@ -695,9 +694,9 @@ class FFmpegMetadataPP(FFmpegPostProcessor): add('episode_id', ('episode', 'episode_id')) add('episode_sort', 'episode_number') - prefix = 'meta_' - for key in filter(lambda k: k.startswith(prefix), info.keys()): - add(key[len(prefix):], key) + for key, value in info.items(): + if value is not None and key != meta_prefix and key.startswith(meta_prefix): + metadata[key[len(meta_prefix):]] = value for name, value in metadata.items(): yield ('-metadata', f'{name}={value}') From e820fbaa6ff41625b6f4d8453253883b86bf9ca4 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Mon, 18 Oct 2021 15:23:42 +0530 Subject: [PATCH 0089/2552] Do not verify thumbnail URLs by default Partially reverts cca80fe6110653582e8c8a8d06490b4028ffd755 and 0ba692acc8feffd46b6e1085fb4a2849b685945c Unless `--check-formats` is specified, this causes yt-dlp to return incorrect thumbnail urls. See https://github.com/yt-dlp/yt-dlp/issues/340#issuecomment-877909966, #402 But the overhead in general use is not worth it Closes #694, #725 --- yt_dlp/YoutubeDL.py | 17 +++-------------- yt_dlp/extractor/common.py | 1 - yt_dlp/extractor/youtube.py | 7 ++----- 3 files changed, 5 insertions(+), 20 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 4a7712cb6..cf97ff21c 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2095,25 +2095,14 @@ class YoutubeDL(object): t.get('url'))) def thumbnail_tester(): - if self.params.get('check_formats'): - test_all = True - to_screen = lambda msg: self.to_screen(f'[info] {msg}') - else: - test_all = False - to_screen = self.write_debug - def test_thumbnail(t): - if not test_all and not t.get('_test_url'): - return True - to_screen('Testing thumbnail %s' % t['id']) + self.to_screen(f'[info] Testing thumbnail {t["id"]}') try: self.urlopen(HEADRequest(t['url'])) except network_exceptions as err: - to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % ( - t['id'], t['url'], error_to_compat_str(err))) + self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...') return False return True - return test_thumbnail for i, t in enumerate(thumbnails): @@ -2123,7 +2112,7 @@ class YoutubeDL(object): t['resolution'] = '%dx%d' % (t['width'], t['height']) t['url'] = sanitize_url(t['url']) - if self.params.get('check_formats') is not False: + if self.params.get('check_formats'): info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse() else: info_dict['thumbnails'] = thumbnails diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index dbe7dfcbf..0a14f7c0d 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -233,7 +233,6 @@ class InfoExtractor(object): * "resolution" (optional, string "{width}x{height}", deprecated) * "filesize" (optional, int) - * "_test_url" (optional, bool) - If true, test the URL thumbnail: Full URL to a video thumbnail image. description: Full video description. uploader: Full name of the video uploader. 
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index b71cd4292..b9566a0a7 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2699,21 +2699,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # The best resolution thumbnails sometimes does not appear in the webpage # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029> - hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3'] - # TODO: Test them also? - For some videos, even these don't exist - guaranteed_thumbnail_names = [ + thumbnail_names = [ + 'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3', 'hqdefault', 'hq1', 'hq2', 'hq3', '0', 'mqdefault', 'mq1', 'mq2', 'mq3', 'default', '1', '2', '3' ] - thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names n_thumbnail_names = len(thumbnail_names) thumbnails.extend({ 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format( video_id=video_id, name=name, ext=ext, webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''), - '_test_url': name in hq_thumbnail_names, } for name in thumbnail_names for ext in ('webp', 'jpg')) for thumb in thumbnails: i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names) From 2d9ec70423121dbf280475769690f19b0034ee8b Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Mon, 18 Oct 2021 16:03:05 +0530 Subject: [PATCH 0090/2552] [ModifyChapters] Allow removing sections by timestamp Eg: --remove-chapters "*10:15-15:00". The `*` prefix is used so as to avoid any conflicts with other valid regex --- README.md | 6 +++++- yt_dlp/__init__.py | 10 +++++++++- yt_dlp/options.py | 6 +++++- yt_dlp/postprocessor/modify_chapters.py | 13 +++++++++++-- 4 files changed, 30 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index cbd3f337d..141be3315 100644 --- a/README.md +++ b/README.md @@ -847,7 +847,11 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t --no-split-chapters Do not split video based on chapters (default) --remove-chapters REGEX Remove chapters whose title matches the - given regular expression. This option can + given regular expression. Time ranges + prefixed by a "*" can also be used in place + of chapters to remove the specified range. + Eg: --remove-chapters "*10:15-15:00" + --remove-chapters "intro". This option can be used multiple times --no-remove-chapters Do not remove any chapters from the file (default) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 4b82efea7..b952cc062 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -31,6 +31,7 @@ from .utils import ( expand_path, match_filter_func, MaxDownloadsReached, + parse_duration, preferredencoding, read_batch_urls, RejectedVideoReached, @@ -490,8 +491,14 @@ def _real_main(argv=None): if opts.allsubtitles and not opts.writeautomaticsub: opts.writesubtitles = True # ModifyChapters must run before FFmpegMetadataPP - remove_chapters_patterns = [] + remove_chapters_patterns, remove_ranges = [], [] for regex in opts.remove_chapters: + if regex.startswith('*'): + dur = list(map(parse_duration, regex[1:].split('-'))) + if len(dur) == 2 and all(t is not None for t in dur): + remove_ranges.append(tuple(dur)) + continue + parser.error(f'invalid --remove-chapters time range {regex!r}. 
Must be of the form *start-end')
        try:
            remove_chapters_patterns.append(re.compile(regex))
        except re.error as err:
@@ -501,6 +508,7 @@ def _real_main(argv=None):
             'key': 'ModifyChapters',
             'remove_chapters_patterns': remove_chapters_patterns,
             'remove_sponsor_segments': opts.sponsorblock_remove,
+            'remove_ranges': remove_ranges,
             'sponsorblock_chapter_title': opts.sponsorblock_chapter_title,
             'force_keyframes': opts.force_keyframes_at_cuts
         })
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index d2dc7687b..1c99e7e7c 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -1378,7 +1378,11 @@ def parseOpts(overrideArguments=None):
     postproc.add_option(
         '--remove-chapters',
         metavar='REGEX', dest='remove_chapters', action='append',
-        help='Remove chapters whose title matches the given regular expression. This option can be used multiple times')
+        help=(
+            'Remove chapters whose title matches the given regular expression. '
+            'Time ranges prefixed by a "*" can also be used in place of chapters to remove the specified range. '
+            'Eg: --remove-chapters "*10:15-15:00" --remove-chapters "intro". '
+            'This option can be used multiple times'))
     postproc.add_option(
         '--no-remove-chapters', dest='remove_chapters', action='store_const', const=None,
         help='Do not remove any chapters from the file (default)')
diff --git a/yt_dlp/postprocessor/modify_chapters.py b/yt_dlp/postprocessor/modify_chapters.py
index 72a705fc5..a0818c41b 100644
--- a/yt_dlp/postprocessor/modify_chapters.py
+++ b/yt_dlp/postprocessor/modify_chapters.py
@@ -20,11 +20,12 @@ DEFAULT_SPONSORBLOCK_CHAPTER_TITLE = '[SponsorBlock]: %(category_names)l'
 
 
 class ModifyChaptersPP(FFmpegPostProcessor):
-    def __init__(self, downloader, remove_chapters_patterns=None, remove_sponsor_segments=None,
-                 sponsorblock_chapter_title=DEFAULT_SPONSORBLOCK_CHAPTER_TITLE, force_keyframes=False):
+    def __init__(self, downloader, remove_chapters_patterns=None, remove_sponsor_segments=None, remove_ranges=None,
+                 *, sponsorblock_chapter_title=DEFAULT_SPONSORBLOCK_CHAPTER_TITLE, force_keyframes=False):
         FFmpegPostProcessor.__init__(self, downloader)
         self._remove_chapters_patterns = set(remove_chapters_patterns or [])
         self._remove_sponsor_segments = set(remove_sponsor_segments or [])
+        self._ranges_to_remove = set(remove_ranges or [])
         self._sponsorblock_chapter_title = sponsorblock_chapter_title
         self._force_keyframes = force_keyframes
 
@@ -97,6 +98,14 @@ class ModifyChaptersPP(FFmpegPostProcessor):
         if warn_no_chapter_to_remove:
             self.to_screen('There are no matching SponsorBlock chapters')
 
+        sponsor_chapters.extend({
+            'start_time': start,
+            'end_time': end,
+            'category': 'manually_removed',
+            '_categories': [('manually_removed', start, end)],
+            'remove': True,
+        } for start, end in self._ranges_to_remove)
+
         return chapters, sponsor_chapters
 
     def _get_supported_subs(self, info):

From 17bddf3e95873230d85723e306641b2b3fcb87a9 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Mon, 18 Oct 2021 16:10:27 +0530
Subject: [PATCH 0091/2552] Reduce default `--socket-timeout`

---
 test/parameters.json | 1 -
 yt_dlp/YoutubeDL.py  | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/test/parameters.json b/test/parameters.json
index 9ca7d2ca9..8544f1ab2 100644
--- a/test/parameters.json
+++ b/test/parameters.json
@@ -44,6 +44,5 @@
     "writesubtitles": false,
     "allsubtitles": false,
     "listsubtitles": false,
-    "socket_timeout": 20,
     "fixup": "never"
 }
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index cf97ff21c..50cb11d49 100644
--- 
a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3347,7 +3347,7 @@ class YoutubeDL(object): def _setup_opener(self): timeout_val = self.params.get('socket_timeout') - self._socket_timeout = 600 if timeout_val is None else float(timeout_val) + self._socket_timeout = 20 if timeout_val is None else float(timeout_val) opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser') opts_cookiefile = self.params.get('cookiefile') From 176f1866cb437dd59cf8f600638cfd7ba2a8525e Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Mon, 18 Oct 2021 18:34:21 +0530 Subject: [PATCH 0092/2552] Add HDR information to formats --- README.md | 6 ++++-- test/test_utils.py | 22 ++++++++++++++++++++++ yt_dlp/YoutubeDL.py | 5 ++++- yt_dlp/extractor/common.py | 6 +++++- yt_dlp/extractor/youtube.py | 2 +- yt_dlp/utils.py | 12 +++++++++++- 6 files changed, 47 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 141be3315..ce76474a2 100644 --- a/README.md +++ b/README.md @@ -1060,6 +1060,7 @@ The available fields are: - `asr` (numeric): Audio sampling rate in Hertz - `vbr` (numeric): Average video bitrate in KBit/s - `fps` (numeric): Frame rate + - `dynamic_range` (string): The dynamic range of the video - `vcodec` (string): Name of the video codec in use - `container` (string): Name of the container format - `filesize` (numeric): The number of bytes, if known in advance @@ -1283,6 +1284,7 @@ The available fields are: - `width`: Width of video - `res`: Video resolution, calculated as the smallest dimension. - `fps`: Framerate of video + - `hdr`: The dynamic range of the video (`DV` > `HDR12` > `HDR10+` > `HDR10` > `SDR`) - `tbr`: Total average bitrate in KBit/s - `vbr`: Average video bitrate in KBit/s - `abr`: Average audio bitrate in KBit/s @@ -1293,9 +1295,9 @@ The available fields are: All fields, unless specified otherwise, are sorted in descending order. To reverse this, prefix the field with a `+`. Eg: `+res` prefers format with the smallest resolution. Additionally, you can suffix a preferred value for the fields, separated by a `:`. Eg: `res:720` prefers larger videos, but no larger than 720p and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two preferred values, the first for video and the second for audio. Eg: `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the nearest values to the provided by using `~` as the delimiter. Eg: `filesize~1G` prefers the format with filesize closest to 1 GiB. -The fields `hasvid` and `ie_pref` are always given highest priority in sorting, irrespective of the user-defined order. This behaviour can be changed by using `--format-sort-force`. Apart from these, the default order used is: `lang,quality,res,fps,codec:vp9.2,size,br,asr,proto,ext,hasaud,source,id`. The extractors may override this default order, but they cannot override the user-provided order. +The fields `hasvid` and `ie_pref` are always given highest priority in sorting, irrespective of the user-defined order. This behaviour can be changed by using `--format-sort-force`. Apart from these, the default order used is: `lang,quality,res,fps,hdr:12,codec:vp9.2,size,br,asr,proto,ext,hasaud,source,id`. 
The extractors may override this default order, but they cannot override the user-provided order. -Note that the default has `codec:vp9.2`; i.e. `av1` is not prefered +Note that the default has `codec:vp9.2`; i.e. `av1` is not prefered. Similarly, the default for hdr is `hdr:12`; i.e. dolby vision is not prefered. These choices are made since DV and AV1 formats are not yet fully compatible with most devices. This may be changed in the future as more devices become capable of smoothly playing back these formats. If your format selector is `worst`, the last item is selected after sorting. This means it will select the format that is worst in all respects. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps`. diff --git a/test/test_utils.py b/test/test_utils.py index 7fc431505..9a5e3f0f0 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -848,30 +848,52 @@ class TestUtil(unittest.TestCase): self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), { 'vcodec': 'avc1.77.30', 'acodec': 'mp4a.40.2', + 'dynamic_range': None, }) self.assertEqual(parse_codecs('mp4a.40.2'), { 'vcodec': 'none', 'acodec': 'mp4a.40.2', + 'dynamic_range': None, }) self.assertEqual(parse_codecs('mp4a.40.5,avc1.42001e'), { 'vcodec': 'avc1.42001e', 'acodec': 'mp4a.40.5', + 'dynamic_range': None, }) self.assertEqual(parse_codecs('avc3.640028'), { 'vcodec': 'avc3.640028', 'acodec': 'none', + 'dynamic_range': None, }) self.assertEqual(parse_codecs(', h264,,newcodec,aac'), { 'vcodec': 'h264', 'acodec': 'aac', + 'dynamic_range': None, }) self.assertEqual(parse_codecs('av01.0.05M.08'), { 'vcodec': 'av01.0.05M.08', 'acodec': 'none', + 'dynamic_range': None, + }) + self.assertEqual(parse_codecs('vp9.2'), { + 'vcodec': 'vp9.2', + 'acodec': 'none', + 'dynamic_range': 'HDR10', + }) + self.assertEqual(parse_codecs('av01.0.12M.10.0.110.09.16.09.0'), { + 'vcodec': 'av01.0.12M.10', + 'acodec': 'none', + 'dynamic_range': 'HDR10', + }) + self.assertEqual(parse_codecs('dvhe'), { + 'vcodec': 'dvhe', + 'acodec': 'none', + 'dynamic_range': 'DV', }) self.assertEqual(parse_codecs('theora, vorbis'), { 'vcodec': 'theora', 'acodec': 'vorbis', + 'dynamic_range': None, }) self.assertEqual(parse_codecs('unknownvcodec, unknownacodec'), { 'vcodec': 'unknownvcodec', diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 50cb11d49..5d8e0bded 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2291,6 +2291,8 @@ class YoutubeDL(object): format['protocol'] = determine_protocol(format) if format.get('resolution') is None: format['resolution'] = self.format_resolution(format, default=None) + if format.get('dynamic_range') is None and format.get('vcodec') != 'none': + format['dynamic_range'] = 'SDR' # Add HTTP headers, so that external programs can use them from the # json output full_format_info = info_dict.copy() @@ -3176,6 +3178,7 @@ class YoutubeDL(object): format_field(f, 'ext'), self.format_resolution(f), format_field(f, 'fps', '%d'), + format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''), '|', format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes), format_field(f, 'tbr', '%4dk'), @@ -3193,7 +3196,7 @@ class YoutubeDL(object): format_field(f, 'container', ignore=(None, f.get('ext'))), ))), ] for f in formats if f.get('preference') is None or f['preference'] >= -1000] - header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 
'PROTO', + header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', 'HDR', '|', ' FILESIZE', ' TBR', 'PROTO', '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO'] else: table = [ diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 0a14f7c0d..e00d8c42b 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -147,6 +147,8 @@ class InfoExtractor(object): * width Width of the video, if known * height Height of the video, if known * resolution Textual description of width and height + * dynamic_range The dynamic range of the video. One of: + "SDR" (None), "HDR10", "HDR10+, "HDR12", "HLG, "DV" * tbr Average bitrate of audio and video in KBit/s * abr Average audio bitrate in KBit/s * acodec Name of the audio codec in use @@ -1507,7 +1509,7 @@ class InfoExtractor(object): regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$' default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality', - 'res', 'fps', 'codec:vp9.2', 'size', 'br', 'asr', + 'res', 'fps', 'hdr:12', 'codec:vp9.2', 'size', 'br', 'asr', 'proto', 'ext', 'hasaud', 'source', 'format_id') # These must not be aliases ytdl_default = ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr', 'height', 'width', 'proto', 'vext', 'abr', 'aext', @@ -1518,6 +1520,8 @@ class InfoExtractor(object): 'order': ['av0?1', 'vp0?9.2', 'vp0?9', '[hx]265|he?vc?', '[hx]264|avc', 'vp0?8', 'mp4v|h263', 'theora', '', None, 'none']}, 'acodec': {'type': 'ordered', 'regex': True, 'order': ['opus', 'vorbis', 'aac', 'mp?4a?', 'mp3', 'e?a?c-?3', 'dts', '', None, 'none']}, + 'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range', + 'order': ['dv', '(hdr)?12', r'(hdr)?10\+', '(hdr)?10', 'hlg', '', 'sdr', None]}, 'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol', 'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.+', '.*dash', 'ws|websocket', '', 'mms|rtsp', 'none', 'f4']}, 'vext': {'type': 'ordered', 'field': 'video_ext', diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index b9566a0a7..aa58a22bf 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2653,7 +2653,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Source is given priority since formats that throttle are given lower source_preference # When throttling issue is fully fixed, remove this - self._sort_formats(formats, ('quality', 'res', 'fps', 'source', 'codec:vp9.2', 'lang')) + self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang')) keywords = get_first(video_details, 'keywords', expected_type=list) or [] if not keywords and webpage: diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 3ac2fbc4b..28431ac73 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4618,12 +4618,21 @@ def parse_codecs(codecs_str): return {} split_codecs = list(filter(None, map( str.strip, codecs_str.strip().strip(',').split(',')))) - vcodec, acodec = None, None + vcodec, acodec, hdr = None, None, None for full_codec in split_codecs: codec = full_codec.split('.')[0] if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora', 'dvh1', 'dvhe'): if not vcodec: vcodec = full_codec + if codec in ('dvh1', 'dvhe'): + hdr = 'DV' + elif codec == 'vp9' and vcodec.startswith('vp9.2'): + hdr = 'HDR10' + elif codec == 'av01': + parts = full_codec.split('.') + if len(parts) > 3 and parts[3] == '10': + hdr = 'HDR10' + vcodec = '.'.join(parts[:4]) elif codec in ('mp4a', 'opus', 'vorbis', 
'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'): if not acodec: acodec = full_codec @@ -4639,6 +4648,7 @@ def parse_codecs(codecs_str): return { 'vcodec': vcodec or 'none', 'acodec': acodec or 'none', + 'dynamic_range': hdr, } return {} From 1e520b5535fbd870f46981fc0de228dc781bc361 Mon Sep 17 00:00:00 2001 From: Zirro <code@zirro.se> Date: Mon, 18 Oct 2021 21:11:07 +0200 Subject: [PATCH 0093/2552] Add option `--no-batch-file` (#1335) Authored by: Zirro --- README.md | 1 + yt_dlp/options.py | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/README.md b/README.md index ce76474a2..d410d04d1 100644 --- a/README.md +++ b/README.md @@ -465,6 +465,7 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t stdin), one URL per line. Lines starting with '#', ';' or ']' are considered as comments and ignored + --no-batch-file Do not read URLs from batch file (default) -P, --paths [TYPES:]PATH The paths where the files should be downloaded. Specify the type of file and the path separated by a colon ":". All the diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 1c99e7e7c..0638e8642 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -971,6 +971,10 @@ def parseOpts(overrideArguments=None): dest='batchfile', metavar='FILE', help="File containing URLs to download ('-' for stdin), one URL per line. " "Lines starting with '#', ';' or ']' are considered as comments and ignored") + filesystem.add_option( + '--no-batch-file', + dest='batchfile', action='store_const', const=None, + help='Do not read URLs from batch file (default)') filesystem.add_option( '-P', '--paths', metavar='[TYPES:]PATH', dest='paths', default={}, type='str', From e619d8a752d00aa9394e41b9b0c3c0d8f348eea6 Mon Sep 17 00:00:00 2001 From: Nil Admirari <50202386+nihil-admirari@users.noreply.github.com> Date: Tue, 19 Oct 2021 08:51:05 +0000 Subject: [PATCH 0094/2552] [ModifyChapters] Do not mutate original chapters (#1322) Closes #1295 Authored by: nihil-admirari --- yt_dlp/postprocessor/modify_chapters.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/yt_dlp/postprocessor/modify_chapters.py b/yt_dlp/postprocessor/modify_chapters.py index a0818c41b..dca876200 100644 --- a/yt_dlp/postprocessor/modify_chapters.py +++ b/yt_dlp/postprocessor/modify_chapters.py @@ -31,8 +31,10 @@ class ModifyChaptersPP(FFmpegPostProcessor): @PostProcessor._restrict_to(images=False) def run(self, info): + # Chapters must be preserved intact when downloading multiple formats of the same video. chapters, sponsor_chapters = self._mark_chapters_to_remove( - info.get('chapters') or [], info.get('sponsorblock_chapters') or []) + copy.deepcopy(info.get('chapters')) or [], + copy.deepcopy(info.get('sponsorblock_chapters')) or []) if not chapters and not sponsor_chapters: return [], info @@ -126,7 +128,7 @@ class ModifyChaptersPP(FFmpegPostProcessor): cuts = [] def append_cut(c): - assert 'remove' in c + assert 'remove' in c, 'Not a cut is appended to cuts' last_to_cut = cuts[-1] if cuts else None if last_to_cut and last_to_cut['end_time'] >= c['start_time']: last_to_cut['end_time'] = max(last_to_cut['end_time'], c['end_time']) @@ -154,7 +156,7 @@ class ModifyChaptersPP(FFmpegPostProcessor): new_chapters = [] def append_chapter(c): - assert 'remove' not in c + assert 'remove' not in c, 'Cut is appended to chapters' length = c['end_time'] - c['start_time'] - excess_duration(c) # Chapter is completely covered by cuts or sponsors. 
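The `dynamic_range` detection that PATCH 0092 adds to `parse_codecs` can be summarized as a standalone sketch. This is an illustrative reduction, not the library function itself; the sample codec strings and expected values are taken from the `test_utils.py` cases in that patch.

```python
def guess_dynamic_range(codecs_str):
    # Returns None for SDR/audio-only; YoutubeDL.py above later normalizes
    # None to 'SDR' for video formats
    for full_codec in map(str.strip, codecs_str.split(',')):
        codec = full_codec.split('.')[0]
        if codec in ('dvh1', 'dvhe'):
            return 'DV'         # Dolby Vision
        if codec == 'vp9' and full_codec.startswith('vp9.2'):
            return 'HDR10'      # the vp9.2 profile implies 10-bit HDR
        if codec == 'av01':
            parts = full_codec.split('.')
            if len(parts) > 3 and parts[3] == '10':
                return 'HDR10'  # 10-bit AV1
    return None


assert guess_dynamic_range('vp9.2') == 'HDR10'
assert guess_dynamic_range('av01.0.12M.10.0.110.09.16.09.0') == 'HDR10'
assert guess_dynamic_range('dvhe') == 'DV'
assert guess_dynamic_range('avc1.77.30, mp4a.40.2') is None
```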
if length <= 0: @@ -237,7 +239,7 @@ class ModifyChaptersPP(FFmpegPostProcessor): heapq.heappush(chapters, (c['start_time'], i, c)) # (normal, sponsor) and (sponsor, sponsor) else: - assert '_categories' in c + assert '_categories' in c, 'Normal chapters overlap' cur_chapter['_was_cut'] = True c['_was_cut'] = True # Push the part after the sponsor to PQ. From 9fab498fbf38dca24ef215d4789b13dd24d7952d Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 19 Oct 2021 18:52:17 +0530 Subject: [PATCH 0095/2552] [http] Retry on socket timeout Closes #1222 --- yt_dlp/downloader/http.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index 2e95bb9d1..6290884a8 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -191,11 +191,13 @@ class HttpFD(FileDownloader): # Unexpected HTTP error raise raise RetryDownload(err) + except socket.timeout as err: + raise RetryDownload(err) except socket.error as err: - if err.errno != errno.ECONNRESET: + if err.errno in (errno.ECONNRESET, errno.ETIMEDOUT): # Connection reset is no problem, just retry - raise - raise RetryDownload(err) + raise RetryDownload(err) + raise def download(): nonlocal throttle_start From aa7785f860be0bae7135ee32fe0ef4f0ab00bbc1 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 19 Oct 2021 22:58:14 +0530 Subject: [PATCH 0096/2552] [utils] Standardize timestamp formatting code Closes #1285 --- test/test_utils.py | 8 ++++---- yt_dlp/downloader/common.py | 13 ++++++------- yt_dlp/extractor/adn.py | 9 +++------ yt_dlp/utils.py | 30 +++++++++++++++++++++++------- yt_dlp/webvtt.py | 8 ++------ 5 files changed, 38 insertions(+), 30 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 9a5e3f0f0..d84c3d3ee 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1390,21 +1390,21 @@ The first line </body> </tt>'''.encode('utf-8') srt_data = '''1 -00:00:02,080 --> 00:00:05,839 +00:00:02,080 --> 00:00:05,840 <font color="white" face="sansSerif" size="16">default style<font color="red">custom style</font></font> 2 -00:00:02,080 --> 00:00:05,839 +00:00:02,080 --> 00:00:05,840 <b><font color="cyan" face="sansSerif" size="16"><font color="lime">part 1 </font>part 2</font></b> 3 -00:00:05,839 --> 00:00:09,560 +00:00:05,840 --> 00:00:09,560 <u><font color="lime">line 3 part 3</font></u> 4 -00:00:09,560 --> 00:00:12,359 +00:00:09,560 --> 00:00:12,360 <i><u><font color="yellow"><font color="lime">inner </font>style</font></u></i> diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index 9081794db..6cfbb6657 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -12,6 +12,7 @@ from ..utils import ( format_bytes, shell_quote, timeconvert, + timetuple_from_msec, ) from ..minicurses import ( MultilineLogger, @@ -75,14 +76,12 @@ class FileDownloader(object): @staticmethod def format_seconds(seconds): - (mins, secs) = divmod(seconds, 60) - (hours, mins) = divmod(mins, 60) - if hours > 99: + time = timetuple_from_msec(seconds * 1000) + if time.hours > 99: return '--:--:--' - if hours == 0: - return '%02d:%02d' % (mins, secs) - else: - return '%02d:%02d:%02d' % (hours, mins, secs) + if not time.hours: + return '%02d:%02d' % time[1:-1] + return '%02d:%02d:%02d' % time[:-1] @staticmethod def calc_percent(byte_counter, data_len): diff --git a/yt_dlp/extractor/adn.py b/yt_dlp/extractor/adn.py index a55ebbcbd..5a1283baa 100644 --- a/yt_dlp/extractor/adn.py 
+++ b/yt_dlp/extractor/adn.py @@ -15,6 +15,7 @@ from ..compat import ( compat_ord, ) from ..utils import ( + ass_subtitles_timecode, bytes_to_intlist, bytes_to_long, ExtractorError, @@ -68,10 +69,6 @@ class ADNIE(InfoExtractor): 'end': 4, } - @staticmethod - def _ass_subtitles_timecode(seconds): - return '%01d:%02d:%02d.%02d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 100) - def _get_subtitles(self, sub_url, video_id): if not sub_url: return None @@ -117,8 +114,8 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' continue alignment = self._POS_ALIGN_MAP.get(position_align, 2) + self._LINE_ALIGN_MAP.get(line_align, 0) ssa += os.linesep + 'Dialogue: Marked=0,%s,%s,Default,,0,0,0,,%s%s' % ( - self._ass_subtitles_timecode(start), - self._ass_subtitles_timecode(end), + ass_subtitles_timecode(start), + ass_subtitles_timecode(end), '{\\a%d}' % alignment if alignment != 2 else '', text.replace('\n', '\\N').replace('<i>', '{\\i1}').replace('</i>', '{\\i0}')) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 28431ac73..b88257bc2 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2342,14 +2342,25 @@ def decodeOption(optval): return optval +_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds')) + + +def timetuple_from_msec(msec): + secs, msec = divmod(msec, 1000) + mins, secs = divmod(secs, 60) + hrs, mins = divmod(mins, 60) + return _timetuple(hrs, mins, secs, msec) + + def formatSeconds(secs, delim=':', msec=False): - if secs > 3600: - ret = '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60) - elif secs > 60: - ret = '%d%s%02d' % (secs // 60, delim, secs % 60) + time = timetuple_from_msec(secs * 1000) + if time.hours: + ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds) + elif time.minutes: + ret = '%d%s%02d' % (time.minutes, delim, time.seconds) else: - ret = '%d' % secs - return '%s.%03d' % (ret, secs % 1) if msec else ret + ret = '%d' % time.seconds + return '%s.%03d' % (ret, time.milliseconds) if msec else ret def _ssl_load_windows_store_certs(ssl_context, storename): @@ -4855,7 +4866,12 @@ def parse_dfxp_time_expr(time_expr): def srt_subtitles_timecode(seconds): - return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000) + return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000) + + +def ass_subtitles_timecode(seconds): + time = timetuple_from_msec(seconds * 1000) + return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10) def dfxp2srt(dfxp_data): diff --git a/yt_dlp/webvtt.py b/yt_dlp/webvtt.py index cd936e7e5..962aa57ad 100644 --- a/yt_dlp/webvtt.py +++ b/yt_dlp/webvtt.py @@ -13,7 +13,7 @@ in RFC 8216 §3.5 <https://tools.ietf.org/html/rfc8216#section-3.5>. import re import io -from .utils import int_or_none +from .utils import int_or_none, timetuple_from_msec from .compat import ( compat_str as str, compat_Pattern, @@ -124,11 +124,7 @@ def _format_ts(ts): Convert an MPEG PES timestamp into a WebVTT timestamp. This will lose sub-millisecond precision. 
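For reference, the shared helper and the SRT formatter from this patch, runnable on their own. The `5.84` case illustrates why the expected test output changed from `00:00:05,839` to `00:00:05,840`: `5.84 * 1000` rounds to exactly `5840.0`, whereas the old `(seconds % 1) * 1000` lost precision before scaling and truncated to `839`.

```python
import collections

_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))


def timetuple_from_msec(msec):
    # Successive divmods split the value without re-introducing float error
    secs, msec = divmod(msec, 1000)
    mins, secs = divmod(secs, 60)
    hrs, mins = divmod(mins, 60)
    return _timetuple(hrs, mins, secs, msec)


def srt_subtitles_timecode(seconds):
    return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)


assert srt_subtitles_timecode(5.84) == '00:00:05,840'
assert srt_subtitles_timecode(3661.5) == '01:01:01,500'
```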
""" - msec = int((ts + 45) // 90) - secs, msec = divmod(msec, 1000) - mins, secs = divmod(secs, 60) - hrs, mins = divmod(mins, 60) - return '%02u:%02u:%02u.%03u' % (hrs, mins, secs, msec) + return '%02u:%02u:%02u.%03u' % timetuple_from_msec(int((ts + 45) // 90)) class Block(object): From 2cda6b401d4b9af36a2db71c71e1872ab7e4a6b6 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 19 Oct 2021 22:13:45 +0530 Subject: [PATCH 0097/2552] Revert "[fragments] Pad fragments before decrypting (#1298)" This reverts commit 373475f03553a7fff2d20df878755bfad2fab8e5. --- yt_dlp/downloader/fragment.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index d0eaede7e..6a490131b 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -355,8 +355,7 @@ class FragmentFD(FileDownloader): # not what it decrypts to. if self.params.get('test', False): return frag_content - padding_len = 16 - (len(frag_content) % 16) - decrypted_data = aes_cbc_decrypt_bytes(frag_content + bytes([padding_len] * padding_len), decrypt_info['KEY'], iv) + decrypted_data = aes_cbc_decrypt_bytes(frag_content, decrypt_info['KEY'], iv) return decrypted_data[:-decrypted_data[-1]] return decrypt_fragment From b4b855ebc7fac536a85f087f6921df69dec4e470 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 19 Oct 2021 21:51:33 +0530 Subject: [PATCH 0098/2552] [fragment] Print error message when skipping fragment --- yt_dlp/downloader/external.py | 6 +++--- yt_dlp/downloader/fragment.py | 7 ++++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 40b9dcfe3..e30efb057 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -152,11 +152,11 @@ class ExternalFD(FragmentFD): fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index) try: src, _ = sanitize_open(fragment_filename, 'rb') - except IOError: + except IOError as err: if skip_unavailable_fragments and frag_index > 1: - self.to_screen('[%s] Skipping fragment %d ...' % (self.get_basename(), frag_index)) + self.report_skip_fragment(frag_index, err) continue - self.report_error('Unable to open fragment %d' % frag_index) + self.report_error(f'Unable to open fragment {frag_index}; {err}') return -1 dest.write(decrypt_fragment(fragment, src.read())) src.close() diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index 6a490131b..c345f3148 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -72,8 +72,9 @@ class FragmentFD(FileDownloader): '\r[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s) ...' % (error_to_compat_str(err), frag_index, count, self.format_retries(retries))) - def report_skip_fragment(self, frag_index): - self.to_screen('[download] Skipping fragment %d ...' 
% frag_index) + def report_skip_fragment(self, frag_index, err=None): + err = f' {err};' if err else '' + self.to_screen(f'[download]{err} Skipping fragment {frag_index:d} ...') def _prepare_url(self, info_dict, url): headers = info_dict.get('http_headers') @@ -443,7 +444,7 @@ class FragmentFD(FileDownloader): def append_fragment(frag_content, frag_index, ctx): if not frag_content: if not is_fatal(frag_index - 1): - self.report_skip_fragment(frag_index) + self.report_skip_fragment(frag_index, 'fragment not found') return True else: ctx['dest_stream'].close() From d3c93ec2b7f5bcb872b0afb169efaa2f1abdf6e2 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Wed, 20 Oct 2021 21:49:40 +0530 Subject: [PATCH 0099/2552] Don't create console for subprocesses on Windows (#1261) Closes #1251 --- yt_dlp/YoutubeDL.py | 13 +++++------- yt_dlp/cookies.py | 16 +++++++-------- yt_dlp/downloader/external.py | 20 +++++++++--------- yt_dlp/downloader/rtmp.py | 3 ++- yt_dlp/extractor/openload.py | 11 +++++----- yt_dlp/postprocessor/embedthumbnail.py | 6 +++--- yt_dlp/postprocessor/ffmpeg.py | 14 ++++++------- yt_dlp/postprocessor/sponskrub.py | 6 +++--- yt_dlp/update.py | 4 ++-- yt_dlp/utils.py | 28 ++++++++++++++++++-------- 10 files changed, 63 insertions(+), 58 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 5d8e0bded..79f0b274d 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -87,10 +87,10 @@ from .utils import ( parse_filesize, PerRequestProxyHandler, platform_name, + Popen, PostProcessingError, preferredencoding, prepend_extension, - process_communicate_or_kill, register_socks_protocols, RejectedVideoReached, render_table, @@ -578,12 +578,9 @@ class YoutubeDL(object): stdout=slave, stderr=self._err_file) try: - self._output_process = subprocess.Popen( - ['bidiv'] + width_args, **sp_kwargs - ) + self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs) except OSError: - self._output_process = subprocess.Popen( - ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs) + self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs) self._output_channel = os.fdopen(master, 'rb') except OSError as ose: if ose.errno == errno.ENOENT: @@ -3280,11 +3277,11 @@ class YoutubeDL(object): if self.params.get('compat_opts'): write_debug('Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts'))) try: - sp = subprocess.Popen( + sp = Popen( ['git', 'rev-parse', '--short', 'HEAD'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=os.path.dirname(os.path.abspath(__file__))) - out, err = process_communicate_or_kill(sp) + out, err = sp.communicate_or_kill() out = out.decode().strip() if re.match('[0-9a-f]+', out): write_debug('Git HEAD: %s\n' % out) diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index 049ec9fb1..5f7fdf584 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -17,7 +17,7 @@ from .compat import ( from .utils import ( bug_reports_message, expand_path, - process_communicate_or_kill, + Popen, YoutubeDLCookieJar, ) @@ -599,14 +599,14 @@ def _get_mac_keyring_password(browser_keyring_name, logger): return password.encode('utf-8') else: logger.debug('using find-generic-password to obtain password') - proc = subprocess.Popen(['security', 'find-generic-password', - '-w', # write password to stdout - '-a', browser_keyring_name, # match 'account' - '-s', '{} Safe Storage'.format(browser_keyring_name)], # match 'service' - stdout=subprocess.PIPE, - stderr=subprocess.DEVNULL) + proc = Popen( + ['security', 
'find-generic-password', + '-w', # write password to stdout + '-a', browser_keyring_name, # match 'account' + '-s', '{} Safe Storage'.format(browser_keyring_name)], # match 'service' + stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) try: - stdout, stderr = process_communicate_or_kill(proc) + stdout, stderr = proc.communicate_or_kill() if stdout[-1:] == b'\n': stdout = stdout[:-1] return stdout diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index e30efb057..ce3370fb7 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -22,7 +22,7 @@ from ..utils import ( handle_youtubedl_headers, check_executable, is_outdated_version, - process_communicate_or_kill, + Popen, sanitize_open, ) @@ -116,9 +116,8 @@ class ExternalFD(FragmentFD): self._debug_cmd(cmd) if 'fragments' not in info_dict: - p = subprocess.Popen( - cmd, stderr=subprocess.PIPE) - _, stderr = process_communicate_or_kill(p) + p = Popen(cmd, stderr=subprocess.PIPE) + _, stderr = p.communicate_or_kill() if p.returncode != 0: self.to_stderr(stderr.decode('utf-8', 'replace')) return p.returncode @@ -128,9 +127,8 @@ class ExternalFD(FragmentFD): count = 0 while count <= fragment_retries: - p = subprocess.Popen( - cmd, stderr=subprocess.PIPE) - _, stderr = process_communicate_or_kill(p) + p = Popen(cmd, stderr=subprocess.PIPE) + _, stderr = p.communicate_or_kill() if p.returncode == 0: break # TODO: Decide whether to retry based on error code @@ -199,8 +197,8 @@ class CurlFD(ExternalFD): self._debug_cmd(cmd) # curl writes the progress to stderr so don't capture it. - p = subprocess.Popen(cmd) - process_communicate_or_kill(p) + p = Popen(cmd) + p.communicate_or_kill() return p.returncode @@ -476,7 +474,7 @@ class FFmpegFD(ExternalFD): args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True)) self._debug_cmd(args) - proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env) + proc = Popen(args, stdin=subprocess.PIPE, env=env) if url in ('-', 'pipe:'): self.on_process_started(proc, proc.stdin) try: @@ -488,7 +486,7 @@ class FFmpegFD(ExternalFD): # streams). Note that Windows is not affected and produces playable # files (see https://github.com/ytdl-org/youtube-dl/issues/8300). 
if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and url not in ('-', 'pipe:'): - process_communicate_or_kill(proc, b'q') + proc.communicate_or_kill(b'q') else: proc.kill() proc.wait() diff --git a/yt_dlp/downloader/rtmp.py b/yt_dlp/downloader/rtmp.py index 6dca64725..90f1acfd4 100644 --- a/yt_dlp/downloader/rtmp.py +++ b/yt_dlp/downloader/rtmp.py @@ -12,6 +12,7 @@ from ..utils import ( encodeFilename, encodeArgument, get_exe_version, + Popen, ) @@ -26,7 +27,7 @@ class RtmpFD(FileDownloader): start = time.time() resume_percent = None resume_downloaded_data_len = None - proc = subprocess.Popen(args, stderr=subprocess.PIPE) + proc = Popen(args, stderr=subprocess.PIPE) cursor_in_new_line = True proc_stderr_closed = False try: diff --git a/yt_dlp/extractor/openload.py b/yt_dlp/extractor/openload.py index dfdd0e526..6ec54509b 100644 --- a/yt_dlp/extractor/openload.py +++ b/yt_dlp/extractor/openload.py @@ -17,7 +17,7 @@ from ..utils import ( get_exe_version, is_outdated_version, std_headers, - process_communicate_or_kill, + Popen, ) @@ -223,11 +223,10 @@ class PhantomJSwrapper(object): else: self.extractor.to_screen('%s: %s' % (video_id, note2)) - p = subprocess.Popen([ - self.exe, '--ssl-protocol=any', - self._TMP_FILES['script'].name - ], stdout=subprocess.PIPE, stderr=subprocess.PIPE) - out, err = process_communicate_or_kill(p) + p = Popen( + [self.exe, '--ssl-protocol=any', self._TMP_FILES['script'].name], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out, err = p.communicate_or_kill() if p.returncode != 0: raise ExtractorError( 'Executing JS failed\n:' + encodeArgument(err)) diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py index 3139a6338..918d3e788 100644 --- a/yt_dlp/postprocessor/embedthumbnail.py +++ b/yt_dlp/postprocessor/embedthumbnail.py @@ -26,9 +26,9 @@ from ..utils import ( encodeArgument, encodeFilename, error_to_compat_str, + Popen, PostProcessingError, prepend_extension, - process_communicate_or_kill, shell_quote, ) @@ -183,8 +183,8 @@ class EmbedThumbnailPP(FFmpegPostProcessor): self._report_run('atomicparsley', filename) self.write_debug('AtomicParsley command line: %s' % shell_quote(cmd)) - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - stdout, stderr = process_communicate_or_kill(p) + p = Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, stderr = p.communicate_or_kill() if p.returncode != 0: msg = stderr.decode('utf-8', 'replace').strip() raise EmbedThumbnailPPError(msg) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index e5595341d..4a0a96427 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -20,9 +20,9 @@ from ..utils import ( is_outdated_version, ISO639Utils, orderedSet, + Popen, PostProcessingError, prepend_extension, - process_communicate_or_kill, replace_extension, shell_quote, traverse_obj, @@ -178,10 +178,8 @@ class FFmpegPostProcessor(PostProcessor): encodeArgument('-i')] cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True)) self.write_debug('%s command line: %s' % (self.basename, shell_quote(cmd))) - handle = subprocess.Popen( - cmd, stderr=subprocess.PIPE, - stdout=subprocess.PIPE, stdin=subprocess.PIPE) - stdout_data, stderr_data = process_communicate_or_kill(handle) + handle = Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout_data, stderr_data = handle.communicate_or_kill() expected_ret = 0 if self.probe_available else 1 if 
handle.wait() != expected_ret: return None @@ -223,7 +221,7 @@ class FFmpegPostProcessor(PostProcessor): cmd += opts cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True)) self.write_debug('ffprobe command line: %s' % shell_quote(cmd)) - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) + p = Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) stdout, stderr = p.communicate() return json.loads(stdout.decode('utf-8', 'replace')) @@ -284,8 +282,8 @@ class FFmpegPostProcessor(PostProcessor): for i, (path, opts) in enumerate(path_opts) if path) self.write_debug('ffmpeg command line: %s' % shell_quote(cmd)) - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) - stdout, stderr = process_communicate_or_kill(p) + p = Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) + stdout, stderr = p.communicate_or_kill() if p.returncode not in variadic(expected_retcodes): stderr = stderr.decode('utf-8', 'replace').strip() self.write_debug(stderr) diff --git a/yt_dlp/postprocessor/sponskrub.py b/yt_dlp/postprocessor/sponskrub.py index 932555a0e..37e7411e4 100644 --- a/yt_dlp/postprocessor/sponskrub.py +++ b/yt_dlp/postprocessor/sponskrub.py @@ -11,9 +11,9 @@ from ..utils import ( encodeFilename, shell_quote, str_or_none, + Popen, PostProcessingError, prepend_extension, - process_communicate_or_kill, ) @@ -81,8 +81,8 @@ class SponSkrubPP(PostProcessor): self.write_debug('sponskrub command line: %s' % shell_quote(cmd)) pipe = None if self.get_param('verbose') else subprocess.PIPE - p = subprocess.Popen(cmd, stdout=pipe) - stdout = process_communicate_or_kill(p)[0] + p = Popen(cmd, stdout=pipe) + stdout = p.communicate_or_kill()[0] if p.returncode == 0: os.replace(temp_filename, filename) diff --git a/yt_dlp/update.py b/yt_dlp/update.py index 26f18bdda..e4b1280be 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -10,7 +10,7 @@ import traceback from zipimport import zipimporter from .compat import compat_realpath -from .utils import encode_compat_str +from .utils import encode_compat_str, Popen from .version import __version__ @@ -191,7 +191,7 @@ def run_update(ydl): return try: # Continues to run in the background - subprocess.Popen( + Popen( 'ping 127.0.0.1 -n 5 -w 1000 & del /F "%s.old"' % exe, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) ydl.to_screen('Updated yt-dlp to version %s' % version_id) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index b88257bc2..319f6979b 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2272,6 +2272,20 @@ def process_communicate_or_kill(p, *args, **kwargs): raise +class Popen(subprocess.Popen): + if sys.platform == 'win32': + _startupinfo = subprocess.STARTUPINFO() + _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + else: + _startupinfo = None + + def __init__(self, *args, **kwargs): + super(Popen, self).__init__(*args, **kwargs, startupinfo=self._startupinfo) + + def communicate_or_kill(self, *args, **kwargs): + return process_communicate_or_kill(self, *args, **kwargs) + + def get_subprocess_encoding(): if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5: # For subprocess calls, encode with locale encoding @@ -3977,8 +3991,7 @@ def check_executable(exe, args=[]): """ Checks if the given binary is installed somewhere in PATH, and returns its name. 
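A hedged sketch of the mechanism behind the new `Popen` wrapper: on Windows, passing a `STARTUPINFO` whose `dwFlags` include `STARTF_USESHOWWINDOW` makes the spawned process honour `wShowWindow`, which defaults to `0` (`SW_HIDE`), so no console window flashes up for each subprocess. `run_hidden` below is an illustrative helper, not part of the patch.

```python
import subprocess
import sys


def run_hidden(cmd):
    # On Windows, honour wShowWindow (default 0 == SW_HIDE) so that no
    # console window is created for the child process
    startupinfo = None
    if sys.platform == 'win32':
        startupinfo = subprocess.STARTUPINFO()
        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    proc = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
        startupinfo=startupinfo)
    out, err = proc.communicate()
    return proc.returncode, out, err


# e.g.: code, out, _ = run_hidden(['git', 'rev-parse', '--short', 'HEAD'])
```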
args can be a list of arguments for a short output (like -version) """ try: - process_communicate_or_kill(subprocess.Popen( - [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)) + Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate_or_kill() except OSError: return False return exe @@ -3992,10 +4005,9 @@ def get_exe_version(exe, args=['--version'], # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers # SIGTTOU if yt-dlp is run in the background. # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656 - out, _ = process_communicate_or_kill(subprocess.Popen( - [encodeArgument(exe)] + args, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, stderr=subprocess.STDOUT)) + out, _ = Popen( + [encodeArgument(exe)] + args, stdin=subprocess.PIPE, + stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate_or_kill() except OSError: return False if isinstance(out, bytes): # Python 2.x @@ -6155,11 +6167,11 @@ def write_xattr(path, key, value): + [encodeFilename(path, True)]) try: - p = subprocess.Popen( + p = Popen( cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) except EnvironmentError as e: raise XAttrMetadataError(e.errno, e.strerror) - stdout, stderr = process_communicate_or_kill(p) + stdout, stderr = p.communicate_or_kill() stderr = stderr.decode('utf-8', 'replace') if p.returncode != 0: raise XAttrMetadataError(p.returncode, stderr) From 27f817a84b8be5896caf7df2aeffbcc4904ecb75 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Thu, 21 Oct 2021 15:26:36 +0530 Subject: [PATCH 0100/2552] [docs] Migrate issues to use forms (#1302) Authored by: Ashish0804 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 73 ------------------ .github/ISSUE_TEMPLATE/1_broken_site.yml | 63 ++++++++++++++++ .../ISSUE_TEMPLATE/2_site_support_request.md | 60 --------------- .../ISSUE_TEMPLATE/2_site_support_request.yml | 74 +++++++++++++++++++ .../ISSUE_TEMPLATE/3_site_feature_request.md | 43 ----------- .../ISSUE_TEMPLATE/3_site_feature_request.yml | 37 ++++++++++ .github/ISSUE_TEMPLATE/4_bug_report.md | 74 ------------------- .github/ISSUE_TEMPLATE/4_bug_report.yml | 57 ++++++++++++++ .github/ISSUE_TEMPLATE/5_feature_request.md | 43 ----------- .github/ISSUE_TEMPLATE/5_feature_request.yml | 30 ++++++++ .github/ISSUE_TEMPLATE/6_question.md | 43 ----------- .github/ISSUE_TEMPLATE/6_question.yml | 30 ++++++++ .github/ISSUE_TEMPLATE/config.yml | 5 ++ .github/ISSUE_TEMPLATE_tmpl/1_broken_site.md | 73 ------------------ .github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml | 63 ++++++++++++++++ .../2_site_support_request.md | 60 --------------- .../2_site_support_request.yml | 74 +++++++++++++++++++ .../3_site_feature_request.md | 43 ----------- .../3_site_feature_request.yml | 37 ++++++++++ .github/ISSUE_TEMPLATE_tmpl/4_bug_report.md | 74 ------------------- .github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml | 57 ++++++++++++++ .../ISSUE_TEMPLATE_tmpl/5_feature_request.md | 43 ----------- .../ISSUE_TEMPLATE_tmpl/5_feature_request.yml | 30 ++++++++ .github/ISSUE_TEMPLATE_tmpl/6_question.yml | 30 ++++++++ CONTRIBUTING.md | 12 +++ Makefile | 13 ++-- 26 files changed, 606 insertions(+), 635 deletions(-) delete mode 100644 .github/ISSUE_TEMPLATE/1_broken_site.md create mode 100644 .github/ISSUE_TEMPLATE/1_broken_site.yml delete mode 100644 .github/ISSUE_TEMPLATE/2_site_support_request.md create mode 100644 .github/ISSUE_TEMPLATE/2_site_support_request.yml delete mode 100644 
.github/ISSUE_TEMPLATE/3_site_feature_request.md create mode 100644 .github/ISSUE_TEMPLATE/3_site_feature_request.yml delete mode 100644 .github/ISSUE_TEMPLATE/4_bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/4_bug_report.yml delete mode 100644 .github/ISSUE_TEMPLATE/5_feature_request.md create mode 100644 .github/ISSUE_TEMPLATE/5_feature_request.yml delete mode 100644 .github/ISSUE_TEMPLATE/6_question.md create mode 100644 .github/ISSUE_TEMPLATE/6_question.yml create mode 100644 .github/ISSUE_TEMPLATE/config.yml delete mode 100644 .github/ISSUE_TEMPLATE_tmpl/1_broken_site.md create mode 100644 .github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml delete mode 100644 .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md create mode 100644 .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml delete mode 100644 .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md create mode 100644 .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml delete mode 100644 .github/ISSUE_TEMPLATE_tmpl/4_bug_report.md create mode 100644 .github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml delete mode 100644 .github/ISSUE_TEMPLATE_tmpl/5_feature_request.md create mode 100644 .github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml create mode 100644 .github/ISSUE_TEMPLATE_tmpl/6_question.yml diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md deleted file mode 100644 index 157eca91b..000000000 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ /dev/null @@ -1,73 +0,0 @@ ---- -name: Broken site support -about: Report broken or misfunctioning site -title: "[Broken] Website Name: A short description of the issue" -labels: ['triage', 'extractor-bug'] -assignees: '' - ---- - -<!-- - -###################################################################### - WARNING! - IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE -###################################################################### - ---> - - -## Checklist - -<!-- -Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: -- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.10.10. If it's not, see https://github.com/yt-dlp/yt-dlp#update on how to update. Issues with outdated version will be REJECTED. -- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. -- Make sure that all URLs and arguments with special characters are properly quoted or escaped. -- Search the bugtracker for similar issues: https://github.com/yt-dlp/yt-dlp/issues. DO NOT post duplicates. -- Read "opening an issue" section in CONTRIBUTING.md: https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue -- Finally, confirm all RELEVANT tasks from the following by putting x into all the boxes like this [x] (Dont forget to delete the empty space) ---> - -- [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running yt-dlp version **2021.10.10** -- [ ] I've checked that all provided URLs are alive and playable in a browser -- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped -- [ ] I've searched the bugtracker for similar issues including closed ones -- [ ] I've read the opening an issue section in CONTRIBUTING.md -- [ ] I have given an appropriate title to the issue - - -## Verbose log - -<!-- -Provide the complete verbose output of yt-dlp that clearly demonstrates the problem. 
-Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v <your command line>`), copy the WHOLE output and insert it below. It should look similar to this: - [debug] System config: [] - [debug] User config: [] - [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKc'] - [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] yt-dlp version 2021.10.10 - [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 - [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 - [debug] Proxy map: {} - <more lines> ---> - -``` -PASTE VERBOSE LOG HERE - -``` -<!-- -Do not remove the above ``` ---> - - -## Description - -<!-- -Provide an explanation of your issue in an arbitrary form. Provide any additional information, suggested solution and as much context and examples as possible. -If work on your issue requires account credentials please provide them or explain how one can obtain them. ---> - -WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml new file mode 100644 index 000000000..2a492d132 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml @@ -0,0 +1,63 @@ +name: Broken site support +description: Report broken or misfunctioning site +labels: [triage, extractor-bug] +body: + - type: checkboxes + id: checklist + attributes: + label: Checklist + description: | + Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: + options: + - label: I'm reporting a broken site + required: true + - label: I've verified that I'm running yt-dlp version **2021.10.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + required: true + - label: I've checked that all provided URLs are alive and playable in a browser + required: true + - label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/ytdl-org/youtube-dl#video-url-contains-an-ampersand-and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command) + required: true + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates + required: true + - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + required: true + - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and I'm willing to share it if required + - type: input + id: region + attributes: + label: Region + description: "Enter the region the site is accessible from" + placeholder: "India" + - type: textarea + id: description + attributes: + label: Description + description: | + Provide an explanation of your issue in an arbitrary form. + Provide any additional information, any suggested solutions, and as much context and examples as possible + placeholder: WRITE DESCRIPTION HERE + validations: + required: true + - type: textarea + id: log + attributes: + label: Verbose log + description: | + Provide the complete verbose output of yt-dlp that clearly demonstrates the problem. + Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv <your command line>`), copy the WHOLE output and insert it below. 
+ It should look similar to this: + placeholder: | + [debug] Command-line config: ['-Uv', 'http://www.youtube.com/watch?v=BaW_jenozKc'] + [debug] Portable config file: yt-dlp.conf + [debug] Portable config: ['-i'] + [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 + [debug] yt-dlp version 2021.10.10 (exe) + [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 + [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 + [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets + [debug] Proxy map: {} + yt-dlp is up to date (2021.10.10) + <more lines> + render: shell + validations: + required: true diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md deleted file mode 100644 index 122034472..000000000 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -name: Site support request -about: Request support for a new site -title: "[Site Request] Website Name" -labels: ['triage', 'site-request'] -assignees: '' - ---- - -<!-- - -###################################################################### - WARNING! - IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE -###################################################################### - ---> - - -## Checklist - -<!-- -Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: -- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.10.10. If it's not, see https://github.com/yt-dlp/yt-dlp#update on how to update. Issues with outdated version will be REJECTED. -- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. -- Make sure that site you are requesting is not dedicated to copyright infringement. yt-dlp does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. -- Search the bugtracker for similar site support requests: https://github.com/yt-dlp/yt-dlp/issues. DO NOT post duplicates. -- Read "opening an issue" section in CONTRIBUTING.md: https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue -- Finally, confirm all RELEVANT tasks from the following by putting x into all the boxes like this [x] (Dont forget to delete the empty space) ---> - -- [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running yt-dlp version **2021.10.10** -- [ ] I've checked that all provided URLs are alive and playable in a browser -- [ ] I've checked that none of provided URLs violate any copyrights -- [ ] The provided URLs do not contain any DRM to the best of my knowledge -- [ ] I've searched the bugtracker for similar site support requests including closed ones -- [ ] I've read the opening an issue section in CONTRIBUTING.md -- [ ] I have given an appropriate title to the issue - - -## Example URLs - -<!-- -Provide all kinds of example URLs support for which should be included. Replace following example URLs by yours. ---> - -- Single video: https://www.youtube.com/watch?v=BaW_jenozKc -- Single video: https://youtu.be/BaW_jenozKc -- Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc - - -## Description - -<!-- -Provide any additional information. -If work on your issue requires account credentials please provide them or explain how one can obtain them. 
---> - -WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml new file mode 100644 index 000000000..c0a22ac2b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml @@ -0,0 +1,74 @@ +name: Site support request +description: Request support for a new site +labels: [triage, site-request] +body: + - type: checkboxes + id: checklist + attributes: + label: Checklist + description: | + Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: + options: + - label: I'm reporting a new site support request + required: true + - label: I've verified that I'm running yt-dlp version **2021.10.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + required: true + - label: I've checked that all provided URLs are alive and playable in a browser + required: true + - label: I've checked that none of provided URLs [violate any copyrights](https://github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free) or contain any [DRM](https://en.wikipedia.org/wiki/Digital_rights_management) to the best of my knowledge + required: true + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates + required: true + - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + required: true + - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and am willing to share it if required + - type: input + id: region + attributes: + label: Region + description: "Enter the region the site is accessible from" + placeholder: "India" + - type: textarea + id: example-urls + attributes: + label: Example URLs + description: | + Provide all kinds of example URLs, support for which should be included. Replace following example URLs by yours + value: | + - Single video: https://www.youtube.com/watch?v=BaW_jenozKc + - Single video: https://youtu.be/BaW_jenozKc + - Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc + validations: + required: true + - type: textarea + id: description + attributes: + label: Description + description: | + Provide any additional information + placeholder: WRITE DESCRIPTION HERE + validations: + required: true + - type: textarea + id: log + attributes: + label: Verbose log + description: | + Provide the complete verbose output using one of the example URLs provided above. + Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv <your command line>`), copy the WHOLE output and insert it below. 
+ It should look similar to this: + placeholder: | + [debug] Command-line config: ['-Uv', 'http://www.youtube.com/watch?v=BaW_jenozKc'] + [debug] Portable config file: yt-dlp.conf + [debug] Portable config: ['-i'] + [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 + [debug] yt-dlp version 2021.10.10 (exe) + [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 + [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 + [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets + [debug] Proxy map: {} + yt-dlp is up to date (2021.10.10) + <more lines> + render: shell + validations: + required: true diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md deleted file mode 100644 index 54536fce6..000000000 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -name: Site feature request -about: Request a new functionality for a site -title: "[Site Feature] Website Name: A short description of the feature" -labels: ['triage', 'site-enhancement'] -assignees: '' - ---- - -<!-- - -###################################################################### - WARNING! - IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE -###################################################################### - ---> - - -## Checklist - -<!-- -Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: -- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.10.10. If it's not, see https://github.com/yt-dlp/yt-dlp#update on how to update. Issues with outdated version will be REJECTED. -- Search the bugtracker for similar site feature requests: https://github.com/yt-dlp/yt-dlp/issues. DO NOT post duplicates. -- Read "opening an issue" section in CONTRIBUTING.md: https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue -- Finally, confirm all RELEVANT tasks from the following by putting x into all the boxes like this [x] (Dont forget to delete the empty space) ---> - -- [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running yt-dlp version **2021.10.10** -- [ ] I've searched the bugtracker for similar site feature requests including closed ones -- [ ] I've read the opening an issue section in CONTRIBUTING.md -- [ ] I have given an appropriate title to the issue - - -## Description - -<!-- -Provide an explanation of your site feature request in an arbitrary form. Please make sure the description is worded well enough to be understood, see https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient. Provide any additional information, suggested solution and as much context and examples as possible. 
---> - -WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml new file mode 100644 index 000000000..44c8a0816 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml @@ -0,0 +1,37 @@ +name: Site feature request +description: Request a new functionality for a site +labels: [triage, site-enhancement] +body: + - type: checkboxes + id: checklist + attributes: + label: Checklist + description: | + Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: + options: + - label: I'm reporting a site feature request + required: true + - label: I've verified that I'm running yt-dlp version **2021.10.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + required: true + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates + required: true + - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + required: true + - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and I'm willing to share it if required + - type: input + id: region + attributes: + label: Region + description: "Enter the region the site is accessible from" + placeholder: "India" + - type: textarea + id: description + attributes: + label: Description + description: | + Provide an explanation of your site feature request in an arbitrary form. + Please make sure the description is worded well enough to be understood, see [is-the-description-of-the-issue-itself-sufficient](https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient). + Provide any additional information, any suggested solutions, and as much context and examples as possible + placeholder: WRITE DESCRIPTION HERE + validations: + required: true diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md deleted file mode 100644 index 6413e8b7e..000000000 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ /dev/null @@ -1,74 +0,0 @@ ---- -name: Bug report -about: Report a bug unrelated to any particular site or extractor -title: '[Bug] A short description of the issue' -labels: ['triage', 'bug'] -assignees: '' - ---- - -<!-- - -###################################################################### - WARNING! - IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE -###################################################################### - ---> - - -## Checklist - -<!-- -Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: -- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.10.10. If it's not, see https://github.com/yt-dlp/yt-dlp#update on how to update. Issues with outdated version will be REJECTED. -- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. -- Make sure that all URLs and arguments with special characters are properly quoted or escaped. -- Search the bugtracker for similar issues: https://github.com/yt-dlp/yt-dlp/issues. DO NOT post duplicates. 
-- Read "opening an issue" section in CONTRIBUTING.md: https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue -- Finally, confirm all RELEVANT tasks from the following by putting x into all the boxes like this [x] (Dont forget to delete the empty space) ---> - -- [ ] I'm reporting a bug unrelated to a specific site -- [ ] I've verified that I'm running yt-dlp version **2021.10.10** -- [ ] I've checked that all provided URLs are alive and playable in a browser -- [ ] The provided URLs do not contain any DRM to the best of my knowledge -- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped -- [ ] I've searched the bugtracker for similar bug reports including closed ones -- [ ] I've read the opening an issue section in CONTRIBUTING.md -- [ ] I have given an appropriate title to the issue - - -## Verbose log - -<!-- -Provide the complete verbose output of yt-dlp that clearly demonstrates the problem. -Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v <your command line>`), copy the WHOLE output and insert it below. It should look similar to this: - [debug] System config: [] - [debug] User config: [] - [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKc'] - [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] yt-dlp version 2021.10.10 - [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 - [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 - [debug] Proxy map: {} - <more lines> ---> - -``` -PASTE VERBOSE LOG HERE - -``` -<!-- -Do not remove the above ``` ---> - - -## Description - -<!-- -Provide an explanation of your issue in an arbitrary form. Please make sure the description is worded well enough to be understood, see https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient. Provide any additional information, suggested solution and as much context and examples as possible. -If work on your issue requires account credentials please provide them or explain how one can obtain them. ---> - -WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml new file mode 100644 index 000000000..1c609cab1 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml @@ -0,0 +1,57 @@ +name: Bug report +description: Report a bug unrelated to any particular site or extractor +labels: [triage,bug] +body: + - type: checkboxes + id: checklist + attributes: + label: Checklist + description: | + Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: + options: + - label: I'm reporting a bug unrelated to a specific site + required: true + - label: I've verified that I'm running yt-dlp version **2021.10.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + required: true + - label: I've checked that all provided URLs are alive and playable in a browser + required: true + - label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/ytdl-org/youtube-dl#video-url-contains-an-ampersand-and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command) + required: true + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. 
DO NOT post duplicates + required: true + - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + required: true + - type: textarea + id: description + attributes: + label: Description + description: | + Provide an explanation of your issue in an arbitrary form. + Please make sure the description is worded well enough to be understood, see [is-the-description-of-the-issue-itself-sufficient](https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient). + Provide any additional information, any suggested solutions, and as much context and examples as possible + placeholder: WRITE DESCRIPTION HERE + validations: + required: true + - type: textarea + id: log + attributes: + label: Verbose log + description: | + Provide the complete verbose output of yt-dlp that clearly demonstrates the problem. + Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv <your command line>`), copy the WHOLE output and insert it below. + It should look similar to this: + placeholder: | + [debug] Command-line config: ['-Uv', 'http://www.youtube.com/watch?v=BaW_jenozKc'] + [debug] Portable config file: yt-dlp.conf + [debug] Portable config: ['-i'] + [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 + [debug] yt-dlp version 2021.10.10 (exe) + [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 + [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 + [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets + [debug] Proxy map: {} + yt-dlp is up to date (2021.10.10) + <more lines> + render: shell + validations: + required: true diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md deleted file mode 100644 index b04dbf981..000000000 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -name: Feature request -about: Request a new functionality unrelated to any particular site or extractor -title: "[Feature Request] A short description of your feature" -labels: ['triage', 'enhancement'] -assignees: '' - ---- - -<!-- - -###################################################################### - WARNING! - IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE -###################################################################### - ---> - - -## Checklist - -<!-- -Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: -- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.10.10. If it's not, see https://github.com/yt-dlp/yt-dlp#update on how to update. Issues with outdated version will be REJECTED. -- Search the bugtracker for similar feature requests: https://github.com/yt-dlp/yt-dlp/issues. DO NOT post duplicates. 
-- Read "opening an issue" section in CONTRIBUTING.md: https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue -- Finally, put x into all relevant boxes like this [x] (Dont forget to delete the empty space) ---> - -- [ ] I'm reporting a feature request -- [ ] I've verified that I'm running yt-dlp version **2021.10.10** -- [ ] I've searched the bugtracker for similar feature requests including closed ones -- [ ] I've read the opening an issue section in CONTRIBUTING.md -- [ ] I have given an appropriate title to the issue - - -## Description - -<!-- -Provide an explanation of your issue in an arbitrary form. Please make sure the description is worded well enough to be understood, see https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient. Provide any additional information, suggested solution and as much context and examples as possible. ---> - -WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml new file mode 100644 index 000000000..d839df95d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/5_feature_request.yml @@ -0,0 +1,30 @@ +name: Feature request request +description: Request a new functionality unrelated to any particular site or extractor +labels: [triage, enhancement] +body: + - type: checkboxes + id: checklist + attributes: + label: Checklist + description: | + Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: + options: + - label: I'm reporting a feature request + required: true + - label: I've verified that I'm running yt-dlp version **2021.10.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + required: true + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates + required: true + - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + required: true + - type: textarea + id: description + attributes: + label: Description + description: | + Provide an explanation of your site feature request in an arbitrary form. + Please make sure the description is worded well enough to be understood, see [is-the-description-of-the-issue-itself-sufficient](https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient). + Provide any additional information, any suggested solutions, and as much context and examples as possible + placeholder: WRITE DESCRIPTION HERE + validations: + required: true diff --git a/.github/ISSUE_TEMPLATE/6_question.md b/.github/ISSUE_TEMPLATE/6_question.md deleted file mode 100644 index 5ab17802a..000000000 --- a/.github/ISSUE_TEMPLATE/6_question.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -name: Ask question -about: Ask yt-dlp related question -title: "[Question] A short description of your question" -labels: question -assignees: '' - ---- - -<!-- - -###################################################################### - WARNING! 
- IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE -###################################################################### - ---> - - -## Checklist - -<!-- -Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: -- Look through the README (https://github.com/yt-dlp/yt-dlp) -- Read "opening an issue" section in CONTRIBUTING.md: https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue -- Search the bugtracker for similar questions: https://github.com/yt-dlp/yt-dlp/issues -- Finally, put x into all relevant boxes like this [x] (Dont forget to delete the empty space) ---> - -- [ ] I'm asking a question -- [ ] I've looked through the README -- [ ] I've read the opening an issue section in CONTRIBUTING.md -- [ ] I've searched the bugtracker for similar questions including closed ones -- [ ] I have given an appropriate title to the issue - - -## Question - -<!-- -Ask your question in an arbitrary form. Please make sure it's worded well enough to be understood, see https://github.com/yt-dlp/yt-dlp. ---> - -WRITE QUESTION HERE diff --git a/.github/ISSUE_TEMPLATE/6_question.yml b/.github/ISSUE_TEMPLATE/6_question.yml new file mode 100644 index 000000000..c101c2286 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/6_question.yml @@ -0,0 +1,30 @@ +name: Ask question +description: Ask yt-dlp related question +labels: [question] +body: + - type: checkboxes + id: checklist + attributes: + label: Checklist + description: | + Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: + options: + - label: I'm asking a question and not reporting a bug/feature request + required: true + - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp) + required: true + - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + required: true + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues) for similar questions including closed ones + required: true + - type: textarea + id: question + attributes: + label: Question + description: | + Ask your question in an arbitrary form. + Please make sure it's worded well enough to be understood, see [is-the-description-of-the-issue-itself-sufficient](https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient). + Provide any additional information and as much context and examples as possible + placeholder: WRITE QUESTION HERE + validations: + required: true diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 000000000..61127d682 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,5 @@ +blank_issues_enabled: false +contact_links: + - name: Get help from the community on Discord + url: https://discord.gg/H5MNcFW63r + about: Join the yt-dlp Discord for community-powered support! 
\ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md deleted file mode 100644 index 9ee002296..000000000 --- a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md +++ /dev/null @@ -1,73 +0,0 @@ ---- -name: Broken site support -about: Report broken or misfunctioning site -title: "[Broken] Website Name: A short description of the issue" -labels: ['triage', 'extractor-bug'] -assignees: '' - ---- - -<!-- - -###################################################################### - WARNING! - IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE -###################################################################### - ---> - - -## Checklist - -<!-- -Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: -- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is %(version)s. If it's not, see https://github.com/yt-dlp/yt-dlp#update on how to update. Issues with outdated version will be REJECTED. -- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. -- Make sure that all URLs and arguments with special characters are properly quoted or escaped. -- Search the bugtracker for similar issues: https://github.com/yt-dlp/yt-dlp/issues. DO NOT post duplicates. -- Read "opening an issue" section in CONTRIBUTING.md: https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue -- Finally, confirm all RELEVANT tasks from the following by putting x into all the boxes like this [x] (Dont forget to delete the empty space) ---> - -- [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running yt-dlp version **%(version)s** -- [ ] I've checked that all provided URLs are alive and playable in a browser -- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped -- [ ] I've searched the bugtracker for similar issues including closed ones -- [ ] I've read the opening an issue section in CONTRIBUTING.md -- [ ] I have given an appropriate title to the issue - - -## Verbose log - -<!-- -Provide the complete verbose output of yt-dlp that clearly demonstrates the problem. -Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v <your command line>`), copy the WHOLE output and insert it below. It should look similar to this: - [debug] System config: [] - [debug] User config: [] - [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKc'] - [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] yt-dlp version %(version)s - [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 - [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 - [debug] Proxy map: {} - <more lines> ---> - -``` -PASTE VERBOSE LOG HERE - -``` -<!-- -Do not remove the above ``` ---> - - -## Description - -<!-- -Provide an explanation of your issue in an arbitrary form. Provide any additional information, suggested solution and as much context and examples as possible. -If work on your issue requires account credentials please provide them or explain how one can obtain them. 
---> - -WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml new file mode 100644 index 000000000..fdca0e53a --- /dev/null +++ b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml @@ -0,0 +1,63 @@ +name: Broken site support +description: Report broken or misfunctioning site +labels: [triage, extractor-bug] +body: + - type: checkboxes + id: checklist + attributes: + label: Checklist + description: | + Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: + options: + - label: I'm reporting a broken site + required: true + - label: I've verified that I'm running yt-dlp version **%(version)s**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + required: true + - label: I've checked that all provided URLs are alive and playable in a browser + required: true + - label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/ytdl-org/youtube-dl#video-url-contains-an-ampersand-and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command) + required: true + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates + required: true + - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + required: true + - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and I'm willing to share it if required + - type: input + id: region + attributes: + label: Region + description: "Enter the region the site is accessible from" + placeholder: "India" + - type: textarea + id: description + attributes: + label: Description + description: | + Provide an explanation of your issue in an arbitrary form. + Provide any additional information, any suggested solutions, and as much context and examples as possible + placeholder: WRITE DESCRIPTION HERE + validations: + required: true + - type: textarea + id: log + attributes: + label: Verbose log + description: | + Provide the complete verbose output of yt-dlp that clearly demonstrates the problem. + Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv <your command line>`), copy the WHOLE output and insert it below. 
+ It should look similar to this: + placeholder: | + [debug] Command-line config: ['-Uv', 'http://www.youtube.com/watch?v=BaW_jenozKc'] + [debug] Portable config file: yt-dlp.conf + [debug] Portable config: ['-i'] + [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 + [debug] yt-dlp version %(version)s (exe) + [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 + [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 + [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets + [debug] Proxy map: {} + yt-dlp is up to date (%(version)s) + <more lines> + render: shell + validations: + required: true diff --git a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md deleted file mode 100644 index e71abbab2..000000000 --- a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -name: Site support request -about: Request support for a new site -title: "[Site Request] Website Name" -labels: ['triage', 'site-request'] -assignees: '' - ---- - -<!-- - -###################################################################### - WARNING! - IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE -###################################################################### - ---> - - -## Checklist - -<!-- -Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: -- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is %(version)s. If it's not, see https://github.com/yt-dlp/yt-dlp#update on how to update. Issues with outdated version will be REJECTED. -- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. -- Make sure that site you are requesting is not dedicated to copyright infringement. yt-dlp does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. -- Search the bugtracker for similar site support requests: https://github.com/yt-dlp/yt-dlp/issues. DO NOT post duplicates. -- Read "opening an issue" section in CONTRIBUTING.md: https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue -- Finally, confirm all RELEVANT tasks from the following by putting x into all the boxes like this [x] (Dont forget to delete the empty space) ---> - -- [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running yt-dlp version **%(version)s** -- [ ] I've checked that all provided URLs are alive and playable in a browser -- [ ] I've checked that none of provided URLs violate any copyrights -- [ ] The provided URLs do not contain any DRM to the best of my knowledge -- [ ] I've searched the bugtracker for similar site support requests including closed ones -- [ ] I've read the opening an issue section in CONTRIBUTING.md -- [ ] I have given an appropriate title to the issue - - -## Example URLs - -<!-- -Provide all kinds of example URLs support for which should be included. Replace following example URLs by yours. ---> - -- Single video: https://www.youtube.com/watch?v=BaW_jenozKc -- Single video: https://youtu.be/BaW_jenozKc -- Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc - - -## Description - -<!-- -Provide any additional information. 
-If work on your issue requires account credentials please provide them or explain how one can obtain them. ---> - -WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml new file mode 100644 index 000000000..be6427ce1 --- /dev/null +++ b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml @@ -0,0 +1,74 @@ +name: Site support request +description: Request support for a new site +labels: [triage, site-request] +body: + - type: checkboxes + id: checklist + attributes: + label: Checklist + description: | + Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: + options: + - label: I'm reporting a new site support request + required: true + - label: I've verified that I'm running yt-dlp version **%(version)s**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + required: true + - label: I've checked that all provided URLs are alive and playable in a browser + required: true + - label: I've checked that none of provided URLs [violate any copyrights](https://github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free) or contain any [DRM](https://en.wikipedia.org/wiki/Digital_rights_management) to the best of my knowledge + required: true + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates + required: true + - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + required: true + - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and am willing to share it if required + - type: input + id: region + attributes: + label: Region + description: "Enter the region the site is accessible from" + placeholder: "India" + - type: textarea + id: example-urls + attributes: + label: Example URLs + description: | + Provide all kinds of example URLs, support for which should be included. Replace following example URLs by yours + value: | + - Single video: https://www.youtube.com/watch?v=BaW_jenozKc + - Single video: https://youtu.be/BaW_jenozKc + - Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc + validations: + required: true + - type: textarea + id: description + attributes: + label: Description + description: | + Provide any additional information + placeholder: WRITE DESCRIPTION HERE + validations: + required: true + - type: textarea + id: log + attributes: + label: Verbose log + description: | + Provide the complete verbose output using one of the example URLs provided above. + Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv <your command line>`), copy the WHOLE output and insert it below. 
+ It should look similar to this: + placeholder: | + [debug] Command-line config: ['-Uv', 'http://www.youtube.com/watch?v=BaW_jenozKc'] + [debug] Portable config file: yt-dlp.conf + [debug] Portable config: ['-i'] + [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 + [debug] yt-dlp version %(version)s (exe) + [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 + [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 + [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets + [debug] Proxy map: {} + yt-dlp is up to date (%(version)s) + <more lines> + render: shell + validations: + required: true diff --git a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md deleted file mode 100644 index e0ccd5416..000000000 --- a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -name: Site feature request -about: Request a new functionality for a site -title: "[Site Feature] Website Name: A short description of the feature" -labels: ['triage', 'site-enhancement'] -assignees: '' - ---- - -<!-- - -###################################################################### - WARNING! - IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE -###################################################################### - ---> - - -## Checklist - -<!-- -Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: -- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is %(version)s. If it's not, see https://github.com/yt-dlp/yt-dlp#update on how to update. Issues with outdated version will be REJECTED. -- Search the bugtracker for similar site feature requests: https://github.com/yt-dlp/yt-dlp/issues. DO NOT post duplicates. -- Read "opening an issue" section in CONTRIBUTING.md: https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue -- Finally, confirm all RELEVANT tasks from the following by putting x into all the boxes like this [x] (Dont forget to delete the empty space) ---> - -- [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running yt-dlp version **%(version)s** -- [ ] I've searched the bugtracker for similar site feature requests including closed ones -- [ ] I've read the opening an issue section in CONTRIBUTING.md -- [ ] I have given an appropriate title to the issue - - -## Description - -<!-- -Provide an explanation of your site feature request in an arbitrary form. Please make sure the description is worded well enough to be understood, see https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient. Provide any additional information, suggested solution and as much context and examples as possible. 
---> - -WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml new file mode 100644 index 000000000..f19d958c6 --- /dev/null +++ b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml @@ -0,0 +1,37 @@ +name: Site feature request +description: Request a new functionality for a site +labels: [triage, site-enhancement] +body: + - type: checkboxes + id: checklist + attributes: + label: Checklist + description: | + Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: + options: + - label: I'm reporting a site feature request + required: true + - label: I've verified that I'm running yt-dlp version **%(version)s**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + required: true + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates + required: true + - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + required: true + - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and I'm willing to share it if required + - type: input + id: region + attributes: + label: Region + description: "Enter the region the site is accessible from" + placeholder: "India" + - type: textarea + id: description + attributes: + label: Description + description: | + Provide an explanation of your site feature request in an arbitrary form. + Please make sure the description is worded well enough to be understood, see [is-the-description-of-the-issue-itself-sufficient](https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient). + Provide any additional information, any suggested solutions, and as much context and examples as possible + placeholder: WRITE DESCRIPTION HERE + validations: + required: true diff --git a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md deleted file mode 100644 index 43e91b052..000000000 --- a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md +++ /dev/null @@ -1,74 +0,0 @@ ---- -name: Bug report -about: Report a bug unrelated to any particular site or extractor -title: '[Bug] A short description of the issue' -labels: ['triage', 'bug'] -assignees: '' - ---- - -<!-- - -###################################################################### - WARNING! - IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE -###################################################################### - ---> - - -## Checklist - -<!-- -Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: -- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is %(version)s. If it's not, see https://github.com/yt-dlp/yt-dlp#update on how to update. Issues with outdated version will be REJECTED. -- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. -- Make sure that all URLs and arguments with special characters are properly quoted or escaped. -- Search the bugtracker for similar issues: https://github.com/yt-dlp/yt-dlp/issues. DO NOT post duplicates. 
-- Read "opening an issue" section in CONTRIBUTING.md: https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue -- Finally, confirm all RELEVANT tasks from the following by putting x into all the boxes like this [x] (Dont forget to delete the empty space) ---> - -- [ ] I'm reporting a bug unrelated to a specific site -- [ ] I've verified that I'm running yt-dlp version **%(version)s** -- [ ] I've checked that all provided URLs are alive and playable in a browser -- [ ] The provided URLs do not contain any DRM to the best of my knowledge -- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped -- [ ] I've searched the bugtracker for similar bug reports including closed ones -- [ ] I've read the opening an issue section in CONTRIBUTING.md -- [ ] I have given an appropriate title to the issue - - -## Verbose log - -<!-- -Provide the complete verbose output of yt-dlp that clearly demonstrates the problem. -Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v <your command line>`), copy the WHOLE output and insert it below. It should look similar to this: - [debug] System config: [] - [debug] User config: [] - [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKc'] - [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] yt-dlp version %(version)s - [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 - [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 - [debug] Proxy map: {} - <more lines> ---> - -``` -PASTE VERBOSE LOG HERE - -``` -<!-- -Do not remove the above ``` ---> - - -## Description - -<!-- -Provide an explanation of your issue in an arbitrary form. Please make sure the description is worded well enough to be understood, see https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient. Provide any additional information, suggested solution and as much context and examples as possible. -If work on your issue requires account credentials please provide them or explain how one can obtain them. ---> - -WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml new file mode 100644 index 000000000..e4d669bb7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml @@ -0,0 +1,57 @@ +name: Bug report +description: Report a bug unrelated to any particular site or extractor +labels: [triage,bug] +body: + - type: checkboxes + id: checklist + attributes: + label: Checklist + description: | + Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: + options: + - label: I'm reporting a bug unrelated to a specific site + required: true + - label: I've verified that I'm running yt-dlp version **%(version)s**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + required: true + - label: I've checked that all provided URLs are alive and playable in a browser + required: true + - label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/ytdl-org/youtube-dl#video-url-contains-an-ampersand-and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command) + required: true + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. 
DO NOT post duplicates + required: true + - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + required: true + - type: textarea + id: description + attributes: + label: Description + description: | + Provide an explanation of your issue in an arbitrary form. + Please make sure the description is worded well enough to be understood, see [is-the-description-of-the-issue-itself-sufficient](https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient). + Provide any additional information, any suggested solutions, and as much context and examples as possible + placeholder: WRITE DESCRIPTION HERE + validations: + required: true + - type: textarea + id: log + attributes: + label: Verbose log + description: | + Provide the complete verbose output of yt-dlp that clearly demonstrates the problem. + Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv <your command line>`), copy the WHOLE output and insert it below. + It should look similar to this: + placeholder: | + [debug] Command-line config: ['-Uv', 'http://www.youtube.com/watch?v=BaW_jenozKc'] + [debug] Portable config file: yt-dlp.conf + [debug] Portable config: ['-i'] + [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 + [debug] yt-dlp version %(version)s (exe) + [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 + [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 + [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets + [debug] Proxy map: {} + yt-dlp is up to date (%(version)s) + <more lines> + render: shell + validations: + required: true diff --git a/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md deleted file mode 100644 index 075e0b1b3..000000000 --- a/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -name: Feature request -about: Request a new functionality unrelated to any particular site or extractor -title: "[Feature Request] A short description of your feature" -labels: ['triage', 'enhancement'] -assignees: '' - ---- - -<!-- - -###################################################################### - WARNING! - IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE -###################################################################### - ---> - - -## Checklist - -<!-- -Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: -- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is %(version)s. If it's not, see https://github.com/yt-dlp/yt-dlp#update on how to update. Issues with outdated version will be REJECTED. -- Search the bugtracker for similar feature requests: https://github.com/yt-dlp/yt-dlp/issues. DO NOT post duplicates. 
-- Read "opening an issue" section in CONTRIBUTING.md: https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue -- Finally, put x into all relevant boxes like this [x] (Dont forget to delete the empty space) ---> - -- [ ] I'm reporting a feature request -- [ ] I've verified that I'm running yt-dlp version **%(version)s** -- [ ] I've searched the bugtracker for similar feature requests including closed ones -- [ ] I've read the opening an issue section in CONTRIBUTING.md -- [ ] I have given an appropriate title to the issue - - -## Description - -<!-- -Provide an explanation of your issue in an arbitrary form. Please make sure the description is worded well enough to be understood, see https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient. Provide any additional information, suggested solution and as much context and examples as possible. ---> - -WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml new file mode 100644 index 000000000..27e2e773b --- /dev/null +++ b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml @@ -0,0 +1,30 @@ +name: Feature request request +description: Request a new functionality unrelated to any particular site or extractor +labels: [triage, enhancement] +body: + - type: checkboxes + id: checklist + attributes: + label: Checklist + description: | + Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: + options: + - label: I'm reporting a feature request + required: true + - label: I've verified that I'm running yt-dlp version **%(version)s**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + required: true + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates + required: true + - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + required: true + - type: textarea + id: description + attributes: + label: Description + description: | + Provide an explanation of your site feature request in an arbitrary form. + Please make sure the description is worded well enough to be understood, see [is-the-description-of-the-issue-itself-sufficient](https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient). 
+        Provide any additional information, any suggested solutions, and as much context and examples as possible
+      placeholder: WRITE DESCRIPTION HERE
+    validations:
+      required: true
diff --git a/.github/ISSUE_TEMPLATE_tmpl/6_question.yml b/.github/ISSUE_TEMPLATE_tmpl/6_question.yml
new file mode 100644
index 000000000..c101c2286
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE_tmpl/6_question.yml
@@ -0,0 +1,30 @@
+name: Ask question
+description: Ask yt-dlp related question
+labels: [question]
+body:
+  - type: checkboxes
+    id: checklist
+    attributes:
+      label: Checklist
+      description: |
+        Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
+      options:
+        - label: I'm asking a question and not reporting a bug/feature request
+          required: true
+        - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp)
+          required: true
+        - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
+          required: true
+        - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues) for similar questions including closed ones
+          required: true
+  - type: textarea
+    id: question
+    attributes:
+      label: Question
+      description: |
+        Ask your question in an arbitrary form.
+        Please make sure it's worded well enough to be understood, see [is-the-description-of-the-issue-itself-sufficient](https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient).
+        Provide any additional information and as much context and examples as possible
+      placeholder: WRITE QUESTION HERE
+    validations:
+      required: true
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 7aaf6a52b..fb539ec0d 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -109,6 +109,18 @@ Some bug reports are completely unrelated to yt-dlp and relate to a different, o
 If the issue is with `youtube-dl` (the upstream fork of yt-dlp) and not with yt-dlp, the issue should be raised in the youtube-dl project.
 
+### Are you willing to share account details if needed?
+
+The maintainers and potential contributors of the project often do not have an account for the website you are asking support for. So any developer interested in solving your issue may ask you for account details. It is your personal discretion whether you are willing to share the account in order for the developer to try and solve your issue. However, if you are unwilling or unable to provide details, they obviously cannot work on the issue and it cannot be solved unless some developer who both has an account and is willing/able to contribute decides to solve it.
+
+By sharing an account with anyone, you agree to bear all risks associated with it. The maintainers and yt-dlp can't be held responsible for any misuse of the credentials.
+
+While these steps won't necessarily ensure that no misuse of the account takes place, these are still some good practices to follow.
+
+- Look for people with the `Member` or `Contributor` tag on their messages.
+- Change the password before sharing the account to something random (use [this](https://passwordsgenerator.net/) if you don't have a random password generator).
+- Change the password after receiving the account back.
+ diff --git a/Makefile b/Makefile index 9ce975ea2..e7b854a9d 100644 --- a/Makefile +++ b/Makefile @@ -78,12 +78,13 @@ README.md: yt_dlp/*.py yt_dlp/*/*.py CONTRIBUTING.md: README.md $(PYTHON) devscripts/make_contributing.py README.md CONTRIBUTING.md -issuetemplates: devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/1_broken_site.md .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md .github/ISSUE_TEMPLATE_tmpl/4_bug_report.md .github/ISSUE_TEMPLATE_tmpl/5_feature_request.md yt_dlp/version.py - $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/1_broken_site.md .github/ISSUE_TEMPLATE/1_broken_site.md - $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md .github/ISSUE_TEMPLATE/2_site_support_request.md - $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md - $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/4_bug_report.md .github/ISSUE_TEMPLATE/4_bug_report.md - $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/5_feature_request.md .github/ISSUE_TEMPLATE/5_feature_request.md +issuetemplates: devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml .github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml .github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml yt_dlp/version.py + $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml .github/ISSUE_TEMPLATE/1_broken_site.yml + $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml .github/ISSUE_TEMPLATE/2_site_support_request.yml + $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml .github/ISSUE_TEMPLATE/3_site_feature_request.yml + $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml .github/ISSUE_TEMPLATE/4_bug_report.yml + $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml .github/ISSUE_TEMPLATE/5_feature_request.yml + $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/6_question.yml .github/ISSUE_TEMPLATE/6_question.yml supportedsites: $(PYTHON) devscripts/make_supportedsites.py supportedsites.md From 0e5927eebfcd02a4815fcb29319a1dd3f05fd1b3 Mon Sep 17 00:00:00 2001 From: Ricardo <10128951+smplayer-dev@users.noreply.github.com> Date: Thu, 21 Oct 2021 12:48:46 +0200 Subject: [PATCH 0101/2552] [build] Build standalone MacOS packages (#1221) Closes #1075 Authored by: smplayer-dev --- .github/workflows/build.yml | 108 ++++++++++++++++++++++++++++++------ README.md | 15 +++++ pyinst.py | 89 ++++++++++++++++------------- yt_dlp/update.py | 34 +++++++----- 4 files changed, 175 insertions(+), 71 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5717ce8ee..296380596 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -133,6 +133,70 @@ jobs: asset_name: yt-dlp.tar.gz asset_content_type: application/gzip + build_macos: + runs-on: macos-11 + needs: build_unix + + outputs: + sha256_macos: ${{ steps.sha256_macos.outputs.sha256_macos }} + sha512_macos: ${{ steps.sha512_macos.outputs.sha512_macos }} + sha256_macos_zip: ${{ steps.sha256_macos_zip.outputs.sha256_macos_zip }} + 
sha512_macos_zip: ${{ steps.sha512_macos_zip.outputs.sha512_macos_zip }} + + steps: + - uses: actions/checkout@v2 + # In order to create a universal2 application, the version of python3 in /usr/bin has to be used + - name: Install Requirements + run: | + brew install coreutils + /usr/bin/pip3 install --user Pyinstaller mutagen pycryptodomex websockets + - name: Bump version + id: bump_version + run: python devscripts/update-version.py + - name: Print version + run: echo "${{ steps.bump_version.outputs.ytdlp_version }}" + - name: Run PyInstaller Script + run: /usr/bin/python3 ./pyinst.py --target-architecture universal2 --onefile + - name: Upload yt-dlp MacOS binary + id: upload-release-macos + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ needs.build_unix.outputs.upload_url }} + asset_path: ./dist/yt-dlp_macos + asset_name: yt-dlp_macos + asset_content_type: application/octet-stream + - name: Get SHA2-256SUMS for yt-dlp_macos + id: sha256_macos + run: echo "::set-output name=sha256_macos::$(sha256sum dist/yt-dlp_macos | awk '{print $1}')" + - name: Get SHA2-512SUMS for yt-dlp_macos + id: sha512_macos + run: echo "::set-output name=sha512_macos::$(sha512sum dist/yt-dlp_macos | awk '{print $1}')" + + - name: Run PyInstaller Script with --onedir + run: /usr/bin/python3 ./pyinst.py --target-architecture universal2 --onedir + - uses: papeloto/action-zip@v1 + with: + files: ./dist/yt-dlp_macos + dest: ./dist/yt-dlp_macos.zip + - name: Upload yt-dlp MacOS onedir + id: upload-release-macos-zip + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ needs.build_unix.outputs.upload_url }} + asset_path: ./dist/yt-dlp_macos.zip + asset_name: yt-dlp_macos.zip + asset_content_type: application/zip + - name: Get SHA2-256SUMS for yt-dlp_macos.zip + id: sha256_macos_zip + run: echo "::set-output name=sha256_macos_zip::$(sha256sum dist/yt-dlp_macos.zip | awk '{print $1}')" + - name: Get SHA2-512SUMS for yt-dlp_macos + id: sha512_macos_zip + run: echo "::set-output name=sha512_macos_zip::$(sha512sum dist/yt-dlp_macos.zip | awk '{print $1}')" + build_windows: runs-on: windows-latest needs: build_unix @@ -150,11 +214,11 @@ jobs: uses: actions/setup-python@v2 with: python-version: '3.8' - - name: Upgrade pip and enable wheel support - run: python -m pip install --upgrade pip setuptools wheel - name: Install Requirements # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds - run: pip install "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-4.5.1-py3-none-any.whl" mutagen pycryptodomex websockets + run: | + python -m pip install --upgrade pip setuptools wheel + pip install "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-4.5.1-py3-none-any.whl" mutagen pycryptodomex websockets - name: Bump version id: bump_version run: python devscripts/update-version.py @@ -183,27 +247,27 @@ jobs: - uses: papeloto/action-zip@v1 with: files: ./dist/yt-dlp - dest: ./dist/yt-dlp.zip - - name: Upload yt-dlp.zip Windows onedir + dest: ./dist/yt-dlp_win.zip + - name: Upload yt-dlp Windows onedir id: upload-release-windows-zip uses: actions/upload-release-asset@v1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: upload_url: ${{ needs.build_unix.outputs.upload_url }} - asset_path: ./dist/yt-dlp.zip - asset_name: yt-dlp.zip + asset_path: ./dist/yt-dlp_win.zip + asset_name: yt-dlp_win.zip asset_content_type: application/zip - - name: Get SHA2-256SUMS for 
yt-dlp.zip + - name: Get SHA2-256SUMS for yt-dlp_win.zip id: sha256_win_zip - run: echo "::set-output name=sha256_win_zip::$((Get-FileHash dist\yt-dlp.zip -Algorithm SHA256).Hash.ToLower())" - - name: Get SHA2-512SUMS for yt-dlp.zip + run: echo "::set-output name=sha256_win_zip::$((Get-FileHash dist\yt-dlp_win.zip -Algorithm SHA256).Hash.ToLower())" + - name: Get SHA2-512SUMS for yt-dlp_win.zip id: sha512_win_zip - run: echo "::set-output name=sha512_win_zip::$((Get-FileHash dist\yt-dlp.zip -Algorithm SHA512).Hash.ToLower())" + run: echo "::set-output name=sha512_win_zip::$((Get-FileHash dist\yt-dlp_win.zip -Algorithm SHA512).Hash.ToLower())" build_windows32: runs-on: windows-latest - needs: [build_unix, build_windows] + needs: build_unix outputs: sha256_win32: ${{ steps.sha256_win32.outputs.sha256_win32 }} @@ -217,10 +281,10 @@ jobs: with: python-version: '3.7' architecture: 'x86' - - name: Upgrade pip and enable wheel support - run: python -m pip install --upgrade pip setuptools wheel - name: Install Requirements - run: pip install "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-4.5.1-py3-none-any.whl" mutagen pycryptodomex websockets + run: | + python -m pip install --upgrade pip setuptools wheel + pip install "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-4.5.1-py3-none-any.whl" mutagen pycryptodomex websockets - name: Bump version id: bump_version run: python devscripts/update-version.py @@ -247,7 +311,7 @@ jobs: finish: runs-on: ubuntu-latest - needs: [build_unix, build_windows, build_windows32] + needs: [build_unix, build_windows, build_windows32, build_macos] steps: - name: Make SHA2-256SUMS file @@ -255,14 +319,18 @@ jobs: SHA256_WIN: ${{ needs.build_windows.outputs.sha256_win }} SHA256_WIN_ZIP: ${{ needs.build_windows.outputs.sha256_win_zip }} SHA256_WIN32: ${{ needs.build_windows32.outputs.sha256_win32 }} + SHA256_MACOS: ${{ needs.build_macos.outputs.sha256_macos }} + SHA256_MACOS_ZIP: ${{ needs.build_macos.outputs.sha256_macos_zip }} SHA256_BIN: ${{ needs.build_unix.outputs.sha256_bin }} SHA256_TAR: ${{ needs.build_unix.outputs.sha256_tar }} run: | echo "${{ env.SHA256_WIN }} yt-dlp.exe" >> SHA2-256SUMS echo "${{ env.SHA256_WIN32 }} yt-dlp_x86.exe" >> SHA2-256SUMS + echo "${{ env.SHA256_MACOS }} yt-dlp_macos" >> SHA2-256SUMS + echo "${{ env.SHA256_MACOS_ZIP }} yt-dlp_macos.zip" >> SHA2-256SUMS echo "${{ env.SHA256_BIN }} yt-dlp" >> SHA2-256SUMS echo "${{ env.SHA256_TAR }} yt-dlp.tar.gz" >> SHA2-256SUMS - echo "${{ env.SHA256_WIN_ZIP }} yt-dlp.zip" >> SHA2-256SUMS + echo "${{ env.SHA256_WIN_ZIP }} yt-dlp_win.zip" >> SHA2-256SUMS - name: Upload 256SUMS file id: upload-sums uses: actions/upload-release-asset@v1 @@ -278,14 +346,18 @@ jobs: SHA512_WIN: ${{ needs.build_windows.outputs.sha512_win }} SHA512_WIN_ZIP: ${{ needs.build_windows.outputs.sha512_win_zip }} SHA512_WIN32: ${{ needs.build_windows32.outputs.sha512_win32 }} + SHA512_MACOS: ${{ needs.build_macos.outputs.sha512_macos }} + SHA512_MACOS_ZIP: ${{ needs.build_macos.outputs.sha512_macos_zip }} SHA512_BIN: ${{ needs.build_unix.outputs.sha512_bin }} SHA512_TAR: ${{ needs.build_unix.outputs.sha512_tar }} run: | echo "${{ env.SHA512_WIN }} yt-dlp.exe" >> SHA2-512SUMS echo "${{ env.SHA512_WIN32 }} yt-dlp_x86.exe" >> SHA2-512SUMS + echo "${{ env.SHA512_MACOS }} yt-dlp_macos" >> SHA2-512SUMS + echo "${{ env.SHA512_MACOS_ZIP }} yt-dlp_macos.zip" >> SHA2-512SUMS echo "${{ env.SHA512_BIN }} yt-dlp" >> SHA2-512SUMS echo "${{ env.SHA512_TAR }} yt-dlp.tar.gz" >> SHA2-512SUMS - echo "${{ env.SHA512_WIN_ZIP 
}} yt-dlp.zip" >> SHA2-512SUMS + echo "${{ env.SHA512_WIN_ZIP }} yt-dlp_win.zip" >> SHA2-512SUMS - name: Upload 512SUMS file id: upload-512sums uses: actions/upload-release-asset@v1 diff --git a/README.md b/README.md index d410d04d1..edd7d298a 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t * [Differences in default behavior](#differences-in-default-behavior) * [INSTALLATION](#installation) * [Update](#update) + * [Release Files](#release-files) * [Dependencies](#dependencies) * [Compile](#compile) * [USAGE AND OPTIONS](#usage-and-options) @@ -190,6 +191,20 @@ You can use `yt-dlp -U` to update if you are using the provided release. If you are using `pip`, simply re-run the same command that was used to install the program. If you have installed using Homebrew, run `brew upgrade yt-dlp/taps/yt-dlp` +### RELEASE FILES + +File|Description +:---|:--- +[yt-dlp](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)|Platform independant binary. Needs Python (Recommended for UNIX like OSes) +[yt-dlp.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)|Windows standalone x64 binary (Recommended for Windows) +[yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows standalone x86 (32bit) binary +[yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged windows executable +[yt-dlp_macos](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)|MacOS standalone executable +[yt-dlp_macos.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos.zip)|Unpackaged MacOS executable +[yt-dlp.tar.gz](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)|Source tarball. Also contains manpages, completions, etc +[SHA2-512SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-512SUMS)|GNU-style SHA512 sums +[SHA2-256SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-256SUMS)|GNU-style SHA256 sums + ### DEPENDENCIES Python versions 3.6+ (CPython and PyPy) are supported. Other versions and implementations may or may not work correctly. 
diff --git a/pyinst.py b/pyinst.py index ed410e0f2..5aa83f9da 100644 --- a/pyinst.py +++ b/pyinst.py @@ -6,16 +6,24 @@ import sys import platform from PyInstaller.utils.hooks import collect_submodules -from PyInstaller.utils.win32.versioninfo import ( - VarStruct, VarFileInfo, StringStruct, StringTable, - StringFileInfo, FixedFileInfo, VSVersionInfo, SetVersion, -) + +if platform.system() == 'Windows': + from PyInstaller.utils.win32.versioninfo import ( + VarStruct, VarFileInfo, StringStruct, StringTable, + StringFileInfo, FixedFileInfo, VSVersionInfo, SetVersion, + ) import PyInstaller.__main__ +suffix = '' arch = platform.architecture()[0][:2] assert arch in ('32', '64') _x86 = '_x86' if arch == '32' else '' +if platform.system() == 'Windows': + suffix = _x86 +if platform.system() == 'Darwin': + suffix = '_macos' + # Compatability with older arguments opts = sys.argv[1:] if opts[0:1] in (['32'], ['64']): @@ -37,39 +45,40 @@ VERSION_LIST = list(map(int, VERSION_LIST)) + [0] * (4 - len(VERSION_LIST)) print('Version: %s%s' % (VERSION, _x86)) print('Remember to update the version using devscipts\\update-version.py') -VERSION_FILE = VSVersionInfo( - ffi=FixedFileInfo( - filevers=VERSION_LIST, - prodvers=VERSION_LIST, - mask=0x3F, - flags=0x0, - OS=0x4, - fileType=0x1, - subtype=0x0, - date=(0, 0), - ), - kids=[ - StringFileInfo([ - StringTable( - '040904B0', [ - StringStruct('Comments', 'yt-dlp%s Command Line Interface.' % _x86), - StringStruct('CompanyName', 'https://github.com/yt-dlp'), - StringStruct('FileDescription', FILE_DESCRIPTION), - StringStruct('FileVersion', VERSION), - StringStruct('InternalName', 'yt-dlp%s' % _x86), - StringStruct( - 'LegalCopyright', - 'pukkandan.ytdlp@gmail.com | UNLICENSE', - ), - StringStruct('OriginalFilename', 'yt-dlp%s.exe' % _x86), - StringStruct('ProductName', 'yt-dlp%s' % _x86), - StringStruct( - 'ProductVersion', - '%s%s on Python %s' % (VERSION, _x86, platform.python_version())), - ])]), - VarFileInfo([VarStruct('Translation', [0, 1200])]) - ] -) +if platform.system() == 'Windows': + VERSION_FILE = VSVersionInfo( + ffi=FixedFileInfo( + filevers=VERSION_LIST, + prodvers=VERSION_LIST, + mask=0x3F, + flags=0x0, + OS=0x4, + fileType=0x1, + subtype=0x0, + date=(0, 0), + ), + kids=[ + StringFileInfo([ + StringTable( + '040904B0', [ + StringStruct('Comments', 'yt-dlp%s Command Line Interface.' 
% _x86), + StringStruct('CompanyName', 'https://github.com/yt-dlp'), + StringStruct('FileDescription', FILE_DESCRIPTION), + StringStruct('FileVersion', VERSION), + StringStruct('InternalName', 'yt-dlp%s' % _x86), + StringStruct( + 'LegalCopyright', + 'pukkandan.ytdlp@gmail.com | UNLICENSE', + ), + StringStruct('OriginalFilename', 'yt-dlp%s.exe' % _x86), + StringStruct('ProductName', 'yt-dlp%s' % _x86), + StringStruct( + 'ProductVersion', + '%s%s on Python %s' % (VERSION, _x86, platform.python_version())), + ])]), + VarFileInfo([VarStruct('Translation', [0, 1200])]) + ] + ) def pycryptodome_module(): @@ -90,7 +99,7 @@ dependancies = [pycryptodome_module(), 'mutagen'] + collect_submodules('websocke excluded_modules = ['test', 'ytdlp_plugins', 'youtube-dl', 'youtube-dlc'] PyInstaller.__main__.run([ - '--name=yt-dlp%s' % _x86, + '--name=yt-dlp%s' % suffix, '--icon=devscripts/logo.ico', *[f'--exclude-module={module}' for module in excluded_modules], *[f'--hidden-import={module}' for module in dependancies], @@ -99,4 +108,6 @@ PyInstaller.__main__.run([ *opts, 'yt_dlp/__main__.py', ]) -SetVersion('dist/%syt-dlp%s.exe' % ('yt-dlp/' if '--onedir' in opts else '', _x86), VERSION_FILE) + +if platform.system() == 'Windows': + SetVersion('dist/%syt-dlp%s.exe' % ('yt-dlp/' if '--onedir' in opts else '', _x86), VERSION_FILE) diff --git a/yt_dlp/update.py b/yt_dlp/update.py index e4b1280be..127b2cbc8 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -33,10 +33,11 @@ def rsa_verify(message, signature, key): def detect_variant(): if hasattr(sys, 'frozen'): + prefix = 'mac' if sys.platform == 'darwin' else 'win' if getattr(sys, '_MEIPASS', None): if sys._MEIPASS == os.path.dirname(sys.executable): - return 'dir' - return 'exe' + return f'{prefix}_dir' + return f'{prefix}_exe' return 'py2exe' elif isinstance(globals().get('__loader__'), zipimporter): return 'zip' @@ -46,9 +47,11 @@ def detect_variant(): _NON_UPDATEABLE_REASONS = { - 'exe': None, + 'win_exe': None, 'zip': None, - 'dir': 'Auto-update is not supported for unpackaged windows executable; Re-download the latest release', + 'mac_exe': None, + 'win_dir': 'Auto-update is not supported for unpackaged windows executable; Re-download the latest release', + 'mac_dir': 'Auto-update is not supported for unpackaged MacOS executable; Re-download the latest release', 'py2exe': 'There is no official release for py2exe executable; Build it again with the latest source code', 'source': 'You cannot update when running from source code; Use git to pull the latest changes', 'unknown': 'It looks like you installed yt-dlp with a package manager, pip, setup.py or a tarball; Use that to update', @@ -119,6 +122,7 @@ def run_update(ydl): 'zip_3': '', 'exe_64': '.exe', 'exe_32': '_x86.exe', + 'mac_64': '_macos', } def get_bin_info(bin_or_exe, version): @@ -139,7 +143,8 @@ def run_update(ydl): return report_permission_error(filename) # PyInstaller - if hasattr(sys, 'frozen'): + variant = detect_variant() + if variant == 'win_exe': exe = filename directory = os.path.dirname(exe) if not os.access(directory, os.W_OK): @@ -161,13 +166,11 @@ def run_update(ydl): except (IOError, OSError): return report_network_error('download latest version') - if not os.access(exe + '.new', os.W_OK): - return report_permission_error(f'{exe}.new') try: with open(exe + '.new', 'wb') as outf: outf.write(newcontent) except (IOError, OSError): - return report_unable('write the new version') + return report_permission_error(f'{exe}.new') expected_sum = get_sha256sum('exe', arch) if not 
expected_sum: @@ -199,10 +202,10 @@ def run_update(ydl): except OSError: report_unable('delete the old version') - # Zip unix package - elif isinstance(globals().get('__loader__'), zipimporter): + elif variant in ('zip', 'mac_exe'): + pack_type = ('mac', '64') if variant == 'mac_exe' else ('zip', '3') try: - url = get_bin_info('zip', '3').get('browser_download_url') + url = get_bin_info(*pack_type).get('browser_download_url') if not url: return report_network_error('fetch updates') urlh = ydl._opener.open(url) @@ -211,11 +214,11 @@ def run_update(ydl): except (IOError, OSError): return report_network_error('download the latest version') - expected_sum = get_sha256sum('zip', '3') + expected_sum = get_sha256sum(*pack_type) if not expected_sum: ydl.report_warning('no hash information found for the release') elif hashlib.sha256(newcontent).hexdigest() != expected_sum: - return report_network_error('verify the new zip') + return report_network_error('verify the new package') try: with open(filename, 'wb') as outf: @@ -223,7 +226,10 @@ def run_update(ydl): except (IOError, OSError): return report_unable('overwrite current version') - ydl.to_screen('Updated yt-dlp to version %s; Restart yt-dlp to use the new version' % version_id) + ydl.to_screen('Updated yt-dlp to version %s; Restart yt-dlp to use the new version' % version_id) + return + + assert False, f'Unhandled variant: {variant}' ''' # UNUSED From 6e21fdd27902efa6ad7fb12b570e4b2dd0bfde8d Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Thu, 21 Oct 2021 18:24:05 +0530 Subject: [PATCH 0102/2552] [build] Enable lazy-extractors in releases Set the environment variable `YTDLP_NO_LAZY_EXTRACTORS` to forcefully disable lazy extractor loading --- .github/workflows/build.yml | 13 +++++++++++++ Makefile | 4 ++-- yt_dlp/YoutubeDL.py | 7 +++++-- yt_dlp/extractor/__init__.py | 21 +++++++++++---------- 4 files changed, 31 insertions(+), 14 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 296380596..9bcdc4f94 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -51,6 +51,10 @@ jobs: echo "changelog<<EOF" >> $GITHUB_ENV echo "$changelog" >> $GITHUB_ENV echo "EOF" >> $GITHUB_ENV + + - name: Build lazy extractors + id: lazy_extractors + run: python devscripts/make_lazy_extractors.py yt_dlp/extractor/lazy_extractors.py - name: Run Make run: make all tar - name: Get SHA2-256SUMS for yt-dlp @@ -155,6 +159,9 @@ jobs: run: python devscripts/update-version.py - name: Print version run: echo "${{ steps.bump_version.outputs.ytdlp_version }}" + - name: Build lazy extractors + id: lazy_extractors + run: /usr/bin/python3 devscripts/make_lazy_extractors.py yt_dlp/extractor/lazy_extractors.py - name: Run PyInstaller Script run: /usr/bin/python3 ./pyinst.py --target-architecture universal2 --onefile - name: Upload yt-dlp MacOS binary @@ -224,6 +231,9 @@ jobs: run: python devscripts/update-version.py - name: Print version run: echo "${{ steps.bump_version.outputs.ytdlp_version }}" + - name: Build lazy extractors + id: lazy_extractors + run: python devscripts/make_lazy_extractors.py yt_dlp/extractor/lazy_extractors.py - name: Run PyInstaller Script run: python pyinst.py - name: Upload yt-dlp.exe Windows binary @@ -290,6 +300,9 @@ jobs: run: python devscripts/update-version.py - name: Print version run: echo "${{ steps.bump_version.outputs.ytdlp_version }}" + - name: Build lazy extractors + id: lazy_extractors + run: python devscripts/make_lazy_extractors.py 
yt_dlp/extractor/lazy_extractors.py - name: Run PyInstaller Script for 32 Bit run: python pyinst.py - name: Upload Executable yt-dlp_x86.exe diff --git a/Makefile b/Makefile index e7b854a9d..ee199e448 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -all: yt-dlp doc pypi-files +all: lazy-extractors yt-dlp doc pypi-files clean: clean-test clean-dist clean-cache completions: completion-bash completion-fish completion-zsh doc: README.md CONTRIBUTING.md issuetemplates supportedsites @@ -40,7 +40,7 @@ SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then ech # set markdown input format to "markdown-smart" for pandoc version 2 and to "markdown" for pandoc prior to version 2 MARKDOWN = $(shell if [ `pandoc -v | head -n1 | cut -d" " -f2 | head -c1` = "2" ]; then echo markdown-smart; else echo markdown; fi) -install: yt-dlp yt-dlp.1 completions +install: lazy_extractors yt-dlp yt-dlp.1 completions install -Dm755 yt-dlp $(DESTDIR)$(BINDIR) install -Dm644 yt-dlp.1 $(DESTDIR)$(MANDIR)/man1 install -Dm644 completions/bash/yt-dlp $(DESTDIR)$(SHAREDIR)/bash-completion/completions/yt-dlp diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 79f0b274d..f95bbea81 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3268,8 +3268,11 @@ class YoutubeDL(object): source = detect_variant() write_debug('yt-dlp version %s%s\n' % (__version__, '' if source == 'unknown' else f' ({source})')) - if _LAZY_LOADER: - write_debug('Lazy loading extractors enabled\n') + if not _LAZY_LOADER: + if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'): + write_debug('Lazy loading extractors is forcibly disabled\n') + else: + write_debug('Lazy loading extractors is disabled\n') if plugin_extractors or plugin_postprocessors: write_debug('Plugins: %s\n' % [ '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}') diff --git a/yt_dlp/extractor/__init__.py b/yt_dlp/extractor/__init__.py index 198c4ae17..b35484246 100644 --- a/yt_dlp/extractor/__init__.py +++ b/yt_dlp/extractor/__init__.py @@ -1,14 +1,15 @@ -from __future__ import unicode_literals +import os from ..utils import load_plugins -try: - from .lazy_extractors import * - from .lazy_extractors import _ALL_CLASSES - _LAZY_LOADER = True - _PLUGIN_CLASSES = {} -except ImportError: - _LAZY_LOADER = False +_LAZY_LOADER = False +if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'): + try: + from .lazy_extractors import * + from .lazy_extractors import _ALL_CLASSES + _LAZY_LOADER = True + except ImportError: + pass if not _LAZY_LOADER: from .extractors import * @@ -19,8 +20,8 @@ if not _LAZY_LOADER: ] _ALL_CLASSES.append(GenericIE) - _PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals()) - _ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES +_PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals()) +_ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES def gen_extractor_classes(): From 386cdfdb5b9ff90c7e7b716e9db6ccdd776feb77 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Thu, 21 Oct 2021 18:26:56 +0530 Subject: [PATCH 0103/2552] [build] Release windows exe built with py2exe Closes: #855 Related: #661, #705, #890, #1024, #1160 --- .github/workflows/build.yml | 27 ++++++++++++++++++++++++++- README.md | 1 + setup.py | 2 +- yt_dlp/update.py | 22 +++++++++++----------- 4 files changed, 39 insertions(+), 13 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9bcdc4f94..b2da4063b 100644 --- a/.github/workflows/build.yml +++ 
b/.github/workflows/build.yml @@ -211,6 +211,8 @@ jobs: outputs: sha256_win: ${{ steps.sha256_win.outputs.sha256_win }} sha512_win: ${{ steps.sha512_win.outputs.sha512_win }} + sha256_py2exe: ${{ steps.sha256_py2exe.outputs.sha256_py2exe }} + sha512_py2exe: ${{ steps.sha512_py2exe.outputs.sha512_py2exe }} sha256_win_zip: ${{ steps.sha256_win_zip.outputs.sha256_win_zip }} sha512_win_zip: ${{ steps.sha512_win_zip.outputs.sha512_win_zip }} @@ -224,7 +226,7 @@ jobs: - name: Install Requirements # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds run: | - python -m pip install --upgrade pip setuptools wheel + python -m pip install --upgrade pip setuptools wheel py2exe pip install "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-4.5.1-py3-none-any.whl" mutagen pycryptodomex websockets - name: Bump version id: bump_version @@ -275,6 +277,25 @@ jobs: id: sha512_win_zip run: echo "::set-output name=sha512_win_zip::$((Get-FileHash dist\yt-dlp_win.zip -Algorithm SHA512).Hash.ToLower())" + - name: Run py2exe Script + run: python setup.py py2exe + - name: Upload yt-dlp_min.exe Windows binary + id: upload-release-windows-py2exe + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ needs.build_unix.outputs.upload_url }} + asset_path: ./dist/yt-dlp.exe + asset_name: yt-dlp_min.exe + asset_content_type: application/vnd.microsoft.portable-executable + - name: Get SHA2-256SUMS for yt-dlp_min.exe + id: sha256_py2exe + run: echo "::set-output name=sha256_py2exe::$((Get-FileHash dist\yt-dlp.exe -Algorithm SHA256).Hash.ToLower())" + - name: Get SHA2-512SUMS for yt-dlp_min.exe + id: sha512_py2exe + run: echo "::set-output name=sha512_py2exe::$((Get-FileHash dist\yt-dlp.exe -Algorithm SHA512).Hash.ToLower())" + build_windows32: runs-on: windows-latest needs: build_unix @@ -330,6 +351,7 @@ jobs: - name: Make SHA2-256SUMS file env: SHA256_WIN: ${{ needs.build_windows.outputs.sha256_win }} + SHA256_PY2EXE: ${{ needs.build_windows.outputs.sha256_py2exe }} SHA256_WIN_ZIP: ${{ needs.build_windows.outputs.sha256_win_zip }} SHA256_WIN32: ${{ needs.build_windows32.outputs.sha256_win32 }} SHA256_MACOS: ${{ needs.build_macos.outputs.sha256_macos }} @@ -338,6 +360,7 @@ jobs: SHA256_TAR: ${{ needs.build_unix.outputs.sha256_tar }} run: | echo "${{ env.SHA256_WIN }} yt-dlp.exe" >> SHA2-256SUMS + echo "${{ env.SHA256_PY2EXE }} yt-dlp_min.exe" >> SHA2-256SUMS echo "${{ env.SHA256_WIN32 }} yt-dlp_x86.exe" >> SHA2-256SUMS echo "${{ env.SHA256_MACOS }} yt-dlp_macos" >> SHA2-256SUMS echo "${{ env.SHA256_MACOS_ZIP }} yt-dlp_macos.zip" >> SHA2-256SUMS @@ -357,6 +380,7 @@ jobs: - name: Make SHA2-512SUMS file env: SHA512_WIN: ${{ needs.build_windows.outputs.sha512_win }} + SHA512_PY2EXE: ${{ needs.build_windows.outputs.sha512_py2exe }} SHA512_WIN_ZIP: ${{ needs.build_windows.outputs.sha512_win_zip }} SHA512_WIN32: ${{ needs.build_windows32.outputs.sha512_win32 }} SHA512_MACOS: ${{ needs.build_macos.outputs.sha512_macos }} @@ -365,6 +389,7 @@ jobs: SHA512_TAR: ${{ needs.build_unix.outputs.sha512_tar }} run: | echo "${{ env.SHA512_WIN }} yt-dlp.exe" >> SHA2-512SUMS + echo "${{ env.SHA512_PY2EXE }} yt-dlp_min.exe" >> SHA2-512SUMS echo "${{ env.SHA512_WIN32 }} yt-dlp_x86.exe" >> SHA2-512SUMS echo "${{ env.SHA512_MACOS }} yt-dlp_macos" >> SHA2-512SUMS echo "${{ env.SHA512_MACOS_ZIP }} yt-dlp_macos.zip" >> SHA2-512SUMS diff --git a/README.md b/README.md index edd7d298a..25dd29002 100644 --- a/README.md +++ b/README.md @@ -201,6 +201,7 @@ 
File|Description [yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged windows executable [yt-dlp_macos](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)|MacOS standalone executable [yt-dlp_macos.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos.zip)|Unpackaged MacOS executable +[yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows standalone x64 binary built with `py2exe`. Does not contain `pycryptodomex`, needs VC++14 [yt-dlp.tar.gz](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)|Source tarball. Also contains manpages, completions, etc [SHA2-512SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-512SUMS)|GNU-style SHA512 sums [SHA2-256SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-256SUMS)|GNU-style SHA256 sums diff --git a/setup.py b/setup.py index fbd2be0ae..e1c585be4 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ REQUIREMENTS = ['mutagen', 'pycryptodomex', 'websockets'] if sys.argv[1:2] == ['py2exe']: import py2exe warnings.warn( - 'Building with py2exe is not officially supported. ' + 'py2exe builds do not support pycryptodomex and needs VC++14 to run. ' 'The recommended way is to use "pyinst.py" to build using pyinstaller') params = { 'console': [{ diff --git a/yt_dlp/update.py b/yt_dlp/update.py index 127b2cbc8..e880cbd8d 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -50,9 +50,9 @@ _NON_UPDATEABLE_REASONS = { 'win_exe': None, 'zip': None, 'mac_exe': None, + 'py2exe': None, 'win_dir': 'Auto-update is not supported for unpackaged windows executable; Re-download the latest release', 'mac_dir': 'Auto-update is not supported for unpackaged MacOS executable; Re-download the latest release', - 'py2exe': 'There is no official release for py2exe executable; Build it again with the latest source code', 'source': 'You cannot update when running from source code; Use git to pull the latest changes', 'unknown': 'It looks like you installed yt-dlp with a package manager, pip, setup.py or a tarball; Use that to update', } @@ -120,9 +120,10 @@ def run_update(ydl): version_labels = { 'zip_3': '', - 'exe_64': '.exe', - 'exe_32': '_x86.exe', - 'mac_64': '_macos', + 'win_exe_64': '.exe', + 'py2exe_64': '_min.exe', + 'win_exe_32': '_x86.exe', + 'mac_exe_64': '_macos', } def get_bin_info(bin_or_exe, version): @@ -144,9 +145,8 @@ def run_update(ydl): # PyInstaller variant = detect_variant() - if variant == 'win_exe': - exe = filename - directory = os.path.dirname(exe) + if variant in ('win_exe', 'py2exe'): + directory = os.path.dirname(filename) if not os.access(directory, os.W_OK): return report_permission_error(directory) try: @@ -157,7 +157,7 @@ def run_update(ydl): try: arch = platform.architecture()[0][:2] - url = get_bin_info('exe', arch).get('browser_download_url') + url = get_bin_info(variant, arch).get('browser_download_url') if not url: return report_network_error('fetch updates') urlh = ydl._opener.open(url) @@ -203,9 +203,9 @@ def run_update(ydl): report_unable('delete the old version') elif variant in ('zip', 'mac_exe'): - pack_type = ('mac', '64') if variant == 'mac_exe' else ('zip', '3') + pack_type = '3' if variant == 'zip' else '64' try: - url = get_bin_info(*pack_type).get('browser_download_url') + url = get_bin_info(variant, pack_type).get('browser_download_url') if not url: return report_network_error('fetch updates') urlh = ydl._opener.open(url) @@ -214,7 +214,7 
@@ def run_update(ydl): except (IOError, OSError): return report_network_error('download the latest version') - expected_sum = get_sha256sum(*pack_type) + expected_sum = get_sha256sum(variant, pack_type) if not expected_sum: ydl.report_warning('no hash information found for the release') elif hashlib.sha256(newcontent).hexdigest() != expected_sum: From 733d8e8f9935534742408318274912704c5fae09 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Thu, 21 Oct 2021 18:27:58 +0530 Subject: [PATCH 0104/2552] [build] Refactor `pyinst.py` and misc cleanup Closes #1361 --- .github/workflows/build.yml | 45 +++++---- Makefile | 6 +- README.md | 38 +++++--- pyinst.py | 177 ++++++++++++++++++++---------------- yt_dlp/update.py | 18 ++-- 5 files changed, 156 insertions(+), 128 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b2da4063b..3082884aa 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -8,7 +8,6 @@ on: jobs: build_unix: runs-on: ubuntu-latest - outputs: ytdlp_version: ${{ steps.bump_version.outputs.ytdlp_version }} upload_url: ${{ steps.create_release.outputs.upload_url }} @@ -69,6 +68,7 @@ jobs: - name: Get SHA2-512SUMS for yt-dlp.tar.gz id: sha512_tar run: echo "::set-output name=sha512_tar::$(sha512sum yt-dlp.tar.gz | awk '{print $1}')" + - name: Install dependencies for pypi env: PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} @@ -85,6 +85,7 @@ jobs: rm -rf dist/* python setup.py sdist bdist_wheel twine upload dist/* + - name: Install SSH private key env: BREW_TOKEN: ${{ secrets.BREW_TOKEN }} @@ -103,6 +104,7 @@ jobs: git -C taps/ config user.email github-actions@example.com git -C taps/ commit -am 'yt-dlp: ${{ steps.bump_version.outputs.ytdlp_version }}' git -C taps/ push + - name: Create Release id: create_release uses: actions/create-release@v1 @@ -113,7 +115,9 @@ jobs: release_name: yt-dlp ${{ steps.bump_version.outputs.ytdlp_version }} commitish: ${{ steps.push_update.outputs.head_sha }} body: | - Changelog: + See [this](https://github.com/yt-dlp/yt-dlp#release-files) for a description of the files + + #### Changelog: ${{ env.changelog }} draft: false prerelease: false @@ -140,7 +144,6 @@ jobs: build_macos: runs-on: macos-11 needs: build_unix - outputs: sha256_macos: ${{ steps.sha256_macos.outputs.sha256_macos }} sha512_macos: ${{ steps.sha512_macos.outputs.sha512_macos }} @@ -153,17 +156,15 @@ jobs: - name: Install Requirements run: | brew install coreutils - /usr/bin/pip3 install --user Pyinstaller mutagen pycryptodomex websockets + /usr/bin/python3 -m pip install -U --user pip Pyinstaller mutagen pycryptodomex websockets - name: Bump version id: bump_version - run: python devscripts/update-version.py - - name: Print version - run: echo "${{ steps.bump_version.outputs.ytdlp_version }}" + run: /usr/bin/python3 devscripts/update-version.py - name: Build lazy extractors id: lazy_extractors run: /usr/bin/python3 devscripts/make_lazy_extractors.py yt_dlp/extractor/lazy_extractors.py - name: Run PyInstaller Script - run: /usr/bin/python3 ./pyinst.py --target-architecture universal2 --onefile + run: /usr/bin/python3 pyinst.py --target-architecture universal2 --onefile - name: Upload yt-dlp MacOS binary id: upload-release-macos uses: actions/upload-release-asset@v1 @@ -182,7 +183,7 @@ jobs: run: echo "::set-output name=sha512_macos::$(sha512sum dist/yt-dlp_macos | awk '{print $1}')" - name: Run PyInstaller Script with --onedir - run: /usr/bin/python3 ./pyinst.py --target-architecture universal2 --onedir + run: 
/usr/bin/python3 pyinst.py --target-architecture universal2 --onedir - uses: papeloto/action-zip@v1 with: files: ./dist/yt-dlp_macos @@ -207,7 +208,6 @@ jobs: build_windows: runs-on: windows-latest needs: build_unix - outputs: sha256_win: ${{ steps.sha256_win.outputs.sha256_win }} sha512_win: ${{ steps.sha512_win.outputs.sha512_win }} @@ -231,8 +231,6 @@ jobs: - name: Bump version id: bump_version run: python devscripts/update-version.py - - name: Print version - run: echo "${{ steps.bump_version.outputs.ytdlp_version }}" - name: Build lazy extractors id: lazy_extractors run: python devscripts/make_lazy_extractors.py yt_dlp/extractor/lazy_extractors.py @@ -254,6 +252,7 @@ jobs: - name: Get SHA2-512SUMS for yt-dlp.exe id: sha512_win run: echo "::set-output name=sha512_win::$((Get-FileHash dist\yt-dlp.exe -Algorithm SHA512).Hash.ToLower())" + - name: Run PyInstaller Script with --onedir run: python pyinst.py --onedir - uses: papeloto/action-zip@v1 @@ -319,8 +318,6 @@ jobs: - name: Bump version id: bump_version run: python devscripts/update-version.py - - name: Print version - run: echo "${{ steps.bump_version.outputs.ytdlp_version }}" - name: Build lazy extractors id: lazy_extractors run: python devscripts/make_lazy_extractors.py yt_dlp/extractor/lazy_extractors.py @@ -350,23 +347,23 @@ jobs: steps: - name: Make SHA2-256SUMS file env: + SHA256_BIN: ${{ needs.build_unix.outputs.sha256_bin }} + SHA256_TAR: ${{ needs.build_unix.outputs.sha256_tar }} SHA256_WIN: ${{ needs.build_windows.outputs.sha256_win }} SHA256_PY2EXE: ${{ needs.build_windows.outputs.sha256_py2exe }} SHA256_WIN_ZIP: ${{ needs.build_windows.outputs.sha256_win_zip }} SHA256_WIN32: ${{ needs.build_windows32.outputs.sha256_win32 }} SHA256_MACOS: ${{ needs.build_macos.outputs.sha256_macos }} SHA256_MACOS_ZIP: ${{ needs.build_macos.outputs.sha256_macos_zip }} - SHA256_BIN: ${{ needs.build_unix.outputs.sha256_bin }} - SHA256_TAR: ${{ needs.build_unix.outputs.sha256_tar }} run: | + echo "${{ env.SHA256_BIN }} yt-dlp" >> SHA2-256SUMS + echo "${{ env.SHA256_TAR }} yt-dlp.tar.gz" >> SHA2-256SUMS echo "${{ env.SHA256_WIN }} yt-dlp.exe" >> SHA2-256SUMS echo "${{ env.SHA256_PY2EXE }} yt-dlp_min.exe" >> SHA2-256SUMS echo "${{ env.SHA256_WIN32 }} yt-dlp_x86.exe" >> SHA2-256SUMS + echo "${{ env.SHA256_WIN_ZIP }} yt-dlp_win.zip" >> SHA2-256SUMS echo "${{ env.SHA256_MACOS }} yt-dlp_macos" >> SHA2-256SUMS echo "${{ env.SHA256_MACOS_ZIP }} yt-dlp_macos.zip" >> SHA2-256SUMS - echo "${{ env.SHA256_BIN }} yt-dlp" >> SHA2-256SUMS - echo "${{ env.SHA256_TAR }} yt-dlp.tar.gz" >> SHA2-256SUMS - echo "${{ env.SHA256_WIN_ZIP }} yt-dlp_win.zip" >> SHA2-256SUMS - name: Upload 256SUMS file id: upload-sums uses: actions/upload-release-asset@v1 @@ -379,23 +376,23 @@ jobs: asset_content_type: text/plain - name: Make SHA2-512SUMS file env: + SHA512_BIN: ${{ needs.build_unix.outputs.sha512_bin }} + SHA512_TAR: ${{ needs.build_unix.outputs.sha512_tar }} SHA512_WIN: ${{ needs.build_windows.outputs.sha512_win }} SHA512_PY2EXE: ${{ needs.build_windows.outputs.sha512_py2exe }} SHA512_WIN_ZIP: ${{ needs.build_windows.outputs.sha512_win_zip }} SHA512_WIN32: ${{ needs.build_windows32.outputs.sha512_win32 }} SHA512_MACOS: ${{ needs.build_macos.outputs.sha512_macos }} SHA512_MACOS_ZIP: ${{ needs.build_macos.outputs.sha512_macos_zip }} - SHA512_BIN: ${{ needs.build_unix.outputs.sha512_bin }} - SHA512_TAR: ${{ needs.build_unix.outputs.sha512_tar }} run: | + echo "${{ env.SHA512_BIN }} yt-dlp" >> SHA2-512SUMS + echo "${{ env.SHA512_TAR }} yt-dlp.tar.gz" >> SHA2-512SUMS echo 
"${{ env.SHA512_WIN }} yt-dlp.exe" >> SHA2-512SUMS + echo "${{ env.SHA512_WIN_ZIP }} yt-dlp_win.zip" >> SHA2-512SUMS echo "${{ env.SHA512_PY2EXE }} yt-dlp_min.exe" >> SHA2-512SUMS echo "${{ env.SHA512_WIN32 }} yt-dlp_x86.exe" >> SHA2-512SUMS echo "${{ env.SHA512_MACOS }} yt-dlp_macos" >> SHA2-512SUMS echo "${{ env.SHA512_MACOS_ZIP }} yt-dlp_macos.zip" >> SHA2-512SUMS - echo "${{ env.SHA512_BIN }} yt-dlp" >> SHA2-512SUMS - echo "${{ env.SHA512_TAR }} yt-dlp.tar.gz" >> SHA2-512SUMS - echo "${{ env.SHA512_WIN_ZIP }} yt-dlp_win.zip" >> SHA2-512SUMS - name: Upload 512SUMS file id: upload-512sums uses: actions/upload-release-asset@v1 diff --git a/Makefile b/Makefile index ee199e448..10d6ab856 100644 --- a/Makefile +++ b/Makefile @@ -40,9 +40,9 @@ SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then ech # set markdown input format to "markdown-smart" for pandoc version 2 and to "markdown" for pandoc prior to version 2 MARKDOWN = $(shell if [ `pandoc -v | head -n1 | cut -d" " -f2 | head -c1` = "2" ]; then echo markdown-smart; else echo markdown; fi) -install: lazy_extractors yt-dlp yt-dlp.1 completions - install -Dm755 yt-dlp $(DESTDIR)$(BINDIR) - install -Dm644 yt-dlp.1 $(DESTDIR)$(MANDIR)/man1 +install: lazy-extractors yt-dlp yt-dlp.1 completions + install -Dm755 yt-dlp $(DESTDIR)$(BINDIR)/yt-dlp + install -Dm644 yt-dlp.1 $(DESTDIR)$(MANDIR)/man1/yt-dlp.1 install -Dm644 completions/bash/yt-dlp $(DESTDIR)$(SHAREDIR)/bash-completion/completions/yt-dlp install -Dm644 completions/zsh/_yt-dlp $(DESTDIR)$(SHAREDIR)/zsh/site-functions/_yt-dlp install -Dm644 completions/fish/yt-dlp.fish $(DESTDIR)$(SHAREDIR)/fish/vendor_completions.d/yt-dlp.fish diff --git a/README.md b/README.md index 25dd29002..cfdcadd0d 100644 --- a/README.md +++ b/README.md @@ -155,11 +155,10 @@ For ease of use, a few more compat options are available: yt-dlp is not platform specific. So it should work on your Unix box, on Windows or on macOS You can install yt-dlp using one of the following methods: -* Download the binary from the [latest release](https://github.com/yt-dlp/yt-dlp/releases/latest) +* Download [the binary](#release-files) from the [latest release](https://github.com/yt-dlp/yt-dlp/releases/latest) * With Homebrew, `brew install yt-dlp/taps/yt-dlp` * Use [PyPI package](https://pypi.org/project/yt-dlp): `python3 -m pip install --upgrade yt-dlp` -* Use pip+git: `python3 -m pip install --upgrade git+https://github.com/yt-dlp/yt-dlp.git@release` -* Install master branch: `python3 -m pip install --upgrade git+https://github.com/yt-dlp/yt-dlp` +* Install master branch: `python3 -m pip3 install -U https://github.com/yt-dlp/yt-dlp/archive/master.zip` Note that on some systems, you may need to use `py` or `python` instead of `python3` @@ -193,15 +192,27 @@ If you have installed using Homebrew, run `brew upgrade yt-dlp/taps/yt-dlp` ### RELEASE FILES +#### Recommended + +File|Description +:---|:--- +[yt-dlp](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)|Platform independant binary. Needs Python (Recommended for **UNIX-like systems**) +[yt-dlp.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)|Windows standalone x64 binary (Recommended for **Windows**) + +#### Alternatives + File|Description :---|:--- -[yt-dlp](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)|Platform independant binary. 
Needs Python (Recommended for UNIX like OSes) -[yt-dlp.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)|Windows standalone x64 binary (Recommended for Windows) -[yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows standalone x86 (32bit) binary -[yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged windows executable [yt-dlp_macos](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)|MacOS standalone executable -[yt-dlp_macos.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos.zip)|Unpackaged MacOS executable -[yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows standalone x64 binary built with `py2exe`. Does not contain `pycryptodomex`, needs VC++14 +[yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows standalone x86 (32bit) binary +[yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows standalone x64 binary built with `py2exe`.<br/> Does not contain `pycryptodomex`, needs VC++14 +[yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged windows executable (No auto-update) +[yt-dlp_macos.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos.zip)|Unpackaged MacOS executable (No auto-update) + +#### Misc + +File|Description +:---|:--- [yt-dlp.tar.gz](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)|Source tarball. Also contains manpages, completions, etc [SHA2-512SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-512SUMS)|GNU-style SHA512 sums [SHA2-256SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-256SUMS)|GNU-style SHA256 sums @@ -239,13 +250,10 @@ The windows releases are already built with the python interpreter, mutagen, pyc **For Windows**: To build the Windows executable, you must have pyinstaller (and optionally mutagen, pycryptodomex, websockets) - python3 -m pip install -U -r requirements.txt - -Once you have all the necessary dependencies installed, just run `py pyinst.py`. The executable will be built for the same architecture (32/64 bit) as the python used to build it. - -You can also build the executable without any version info or metadata by using: +Once you have all the necessary dependencies installed, just run `pyinst.py`. The executable will be built for the same architecture (32/64 bit) as the python used to build it. 
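For orientation while reading the `pyinst.py` refactor further below: the build output name is a pure function of the host platform and the `--onedir`/`--onefile` choice. Condensed into a standalone sketch (not the script itself, and ignoring the unsupported-OS check):

```python
import platform

def final_artifact(opts):
    """Mirror pyinst.py's naming: dist/[yt-dlp/]yt-dlp{suffix}[.exe]."""
    os_name = platform.system()              # 'Windows', 'Darwin', ...
    arch = platform.architecture()[0][:2]    # '32' or '64'
    suffix = '_x86' if arch == '32' else '_macos' if os_name == 'Darwin' else ''
    return 'dist/%syt-dlp%s%s' % (
        'yt-dlp/' if '--onedir' in opts else '',   # --onedir nests a directory
        suffix, '.exe' if os_name == 'Windows' else '')

print(final_artifact(['--onefile']))  # e.g. 'dist/yt-dlp.exe' on 64-bit Windows
```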
- pyinstaller.exe yt_dlp\__main__.py --onefile --name yt-dlp + py -m pip install -U pyinstaller -r requirements.txt + py pyinst.py Note that pyinstaller [does not support](https://github.com/pyinstaller/pyinstaller#requirements-and-tested-platforms) Python installed from the Windows store without using a virtual environment diff --git a/pyinst.py b/pyinst.py index 5aa83f9da..0a695289b 100644 --- a/pyinst.py +++ b/pyinst.py @@ -1,84 +1,85 @@ #!/usr/bin/env python3 # coding: utf-8 - -from __future__ import unicode_literals -import sys +import os import platform - +import sys from PyInstaller.utils.hooks import collect_submodules -if platform.system() == 'Windows': + +OS_NAME = platform.system() +if OS_NAME == 'Windows': from PyInstaller.utils.win32.versioninfo import ( VarStruct, VarFileInfo, StringStruct, StringTable, StringFileInfo, FixedFileInfo, VSVersionInfo, SetVersion, ) -import PyInstaller.__main__ +elif OS_NAME == 'Darwin': + pass +else: + raise Exception('{OS_NAME} is not supported') -suffix = '' -arch = platform.architecture()[0][:2] -assert arch in ('32', '64') -_x86 = '_x86' if arch == '32' else '' +ARCH = platform.architecture()[0][:2] -if platform.system() == 'Windows': - suffix = _x86 -if platform.system() == 'Darwin': - suffix = '_macos' -# Compatability with older arguments -opts = sys.argv[1:] -if opts[0:1] in (['32'], ['64']): - if arch != opts[0]: - raise Exception(f'{opts[0]}bit executable cannot be built on a {arch}bit system') - opts = opts[1:] -opts = opts or ['--onefile'] +def main(): + opts = parse_options() + version = read_version() -print(f'Building {arch}bit version with options {opts}') + suffix = '_x86' if ARCH == '32' else '_macos' if OS_NAME == 'Darwin' else '' + final_file = 'dist/%syt-dlp%s%s' % ( + 'yt-dlp/' if '--onedir' in opts else '', suffix, '.exe' if OS_NAME == 'Windows' else '') -FILE_DESCRIPTION = 'yt-dlp%s' % (' (32 Bit)' if _x86 else '') + print(f'Building yt-dlp v{version} {ARCH}bit for {OS_NAME} with options {opts}') + print('Remember to update the version using "devscripts/update-version.py"') + if not os.path.isfile('yt_dlp/extractor/lazy_extractors.py'): + print('WARNING: Building without lazy_extractors. 
Run ' + '"devscripts/make_lazy_extractors.py" "yt_dlp/extractor/lazy_extractors.py" ' + 'to build lazy extractors', file=sys.stderr) + print(f'Destination: {final_file}\n') -exec(compile(open('yt_dlp/version.py').read(), 'yt_dlp/version.py', 'exec')) -VERSION = locals()['__version__'] + opts = [ + f'--name=yt-dlp{suffix}', + '--icon=devscripts/logo.ico', + '--upx-exclude=vcruntime140.dll', + '--noconfirm', + *dependancy_options(), + *opts, + 'yt_dlp/__main__.py', + ] + print(f'Running PyInstaller with {opts}') -VERSION_LIST = VERSION.split('.') -VERSION_LIST = list(map(int, VERSION_LIST)) + [0] * (4 - len(VERSION_LIST)) + import PyInstaller.__main__ -print('Version: %s%s' % (VERSION, _x86)) -print('Remember to update the version using devscipts\\update-version.py') + PyInstaller.__main__.run(opts) + + set_version_info(final_file, version) + + +def parse_options(): + # Compatability with older arguments + opts = sys.argv[1:] + if opts[0:1] in (['32'], ['64']): + if ARCH != opts[0]: + raise Exception(f'{opts[0]}bit executable cannot be built on a {ARCH}bit system') + opts = opts[1:] + return opts or ['--onefile'] -if platform.system() == 'Windows': - VERSION_FILE = VSVersionInfo( - ffi=FixedFileInfo( - filevers=VERSION_LIST, - prodvers=VERSION_LIST, - mask=0x3F, - flags=0x0, - OS=0x4, - fileType=0x1, - subtype=0x0, - date=(0, 0), - ), - kids=[ - StringFileInfo([ - StringTable( - '040904B0', [ - StringStruct('Comments', 'yt-dlp%s Command Line Interface.' % _x86), - StringStruct('CompanyName', 'https://github.com/yt-dlp'), - StringStruct('FileDescription', FILE_DESCRIPTION), - StringStruct('FileVersion', VERSION), - StringStruct('InternalName', 'yt-dlp%s' % _x86), - StringStruct( - 'LegalCopyright', - 'pukkandan.ytdlp@gmail.com | UNLICENSE', - ), - StringStruct('OriginalFilename', 'yt-dlp%s.exe' % _x86), - StringStruct('ProductName', 'yt-dlp%s' % _x86), - StringStruct( - 'ProductVersion', - '%s%s on Python %s' % (VERSION, _x86, platform.python_version())), - ])]), - VarFileInfo([VarStruct('Translation', [0, 1200])]) - ] - ) + +def read_version(): + exec(compile(open('yt_dlp/version.py').read(), 'yt_dlp/version.py', 'exec')) + return locals()['__version__'] + + +def version_to_list(version): + version_list = version.split('.') + return list(map(int, version_list)) + [0] * (4 - len(version_list)) + + +def dependancy_options(): + dependancies = [pycryptodome_module(), 'mutagen'] + collect_submodules('websockets') + excluded_modules = ['test', 'ytdlp_plugins', 'youtube-dl', 'youtube-dlc'] + + yield from (f'--hidden-import={module}' for module in dependancies) + yield from (f'--exclude-module={module}' for module in excluded_modules) def pycryptodome_module(): @@ -95,19 +96,41 @@ def pycryptodome_module(): return 'Cryptodome' -dependancies = [pycryptodome_module(), 'mutagen'] + collect_submodules('websockets') -excluded_modules = ['test', 'ytdlp_plugins', 'youtube-dl', 'youtube-dlc'] +def set_version_info(exe, version): + if OS_NAME == 'Windows': + windows_set_version(exe, version) + + +def windows_set_version(exe, version): + version_list = version_to_list(version) + suffix = '_x86' if ARCH == '32' else '' + SetVersion(exe, VSVersionInfo( + ffi=FixedFileInfo( + filevers=version_list, + prodvers=version_list, + mask=0x3F, + flags=0x0, + OS=0x4, + fileType=0x1, + subtype=0x0, + date=(0, 0), + ), + kids=[ + StringFileInfo([StringTable('040904B0', [ + StringStruct('Comments', 'yt-dlp%s Command Line Interface.' 
% suffix), + StringStruct('CompanyName', 'https://github.com/yt-dlp'), + StringStruct('FileDescription', 'yt-dlp%s' % (' (32 Bit)' if ARCH == '32' else '')), + StringStruct('FileVersion', version), + StringStruct('InternalName', f'yt-dlp{suffix}'), + StringStruct('LegalCopyright', 'pukkandan.ytdlp@gmail.com | UNLICENSE'), + StringStruct('OriginalFilename', f'yt-dlp{suffix}.exe'), + StringStruct('ProductName', f'yt-dlp{suffix}'), + StringStruct( + 'ProductVersion', f'{version}{suffix} on Python {platform.python_version()}'), + ])]), VarFileInfo([VarStruct('Translation', [0, 1200])]) + ] + )) -PyInstaller.__main__.run([ - '--name=yt-dlp%s' % suffix, - '--icon=devscripts/logo.ico', - *[f'--exclude-module={module}' for module in excluded_modules], - *[f'--hidden-import={module}' for module in dependancies], - '--upx-exclude=vcruntime140.dll', - '--noconfirm', - *opts, - 'yt_dlp/__main__.py', -]) -if platform.system() == 'Windows': - SetVersion('dist/%syt-dlp%s.exe' % ('yt-dlp/' if '--onedir' in opts else '', _x86), VERSION_FILE) +if __name__ == '__main__': + main() diff --git a/yt_dlp/update.py b/yt_dlp/update.py index e880cbd8d..9fadae90c 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -167,35 +167,35 @@ def run_update(ydl): return report_network_error('download latest version') try: - with open(exe + '.new', 'wb') as outf: + with open(filename + '.new', 'wb') as outf: outf.write(newcontent) except (IOError, OSError): - return report_permission_error(f'{exe}.new') + return report_permission_error(f'{filename}.new') - expected_sum = get_sha256sum('exe', arch) + expected_sum = get_sha256sum(variant, arch) if not expected_sum: ydl.report_warning('no hash information found for the release') - elif calc_sha256sum(exe + '.new') != expected_sum: + elif calc_sha256sum(filename + '.new') != expected_sum: report_network_error('verify the new executable') try: - os.remove(exe + '.new') + os.remove(filename + '.new') except OSError: return report_unable('remove corrupt download') try: - os.rename(exe, exe + '.old') + os.rename(filename, filename + '.old') except (IOError, OSError): return report_unable('move current version') try: - os.rename(exe + '.new', exe) + os.rename(filename + '.new', filename) except (IOError, OSError): report_unable('overwrite current version') - os.rename(exe + '.old', exe) + os.rename(filename + '.old', filename) return try: # Continues to run in the background Popen( - 'ping 127.0.0.1 -n 5 -w 1000 & del /F "%s.old"' % exe, + 'ping 127.0.0.1 -n 5 -w 1000 & del /F "%s.old"' % filename, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) ydl.to_screen('Updated yt-dlp to version %s' % version_id) return True # Exit app From f0ded3dad3d751e697d2938d60f369b4cd409170 Mon Sep 17 00:00:00 2001 From: jfogelman <jfogelman@users.noreply.github.com> Date: Thu, 21 Oct 2021 15:36:03 -0400 Subject: [PATCH 0105/2552] [AdobePass] Fix RCN MSO (#1349) Authored by: jfogelman --- yt_dlp/extractor/adobepass.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py index 9378c33cd..bebcafa6b 100644 --- a/yt_dlp/extractor/adobepass.py +++ b/yt_dlp/extractor/adobepass.py @@ -39,8 +39,8 @@ MSO_INFO = { }, 'RCN': { 'name': 'RCN', - 'username_field': 'UserName', - 'password_field': 'UserPassword', + 'username_field': 'username', + 'password_field': 'password', }, 'Rogers': { 'name': 'Rogers', From 19b824f6939b0c13c6de1297faee2e70206ce6c4 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> 
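One pattern recurs throughout the `yt_dlp/update.py` hunks above: the new binary is written to `<file>.new`, verified against the published SHA-256, and only then swapped in via `<file>.old`, so a corrupt download never clobbers the running executable. Stripped to its core (an illustrative sketch, not the actual `run_update`):

```python
import hashlib
import os

def swap_in(filename, newcontent, expected_sum=None):
    """Replace `filename` with `newcontent`, keeping a rollback copy (sketch)."""
    with open(filename + '.new', 'wb') as f:
        f.write(newcontent)
    if expected_sum and hashlib.sha256(newcontent).hexdigest() != expected_sum:
        os.remove(filename + '.new')         # corrupt download; keep old binary
        raise OSError('checksum mismatch')
    os.rename(filename, filename + '.old')   # rollback copy
    os.rename(filename + '.new', filename)
```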
Date: Fri, 22 Oct 2021 04:34:22 +0530 Subject: [PATCH 0106/2552] Re-implement deprecated option `--id` Despite `--title`, `--literal` etc being deprecated, `--id` is still documented in youtube-dl and so should be kept --- README.md | 2 +- yt_dlp/__init__.py | 14 +++++++++----- yt_dlp/options.py | 3 +++ 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index cfdcadd0d..3ca308f87 100644 --- a/README.md +++ b/README.md @@ -1658,6 +1658,7 @@ While these options still work, their use is not recommended since there are oth --print-json -j --no-simulate --autonumber-size NUMBER Use string formatting. Eg: %(autonumber)03d --autonumber-start NUMBER Use internal field formatting like %(autonumber+NUMBER)s + --id -o "%(id)s.%(ext)s" --metadata-from-title FORMAT --parse-metadata "%(title)s:FORMAT" --hls-prefer-native --downloader "m3u8:native" --hls-prefer-ffmpeg --downloader "m3u8:ffmpeg" @@ -1724,7 +1725,6 @@ These options may no longer work as intended #### Removed These options were deprecated since 2014 and have now been entirely removed - --id -o "%(id)s.%(ext)s" -A, --auto-number -o "%(autonumber)s-%(id)s.%(ext)s" -t, --title -o "%(title)s-%(id)s.%(ext)s" -l, --literal -o accepts literal names diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index b952cc062..d97d4af64 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -259,6 +259,9 @@ def _real_main(argv=None): compat_opts = opts.compat_opts + def report_conflict(arg1, arg2): + warnings.append(f'{arg2} is ignored since {arg1} was given') + def _unused_compat_opt(name): if name not in compat_opts: return False @@ -290,10 +293,14 @@ def _real_main(argv=None): if _video_multistreams_set is False and _audio_multistreams_set is False: _unused_compat_opt('multistreams') outtmpl_default = opts.outtmpl.get('default') + if opts.useid: + if outtmpl_default is None: + outtmpl_default = opts.outtmpl['default'] = '%(id)s.%(ext)s' + else: + report_conflict('--output', '--id') if 'filename' in compat_opts: if outtmpl_default is None: - outtmpl_default = '%(title)s-%(id)s.%(ext)s' - opts.outtmpl.update({'default': outtmpl_default}) + outtmpl_default = opts.outtmpl['default'] = '%(title)s-%(id)s.%(ext)s' else: _unused_compat_opt('filename') @@ -366,9 +373,6 @@ def _real_main(argv=None): opts.addchapters = True opts.remove_chapters = opts.remove_chapters or [] - def report_conflict(arg1, arg2): - warnings.append('%s is ignored since %s was given' % (arg2, arg1)) - if (opts.remove_chapters or sponsorblock_query) and opts.sponskrub is not False: if opts.sponskrub: if opts.remove_chapters: diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 0638e8642..719a1bce4 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -975,6 +975,9 @@ def parseOpts(overrideArguments=None): '--no-batch-file', dest='batchfile', action='store_const', const=None, help='Do not read URLs from batch file (default)') + filesystem.add_option( + '--id', default=False, + action='store_true', dest='useid', help=optparse.SUPPRESS_HELP) filesystem.add_option( '-P', '--paths', metavar='[TYPES:]PATH', dest='paths', default={}, type='str', From ef58c47637625089cc7dc7326e7ce67a9c15f5e0 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Mon, 18 Oct 2021 09:19:37 +0530 Subject: [PATCH 0107/2552] [SponsorBlock] Obey `extractor-retries` and `sleep-requests` --- yt_dlp/postprocessor/sponsorblock.py | 33 +++++++++++++++++++--------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git 
a/yt_dlp/postprocessor/sponsorblock.py b/yt_dlp/postprocessor/sponsorblock.py index 7265a9de7..70c5462d1 100644 --- a/yt_dlp/postprocessor/sponsorblock.py +++ b/yt_dlp/postprocessor/sponsorblock.py @@ -1,6 +1,8 @@ +from hashlib import sha256 +import itertools import json import re -from hashlib import sha256 +import time from .ffmpeg import FFmpegPostProcessor from ..compat import compat_urllib_parse_urlencode, compat_HTTPError @@ -33,6 +35,7 @@ class SponsorBlockPP(FFmpegPostProcessor): self.to_screen(f'SponsorBlock is not supported for {extractor}') return [], info + self.to_screen('Fetching SponsorBlock segments') info['sponsorblock_chapters'] = self._get_sponsor_chapters(info, info['duration']) return [], info @@ -79,18 +82,28 @@ class SponsorBlockPP(FFmpegPostProcessor): 'service': service, 'categories': json.dumps(self._categories), }) + self.write_debug(f'SponsorBlock query: {url}') for d in self._get_json(url): if d['videoID'] == video_id: return d['segments'] return [] def _get_json(self, url): - self.write_debug(f'SponsorBlock query: {url}') - try: - rsp = self._downloader.urlopen(sanitized_Request(url)) - except network_exceptions as e: - if isinstance(e, compat_HTTPError) and e.code == 404: - return [] - raise PostProcessingError(f'Unable to communicate with SponsorBlock API - {e}') - - return json.loads(rsp.read().decode(rsp.info().get_param('charset') or 'utf-8')) + # While this is not an extractor, it behaves similar to one and + # so obey extractor_retries and sleep_interval_requests + max_retries = self.get_param('extractor_retries', 3) + sleep_interval = self.get_param('sleep_interval_requests') or 0 + for retries in itertools.count(): + try: + rsp = self._downloader.urlopen(sanitized_Request(url)) + return json.loads(rsp.read().decode(rsp.info().get_param('charset') or 'utf-8')) + except network_exceptions as e: + if isinstance(e, compat_HTTPError) and e.code == 404: + return [] + if retries < max_retries: + self.report_warning(f'{e}. 
Retrying...') + if sleep_interval > 0: + self.to_screen(f'Sleeping {sleep_interval} seconds ...') + time.sleep(sleep_interval) + continue + raise PostProcessingError(f'Unable to communicate with SponsorBlock API: {e}') From 0f6e60bb5722f03c6b64712f70aaf9b0b6915795 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Thu, 21 Oct 2021 23:39:50 +0000 Subject: [PATCH 0108/2552] [tagesschau] Fix extractor (#1227) Closes #1124 Authored by: u-spec-png --- yt_dlp/extractor/extractors.py | 5 +- yt_dlp/extractor/tagesschau.py | 279 +++++++-------------------------- 2 files changed, 62 insertions(+), 222 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index f4f817fcb..8ea7d2ed8 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1387,10 +1387,7 @@ from .svt import ( from .swrmediathek import SWRMediathekIE from .syfy import SyfyIE from .sztvhu import SztvHuIE -from .tagesschau import ( - TagesschauPlayerIE, - TagesschauIE, -) +from .tagesschau import TagesschauIE from .tass import TassIE from .tbs import TBSIE from .tdslifeway import TDSLifewayIE diff --git a/yt_dlp/extractor/tagesschau.py b/yt_dlp/extractor/tagesschau.py index 25c200455..6e03d0a7d 100644 --- a/yt_dlp/extractor/tagesschau.py +++ b/yt_dlp/extractor/tagesschau.py @@ -5,177 +5,63 @@ import re from .common import InfoExtractor from ..utils import ( - determine_ext, js_to_json, - parse_iso8601, - parse_filesize, + extract_attributes, + try_get, + int_or_none, ) -class TagesschauPlayerIE(InfoExtractor): - IE_NAME = 'tagesschau:player' - _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?P<kind>audio|video)/(?P=kind)-(?P<id>\d+)~player(?:_[^/?#&]+)?\.html' - - _TESTS = [{ - 'url': 'http://www.tagesschau.de/multimedia/video/video-179517~player.html', - 'md5': '8d09548d5c15debad38bee3a4d15ca21', - 'info_dict': { - 'id': '179517', - 'ext': 'mp4', - 'title': 'Marie Kristin Boese, ARD Berlin, über den zukünftigen Kurs der AfD', - 'thumbnail': r're:^https?:.*\.jpg$', - 'formats': 'mincount:6', - }, - }, { - 'url': 'https://www.tagesschau.de/multimedia/audio/audio-29417~player.html', - 'md5': '76e6eec6ebd40740671cf0a2c88617e5', - 'info_dict': { - 'id': '29417', - 'ext': 'mp3', - 'title': 'Trabi - Bye, bye Rennpappe', - 'thumbnail': r're:^https?:.*\.jpg$', - 'formats': 'mincount:2', - }, - }, { - 'url': 'http://www.tagesschau.de/multimedia/audio/audio-29417~player_autoplay-true.html', - 'only_matching': True, - }] - - _FORMATS = { - 'xs': {'quality': 0}, - 's': {'width': 320, 'height': 180, 'quality': 1}, - 'm': {'width': 512, 'height': 288, 'quality': 2}, - 'l': {'width': 960, 'height': 540, 'quality': 3}, - 'xl': {'width': 1280, 'height': 720, 'quality': 4}, - 'xxl': {'quality': 5}, - } - - def _extract_via_api(self, kind, video_id): - info = self._download_json( - 'https://www.tagesschau.de/api/multimedia/{0}/{0}-{1}.json'.format(kind, video_id), - video_id) - title = info['headline'] - formats = [] - for media in info['mediadata']: - for format_id, format_url in media.items(): - if determine_ext(format_url) == 'm3u8': - formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls')) - else: - formats.append({ - 'url': format_url, - 'format_id': format_id, - 'vcodec': 'none' if kind == 'audio' else None, - }) - self._sort_formats(formats) - timestamp = parse_iso8601(info.get('date')) - return { - 'id': video_id, - 'title': title, - 'timestamp': timestamp, - 
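The loop added to the SponsorBlock postprocessor above is a generic bounded-retry-with-sleep pattern. In isolation it looks roughly like this (a sketch; `OSError` stands in for yt-dlp's real `network_exceptions` tuple):

```python
import itertools
import time

def fetch_with_retries(fetch, max_retries=3, sleep_interval=0):
    """Call fetch() until it succeeds or max_retries is exhausted."""
    for retries in itertools.count():
        try:
            return fetch()
        except OSError as e:
            if retries >= max_retries:
                raise
            print(f'{e}. Retrying...')
            if sleep_interval > 0:
                time.sleep(sleep_interval)
```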
'formats': formats, - } - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - - # kind = mobj.group('kind').lower() - # if kind == 'video': - # return self._extract_via_api(kind, video_id) - - # JSON api does not provide some audio formats (e.g. ogg) thus - # extracting audio via webpage - - webpage = self._download_webpage(url, video_id) - - title = self._og_search_title(webpage).strip() - formats = [] - - for media_json in re.findall(r'({src\s*:\s*["\']http[^}]+type\s*:[^}]+})', webpage): - media = self._parse_json(js_to_json(media_json), video_id, fatal=False) - if not media: - continue - src = media.get('src') - if not src: - return - quality = media.get('quality') - kind = media.get('type', '').split('/')[0] - ext = determine_ext(src) - f = { - 'url': src, - 'format_id': '%s_%s' % (quality, ext) if quality else ext, - 'ext': ext, - 'vcodec': 'none' if kind == 'audio' else None, - } - f.update(self._FORMATS.get(quality, {})) - formats.append(f) - - self._sort_formats(formats) - - thumbnail = self._og_search_thumbnail(webpage) - - return { - 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'formats': formats, - } - - class TagesschauIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/(?P<path>[^/]+/(?:[^/]+/)*?(?P<id>[^/#?]+?(?:-?[0-9]+)?))(?:~_?[^/#?]+?)?\.html' _TESTS = [{ 'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html', - 'md5': 'f7c27a0eff3bfe8c7727e65f8fe1b1e6', + 'md5': '7a7287612fa881a1ae1d087df45c2fd6', 'info_dict': { - 'id': 'video-102143', + 'id': 'video-102143-1', 'ext': 'mp4', 'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt', - 'description': '18.07.2015 20:10 Uhr', - 'thumbnail': r're:^https?:.*\.jpg$', }, }, { 'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html', 'md5': '3c54c1f6243d279b706bde660ceec633', 'info_dict': { - 'id': 'ts-5727', + 'id': 'ts-5727-1', 'ext': 'mp4', - 'title': 'Sendung: tagesschau \t04.12.2014 20:00 Uhr', - 'description': 'md5:695c01bfd98b7e313c501386327aea59', - 'thumbnail': r're:^https?:.*\.jpg$', + 'title': 'Ganze Sendung', }, }, { # exclusive audio 'url': 'http://www.tagesschau.de/multimedia/audio/audio-29417.html', - 'md5': '76e6eec6ebd40740671cf0a2c88617e5', + 'md5': '4cf22023c285f35e99c24d290ba58cc9', 'info_dict': { - 'id': 'audio-29417', + 'id': 'audio-29417-1', 'ext': 'mp3', - 'title': 'Trabi - Bye, bye Rennpappe', - 'description': 'md5:8687dda862cbbe2cfb2df09b56341317', - 'thumbnail': r're:^https?:.*\.jpg$', + 'title': 'Brasilianischer Präsident Bolsonaro unter Druck: Corona-Bericht wird vorgestellt', }, }, { - # audio in article 'url': 'http://www.tagesschau.de/inland/bnd-303.html', - 'md5': 'e0916c623e85fc1d2b26b78f299d3958', + 'md5': '12cfb212d9325b5ba0d52b625f1aa61c', 'info_dict': { - 'id': 'bnd-303', - 'ext': 'mp3', - 'title': 'Viele Baustellen für neuen BND-Chef', - 'description': 'md5:1e69a54be3e1255b2b07cdbce5bcd8b4', - 'thumbnail': r're:^https?:.*\.jpg$', + 'id': 'bnd-303-1', + 'ext': 'mp4', + 'title': 'SPD-Gruppenbild mit Bärbel Bas nach der Fraktionssitzung | dpa', }, }, { 'url': 'http://www.tagesschau.de/inland/afd-parteitag-135.html', 'info_dict': { 'id': 'afd-parteitag-135', - 'title': 'Möchtegern-Underdog mit Machtanspruch', + 'title': 'AfD', + }, + 'playlist_count': 20, + }, { + 'url': 'https://www.tagesschau.de/multimedia/audio/audio-29417~player.html', + 'info_dict': { + 'id': 'audio-29417-1', + 'ext': 'mp3', + 'title': 'Brasilianischer Präsident Bolsonaro unter Druck: 
Corona-Bericht wird vorgestellt', }, - 'playlist_count': 2, }, { 'url': 'http://www.tagesschau.de/multimedia/sendung/tsg-3771.html', 'only_matching': True, @@ -206,62 +92,6 @@ class TagesschauIE(InfoExtractor): 'only_matching': True, }] - @classmethod - def suitable(cls, url): - return False if TagesschauPlayerIE.suitable(url) else super(TagesschauIE, cls).suitable(url) - - def _extract_formats(self, download_text, media_kind): - links = re.finditer( - r'<div class="button" title="(?P<title>[^"]*)"><a href="(?P<url>[^"]+)">(?P<name>.+?)</a></div>', - download_text) - formats = [] - for l in links: - link_url = l.group('url') - if not link_url: - continue - format_id = self._search_regex( - r'.*/[^/.]+\.([^/]+)\.[^/.]+$', link_url, 'format ID', - default=determine_ext(link_url)) - format = { - 'format_id': format_id, - 'url': l.group('url'), - 'format_name': l.group('name'), - } - title = l.group('title') - if title: - if media_kind.lower() == 'video': - m = re.match( - r'''(?x) - Video:\s*(?P<vcodec>[a-zA-Z0-9/._-]+)\s*&\#10; - (?P<width>[0-9]+)x(?P<height>[0-9]+)px&\#10; - (?P<vbr>[0-9]+)kbps&\#10; - Audio:\s*(?P<abr>[0-9]+)kbps,\s*(?P<audio_desc>[A-Za-z\.0-9]+)&\#10; - Größe:\s*(?P<filesize_approx>[0-9.,]+\s+[a-zA-Z]*B)''', - title) - if m: - format.update({ - 'format_note': m.group('audio_desc'), - 'vcodec': m.group('vcodec'), - 'width': int(m.group('width')), - 'height': int(m.group('height')), - 'abr': int(m.group('abr')), - 'vbr': int(m.group('vbr')), - 'filesize_approx': parse_filesize(m.group('filesize_approx')), - }) - else: - m = re.match( - r'(?P<format>.+?)-Format\s*:\s*(?P<abr>\d+)kbps\s*,\s*(?P<note>.+)', - title) - if m: - format.update({ - 'format_note': '%s, %s' % (m.group('format'), m.group('note')), - 'vcodec': 'none', - 'abr': int(m.group('abr')), - }) - formats.append(format) - self._sort_formats(formats) - return formats - def _real_extract(self, url): mobj = self._match_valid_url(url) video_id = mobj.group('id') or mobj.group('path') @@ -271,34 +101,46 @@ class TagesschauIE(InfoExtractor): title = self._html_search_regex( r'<span[^>]*class="headline"[^>]*>(.+?)</span>', - webpage, 'title', default=None) or self._og_search_title(webpage) - - DOWNLOAD_REGEX = r'(?s)<p>Wir bieten dieses (?P<kind>Video|Audio) in folgenden Formaten zum Download an:</p>\s*<div class="controls">(?P<links>.*?)</div>\s*<p>' - - webpage_type = self._og_search_property('type', webpage, default=None) - if webpage_type == 'website': # Article - entries = [] - for num, (entry_title, media_kind, download_text) in enumerate(re.findall( - r'(?s)<p[^>]+class="infotext"[^>]*>\s*(?:<a[^>]+>)?\s*<strong>(.+?)</strong>.*?</p>.*?%s' % DOWNLOAD_REGEX, - webpage), 1): + webpage, 'title', default=None) or self._og_search_title(webpage, fatal=False) + + entries = [] + videos = re.findall(r'<div[^>]+>', webpage) + num = 0 + for video in videos: + video = extract_attributes(video).get('data-config') + if not video: + continue + video = self._parse_json(video, video_id, transform_source=js_to_json, fatal=False) + video_formats = try_get(video, lambda x: x['mc']['_mediaArray'][0]['_mediaStreamArray']) + if not video_formats: + continue + num += 1 + for video_format in video_formats: + media_url = video_format.get('_stream') or '' + formats = [] + if media_url.endswith('master.m3u8'): + formats = self._extract_m3u8_formats(media_url, video_id, 'mp4', m3u8_id='hls') + elif media_url.endswith('.hi.mp3') and media_url.startswith('https://download'): + formats = [{ + 'url': media_url, + 'vcodec': 'none', + }] + if not 
formats: + continue entries.append({ 'id': '%s-%d' % (display_id, num), - 'title': '%s' % entry_title, - 'formats': self._extract_formats(download_text, media_kind), + 'title': try_get(video, lambda x: x['mc']['_title']), + 'duration': int_or_none(try_get(video, lambda x: x['mc']['_duration'])), + 'formats': formats }) - if len(entries) > 1: - return self.playlist_result(entries, display_id, title) - formats = entries[0]['formats'] - else: # Assume single video - download_text = self._search_regex( - DOWNLOAD_REGEX, webpage, 'download links', group='links') - media_kind = self._search_regex( - DOWNLOAD_REGEX, webpage, 'media kind', default='Video', group='kind') - formats = self._extract_formats(download_text, media_kind) - thumbnail = self._og_search_thumbnail(webpage) - description = self._html_search_regex( - r'(?s)<p class="teasertext">(.*?)</p>', - webpage, 'description', default=None) + if len(entries) > 1: + return self.playlist_result(entries, display_id, title) + formats = entries[0]['formats'] + video_info = self._search_json_ld(webpage, video_id) + description = video_info.get('description') + thumbnail = self._og_search_thumbnail(webpage) or video_info.get('thumbnail') + timestamp = video_info.get('timestamp') + title = title or video_info.get('description') self._sort_formats(formats) @@ -307,5 +149,6 @@ class TagesschauIE(InfoExtractor): 'title': title, 'thumbnail': thumbnail, 'formats': formats, + 'timestamp': timestamp, 'description': description, } From 17ec8bcfa90b80913072fdcb0cafc09c1ad79849 Mon Sep 17 00:00:00 2001 From: Damiano Amatruda <damiano.amatruda@outlook.com> Date: Fri, 22 Oct 2021 02:04:00 +0200 Subject: [PATCH 0109/2552] [microsoftstream] Add extractor (#1201) Based on: https://github.com/ytdl-org/youtube-dl/pull/24649 Fixes: https://github.com/ytdl-org/youtube-dl/issues/24440 Authored by: damianoamatruda, nixklai --- test/test_utils.py | 7 +- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/microsoftstream.py | 125 ++++++++++++++++++++++++++++ yt_dlp/utils.py | 4 +- 4 files changed, 133 insertions(+), 4 deletions(-) create mode 100644 yt_dlp/extractor/microsoftstream.py diff --git a/test/test_utils.py b/test/test_utils.py index d84c3d3ee..810ed3de4 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1163,12 +1163,15 @@ class TestUtil(unittest.TestCase): def test_parse_resolution(self): self.assertEqual(parse_resolution(None), {}) self.assertEqual(parse_resolution(''), {}) - self.assertEqual(parse_resolution('1920x1080'), {'width': 1920, 'height': 1080}) - self.assertEqual(parse_resolution('1920×1080'), {'width': 1920, 'height': 1080}) + self.assertEqual(parse_resolution(' 1920x1080'), {'width': 1920, 'height': 1080}) + self.assertEqual(parse_resolution('1920×1080 '), {'width': 1920, 'height': 1080}) self.assertEqual(parse_resolution('1920 x 1080'), {'width': 1920, 'height': 1080}) self.assertEqual(parse_resolution('720p'), {'height': 720}) self.assertEqual(parse_resolution('4k'), {'height': 2160}) self.assertEqual(parse_resolution('8K'), {'height': 4320}) + self.assertEqual(parse_resolution('pre_1920x1080_post'), {'width': 1920, 'height': 1080}) + self.assertEqual(parse_resolution('ep1x2'), {}) + self.assertEqual(parse_resolution('1920, 1080'), {'width': 1920, 'height': 1080}) def test_parse_bitrate(self): self.assertEqual(parse_bitrate(None), None) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 8ea7d2ed8..ef2b25c93 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -760,6 
+760,7 @@ from .metacritic import MetacriticIE from .mgoon import MgoonIE from .mgtv import MGTVIE from .miaopai import MiaoPaiIE +from .microsoftstream import MicrosoftStreamIE from .microsoftvirtualacademy import ( MicrosoftVirtualAcademyIE, MicrosoftVirtualAcademyCourseIE, diff --git a/yt_dlp/extractor/microsoftstream.py b/yt_dlp/extractor/microsoftstream.py new file mode 100644 index 000000000..4d5a9df1f --- /dev/null +++ b/yt_dlp/extractor/microsoftstream.py @@ -0,0 +1,125 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from base64 import b64decode + +from .common import InfoExtractor +from ..utils import ( + merge_dicts, + parse_iso8601, + parse_duration, + parse_resolution, + try_get, + url_basename, +) + + +class MicrosoftStreamIE(InfoExtractor): + IE_NAME = 'microsoftstream' + IE_DESC = 'Microsoft Stream' + _VALID_URL = r'https?://(?:web|www|msit)\.microsoftstream\.com/video/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' + + _TESTS = [{ + 'url': 'https://web.microsoftstream.com/video/6e51d928-4f46-4f1c-b141-369925e37b62?list=user&userId=f5491e02-e8fe-4e34-b67c-ec2e79a6ecc0', + 'only_matching': True, + }, { + 'url': 'https://msit.microsoftstream.com/video/b60f5987-aabd-4e1c-a42f-c559d138f2ca', + 'only_matching': True, + }] + + def _get_all_subtitles(self, api_url, video_id, headers): + subtitles = {} + automatic_captions = {} + text_tracks = self._download_json( + f'{api_url}/videos/{video_id}/texttracks', video_id, + note='Downloading subtitles JSON', fatal=False, headers=headers, + query={'api-version': '1.4-private'}).get('value') or [] + for track in text_tracks: + if not track.get('language') or not track.get('url'): + continue + sub_dict = automatic_captions if track.get('autoGenerated') else subtitles + sub_dict.setdefault(track['language'], []).append({ + 'ext': 'vtt', + 'url': track.get('url') + }) + return { + 'subtitles': subtitles, + 'automatic_captions': automatic_captions + } + + def extract_all_subtitles(self, *args, **kwargs): + if (self.get_param('writesubtitles', False) + or self.get_param('writeautomaticsub', False) + or self.get_param('listsubtitles')): + return self._get_all_subtitles(*args, **kwargs) + return {} + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + if '<title>Microsoft Stream' not in webpage: + self.raise_login_required(method='cookies') + + access_token = self._html_search_regex(r'"AccessToken":"(.+?)"', webpage, 'access token') + api_url = self._html_search_regex(r'"ApiGatewayUri":"(.+?)"', webpage, 'api url') + + headers = {'Authorization': f'Bearer {access_token}'} + + video_data = self._download_json( + f'{api_url}/videos/{video_id}', video_id, + headers=headers, query={ + '$expand': 'creator,tokens,status,liveEvent,extensions', + 'api-version': '1.4-private' + }) + video_id = video_data.get('id') or video_id + language = video_data.get('language') + + thumbnails = [] + for thumbnail_id in ('extraSmall', 'small', 'medium', 'large'): + thumbnail_url = try_get(video_data, lambda x: x['posterImage'][thumbnail_id]['url'], str) + if not thumbnail_url: + continue + thumb = { + 'id': thumbnail_id, + 'url': thumbnail_url, + } + thumb_name = url_basename(thumbnail_url) + thumb_name = str(b64decode(thumb_name + '=' * (-len(thumb_name) % 4))) + thumb.update(parse_resolution(thumb_name)) + thumbnails.append(thumb) + + formats = [] + for playlist in video_data['playbackUrls']: + if playlist['mimeType'] == 'application/vnd.apple.mpegurl': + 
+                formats.extend(self._extract_m3u8_formats(
+                    playlist['playbackUrl'], video_id,
+                    ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls',
+                    fatal=False, headers=headers))
+            elif playlist['mimeType'] == 'application/dash+xml':
+                formats.extend(self._extract_mpd_formats(
+                    playlist['playbackUrl'], video_id, mpd_id='dash',
+                    fatal=False, headers=headers))
+            elif playlist['mimeType'] == 'application/vnd.ms-sstr+xml':
+                formats.extend(self._extract_ism_formats(
+                    playlist['playbackUrl'], video_id, ism_id='mss',
+                    fatal=False, headers=headers))
+        formats = [merge_dicts(f, {'language': language}) for f in formats]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video_data['name'],
+            'description': video_data.get('description'),
+            'uploader': try_get(video_data, lambda x: x['creator']['name'], str),
+            'uploader_id': try_get(video_data, (lambda x: x['creator']['mail'],
+                                                lambda x: x['creator']['id']), str),
+            'thumbnails': thumbnails,
+            **self.extract_all_subtitles(api_url, video_id, headers),
+            'timestamp': parse_iso8601(video_data.get('created')),
+            'duration': parse_duration(try_get(video_data, lambda x: x['media']['duration'])),
+            'webpage_url': f'https://web.microsoftstream.com/video/{video_id}',
+            'view_count': try_get(video_data, lambda x: x['metrics']['views'], int),
+            'like_count': try_get(video_data, lambda x: x['metrics']['likes'], int),
+            'comment_count': try_get(video_data, lambda x: x['metrics']['comments'], int),
+            'formats': formats,
+        }
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 319f6979b..e05677d08 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -3714,14 +3714,14 @@ def parse_resolution(s):
     if s is None:
         return {}
 
-    mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
+    mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
     if mobj:
         return {
            'width': int(mobj.group('w')),
            'height': int(mobj.group('h')),
        }
 
-    mobj = re.search(r'\b(\d+)[pPiI]\b', s)
+    mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
     if mobj:
         return {'height': int(mobj.group(1))}
 
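The `parse_resolution()` change above swaps `\b` for explicit lookarounds because `_` counts as a word character in Python regexes: a string like `pre_1920x1080_post` (the shape of the base64-decoded thumbnail file names the new Microsoft Stream extractor feeds in) has no word boundary around the digits, so the old pattern missed it, while the new `ep1x2` test guards that dropping `\b` does not let a bogus `1x2` match through. A minimal standalone sketch of the boundary logic, runnable outside yt-dlp:

    import re

    # Adjacent characters may be anything except ASCII letters/digits, so '_'
    # now works as a delimiter while 'ep1x2' is still rejected. The ',' in the
    # separator class additionally accepts '1920, 1080'.
    PATTERN = re.compile(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])')

    for s in ('1920x1080', 'pre_1920x1080_post', '1920, 1080', 'ep1x2'):
        mobj = PATTERN.search(s)
        print(s, '->', mobj.groupdict() if mobj else {})
    # 1920x1080          -> {'w': '1920', 'h': '1080'}
    # pre_1920x1080_post -> {'w': '1920', 'h': '1080'}
    # 1920, 1080         -> {'w': '1920', 'h': '1080'}
    # ep1x2              -> {}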
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Fri, 22 Oct 2021 05:57:15 +0530
Subject: [PATCH 0110/2552] [vimeo] Fix embedded `player.vimeo` URL

Closes #1138, partially fixes #1323
Cherry-picked from upstream commit 3ae9c0f410b1d4f63e8bada67dd62a8d2852be32
---
 yt_dlp/extractor/vimeo.py | 230 +++++++++++++++++---------------------
 1 file changed, 101 insertions(+), 129 deletions(-)

diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py
index 8b367a4e6..04c504934 100644
--- a/yt_dlp/extractor/vimeo.py
+++ b/yt_dlp/extractor/vimeo.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 
 import base64
 import functools
-import json
 import re
 import itertools
 
@@ -17,8 +16,8 @@ from ..compat import (
 from ..utils import (
     clean_html,
     determine_ext,
-    dict_get,
     ExtractorError,
+    get_element_by_class,
     js_to_json,
     int_or_none,
     merge_dicts,
@@ -26,7 +25,6 @@ from ..utils import (
     parse_filesize,
     parse_iso8601,
     parse_qs,
-    RegexNotFoundError,
     sanitized_Request,
     smuggle_url,
     std_headers,
@@ -129,10 +127,11 @@ class VimeoBaseInfoExtractor(InfoExtractor):
         video_title = video_data['title']
         live_event = video_data.get('live_event') or {}
         is_live = live_event.get('status') == 'started'
+        request = config.get('request') or {}
 
         formats = []
-        config_files = video_data.get('files') or config['request'].get('files', {})
-        for f in config_files.get('progressive', []):
+        config_files = video_data.get('files') or request.get('files') or {}
+        for f in (config_files.get('progressive') or []):
             video_url = f.get('url')
             if not video_url:
                 continue
@@ -148,7 +147,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
         # TODO: fix handling of 308 status code returned for live archive manifest requests
         sep_pattern = r'/sep/video/'
         for files_type in ('hls', 'dash'):
-            for cdn_name, cdn_data in config_files.get(files_type, {}).get('cdns', {}).items():
+            for cdn_name, cdn_data in (try_get(config_files, lambda x: x[files_type]['cdns']) or {}).items():
                 manifest_url = cdn_data.get('url')
                 if not manifest_url:
                     continue
@@ -188,17 +187,15 @@ class VimeoBaseInfoExtractor(InfoExtractor):
             })
 
         subtitles = {}
-        text_tracks = config['request'].get('text_tracks')
-        if text_tracks:
-            for tt in text_tracks:
-                subtitles[tt['lang']] = [{
-                    'ext': 'vtt',
-                    'url': urljoin('https://vimeo.com', tt['url']),
-                }]
+        for tt in (request.get('text_tracks') or []):
+            subtitles[tt['lang']] = [{
+                'ext': 'vtt',
+                'url': urljoin('https://vimeo.com', tt['url']),
+            }]
 
         thumbnails = []
         if not is_live:
-            for key, thumb in video_data.get('thumbs', {}).items():
+            for key, thumb in (video_data.get('thumbs') or {}).items():
                 thumbnails.append({
                     'id': key,
                     'width': int_or_none(key),
@@ -342,6 +339,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
                 'duration': 1595,
                 'upload_date': '20130610',
                 'timestamp': 1370893156,
+                'license': 'by',
             },
             'params': {
                 'format': 'best[protocol=https]',
@@ -420,6 +418,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
                 'uploader_id': 'staff',
                 'uploader': 'Vimeo Staff',
                 'duration': 62,
+                'subtitles': {
+                    'de': [{'ext': 'vtt'}],
+                    'en': [{'ext': 'vtt'}],
+                    'es': [{'ext': 'vtt'}],
+                    'fr': [{'ext': 'vtt'}],
+                },
             }
         },
         {
@@ -626,6 +630,37 @@ class VimeoIE(VimeoBaseInfoExtractor):
     def _real_initialize(self):
         self._login()
 
+    def _extract_from_api(self, video_id, unlisted_hash=None):
+        token = self._download_json(
+            'https://vimeo.com/_rv/jwt', video_id, headers={
+                'X-Requested-With': 'XMLHttpRequest'
+            })['token']
+        api_url = 'https://api.vimeo.com/videos/' + video_id
+        if unlisted_hash:
+            api_url += ':' + unlisted_hash
+        video = self._download_json(
+            api_url, video_id, headers={
'Authorization': 'jwt ' + token, + }, query={ + 'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays', + }) + info = self._parse_config(self._download_json( + video['config_url'], video_id), video_id) + self._vimeo_sort_formats(info['formats']) + get_timestamp = lambda x: parse_iso8601(video.get(x + '_time')) + info.update({ + 'description': video.get('description'), + 'license': video.get('license'), + 'release_timestamp': get_timestamp('release'), + 'timestamp': get_timestamp('created'), + 'view_count': int_or_none(try_get(video, lambda x: x['stats']['plays'])), + }) + connections = try_get( + video, lambda x: x['metadata']['connections'], dict) or {} + for k in ('comment', 'like'): + info[k + '_count'] = int_or_none(try_get(connections, lambda x: x[k + 's']['total'])) + return info + def _try_album_password(self, url): album_id = self._search_regex( r'vimeo\.com/(?:album|showcase)/([^/]+)', url, 'album id', default=None) @@ -675,45 +710,16 @@ class VimeoIE(VimeoBaseInfoExtractor): # Extract ID from URL video_id, unlisted_hash = self._match_valid_url(url).groups() if unlisted_hash: - token = self._download_json( - 'https://vimeo.com/_rv/jwt', video_id, headers={ - 'X-Requested-With': 'XMLHttpRequest' - })['token'] - video = self._download_json( - 'https://api.vimeo.com/videos/%s:%s' % (video_id, unlisted_hash), - video_id, headers={ - 'Authorization': 'jwt ' + token, - }, query={ - 'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays', - }) - info = self._parse_config(self._download_json( - video['config_url'], video_id), video_id) - self._vimeo_sort_formats(info['formats']) - get_timestamp = lambda x: parse_iso8601(video.get(x + '_time')) - info.update({ - 'description': video.get('description'), - 'license': video.get('license'), - 'release_timestamp': get_timestamp('release'), - 'timestamp': get_timestamp('created'), - 'view_count': int_or_none(try_get(video, lambda x: x['stats']['plays'])), - }) - connections = try_get( - video, lambda x: x['metadata']['connections'], dict) or {} - for k in ('comment', 'like'): - info[k + '_count'] = int_or_none(try_get(connections, lambda x: x[k + 's']['total'])) - return info + return self._extract_from_api(video_id, unlisted_hash) orig_url = url is_pro = 'vimeopro.com/' in url - is_player = '://player.vimeo.com/video/' in url if is_pro: # some videos require portfolio_id to be present in player url # https://github.com/ytdl-org/youtube-dl/issues/20070 url = self._extract_url(url, self._download_webpage(url, video_id)) if not url: url = 'https://vimeo.com/' + video_id - elif is_player: - url = 'https://player.vimeo.com/video/' + video_id elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')): url = 'https://vimeo.com/' + video_id @@ -734,14 +740,25 @@ class VimeoIE(VimeoBaseInfoExtractor): expected=True) raise - # Now we begin extracting as much information as we can from what we - # retrieved. First we extract the information common to all extractors, - # and latter we extract those that are Vimeo specific. 
- self.report_extraction(video_id) + if '://player.vimeo.com/video/' in url: + config = self._parse_json(self._search_regex( + r'\bconfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id) + if config.get('view') == 4: + config = self._verify_player_video_password( + redirect_url, video_id, headers) + info = self._parse_config(config, video_id) + self._vimeo_sort_formats(info['formats']) + return info + + if re.search(r']+?id="pw_form"', webpage): + video_password = self._get_video_password() + token, vuid = self._extract_xsrft_and_vuid(webpage) + webpage = self._verify_video_password( + redirect_url, video_id, video_password, token, vuid) vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None) if vimeo_config: - seed_status = vimeo_config.get('seed_status', {}) + seed_status = vimeo_config.get('seed_status') or {} if seed_status.get('state') == 'failed': raise ExtractorError( '%s said: %s' % (self.IE_NAME, seed_status['title']), @@ -750,70 +767,40 @@ class VimeoIE(VimeoBaseInfoExtractor): cc_license = None timestamp = None video_description = None + info_dict = {} - # Extract the config JSON - try: - try: - config_url = self._html_search_regex( - r' data-config-url="(.+?)"', webpage, - 'config URL', default=None) - if not config_url: - # Sometimes new react-based page is served instead of old one that require - # different config URL extraction approach (see - # https://github.com/ytdl-org/youtube-dl/pull/7209) - page_config = self._parse_json(self._search_regex( - r'vimeo\.(?:clip|vod_title)_page_config\s*=\s*({.+?});', - webpage, 'page config'), video_id) - config_url = page_config['player']['config_url'] - cc_license = page_config.get('cc_license') - timestamp = try_get( - page_config, lambda x: x['clip']['uploaded_on'], - compat_str) - video_description = clean_html(dict_get( - page_config, ('description', 'description_html_escaped'))) - config = self._download_json(config_url, video_id) - except RegexNotFoundError: - # For pro videos or player.vimeo.com urls - # We try to find out to which variable is assigned the config dic - m_variable_name = re.search(r'(\w)\.video\.id', webpage) - if m_variable_name is not None: - config_re = [r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1))] - else: - config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});'] - config_re.append(r'\bvar\s+r\s*=\s*({.+?})\s*;') - config_re.append(r'\bconfig\s*=\s*({.+?})\s*;') - config = self._search_regex(config_re, webpage, 'info section', - flags=re.DOTALL) - config = json.loads(config) - except Exception as e: - if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage): - raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option') - - if re.search(r']+?id="pw_form"', webpage) is not None: - if '_video_password_verified' in data: - raise ExtractorError('video password verification failed!') - video_password = self._get_video_password() - token, vuid = self._extract_xsrft_and_vuid(webpage) - self._verify_video_password( - redirect_url, video_id, video_password, token, vuid) - return self._real_extract( - smuggle_url(redirect_url, {'_video_password_verified': 'verified'})) - else: - raise ExtractorError('Unable to extract info section', - cause=e) + channel_id = self._search_regex( + r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None) + if channel_id: + config_url = self._html_search_regex( + r'\bdata-config-url="([^"]+)"', webpage, 'config URL') + 
video_description = clean_html(get_element_by_class('description', webpage)) + info_dict.update({ + 'channel_id': channel_id, + 'channel_url': 'https://vimeo.com/channels/' + channel_id, + }) else: - if config.get('view') == 4: - config = self._verify_player_video_password(redirect_url, video_id, headers) - + page_config = self._parse_json(self._search_regex( + r'vimeo\.(?:clip|vod_title)_page_config\s*=\s*({.+?});', + webpage, 'page config', default='{}'), video_id, fatal=False) + if not page_config: + return self._extract_from_api(video_id) + config_url = page_config['player']['config_url'] + cc_license = page_config.get('cc_license') + clip = page_config.get('clip') or {} + timestamp = clip.get('uploaded_on') + video_description = clean_html( + clip.get('description') or page_config.get('description_html_escaped')) + config = self._download_json(config_url, video_id) video = config.get('video') or {} vod = video.get('vod') or {} def is_rented(): if '>You rented this title.<' in webpage: return True - if config.get('user', {}).get('purchased'): + if try_get(config, lambda x: x['user']['purchased']): return True - for purchase_option in vod.get('purchase_options', []): + for purchase_option in (vod.get('purchase_options') or []): if purchase_option.get('purchased'): return True label = purchase_option.get('label_string') @@ -828,14 +815,14 @@ class VimeoIE(VimeoBaseInfoExtractor): 'https://player.vimeo.com/player/%s' % feature_id, {'force_feature_id': True}), 'Vimeo') - # Extract video description if not video_description: video_description = self._html_search_regex( r'(?s)]*>(.*?)', webpage, 'description', default=None) if not video_description: video_description = self._html_search_meta( - 'description', webpage, default=None) + ['description', 'og:description', 'twitter:description'], + webpage, default=None) if not video_description and is_pro: orig_webpage = self._download_webpage( orig_url, video_id, @@ -844,24 +831,17 @@ class VimeoIE(VimeoBaseInfoExtractor): if orig_webpage: video_description = self._html_search_meta( 'description', orig_webpage, default=None) - if not video_description and not is_player: + if not video_description: self.report_warning('Cannot find video description') - # Extract upload date if not timestamp: timestamp = self._search_regex( r']+datetime="([^"]+)"', webpage, 'timestamp', default=None) - try: - view_count = int(self._search_regex(r'UserPlays:(\d+)', webpage, 'view count')) - like_count = int(self._search_regex(r'UserLikes:(\d+)', webpage, 'like count')) - comment_count = int(self._search_regex(r'UserComments:(\d+)', webpage, 'comment count')) - except RegexNotFoundError: - # This info is only available in vimeo.com/{id} urls - view_count = None - like_count = None - comment_count = None + view_count = int_or_none(self._search_regex(r'UserPlays:(\d+)', webpage, 'view count', default=None)) + like_count = int_or_none(self._search_regex(r'UserLikes:(\d+)', webpage, 'like count', default=None)) + comment_count = int_or_none(self._search_regex(r'UserComments:(\d+)', webpage, 'comment count', default=None)) formats = [] @@ -881,11 +861,7 @@ class VimeoIE(VimeoBaseInfoExtractor): r']+rel=["\']license["\'][^>]+href=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'license', default=None, group='license') - channel_id = self._search_regex( - r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None) - channel_url = 'https://vimeo.com/channels/%s' % channel_id if channel_id else None - - info_dict = { + info_dict.update({ 'formats': formats, 'timestamp': 
unified_timestamp(timestamp),
             'description': video_description,
@@ -894,18 +870,14 @@ class VimeoIE(VimeoBaseInfoExtractor):
             'like_count': like_count,
             'comment_count': comment_count,
             'license': cc_license,
-            'channel_id': channel_id,
-            'channel_url': channel_url,
-        }
-
-        info_dict = merge_dicts(info_dict, info_dict_config, json_ld)
+        })
 
-        return info_dict
+        return merge_dicts(info_dict, info_dict_config, json_ld)
 
 
 class VimeoOndemandIE(VimeoIE):
     IE_NAME = 'vimeo:ondemand'
-    _VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/([^/]+/)?(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/(?:[^/]+/)?(?P<id>[^/?#&]+)'
     _TESTS = [{
         # ondemand video not available via https://vimeo.com/id
         'url': 'https://vimeo.com/ondemand/20704',

From f656a23cb116980b0eed5cad02e707249b75701a Mon Sep 17 00:00:00 2001
From: zenerdi0de <83358565+zenerdi0de@users.noreply.github.com>
Date: Fri, 22 Oct 2021 06:20:49 +0530
Subject: [PATCH 0111/2552] [patreon] Fix vimeo player regex (#1332)

Closes #1323
Authored by: zenerdi0de
---
 yt_dlp/extractor/patreon.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py
index a189c0237..c7d316efc 100644
--- a/yt_dlp/extractor/patreon.py
+++ b/yt_dlp/extractor/patreon.py
@@ -161,7 +161,7 @@ class PatreonIE(InfoExtractor):
         if try_get(attributes, lambda x: x['embed']['provider']) == 'Vimeo':
             embed_html = try_get(attributes, lambda x: x['embed']['html'])
             v_url = url_or_none(compat_urllib_parse_unquote(
-                self._search_regex(r'src=(https%3A%2F%2Fplayer\.vimeo\.com.+)%3F', embed_html, 'vimeo url', fatal=False)))
+                self._search_regex(r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)', embed_html, 'vimeo url', fatal=False)))
             if v_url:
                 info.update({
                     '_type': 'url_transparent',

From ab2ffab22d02d530e0b46f9e361ff53a2139898b Mon Sep 17 00:00:00 2001
From: u-spec-png <54671367+u-spec-png@users.noreply.github.com>
Date: Fri, 22 Oct 2021 00:53:45 +0000
Subject: [PATCH 0112/2552] [Instagram] Add login (#1288)

Authored by: u-spec-png
---
 yt_dlp/extractor/instagram.py | 44 ++++++++++++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py
index 3801c7af9..24f47f3a8 100644
--- a/yt_dlp/extractor/instagram.py
+++ b/yt_dlp/extractor/instagram.py
@@ -4,6 +4,7 @@ import itertools
 import hashlib
 import json
 import re
+import time
 
 from .common import InfoExtractor
 from ..compat import (
@@ -20,11 +21,13 @@ from ..utils import (
     try_get,
     url_or_none,
     variadic,
+    urlencode_postdata,
 )
 
 
 class InstagramIE(InfoExtractor):
     _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv|reel)/(?P<id>[^/?#&]+))'
+    _NETRC_MACHINE = 'instagram'
     _TESTS = [{
         'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
         'md5': '0d2da106a9d2631273e192b372806516',
@@ -140,6 +143,45 @@ class InstagramIE(InfoExtractor):
         if mobj:
             return mobj.group('link')
 
+    def _login(self):
+        username, password = self._get_login_info()
+
+        login_webpage = self._download_webpage(
+            'https://www.instagram.com/accounts/login/', None,
+            note='Downloading login webpage', errnote='Failed to download login webpage')
+
+        shared_data = self._parse_json(
+            self._search_regex(
+                r'window\._sharedData\s*=\s*({.+?});',
+                login_webpage, 'shared data', default='{}'),
+            None)
+
+        login = self._download_json('https://www.instagram.com/accounts/login/ajax/', None, note='Logging in', headers={
+            'Accept': '*/*',
+            'X-IG-App-ID': '936619743392459',
+            'X-ASBD-ID': '198387',
+            'X-IG-WWW-Claim': '0',
+            'X-Requested-With': 'XMLHttpRequest',
+            'X-CSRFToken': shared_data['config']['csrf_token'],
+            'X-Instagram-AJAX': shared_data['rollout_hash'],
+            'Referer': 'https://www.instagram.com/',
+        }, data=urlencode_postdata({
+            'enc_password': f'#PWD_INSTAGRAM_BROWSER:0:{int(time.time())}:{password}',
+            'username': username,
+            'queryParams': '{}',
+            'optIntoOneTap': 'false',
+            'stopDeletionNonce': '',
+            'trustedDeviceRecords': '{}',
+        }))
+
+        if not login.get('authenticated'):
+            if login.get('message'):
+                raise ExtractorError(f'Unable to login: {login["message"]}')
+            raise ExtractorError('Unable to login')
+
+    def _real_initialize(self):
+        self._login()
+
     def _real_extract(self, url):
         mobj = self._match_valid_url(url)
         video_id = mobj.group('id')
@@ -147,7 +189,7 @@ class InstagramIE(InfoExtractor):
         webpage, urlh = self._download_webpage_handle(url, video_id)
         if 'www.instagram.com/accounts/login' in urlh.geturl().rstrip('/'):
-            self.raise_login_required('You need to log in to access this content', method='cookies')
+            self.raise_login_required('You need to log in to access this content')
 
         (media, video_url, description, thumbnail, timestamp, uploader,
          uploader_id, like_count, comment_count, comments, height,
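The Instagram login above posts the password inside Instagram's browser `enc_password` envelope. With the `0` version tag the secret travels in plain text within the envelope (protected only by TLS), prefixed with the submission timestamp. A hedged sketch of just the envelope construction — the helper name is invented here for illustration:

    import time

    def make_enc_password(password, timestamp=None):
        # Format: '#PWD_INSTAGRAM_BROWSER:<version>:<unix time>:<secret>'.
        # Version 0 sends the secret without additional client-side encryption.
        return f'#PWD_INSTAGRAM_BROWSER:0:{timestamp or int(time.time())}:{password}'

    print(make_enc_password('hunter2', timestamp=1634860800))
    # #PWD_INSTAGRAM_BROWSER:0:1634860800:hunter2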
From 3c239332b0df3b22a5cbd66930ad240d2398fb44 Mon Sep 17 00:00:00 2001
From: makeworld <25111343+makeworld-the-better-one@users.noreply.github.com>
Date: Thu, 21 Oct 2021 20:56:29 -0400
Subject: [PATCH 0113/2552] [CBC] Fix Gem livestream (#1289)

Authored by: makeworld-the-better-one
---
 yt_dlp/extractor/cbc.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py
index 5e4526c53..61fe4074c 100644
--- a/yt_dlp/extractor/cbc.py
+++ b/yt_dlp/extractor/cbc.py
@@ -377,7 +377,7 @@ class CBCGemPlaylistIE(InfoExtractor):
 
 class CBCGemLiveIE(InfoExtractor):
     IE_NAME = 'gem.cbc.ca:live'
-    _VALID_URL = r'https?://gem\.cbc\.ca/live/(?P<id>[0-9]{12})'
+    _VALID_URL = r'https?://gem\.cbc\.ca/live/(?P<id>\d+)'
     _TEST = {
         'url': 'https://gem.cbc.ca/live/920604739687',
         'info_dict': {
@@ -396,21 +396,21 @@ class CBCGemLiveIE(InfoExtractor):
 
     # It's unclear where the chars at the end come from, but they appear to be
     # constant. Might need updating in the future.
-    _API = 'https://tpfeed.cbc.ca/f/ExhSPC/t_t3UKJR6MAT'
+    # There are two URLs, some livestreams are in one, and some
+    # in the other. The JSON schema is the same for both.
+    _API_URLS = ['https://tpfeed.cbc.ca/f/ExhSPC/t_t3UKJR6MAT', 'https://tpfeed.cbc.ca/f/ExhSPC/FNiv9xQx_BnT']
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        live_info = self._download_json(self._API, video_id)['entries']
 
-        video_info = None
-        for stream in live_info:
-            if stream.get('guid') == video_id:
-                video_info = stream
-
-        if video_info is None:
-            raise ExtractorError(
-                'Couldn\'t find video metadata, maybe this livestream is now offline',
-                expected=True)
+        for api_url in self._API_URLS:
+            video_info = next((
+                stream for stream in self._download_json(api_url, video_id)['entries']
+                if stream.get('guid') == video_id), None)
+            if video_info:
+                break
+        else:
+            raise ExtractorError('Couldn\'t find video metadata, maybe this livestream is now offline', expected=True)
 
         return {
             '_type': 'url_transparent',
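The livestream fix above leans on Python's `for`/`else`: the `else` branch runs only when the loop finishes without a `break`, i.e. when the guid was found in neither feed. The same idiom, distilled into a standalone sketch — `fetch_entries` is a hypothetical stand-in for the extractor's `_download_json(...)['entries']` call:

    def find_stream(video_id, api_urls, fetch_entries):
        # Probe each feed in turn; a given livestream is listed in only one.
        for api_url in api_urls:
            stream = next(
                (e for e in fetch_entries(api_url) if e.get('guid') == video_id),
                None)
            if stream:
                break
        else:
            # Reached only if no iteration hit the break above.
            raise LookupError(f'{video_id} not found; the livestream may be offline')
        return stream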
From d183af3cc1dbb98d2e2f89dbc7cff2901bd10408 Mon Sep 17 00:00:00 2001
From: makeworld <25111343+makeworld-the-better-one@users.noreply.github.com>
Date: Thu, 21 Oct 2021 20:58:32 -0400
Subject: [PATCH 0114/2552] [CBC] Support CBC Gem member content (#1294)

Authored by: makeworld-the-better-one
---
 yt_dlp/extractor/cbc.py | 110 +++++++++++++++++++++++++++++++---------
 1 file changed, 86 insertions(+), 24 deletions(-)

diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py
index 61fe4074c..4fcf2a9c1 100644
--- a/yt_dlp/extractor/cbc.py
+++ b/yt_dlp/extractor/cbc.py
@@ -2,6 +2,9 @@
 from __future__ import unicode_literals
 
 import re
+import json
+import base64
+import time
 
 from .common import InfoExtractor
 from ..compat import (
@@ -244,37 +247,96 @@ class CBCGemIE(InfoExtractor):
         'params': {'format': 'bv'},
         'skip': 'Geo-restricted to Canada',
     }]
-    _API_BASE = 'https://services.radio-canada.ca/ott/cbc-api/v2/assets/'
+
+    _GEO_COUNTRIES = ['CA']
+    _TOKEN_API_KEY = '3f4beddd-2061-49b0-ae80-6f1f2ed65b37'
+    _NETRC_MACHINE = 'cbcgem'
+    _claims_token = None
+
+    def _new_claims_token(self, email, password):
+        data = json.dumps({
+            'email': email,
+            'password': password,
+        }).encode()
+        headers = {'content-type': 'application/json'}
+        query = {'apikey': self._TOKEN_API_KEY}
+        resp = self._download_json('https://api.loginradius.com/identity/v2/auth/login',
+                                   None, data=data, headers=headers, query=query)
+        access_token = resp['access_token']
+
+        query = {
+            'access_token': access_token,
+            'apikey': self._TOKEN_API_KEY,
+            'jwtapp': 'jwt',
+        }
+        resp = self._download_json('https://cloud-api.loginradius.com/sso/jwt/api/token',
+                                   None, headers=headers, query=query)
+        sig = resp['signature']
+
+        data = json.dumps({'jwt': sig}).encode()
+        headers = {'content-type': 'application/json', 'ott-device-type': 'web'}
+        resp = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/token',
+                                   None, data=data, headers=headers)
+        cbc_access_token = resp['accessToken']
+
+        headers = {'content-type': 'application/json', 'ott-device-type': 'web', 'ott-access-token': cbc_access_token}
+        resp = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/profile',
+                                   None, headers=headers)
+        return resp['claimsToken']
+
+    def _get_claims_token_expiry(self):
+        # Token is a JWT
+        # JWT is decoded here and 'exp' field is extracted
+        # It is a Unix timestamp for when the token expires
+        b64_data = self._claims_token.split('.')[1]
+        data = base64.urlsafe_b64decode(b64_data + "==")
+        return json.loads(data)['exp']
+
+    def claims_token_expired(self):
+        exp = self._get_claims_token_expiry()
+        if exp - time.time() < 10:
+            # It will expire in less than 10 seconds, or has already expired
+            return True
+        return False
+
+    def claims_token_valid(self):
+        return self._claims_token is not None and not self.claims_token_expired()
+
+    def _get_claims_token(self, email, password):
+        if not self.claims_token_valid():
+            self._claims_token = self._new_claims_token(email, password)
+            self._downloader.cache.store(self._NETRC_MACHINE, 'claims_token', self._claims_token)
+        return self._claims_token
+
+    def _real_initialize(self):
+        if self.claims_token_valid():
+            return
+        self._claims_token = self._downloader.cache.load(self._NETRC_MACHINE, 'claims_token')
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        video_info = self._download_json(self._API_BASE + video_id, video_id)
-
-        last_error = None
-        attempt = -1
-        retries = self.get_param('extractor_retries', 15)
-        while attempt < retries:
-            attempt += 1
-            if last_error:
-                self.report_warning('%s. Retrying ...' % last_error)
-            m3u8_info = self._download_json(
-                video_info['playSession']['url'], video_id,
-                note='Downloading JSON metadata%s' % f' (attempt {attempt})')
-            m3u8_url = m3u8_info.get('url')
-            if m3u8_url:
-                break
-            elif m3u8_info.get('errorCode') == 1:
-                self.raise_geo_restricted(countries=['CA'])
-            else:
-                last_error = f'{self.IE_NAME} said: {m3u8_info.get("errorCode")} - {m3u8_info.get("message")}'
-                # 35 means media unavailable, but retries work
-                if m3u8_info.get('errorCode') != 35 or attempt >= retries:
-                    raise ExtractorError(last_error)
+        video_info = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/assets/' + video_id, video_id)
+
+        email, password = self._get_login_info()
+        if email and password:
+            claims_token = self._get_claims_token(email, password)
+            headers = {'x-claims-token': claims_token}
+        else:
+            headers = {}
+        m3u8_info = self._download_json(video_info['playSession']['url'], video_id, headers=headers)
+        m3u8_url = m3u8_info.get('url')
+
+        if m3u8_info.get('errorCode') == 1:
+            self.raise_geo_restricted(countries=['CA'])
+        elif m3u8_info.get('errorCode') == 35:
+            self.raise_login_required(method='password')
+        elif m3u8_info.get('errorCode') != 0:
+            raise ExtractorError(f'{self.IE_NAME} said: {m3u8_info.get("errorCode")} - {m3u8_info.get("message")}')
 
         formats = self._extract_m3u8_formats(m3u8_url, video_id, m3u8_id='hls')
         self._remove_duplicate_formats(formats)
 
-        for i, format in enumerate(formats):
+        for format in formats:
             if format.get('vcodec') == 'none':
                 if format.get('ext') is None:
                     format['ext'] = 'm4a'
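The claims token above is a JWT, so its expiry can be read without any signature verification: split on `.`, base64url-decode the middle (payload) segment, and take the `exp` Unix timestamp. A distilled sketch of the same decode-and-compare logic, restoring padding generically (as the Microsoft Stream patch does) rather than with a hard-coded `"=="`:

    import base64
    import json
    import time

    def jwt_expiry(token):
        # The payload is the second of the three base64url segments;
        # urlsafe_b64decode needs the length padded to a multiple of 4.
        payload_b64 = token.split('.')[1]
        payload = base64.urlsafe_b64decode(payload_b64 + '=' * (-len(payload_b64) % 4))
        return json.loads(payload)['exp']

    def is_expired(token, margin=10):
        # Mirror the extractor: treat a token within `margin` seconds of its
        # expiry as already expired so a request cannot race the cutoff.
        return jwt_expiry(token) - time.time() < margin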
From ad0090d0d23e938e8a2107777a83e6c6b92494d3 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Fri, 22 Oct 2021 21:58:06 +0530
Subject: [PATCH 0115/2552] [cookies] Local State should be opened as utf-8

Closes #1276
---
 yt_dlp/cookies.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py
index 5f7fdf584..c9ae9b6db 100644
--- a/yt_dlp/cookies.py
+++ b/yt_dlp/cookies.py
@@ -620,7 +620,7 @@ def _get_windows_v10_key(browser_root, logger):
     if path is None:
         logger.error('could not find local state file')
         return None
-    with open(path, 'r') as f:
+    with open(path, 'r', encoding='utf8') as f:
         data = json.load(f)
     try:
         base64_key = data['os_crypt']['encrypted_key']
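The one-line cookies fix matters because Chromium's `Local State` is a UTF-8 JSON file, while `open()` without an `encoding=` argument falls back to the locale's preferred encoding (often cp1252 on Windows) and can raise `UnicodeDecodeError` on non-ASCII content. A minimal sketch of the fixed read; the path below is illustrative only:

    import json

    def load_chromium_local_state(path):
        # Decode explicitly; the file is UTF-8 regardless of the system locale.
        with open(path, 'r', encoding='utf8') as f:
            return json.load(f)

    state = load_chromium_local_state(
        r'C:\Users\me\AppData\Local\Google\Chrome\User Data\Local State')
    encrypted_key = state['os_crypt']['encrypted_key']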
From 457f6d68668704c20debc40ca77768796656d98b Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Fri, 22 Oct 2021 23:13:06 +0530
Subject: [PATCH 0116/2552] [vlive:channel] Fix extraction

Based on https://github.com/ytdl-org/youtube-dl/pull/29866
Closes #749, #927, https://github.com/ytdl-org/youtube-dl/issues/29837
Authored by kikuyan, pukkandan
---
 yt_dlp/extractor/vlive.py | 219 +++++++++++++++++---------------------
 1 file changed, 99 insertions(+), 120 deletions(-)

diff --git a/yt_dlp/extractor/vlive.py b/yt_dlp/extractor/vlive.py
index 84f51a544..681d95902 100644
--- a/yt_dlp/extractor/vlive.py
+++ b/yt_dlp/extractor/vlive.py
@@ -17,17 +17,65 @@ from ..utils import (
     strip_or_none,
     try_get,
     urlencode_postdata,
+    url_or_none,
 )
 
 
 class VLiveBaseIE(NaverBaseIE):
-    _APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'
+    _NETRC_MACHINE = 'vlive'
+    _logged_in = False
+
+    def _real_initialize(self):
+        if not self._logged_in:
+            VLiveBaseIE._logged_in = self._login()
+
+    def _login(self):
+        email, password = self._get_login_info()
+        if email is None:
+            return False
+
+        LOGIN_URL = 'https://www.vlive.tv/auth/email/login'
+        self._request_webpage(
+            LOGIN_URL, None, note='Downloading login cookies')
+
+        self._download_webpage(
+            LOGIN_URL, None, note='Logging in',
+            data=urlencode_postdata({'email': email, 'pwd': password}),
+            headers={
+                'Referer': LOGIN_URL,
+                'Content-Type': 'application/x-www-form-urlencoded'
+            })
+
+        login_info = self._download_json(
+            'https://www.vlive.tv/auth/loginInfo', None,
+            note='Checking login status',
+            headers={'Referer': 'https://www.vlive.tv/home'})
+
+        if not try_get(login_info, lambda x: x['message']['login'], bool):
+            raise ExtractorError('Unable to log in', expected=True)
+        return True
+
+    def _call_api(self, path_template, video_id, fields=None, query_add={}, note=None):
+        if note is None:
+            note = 'Downloading %s JSON metadata' % path_template.split('/')[-1].split('-')[0]
+        query = {'appId': '8c6cc7b45d2568fb668be6e05b6e5a3b', 'gcc': 'KR', 'platformType': 'PC'}
+        if fields:
+            query['fields'] = fields
+        if query_add:
+            query.update(query_add)
+        try:
+            return self._download_json(
+                'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id, video_id,
+                note, headers={'Referer': 'https://www.vlive.tv/'}, query=query)
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+                self.raise_login_required(json.loads(e.cause.read().decode('utf-8'))['message'])
+            raise
 
 
 class VLiveIE(VLiveBaseIE):
     IE_NAME = 'vlive'
     _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?:video|embed)/(?P<id>[0-9]+)'
-    _NETRC_MACHINE = 'vlive'
     _TESTS = [{
         'url': 'http://www.vlive.tv/video/1326',
         'md5': 'cc7314812855ce56de70a06a27314983',
@@ -81,53 +129,6 @@ class VLiveIE(VLiveBaseIE):
         'playlist_mincount': 120
     }]
 
-    def _real_initialize(self):
-        self._login()
-
-    def _login(self):
-        email, password = self._get_login_info()
-        if None in (email, password):
-            return
-
-        def is_logged_in():
-            login_info = self._download_json(
-                'https://www.vlive.tv/auth/loginInfo', None,
-                note='Downloading login info',
-                headers={'Referer': 'https://www.vlive.tv/home'})
-            return try_get(
-                login_info, lambda x: x['message']['login'], bool) or False
-
-        LOGIN_URL = 'https://www.vlive.tv/auth/email/login'
-        self._request_webpage(
-            LOGIN_URL, None, note='Downloading login cookies')
-
-        self._download_webpage(
-            LOGIN_URL, None, note='Logging in',
-            data=urlencode_postdata({'email': email, 'pwd': password}),
-            headers={
-                'Referer': LOGIN_URL,
-                'Content-Type': 'application/x-www-form-urlencoded'
-            })
-
-        if not is_logged_in():
-            raise ExtractorError('Unable to log in', expected=True)
-
-    def _call_api(self, path_template, video_id, fields=None, limit=None):
-        query = {'appId': self._APP_ID, 'gcc': 'KR', 'platformType': 'PC'}
-        if fields:
-            query['fields'] = fields
-        if limit:
-            query['limit'] = limit
-        try:
-            return self._download_json(
'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id, video_id, - 'Downloading %s JSON metadata' % path_template.split('/')[-1].split('-')[0], - headers={'Referer': 'https://www.vlive.tv/'}, query=query) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: - self.raise_login_required(json.loads(e.cause.read().decode('utf-8'))['message']) - raise - def _real_extract(self, url): video_id = self._match_id(url) @@ -150,7 +151,7 @@ class VLiveIE(VLiveBaseIE): playlist_count = str_or_none(playlist.get('totalCount')) playlist = self._call_api( - 'playlist/v1.0/playlist-%s/posts', playlist_id, 'data', limit=playlist_count) + 'playlist/v1.0/playlist-%s/posts', playlist_id, 'data', {'limit': playlist_count}) entries = [] for video_data in playlist['data']: @@ -216,7 +217,7 @@ class VLiveIE(VLiveBaseIE): raise ExtractorError('Unknown status ' + status) -class VLivePostIE(VLiveIE): +class VLivePostIE(VLiveBaseIE): IE_NAME = 'vlive:post' _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/post/(?P\d-\d+)' _TESTS = [{ @@ -238,8 +239,6 @@ class VLivePostIE(VLiveIE): 'playlist_count': 1, }] _FVIDEO_TMPL = 'fvideo/v1.0/fvideo-%%s/%s' - _SOS_TMPL = _FVIDEO_TMPL % 'sosPlayInfo' - _INKEY_TMPL = _FVIDEO_TMPL % 'inKey' def _real_extract(self, url): post_id = self._match_id(url) @@ -266,7 +265,7 @@ class VLivePostIE(VLiveIE): entry = None if upload_type == 'SOS': download = self._call_api( - self._SOS_TMPL, video_id)['videoUrl']['download'] + self._FVIDEO_TMPL % 'sosPlayInfo', video_id)['videoUrl']['download'] formats = [] for f_id, f_url in download.items(): formats.append({ @@ -284,7 +283,7 @@ class VLivePostIE(VLiveIE): vod_id = upload_info.get('videoId') if not vod_id: continue - inkey = self._call_api(self._INKEY_TMPL, video_id)['inKey'] + inkey = self._call_api(self._FVIDEO_TMPL % 'inKey', video_id)['inKey'] entry = self._extract_video_info(video_id, vod_id, inkey) if entry: entry['title'] = '%s_part%s' % (title, idx) @@ -295,7 +294,7 @@ class VLivePostIE(VLiveIE): class VLiveChannelIE(VLiveBaseIE): IE_NAME = 'vlive:channel' - _VALID_URL = r'https?://(?:channels\.vlive\.tv|(?:(?:www|m)\.)?vlive\.tv/channel)/(?P[0-9A-Z]+)' + _VALID_URL = r'https?://(?:channels\.vlive\.tv|(?:(?:www|m)\.)?vlive\.tv/channel)/(?P[0-9A-Z]+)(?:/board/(?P\d+))?' 
_TESTS = [{ 'url': 'http://channels.vlive.tv/FCD4B', 'info_dict': { @@ -306,78 +305,58 @@ class VLiveChannelIE(VLiveBaseIE): }, { 'url': 'https://www.vlive.tv/channel/FCD4B', 'only_matching': True, + }, { + 'url': 'https://www.vlive.tv/channel/FCD4B/board/3546', + 'info_dict': { + 'id': 'FCD4B-3546', + 'title': 'MAMAMOO - Star Board', + }, + 'playlist_mincount': 880 }] - def _call_api(self, path, channel_key_suffix, channel_value, note, query): - q = { - 'app_id': self._APP_ID, - 'channel' + channel_key_suffix: channel_value, - } - q.update(query) - return self._download_json( - 'http://api.vfan.vlive.tv/vproxy/channelplus/' + path, - channel_value, note='Downloading ' + note, query=q)['result'] - - def _real_extract(self, url): - channel_code = self._match_id(url) - - channel_seq = self._call_api( - 'decodeChannelCode', 'Code', channel_code, - 'decode channel code', {})['channelSeq'] - - channel_name = None - entries = [] + def _entries(self, posts_id, board_name): + if board_name: + posts_path = 'post/v1.0/board-%s/posts' + query_add = {'limit': 100, 'sortType': 'LATEST'} + else: + posts_path = 'post/v1.0/channel-%s/starPosts' + query_add = {'limit': 100} for page_num in itertools.count(1): video_list = self._call_api( - 'getChannelVideoList', 'Seq', channel_seq, - 'channel list page #%d' % page_num, { - # Large values of maxNumOfRows (~300 or above) may cause - # empty responses (see [1]), e.g. this happens for [2] that - # has more than 300 videos. - # 1. https://github.com/ytdl-org/youtube-dl/issues/13830 - # 2. http://channels.vlive.tv/EDBF. - 'maxNumOfRows': 100, - 'pageNo': page_num - } - ) - - if not channel_name: - channel_name = try_get( - video_list, - lambda x: x['channelInfo']['channelName'], - compat_str) + posts_path, posts_id, 'channel{channelName},contentType,postId,title,url', query_add, + note=f'Downloading playlist page {page_num}') + + for video in try_get(video_list, lambda x: x['data'], list) or []: + video_id = str(video.get('postId')) + video_title = str_or_none(video.get('title')) + video_url = url_or_none(video.get('url')) + if not all((video_id, video_title, video_url)) or video.get('contentType') != 'VIDEO': + continue + channel_name = try_get(video, lambda x: x['channel']['channelName'], compat_str) + yield self.url_result(video_url, VLivePostIE.ie_key(), video_id, video_title, channel=channel_name) - videos = try_get( - video_list, lambda x: x['videoList'], list) - if not videos: + after = try_get(video_list, lambda x: x['paging']['nextParams']['after'], compat_str) + if not after: break + query_add['after'] = after + + def _real_extract(self, url): + channel_id, posts_id = self._match_valid_url(url).groups() - for video in videos: - video_id = video.get('videoSeq') - video_type = video.get('videoType') + board_name = None + if posts_id: + board = self._call_api( + 'board/v1.0/board-%s', posts_id, 'title,boardType') + board_name = board.get('title') or 'Unknown' + if board.get('boardType') not in ('STAR', 'VLIVE_PLUS'): + raise ExtractorError(f'Board {board_name!r} is not supported', expected=True) - if not video_id or not video_type: - continue - video_id = compat_str(video_id) - - if video_type in ('PLAYLIST'): - first_video_id = try_get( - video, - lambda x: x['videoPlaylist']['videoList'][0]['videoSeq'], int) - - if not first_video_id: - continue - - entries.append( - self.url_result( - 'http://www.vlive.tv/video/%s' % first_video_id, - ie=VLiveIE.ie_key(), video_id=first_video_id)) - else: - entries.append( - self.url_result( - 
'http://www.vlive.tv/video/%s' % video_id, - ie=VLiveIE.ie_key(), video_id=video_id)) + entries = self._entries(posts_id or channel_id, board_name) + first_video = next(entries) + channel_name = first_video['channel'] return self.playlist_result( - entries, channel_code, channel_name) + itertools.chain([first_video], entries), + f'{channel_id}-{posts_id}' if posts_id else channel_id, + f'{channel_name} - {board_name}' if channel_name and board_name else channel_name) From 49a57e70a9105dfe1671e96bef24663bce5b563d Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 23 Oct 2021 02:07:20 +0530 Subject: [PATCH 0117/2552] [cleanup] misc --- .../ISSUE_TEMPLATE/2_site_support_request.yml | 2 +- .../ISSUE_TEMPLATE/3_site_feature_request.yml | 12 ++++ .github/ISSUE_TEMPLATE/6_question.yml | 4 +- .../2_site_support_request.yml | 2 +- .../3_site_feature_request.yml | 12 ++++ .github/ISSUE_TEMPLATE_tmpl/6_question.yml | 4 +- .github/workflows/build.yml | 16 +++-- .github/workflows/quick-test.yml | 2 +- README.md | 21 +++--- devscripts/make_lazy_extractors.py | 2 +- pyinst.py | 7 +- yt_dlp/YoutubeDL.py | 72 ++++++++++--------- yt_dlp/__init__.py | 2 +- yt_dlp/downloader/fragment.py | 3 +- yt_dlp/extractor/common.py | 6 +- yt_dlp/extractor/soundcloud.py | 2 +- yt_dlp/extractor/youtube.py | 4 +- 17 files changed, 104 insertions(+), 69 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml index c0a22ac2b..f8ca606c7 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml @@ -33,7 +33,7 @@ body: attributes: label: Example URLs description: | - Provide all kinds of example URLs, support for which should be included. Replace following example URLs by yours + Provide all kinds of example URLs for which support should be added value: | - Single video: https://www.youtube.com/watch?v=BaW_jenozKc - Single video: https://youtu.be/BaW_jenozKc diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml index 44c8a0816..a986df363 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml @@ -13,6 +13,8 @@ body: required: true - label: I've verified that I'm running yt-dlp version **2021.10.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true + - label: I've checked that all provided URLs are alive and playable in a browser + required: true - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. 
DO NOT post duplicates required: true - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) @@ -24,6 +26,16 @@ body: label: Region description: "Enter the region the site is accessible from" placeholder: "India" + - type: textarea + id: example-urls + attributes: + label: Example URLs + description: | + Example URLs that can be used to demonstrate the requested feature + value: | + https://www.youtube.com/watch?v=BaW_jenozKc + validations: + required: true - type: textarea id: description attributes: diff --git a/.github/ISSUE_TEMPLATE/6_question.yml b/.github/ISSUE_TEMPLATE/6_question.yml index c101c2286..a6e5fa80d 100644 --- a/.github/ISSUE_TEMPLATE/6_question.yml +++ b/.github/ISSUE_TEMPLATE/6_question.yml @@ -11,11 +11,11 @@ body: options: - label: I'm asking a question and not reporting a bug/feature request required: true - - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp) + - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) required: true - - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues) for similar questions including closed ones + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions including closed ones required: true - type: textarea id: question diff --git a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml index be6427ce1..f7a48edc7 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml @@ -33,7 +33,7 @@ body: attributes: label: Example URLs description: | - Provide all kinds of example URLs, support for which should be included. Replace following example URLs by yours + Provide all kinds of example URLs for which support should be added value: | - Single video: https://www.youtube.com/watch?v=BaW_jenozKc - Single video: https://youtu.be/BaW_jenozKc diff --git a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml index f19d958c6..09b98a9ec 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml @@ -13,6 +13,8 @@ body: required: true - label: I've verified that I'm running yt-dlp version **%(version)s**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true + - label: I've checked that all provided URLs are alive and playable in a browser + required: true - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. 
DO NOT post duplicates required: true - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) @@ -24,6 +26,16 @@ body: label: Region description: "Enter the region the site is accessible from" placeholder: "India" + - type: textarea + id: example-urls + attributes: + label: Example URLs + description: | + Example URLs that can be used to demonstrate the requested feature + value: | + https://www.youtube.com/watch?v=BaW_jenozKc + validations: + required: true - type: textarea id: description attributes: diff --git a/.github/ISSUE_TEMPLATE_tmpl/6_question.yml b/.github/ISSUE_TEMPLATE_tmpl/6_question.yml index c101c2286..a6e5fa80d 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/6_question.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/6_question.yml @@ -11,11 +11,11 @@ body: options: - label: I'm asking a question and not reporting a bug/feature request required: true - - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp) + - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) required: true - - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues) for similar questions including closed ones + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions including closed ones required: true - type: textarea id: question diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3082884aa..3329c141f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -53,7 +53,7 @@ jobs: - name: Build lazy extractors id: lazy_extractors - run: python devscripts/make_lazy_extractors.py yt_dlp/extractor/lazy_extractors.py + run: python devscripts/make_lazy_extractors.py - name: Run Make run: make all tar - name: Get SHA2-256SUMS for yt-dlp @@ -115,10 +115,12 @@ jobs: release_name: yt-dlp ${{ steps.bump_version.outputs.ytdlp_version }} commitish: ${{ steps.push_update.outputs.head_sha }} body: | - See [this](https://github.com/yt-dlp/yt-dlp#release-files) for a description of the files - - #### Changelog: + ### Changelog: ${{ env.changelog }} + + --- + + ### See [this](https://github.com/yt-dlp/yt-dlp#release-files) for a description of the release files draft: false prerelease: false - name: Upload yt-dlp Unix binary @@ -162,7 +164,7 @@ jobs: run: /usr/bin/python3 devscripts/update-version.py - name: Build lazy extractors id: lazy_extractors - run: /usr/bin/python3 devscripts/make_lazy_extractors.py yt_dlp/extractor/lazy_extractors.py + run: /usr/bin/python3 devscripts/make_lazy_extractors.py - name: Run PyInstaller Script run: /usr/bin/python3 pyinst.py --target-architecture universal2 --onefile - name: Upload yt-dlp MacOS binary @@ -233,7 +235,7 @@ jobs: run: python devscripts/update-version.py - name: Build lazy extractors id: lazy_extractors - run: python devscripts/make_lazy_extractors.py yt_dlp/extractor/lazy_extractors.py + run: python devscripts/make_lazy_extractors.py - name: Run PyInstaller Script run: python pyinst.py - name: Upload yt-dlp.exe Windows binary @@ -320,7 +322,7 @@ jobs: run: python devscripts/update-version.py - name: Build lazy extractors id: lazy_extractors - run: python devscripts/make_lazy_extractors.py yt_dlp/extractor/lazy_extractors.py + run: python devscripts/make_lazy_extractors.py - name: Run PyInstaller 
Script for 32 Bit run: python pyinst.py - name: Upload Executable yt-dlp_x86.exe diff --git a/.github/workflows/quick-test.yml b/.github/workflows/quick-test.yml index bbad209b3..d8e14f470 100644 --- a/.github/workflows/quick-test.yml +++ b/.github/workflows/quick-test.yml @@ -28,6 +28,6 @@ jobs: - name: Install flake8 run: pip install flake8 - name: Make lazy extractors - run: python devscripts/make_lazy_extractors.py yt_dlp/extractor/lazy_extractors.py + run: python devscripts/make_lazy_extractors.py - name: Run flake8 run: flake8 . diff --git a/README.md b/README.md index 3ca308f87..713e6e534 100644 --- a/README.md +++ b/README.md @@ -205,7 +205,7 @@ File|Description :---|:--- [yt-dlp_macos](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)|MacOS standalone executable [yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows standalone x86 (32bit) binary -[yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows standalone x64 binary built with `py2exe`.
Does not contain `pycryptodomex`, needs VC++14 +[yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_min.exe)|Windows standalone x64 binary built with `py2exe`.
Does not contain `pycryptodomex`, needs VC++14 [yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged windows executable (No auto-update) [yt-dlp_macos.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos.zip)|Unpackaged MacOS executable (No auto-update) @@ -248,11 +248,10 @@ The windows releases are already built with the python interpreter, mutagen, pyc ### COMPILE **For Windows**: -To build the Windows executable, you must have pyinstaller (and optionally mutagen, pycryptodomex, websockets) - -Once you have all the necessary dependencies installed, just run `pyinst.py`. The executable will be built for the same architecture (32/64 bit) as the python used to build it. +To build the Windows executable, you must have pyinstaller (and optionally mutagen, pycryptodomex, websockets). Once you have all the necessary dependencies installed, (optionally) build lazy extractors using `devscripts/make_lazy_extractors.py`, and then just run `pyinst.py`. The executable will be built for the same architecture (32/64 bit) as the python used to build it. py -m pip install -U pyinstaller -r requirements.txt + py devscripts/make_lazy_extractors.py py pyinst.py Note that pyinstaller [does not support](https://github.com/pyinstaller/pyinstaller#requirements-and-tested-platforms) Python installed from the Windows store without using a virtual environment @@ -261,7 +260,7 @@ Note that pyinstaller [does not support](https://github.com/pyinstaller/pyinstal You will need the required build tools: `python`, `make` (GNU), `pandoc`, `zip`, `pytest` Then simply run `make`. You can also run `make yt-dlp` instead to compile only the binary without updating any of the additional files -**Note**: In either platform, `devscripts\update-version.py` can be used to automatically update the version number +**Note**: In either platform, `devscripts/update-version.py` can be used to automatically update the version number # USAGE AND OPTIONS @@ -1156,11 +1155,13 @@ Available only in `--sponsorblock-chapter-title`: - `category_names` (list): Friendly names of the categories - `name` (string): Friendly name of the smallest category -Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default). +Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `yt-dlp test video` and id `BaW_jenozKc`, this will result in a `yt-dlp test video-BaW_jenozKc.mp4` file created in the current directory. + +Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default). -For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `yt-dlp test video` and id `BaW_jenozKc`, this will result in a `yt-dlp test video-BaW_jenozKc.mp4` file created in the current directory. 
+**Tip**: Look at the `-j` output to identify which fields are available for the purticular URL -For numeric sequences you can use numeric related formatting, for example, `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`. +For numeric sequences you can use [numeric related formatting](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting), for example, `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`. Output templates can also contain arbitrary hierarchical path, e.g. `-o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s'` which will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you. @@ -1309,7 +1310,7 @@ The available fields are: - `width`: Width of video - `res`: Video resolution, calculated as the smallest dimension. - `fps`: Framerate of video - - `hdr`: The dynamic range of the video (`DV` > `HDR12` > `HDR10+` > `HDR10` > `SDR`) + - `hdr`: The dynamic range of the video (`DV` > `HDR12` > `HDR10+` > `HDR10` > `HLG` > `SDR`) - `tbr`: Total average bitrate in KBit/s - `vbr`: Average video bitrate in KBit/s - `abr`: Average audio bitrate in KBit/s @@ -1627,6 +1628,8 @@ with yt_dlp.YoutubeDL(ydl_opts) as ydl: See the public functions in [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py) for other available functions. Eg: `ydl.download`, `ydl.download_with_info_file` +**Tip**: If you are porting your code from youtube-dl to yt-dlp, one important point to look out for is that we do not guarantee the return value of `YoutubeDL.extract_info` to be json serializable, or even be a dictionary. It will be dictionary-like, but if you want to ensure it is a serializable dictionary, pass it through `YoutubeDL.sanitize_info` as shown in the example above + # DEPRECATED OPTIONS diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py index 427045b98..0411df76b 100644 --- a/devscripts/make_lazy_extractors.py +++ b/devscripts/make_lazy_extractors.py @@ -9,7 +9,7 @@ import sys sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) -lazy_extractors_filename = sys.argv[1] +lazy_extractors_filename = sys.argv[1] if len(sys.argv) > 1 else 'yt_dlp/extractor/lazy_extractors.py' if os.path.exists(lazy_extractors_filename): os.remove(lazy_extractors_filename) diff --git a/pyinst.py b/pyinst.py index 0a695289b..c7ef2761b 100644 --- a/pyinst.py +++ b/pyinst.py @@ -24,16 +24,15 @@ def main(): opts = parse_options() version = read_version() - suffix = '_x86' if ARCH == '32' else '_macos' if OS_NAME == 'Darwin' else '' + suffix = '_macos' if OS_NAME == 'Darwin' else '_x86' if ARCH == '32' else '' final_file = 'dist/%syt-dlp%s%s' % ( 'yt-dlp/' if '--onedir' in opts else '', suffix, '.exe' if OS_NAME == 'Windows' else '') print(f'Building yt-dlp v{version} {ARCH}bit for {OS_NAME} with options {opts}') - print('Remember to update the version using "devscripts/update-version.py"') + print('Remember to update the version using "devscripts/update-version.py"') if not os.path.isfile('yt_dlp/extractor/lazy_extractors.py'): print('WARNING: Building without lazy_extractors. 
Run ' - '"devscripts/make_lazy_extractors.py" "yt_dlp/extractor/lazy_extractors.py" ' - 'to build lazy extractors', file=sys.stderr) + '"devscripts/make_lazy_extractors.py" to build lazy extractors', file=sys.stderr) print(f'Destination: {final_file}\n') opts = [ diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index f95bbea81..0ac1f1c61 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -307,7 +307,7 @@ class YoutubeDL(object): cookiefile: File name where cookies should be read from and dumped to cookiesfrombrowser: A tuple containing the name of the browser and the profile name/path from where cookies are loaded. - Eg: ('chrome', ) or (vivaldi, 'default') + Eg: ('chrome', ) or ('vivaldi', 'default') nocheckcertificate:Do not verify SSL certificates prefer_insecure: Use HTTP instead of HTTPS to retrieve information. At the moment, this is only supported by YouTube. @@ -503,7 +503,7 @@ class YoutubeDL(object): def __init__(self, params=None, auto_init=True): """Create a FileDownloader object with the given options. @param auto_init Whether to load the default extractors and print header (if verbose). - Set to 'no_verbose_header' to not ptint the header + Set to 'no_verbose_header' to not print the header """ if params is None: params = {} @@ -551,7 +551,7 @@ class YoutubeDL(object): check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"') check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"') - for msg in self.params.get('warnings', []): + for msg in self.params.get('_warnings', []): self.report_warning(msg) if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None: @@ -584,7 +584,9 @@ class YoutubeDL(object): self._output_channel = os.fdopen(master, 'rb') except OSError as ose: if ose.errno == errno.ENOENT: - self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.') + self.report_warning( + 'Could not find fribidi executable, ignoring --bidi-workaround. ' + 'Make sure that fribidi is an executable file in one of the directories in your $PATH.') else: raise @@ -631,7 +633,7 @@ class YoutubeDL(object): """Preload the archive, if any is specified""" if fn is None: return False - self.write_debug('Loading archive file %r\n' % fn) + self.write_debug(f'Loading archive file {fn!r}') try: with locked_file(fn, 'r', encoding='utf-8') as archive_file: for line in archive_file: @@ -658,7 +660,7 @@ class YoutubeDL(object): ) self.report_warning( 'Long argument string detected. 
' - 'Use -- to separate parameters and URLs, like this:\n%s\n' % + 'Use -- to separate parameters and URLs, like this:\n%s' % args_to_str(correct_argv)) def add_info_extractor(self, ie): @@ -1550,7 +1552,7 @@ class YoutubeDL(object): playlistitems = list(range(playliststart, playliststart + n_entries)) ie_result['requested_entries'] = playlistitems - if self.params.get('allow_playlist_files', True): + if not self.params.get('simulate') and self.params.get('allow_playlist_files', True): ie_copy = { 'playlist': playlist, 'playlist_id': ie_result.get('id'), @@ -1558,6 +1560,7 @@ class YoutubeDL(object): 'playlist_uploader': ie_result.get('uploader'), 'playlist_uploader_id': ie_result.get('uploader_id'), 'playlist_index': 0, + 'n_entries': n_entries, } ie_copy.update(dict(ie_result)) @@ -1883,6 +1886,7 @@ class YoutubeDL(object): 'height': the_only_video.get('height'), 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video), 'fps': the_only_video.get('fps'), + 'dynamic_range': the_only_video.get('dynamic_range'), 'vcodec': the_only_video.get('vcodec'), 'vbr': the_only_video.get('vbr'), 'stretched_ratio': the_only_video.get('stretched_ratio'), @@ -2381,7 +2385,7 @@ class YoutubeDL(object): new_info['__original_infodict'] = info_dict new_info.update(fmt) self.process_info(new_info) - # We update the info dict with the best quality format (backwards compatibility) + # We update the info dict with the selected best quality format (backwards compatibility) if formats_to_download: info_dict.update(formats_to_download[-1]) return info_dict @@ -3250,35 +3254,40 @@ class YoutubeDL(object): def print_debug_header(self): if not self.params.get('verbose'): return - get_encoding = lambda stream: getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__) - encoding_str = ( - '[debug] Encodings: locale %s, fs %s, stdout %s, stderr %s, pref %s\n' % ( - locale.getpreferredencoding(), - sys.getfilesystemencoding(), - get_encoding(self._screen_file), get_encoding(self._err_file), - self.get_encoding())) + + def get_encoding(stream): + ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__) + if not supports_terminal_sequences(stream): + ret += ' (No ANSI)' + return ret + + encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % ( + locale.getpreferredencoding(), + sys.getfilesystemencoding(), + get_encoding(self._screen_file), get_encoding(self._err_file), + self.get_encoding()) logger = self.params.get('logger') if logger: write_debug = lambda msg: logger.debug(f'[debug] {msg}') write_debug(encoding_str) else: - write_debug = lambda msg: self._write_string(f'[debug] {msg}') - write_string(encoding_str, encoding=None) + write_string(f'[debug] {encoding_str}', encoding=None) + write_debug = lambda msg: self._write_string(f'[debug] {msg}\n') source = detect_variant() - write_debug('yt-dlp version %s%s\n' % (__version__, '' if source == 'unknown' else f' ({source})')) + write_debug('yt-dlp version %s%s' % (__version__, '' if source == 'unknown' else f' ({source})')) if not _LAZY_LOADER: if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'): - write_debug('Lazy loading extractors is forcibly disabled\n') + write_debug('Lazy loading extractors is forcibly disabled') else: - write_debug('Lazy loading extractors is disabled\n') + write_debug('Lazy loading extractors is disabled') if plugin_extractors or plugin_postprocessors: - write_debug('Plugins: %s\n' % [ + write_debug('Plugins: %s' % [ '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' 
as {name}') for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())]) if self.params.get('compat_opts'): - write_debug('Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts'))) + write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts'))) try: sp = Popen( ['git', 'rev-parse', '--short', 'HEAD'], @@ -3287,7 +3296,7 @@ class YoutubeDL(object): out, err = sp.communicate_or_kill() out = out.decode().strip() if re.match('[0-9a-f]+', out): - write_debug('Git HEAD: %s\n' % out) + write_debug('Git HEAD: %s' % out) except Exception: try: sys.exc_clear() @@ -3300,7 +3309,7 @@ class YoutubeDL(object): return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3] return impl_name - write_debug('Python version %s (%s %s) - %s\n' % ( + write_debug('Python version %s (%s %s) - %s' % ( platform.python_version(), python_implementation(), platform.architecture()[0], @@ -3312,7 +3321,7 @@ class YoutubeDL(object): exe_str = ', '.join( f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v ) or 'none' - write_debug('exe versions: %s\n' % exe_str) + write_debug('exe versions: %s' % exe_str) from .downloader.websocket import has_websockets from .postprocessor.embedthumbnail import has_mutagen @@ -3325,21 +3334,18 @@ class YoutubeDL(object): SQLITE_AVAILABLE and 'sqlite', KEYRING_AVAILABLE and 'keyring', )))) or 'none' - write_debug('Optional libraries: %s\n' % lib_str) - write_debug('ANSI escape support: stdout = %s, stderr = %s\n' % ( - supports_terminal_sequences(self._screen_file), - supports_terminal_sequences(self._err_file))) + write_debug('Optional libraries: %s' % lib_str) proxy_map = {} for handler in self._opener.handlers: if hasattr(handler, 'proxies'): proxy_map.update(handler.proxies) - write_debug('Proxy map: ' + compat_str(proxy_map) + '\n') + write_debug(f'Proxy map: {proxy_map}') - if self.params.get('call_home', False): + # Not implemented + if False and self.params.get('call_home'): ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8') - write_debug('Public IP address: %s\n' % ipaddr) - return + write_debug('Public IP address: %s' % ipaddr) latest_version = self.urlopen( 'https://yt-dl.org/latest/version').read().decode('utf-8') if version_tuple(latest_version) > version_tuple(__version__): diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index d97d4af64..e1c45441a 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -745,7 +745,7 @@ def _real_main(argv=None): 'geo_bypass': opts.geo_bypass, 'geo_bypass_country': opts.geo_bypass_country, 'geo_bypass_ip_block': opts.geo_bypass_ip_block, - 'warnings': warnings, + '_warnings': warnings, 'compat_opts': compat_opts, } diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index c345f3148..a9d1471f8 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -370,7 +370,8 @@ class FragmentFD(FileDownloader): if max_progress == 1: return self.download_and_append_fragments(*args[0], pack_func=pack_func, finish_func=finish_func) max_workers = self.params.get('concurrent_fragment_downloads', max_progress) - self._prepare_multiline_status(max_progress) + if max_progress > 1: + self._prepare_multiline_status(max_progress) def thread_func(idx, ctx, fragments, info_dict, tpe): ctx['max_progress'] = max_progress diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index e00d8c42b..22b1ed69a 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -448,7 +448,9 @@ class 
InfoExtractor(object): } def __init__(self, downloader=None): - """Constructor. Receives an optional downloader.""" + """Constructor. Receives an optional downloader (a YoutubeDL instance). + If a downloader is not passed during initialization, + it must be set using "set_downloader()" before "extract()" is called""" self._ready = False self._x_forwarded_for_ip = None self._printed_messages = set() @@ -664,7 +666,7 @@ class InfoExtractor(object): See _download_webpage docstring for arguments specification. """ if not self._downloader._first_webpage_request: - sleep_interval = float_or_none(self.get_param('sleep_interval_requests')) or 0 + sleep_interval = self.get_param('sleep_interval_requests') or 0 if sleep_interval > 0: self.to_screen('Sleeping %s seconds ...' % sleep_interval) time.sleep(sleep_interval) diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index e89383ff1..412331e17 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -855,7 +855,7 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE): class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): IE_NAME = 'soundcloud:search' - IE_DESC = 'Soundcloud search' + IE_DESC = 'Soundcloud search, "scsearch" keyword' _MAX_RESULTS = float('inf') _TESTS = [{ 'url': 'scsearch15:post-avant jazzcore', diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index aa58a22bf..54f5ef15c 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -4304,9 +4304,7 @@ class YoutubePlaylistIE(InfoExtractor): def suitable(cls, url): if YoutubeTabIE.suitable(url): return False - # Hack for lazy extractors until more generic solution is implemented - # (see #28780) - from .youtube import parse_qs + from ..utils import parse_qs qs = parse_qs(url) if qs.get('v', [None])[0]: return False From 0676afb12609b4d457b9626215eea38bab40f2dc Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 23 Oct 2021 02:09:15 +0530 Subject: [PATCH 0118/2552] Release 2021.10.22 --- CONTRIBUTORS | 4 +++ Changelog.md | 78 +++++++++++++++++++++++++++++++++++++++++++++++ README.md | 4 +-- supportedsites.md | 14 +++++++-- 4 files changed, 95 insertions(+), 5 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 048d98852..2bf96affe 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -125,3 +125,7 @@ jfogelman timethrow sarnoud Bojidarist +18928172992817182/gustaf +nixklai +smplayer-dev +Zirro diff --git a/Changelog.md b/Changelog.md index 2e6da33fb..6dbc13bd7 100644 --- a/Changelog.md +++ b/Changelog.md @@ -14,6 +14,84 @@ --> +### 2021.10.22 + +* [build] Improvements + * Build standalone MacOS packages by [smplayer-dev](https://github.com/smplayer-dev) + * Release windows exe built with `py2exe` + * Enable lazy-extractors in releases. 
+ * Set env var `YTDLP_NO_LAZY_EXTRACTORS` to forcefully disable this (experimental) + * Clean up error reporting in update + * Refactor `pyinst.py`, misc cleanup and improve docs +* [docs] Migrate issues to use forms by [Ashish0804](https://github.com/Ashish0804) +* [downloader] **Fix slow progress hooks** + * This was causing HLS/DASH downloads to be extremely slow in some situations +* [downloader/ffmpeg] Improve simultaneous download and merge +* [EmbedMetadata] Allow overwriting all default metadata with `meta_default` key +* [ModifyChapters] Add ability for `--remove-chapters` to remove sections by timestamp +* [utils] Allow duration strings in `--match-filter` +* Add HDR information to formats +* Add negative option `--no-batch-file` by [Zirro](https://github.com/Zirro) +* Calculate more fields for merged formats +* Do not verify thumbnail URLs unless `--check-formats` is specified +* Don't create console for subprocesses on Windows +* Fix `--restrict-filename` when used with default template +* Fix `check_formats` output being written to stdout when `-qv` +* Fix bug in storyboards +* Fix conflict b/w id and ext in format selection +* Fix verbose head not showing custom configs +* Load archive only after printing verbose head +* Make `duration_string` and `resolution` available in --match-filter +* Re-implement deprecated option `--id` +* Reduce default `--socket-timeout` +* Write verbose header to logger +* [outtmpl] Fix bug in expanding environment variables +* [cookies] Local State should be opened as utf-8 +* [extractor,utils] Detect more codecs/mimetypes +* [extractor] Detect `EXT-X-KEY` Apple FairPlay +* [utils] Use `importlib` to load plugins by [sulyi](https://github.com/sulyi) +* [http] Retry on socket timeout and show the last encountered error +* [fragment] Print error message when skipping fragment +* [aria2c] Fix `--skip-unavailable-fragment` +* [SponsorBlock] Obey `extractor-retries` and `sleep-requests` +* [Merger] Do not add `aac_adtstoasc` to non-hls audio +* [ModifyChapters] Do not mutate original chapters by [nihil-admirari](https://github.com/nihil-admirari) +* [devscripts/run_tests] Use markers to filter tests by [sulyi](https://github.com/sulyi) +* [7plus] Add cookie based authentication by [nyuszika7h](https://github.com/nyuszika7h) +* [AdobePass] Fix RCN MSO by [jfogelman](https://github.com/jfogelman) +* [CBC] Fix Gem livestream by [makeworld-the-better-one](https://github.com/makeworld-the-better-one) +* [CBC] Support CBC Gem member content by [makeworld-the-better-one](https://github.com/makeworld-the-better-one) +* [crunchyroll] Add season to flat-playlist Closes #1319 +* [crunchyroll] Add support for `beta.crunchyroll` URLs and fix series URLs with language code +* [EUScreen] Add Extractor by [Ashish0804](https://github.com/Ashish0804) +* [Gronkh] Add extractor by [Ashish0804](https://github.com/Ashish0804) +* [hidive] Fix typo +* [Hotstar] Mention Dynamic Range in `format_id` by [Ashish0804](https://github.com/Ashish0804) +* [Hotstar] Raise appropriate error for DRM +* [instagram] Add login by [u-spec-png](https://github.com/u-spec-png) +* [instagram] Show appropriate error when login is needed +* [microsoftstream] Add extractor by [damianoamatruda](https://github.com/damianoamatruda), [nixklai](https://github.com/nixklai) +* [on24] Add extractor by [damianoamatruda](https://github.com/damianoamatruda) +* [patreon] Fix vimeo player regex by [zenerdi0de](https://github.com/zenerdi0de) +* [SkyNewsAU] Add extractor by 
[Ashish0804](https://github.com/Ashish0804) +* [tagesschau] Fix extractor by [u-spec-png](https://github.com/u-spec-png) +* [tbs] Add tbs live streams by [llacb47](https://github.com/llacb47) +* [tiktok] Fix typo and update tests +* [trovo] Support channel clips and VODs by [Ashish0804](https://github.com/Ashish0804) +* [Viafree] Add support for Finland by [18928172992817182](https://github.com/18928172992817182) +* [vimeo] Fix embedded `player.vimeo` +* [vlive:channel] Fix extraction by [kikuyan](https://github.com/kikuyan), [pukkandan](https://github.com/pukkandan) +* [youtube] Add auto-translated subtitles +* [youtube] Expose different formats with same itag +* [youtube:comments] Fix for new layout by [coletdjnz](https://github.com/coletdjnz) +* [cleanup] Cleanup bilibili code by [pukkandan](https://github.com/pukkandan), [u-spec-png](https://github.com/u-spec-png) +* [cleanup] Remove broken youtube login code +* [cleanup] Standardize timestamp formatting code +* [cleanup] Generalize `getcomments` implementation for extractors +* [cleanup] Simplify search extractors code +* [cleanup] misc + + ### 2021.10.10 * [downloader/ffmpeg] Fix bug in initializing `FFmpegPostProcessor` diff --git a/README.md b/README.md index 713e6e534..6e773412d 100644 --- a/README.md +++ b/README.md @@ -93,9 +93,9 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/ * **Aria2c with HLS/DASH**: You can use `aria2c` as the external downloader for DASH(mpd) and HLS(m3u8) formats -* **New extractors**: AnimeLab, Philo MSO, Spectrum MSO, SlingTV MSO, Cablevision MSO, RCN MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5, mtv.it, wimtv, pluto.tv, niconico users, discoveryplus.in, mediathek, NFHSNetwork, nebula, ukcolumn, whowatch, MxplayerShow, parlview (au), YoutubeWebArchive, fancode, Saitosan, ShemarooMe, telemundo, VootSeries, SonyLIVSeries, HotstarSeries, VidioPremier, VidioLive, RCTIPlus, TBS Live, douyin, pornflip, ParamountPlusSeries, ScienceChannel, Utreon, OpenRec, BandcampMusic, blackboardcollaborate, eroprofile albums, mirrativ, BannedVideo, bilibili categories, Epicon, filmmodu, GabTV, HungamaAlbum, ManotoTV, Niconico search, Patreon User, peloton, ProjectVeritas, radiko, StarTV, tiktok user, Tokentube, voicy, TV2HuSeries, biliintl, 17live, NewgroundsUser, peertube channel/playlist, ZenYandex, CAM4, CGTN, damtomo, gotostage, Koo, Mediaite, Mediaklikk, MuseScore, nzherald, Olympics replay, radlive, SovietsCloset, Streamanity, Theta, Chingari, ciscowebex, Gettr, GoPro, N1, Theta, Veo, Vupload, NovaPlay +* **New extractors**: AnimeLab, Philo MSO, Spectrum MSO, SlingTV MSO, Cablevision MSO, RCN MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5, mtv.it, wimtv, pluto.tv, niconico users, discoveryplus.in, mediathek, NFHSNetwork, nebula, ukcolumn, whowatch, MxplayerShow, parlview (au), YoutubeWebArchive, fancode, Saitosan, ShemarooMe, telemundo, VootSeries, SonyLIVSeries, HotstarSeries, VidioPremier, VidioLive, RCTIPlus, TBS Live, douyin, pornflip, ParamountPlusSeries, ScienceChannel, Utreon, OpenRec, BandcampMusic, blackboardcollaborate, eroprofile albums, mirrativ, BannedVideo, bilibili categories, Epicon, filmmodu, GabTV, HungamaAlbum, ManotoTV, Niconico search, Patreon User, peloton, ProjectVeritas, radiko, StarTV, tiktok user, Tokentube, voicy, TV2HuSeries, biliintl, 17live, NewgroundsUser, peertube channel/playlist, ZenYandex, CAM4, CGTN, damtomo, gotostage, Koo, Mediaite, Mediaklikk, MuseScore, nzherald, Olympics replay, radlive, SovietsCloset, Streamanity, Theta, Chingari, 
ciscowebex, Gettr, GoPro, N1, Theta, Veo, Vupload, NovaPlay, SkyNewsAU, EUScreen, Gronkh, microsoftstream, on24, trovo channels -* **Fixed/improved extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, akamai, ina, rumble, tennistv, amcnetworks, la7 podcasts, linuxacadamy, nitter, twitcasting, viu, crackle, curiositystream, mediasite, rmcdecouverte, sonyliv, tubi, tenplay, patreon, videa, yahoo, BravoTV, crunchyroll playlist, RTP, viki, Hotstar, vidio, vimeo, mediaset, Mxplayer, nbcolympics, ParamountPlus, Newgrounds, SAML Verizon login, Hungama, afreecatv, aljazeera, ATV, bitchute, camtube, CDA, eroprofile, facebook, HearThisAtIE, iwara, kakao, Motherless, Nova, peertube, pornhub, reddit, tiktok, TV2, TV2Hu, tv5mondeplus, VH1, Viafree, XHamster, 9Now, AnimalPlanet, Arte, CBC, Chingari, comedycentral, DIYNetwork, niconico, dw, funimation, globo, HiDive, NDR, Nuvid, Oreilly, pbs, plutotv, reddit, redtube, soundcloud, SpankBang, VrtNU, bbc, Bilibili, LinkedInLearning, parliamentlive, PolskieRadio, Streamable, vidme, francetv +* **Fixed/improved extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, akamai, ina, rumble, tennistv, amcnetworks, la7 podcasts, linuxacadamy, nitter, twitcasting, viu, crackle, curiositystream, mediasite, rmcdecouverte, sonyliv, tubi, tenplay, patreon, videa, yahoo, BravoTV, crunchyroll, RTP, viki, Hotstar, vidio, vimeo, mediaset, Mxplayer, nbcolympics, ParamountPlus, Newgrounds, SAML Verizon login, Hungama, afreecatv, aljazeera, ATV, bitchute, camtube, CDA, eroprofile, facebook, HearThisAtIE, iwara, kakao, Motherless, Nova, peertube, pornhub, reddit, tiktok, TV2, TV2Hu, tv5mondeplus, VH1, Viafree, XHamster, 9Now, AnimalPlanet, Arte, CBC, Chingari, comedycentral, DIYNetwork, niconico, dw, funimation, globo, HiDive, NDR, Nuvid, Oreilly, pbs, plutotv, reddit, redtube, soundcloud, SpankBang, VrtNU, bbc, Bilibili, LinkedInLearning, parliamentlive, PolskieRadio, Streamable, vidme, francetv, 7plus, tagesschau * **Subtitle extraction from manifests**: Subtitles can be extracted from streaming media manifests. 
See [commit/be6202f](https://github.com/yt-dlp/yt-dlp/commit/be6202f12b97858b9d716e608394b51065d0419f) for details diff --git a/supportedsites.md b/supportedsites.md index 02be6b918..616151db8 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -226,7 +226,9 @@ - **Crackle** - **CrooksAndLiars** - **crunchyroll** + - **crunchyroll:beta** - **crunchyroll:playlist** + - **crunchyroll:playlist:beta** - **CSpan**: C-SPAN - **CtsNews**: 華視新聞 - **CTV** @@ -315,6 +317,7 @@ - **ESPNArticle** - **EsriVideo** - **Europa** + - **EUScreen** - **EWETV** - **ExpoTV** - **Expressen** @@ -394,6 +397,7 @@ - **Goshgay** - **GoToStage** - **GPUTechConf** + - **Gronkh** - **Groupon** - **hbo** - **HearThisAt** @@ -570,6 +574,7 @@ - **Mgoon** - **MGTV**: 芒果TV - **MiaoPai** + - **microsoftstream**: Microsoft Stream - **mildom**: Record ongoing live by specific user in Mildom - **mildom:user:vod**: Download all VODs from specific user in Mildom - **mildom:vod**: Download a VOD in Mildom @@ -734,6 +739,7 @@ - **Odnoklassniki** - **OktoberfestTV** - **OlympicsReplay** + - **on24**: ON24 - **OnDemandKorea** - **onet.pl** - **onet.tv** @@ -961,6 +967,7 @@ - **SkylineWebcams** - **skynewsarabia:article** - **skynewsarabia:video** + - **SkyNewsAU** - **Slideshare** - **SlidesLive** - **Slutload** @@ -970,7 +977,7 @@ - **SonyLIVSeries** - **soundcloud** - **soundcloud:playlist** - - **soundcloud:search**: Soundcloud search + - **soundcloud:search**: Soundcloud search, "scsearch" keyword - **soundcloud:set** - **soundcloud:trackstation** - **soundcloud:user** @@ -1029,7 +1036,6 @@ - **SztvHu** - **t-online.de** - **Tagesschau** - - **tagesschau:player** - **Tass** - **TBS** - **TDSLifeway** @@ -1089,6 +1095,8 @@ - **TrailerAddict** (Currently broken) - **Trilulilu** - **Trovo** + - **TrovoChannelClip**: All Clips of a trovo.live channel, "trovoclip" keyword + - **TrovoChannelVod**: All VODs of a trovo.live channel, "trovovod" keyword - **TrovoVod** - **TruNews** - **TruTV** @@ -1193,7 +1201,7 @@ - **Viddler** - **Videa** - **video.arnes.si**: Arnes Video - - **video.google:search**: Google Video search + - **video.google:search**: Google Video search (Currently broken) - **video.sky.it** - **video.sky.it:live** - **VideoDetective** From 1117579b9457f8fbf7a4d7433a92b67ac802bdea Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 22 Oct 2021 20:47:18 +0000 Subject: [PATCH 0119/2552] [version] update :ci skip all --- .github/ISSUE_TEMPLATE/1_broken_site.yml | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.yml | 6 +++--- .github/ISSUE_TEMPLATE/3_site_feature_request.yml | 2 +- .github/ISSUE_TEMPLATE/4_bug_report.yml | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.yml | 2 +- yt_dlp/version.py | 2 +- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml index 2a492d132..862e7235f 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a broken site required: true - - label: I've verified that I'm running yt-dlp version **2021.10.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.10.22**. 
([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -51,12 +51,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2021.10.10 (exe) + [debug] yt-dlp version 2021.10.22 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2021.10.10) + yt-dlp is up to date (2021.10.22) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml index f8ca606c7..aa00b8ad7 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a new site support request required: true - - label: I've verified that I'm running yt-dlp version **2021.10.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.10.22**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -62,12 +62,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2021.10.10 (exe) + [debug] yt-dlp version 2021.10.22 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2021.10.10) + yt-dlp is up to date (2021.10.22) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml index a986df363..59578b712 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a site feature request required: true - - label: I've verified that I'm running yt-dlp version **2021.10.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.10.22**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml index 1c609cab1..9003bb19a 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a bug unrelated to a specific site required: true - - label: I've verified that I'm running yt-dlp version **2021.10.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.10.22**. 
([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -45,12 +45,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2021.10.10 (exe) + [debug] yt-dlp version 2021.10.22 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2021.10.10) + yt-dlp is up to date (2021.10.22) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml index d839df95d..134416f4e 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/5_feature_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a feature request required: true - - label: I've verified that I'm running yt-dlp version **2021.10.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.10.22**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates required: true diff --git a/yt_dlp/version.py b/yt_dlp/version.py index 83b6fea9f..e7203be6b 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2021.10.10' +__version__ = '2021.10.22' From 93c7f3398dd2e45fdb2c32b49ff169c46eadfbda Mon Sep 17 00:00:00 2001 From: Alf Marius Date: Sat, 23 Oct 2021 00:52:01 +0200 Subject: [PATCH 0120/2552] [Nrk] See desc (#1382) * Endpoint has changed. Currently the old one redirects to the new one, but this may change * Descriptions use \r instead of \n. 
So translate it Authored by: fractalf --- yt_dlp/extractor/nrk.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/nrk.py b/yt_dlp/extractor/nrk.py index b556bc6aa..49d58a685 100644 --- a/yt_dlp/extractor/nrk.py +++ b/yt_dlp/extractor/nrk.py @@ -147,7 +147,7 @@ class NRKIE(NRKBaseIE): def _real_extract(self, url): video_id = self._match_id(url).split('/')[-1] - path_templ = 'playback/%s/' + video_id + path_templ = 'playback/%s/program/' + video_id def call_playback_api(item, query=None): return self._call_api(path_templ % item, video_id, item, query=query) @@ -188,7 +188,7 @@ class NRKIE(NRKBaseIE): title = titles['title'] alt_title = titles.get('subtitle') - description = preplay.get('description') + description = try_get(preplay, lambda x: x['description'].replace('\r', '\n')) duration = parse_duration(playable.get('duration')) or parse_duration(data.get('duration')) thumbnails = [] From ec11a9f4a26e8225b195e5f91bd0b72b008d0c3a Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 20 Oct 2021 22:07:32 +0530 Subject: [PATCH 0121/2552] [minicurses] Add more colors --- yt_dlp/YoutubeDL.py | 95 +++++++++++++++++++++++++++----------- yt_dlp/extractor/common.py | 2 +- yt_dlp/minicurses.py | 78 ++++++++++++++++++++++++++++--- yt_dlp/utils.py | 33 +++++++------ 4 files changed, 161 insertions(+), 47 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 0ac1f1c61..a3fb3faeb 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -28,6 +28,7 @@ import traceback import random import unicodedata +from enum import Enum from string import ascii_letters from .compat import ( @@ -81,6 +82,7 @@ from .utils import ( make_HTTPS_handler, MaxDownloadsReached, network_exceptions, + number_of_digits, orderedSet, OUTTMPL_TYPES, PagedList, @@ -107,7 +109,6 @@ from .utils import ( strftime_or_none, subtitles_filename, supports_terminal_sequences, - TERMINAL_SEQUENCES, ThrottledDownload, to_high_limit_path, traverse_obj, @@ -123,6 +124,7 @@ from .utils import ( YoutubeDLRedirectHandler, ) from .cache import Cache +from .minicurses import format_text from .extractor import ( gen_extractor_classes, get_info_extractor, @@ -524,7 +526,10 @@ class YoutubeDL(object): windows_enable_vt_mode() # FIXME: This will break if we ever print color to stdout - self.params['no_color'] = self.params.get('no_color') or not supports_terminal_sequences(self._err_file) + self._allow_colors = { + 'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file), + 'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file), + } if sys.version_info < (3, 6): self.report_warning( @@ -532,10 +537,10 @@ class YoutubeDL(object): if self.params.get('allow_unplayable_formats'): self.report_warning( - f'You have asked for {self._color_text("unplayable formats", "blue")} to be listed/downloaded. ' + f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. ' 'This is a developer option intended for debugging. 
\n' ' If you experience any issues while using this option, ' - f'{self._color_text("DO NOT", "red")} open a bug report') + f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report') def check_deprecated(param, option, suggestion): if self.params.get(param) is not None: @@ -554,6 +559,9 @@ class YoutubeDL(object): for msg in self.params.get('_warnings', []): self.report_warning(msg) + if 'list-formats' in self.params.get('compat_opts', []): + self.params['listformats_table'] = False + if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None: # nooverwrites was unnecessarily changed to overwrites # in 0c3d0f51778b153f65c21906031c2e091fcfb641 @@ -826,10 +834,32 @@ class YoutubeDL(object): self.to_stdout( message, skip_eol, quiet=self.params.get('quiet', False)) - def _color_text(self, text, color): - if self.params.get('no_color'): - return text - return f'{TERMINAL_SEQUENCES[color.upper()]}{text}{TERMINAL_SEQUENCES["RESET_STYLE"]}' + class Styles(Enum): + HEADERS = 'yellow' + EMPHASIS = 'blue' + ID = 'green' + DELIM = 'blue' + ERROR = 'red' + WARNING = 'yellow' + + def __format_text(self, out, text, f, fallback=None, *, test_encoding=False): + assert out in ('screen', 'err') + if test_encoding: + original_text = text + handle = self._screen_file if out == 'screen' else self._err_file + encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii') + text = text.encode(encoding, 'ignore').decode(encoding) + if fallback is not None and text != original_text: + text = fallback + if isinstance(f, self.Styles): + f = f._value_ + return format_text(text, f) if self._allow_colors[out] else text if fallback is None else fallback + + def _format_screen(self, *args, **kwargs): + return self.__format_text('screen', *args, **kwargs) + + def _format_err(self, *args, **kwargs): + return self.__format_text('err', *args, **kwargs) def report_warning(self, message, only_once=False): ''' @@ -841,14 +871,14 @@ class YoutubeDL(object): else: if self.params.get('no_warnings'): return - self.to_stderr(f'{self._color_text("WARNING:", "yellow")} {message}', only_once) + self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once) def report_error(self, message, tb=None): ''' Do the same as trouble, but prefixes the message with 'ERROR:', colored in red if stderr is a tty file. 
''' - self.trouble(f'{self._color_text("ERROR:", "red")} {message}', tb) + self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', tb) def write_debug(self, message, only_once=False): '''Log debug message or Print message to stderr''' @@ -977,8 +1007,8 @@ class YoutubeDL(object): # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences # of %(field)s to %(field)0Nd for backward compatibility field_size_compat_map = { - 'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')), - 'playlist_autonumber': len(str(info_dict.get('n_entries') or '')), + 'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0), + 'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0), 'autonumber': self.params.get('autonumber_size') or 5, } @@ -3167,38 +3197,46 @@ class YoutubeDL(object): res += '~' + format_bytes(fdict['filesize_approx']) return res + def _list_format_headers(self, *headers): + if self.params.get('listformats_table', True) is not False: + return [self._format_screen(header, self.Styles.HEADERS) for header in headers] + return headers + def list_formats(self, info_dict): formats = info_dict.get('formats', [info_dict]) - new_format = ( - 'list-formats' not in self.params.get('compat_opts', []) - and self.params.get('listformats_table', True) is not False) + new_format = self.params.get('listformats_table', True) is not False if new_format: + tbr_digits = number_of_digits(max(f.get('tbr') or 0 for f in formats)) + vbr_digits = number_of_digits(max(f.get('vbr') or 0 for f in formats)) + abr_digits = number_of_digits(max(f.get('abr') or 0 for f in formats)) + delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True) table = [ [ - format_field(f, 'format_id'), + self._format_screen(format_field(f, 'format_id'), self.Styles.ID), format_field(f, 'ext'), self.format_resolution(f), format_field(f, 'fps', '%d'), format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''), - '|', + delim, format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes), - format_field(f, 'tbr', '%4dk'), + format_field(f, 'tbr', f'%{tbr_digits}dk'), shorten_protocol_name(f.get('protocol', '').replace("native", "n")), - '|', + delim, format_field(f, 'vcodec', default='unknown').replace('none', ''), - format_field(f, 'vbr', '%4dk'), + format_field(f, 'vbr', f'%{vbr_digits}dk'), format_field(f, 'acodec', default='unknown').replace('none', ''), - format_field(f, 'abr', '%3dk'), + format_field(f, 'abr', f'%{abr_digits}dk'), format_field(f, 'asr', '%5dHz'), ', '.join(filter(None, ( - 'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '', + self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else '', format_field(f, 'language', '[%s]'), format_field(f, 'format_note'), format_field(f, 'container', ignore=(None, f.get('ext'))), ))), ] for f in formats if f.get('preference') is None or f['preference'] >= -1000] - header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', 'HDR', '|', ' FILESIZE', ' TBR', 'PROTO', - '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO'] + header_line = self._list_format_headers( + 'ID', 'EXT', 'RESOLUTION', 'FPS', 'HDR', delim, ' FILESIZE', ' TBR', 'PROTO', + delim, 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO') else: table = [ [ @@ -3213,7 +3251,10 @@ class YoutubeDL(object): self.to_screen( '[info] Available formats for %s:' % info_dict['id']) 
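         # Descriptive note (added by the editor, not in the original patch):
         # render_table() measures each cell's visible width via
         # remove_terminal_sequences(), so the ANSI-styled cells built above
         # still align, and the `delim` row of '\u2500' characters (passed
         # below when the new format is enabled) is drawn as the header
         # separator.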
self.to_stdout(render_table( - header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format)) + header_line, table, + extraGap=(0 if new_format else 1), + hideEmpty=new_format, + delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))) def list_thumbnails(self, info_dict): thumbnails = list(info_dict.get('thumbnails')) @@ -3224,7 +3265,7 @@ class YoutubeDL(object): self.to_screen( '[info] Thumbnails for %s:' % info_dict['id']) self.to_stdout(render_table( - ['ID', 'width', 'height', 'URL'], + self._list_format_headers('ID', 'Width', 'Height', 'URL'), [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])) def list_subtitles(self, video_id, subtitles, name='subtitles'): @@ -3241,7 +3282,7 @@ class YoutubeDL(object): return [lang, ', '.join(names), ', '.join(exts)] self.to_stdout(render_table( - ['Language', 'Name', 'Formats'], + self._list_format_headers('Language', 'Name', 'Formats'), [_row(lang, formats) for lang, formats in subtitles.items()], hideEmpty=True)) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 22b1ed69a..d1d1b46fc 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1139,7 +1139,7 @@ class InfoExtractor(object): if mobj: break - _name = self._downloader._color_text(name, 'blue') + _name = self._downloader._format_err(name, self._downloader.Styles.EMPHASIS) if mobj: if group is None: diff --git a/yt_dlp/minicurses.py b/yt_dlp/minicurses.py index a6e159a14..38fdb5bc6 100644 --- a/yt_dlp/minicurses.py +++ b/yt_dlp/minicurses.py @@ -1,6 +1,72 @@ import functools from threading import Lock -from .utils import supports_terminal_sequences, TERMINAL_SEQUENCES, write_string +from .utils import supports_terminal_sequences, write_string + + +CONTROL_SEQUENCES = { + 'DOWN': '\n', + 'UP': '\033[A', + 'ERASE_LINE': '\033[K', + 'RESET': '\033[0m', +} + + +_COLORS = { + 'BLACK': '0', + 'RED': '1', + 'GREEN': '2', + 'YELLOW': '3', + 'BLUE': '4', + 'PURPLE': '5', + 'CYAN': '6', + 'WHITE': '7', +} + + +_TEXT_STYLES = { + 'NORMAL': '0', + 'BOLD': '1', + 'UNDERLINED': '4', +} + + +def format_text(text, f): + f = f.upper() + tokens = f.strip().split() + + bg_color = '' + if 'ON' in tokens: + if tokens[-1] == 'ON': + raise SyntaxError(f'Empty background format specified in {f!r}') + if tokens[-1] not in _COLORS: + raise SyntaxError(f'{tokens[-1]} in {f!r} must be a color') + bg_color = f'4{_COLORS[tokens.pop()]}' + if tokens[-1] == 'LIGHT': + bg_color = f'0;10{bg_color[1:]}' + tokens.pop() + if tokens[-1] != 'ON': + raise SyntaxError(f'Invalid format {f.split(" ON ", 1)[1]!r} in {f!r}') + bg_color = f'\033[{bg_color}m' + tokens.pop() + + if not tokens: + fg_color = '' + elif tokens[-1] not in _COLORS: + raise SyntaxError(f'{tokens[-1]} in {f!r} must be a color') + else: + fg_color = f'3{_COLORS[tokens.pop()]}' + if tokens and tokens[-1] == 'LIGHT': + fg_color = f'9{fg_color[1:]}' + tokens.pop() + fg_style = tokens.pop() if tokens and tokens[-1] in _TEXT_STYLES else 'NORMAL' + fg_color = f'\033[{_TEXT_STYLES[fg_style]};{fg_color}m' + if tokens: + raise SyntaxError(f'Invalid format {" ".join(tokens)!r} in {f!r}') + + if fg_color or bg_color: + return f'{fg_color}{bg_color}{text}{CONTROL_SEQUENCES["RESET"]}' + else: + return text class MultilinePrinterBase: @@ -67,15 +133,15 @@ class MultilinePrinter(MultilinePrinterBase): yield '\r' distance = dest - current if distance < 0: - yield TERMINAL_SEQUENCES['UP'] * -distance + yield 
CONTROL_SEQUENCES['UP'] * -distance elif distance > 0: - yield TERMINAL_SEQUENCES['DOWN'] * distance + yield CONTROL_SEQUENCES['DOWN'] * distance self._lastline = dest @lock def print_at_line(self, text, pos): if self._HAVE_FULLCAP: - self.write(*self._move_cursor(pos), TERMINAL_SEQUENCES['ERASE_LINE'], text) + self.write(*self._move_cursor(pos), CONTROL_SEQUENCES['ERASE_LINE'], text) text = self._add_line_number(text, pos) textlen = len(text) @@ -103,7 +169,7 @@ class MultilinePrinter(MultilinePrinterBase): if self._HAVE_FULLCAP: self.write( - *text, TERMINAL_SEQUENCES['ERASE_LINE'], - f'{TERMINAL_SEQUENCES["UP"]}{TERMINAL_SEQUENCES["ERASE_LINE"]}' * self.maximum) + *text, CONTROL_SEQUENCES['ERASE_LINE'], + f'{CONTROL_SEQUENCES["UP"]}{CONTROL_SEQUENCES["ERASE_LINE"]}' * self.maximum) else: self.write(*text, ' ' * self._lastlength) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index e05677d08..08f9a5dc9 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4748,9 +4748,11 @@ def determine_protocol(info_dict): def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False): """ Render a list of rows, each as a list of values """ + def width(string): + return len(remove_terminal_sequences(string)) def get_max_lens(table): - return [max(len(compat_str(v)) for v in col) for col in zip(*table)] + return [max(width(str(v)) for v in col) for col in zip(*table)] def filter_using_list(row, filterArray): return [col for (take, col) in zip(filterArray, row) if take] @@ -4762,10 +4764,15 @@ def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False): table = [header_row] + data max_lens = get_max_lens(table) + extraGap += 1 if delim: - table = [header_row] + [['-' * ml for ml in max_lens]] + data - format_str = ' '.join('%-' + compat_str(ml + extraGap) + 's' for ml in max_lens[:-1]) + ' %s' - return '\n'.join(format_str % tuple(row) for row in table) + table = [header_row] + [[delim * (ml + extraGap) for ml in max_lens]] + data + max_lens[-1] = 0 + for row in table: + for pos, text in enumerate(map(str, row)): + row[pos] = text + (' ' * (max_lens[pos] - width(text) + extraGap)) + ret = '\n'.join(''.join(row) for row in table) + return ret def _match_one(filter_part, dct, incomplete): @@ -6498,12 +6505,12 @@ def supports_terminal_sequences(stream): return False -TERMINAL_SEQUENCES = { - 'DOWN': '\n', - 'UP': '\x1b[A', - 'ERASE_LINE': '\x1b[K', - 'RED': '\033[0;31m', - 'YELLOW': '\033[0;33m', - 'BLUE': '\033[0;34m', - 'RESET_STYLE': '\033[0m', -} +_terminal_sequences_re = re.compile('\033\\[[^m]+m') + + +def remove_terminal_sequences(string): + return _terminal_sequences_re.sub('', string) + + +def number_of_digits(number): + return len('%d' % number) From 96565c7e55bc3d97a1d4232fe974091dd45f5fe9 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 23 Oct 2021 19:59:52 +0530 Subject: [PATCH 0122/2552] [cleanup] Add keyword automatically to SearchIE descriptions and some minor cleanup of docs --- Changelog.md | 2 +- README.md | 51 ++++++++++++++++--------------- devscripts/make_supportedsites.py | 3 ++ setup.py | 2 +- supportedsites.md | 40 ++++++++++++------------ yt_dlp/YoutubeDL.py | 2 +- yt_dlp/__init__.py | 4 +-- yt_dlp/extractor/bilibili.py | 2 +- yt_dlp/extractor/common.py | 4 ++- yt_dlp/extractor/niconico.py | 1 - yt_dlp/extractor/soundcloud.py | 5 ++- yt_dlp/extractor/trovo.py | 4 +-- yt_dlp/extractor/youtube.py | 28 ++++++++--------- yt_dlp/minicurses.py | 5 +++ 14 files changed, 80 insertions(+), 73 deletions(-) diff --git a/Changelog.md b/Changelog.md index 
6dbc13bd7..d74237dd4 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -61,7 +61,7 @@
 * [AdobePass] Fix RCN MSO by [jfogelman](https://github.com/jfogelman)
 * [CBC] Fix Gem livestream by [makeworld-the-better-one](https://github.com/makeworld-the-better-one)
 * [CBC] Support CBC Gem member content by [makeworld-the-better-one](https://github.com/makeworld-the-better-one)
-* [crunchyroll] Add season to flat-playlist Closes #1319
+* [crunchyroll] Add season to flat-playlist
 * [crunchyroll] Add support for `beta.crunchyroll` URLs and fix series URLs with language code
 * [EUScreen] Add Extractor by [Ashish0804](https://github.com/Ashish0804)
 * [Gronkh] Add extractor by [Ashish0804](https://github.com/Ashish0804)
diff --git a/README.md b/README.md
index 6e773412d..f9695aec5 100644
--- a/README.md
+++ b/README.md
@@ -125,9 +125,9 @@ If you are coming from [youtube-dl](https://github.com/ytdl-org/youtube-dl), the

 Some of yt-dlp's default options are different from that of youtube-dl and youtube-dlc.

-* The options `--id`, `--auto-number` (`-A`), `--title` (`-t`) and `--literal` (`-l`), no longer work. See [removed options](#Removed) for details
+* The options `--auto-number` (`-A`), `--title` (`-t`) and `--literal` (`-l`), no longer work. See [removed options](#Removed) for details
 * `avconv` is not supported as an alternative to `ffmpeg`
-* The default [output template](#output-template) is `%(title)s [%(id)s].%(ext)s`. There is no real reason for this change. This was changed before yt-dlp was ever made public and now there are no plans to change it back to `%(title)s.%(id)s.%(ext)s`. Instead, you may use `--compat-options filename`
+* The default [output template](#output-template) is `%(title)s [%(id)s].%(ext)s`. There is no real reason for this change. This was changed before yt-dlp was ever made public and now there are no plans to change it back to `%(title)s-%(id)s.%(ext)s`. Instead, you may use `--compat-options filename`
 * The default [format sorting](#sorting-formats) is different from youtube-dl and prefers higher resolution and better codecs rather than higher bitrates. You can use the `--format-sort` option to change this to any order you prefer, or use `--compat-options format-sort` to use youtube-dl's sorting order
 * The default format selector is `bv*+ba/b`. This means that if a combined video + audio format that is better than the best video-only format is found, the former will be preferred. Use `-f bv+ba/b` or `--compat-options format-spec` to revert this
 * Unlike youtube-dlc, yt-dlp does not allow merging multiple audio/video streams into one file by default (since this conflicts with the use of `-f bv*+ba`). If needed, this feature must be enabled using `--audio-multistreams` and `--video-multistreams`. You can also use `--compat-options multistreams` to enable both
@@ -197,17 +197,17 @@ If you have installed using Homebrew, run `brew upgrade yt-dlp/taps/yt-dlp`

 File|Description
 :---|:---
 [yt-dlp](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)|Platform independent binary. 
Needs Python (Recommended for **UNIX-like systems**)
-[yt-dlp.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)|Windows standalone x64 binary (Recommended for **Windows**)
+[yt-dlp.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)|Windows (Win7 SP1+) standalone x64 binary (Recommended for **Windows**)

 #### Alternatives

 File|Description
 :---|:---
-[yt-dlp_macos](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)|MacOS standalone executable
-[yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows standalone x86 (32bit) binary
-[yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_min.exe)|Windows standalone x64 binary built with `py2exe`.<br/>
Does not contain `pycryptodomex`, needs VC++14
+[yt-dlp_macos](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)|MacOS (10.15+) standalone executable
+[yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Vista SP2+) standalone x86 (32bit) binary
+[yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_min.exe)|Windows (Win7 SP1+) standalone x64 binary built with `py2exe`.<br/>
Does not contain `pycryptodomex`, needs VC++14
 [yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged windows executable (No auto-update)
-[yt-dlp_macos.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos.zip)|Unpackaged MacOS executable (No auto-update)
+[yt-dlp_macos.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos.zip)|Unpackaged MacOS (10.15+) executable (No auto-update)

 #### Misc

@@ -1516,24 +1516,25 @@ $ yt-dlp --replace-in-metadata 'title,uploader' '[ _]' '-'

 Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. Eg: `--extractor-args "youtube:player_client=android_agegate,web;include_live_dash" --extractor-args "funimation:version=uncut"`

 The following extractors use this feature:
-* **youtube**
-    * `skip`: `hls` or `dash` (or both) to skip download of the respective manifests
-    * `player_client`: Clients to extract video data from. The main clients are `web`, `android`, `ios`, `mweb`. These also have `_music`, `_embedded`, `_agegate`, and `_creator` variants (Eg: `web_embedded`) (`mweb` has only `_agegate`). By default, `android,web` is used, but the agegate and creator variants are added as required for age-gated videos. Similarly the music variants are added for `music.youtube.com` urls. You can also use `all` to use all the clients
-    * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
-    * `include_live_dash`: Include live dash formats (These formats don't download properly)
-    * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side).
-    * `max_comments`: Maximum amount of comments to download (default all).
-    * `max_comment_depth`: Maximum depth for nested comments. YouTube supports depths 1 or 2 (default).
-* **youtubetab** - (YouTube playlists, channels, feeds, etc.)
-    * `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details)
-
-* **funimation**
-    * `language`: Languages to extract. Eg: `funimation:language=english,japanese`
-    * `version`: The video version to extract - `uncut` or `simulcast`
-
-* **vikiChannel**
-    * `video_types`: Types of videos to download - one or more of `episodes`, `movies`, `clips`, `trailers`
+
+#### youtube
+* `skip`: `hls` or `dash` (or both) to skip download of the respective manifests
+* `player_client`: Clients to extract video data from. The main clients are `web`, `android`, `ios`, `mweb`. These also have `_music`, `_embedded`, `_agegate`, and `_creator` variants (Eg: `web_embedded`) (`mweb` has only `_agegate`). By default, `android,web` is used, but the agegate and creator variants are added as required for age-gated videos. Similarly the music variants are added for `music.youtube.com` urls. You can also use `all` to use all the clients
+* `player_skip`: Skip some network requests that are generally needed for robust extraction. 
One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details +* `include_live_dash`: Include live dash formats (These formats don't download properly) +* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) +* `max_comments`: Maximum amount of comments to download (default all) +* `max_comment_depth`: Maximum depth for nested comments. YouTube supports depths 1 or 2 (default) + +#### youtubetab (YouTube playlists, channels, feeds, etc.) +* `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details) + +#### funimation +* `language`: Languages to extract. Eg: `funimation:language=english,japanese` +* `version`: The video version to extract - `uncut` or `simulcast` + +#### vikichannel +* `video_types`: Types of videos to download - one or more of `episodes`, `movies`, `clips`, `trailers` NOTE: These options may be changed/removed in the future without concern for backward compatibility diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py index 17a34843f..4c11e25f2 100644 --- a/devscripts/make_supportedsites.py +++ b/devscripts/make_supportedsites.py @@ -29,6 +29,9 @@ def main(): continue if ie_desc is not None: ie_md += ': {0}'.format(ie.IE_DESC) + search_key = getattr(ie, 'SEARCH_KEY', None) + if search_key is not None: + ie_md += f'; "{ie.SEARCH_KEY}:" prefix' if not ie.working(): ie_md += ' (Currently broken)' yield ie_md diff --git a/setup.py b/setup.py index e1c585be4..f08ae2309 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ from distutils.spawn import spawn exec(compile(open('yt_dlp/version.py').read(), 'yt_dlp/version.py', 'exec')) -DESCRIPTION = 'Command-line program to download videos from YouTube.com and many other other video platforms.' 
+DESCRIPTION = 'A youtube-dl fork with additional features and patches' LONG_DESCRIPTION = '\n\n'.join(( 'Official repository: ', diff --git a/supportedsites.md b/supportedsites.md index 616151db8..01c3f43a9 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -127,7 +127,7 @@ - **BilibiliAudioAlbum** - **BilibiliChannel** - **BiliBiliPlayer** - - **BiliBiliSearch**: Bilibili video search, "bilisearch" keyword + - **BiliBiliSearch**: Bilibili video search; "bilisearch:" prefix - **BiliIntl** - **BiliIntlSeries** - **BioBioChileTV** @@ -691,8 +691,8 @@ - **niconico**: ニコニコ動画 - **NiconicoPlaylist** - **NiconicoUser** - - **nicovideo:search**: Nico video searches - - **nicovideo:search:date**: Nico video searches, newest first + - **nicovideo:search**: Nico video searches; "nicosearch:" prefix + - **nicovideo:search:date**: Nico video searches, newest first; "nicosearchdate:" prefix - **nicovideo:search_url**: Nico video search URLs - **Nintendo** - **Nitter** @@ -936,7 +936,7 @@ - **SBS**: sbs.com.au - **schooltv** - **ScienceChannel** - - **screen.yahoo:search**: Yahoo screen search + - **screen.yahoo:search**: Yahoo screen search; "yvsearch:" prefix - **Screencast** - **ScreencastOMatic** - **ScrippsNetworks** @@ -977,7 +977,7 @@ - **SonyLIVSeries** - **soundcloud** - **soundcloud:playlist** - - **soundcloud:search**: Soundcloud search, "scsearch" keyword + - **soundcloud:search**: Soundcloud search; "scsearch:" prefix - **soundcloud:set** - **soundcloud:trackstation** - **soundcloud:user** @@ -1095,8 +1095,8 @@ - **TrailerAddict** (Currently broken) - **Trilulilu** - **Trovo** - - **TrovoChannelClip**: All Clips of a trovo.live channel, "trovoclip" keyword - - **TrovoChannelVod**: All VODs of a trovo.live channel, "trovovod" keyword + - **TrovoChannelClip**: All Clips of a trovo.live channel; "trovoclip:" prefix + - **TrovoChannelVod**: All VODs of a trovo.live channel; "trovovod:" prefix - **TrovoVod** - **TruNews** - **TruTV** @@ -1201,7 +1201,7 @@ - **Viddler** - **Videa** - **video.arnes.si**: Arnes Video - - **video.google:search**: Google Video search (Currently broken) + - **video.google:search**: Google Video search; "gvsearch:" prefix (Currently broken) - **video.sky.it** - **video.sky.it:live** - **VideoDetective** @@ -1343,19 +1343,19 @@ - **YouPorn** - **YourPorn** - **YourUpload** - - **youtube**: YouTube.com - - **youtube:favorites**: YouTube.com liked videos, ":ytfav" for short (requires authentication) - - **youtube:history**: Youtube watch history, ":ythis" for short (requires authentication) - - **youtube:playlist**: YouTube.com playlists - - **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication) - - **youtube:search**: YouTube.com searches, "ytsearch" keyword - - **youtube:search:date**: YouTube.com searches, newest videos first, "ytsearchdate" keyword - - **youtube:search_url**: YouTube.com search URLs - - **youtube:subscriptions**: YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication) - - **youtube:tab**: YouTube.com tab - - **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication) + - **youtube**: YouTube + - **youtube:favorites**: YouTube liked videos; ":ytfav" keyword (requires cookies) + - **youtube:history**: Youtube watch history; ":ythis" keyword (requires cookies) + - **youtube:playlist**: YouTube playlists + - **youtube:recommended**: YouTube recommended videos; ":ytrec" keyword + - **youtube:search**: YouTube searches; "ytsearch:" prefix + 
- **youtube:search:date**: YouTube searches, newest videos first; "ytsearchdate:" prefix + - **youtube:search_url**: YouTube search URLs with sorting and filter support + - **youtube:subscriptions**: YouTube subscriptions feed; ":ytsubs" keyword (requires cookies) + - **youtube:tab**: YouTube Tabs + - **youtube:watchlater**: Youtube watch later list; ":ytwatchlater" keyword (requires cookies) - **YoutubeYtBe**: youtu.be - - **YoutubeYtUser**: YouTube.com user videos, URL or "ytuser" keyword + - **YoutubeYtUser**: YouTube user videos; "ytuser:" prefix - **Zapiks** - **Zattoo** - **ZattooLive** diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index a3fb3faeb..27fac6263 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3313,7 +3313,7 @@ class YoutubeDL(object): write_debug = lambda msg: logger.debug(f'[debug] {msg}') write_debug(encoding_str) else: - write_string(f'[debug] {encoding_str}', encoding=None) + write_string(f'[debug] {encoding_str}\n', encoding=None) write_debug = lambda msg: self._write_string(f'[debug] {msg}\n') source = detect_variant() diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index e1c45441a..5c3d33df0 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -122,10 +122,10 @@ def _real_main(argv=None): desc = getattr(ie, 'IE_DESC', ie.IE_NAME) if desc is False: continue - if hasattr(ie, 'SEARCH_KEY'): + if getattr(ie, 'SEARCH_KEY', None) is not None: _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow') _COUNTS = ('', '5', '10', 'all') - desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES)) + desc += f'; "{ie.SEARCH_KEY}:" prefix (Example: "{ie.SEARCH_KEY}{random.choice(_COUNTS)}:{random.choice(_SEARCHES)}")' write_string(desc + '\n', out=sys.stdout) sys.exit(0) if opts.ap_list_mso: diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index d6c77e418..ee1722e94 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -566,7 +566,7 @@ class BilibiliCategoryIE(InfoExtractor): class BiliBiliSearchIE(SearchInfoExtractor): - IE_DESC = 'Bilibili video search, "bilisearch" keyword' + IE_DESC = 'Bilibili video search' _MAX_RESULTS = 100000 _SEARCH_KEY = 'bilisearch' diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index d1d1b46fc..c0d714249 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -3620,9 +3620,11 @@ class SearchInfoExtractor(InfoExtractor): """ Base class for paged search queries extractors. They accept URLs in the format _SEARCH_KEY(|all|[0-9]):{query} - Instances should define _SEARCH_KEY and _MAX_RESULTS. 
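
The `_SEARCH_KEY(|all|[0-9]):{query}` scheme described in this docstring is resolved by the `_make_valid_url` pattern that follows in the hunk. A minimal, self-contained sketch of how a prefixed query splits; the default of 1 result for an empty count is an assumption about the base class, not part of this diff:

```
import re

SEARCH_KEY = 'ytsearch'  # any extractor's _SEARCH_KEY behaves the same way
pattern = r'%s(?P<prefix>|[1-9][0-9]*|all):(?P<query>[\s\S]+)' % SEARCH_KEY

m = re.match(pattern, 'ytsearch10:cute kittens')
prefix, query = m.group('prefix'), m.group('query')
n_results = float('inf') if prefix == 'all' else int(prefix or 1)
print(n_results, query)  # 10 cute kittens
```
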
+ Instances should define _SEARCH_KEY and optionally _MAX_RESULTS """ + _MAX_RESULTS = float('inf') + @classmethod def _make_valid_url(cls): return r'%s(?P|[1-9][0-9]*|all):(?P[\s\S]+)' % cls._SEARCH_KEY diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py index 76f087057..4bcea33d5 100644 --- a/yt_dlp/extractor/niconico.py +++ b/yt_dlp/extractor/niconico.py @@ -704,7 +704,6 @@ class NicovideoSearchURLIE(InfoExtractor): class NicovideoSearchIE(SearchInfoExtractor, NicovideoSearchURLIE): IE_DESC = 'Nico video searches' - _MAX_RESULTS = float('inf') IE_NAME = NicovideoSearchIE_NAME _SEARCH_KEY = 'nicosearch' _TESTS = [] diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index 412331e17..824528474 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -855,8 +855,8 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE): class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): IE_NAME = 'soundcloud:search' - IE_DESC = 'Soundcloud search, "scsearch" keyword' - _MAX_RESULTS = float('inf') + IE_DESC = 'Soundcloud search' + _SEARCH_KEY = 'scsearch' _TESTS = [{ 'url': 'scsearch15:post-avant jazzcore', 'info_dict': { @@ -865,7 +865,6 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): 'playlist_count': 15, }] - _SEARCH_KEY = 'scsearch' _MAX_RESULTS_PER_PAGE = 200 _DEFAULT_RESULTS_PER_PAGE = 50 diff --git a/yt_dlp/extractor/trovo.py b/yt_dlp/extractor/trovo.py index ec55f41f2..a0f0cc31c 100644 --- a/yt_dlp/extractor/trovo.py +++ b/yt_dlp/extractor/trovo.py @@ -223,7 +223,7 @@ class TrovoChannelBaseIE(InfoExtractor): class TrovoChannelVodIE(TrovoChannelBaseIE): _VALID_URL = r'trovovod:(?P[^\s]+)' - IE_DESC = 'All VODs of a trovo.live channel, "trovovod" keyword' + IE_DESC = 'All VODs of a trovo.live channel; "trovovod:" prefix' _TESTS = [{ 'url': 'trovovod:OneTappedYou', @@ -244,7 +244,7 @@ class TrovoChannelVodIE(TrovoChannelBaseIE): class TrovoChannelClipIE(TrovoChannelBaseIE): _VALID_URL = r'trovoclip:(?P[^\s]+)' - IE_DESC = 'All Clips of a trovo.live channel, "trovoclip" keyword' + IE_DESC = 'All Clips of a trovo.live channel; "trovoclip:" prefix' _TESTS = [{ 'url': 'trovoclip:OneTappedYou', diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 54f5ef15c..6a7a2ce1a 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -695,7 +695,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): class YoutubeIE(YoutubeBaseInfoExtractor): - IE_DESC = 'YouTube.com' + IE_DESC = 'YouTube' _INVIDIOUS_SITES = ( # invidious-redirect websites r'(?:www\.)?redirect\.invidious\.io', @@ -3010,7 +3010,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): class YoutubeTabIE(YoutubeBaseInfoExtractor): - IE_DESC = 'YouTube.com tab' + IE_DESC = 'YouTube Tabs' _VALID_URL = r'''(?x) https?:// (?:\w+\.)? @@ -4238,7 +4238,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): class YoutubePlaylistIE(InfoExtractor): - IE_DESC = 'YouTube.com playlists' + IE_DESC = 'YouTube playlists' _VALID_URL = r'''(?x)(?: (?:https?://)? (?:\w+\.)? 
@@ -4362,7 +4362,7 @@ class YoutubeYtBeIE(InfoExtractor): class YoutubeYtUserIE(InfoExtractor): - IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword' + IE_DESC = 'YouTube user videos; "ytuser:" prefix' _VALID_URL = r'ytuser:(?P.+)' _TESTS = [{ 'url': 'ytuser:phihag', @@ -4378,7 +4378,7 @@ class YoutubeYtUserIE(InfoExtractor): class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): IE_NAME = 'youtube:favorites' - IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)' + IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)' _VALID_URL = r':ytfav(?:ou?rite)?s?' _LOGIN_REQUIRED = True _TESTS = [{ @@ -4396,10 +4396,7 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE): - IE_DESC = 'YouTube.com searches, "ytsearch" keyword' - # there doesn't appear to be a real limit, for example if you search for - # 'python' you get more than 8.000.000 results - _MAX_RESULTS = float('inf') + IE_DESC = 'YouTube searches' IE_NAME = 'youtube:search' _SEARCH_KEY = 'ytsearch' _SEARCH_PARAMS = None @@ -4459,13 +4456,14 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE): class YoutubeSearchDateIE(YoutubeSearchIE): IE_NAME = YoutubeSearchIE.IE_NAME + ':date' _SEARCH_KEY = 'ytsearchdate' - IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword' + IE_DESC = 'YouTube searches, newest videos first' _SEARCH_PARAMS = 'CAI%3D' class YoutubeSearchURLIE(YoutubeSearchIE): - IE_DESC = 'YouTube.com search URLs' + IE_DESC = 'YouTube search URLs with sorting and filter support' IE_NAME = YoutubeSearchIE.IE_NAME + '_url' + _SEARCH_KEY = None _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)' # _MAX_RESULTS = 100 _TESTS = [{ @@ -4511,7 +4509,7 @@ class YoutubeFeedsInfoExtractor(YoutubeTabIE): class YoutubeWatchLaterIE(InfoExtractor): IE_NAME = 'youtube:watchlater' - IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)' + IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)' _VALID_URL = r':ytwatchlater' _TESTS = [{ 'url': ':ytwatchlater', @@ -4524,7 +4522,7 @@ class YoutubeWatchLaterIE(InfoExtractor): class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor): - IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)' + IE_DESC = 'YouTube recommended videos; ":ytrec" keyword' _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?' _FEED_NAME = 'recommended' _LOGIN_REQUIRED = False @@ -4541,7 +4539,7 @@ class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor): class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor): - IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)' + IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)' _VALID_URL = r':ytsub(?:scription)?s?' _FEED_NAME = 'subscriptions' _TESTS = [{ @@ -4554,7 +4552,7 @@ class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor): class YoutubeHistoryIE(YoutubeFeedsInfoExtractor): - IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)' + IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)' _VALID_URL = r':ythis(?:tory)?' 
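
The reworded descriptions above draw a consistent line between ":keyword" pseudo-URLs, which are complete arguments on their own, and "prefix:" search schemes, which take a query and an optional result count. Both kinds route through the normal URL path, so they should also work from the embedding API; a small illustration (the queries are arbitrary examples, and `:ytfav` needs cookies):

```
from yt_dlp import YoutubeDL

with YoutubeDL() as ydl:
    ydl.download([
        ':ytfav',                        # keyword: liked videos (cookies required)
        'ytsearch5:slithering pythons',  # prefix: first 5 search results
        'ytsearchdate10:falling cat',    # prefix: 10 newest matches
    ])
```
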
_FEED_NAME = 'history' _TESTS = [{ diff --git a/yt_dlp/minicurses.py b/yt_dlp/minicurses.py index 38fdb5bc6..699b1158a 100644 --- a/yt_dlp/minicurses.py +++ b/yt_dlp/minicurses.py @@ -31,6 +31,11 @@ _TEXT_STYLES = { def format_text(text, f): + ''' + @param f String representation of formatting to apply in the form: + [style] [light] font_color [on [light] bg_color] + Eg: "red", "bold green on light blue" + ''' f = f.upper() tokens = f.strip().split() From 9f1a1c36e60b14f9ff47d83234b4ea61c5f5e2f7 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 24 Oct 2021 14:46:07 +0530 Subject: [PATCH 0123/2552] Separate `--check-all-formats` from `--check-formats` Previously, `--check-formats` tested only the selected video formats, but ALL thumbnails --- yt_dlp/YoutubeDL.py | 115 ++++++++++++++++++++++++-------------------- yt_dlp/options.py | 10 ++-- yt_dlp/utils.py | 2 + 3 files changed, 71 insertions(+), 56 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 27fac6263..071f2e943 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -224,7 +224,8 @@ class YoutubeDL(object): allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file check_formats Whether to test if the formats are downloadable. - Can be True (check all), False (check none) + Can be True (check all), False (check none), + 'selected' (check selected formats), or None (check only if requested by extractor) paths: Dictionary of output paths. The allowed keys are 'home' 'temp' and the keys of OUTTMPL_TYPES (in utils.py) @@ -1720,6 +1721,28 @@ class YoutubeDL(object): return op(actual_value, comparison_value) return _filter + def _check_formats(self, formats): + for f in formats: + self.to_screen('[info] Testing format %s' % f['format_id']) + temp_file = tempfile.NamedTemporaryFile( + suffix='.tmp', delete=False, + dir=self.get_output_path('temp') or None) + temp_file.close() + try: + success, _ = self.dl(temp_file.name, f, test=True) + except (DownloadError, IOError, OSError, ValueError) + network_exceptions: + success = False + finally: + if os.path.exists(temp_file.name): + try: + os.remove(temp_file.name) + except OSError: + self.report_warning('Unable to delete temporary file "%s"' % temp_file.name) + if success: + yield f + else: + self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id']) + def _default_format_spec(self, info_dict, download=True): def can_merge(): @@ -1759,7 +1782,7 @@ class YoutubeDL(object): allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False), 'video': self.params.get('allow_multiple_video_streams', False)} - check_formats = self.params.get('check_formats') + check_formats = self.params.get('check_formats') == 'selected' def _parse_filter(tokens): filter_parts = [] @@ -1935,26 +1958,7 @@ class YoutubeDL(object): if not check_formats: yield from formats return - for f in formats: - self.to_screen('[info] Testing format %s' % f['format_id']) - temp_file = tempfile.NamedTemporaryFile( - suffix='.tmp', delete=False, - dir=self.get_output_path('temp') or None) - temp_file.close() - try: - success, _ = self.dl(temp_file.name, f, test=True) - except (DownloadError, IOError, OSError, ValueError) + network_exceptions: - success = False - finally: - if os.path.exists(temp_file.name): - try: - os.remove(temp_file.name) - except OSError: - self.report_warning('Unable to delete temporary file "%s"' % temp_file.name) - if success: - yield f - else: - self.to_screen('[info] Unable to download format %s. 
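
The `_check_formats` generator introduced in this patch weeds out dead formats by test-downloading each one into a throwaway `NamedTemporaryFile` and yielding only the survivors. A simplified sketch of the same probe pattern, with `probe(path, fmt)` standing in for `self.dl(path, fmt, test=True)`:

```
import os
import tempfile

def working_formats(formats, probe):
    for fmt in formats:
        tmp = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False)
        tmp.close()  # only the path is needed; the downloader reopens it
        try:
            ok = probe(tmp.name, fmt)
        except OSError:
            ok = False
        finally:
            if os.path.exists(tmp.name):
                os.remove(tmp.name)
        if ok:
            yield fmt  # yields lazily, so unprobed formats cost nothing
```
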
Skipping...' % f['format_id']) + yield from self._check_formats(formats) def _build_selector_function(selector): if isinstance(selector, list): # , @@ -2111,42 +2115,45 @@ class YoutubeDL(object): self.cookiejar.add_cookie_header(pr) return pr.get_header('Cookie') + def _sort_thumbnails(self, thumbnails): + thumbnails.sort(key=lambda t: ( + t.get('preference') if t.get('preference') is not None else -1, + t.get('width') if t.get('width') is not None else -1, + t.get('height') if t.get('height') is not None else -1, + t.get('id') if t.get('id') is not None else '', + t.get('url'))) + def _sanitize_thumbnails(self, info_dict): thumbnails = info_dict.get('thumbnails') if thumbnails is None: thumbnail = info_dict.get('thumbnail') if thumbnail: info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}] - if thumbnails: - thumbnails.sort(key=lambda t: ( - t.get('preference') if t.get('preference') is not None else -1, - t.get('width') if t.get('width') is not None else -1, - t.get('height') if t.get('height') is not None else -1, - t.get('id') if t.get('id') is not None else '', - t.get('url'))) - - def thumbnail_tester(): - def test_thumbnail(t): - self.to_screen(f'[info] Testing thumbnail {t["id"]}') - try: - self.urlopen(HEADRequest(t['url'])) - except network_exceptions as err: - self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...') - return False - return True - return test_thumbnail - - for i, t in enumerate(thumbnails): - if t.get('id') is None: - t['id'] = '%d' % i - if t.get('width') and t.get('height'): - t['resolution'] = '%dx%d' % (t['width'], t['height']) - t['url'] = sanitize_url(t['url']) - - if self.params.get('check_formats'): - info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse() - else: - info_dict['thumbnails'] = thumbnails + if not thumbnails: + return + + def check_thumbnails(thumbnails): + for t in thumbnails: + self.to_screen(f'[info] Testing thumbnail {t["id"]}') + try: + self.urlopen(HEADRequest(t['url'])) + except network_exceptions as err: + self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. 
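
The tuple key in `_sort_thumbnails` swaps missing values for sentinels that sort lowest, avoiding the `TypeError` Python 3 raises when comparing `None` with ints or strings. The same idiom in isolation; thumbnails end up worst-first, best-last, matching yt-dlp's list convention:

```
thumbs = [
    {'url': 'b.jpg', 'preference': 1, 'width': 1280, 'height': 720},
    {'url': 'a.jpg'},
    {'url': 'c.jpg', 'width': 640, 'height': 360},
]
thumbs.sort(key=lambda t: (
    t.get('preference') if t.get('preference') is not None else -1,
    t.get('width') if t.get('width') is not None else -1,
    t.get('height') if t.get('height') is not None else -1,
    t.get('id') if t.get('id') is not None else '',
    t.get('url')))
print([t['url'] for t in thumbs])  # ['a.jpg', 'c.jpg', 'b.jpg']
```
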
Skipping...') + continue + yield t + + self._sort_thumbnails(thumbnails) + for i, t in enumerate(thumbnails): + if t.get('id') is None: + t['id'] = '%d' % i + if t.get('width') and t.get('height'): + t['resolution'] = '%dx%d' % (t['width'], t['height']) + t['url'] = sanitize_url(t['url']) + + if self.params.get('check_formats') is True: + info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1])).reverse() + else: + info_dict['thumbnails'] = thumbnails def process_video_result(self, info_dict, download=True): assert info_dict.get('_type', 'video') == 'video' @@ -2252,7 +2259,6 @@ class YoutubeDL(object): info_dict['requested_subtitles'] = self.process_subtitles( info_dict['id'], subtitles, automatic_captions) - # We now pick which formats have to be downloaded if info_dict.get('formats') is None: # There's only one format available formats = [info_dict] @@ -2335,6 +2341,9 @@ class YoutubeDL(object): # TODO Central sorting goes here + if self.params.get('check_formats') is True: + formats = LazyList(self._check_formats(formats[::-1])).reverse() + if not formats or formats[0] is not info_dict: # only set the 'formats' fields if the original info_dict list them # otherwise we end up with a circular reference, the first (and unique) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 719a1bce4..5499ab13e 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -562,12 +562,16 @@ def parseOpts(overrideArguments=None): help="Don't give any special preference to free containers (default)") video_format.add_option( '--check-formats', - action='store_true', dest='check_formats', default=None, - help='Check that the formats selected are actually downloadable') + action='store_const', const='selected', dest='check_formats', default=None, + help='Check that the selected formats are actually downloadable') + video_format.add_option( + '--check-all-formats', + action='store_true', dest='check_formats', + help='Check all formats for whether they are actually downloadable') video_format.add_option( '--no-check-formats', action='store_false', dest='check_formats', - help='Do not check that the formats selected are actually downloadable') + help='Do not check that the formats are actually downloadable') video_format.add_option( '-F', '--list-formats', action='store_true', dest='listformats', diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 08f9a5dc9..2c3ab00dc 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4050,6 +4050,8 @@ class LazyList(collections.abc.Sequence): def __exhaust(self): self.__cache.extend(self.__iterable) + # Discard the emptied iterable to make it pickle-able + self.__iterable = [] return self.__cache def exhaust(self): From fccf502118466bbfde7c5c6dd0279f0dfdb1311c Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 24 Oct 2021 14:55:28 +0530 Subject: [PATCH 0124/2552] [youtube] Populate `thumbnail` with the best "known" thumbnail Closes #402, Related: https://github.com/yt-dlp/yt-dlp/issues/340#issuecomment-950290624 --- yt_dlp/extractor/youtube.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 6a7a2ce1a..658b45fe1 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2696,6 +2696,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): thumbnails.append({ 'url': thumbnail_url, }) + original_thumbnails = thumbnails.copy() + # The best resolution thumbnails sometimes does not appear in the webpage # See: https://github.com/ytdl-org/youtube-dl/issues/29049, 
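
`check_thumbnails` here (and `_check_formats` above) receive their candidates best-first via `[::-1]`, and `LazyList(...).reverse()` restores worst-first order without forcing every probe up front: items are only tested when something actually indexes them, which is also why `LazyList.__exhaust` now drops its iterator to stay pickle-able. A rough stand-in using a bare generator to show the laziness (yt-dlp's `LazyList` adds caching and reverse indexing on top):

```
def check(candidates):
    for c in candidates:       # best first
        print('probing', c)    # the network round-trip would happen here
        if c != 'broken':
            yield c

formats = ['worst', 'mid', 'broken', 'best']  # best last, per yt-dlp convention
checked = check(formats[::-1])
print(next(checked))  # prints "probing best" then "best"; nothing else probed yet
```
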
https://github.com/yt-dlp/yt-dlp/issues/340 # List of possible thumbnails - Ref: @@ -2706,7 +2708,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'default', '1', '2', '3' ] n_thumbnail_names = len(thumbnail_names) - thumbnails.extend({ 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format( video_id=video_id, name=name, ext=ext, @@ -2716,6 +2717,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names) thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i) self._remove_duplicate_formats(thumbnails) + self._downloader._sort_thumbnails(original_thumbnails) category = get_first(microformats, 'category') or search_meta('genre') channel_id = str_or_none( @@ -2745,6 +2747,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'title': self._live_title(video_title) if is_live else video_title, 'formats': formats, 'thumbnails': thumbnails, + # The best thumbnail that we are sure exists. Prevents unnecessary + # URL checking if user don't care about getting the best possible thumbnail + 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')), 'description': video_description, 'upload_date': unified_strdate( get_first(microformats, 'uploadDate') From f2fe69c7b0d208bdb1f6292b4ae92bc1e1a7444a Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 24 Oct 2021 18:02:00 +0530 Subject: [PATCH 0125/2552] Approximate filesize from bitrate Closes #1400 --- yt_dlp/YoutubeDL.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 071f2e943..8c8cf7ecb 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2330,6 +2330,10 @@ class YoutubeDL(object): format['resolution'] = self.format_resolution(format, default=None) if format.get('dynamic_range') is None and format.get('vcodec') != 'none': format['dynamic_range'] = 'SDR' + if (info_dict.get('duration') and format.get('tbr') + and not format.get('filesize') and not format.get('filesize_approx')): + format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8) + # Add HTTP headers, so that external programs can use them from the # json output full_format_info = info_dict.copy() From ad64a2323f1ce0f8aeb07e4ead46630edec2bf2d Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Sun, 24 Oct 2021 16:31:33 +0000 Subject: [PATCH 0126/2552] [instagram] Fix bug in ab2ffab22d02d530e0b46f9e361ff53a2139898b (#1403) Authored by: u-spec-png --- yt_dlp/extractor/instagram.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py index 24f47f3a8..ccfcddd5b 100644 --- a/yt_dlp/extractor/instagram.py +++ b/yt_dlp/extractor/instagram.py @@ -145,6 +145,8 @@ class InstagramIE(InfoExtractor): def _login(self): username, password = self._get_login_info() + if username is None: + return login_webpage = self._download_webpage( 'https://www.instagram.com/accounts/login/', None, From 8e7ab2cf08970dbeedef304cd25bcd6abf36966b Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Mon, 25 Oct 2021 19:33:01 +0000 Subject: [PATCH 0127/2552] [Bilibili:comments] Fix infinite loop (#1423) Closes #1412 Authored by: u-spec-png --- yt_dlp/extractor/bilibili.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index ee1722e94..483f93d67 100644 --- a/yt_dlp/extractor/bilibili.py +++ 
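
The filesize estimate added above treats `tbr` as the total bitrate in Kbps and uses 1024 bits per Kbit, so bytes = duration × tbr × 1024 / 8. Worked through on made-up numbers:

```
duration = 300   # seconds
tbr = 1000       # total bitrate in Kbps, as reported in the format dict
filesize_approx = duration * tbr * (1024 / 8)
print(filesize_approx)   # 38400000.0 bytes, i.e. roughly 36.6 MiB
```
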
b/yt_dlp/extractor/bilibili.py @@ -376,8 +376,10 @@ class BiliBiliIE(InfoExtractor): replies = traverse_obj( self._download_json( f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={video_id}&type=1&jsonp=jsonp&sort=2&_=1567227301685', - video_id, note=f'Extracting comments from page {idx}'), - ('data', 'replies')) or [] + video_id, note=f'Extracting comments from page {idx}', fatal=False), + ('data', 'replies')) + if not replies: + return for children in map(self._get_all_children, replies): yield from children From 7e59ca440a9351aac0a99b505587698b912e500e Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 26 Oct 2021 19:31:00 +0530 Subject: [PATCH 0128/2552] [DiscoveryPlus] Allow language codes in URL Closes #1425 --- yt_dlp/extractor/dplay.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/dplay.py b/yt_dlp/extractor/dplay.py index e0e446b87..d62480810 100644 --- a/yt_dlp/extractor/dplay.py +++ b/yt_dlp/extractor/dplay.py @@ -325,7 +325,7 @@ class HGTVDeIE(DPlayIE): class DiscoveryPlusIE(DPlayIE): - _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/video' + DPlayIE._PATH_REGEX + _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?:\w{2}/)?video' + DPlayIE._PATH_REGEX _TESTS = [{ 'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family', 'info_dict': { @@ -343,6 +343,9 @@ class DiscoveryPlusIE(DPlayIE): 'episode_number': 1, }, 'skip': 'Available for Premium users', + }, { + 'url': 'https://discoveryplus.com/ca/video/bering-sea-gold-discovery-ca/goldslingers', + 'only_matching': True, }] _PRODUCT = 'dplus_us' From 7de837a5e3e5eae92a77d07e66eda49c0e949b8d Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 26 Oct 2021 19:31:56 +0530 Subject: [PATCH 0129/2552] [utils] Sanitize URL when determining protocol Closes #1406 --- yt_dlp/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 2c3ab00dc..be93b0ef2 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4731,7 +4731,7 @@ def determine_protocol(info_dict): if protocol is not None: return protocol - url = info_dict['url'] + url = sanitize_url(info_dict['url']) if url.startswith('rtmp'): return 'rtmp' elif url.startswith('mms'): From 08438d2ca59fddd4147f4f957473af78d56be732 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 26 Oct 2021 20:11:59 +0530 Subject: [PATCH 0130/2552] [outtmpl] Add type `link` for internet shortcut files and refactor related code Closes #1405 --- README.md | 2 +- yt_dlp/YoutubeDL.py | 76 ++++++++++++++++++--------------------------- yt_dlp/utils.py | 7 +++++ 3 files changed, 39 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index f9695aec5..e2fbbf2ae 100644 --- a/README.md +++ b/README.md @@ -1034,7 +1034,7 @@ To summarize, the general syntax for a field is: %(name[.keys][addition][>strf][,alternate][|default])[flags][width][.precision][length]type ``` -Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation` (deprecated), `infojson`, `pl_thumbnail`, `pl_description`, `pl_infojson`, `chapter`. For example, `-o '%(title)s.%(ext)s' -o 'thumbnail:%(title)s\%(title)s.%(ext)s'` will put the thumbnails in a folder with the same name as the video. 
If any of the templates (except default) is empty, that type of file will not be written. Eg: `--write-thumbnail -o "thumbnail:"` will write thumbnails only for playlists and not for video. +Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation` (deprecated), `infojson`, `link`, `pl_thumbnail`, `pl_description`, `pl_infojson`, `chapter`. For example, `-o '%(title)s.%(ext)s' -o 'thumbnail:%(title)s\%(title)s.%(ext)s'` will put the thumbnails in a folder with the same name as the video. If any of the templates (except default) is empty, that type of file will not be written. Eg: `--write-thumbnail -o "thumbnail:"` will write thumbnails only for playlists and not for video. The available fields are: diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 8c8cf7ecb..ced7d1202 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -56,9 +56,6 @@ from .utils import ( DEFAULT_OUTTMPL, determine_ext, determine_protocol, - DOT_DESKTOP_LINK_TEMPLATE, - DOT_URL_LINK_TEMPLATE, - DOT_WEBLOC_LINK_TEMPLATE, DownloadError, encode_compat_str, encodeFilename, @@ -77,6 +74,7 @@ from .utils import ( iri_to_uri, ISO3166Utils, LazyList, + LINK_TEMPLATES, locked_file, make_dir, make_HTTPS_handler, @@ -2665,53 +2663,41 @@ class YoutubeDL(object): return # Write internet shortcut files - url_link = webloc_link = desktop_link = False - if self.params.get('writelink', False): - if sys.platform == "darwin": # macOS. - webloc_link = True - elif sys.platform.startswith("linux"): - desktop_link = True - else: # if sys.platform in ['win32', 'cygwin']: - url_link = True - if self.params.get('writeurllink', False): - url_link = True - if self.params.get('writewebloclink', False): - webloc_link = True - if self.params.get('writedesktoplink', False): - desktop_link = True - - if url_link or webloc_link or desktop_link: + def _write_link_file(link_type): if 'webpage_url' not in info_dict: self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information') - return - ascii_url = iri_to_uri(info_dict['webpage_url']) - - def _write_link_file(extension, template, newline, embed_filename): - linkfn = replace_extension(full_filename, extension, info_dict.get('ext')) + return False + linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext')) if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)): - self.to_screen('[info] Internet shortcut is already present') - else: - try: - self.to_screen('[info] Writing internet shortcut to: ' + linkfn) - with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile: - template_vars = {'url': ascii_url} - if embed_filename: - template_vars['filename'] = linkfn[:-(len(extension) + 1)] - linkfile.write(template % template_vars) - except (OSError, IOError): - self.report_error('Cannot write internet shortcut ' + linkfn) - return False + self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present') + return True + try: + self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}') + with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', + newline='\r\n' if link_type == 'url' else '\n') as linkfile: + template_vars = 
{'url': iri_to_uri(info_dict['webpage_url'])} + if link_type == 'desktop': + template_vars['filename'] = linkfn[:-(len(link_type) + 1)] + linkfile.write(LINK_TEMPLATES[link_type] % template_vars) + except (OSError, IOError): + self.report_error(f'Cannot write internet shortcut {linkfn}') + return False return True - if url_link: - if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False): - return - if webloc_link: - if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False): - return - if desktop_link: - if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True): - return + write_links = { + 'url': self.params.get('writeurllink'), + 'webloc': self.params.get('writewebloclink'), + 'desktop': self.params.get('writedesktoplink'), + } + if self.params.get('writelink'): + link_type = ('webloc' if sys.platform == 'darwin' + else 'desktop' if sys.platform.startswith('linux') + else 'url') + write_links[link_type] = True + + if any(should_write and not _write_link_file(link_type) + for link_type, should_write in write_links.items()): + return try: info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index be93b0ef2..9d90eca5e 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4503,6 +4503,7 @@ OUTTMPL_TYPES = { 'description': 'description', 'annotation': 'annotations.xml', 'infojson': 'info.json', + 'link': None, 'pl_thumbnail': None, 'pl_description': 'description', 'pl_infojson': 'info.json', @@ -6238,6 +6239,12 @@ URL=%(url)s Icon=text-html '''.lstrip() +LINK_TEMPLATES = { + 'url': DOT_URL_LINK_TEMPLATE, + 'desktop': DOT_DESKTOP_LINK_TEMPLATE, + 'webloc': DOT_WEBLOC_LINK_TEMPLATE, +} + def iri_to_uri(iri): """ From abad800058180da93f482915070aef12f8f63564 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 26 Oct 2021 20:12:30 +0530 Subject: [PATCH 0131/2552] [downloader/ffmpeg] Fix vtt download with ffmpeg --- yt_dlp/postprocessor/ffmpeg.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 4a0a96427..b7fcc569b 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -41,6 +41,7 @@ EXT_TO_OUT_FORMATS = { 'ts': 'mpegts', 'wma': 'asf', 'wmv': 'asf', + 'vtt': 'webvtt', } ACODECS = { 'mp3': 'libmp3lame', From 48f796874d78ad3d1849d0639893667f6cdf30d2 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 26 Oct 2021 20:15:12 +0530 Subject: [PATCH 0132/2552] [utils] Create `DownloadCancelled` exception as super-class of ExistingVideoReached, RejectedVideoReached, MaxDownloadsReached Third parties can also sub-class this to cancel the download queue from a hook --- yt_dlp/YoutubeDL.py | 13 ++++--------- yt_dlp/utils.py | 30 ++++++++++++++++++++---------- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index ced7d1202..2c2b17b20 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -56,6 +56,7 @@ from .utils import ( DEFAULT_OUTTMPL, determine_ext, determine_protocol, + DownloadCancelled, DownloadError, encode_compat_str, encodeFilename, @@ -1320,7 +1321,7 @@ class YoutubeDL(object): self.to_stderr('\r') self.report_warning('The download speed is below throttle limit. 
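
With the three shortcut writers collapsed into one `_write_link_file`, the per-flavour differences live in `LINK_TEMPLATES` (plus the `.url`-only CRLF newlines and the `.desktop`-only embedded filename). Rendering the Windows flavour, for instance; the exact template text lives in utils.py, so the printed output shown is an assumption:

```
from yt_dlp.utils import LINK_TEMPLATES

# Expected to print something like:
# [InternetShortcut]
# URL=https://example.com/watch?v=abc
print(LINK_TEMPLATES['url'] % {'url': 'https://example.com/watch?v=abc'})
```
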
Re-extracting data') return wrapper(self, *args, **kwargs) - except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached, LazyList.IndexError): + except (DownloadCancelled, LazyList.IndexError): raise except Exception as e: if self.params.get('ignoreerrors'): @@ -2949,14 +2950,8 @@ class YoutubeDL(object): url, force_generic_extractor=self.params.get('force_generic_extractor', False)) except UnavailableVideoError: self.report_error('unable to download video') - except MaxDownloadsReached: - self.to_screen('[info] Maximum number of downloads reached') - raise - except ExistingVideoReached: - self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing') - raise - except RejectedVideoReached: - self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject') + except DownloadCancelled as e: + self.to_screen(f'[info] {e.msg}') raise else: if self.params.get('dump_single_json', False): diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 9d90eca5e..a8755a1b9 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2542,23 +2542,33 @@ class PostProcessingError(YoutubeDLError): self.msg = msg -class ExistingVideoReached(YoutubeDLError): - """ --max-downloads limit has been reached. """ - pass +class DownloadCancelled(YoutubeDLError): + """ Exception raised when the download queue should be interrupted """ + msg = 'The download was cancelled' + def __init__(self, msg=None): + if msg is not None: + self.msg = msg + YoutubeDLError.__init__(self, self.msg) -class RejectedVideoReached(YoutubeDLError): - """ --max-downloads limit has been reached. """ - pass +class ExistingVideoReached(DownloadCancelled): + """ --break-on-existing triggered """ + msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing' -class ThrottledDownload(YoutubeDLError): - """ Download speed below --throttled-rate. """ - pass + +class RejectedVideoReached(DownloadCancelled): + """ --break-on-reject triggered """ + msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject' -class MaxDownloadsReached(YoutubeDLError): +class MaxDownloadsReached(DownloadCancelled): """ --max-downloads limit has been reached. """ + msg = 'Maximum number of downloads reached, stopping due to --max-downloads' + + +class ThrottledDownload(YoutubeDLError): + """ Download speed below --throttled-rate. 
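
Because `download()` now catches the common `DownloadCancelled` base and prints its `msg`, an embedder can stop the whole queue from any hook by raising a subclass, as the commit message suggests. A hedged sketch; the threshold, hook logic and URL are arbitrary, and the exception is re-raised to the caller after being reported:

```
import shutil

from yt_dlp import YoutubeDL
from yt_dlp.utils import DownloadCancelled

class DiskAlmostFull(DownloadCancelled):
    msg = 'Stopping the queue: less than 1 GiB of free disk space'

def hook(status):
    if shutil.disk_usage('.').free < 1 << 30:
        raise DiskAlmostFull()

with YoutubeDL({'progress_hooks': [hook]}) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```
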
""" pass From 0db3bae879d57ff400f8c61261534b6e3659c470 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 26 Oct 2021 20:17:29 +0530 Subject: [PATCH 0133/2552] [extractor] Fix some errors being converted to `ExtractorError` --- yt_dlp/extractor/common.py | 16 +++++++++++++--- yt_dlp/utils.py | 6 +++--- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index c0d714249..369cff418 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -74,6 +74,7 @@ from ..utils import ( strip_or_none, traverse_obj, unescapeHTML, + UnsupportedError, unified_strdate, unified_timestamp, update_Request, @@ -604,10 +605,19 @@ class InfoExtractor(object): if self.__maybe_fake_ip_and_retry(e.countries): continue raise + except UnsupportedError: + raise except ExtractorError as e: - video_id = e.video_id or self.get_temp_id(url) - raise ExtractorError( - e.msg, video_id=video_id, ie=self.IE_NAME, tb=e.traceback, expected=e.expected, cause=e.cause) + kwargs = { + 'video_id': e.video_id or self.get_temp_id(url), + 'ie': self.IE_NAME, + 'tb': e.traceback, + 'expected': e.expected, + 'cause': e.cause + } + if hasattr(e, 'countries'): + kwargs['countries'] = e.countries + raise type(e)(e.msg, **kwargs) except compat_http_client.IncompleteRead as e: raise ExtractorError('A network error has occurred.', cause=e, expected=True, video_id=self.get_temp_id(url)) except (KeyError, StopIteration) as e: diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index a8755a1b9..48baa6503 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2492,9 +2492,9 @@ class GeoRestrictedError(ExtractorError): geographic location due to geographic restrictions imposed by a website. """ - def __init__(self, msg, countries=None): - super(GeoRestrictedError, self).__init__(msg, expected=True) - self.msg = msg + def __init__(self, msg, countries=None, **kwargs): + kwargs['expected'] = True + super(GeoRestrictedError, self).__init__(msg, **kwargs) self.countries = countries From c35ada33604b820a6f2b3c6a2d4045b6c9c7dedf Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 26 Oct 2021 21:14:13 +0530 Subject: [PATCH 0134/2552] [twitter] Do not sort by codec Closes #1431 --- yt_dlp/extractor/twitter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py index 485b781ca..0749263d9 100644 --- a/yt_dlp/extractor/twitter.py +++ b/yt_dlp/extractor/twitter.py @@ -485,7 +485,7 @@ class TwitterIE(TwitterBaseIE): fmts, subs = self._extract_variant_formats(variant, twid) subtitles = self._merge_subtitles(subtitles, subs) formats.extend(fmts) - self._sort_formats(formats) + self._sort_formats(formats, ('res', 'br', 'size', 'proto')) # The codec of http formats are unknown thumbnails = [] media_url = media.get('media_url_https') or media.get('media_url') From 0c873df3a84e6269dff03fd91ce4f23a38bd8f27 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Tue, 26 Oct 2021 21:17:39 +0530 Subject: [PATCH 0135/2552] [3speak] Add extractors (#1430) Closes #1421 Authored by: Ashish0804 --- yt_dlp/extractor/extractors.py | 4 ++ yt_dlp/extractor/threespeak.py | 97 ++++++++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+) create mode 100644 yt_dlp/extractor/threespeak.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index ef2b25c93..035c159c2 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1442,6 
+1442,10 @@ from .theweatherchannel import TheWeatherChannelIE from .thisamericanlife import ThisAmericanLifeIE from .thisav import ThisAVIE from .thisoldhouse import ThisOldHouseIE +from .threespeak import ( + ThreeSpeakIE, + ThreeSpeakUserIE, +) from .threeqsdn import ThreeQSDNIE from .tiktok import ( TikTokIE, diff --git a/yt_dlp/extractor/threespeak.py b/yt_dlp/extractor/threespeak.py new file mode 100644 index 000000000..60e84529d --- /dev/null +++ b/yt_dlp/extractor/threespeak.py @@ -0,0 +1,97 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + try_get, + unified_strdate, +) + + +class ThreeSpeakIE(InfoExtractor): + _VALID_URL = r'(?:https?://)(?:www\.)?3speak\.tv/watch\?v\=[^/]+/(?P[^/$&#?]+)' + + _TESTS = [{ + 'url': 'https://3speak.tv/watch?v=dannyshine/wjgoxyfy', + 'info_dict': { + 'id': 'wjgoxyfy', + 'ext': 'mp4', + 'title': 'Can People who took the Vax think Critically', + 'uploader': 'dannyshine', + 'description': 'md5:181aa7ccb304afafa089b5af3bca7a10', + 'tags': ['sex', 'covid', 'antinatalism', 'comedy', 'vaccines'], + 'thumbnail': 'https://img.3speakcontent.co/wjgoxyfy/thumbnails/default.png', + 'upload_date': '20211021', + 'duration': 2703.867833, + 'filesize': 1620054781, + }, + 'params': {'skip_download': True} + }] + + def _real_extract(self, url): + id = self._match_id(url) + webpage = self._download_webpage(url, id) + json_str = self._html_search_regex(r'JSON\.parse\(\'([^\']+)\'\)', webpage, 'json') + # The json string itself is escaped. Hence the double parsing + data_json = self._parse_json(self._parse_json(f'"{json_str}"', id), id) + video_json = self._parse_json(data_json['json_metadata'], id) + formats, subtitles = [], {} + og_m3u8 = self._html_search_regex(r'', webpage, 'og m3u8', fatal=False) + if og_m3u8: + https_frmts, https_subs = self._extract_m3u8_formats_and_subtitles(og_m3u8, id, fatal=False, m3u8_id='https') + formats.extend(https_frmts) + subtitles = self._merge_subtitles(subtitles, https_subs) + ipfs_m3u8 = try_get(video_json, lambda x: x['video']['info']['ipfs']) + if ipfs_m3u8: + ipfs_frmts, ipfs_subs = self._extract_m3u8_formats_and_subtitles(f'https://ipfs.3speak.tv/ipfs/{ipfs_m3u8}', + id, fatal=False, m3u8_id='ipfs') + formats.extend(ipfs_frmts) + subtitles = self._merge_subtitles(subtitles, ipfs_subs) + mp4_file = try_get(video_json, lambda x: x['video']['info']['file']) + if mp4_file: + formats.append({ + 'url': f'https://threespeakvideo.b-cdn.net/{id}/{mp4_file}', + 'ext': 'mp4', + 'format_id': 'https-mp4', + 'duration': try_get(video_json, lambda x: x['video']['info']['duration']), + 'filesize': try_get(video_json, lambda x: x['video']['info']['filesize']), + 'quality': 11, + 'format_note': 'Original file', + }) + self._sort_formats(formats) + return { + 'id': id, + 'title': data_json.get('title') or data_json.get('root_title'), + 'uploader': data_json.get('author'), + 'description': try_get(video_json, lambda x: x['video']['content']['description']), + 'tags': try_get(video_json, lambda x: x['video']['content']['tags']), + 'thumbnail': try_get(video_json, lambda x: x['image'][0]), + 'upload_date': unified_strdate(data_json.get('created')), + 'formats': formats, + 'subtitles': subtitles, + } + + +class ThreeSpeakUserIE(InfoExtractor): + _VALID_URL = r'(?:https?://)(?:www\.)?3speak\.tv/user/(?P[^/$&?#]+)' + + _TESTS = [{ + 'url': 'https://3speak.tv/user/theycallmedan', + 'info_dict': { + 'id': 'theycallmedan', + }, + 'playlist_mincount': 115, + }] + + def 
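
The double `_parse_json` in this extractor exists because 3speak embeds its metadata as `JSON.parse('...')`, a JSON document escaped inside a JavaScript string literal. The first pass decodes the string, the second the object; the same trick with the stdlib on a made-up capture:

```
import json

raw = r'{\"id\": \"wjgoxyfy\", \"title\": \"Example\"}'  # contents of JSON.parse('...')
unescaped = json.loads(f'"{raw}"')  # 1st parse: treat it as a JSON string literal
data = json.loads(unescaped)        # 2nd parse: the actual object
print(data['title'])                # Example
```
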
_real_extract(self, url): + id = self._match_id(url) + webpage = self._download_webpage(url, id) + entries = [ + self.url_result( + 'https://3speak.tv/watch?v=%s' % video, + ie=ThreeSpeakIE.ie_key()) + for video in re.findall(r'data-payout\s?\=\s?\"([^\"]+)\"', webpage) if video + ] + return self.playlist_result(entries, id) From 673944b001447adb0de88c12fa22577a770d771a Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 26 Oct 2021 20:47:30 +0530 Subject: [PATCH 0136/2552] [compat] Don't create console in `windows_enable_vt_mode` Closes #1420 --- yt_dlp/compat.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/compat.py b/yt_dlp/compat.py index b107b2114..8508f1465 100644 --- a/yt_dlp/compat.py +++ b/yt_dlp/compat.py @@ -19,6 +19,7 @@ import shlex import shutil import socket import struct +import subprocess import sys import tokenize import urllib @@ -162,7 +163,9 @@ except ImportError: def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.python.org/issue30075 if compat_os_name != 'nt': return - os.system('') + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + subprocess.Popen('', shell=True, startupinfo=startupinfo) # Deprecated From dc88e9be03ea0974760725d1ad089b91a7fefe52 Mon Sep 17 00:00:00 2001 From: nyuszika7h Date: Tue, 26 Oct 2021 18:33:43 +0200 Subject: [PATCH 0137/2552] [wakanim] Add support for MPD manifests (#1428) Closes #1426 Authored by: nyuszika7h --- yt_dlp/extractor/wakanim.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/wakanim.py b/yt_dlp/extractor/wakanim.py index c956d616e..22441c38f 100644 --- a/yt_dlp/extractor/wakanim.py +++ b/yt_dlp/extractor/wakanim.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +from urllib.parse import unquote + from .common import InfoExtractor from ..utils import ( merge_dicts, @@ -37,20 +39,24 @@ class WakanimIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - m3u8_url = urljoin(url, self._search_regex( - r'file\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, 'm3u8 url', + manifest_url = urljoin(url, self._search_regex( + r'file\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, 'manifest url', group='url')) if not self.get_param('allow_unplayable_formats'): # https://docs.microsoft.com/en-us/azure/media-services/previous/media-services-content-protection-overview#streaming-urls encryption = self._search_regex( r'encryption%3D(c(?:enc|bc(?:s-aapl)?))', - m3u8_url, 'encryption', default=None) + manifest_url, 'encryption', default=None) if encryption in ('cenc', 'cbcs-aapl'): self.report_drm(video_id) - formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls') + if 'format=mpd-time-cmaf' in unquote(manifest_url): + formats = self._extract_mpd_formats( + manifest_url, video_id, mpd_id='dash') + else: + formats = self._extract_m3u8_formats( + manifest_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') info = self._search_json_ld(webpage, video_id, default={}) From bd1c7923274962e3027acf63111ccb0d766b9725 Mon Sep 17 00:00:00 2001 From: nyuszika7h Date: Tue, 26 Oct 2021 18:35:20 +0200 Subject: [PATCH 0138/2552] [wakanim] Detect geo-restriction (#1429) Authored by: nyuszika7h --- yt_dlp/extractor/wakanim.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/yt_dlp/extractor/wakanim.py b/yt_dlp/extractor/wakanim.py index 22441c38f..a61a630e2 100644 --- a/yt_dlp/extractor/wakanim.py +++ 
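
Wakanim serves both HLS and DASH from Azure Media Services, and the patch above picks the parser by looking for the `format=mpd-time-cmaf` marker in the percent-decoded manifest URL. The same check stand-alone; the sample URL is illustrative only:

```
from urllib.parse import unquote

def manifest_kind(manifest_url):
    # the container format rides along as a percent-encoded query parameter
    if 'format=mpd-time-cmaf' in unquote(manifest_url):
        return 'dash'
    return 'hls'

url = 'https://example.streaming.media.azure.net/out/manifest(format%3Dmpd-time-cmaf)'
print(manifest_kind(url))  # dash
```
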
b/yt_dlp/extractor/wakanim.py @@ -33,12 +33,19 @@ class WakanimIE(InfoExtractor): 'url': 'https://www.wakanim.tv/de/v2/catalogue/episode/7843/sword-art-online-alicization-omu-arc-2-folge-15-omu', 'only_matching': True, }] + _GEO_BYPASS = False def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + if 'Geoblocking' in webpage: + if '/de/' in url: + self.raise_geo_restricted(countries=['DE', 'AT', 'CH']) + else: + self.raise_geo_restricted(countries=['RU']) + manifest_url = urljoin(url, self._search_regex( r'file\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, 'manifest url', group='url')) From b7b186e7decc1236576086d9ced3579af57b07c3 Mon Sep 17 00:00:00 2001 From: ajj8 <35781586+ajj8@users.noreply.github.com> Date: Wed, 27 Oct 2021 17:08:48 +0100 Subject: [PATCH 0139/2552] [sky] Add `SkyNewsStoryIE` (#1443) Authored by: ajj8 --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/sky.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 035c159c2..21c71a835 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1289,6 +1289,7 @@ from .skynewsarabia import ( from .skynewsau import SkyNewsAUIE from .sky import ( SkyNewsIE, + SkyNewsStoryIE, SkySportsIE, SkySportsNewsIE, ) diff --git a/yt_dlp/extractor/sky.py b/yt_dlp/extractor/sky.py index ff2c977a0..ad1e62d88 100644 --- a/yt_dlp/extractor/sky.py +++ b/yt_dlp/extractor/sky.py @@ -105,6 +105,34 @@ class SkyNewsIE(SkyBaseIE): } +class SkyNewsStoryIE(SkyBaseIE): + IE_NAME = 'sky:news:story' + _VALID_URL = r'https?://news\.sky\.com/story/[0-9a-z-]+-(?P[0-9]+)' + _TEST = { + 'url': 'https://news.sky.com/story/budget-2021-chancellor-rishi-sunak-vows-address-will-deliver-strong-economy-fit-for-a-new-age-of-optimism-12445425', + 'info_dict': { + 'id': 'ref:0714acb9-123d-42c8-91b8-5c1bc6c73f20', + 'title': 'md5:e408dd7aad63f31a1817bbe40c7d276f', + 'description': 'md5:a881e12f49212f92be2befe4a09d288a', + 'ext': 'mp4', + 'upload_date': '20211027', + 'timestamp': 1635317494, + 'uploader_id': '6058004172001', + } + } + + def _real_extract(self, url): + article_id = self._match_id(url) + webpage = self._download_webpage(url, article_id) + + entries = [self._process_ooyala_element(webpage, sdc_el, url) + for sdc_el in re.findall(self._SDC_EL_REGEX, webpage)] + + return self.playlist_result( + entries, article_id, self._og_search_title(webpage), + self._html_search_meta(['og:description', 'description'], webpage)) + + class SkySportsNewsIE(SkyBaseIE): IE_NAME = 'sky:sports:news' _VALID_URL = r'https?://(?:www\.)?skysports\.com/([^/]+/)*news/\d+/(?P\d+)' From 5be76d1ab7fed65a5894b221c7b7f896a18fc820 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 28 Oct 2021 02:01:26 +0530 Subject: [PATCH 0140/2552] [viewlift] Add cookie-based login and series support Closes #1340, #1316 Authored by: Ashish0804, pukkandan --- yt_dlp/extractor/viewlift.py | 192 ++++++++++++++++++++++++++--------- 1 file changed, 146 insertions(+), 46 deletions(-) diff --git a/yt_dlp/extractor/viewlift.py b/yt_dlp/extractor/viewlift.py index c3b2e863d..ca53a1736 100644 --- a/yt_dlp/extractor/viewlift.py +++ b/yt_dlp/extractor/viewlift.py @@ -9,6 +9,7 @@ from ..utils import ( ExtractorError, int_or_none, parse_age_limit, + traverse_obj, ) @@ -32,26 +33,36 @@ class ViewLiftBaseIE(InfoExtractor): } _TOKENS = {} - def _call_api(self, site, path, video_id, query): - token = self._TOKENS.get(site) - if not token: - 
token_query = {'site': site} - email, password = self._get_login_info(netrc_machine=site) - if email: - resp = self._download_json( - self._API_BASE + 'identity/signin', video_id, - 'Logging in', query=token_query, data=json.dumps({ - 'email': email, - 'password': password, - }).encode()) - else: - resp = self._download_json( - self._API_BASE + 'identity/anonymous-token', video_id, - 'Downloading authorization token', query=token_query) - self._TOKENS[site] = token = resp['authorizationToken'] - return self._download_json( - self._API_BASE + path, video_id, - headers={'Authorization': token}, query=query) + def _fetch_token(self, site, url): + if self._TOKENS.get(site): + return + email, password = self._get_login_info(netrc_machine=site) + if email: + self.report_warning('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies']) + + cookies = self._get_cookies(url) + if cookies and cookies.get('token'): + self._TOKENS[site] = self._search_regex(r'22authorizationToken\%22:\%22([^\%]+)\%22', cookies['token'].value, 'token') + if not self._TOKENS.get(site): + self.raise_login_required('Cookies (not necessarily logged in) are needed to download from this website', method='cookies') + + def _call_api(self, site, path, video_id, url, query): + self._fetch_token(site, url) + try: + return self._download_json( + self._API_BASE + path, video_id, headers={'Authorization': self._TOKENS.get(site)}, query=query) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + webpage = e.cause.read().decode() + try: + error_message = traverse_obj(json.loads(webpage), 'errorMessage', 'message') + except json.JSONDecodeError: + raise ExtractorError(f'{site} said: {webpage}', cause=e.cause) + if error_message: + if 'has not purchased' in error_message: + self.raise_login_required(method='cookies') + raise ExtractorError(error_message, expected=True) + raise class ViewLiftEmbedIE(ViewLiftBaseIE): @@ -81,6 +92,81 @@ class ViewLiftEmbedIE(ViewLiftBaseIE): }, { 'url': 'http://www.snagfilms.com/embed/player?filmId=0000014c-de2f-d5d6-abcf-ffef58af0017', 'only_matching': True, + }, { # Free film with langauge code + 'url': 'https://www.hoichoi.tv/bn/films/title/shuyopoka', + 'info_dict': { + 'id': '7a7a9d33-1f4c-4771-9173-ee4fb6dbf196', + 'ext': 'mp4', + 'title': 'Shuyopoka', + 'description': 'md5:e28f2fb8680096a69c944d37c1fa5ffc', + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20211006', + 'series': None + }, + 'params': {'skip_download': True}, + }, { # Free film + 'url': 'https://www.hoichoi.tv/films/title/dadu-no1', + 'info_dict': { + 'id': '0000015b-b009-d126-a1db-b81ff3780000', + 'ext': 'mp4', + 'title': 'Dadu No.1', + 'description': 'md5:605cba408e51a79dafcb824bdeded51e', + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20210827', + 'series': None + }, + 'params': {'skip_download': True}, + }, { # Free episode + 'url': 'https://www.hoichoi.tv/webseries/case-jaundice-s01-e01', + 'info_dict': { + 'id': 'f779e07c-30c8-459c-8612-5a834ab5e5ba', + 'ext': 'mp4', + 'title': 'Humans Vs. 
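
`_fetch_token` above pulls the authorization token straight out of the site's percent-encoded `token` cookie instead of logging in. How that regex behaves on a hypothetical cookie value (the real payload is assumed to carry more fields; `[^\%]+` works because the token itself contains no percent-escapes):

```
import re

cookie_value = '%7B%22authorizationToken%22:%22eyJhbGciOiJIUzI1NiJ9.e30.abc%22%7D'
token = re.search(
    r'22authorizationToken\%22:\%22([^\%]+)\%22', cookie_value).group(1)
print(token)  # eyJhbGciOiJIUzI1NiJ9.e30.abc
```
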
Corona', + 'description': 'md5:ca30a682b4528d02a3eb6d0427dd0f87', + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20210830', + 'series': 'Case Jaundice' + }, + 'params': {'skip_download': True}, + }, { # Free video + 'url': 'https://www.hoichoi.tv/videos/1549072415320-six-episode-02-hindi', + 'info_dict': { + 'id': 'b41fa1ce-aca6-47b6-b208-283ff0a2de30', + 'ext': 'mp4', + 'title': 'Woman in red - Hindi', + 'description': 'md5:9d21edc1827d32f8633eb67c2054fc31', + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20211006', + 'series': 'Six (Hindi)' + }, + 'params': {'skip_download': True}, + }, { # Free episode + 'url': 'https://www.hoichoi.tv/shows/watch-asian-paints-moner-thikana-online-season-1-episode-1', + 'info_dict': { + 'id': '1f45d185-8500-455c-b88d-13252307c3eb', + 'ext': 'mp4', + 'title': 'Jisshu Sengupta', + 'description': 'md5:ef6ffae01a3d83438597367400f824ed', + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20211004', + 'series': 'Asian Paints Moner Thikana' + }, + 'params': {'skip_download': True}, + }, { # Free series + 'url': 'https://www.hoichoi.tv/shows/watch-moner-thikana-bengali-web-series-online', + 'playlist_mincount': 5, + 'info_dict': { + 'id': 'watch-moner-thikana-bengali-web-series-online', + }, + }, { # Premium series + 'url': 'https://www.hoichoi.tv/shows/watch-byomkesh-bengali-web-series-online', + 'playlist_mincount': 14, + 'info_dict': { + 'id': 'watch-byomkesh-bengali-web-series-online', + }, + }, { # Premium movie + 'url': 'https://www.hoichoi.tv/movies/detective-2020', + 'only_matching': True }] @staticmethod @@ -96,27 +182,24 @@ class ViewLiftEmbedIE(ViewLiftBaseIE): site = domain.split('.')[-2] if site in self._SITE_MAP: site = self._SITE_MAP[site] - try: - content_data = self._call_api( - site, 'entitlement/video/status', film_id, { - 'id': film_id - })['video'] - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: - error_message = self._parse_json(e.cause.read().decode(), film_id).get('errorMessage') - if error_message == 'User does not have a valid subscription or has not purchased this content.': - self.raise_login_required() - raise ExtractorError(error_message, expected=True) - raise + + content_data = self._call_api( + site, 'entitlement/video/status', film_id, url, { + 'id': film_id + })['video'] gist = content_data['gist'] title = gist['title'] video_assets = content_data['streamingInfo']['videoAssets'] - formats = [] - mpeg_video_assets = video_assets.get('mpeg') or [] - for video_asset in mpeg_video_assets: + hls_url = video_assets.get('hls') + formats, subtitles = [], {} + if hls_url: + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + hls_url, film_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) + + for video_asset in video_assets.get('mpeg') or []: video_asset_url = video_asset.get('url') - if not video_asset: + if not video_asset_url: continue bitrate = int_or_none(video_asset.get('bitrate')) height = int_or_none(self._search_regex( @@ -130,13 +213,17 @@ class ViewLiftEmbedIE(ViewLiftBaseIE): 'vcodec': video_asset.get('codec'), }) - hls_url = video_assets.get('hls') - if hls_url: - formats.extend(self._extract_m3u8_formats( - hls_url, film_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - self._sort_formats(formats) + subs = {} + for sub in traverse_obj(content_data, ('contentDetails', 'closedCaptions')) or []: + sub_url = sub.get('url') + if not sub_url: + continue + subs.setdefault(sub.get('language', 'English'), []).append({ + 'url': 
sub_url, + }) - info = { + self._sort_formats(formats) + return { 'id': film_id, 'title': title, 'description': gist.get('description'), @@ -145,14 +232,15 @@ class ViewLiftEmbedIE(ViewLiftBaseIE): 'age_limit': parse_age_limit(content_data.get('parentalRating')), 'timestamp': int_or_none(gist.get('publishDate'), 1000), 'formats': formats, + 'subtitles': self._merge_subtitles(subs, subtitles), + 'categories': traverse_obj(content_data, ('categories', ..., 'title')), + 'tags': traverse_obj(content_data, ('tags', ..., 'title')), } - for k in ('categories', 'tags'): - info[k] = [v['title'] for v in content_data.get(k, []) if v.get('title')] - return info class ViewLiftIE(ViewLiftBaseIE): IE_NAME = 'viewlift' + _API_BASE = 'https://prod-api-cached-2.viewlift.com/' _VALID_URL = r'https?://(?:www\.)?(?P%s)(?P(?:/(?:films/title|show|(?:news/)?videos?|watch))?/(?P[^?#]+))' % ViewLiftBaseIE._DOMAINS_REGEX _TESTS = [{ 'url': 'http://www.snagfilms.com/films/title/lost_for_life', @@ -228,18 +316,30 @@ class ViewLiftIE(ViewLiftBaseIE): def suitable(cls, url): return False if ViewLiftEmbedIE.suitable(url) else super(ViewLiftIE, cls).suitable(url) + def _show_entries(self, domain, seasons): + for season in seasons: + for episode in season.get('episodes') or []: + path = traverse_obj(episode, ('gist', 'permalink')) + if path: + yield self.url_result(f'https://www.{domain}{path}', ie=self.ie_key()) + def _real_extract(self, url): domain, path, display_id = self._match_valid_url(url).groups() site = domain.split('.')[-2] if site in self._SITE_MAP: site = self._SITE_MAP[site] modules = self._call_api( - site, 'content/pages', display_id, { + site, 'content/pages', display_id, url, { 'includeContent': 'true', 'moduleOffset': 1, 'path': path, 'site': site, })['modules'] + + seasons = next((m['contentData'][0]['seasons'] for m in modules if m.get('moduleType') == 'ShowDetailModule'), None) + if seasons: + return self.playlist_result(self._show_entries(domain, seasons), display_id) + film_id = next(m['contentData'][0]['gist']['id'] for m in modules if m.get('moduleType') == 'VideoDetailModule') return { '_type': 'url_transparent', From 16b0d7e621c2fb4dc23e88f9b3e1a7b61cf5c60e Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 28 Oct 2021 02:07:15 +0530 Subject: [PATCH 0141/2552] [utils] Add `jwt_decode_hs256` Code from #1340 Authored by: Ashish0804 --- yt_dlp/utils.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 48baa6503..080bf260a 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -6512,6 +6512,13 @@ def jwt_encode_hs256(payload_data, key, headers={}): return token +# can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256 +def jwt_decode_hs256(jwt): + header_b64, payload_b64, signature_b64 = jwt.split('.') + payload_data = json.loads(base64.urlsafe_b64decode(payload_b64)) + return payload_data + + def supports_terminal_sequences(stream): if compat_os_name == 'nt': if get_windows_version() < (10, 0, 10586): From ab630a57b9df229fa150a7eaa19ff51750597fbc Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 28 Oct 2021 02:14:33 +0530 Subject: [PATCH 0142/2552] [viewlift] Fix typo in 5be76d1ab7fed65a5894b221c7b7f896a18fc820 --- yt_dlp/extractor/viewlift.py | 150 +++++++++++++++++------------------ 1 file changed, 75 insertions(+), 75 deletions(-) diff --git a/yt_dlp/extractor/viewlift.py b/yt_dlp/extractor/viewlift.py index ca53a1736..5b558d890 100644 --- a/yt_dlp/extractor/viewlift.py +++ 
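
The new `jwt_decode_hs256` only unpacks the payload segment, with no signature verification, as its comment says. Round-tripping through the `jwt_encode_hs256` helper defined just above it in utils.py:

```
from yt_dlp.utils import jwt_encode_hs256, jwt_decode_hs256

token = jwt_encode_hs256({'sub': 'user123'}, 'secret')
if isinstance(token, bytes):  # the encoder returns raw bytes
    token = token.decode()
print(jwt_decode_hs256(token))  # {'sub': 'user123'}
```

Note that tokens minted elsewhere usually strip the base64 padding, which this minimal decoder does not restore, so it can raise on such input.
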
b/yt_dlp/extractor/viewlift.py @@ -92,81 +92,6 @@ class ViewLiftEmbedIE(ViewLiftBaseIE): }, { 'url': 'http://www.snagfilms.com/embed/player?filmId=0000014c-de2f-d5d6-abcf-ffef58af0017', 'only_matching': True, - }, { # Free film with langauge code - 'url': 'https://www.hoichoi.tv/bn/films/title/shuyopoka', - 'info_dict': { - 'id': '7a7a9d33-1f4c-4771-9173-ee4fb6dbf196', - 'ext': 'mp4', - 'title': 'Shuyopoka', - 'description': 'md5:e28f2fb8680096a69c944d37c1fa5ffc', - 'thumbnail': r're:^https?://.*\.jpg$', - 'upload_date': '20211006', - 'series': None - }, - 'params': {'skip_download': True}, - }, { # Free film - 'url': 'https://www.hoichoi.tv/films/title/dadu-no1', - 'info_dict': { - 'id': '0000015b-b009-d126-a1db-b81ff3780000', - 'ext': 'mp4', - 'title': 'Dadu No.1', - 'description': 'md5:605cba408e51a79dafcb824bdeded51e', - 'thumbnail': r're:^https?://.*\.jpg$', - 'upload_date': '20210827', - 'series': None - }, - 'params': {'skip_download': True}, - }, { # Free episode - 'url': 'https://www.hoichoi.tv/webseries/case-jaundice-s01-e01', - 'info_dict': { - 'id': 'f779e07c-30c8-459c-8612-5a834ab5e5ba', - 'ext': 'mp4', - 'title': 'Humans Vs. Corona', - 'description': 'md5:ca30a682b4528d02a3eb6d0427dd0f87', - 'thumbnail': r're:^https?://.*\.jpg$', - 'upload_date': '20210830', - 'series': 'Case Jaundice' - }, - 'params': {'skip_download': True}, - }, { # Free video - 'url': 'https://www.hoichoi.tv/videos/1549072415320-six-episode-02-hindi', - 'info_dict': { - 'id': 'b41fa1ce-aca6-47b6-b208-283ff0a2de30', - 'ext': 'mp4', - 'title': 'Woman in red - Hindi', - 'description': 'md5:9d21edc1827d32f8633eb67c2054fc31', - 'thumbnail': r're:^https?://.*\.jpg$', - 'upload_date': '20211006', - 'series': 'Six (Hindi)' - }, - 'params': {'skip_download': True}, - }, { # Free episode - 'url': 'https://www.hoichoi.tv/shows/watch-asian-paints-moner-thikana-online-season-1-episode-1', - 'info_dict': { - 'id': '1f45d185-8500-455c-b88d-13252307c3eb', - 'ext': 'mp4', - 'title': 'Jisshu Sengupta', - 'description': 'md5:ef6ffae01a3d83438597367400f824ed', - 'thumbnail': r're:^https?://.*\.jpg$', - 'upload_date': '20211004', - 'series': 'Asian Paints Moner Thikana' - }, - 'params': {'skip_download': True}, - }, { # Free series - 'url': 'https://www.hoichoi.tv/shows/watch-moner-thikana-bengali-web-series-online', - 'playlist_mincount': 5, - 'info_dict': { - 'id': 'watch-moner-thikana-bengali-web-series-online', - }, - }, { # Premium series - 'url': 'https://www.hoichoi.tv/shows/watch-byomkesh-bengali-web-series-online', - 'playlist_mincount': 14, - 'info_dict': { - 'id': 'watch-byomkesh-bengali-web-series-online', - }, - }, { # Premium movie - 'url': 'https://www.hoichoi.tv/movies/detective-2020', - 'only_matching': True }] @staticmethod @@ -310,6 +235,81 @@ class ViewLiftIE(ViewLiftBaseIE): }, { 'url': 'https://www.marquee.tv/watch/sadlerswells-sacredmonsters', 'only_matching': True, + }, { # Free film with langauge code + 'url': 'https://www.hoichoi.tv/bn/films/title/shuyopoka', + 'info_dict': { + 'id': '7a7a9d33-1f4c-4771-9173-ee4fb6dbf196', + 'ext': 'mp4', + 'title': 'Shuyopoka', + 'description': 'md5:e28f2fb8680096a69c944d37c1fa5ffc', + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20211006', + 'series': None + }, + 'params': {'skip_download': True}, + }, { # Free film + 'url': 'https://www.hoichoi.tv/films/title/dadu-no1', + 'info_dict': { + 'id': '0000015b-b009-d126-a1db-b81ff3780000', + 'ext': 'mp4', + 'title': 'Dadu No.1', + 'description': 'md5:605cba408e51a79dafcb824bdeded51e', + 'thumbnail': 
r're:^https?://.*\.jpg$', + 'upload_date': '20210827', + 'series': None + }, + 'params': {'skip_download': True}, + }, { # Free episode + 'url': 'https://www.hoichoi.tv/webseries/case-jaundice-s01-e01', + 'info_dict': { + 'id': 'f779e07c-30c8-459c-8612-5a834ab5e5ba', + 'ext': 'mp4', + 'title': 'Humans Vs. Corona', + 'description': 'md5:ca30a682b4528d02a3eb6d0427dd0f87', + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20210830', + 'series': 'Case Jaundice' + }, + 'params': {'skip_download': True}, + }, { # Free video + 'url': 'https://www.hoichoi.tv/videos/1549072415320-six-episode-02-hindi', + 'info_dict': { + 'id': 'b41fa1ce-aca6-47b6-b208-283ff0a2de30', + 'ext': 'mp4', + 'title': 'Woman in red - Hindi', + 'description': 'md5:9d21edc1827d32f8633eb67c2054fc31', + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20211006', + 'series': 'Six (Hindi)' + }, + 'params': {'skip_download': True}, + }, { # Free episode + 'url': 'https://www.hoichoi.tv/shows/watch-asian-paints-moner-thikana-online-season-1-episode-1', + 'info_dict': { + 'id': '1f45d185-8500-455c-b88d-13252307c3eb', + 'ext': 'mp4', + 'title': 'Jisshu Sengupta', + 'description': 'md5:ef6ffae01a3d83438597367400f824ed', + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20211004', + 'series': 'Asian Paints Moner Thikana' + }, + 'params': {'skip_download': True}, + }, { # Free series + 'url': 'https://www.hoichoi.tv/shows/watch-moner-thikana-bengali-web-series-online', + 'playlist_mincount': 5, + 'info_dict': { + 'id': 'watch-moner-thikana-bengali-web-series-online', + }, + }, { # Premium series + 'url': 'https://www.hoichoi.tv/shows/watch-byomkesh-bengali-web-series-online', + 'playlist_mincount': 14, + 'info_dict': { + 'id': 'watch-byomkesh-bengali-web-series-online', + }, + }, { # Premium movie + 'url': 'https://www.hoichoi.tv/movies/detective-2020', + 'only_matching': True }] @classmethod From 3783b5f1d13380f9472bcbdca192aff349c01b17 Mon Sep 17 00:00:00 2001 From: ajj8 <35781586+ajj8@users.noreply.github.com> Date: Thu, 28 Oct 2021 11:57:09 +0100 Subject: [PATCH 0143/2552] [itv] Add support for ITV News (#1456) Authored by: ajj8 --- yt_dlp/extractor/itv.py | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/yt_dlp/extractor/itv.py b/yt_dlp/extractor/itv.py index d69782b78..6e6a3673c 100644 --- a/yt_dlp/extractor/itv.py +++ b/yt_dlp/extractor/itv.py @@ -220,16 +220,23 @@ class ITVIE(InfoExtractor): class ITVBTCCIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?itv\.com/btcc/(?:[^/]+/)*(?P[^/?#&]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?itv\.com/(?:news|btcc)/(?:[^/]+/)*(?P[^/?#&]+)' + _TESTS = [{ 'url': 'https://www.itv.com/btcc/articles/btcc-2019-brands-hatch-gp-race-action', 'info_dict': { 'id': 'btcc-2019-brands-hatch-gp-race-action', 'title': 'BTCC 2019: Brands Hatch GP race action', }, 'playlist_count': 12, - } - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1582188683001/HkiHLnNRx_default/index.html?videoId=%s' + }, { + 'url': 'https://www.itv.com/news/2021-10-27/i-have-to-protect-the-country-says-rishi-sunak-as-uk-faces-interest-rate-hike', + 'info_dict': { + 'id': 'i-have-to-protect-the-country-says-rishi-sunak-as-uk-faces-interest-rate-hike', + 'title': 'md5:6ef054dd9f069330db3dcc66cb772d32' + }, + 'playlist_count': 4 + }] + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s' def _real_extract(self, url): playlist_id = self._match_id(url) @@ -240,15 +247,15 @@ class 
ITVBTCCIE(InfoExtractor): '(?s)]+id=[\'"]__NEXT_DATA__[^>]*>([^<]+)', webpage, 'json_map'), playlist_id), lambda x: x['props']['pageProps']['article']['body']['content']) or [] - # Discard empty objects - video_ids = [] + entries = [] for video in json_map: - if video['data'].get('id'): - video_ids.append(video['data']['id']) - - entries = [ - self.url_result( - smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, { + if not any(video['data'].get(attr) == 'Brightcove' for attr in ('name', 'type')): + continue + video_id = video['data']['id'] + account_id = video['data']['accountId'] + player_id = video['data']['playerId'] + entries.append(self.url_result( + smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id), { # ITV does not like some GB IP ranges, so here are some # IP blocks it accepts 'geo_ip_blocks': [ @@ -256,8 +263,7 @@ class ITVBTCCIE(InfoExtractor): ], 'referrer': url, }), - ie=BrightcoveNewIE.ie_key(), video_id=video_id) - for video_id in video_ids] + ie=BrightcoveNewIE.ie_key(), video_id=video_id)) title = self._og_search_title(webpage, fatal=False) From 7b5f3f7c3d87d1bb711f6a76007a352a851e80ca Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Thu, 28 Oct 2021 23:48:09 +0530 Subject: [PATCH 0144/2552] [MLSScoccer] Add extractor (#1452) Authored by: Ashish0804 Closes #1451 --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/mlssoccer.py | 118 +++++++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+) create mode 100644 yt_dlp/extractor/mlssoccer.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 21c71a835..1c5743604 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -793,6 +793,7 @@ from .mlb import ( MLBIE, MLBVideoIE, ) +from .mlssoccer import MLSSoccerIE from .mnet import MnetIE from .moevideo import MoeVideoIE from .mofosex import ( diff --git a/yt_dlp/extractor/mlssoccer.py b/yt_dlp/extractor/mlssoccer.py new file mode 100644 index 000000000..2d65787e2 --- /dev/null +++ b/yt_dlp/extractor/mlssoccer.py @@ -0,0 +1,118 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class MLSSoccerIE(InfoExtractor): + _VALID_DOMAINS = r'(?:(?:cfmontreal|intermiamicf|lagalaxy|lafc|houstondynamofc|dcunited|atlutd|mlssoccer|fcdallas|columbuscrew|coloradorapids|fccincinnati|chicagofirefc|austinfc|nashvillesc|whitecapsfc|sportingkc|soundersfc|sjearthquakes|rsl|timbers|philadelphiaunion|orlandocitysc|newyorkredbulls|nycfc)\.com|(?:torontofc)\.ca|(?:revolutionsoccer)\.net)' + _VALID_URL = r'(?:https?://)(?:www\.)?%s/video/#?(?P[^/&$#?]+)' % _VALID_DOMAINS + + _TESTS = [{ + 'url': 'https://www.mlssoccer.com/video/the-octagon-can-alphonso-davies-lead-canada-to-first-world-cup-since-1986#the-octagon-can-alphonso-davies-lead-canada-to-first-world-cup-since-1986', + 'info_dict': { + 'id': '6276033198001', + 'ext': 'mp4', + 'title': 'The Octagon | Can Alphonso Davies lead Canada to first World Cup since 1986?', + 'description': 'md5:f0a883ee33592a0221798f451a98be8f', + 'thumbnail': 'https://cf-images.us-east-1.prod.boltdns.net/v1/static/5530036772001/1bbc44f6-c63c-4981-82fa-46b0c1f891e0/5c1ca44a-a033-4e98-b531-ff24c4947608/160x90/match/image.jpg', + 'duration': 350.165, + 'timestamp': 1633627291, + 'uploader_id': '5530036772001', + 'tags': ['club/canada'], + 'is_live': False, + 'duration_string': '5:50', + 'upload_date': '20211007', + 'filesize_approx': 255193528.83200002 + }, + 'params': 
{'skip_download': True} + }, { + 'url': 'https://www.whitecapsfc.com/video/highlights-san-jose-earthquakes-vs-vancouver-whitecaps-fc-october-23-2021#highlights-san-jose-earthquakes-vs-vancouver-whitecaps-fc-october-23-2021', + 'only_matching': True + }, { + 'url': 'https://www.torontofc.ca/video/highlights-toronto-fc-vs-cf-montreal-october-23-2021-x6733#highlights-toronto-fc-vs-cf-montreal-october-23-2021-x6733', + 'only_matching': True + }, { + 'url': 'https://www.sportingkc.com/video/post-match-press-conference-john-pulskamp-oct-27-2021#post-match-press-conference-john-pulskamp-oct-27-2021', + 'only_matching': True + }, { + 'url': 'https://www.soundersfc.com/video/highlights-seattle-sounders-fc-vs-sporting-kansas-city-october-23-2021', + 'only_matching': True + }, { + 'url': 'https://www.sjearthquakes.com/video/#highlights-austin-fc-vs-san-jose-earthquakes-june-19-2021', + 'only_matching': True + }, { + 'url': 'https://www.rsl.com/video/2021-u-of-u-health-mic-d-up-vs-colorado-10-16-21#2021-u-of-u-health-mic-d-up-vs-colorado-10-16-21', + 'only_matching': True + }, { + 'url': 'https://www.timbers.com/video/highlights-d-chara-asprilla-with-goals-in-portland-timbers-2-0-win-over-san-jose#highlights-d-chara-asprilla-with-goals-in-portland-timbers-2-0-win-over-san-jose', + 'only_matching': True + }, { + 'url': 'https://www.philadelphiaunion.com/video/highlights-torvphi', + 'only_matching': True + }, { + 'url': 'https://www.orlandocitysc.com/video/highlight-columbus-crew-vs-orlando-city-sc', + 'only_matching': True + }, { + 'url': 'https://www.newyorkredbulls.com/video/all-access-matchday-double-derby-week#all-access-matchday-double-derby-week', + 'only_matching': True + }, { + 'url': 'https://www.nycfc.com/video/highlights-nycfc-1-0-chicago-fire-fc#highlights-nycfc-1-0-chicago-fire-fc', + 'only_matching': True + }, { + 'url': 'https://www.revolutionsoccer.net/video/two-minute-highlights-revs-1-rapids-0-october-27-2021#two-minute-highlights-revs-1-rapids-0-october-27-2021', + 'only_matching': True + }, { + 'url': 'https://www.nashvillesc.com/video/goal-c-j-sapong-nashville-sc-92nd-minute', + 'only_matching': True + }, { + 'url': 'https://www.cfmontreal.com/video/faits-saillants-tor-v-mtl#faits-saillants-orl-v-mtl-x5645', + 'only_matching': True + }, { + 'url': 'https://www.intermiamicf.com/video/all-access-victory-vs-nashville-sc-by-ukg#all-access-victory-vs-nashville-sc-by-ukg', + 'only_matching': True + }, { + 'url': 'https://www.lagalaxy.com/video/#moment-of-the-month-presented-by-san-manuel-casino-rayan-raveloson-scores-his-se', + 'only_matching': True + }, { + 'url': 'https://www.lafc.com/video/breaking-down-lafc-s-final-6-matches-of-the-2021-mls-regular-season#breaking-down-lafc-s-final-6-matches-of-the-2021-mls-regular-season', + 'only_matching': True + }, { + 'url': 'https://www.houstondynamofc.com/video/postgame-press-conference-michael-nelson-presented-by-coushatta-casino-res-x9660#postgame-press-conference-michael-nelson-presented-by-coushatta-casino-res-x9660', + 'only_matching': True + }, { + 'url': 'https://www.dcunited.com/video/tony-alfaro-my-family-pushed-me-to-believe-everything-was-possible', + 'only_matching': True + }, { + 'url': 'https://www.fcdallas.com/video/highlights-fc-dallas-vs-minnesota-united-fc-october-02-2021#highlights-fc-dallas-vs-minnesota-united-fc-october-02-2021', + 'only_matching': True + }, { + 'url': 'https://www.columbuscrew.com/video/match-rewind-columbus-crew-vs-new-york-red-bulls-october-23-2021', + 'only_matching': True + }, { + 'url': 
'https://www.coloradorapids.com/video/postgame-reaction-robin-fraser-october-27#postgame-reaction-robin-fraser-october-27', + 'only_matching': True + }, { + 'url': 'https://www.fccincinnati.com/video/#keeping-cincy-chill-presented-by-coors-lite', + 'only_matching': True + }, { + 'url': 'https://www.chicagofirefc.com/video/all-access-fire-score-dramatic-road-win-in-cincy#all-access-fire-score-dramatic-road-win-in-cincy', + 'only_matching': True + }, { + 'url': 'https://www.austinfc.com/video/highlights-colorado-rapids-vs-austin-fc-september-29-2021#highlights-colorado-rapids-vs-austin-fc-september-29-2021', + 'only_matching': True + }, { + 'url': 'https://www.atlutd.com/video/goal-josef-martinez-scores-in-the-73rd-minute#goal-josef-martinez-scores-in-the-73rd-minute', + 'only_matching': True + }] + + def _real_extract(self, url): + id = self._match_id(url) + webpage = self._download_webpage(url, id) + data_json = self._parse_json(self._html_search_regex(r'data-options\=\"([^\"]+)\"', webpage, 'json'), id)['videoList'][0] + return { + 'id': id, + '_type': 'url', + 'url': 'https://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (data_json['accountId'], data_json['videoId']), + 'ie_key': 'BrightcoveNew', + } From aeaf3b2b92bc4ab8b6f5d90c053aa43d93ab64e1 Mon Sep 17 00:00:00 2001 From: Luc Ritchie Date: Fri, 29 Oct 2021 14:17:10 -0400 Subject: [PATCH 0145/2552] [Coub] Fix media format identification (#1469) Authored by: wlritchi --- yt_dlp/extractor/coub.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/coub.py b/yt_dlp/extractor/coub.py index eba6b73ba..e90aa1954 100644 --- a/yt_dlp/extractor/coub.py +++ b/yt_dlp/extractor/coub.py @@ -57,7 +57,7 @@ class CoubIE(InfoExtractor): file_versions = coub['file_versions'] - QUALITIES = ('low', 'med', 'high') + QUALITIES = ('low', 'med', 'high', 'higher') MOBILE = 'mobile' IPHONE = 'iphone' @@ -86,6 +86,7 @@ class CoubIE(InfoExtractor): 'format_id': '%s-%s-%s' % (HTML5, kind, quality), 'filesize': int_or_none(item.get('size')), 'vcodec': 'none' if kind == 'audio' else None, + 'acodec': 'none' if kind == 'video' else None, 'quality': quality_key(quality), 'source_preference': preference_key(HTML5), }) From e6ff66efc0dcacbfbca4402e717a182c8f6b4e85 Mon Sep 17 00:00:00 2001 From: nixxo Date: Fri, 29 Oct 2021 21:39:55 +0200 Subject: [PATCH 0146/2552] [mediaset] Add playlist support (#1463) Closes #1372 Authored by: nixxo --- yt_dlp/extractor/extractors.py | 5 ++- yt_dlp/extractor/mediaset.py | 82 ++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 1c5743604..9d963ee46 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -744,7 +744,10 @@ from .mdr import MDRIE from .medaltv import MedalTVIE from .mediaite import MediaiteIE from .mediaklikk import MediaKlikkIE -from .mediaset import MediasetIE +from .mediaset import ( + MediasetIE, + MediasetShowIE, +) from .mediasite import ( MediasiteIE, MediasiteCatalogIE, diff --git a/yt_dlp/extractor/mediaset.py b/yt_dlp/extractor/mediaset.py index 26e7abc49..119b39997 100644 --- a/yt_dlp/extractor/mediaset.py +++ b/yt_dlp/extractor/mediaset.py @@ -1,13 +1,17 @@ # coding: utf-8 from __future__ import unicode_literals +import functools import re from .theplatform import ThePlatformBaseIE from ..utils import ( ExtractorError, int_or_none, + OnDemandPagedList, parse_qs, + try_get, + urljoin, update_url_query, ) @@ -212,3 
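# Referring back to the Coub change above: a rough sketch of how list
# position doubles as a quality ranking (the tuple values are from the
# patch; everything else is illustrative):
QUALITIES = ('low', 'med', 'high', 'higher')
quality_key = QUALITIES.index  # larger index = preferred variant
assert quality_key('higher') > quality_key('high')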
+216,81 @@ class MediasetIE(ThePlatformBaseIE): 'subtitles': subtitles, }) return info + + +class MediasetShowIE(MediasetIE): + _VALID_URL = r'''(?x) + (?: + https?:// + (?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/ + (?: + (?:fiction|programmi-tv|serie-tv)/(?:.+?/)? + (?:[a-z]+)_SE(?P\d{12}) + (?:,ST(?P\d{12}))? + (?:,sb(?P\d{9}))?$ + ) + ) + ''' + _TESTS = [{ + # TV Show webpage (with a single playlist) + 'url': 'https://www.mediasetplay.mediaset.it/serie-tv/fireforce/episodi_SE000000001556', + 'info_dict': { + 'id': '000000001556', + 'title': 'Fire Force', + }, + 'playlist_count': 1, + }, { + # TV Show webpage (with multiple playlists) + 'url': 'https://www.mediasetplay.mediaset.it/programmi-tv/leiene/leiene_SE000000000061,ST000000002763', + 'info_dict': { + 'id': '000000002763', + 'title': 'Le Iene', + }, + 'playlist_count': 7, + }, { + # TV Show specific playlist (single page) + 'url': 'https://www.mediasetplay.mediaset.it/serie-tv/fireforce/episodi_SE000000001556,ST000000002738,sb100013107', + 'info_dict': { + 'id': '100013107', + 'title': 'Episodi', + }, + 'playlist_count': 4, + }, { + # TV Show specific playlist (with multiple pages) + 'url': 'https://www.mediasetplay.mediaset.it/programmi-tv/leiene/iservizi_SE000000000061,ST000000002763,sb100013375', + 'info_dict': { + 'id': '100013375', + 'title': 'I servizi', + }, + 'playlist_count': 53, + }] + + _BY_SUBBRAND = 'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs-v2?byCustomValue={subBrandId}{%s}&sort=:publishInfo_lastPublished|desc,tvSeasonEpisodeNumber|desc&range=%d-%d' + _PAGE_SIZE = 25 + + def _fetch_page(self, sb, page): + lower_limit = page * self._PAGE_SIZE + 1 + upper_limit = lower_limit + self._PAGE_SIZE - 1 + content = self._download_json( + self._BY_SUBBRAND % (sb, lower_limit, upper_limit), sb) + for entry in content.get('entries') or []: + yield self.url_result( + 'mediaset:' + entry['guid'], + playlist_title=entry['mediasetprogram$subBrandDescription']) + + def _real_extract(self, url): + playlist_id, st, sb = self._match_valid_url(url).group('id', 'st', 'sb') + if not sb: + page = self._download_webpage(url, playlist_id) + entries = [self.url_result(urljoin('https://www.mediasetplay.mediaset.it', url)) + for url in re.findall(r'href="([^<>=]+SE\d{12},ST\d{12},sb\d{9})">[^<]+<', page)] + title = (self._html_search_regex(r'(?s)]*>(.+?)
</h1>
', page, 'title', default=None) + or self._og_search_title(page)) + return self.playlist_result(entries, st or playlist_id, title) + + entries = OnDemandPagedList( + functools.partial(self._fetch_page, sb), + self._PAGE_SIZE) + title = try_get(entries, lambda x: x[0]['playlist_title']) + + return self.playlist_result(entries, sb, title) From 10beccc980ea04913603b802d06ffaebc011cfc8 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 30 Oct 2021 01:13:51 +0530 Subject: [PATCH 0147/2552] [FormatSort] Fix some fields' defaults Closes #1479 --- yt_dlp/extractor/common.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 369cff418..aa98c0cc9 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1549,8 +1549,8 @@ class InfoExtractor(object): 'ie_pref': {'priority': True, 'type': 'extractor'}, 'hasvid': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)}, 'hasaud': {'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)}, - 'lang': {'convert': 'ignore', 'field': 'language_preference'}, - 'quality': {'convert': 'float_none', 'default': -1}, + 'lang': {'convert': 'float', 'field': 'language_preference', 'default': -1}, + 'quality': {'convert': 'float', 'default': -1}, 'filesize': {'convert': 'bytes'}, 'fs_approx': {'convert': 'bytes', 'field': 'filesize_approx'}, 'id': {'convert': 'string', 'field': 'format_id'}, @@ -1561,7 +1561,7 @@ class InfoExtractor(object): 'vbr': {'convert': 'float_none'}, 'abr': {'convert': 'float_none'}, 'asr': {'convert': 'float_none'}, - 'source': {'convert': 'ignore', 'field': 'source_preference'}, + 'source': {'convert': 'float', 'field': 'source_preference', 'default': -1}, 'codec': {'type': 'combined', 'field': ('vcodec', 'acodec')}, 'br': {'type': 'combined', 'field': ('tbr', 'vbr', 'abr'), 'same_limit': True}, From 5e7bbac3057e06bb0d5d8cb3cfd5f607d5cf8459 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 30 Oct 2021 01:53:28 +0530 Subject: [PATCH 0148/2552] [generic] parse jwplayer with only the json URL Closes #1476 --- yt_dlp/extractor/generic.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 5918c8c56..ffcf9b303 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -1188,6 +1188,21 @@ class GenericIE(InfoExtractor): }, 'skip': 'Only has video a few mornings per month, see http://www.suffolk.edu/sjc/', }, + # jwplayer with only the json URL + { + 'url': 'https://www.hollywoodreporter.com/news/general-news/dunkirk-team-reveals-what-christopher-nolan-said-oscar-win-meet-your-oscar-winner-1092454', + 'info_dict': { + 'id': 'TljWkvWH', + 'ext': 'mp4', + 'upload_date': '20180306', + 'title': 'md5:91eb1862f6526415214f62c00b453936', + 'description': 'md5:73048ae50ae953da10549d1d2fe9b3aa', + 'timestamp': 1520367225, + }, + 'params': { + 'skip_download': True, + }, + }, # Complex jwplayer { 'url': 'http://www.indiedb.com/games/king-machine/videos', @@ -3503,6 +3518,13 @@ class GenericIE(InfoExtractor): jwplayer_data = self._find_jwplayer_data( webpage, video_id, transform_source=js_to_json) if jwplayer_data: + if isinstance(jwplayer_data.get('playlist'), str): + return { + **info_dict, + '_type': 'url', + 'ie_key': JWPlatformIE.ie_key(), + 'url': jwplayer_data['playlist'], + } try: info = self._parse_jwplayer_data( jwplayer_data, video_id, require_title=False, base_url=url) From 
fa0b816e379b79abc3f4e64bd8d750fc99e40775 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 30 Oct 2021 02:03:53 +0530 Subject: [PATCH 0149/2552] [generic] Detect more json_ld Closes #1475 --- yt_dlp/extractor/generic.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index ffcf9b303..0d279016b 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -3583,8 +3583,7 @@ class GenericIE(InfoExtractor): return info_dict # Looking for http://schema.org/VideoObject - json_ld = self._search_json_ld( - webpage, video_id, default={}, expected_type='VideoObject') + json_ld = self._search_json_ld(webpage, video_id, default={}) if json_ld.get('url'): return merge_dicts(json_ld, info_dict) From 6b301aaa34545b217fdcc276a65f683de518cbf4 Mon Sep 17 00:00:00 2001 From: Sipherdrakon <64430430+Sipherdrakon@users.noreply.github.com> Date: Fri, 29 Oct 2021 21:18:59 -0400 Subject: [PATCH 0150/2552] [mtv] Fix some videos (#1453) Partial fix for #713 Authored by: Sipherdrakon --- yt_dlp/extractor/mtv.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/mtv.py b/yt_dlp/extractor/mtv.py index e0608845d..141dd7deb 100644 --- a/yt_dlp/extractor/mtv.py +++ b/yt_dlp/extractor/mtv.py @@ -305,6 +305,14 @@ class MTVServicesInfoExtractor(InfoExtractor): if not mgid: mgid = self._extract_triforce_mgid(webpage) + if not mgid: + mgid = self._search_regex( + r'"videoConfig":{"videoId":"(mgid:.*?)"', webpage, 'mgid', default=None) + + if not mgid: + mgid = self._search_regex( + r'"media":{"video":{"config":{"uri":"(mgid:.*?)"', webpage, 'mgid', default=None) + if not mgid: data = self._parse_json(self._search_regex( r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None) @@ -313,10 +321,6 @@ class MTVServicesInfoExtractor(InfoExtractor): video_player = self._extract_child_with_type(ab_testing or main_container, 'VideoPlayer') mgid = video_player['props']['media']['video']['config']['uri'] - if not mgid: - mgid = self._search_regex( - r'"media":{"video":{"config":{"uri":"(mgid:.*?)"', webpage, 'mgid', default=None) - return mgid def _real_extract(self, url): From 652fb0d446524af4b783276babd55f5fc6a3afeb Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Sat, 30 Oct 2021 23:26:00 +0530 Subject: [PATCH 0151/2552] [VLive] Add upload_date and thumbnail (#1486) Closes #1472 Authored by: Ashish0804 --- yt_dlp/extractor/naver.py | 7 ++++--- yt_dlp/extractor/vlive.py | 11 +++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/naver.py b/yt_dlp/extractor/naver.py index acf53c1ff..a6821ba86 100644 --- a/yt_dlp/extractor/naver.py +++ b/yt_dlp/extractor/naver.py @@ -40,6 +40,7 @@ class NaverBaseIE(InfoExtractor): formats.append({ 'format_id': '%s_%s' % (stream.get('type') or stream_type, dict_get(encoding_option, ('name', 'id'))), 'url': stream_url, + 'ext': 'mp4', 'width': int_or_none(encoding_option.get('width')), 'height': int_or_none(encoding_option.get('height')), 'vbr': int_or_none(bitrate.get('video')), @@ -174,7 +175,7 @@ class NaverLiveIE(InfoExtractor): 'url': 'https://tv.naver.com/l/52010', 'info_dict': { 'id': '52010', - 'ext': 'm3u8', + 'ext': 'mp4', 'title': '[LIVE] 뉴스특보 : "수도권 거리두기, 2주간 2단계로 조정"', 'description': 'md5:df7f0c237a5ed5e786ce5c91efbeaab3', 'channel_id': 'NTV-ytnnews24-0', @@ -184,7 +185,7 @@ class NaverLiveIE(InfoExtractor): 'url': 'https://tv.naver.com/l/51549', 'info_dict': { 'id': '51549', 
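# The MTV change above tries several page layouts in turn; the idea
# reduces to "first regex that matches wins" (patterns and page below
# are made-up stand-ins, not the extractor's real inputs):
import re

def _first_group(patterns, page):
    for pattern in patterns:
        mobj = re.search(pattern, page)
        if mobj:
            return mobj.group(1)

assert _first_group([r'"videoId":"(mgid:[^"]*)"', r'"uri":"(mgid:[^"]*)"'],
                    '{"uri":"mgid:arc:video:x"}') == 'mgid:arc:video:x'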
- 'ext': 'm3u8', + 'ext': 'mp4', 'title': '연합뉴스TV - 코로나19 뉴스특보', 'description': 'md5:c655e82091bc21e413f549c0eaccc481', 'channel_id': 'NTV-yonhapnewstv-0', @@ -233,7 +234,7 @@ class NaverLiveIE(InfoExtractor): continue formats.extend(self._extract_m3u8_formats( - quality.get('url'), video_id, 'm3u8', + quality.get('url'), video_id, 'mp4', m3u8_id=quality.get('qualityId'), live=True )) self._sort_formats(formats) diff --git a/yt_dlp/extractor/vlive.py b/yt_dlp/extractor/vlive.py index 681d95902..4340b1d4c 100644 --- a/yt_dlp/extractor/vlive.py +++ b/yt_dlp/extractor/vlive.py @@ -86,6 +86,12 @@ class VLiveIE(VLiveBaseIE): 'creator': "Girl's Day", 'view_count': int, 'uploader_id': 'muploader_a', + 'upload_date': '20150817', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)$', + 'timestamp': 1439816449, + }, + 'params': { + 'skip_download': True, }, }, { 'url': 'http://www.vlive.tv/video/16937', @@ -97,6 +103,9 @@ class VLiveIE(VLiveBaseIE): 'view_count': int, 'subtitles': 'mincount:12', 'uploader_id': 'muploader_j', + 'upload_date': '20161112', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)$', + 'timestamp': 1478923074, }, 'params': { 'skip_download': True, @@ -173,6 +182,8 @@ class VLiveIE(VLiveBaseIE): 'view_count': int_or_none(video.get('playCount')), 'like_count': int_or_none(video.get('likeCount')), 'comment_count': int_or_none(video.get('commentCount')), + 'timestamp': int_or_none(video.get('createdAt'), scale=1000), + 'thumbnail': video.get('thumb'), } video_type = video.get('type') From cd9ea4104b8b5075ea4bfe92c76130e267686805 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Sun, 31 Oct 2021 02:54:39 +0000 Subject: [PATCH 0152/2552] [instagram] Add more formats when logged in (#1487) Authored by: u-spec-png --- yt_dlp/extractor/instagram.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py index ccfcddd5b..8c935c251 100644 --- a/yt_dlp/extractor/instagram.py +++ b/yt_dlp/extractor/instagram.py @@ -222,8 +222,8 @@ class InstagramIE(InfoExtractor): dict) if media: video_url = media.get('video_url') - height = int_or_none(media.get('dimensions', {}).get('height')) - width = int_or_none(media.get('dimensions', {}).get('width')) + height = try_get(media, lambda x: x['dimensions']['height']) + width = try_get(media, lambda x: x['dimensions']['width']) description = try_get( media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'], compat_str) or media.get('caption') @@ -231,8 +231,8 @@ class InstagramIE(InfoExtractor): thumbnail = media.get('display_src') or media.get('display_url') duration = float_or_none(media.get('video_duration')) timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date')) - uploader = media.get('owner', {}).get('full_name') - uploader_id = media.get('owner', {}).get('username') + uploader = try_get(media, lambda x: x['owner']['full_name']) + uploader_id = try_get(media, lambda x: x['owner']['username']) def get_count(keys, kind): for key in variadic(keys): @@ -294,6 +294,10 @@ class InstagramIE(InfoExtractor): 'width': width, 'height': height, }] + dash = try_get(media, lambda x: x['dash_info']['video_dash_manifest']) + if dash: + formats.extend(self._parse_mpd_formats(self._parse_xml(dash, video_id), mpd_id='dash')) + self._sort_formats(formats) if not uploader_id: uploader_id = self._search_regex( From 404f611f1c4aa516fbc4301aa7b8f734ee4bc67b Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 31 Oct 
2021 09:53:58 +0530 Subject: [PATCH 0153/2552] [youtube] Fix throttling by decrypting n-sig (#1437) --- .gitignore | 1 + test/test_jsinterp.py | 50 ++++ test/test_youtube_signature.py | 70 +++-- yt_dlp/extractor/youtube.py | 91 ++++-- yt_dlp/jsinterp.py | 488 +++++++++++++++++++++++++-------- 5 files changed, 550 insertions(+), 150 deletions(-) diff --git a/.gitignore b/.gitignore index bf06c81f0..790989b3c 100644 --- a/.gitignore +++ b/.gitignore @@ -41,6 +41,7 @@ cookies *.webp *.annotations.xml *.description +.cache/ # Allow config/media files in testdata !test/** diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index 8b2b60403..380e52c33 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -112,6 +112,56 @@ class TestJSInterpreter(unittest.TestCase): ''') self.assertEqual(jsi.call_function('z'), 5) + def test_for_loop(self): + jsi = JSInterpreter(''' + function x() { a=0; for (i=0; i-10; i++) {a++} a } + ''') + self.assertEqual(jsi.call_function('x'), 10) + + def test_switch(self): + jsi = JSInterpreter(''' + function x(f) { switch(f){ + case 1:f+=1; + case 2:f+=2; + case 3:f+=3;break; + case 4:f+=4; + default:f=0; + } return f } + ''') + self.assertEqual(jsi.call_function('x', 1), 7) + self.assertEqual(jsi.call_function('x', 3), 6) + self.assertEqual(jsi.call_function('x', 5), 0) + + def test_try(self): + jsi = JSInterpreter(''' + function x() { try{return 10} catch(e){return 5} } + ''') + self.assertEqual(jsi.call_function('x'), 10) + + def test_for_loop_continue(self): + jsi = JSInterpreter(''' + function x() { a=0; for (i=0; i-10; i++) { continue; a++ } a } + ''') + self.assertEqual(jsi.call_function('x'), 0) + + def test_for_loop_break(self): + jsi = JSInterpreter(''' + function x() { a=0; for (i=0; i-10; i++) { break; a++ } a } + ''') + self.assertEqual(jsi.call_function('x'), 0) + + def test_literal_list(self): + jsi = JSInterpreter(''' + function x() { [1, 2, "asdf", [5, 6, 7]][3] } + ''') + self.assertEqual(jsi.call_function('x'), [5, 6, 7]) + + def test_comma(self): + jsi = JSInterpreter(''' + function x() { a=5; a -= 1, a+=3; return a } + ''') + self.assertEqual(jsi.call_function('x'), 7) + if __name__ == '__main__': unittest.main() diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index dcf6ab60d..f40a06952 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -14,9 +14,10 @@ import string from test.helper import FakeYDL, is_download_test from yt_dlp.extractor import YoutubeIE +from yt_dlp.jsinterp import JSInterpreter from yt_dlp.compat import compat_str, compat_urlretrieve -_TESTS = [ +_SIG_TESTS = [ ( 'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js', 86, @@ -64,6 +65,13 @@ _TESTS = [ ) ] +_NSIG_TESTS = [ + ( + 'https://www.youtube.com/s/player/9216d1f7/player_ias.vflset/en_US/base.js', + 'SLp9F5bwjAdhE9F-', 'gWnb9IK2DJ8Q1w', + ), # TODO: Add more tests +] + @is_download_test class TestPlayerInfo(unittest.TestCase): @@ -97,35 +105,49 @@ class TestSignature(unittest.TestCase): os.mkdir(self.TESTDATA_DIR) -def make_tfunc(url, sig_input, expected_sig): - m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$', url) - assert m, '%r should follow URL format' % url - test_id = m.group(1) +def t_factory(name, sig_func, url_pattern): + def make_tfunc(url, sig_input, expected_sig): + m = url_pattern.match(url) + assert m, '%r should follow URL format' % url + test_id = m.group('id') + + def test_func(self): + basename = f'player-{name}-{test_id}.js' + fn = 
os.path.join(self.TESTDATA_DIR, basename) + + if not os.path.exists(fn): + compat_urlretrieve(url, fn) + with io.open(fn, encoding='utf-8') as testf: + jscode = testf.read() + self.assertEqual(sig_func(jscode, sig_input), expected_sig) + + test_func.__name__ = f'test_{name}_js_{test_id}' + setattr(TestSignature, test_func.__name__, test_func) + return make_tfunc + - def test_func(self): - basename = 'player-%s.js' % test_id - fn = os.path.join(self.TESTDATA_DIR, basename) +def signature(jscode, sig_input): + func = YoutubeIE(FakeYDL())._parse_sig_js(jscode) + src_sig = ( + compat_str(string.printable[:sig_input]) + if isinstance(sig_input, int) else sig_input) + return func(src_sig) - if not os.path.exists(fn): - compat_urlretrieve(url, fn) - ydl = FakeYDL() - ie = YoutubeIE(ydl) - with io.open(fn, encoding='utf-8') as testf: - jscode = testf.read() - func = ie._parse_sig_js(jscode) - src_sig = ( - compat_str(string.printable[:sig_input]) - if isinstance(sig_input, int) else sig_input) - got_sig = func(src_sig) - self.assertEqual(got_sig, expected_sig) +def n_sig(jscode, sig_input): + funcname = YoutubeIE(FakeYDL())._extract_n_function_name(jscode) + return JSInterpreter(jscode).call_function(funcname, sig_input) - test_func.__name__ = str('test_signature_js_' + test_id) - setattr(TestSignature, test_func.__name__, test_func) +make_sig_test = t_factory( + 'signature', signature, re.compile(r'.*-(?P[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$')) +for test_spec in _SIG_TESTS: + make_sig_test(*test_spec) -for test_spec in _TESTS: - make_tfunc(*test_spec) +make_nsig_test = t_factory( + 'nsig', n_sig, re.compile(r'.+/player/(?P[a-zA-Z0-9_-]+)/.+.js$')) +for test_spec in _NSIG_TESTS: + make_nsig_test(*test_spec) if __name__ == '__main__': diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 658b45fe1..56cd2ed8d 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -1720,7 +1720,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): raise ExtractorError('Cannot identify player %r' % player_url) return id_m.group('id') - def _load_player(self, video_id, player_url, fatal=True) -> bool: + def _load_player(self, video_id, player_url, fatal=True): player_id = self._extract_player_info(player_url) if player_id not in self._code_cache: code = self._download_webpage( @@ -1729,7 +1729,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): errnote='Download of %s failed' % player_url) if code: self._code_cache[player_id] = code - return player_id in self._code_cache + return self._code_cache.get(player_id) def _extract_signature_function(self, video_id, player_url, example_sig): player_id = self._extract_player_info(player_url) @@ -1743,8 +1743,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if cache_spec is not None: return lambda s: ''.join(s[i] for i in cache_spec) - if self._load_player(video_id, player_url): - code = self._code_cache[player_id] + code = self._load_player(video_id, player_url) + if code: res = self._parse_sig_js(code) test_string = ''.join(map(compat_chr, range(len(example_sig)))) @@ -1755,6 +1755,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return res def _print_sig_code(self, func, example_sig): + if not self.get_param('youtube_print_sig_code'): + return + def gen_sig_code(idxs): def _genslice(start, end, step): starts = '' if start == 0 else str(start) @@ -1831,13 +1834,58 @@ class YoutubeIE(YoutubeBaseInfoExtractor): ) self._player_cache[player_id] = func func = self._player_cache[player_id] - if 
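# Rough sketch of the memoisation idea used around this step: the
# extracted function source is persisted per player id, and each
# decrypted value is cached per input. The transform below is a
# placeholder, not the real JS-interpreted n function:
_nsig_cache = {}

def _cached_decrypt(s):
    if s not in _nsig_cache:
        _nsig_cache[s] = s[::-1]  # placeholder transform
    return _nsig_cache[s]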
self.get_param('youtube_print_sig_code'): - self._print_sig_code(func, s) + self._print_sig_code(func, s) return func(s) except Exception as e: - tb = traceback.format_exc() - raise ExtractorError( - 'Signature extraction failed: ' + tb, cause=e) + raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e) + + def _decrypt_nsig(self, s, video_id, player_url): + """Turn the encrypted n field into a working signature""" + if player_url is None: + raise ExtractorError('Cannot decrypt nsig without player_url') + if player_url.startswith('//'): + player_url = 'https:' + player_url + elif not re.match(r'https?://', player_url): + player_url = compat_urlparse.urljoin( + 'https://www.youtube.com', player_url) + + sig_id = ('nsig_value', s) + if sig_id in self._player_cache: + return self._player_cache[sig_id] + + try: + player_id = ('nsig', player_url) + if player_id not in self._player_cache: + self._player_cache[player_id] = self._extract_n_function(video_id, player_url) + func = self._player_cache[player_id] + self._player_cache[sig_id] = func(s) + self.write_debug(f'Decrypted nsig {s} => {self._player_cache[sig_id]}') + return self._player_cache[sig_id] + except Exception as e: + raise ExtractorError(traceback.format_exc(), cause=e) + + def _extract_n_function_name(self, jscode): + return self._search_regex( + (r'\.get\("n"\)\)&&\(b=(?P[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',), + jscode, 'Initial JS player n function name', group='nfunc') + + def _extract_n_function(self, video_id, player_url): + player_id = self._extract_player_info(player_url) + func_code = self._downloader.cache.load('youtube-nsig', player_id) + + if func_code: + jsi = JSInterpreter(func_code) + else: + jscode = self._load_player(video_id, player_url) + funcname = self._extract_n_function_name(jscode) + jsi = JSInterpreter(jscode) + func_code = jsi.extract_function_code(funcname) + self._downloader.cache.store('youtube-nsig', player_id, func_code) + + if self.get_param('youtube_print_sig_code'): + self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n') + + return lambda s: jsi.extract_function_from_code(*func_code)([s]) def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False): """ @@ -1856,9 +1904,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): raise ExtractorError(error_msg) self.report_warning(error_msg) return - if self._load_player(video_id, player_url, fatal=fatal): - player_id = self._extract_player_info(player_url) - code = self._code_cache[player_id] + code = self._load_player(video_id, player_url, fatal=fatal) + if code: sts = int_or_none(self._search_regex( r'(?:signatureTimestamp|sts)\s*:\s*(?P[0-9]{5})', code, 'JS player signature timestamp', group='sts', fatal=fatal)) @@ -2440,6 +2487,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor): sp = try_get(sc, lambda x: x['sp'][0]) or 'signature' fmt_url += '&' + sp + '=' + signature + query = parse_qs(fmt_url) + throttled = False + if query.get('ratebypass') != ['yes'] and query.get('n'): + try: + fmt_url = update_url_query(fmt_url, { + 'n': self._decrypt_nsig(query['n'][0], video_id, player_url)}) + except ExtractorError as e: + self.report_warning(f'nsig extraction failed: You may experience throttling for some formats\n{e}', only_once=True) + throttled = True + if itag: itags.append(itag) stream_ids.append(stream_id) @@ -2453,7 +2510,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'format_note': ', '.join(filter(None, ( '%s%s' % (audio_track.get('displayName') or '', ' (default)' if 
audio_track.get('audioIsDefault') else ''), - fmt.get('qualityLabel') or quality.replace('audio_quality_', '')))), + fmt.get('qualityLabel') or quality.replace('audio_quality_', ''), + throttled and 'THROTTLED'))), + 'source_preference': -10 if not throttled else -1, 'fps': int_or_none(fmt.get('fps')), 'height': height, 'quality': q(quality), @@ -2645,12 +2704,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if reason: self.raise_no_formats(reason, expected=True) - for f in formats: - if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']: # throttled - f['source_preference'] = -10 - # TODO: this method is not reliable - f['format_note'] = format_field(f, 'format_note', '%s ') + '(maybe throttled)' - # Source is given priority since formats that throttle are given lower source_preference # When throttling issue is fully fixed, remove this self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang')) diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py index 7bda59610..5c79a8110 100644 --- a/yt_dlp/jsinterp.py +++ b/yt_dlp/jsinterp.py @@ -1,5 +1,4 @@ -from __future__ import unicode_literals - +from collections.abc import MutableMapping import json import operator import re @@ -22,11 +21,54 @@ _OPERATORS = [ ('*', operator.mul), ] _ASSIGN_OPERATORS = [(op + '=', opfunc) for op, opfunc in _OPERATORS] -_ASSIGN_OPERATORS.append(('=', lambda cur, right: right)) +_ASSIGN_OPERATORS.append(('=', (lambda cur, right: right))) _NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*' +class JS_Break(ExtractorError): + def __init__(self): + ExtractorError.__init__(self, 'Invalid break') + + +class JS_Continue(ExtractorError): + def __init__(self): + ExtractorError.__init__(self, 'Invalid continue') + + +class LocalNameSpace(MutableMapping): + def __init__(self, *stack): + self.stack = tuple(stack) + + def __getitem__(self, key): + for scope in self.stack: + if key in scope: + return scope[key] + raise KeyError(key) + + def __setitem__(self, key, value): + for scope in self.stack: + if key in scope: + scope[key] = value + break + else: + self.stack[0][key] = value + return value + + def __delitem__(self, key): + raise NotImplementedError('Deleting is not supported') + + def __iter__(self): + for scope in self.stack: + yield from scope + + def __len__(self, key): + return len(iter(self)) + + def __repr__(self): + return f'LocalNameSpace{self.stack}' + + class JSInterpreter(object): def __init__(self, code, objects=None): if objects is None: @@ -34,11 +76,58 @@ class JSInterpreter(object): self.code = code self._functions = {} self._objects = objects + self.__named_object_counter = 0 + + def _named_object(self, namespace, obj): + self.__named_object_counter += 1 + name = f'__yt_dlp_jsinterp_obj{self.__named_object_counter}' + namespace[name] = obj + return name + + @staticmethod + def _seperate(expr, delim=',', max_split=None): + if not expr: + return + parens = {'(': 0, '{': 0, '[': 0, ']': 0, '}': 0, ')': 0} + start, splits, pos, max_pos = 0, 0, 0, len(delim) - 1 + for idx, char in enumerate(expr): + if char in parens: + parens[char] += 1 + is_in_parens = (parens['['] - parens[']'] + or parens['('] - parens[')'] + or parens['{'] - parens['}']) + if char == delim[pos] and not is_in_parens: + if pos == max_pos: + pos = 0 + yield expr[start: idx - max_pos] + start = idx + 1 + splits += 1 + if max_split and splits >= max_split: + break + else: + pos += 1 + else: + pos = 0 + yield expr[start:] + + @staticmethod + def _seperate_at_paren(expr, delim): + seperated = 
list(JSInterpreter._seperate(expr, delim, 1)) + if len(seperated) < 2: + raise ExtractorError(f'No terminating paren {delim} in {expr}') + return seperated[0][1:].strip(), seperated[1].strip() def interpret_statement(self, stmt, local_vars, allow_recursion=100): if allow_recursion < 0: raise ExtractorError('Recursion limit reached') + sub_statements = list(self._seperate(stmt, ';')) + stmt = (sub_statements or ['']).pop() + for sub_stmt in sub_statements: + ret, should_abort = self.interpret_statement(sub_stmt, local_vars, allow_recursion - 1) + if should_abort: + return ret + should_abort = False stmt = stmt.lstrip() stmt_m = re.match(r'var\s', stmt) @@ -61,25 +150,118 @@ class JSInterpreter(object): if expr == '': # Empty expression return None + if expr.startswith('{'): + inner, outer = self._seperate_at_paren(expr, '}') + inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion - 1) + if not outer or should_abort: + return inner + else: + expr = json.dumps(inner) + outer + if expr.startswith('('): - parens_count = 0 - for m in re.finditer(r'[()]', expr): - if m.group(0) == '(': - parens_count += 1 + inner, outer = self._seperate_at_paren(expr, ')') + inner = self.interpret_expression(inner, local_vars, allow_recursion) + if not outer: + return inner + else: + expr = json.dumps(inner) + outer + + if expr.startswith('['): + inner, outer = self._seperate_at_paren(expr, ']') + name = self._named_object(local_vars, [ + self.interpret_expression(item, local_vars, allow_recursion) + for item in self._seperate(inner)]) + expr = name + outer + + m = re.match(r'try\s*', expr) + if m: + if expr[m.end()] == '{': + try_expr, expr = self._seperate_at_paren(expr[m.end():], '}') + else: + try_expr, expr = expr[m.end() - 1:], '' + ret, should_abort = self.interpret_statement(try_expr, local_vars, allow_recursion - 1) + if should_abort: + return ret + return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0] + + m = re.match(r'catch\s*\(', expr) + if m: + # We ignore the catch block + _, expr = self._seperate_at_paren(expr, '}') + return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0] + + m = re.match(r'for\s*\(', expr) + if m: + constructor, remaining = self._seperate_at_paren(expr[m.end() - 1:], ')') + if remaining.startswith('{'): + body, expr = self._seperate_at_paren(remaining, '}') + else: + m = re.match(r'switch\s*\(', remaining) # FIXME + if m: + switch_val, remaining = self._seperate_at_paren(remaining[m.end() - 1:], ')') + body, expr = self._seperate_at_paren(remaining, '}') + body = 'switch(%s){%s}' % (switch_val, body) else: - parens_count -= 1 - if parens_count == 0: - sub_expr = expr[1:m.start()] - sub_result = self.interpret_expression( - sub_expr, local_vars, allow_recursion) - remaining_expr = expr[m.end():].strip() - if not remaining_expr: - return sub_result - else: - expr = json.dumps(sub_result) + remaining_expr + body, expr = remaining, '' + start, cndn, increment = self._seperate(constructor, ';') + if self.interpret_statement(start, local_vars, allow_recursion - 1)[1]: + raise ExtractorError( + f'Premature return in the initialization of a for loop in {constructor!r}') + while True: + if not self.interpret_expression(cndn, local_vars, allow_recursion): + break + try: + ret, should_abort = self.interpret_statement(body, local_vars, allow_recursion - 1) + if should_abort: + return ret + except JS_Break: + break + except JS_Continue: + pass + if self.interpret_statement(increment, local_vars, allow_recursion - 1)[1]: + 
raise ExtractorError( + f'Premature return in the initialization of a for loop in {constructor!r}') + return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0] + + m = re.match(r'switch\s*\(', expr) + if m: + switch_val, remaining = self._seperate_at_paren(expr[m.end() - 1:], ')') + switch_val = self.interpret_expression(switch_val, local_vars, allow_recursion) + body, expr = self._seperate_at_paren(remaining, '}') + body, default = body.split('default:') if 'default:' in body else (body, None) + items = body.split('case ')[1:] + if default: + items.append(f'default:{default}') + matched = False + for item in items: + case, stmt = [i.strip() for i in self._seperate(item, ':', 1)] + matched = matched or case == 'default' or switch_val == self.interpret_expression(case, local_vars, allow_recursion) + if matched: + try: + ret, should_abort = self.interpret_statement(stmt, local_vars, allow_recursion - 1) + if should_abort: + return ret + except JS_Break: break - else: - raise ExtractorError('Premature end of parens in %r' % expr) + return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0] + + # Comma seperated statements + sub_expressions = list(self._seperate(expr)) + expr = sub_expressions.pop().strip() if sub_expressions else '' + for sub_expr in sub_expressions: + self.interpret_expression(sub_expr, local_vars, allow_recursion) + + for m in re.finditer(rf'''(?x) + (?P\+\+|--)(?P{_NAME_RE})| + (?P{_NAME_RE})(?P\+\+|--)''', expr): + var = m.group('var1') or m.group('var2') + start, end = m.span() + sign = m.group('pre_sign') or m.group('post_sign') + ret = local_vars[var] + local_vars[var] += 1 if sign[0] == '+' else -1 + if m.group('pre_sign'): + ret = local_vars[var] + expr = expr[:start] + json.dumps(ret) + expr[end:] for op, opfunc in _ASSIGN_OPERATORS: m = re.match(r'''(?x) @@ -88,14 +270,13 @@ class JSInterpreter(object): (?P.*)$''' % (_NAME_RE, re.escape(op)), expr) if not m: continue - right_val = self.interpret_expression( - m.group('expr'), local_vars, allow_recursion - 1) + right_val = self.interpret_expression(m.group('expr'), local_vars, allow_recursion) if m.groupdict().get('index'): lvar = local_vars[m.group('out')] - idx = self.interpret_expression( - m.group('index'), local_vars, allow_recursion) - assert isinstance(idx, int) + idx = self.interpret_expression(m.group('index'), local_vars, allow_recursion) + if not isinstance(idx, int): + raise ExtractorError(f'List indices must be integers: {idx}') cur = lvar[idx] val = opfunc(cur, right_val) lvar[idx] = val @@ -109,8 +290,13 @@ class JSInterpreter(object): if expr.isdigit(): return int(expr) + if expr == 'break': + raise JS_Break() + elif expr == 'continue': + raise JS_Continue() + var_m = re.match( - r'(?!if|return|true|false)(?P%s)$' % _NAME_RE, + r'(?!if|return|true|false|null)(?P%s)$' % _NAME_RE, expr) if var_m: return local_vars[var_m.group('name')] @@ -124,91 +310,154 @@ class JSInterpreter(object): r'(?P%s)\[(?P.+)\]$' % _NAME_RE, expr) if m: val = local_vars[m.group('in')] - idx = self.interpret_expression( - m.group('idx'), local_vars, allow_recursion - 1) + idx = self.interpret_expression(m.group('idx'), local_vars, allow_recursion) return val[idx] + for op, opfunc in _OPERATORS: + seperated = list(self._seperate(expr, op)) + if len(seperated) < 2: + continue + right_val = seperated.pop() + left_val = op.join(seperated) + left_val, should_abort = self.interpret_statement( + left_val, local_vars, allow_recursion - 1) + if should_abort: + raise ExtractorError(f'Premature left-side 
return of {op} in {expr!r}') + right_val, should_abort = self.interpret_statement( + right_val, local_vars, allow_recursion - 1) + if should_abort: + raise ExtractorError(f'Premature right-side return of {op} in {expr!r}') + return opfunc(left_val or 0, right_val) + m = re.match( - r'(?P%s)(?:\.(?P[^(]+)|\[(?P[^]]+)\])\s*(?:\(+(?P[^()]*)\))?$' % _NAME_RE, + r'(?P%s)(?:\.(?P[^(]+)|\[(?P[^]]+)\])\s*' % _NAME_RE, expr) if m: variable = m.group('var') member = remove_quotes(m.group('member') or m.group('member2')) - arg_str = m.group('args') - - if variable in local_vars: - obj = local_vars[variable] - else: - if variable not in self._objects: - self._objects[variable] = self.extract_object(variable) - obj = self._objects[variable] - - if arg_str is None: - # Member access - if member == 'length': - return len(obj) - return obj[member] - - assert expr.endswith(')') - # Function call - if arg_str == '': - argvals = tuple() + arg_str = expr[m.end():] + if arg_str.startswith('('): + arg_str, remaining = self._seperate_at_paren(arg_str, ')') else: - argvals = tuple([ + arg_str, remaining = None, arg_str + + def assertion(cndn, msg): + """ assert, but without risk of getting optimized out """ + if not cndn: + raise ExtractorError(f'{member} {msg}: {expr}') + + def eval_method(): + nonlocal member + if variable == 'String': + obj = str + elif variable in local_vars: + obj = local_vars[variable] + else: + if variable not in self._objects: + self._objects[variable] = self.extract_object(variable) + obj = self._objects[variable] + + if arg_str is None: + # Member access + if member == 'length': + return len(obj) + return obj[member] + + # Function call + argvals = [ self.interpret_expression(v, local_vars, allow_recursion) - for v in arg_str.split(',')]) - - if member == 'split': - assert argvals == ('',) - return list(obj) - if member == 'join': - assert len(argvals) == 1 - return argvals[0].join(obj) - if member == 'reverse': - assert len(argvals) == 0 - obj.reverse() - return obj - if member == 'slice': - assert len(argvals) == 1 - return obj[argvals[0]:] - if member == 'splice': - assert isinstance(obj, list) - index, howMany = argvals - res = [] - for i in range(index, min(index + howMany, len(obj))): - res.append(obj.pop(index)) - return res - - return obj[member](argvals) - - for op, opfunc in _OPERATORS: - m = re.match(r'(?P.+?)%s(?P.+)' % re.escape(op), expr) - if not m: - continue - x, abort = self.interpret_statement( - m.group('x'), local_vars, allow_recursion - 1) - if abort: - raise ExtractorError( - 'Premature left-side return of %s in %r' % (op, expr)) - y, abort = self.interpret_statement( - m.group('y'), local_vars, allow_recursion - 1) - if abort: - raise ExtractorError( - 'Premature right-side return of %s in %r' % (op, expr)) - return opfunc(x, y) + for v in self._seperate(arg_str)] + + if obj == str: + if member == 'fromCharCode': + assertion(argvals, 'takes one or more arguments') + return ''.join(map(chr, argvals)) + raise ExtractorError(f'Unsupported string method {member}') + + if member == 'split': + assertion(argvals, 'takes one or more arguments') + assertion(argvals == [''], 'with arguments is not implemented') + return list(obj) + elif member == 'join': + assertion(isinstance(obj, list), 'must be applied on a list') + assertion(len(argvals) == 1, 'takes exactly one argument') + return argvals[0].join(obj) + elif member == 'reverse': + assertion(not argvals, 'does not take any arguments') + obj.reverse() + return obj + elif member == 'slice': + assertion(isinstance(obj, 
list), 'must be applied on a list') + assertion(len(argvals) == 1, 'takes exactly one argument') + return obj[argvals[0]:] + elif member == 'splice': + assertion(isinstance(obj, list), 'must be applied on a list') + assertion(argvals, 'takes one or more arguments') + index, howMany = (argvals + [len(obj)])[:2] + if index < 0: + index += len(obj) + add_items = argvals[2:] + res = [] + for i in range(index, min(index + howMany, len(obj))): + res.append(obj.pop(index)) + for i, item in enumerate(add_items): + obj.insert(index + i, item) + return res + elif member == 'unshift': + assertion(isinstance(obj, list), 'must be applied on a list') + assertion(argvals, 'takes one or more arguments') + for item in reversed(argvals): + obj.insert(0, item) + return obj + elif member == 'pop': + assertion(isinstance(obj, list), 'must be applied on a list') + assertion(not argvals, 'does not take any arguments') + if not obj: + return + return obj.pop() + elif member == 'push': + assertion(argvals, 'takes one or more arguments') + obj.extend(argvals) + return obj + elif member == 'forEach': + assertion(argvals, 'takes one or more arguments') + assertion(len(argvals) <= 2, 'takes at-most 2 arguments') + f, this = (argvals + [''])[:2] + return [f((item, idx, obj), this=this) for idx, item in enumerate(obj)] + elif member == 'indexOf': + assertion(argvals, 'takes one or more arguments') + assertion(len(argvals) <= 2, 'takes at-most 2 arguments') + idx, start = (argvals + [0])[:2] + try: + return obj.index(idx, start) + except ValueError: + return -1 + + if isinstance(obj, list): + member = int(member) + return obj[member](argvals) + + if remaining: + return self.interpret_expression( + self._named_object(local_vars, eval_method()) + remaining, + local_vars, allow_recursion) + else: + return eval_method() - m = re.match( - r'^(?P%s)\((?P[a-zA-Z0-9_$,]*)\)$' % _NAME_RE, expr) + m = re.match(r'^(?P%s)\((?P[a-zA-Z0-9_$,]*)\)$' % _NAME_RE, expr) if m: fname = m.group('func') argvals = tuple([ int(v) if v.isdigit() else local_vars[v] - for v in m.group('args').split(',')]) if len(m.group('args')) > 0 else tuple() - if fname not in self._functions: + for v in self._seperate(m.group('args'))]) + if fname in local_vars: + return local_vars[fname](argvals) + elif fname not in self._functions: self._functions[fname] = self.extract_function(fname) return self._functions[fname](argvals) - raise ExtractorError('Unsupported JS expression %r' % expr) + if expr: + raise ExtractorError('Unsupported JS expression %r' % expr) def extract_object(self, objname): _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')''' @@ -233,30 +482,55 @@ class JSInterpreter(object): return obj - def extract_function(self, funcname): + def extract_function_code(self, funcname): + """ @returns argnames, code """ func_m = re.search( r'''(?x) (?:function\s+%s|[{;,]\s*%s\s*=\s*function|var\s+%s\s*=\s*function)\s* \((?P[^)]*)\)\s* - \{(?P[^}]+)\}''' % ( + (?P\{(?:(?!};)[^"]|"([^"]|\\")*")+\})''' % ( re.escape(funcname), re.escape(funcname), re.escape(funcname)), self.code) + code, _ = self._seperate_at_paren(func_m.group('code'), '}') # refine the match if func_m is None: raise ExtractorError('Could not find JS function %r' % funcname) - argnames = func_m.group('args').split(',') + return func_m.group('args').split(','), code - return self.build_function(argnames, func_m.group('code')) + def extract_function(self, funcname): + return self.extract_function_from_code(*self.extract_function_code(funcname)) + + def 
extract_function_from_code(self, argnames, code, *global_stack): + local_vars = {} + while True: + mobj = re.search(r'function\((?P[^)]*)\)\s*{', code) + if mobj is None: + break + start, body_start = mobj.span() + body, remaining = self._seperate_at_paren(code[body_start - 1:], '}') + name = self._named_object( + local_vars, + self.extract_function_from_code( + [str.strip(x) for x in mobj.group('args').split(',')], + body, local_vars, *global_stack)) + code = code[:start] + name + remaining + return self.build_function(argnames, code, local_vars, *global_stack) def call_function(self, funcname, *args): - f = self.extract_function(funcname) - return f(args) - - def build_function(self, argnames, code): - def resf(args): - local_vars = dict(zip(argnames, args)) - for stmt in code.split(';'): - res, abort = self.interpret_statement(stmt, local_vars) - if abort: + return self.extract_function(funcname)(args) + + def build_function(self, argnames, code, *global_stack): + global_stack = list(global_stack) or [{}] + local_vars = global_stack.pop(0) + + def resf(args, **kwargs): + local_vars.update({ + **dict(zip(argnames, args)), + **kwargs + }) + var_stack = LocalNameSpace(local_vars, *global_stack) + for stmt in self._seperate(code.replace('\n', ''), ';'): + ret, should_abort = self.interpret_statement(stmt, var_stack) + if should_abort: break - return res + return ret return resf From 92592bd30588ae3797d7085a58c6189b774e3ae5 Mon Sep 17 00:00:00 2001 From: Marcel Date: Sun, 31 Oct 2021 05:49:03 +0100 Subject: [PATCH 0154/2552] [ceskatelevize] Fix extractor (#1489) Authored by: flashdagger --- yt_dlp/extractor/ceskatelevize.py | 122 ++++++++++++------------------ yt_dlp/extractor/extractors.py | 5 +- 2 files changed, 51 insertions(+), 76 deletions(-) diff --git a/yt_dlp/extractor/ceskatelevize.py b/yt_dlp/extractor/ceskatelevize.py index 5e04d38a2..f766dfbb7 100644 --- a/yt_dlp/extractor/ceskatelevize.py +++ b/yt_dlp/extractor/ceskatelevize.py @@ -20,22 +20,8 @@ from ..utils import ( class CeskaTelevizeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*(?P[^/#?]+)' + _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady)/(?:[^/?#&]+/)*(?P[^/#?]+)' _TESTS = [{ - 'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220', - 'info_dict': { - 'id': '61924494877246241', - 'ext': 'mp4', - 'title': 'Hyde Park Civilizace: Život v Grónsku', - 'description': 'md5:3fec8f6bb497be5cdb0c9e8781076626', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 3350, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { 'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en', 'info_dict': { 'id': '61924494877028507', @@ -66,12 +52,58 @@ class CeskaTelevizeIE(InfoExtractor): }, { 'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25', 'only_matching': True, + }, { + # video with 18+ caution trailer + 'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/', + 'info_dict': { + 'id': '215562210900007-bogotart', + 'title': 'Queer: Bogotart', + 'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti. 
Připravil Peter Serge Butko', + }, + 'playlist': [{ + 'info_dict': { + 'id': '61924494877311053', + 'ext': 'mp4', + 'title': 'Queer: Bogotart (Varování 18+)', + 'duration': 11.9, + }, + }, { + 'info_dict': { + 'id': '61924494877068022', + 'ext': 'mp4', + 'title': 'Queer: Bogotart (Queer)', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 1558.3, + }, + }], + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + # iframe embed + 'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/', + 'only_matching': True, }] def _real_extract(self, url): playlist_id = self._match_id(url) - + parsed_url = compat_urllib_parse_urlparse(url) webpage = self._download_webpage(url, playlist_id) + site_name = self._og_search_property('site_name', webpage, fatal=False, default=None) + playlist_title = self._og_search_title(webpage, default=None) + if site_name and playlist_title: + playlist_title = playlist_title.replace(f' — {site_name}', '', 1) + playlist_description = self._og_search_description(webpage, default=None) + if playlist_description: + playlist_description = playlist_description.replace('\xa0', ' ') + + if parsed_url.path.startswith('/porady/'): + refer_url = update_url_query(unescapeHTML(self._search_regex( + (r']*\bdata-url=(["\'])(?P(?:(?!\1).)+)\1', + r']+\bsrc=(["\'])(?P(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'), + webpage, 'iframe player url', group='url')), query={'autoStart': 'true'}) + webpage = self._download_webpage(refer_url, playlist_id) NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.' if '%s
</p>
' % NOT_AVAILABLE_STRING in webpage: @@ -100,7 +132,7 @@ class CeskaTelevizeIE(InfoExtractor): data = { 'playlist[0][type]': type_, 'playlist[0][id]': episode_id, - 'requestUrl': compat_urllib_parse_urlparse(url).path, + 'requestUrl': parsed_url.path, 'requestSource': 'iVysilani', } @@ -108,7 +140,7 @@ class CeskaTelevizeIE(InfoExtractor): for user_agent in (None, USER_AGENTS['Safari']): req = sanitized_Request( - 'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist', + 'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/', data=urlencode_postdata(data)) req.add_header('Content-type', 'application/x-www-form-urlencoded') @@ -130,9 +162,6 @@ class CeskaTelevizeIE(InfoExtractor): req = sanitized_Request(compat_urllib_parse_unquote(playlist_url)) req.add_header('Referer', url) - playlist_title = self._og_search_title(webpage, default=None) - playlist_description = self._og_search_description(webpage, default=None) - playlist = self._download_json(req, playlist_id, fatal=False) if not playlist: continue @@ -237,54 +266,3 @@ class CeskaTelevizeIE(InfoExtractor): yield line return '\r\n'.join(_fix_subtitle(subtitles)) - - -class CeskaTelevizePoradyIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P[^/#?]+)' - _TESTS = [{ - # video with 18+ caution trailer - 'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/', - 'info_dict': { - 'id': '215562210900007-bogotart', - 'title': 'Queer: Bogotart', - 'description': 'Alternativní průvodce současným queer světem', - }, - 'playlist': [{ - 'info_dict': { - 'id': '61924494876844842', - 'ext': 'mp4', - 'title': 'Queer: Bogotart (Varování 18+)', - 'duration': 10.2, - }, - }, { - 'info_dict': { - 'id': '61924494877068022', - 'ext': 'mp4', - 'title': 'Queer: Bogotart (Queer)', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 1558.3, - }, - }], - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - # iframe embed - 'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - data_url = update_url_query(unescapeHTML(self._search_regex( - (r']*\bdata-url=(["\'])(?P(?:(?!\1).)+)\1', - r']+\bsrc=(["\'])(?P(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'), - webpage, 'iframe player url', group='url')), query={ - 'autoStart': 'true', - }) - - return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key()) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 9d963ee46..78952d268 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -235,10 +235,7 @@ from .ccc import ( from .ccma import CCMAIE from .cctv import CCTVIE from .cda import CDAIE -from .ceskatelevize import ( - CeskaTelevizeIE, - CeskaTelevizePoradyIE, -) +from .ceskatelevize import CeskaTelevizeIE from .cgtn import CGTNIE from .channel9 import Channel9IE from .charlierose import CharlieRoseIE From 8dcf65c92ec899a34cf57a02809520698f1d7b66 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Sun, 31 Oct 2021 05:08:04 +0000 Subject: [PATCH 0155/2552] [Instagram] Add login to playlist (#1488) Authored by: u-spec-png --- yt_dlp/extractor/instagram.py | 108 ++++++++++++++++++---------------- 1 file changed, 56 insertions(+), 52 deletions(-) diff --git a/yt_dlp/extractor/instagram.py 
b/yt_dlp/extractor/instagram.py index 8c935c251..6ed20d9c6 100644 --- a/yt_dlp/extractor/instagram.py +++ b/yt_dlp/extractor/instagram.py @@ -1,3 +1,4 @@ +# coding: utf-8 from __future__ import unicode_literals import itertools @@ -25,9 +26,55 @@ from ..utils import ( ) -class InstagramIE(InfoExtractor): - _VALID_URL = r'(?Phttps?://(?:www\.)?instagram\.com/(?:p|tv|reel)/(?P[^/?#&]+))' +class InstagramBaseIE(InfoExtractor): _NETRC_MACHINE = 'instagram' + _IS_LOGGED_IN = False + + def _login(self): + username, password = self._get_login_info() + if username is None or self._IS_LOGGED_IN: + return + + login_webpage = self._download_webpage( + 'https://www.instagram.com/accounts/login/', None, + note='Downloading login webpage', errnote='Failed to download login webpage') + + shared_data = self._parse_json( + self._search_regex( + r'window\._sharedData\s*=\s*({.+?});', + login_webpage, 'shared data', default='{}'), + None) + + login = self._download_json('https://www.instagram.com/accounts/login/ajax/', None, note='Logging in', headers={ + 'Accept': '*/*', + 'X-IG-App-ID': '936619743392459', + 'X-ASBD-ID': '198387', + 'X-IG-WWW-Claim': '0', + 'X-Requested-With': 'XMLHttpRequest', + 'X-CSRFToken': shared_data['config']['csrf_token'], + 'X-Instagram-AJAX': shared_data['rollout_hash'], + 'Referer': 'https://www.instagram.com/', + }, data=urlencode_postdata({ + 'enc_password': f'#PWD_INSTAGRAM_BROWSER:0:{int(time.time())}:{password}', + 'username': username, + 'queryParams': '{}', + 'optIntoOneTap': 'false', + 'stopDeletionNonce': '', + 'trustedDeviceRecords': '{}', + })) + + if not login.get('authenticated'): + if login.get('message'): + raise ExtractorError(f'Unable to login: {login["message"]}') + raise ExtractorError('Unable to login') + InstagramBaseIE._IS_LOGGED_IN = True + + def _real_initialize(self): + self._login() + + +class InstagramIE(InstagramBaseIE): + _VALID_URL = r'(?Phttps?://(?:www\.)?instagram\.com/(?:p|tv|reel)/(?P[^/?#&]+))' _TESTS = [{ 'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc', 'md5': '0d2da106a9d2631273e192b372806516', @@ -143,47 +190,6 @@ class InstagramIE(InfoExtractor): if mobj: return mobj.group('link') - def _login(self): - username, password = self._get_login_info() - if username is None: - return - - login_webpage = self._download_webpage( - 'https://www.instagram.com/accounts/login/', None, - note='Downloading login webpage', errnote='Failed to download login webpage') - - shared_data = self._parse_json( - self._search_regex( - r'window\._sharedData\s*=\s*({.+?});', - login_webpage, 'shared data', default='{}'), - None) - - login = self._download_json('https://www.instagram.com/accounts/login/ajax/', None, note='Logging in', headers={ - 'Accept': '*/*', - 'X-IG-App-ID': '936619743392459', - 'X-ASBD-ID': '198387', - 'X-IG-WWW-Claim': '0', - 'X-Requested-With': 'XMLHttpRequest', - 'X-CSRFToken': shared_data['config']['csrf_token'], - 'X-Instagram-AJAX': shared_data['rollout_hash'], - 'Referer': 'https://www.instagram.com/', - }, data=urlencode_postdata({ - 'enc_password': f'#PWD_INSTAGRAM_BROWSER:0:{int(time.time())}:{password}', - 'username': username, - 'queryParams': '{}', - 'optIntoOneTap': 'false', - 'stopDeletionNonce': '', - 'trustedDeviceRecords': '{}', - })) - - if not login.get('authenticated'): - if login.get('message'): - raise ExtractorError(f'Unable to login: {login["message"]}') - raise ExtractorError('Unable to login') - - def _real_initialize(self): - self._login() - def _real_extract(self, url): mobj = self._match_valid_url(url) 
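# --- Illustrative sketch (not part of this patch) -------------------------
# A minimal, runnable model of the login-once pattern introduced above:
# keeping the flag as a *class* attribute means the network login runs a
# single time per process, however many extractor instances are created.
# The class name and the login body below are hypothetical placeholders.
class _DemoBaseIE:
    _IS_LOGGED_IN = False

    def _login(self):
        if _DemoBaseIE._IS_LOGGED_IN:
            return  # some other instance already authenticated this session
        # ... the real code would POST credentials here ...
        _DemoBaseIE._IS_LOGGED_IN = True  # set on the class, not on `self`

first, second = _DemoBaseIE(), _DemoBaseIE()
first._login()   # performs the login and flips the shared flag
second._login()  # no-op: the class attribute is shared by all instances
# ---------------------------------------------------------------------------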
video_id = mobj.group('id') @@ -333,9 +339,7 @@ class InstagramIE(InfoExtractor): } -class InstagramPlaylistIE(InfoExtractor): - # A superclass for handling any kind of query based on GraphQL which - # results in a playlist. +class InstagramPlaylistBaseIE(InstagramBaseIE): _gis_tmpl = None # used to cache GIS request type @@ -462,11 +466,11 @@ class InstagramPlaylistIE(InfoExtractor): self._extract_graphql(data, url), user_or_tag, user_or_tag) -class InstagramUserIE(InstagramPlaylistIE): +class InstagramUserIE(InstagramPlaylistBaseIE): _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P[^/]{2,})/?(?:$|[?#])' IE_DESC = 'Instagram user profile' IE_NAME = 'instagram:user' - _TEST = { + _TESTS = [{ 'url': 'https://instagram.com/porsche', 'info_dict': { 'id': 'porsche', @@ -478,7 +482,7 @@ class InstagramUserIE(InstagramPlaylistIE): 'skip_download': True, 'playlistend': 5, } - } + }] _QUERY_HASH = '42323d64886122307be10013ad2dcc44', @@ -496,11 +500,11 @@ class InstagramUserIE(InstagramPlaylistIE): } -class InstagramTagIE(InstagramPlaylistIE): +class InstagramTagIE(InstagramPlaylistBaseIE): _VALID_URL = r'https?://(?:www\.)?instagram\.com/explore/tags/(?P[^/]+)' IE_DESC = 'Instagram hashtag search' IE_NAME = 'instagram:tag' - _TEST = { + _TESTS = [{ 'url': 'https://instagram.com/explore/tags/lolcats', 'info_dict': { 'id': 'lolcats', @@ -512,7 +516,7 @@ class InstagramTagIE(InstagramPlaylistIE): 'skip_download': True, 'playlistend': 50, } - } + }] _QUERY_HASH = 'f92f56d47dc7a55b606908374b43a314', From 2f9e021299a451b576ce67c43135393157531991 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Sun, 31 Oct 2021 10:39:26 +0530 Subject: [PATCH 0156/2552] [PlanetMarathi] Add extractor (#1484) Authored by: Ashish0804 --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/planetmarathi.py | 76 +++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 yt_dlp/extractor/planetmarathi.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 78952d268..5fc18f7a0 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1072,6 +1072,7 @@ from .pinterest import ( PinterestCollectionIE, ) from .pladform import PladformIE +from .planetmarathi import PlanetMarathiIE from .platzi import ( PlatziIE, PlatziCourseIE, diff --git a/yt_dlp/extractor/planetmarathi.py b/yt_dlp/extractor/planetmarathi.py new file mode 100644 index 000000000..d1d9911f7 --- /dev/null +++ b/yt_dlp/extractor/planetmarathi.py @@ -0,0 +1,76 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + try_get, + unified_strdate, +) + + +class PlanetMarathiIE(InfoExtractor): + _VALID_URL = r'(?:https?://)(?:www\.)?planetmarathi\.com/titles/(?P[^/#&?$]+)' + _TESTS = [{ + 'url': 'https://www.planetmarathi.com/titles/ek-unad-divas', + 'playlist_mincount': 2, + 'info_dict': { + 'id': 'ek-unad-divas', + }, + 'playlist': [{ + 'info_dict': { + 'id': 'ASSETS-MOVIE-ASSET-01_ek-unad-divas', + 'ext': 'mp4', + 'title': 'ek unad divas', + 'alt_title': 'चित्रपट', + 'description': 'md5:41c7ed6b041c2fea9820a3f3125bd881', + 'season_number': None, + 'episode_number': 1, + 'duration': 5539, + 'upload_date': '20210829', + }, + }] # Trailer skipped + }, { + 'url': 'https://www.planetmarathi.com/titles/baap-beep-baap-season-1', + 'playlist_mincount': 10, + 'info_dict': { + 'id': 'baap-beep-baap-season-1', + }, + 'playlist': [{ + 'info_dict': { + 'id': 
'ASSETS-CHARACTER-PROFILE-SEASON-01-ASSET-01_baap-beep-baap-season-1', + 'ext': 'mp4', + 'title': 'Manohar Kanhere', + 'alt_title': 'मनोहर कान्हेरे', + 'description': 'md5:285ed45d5c0ab5522cac9a043354ebc6', + 'season_number': 1, + 'episode_number': 1, + 'duration': 29, + 'upload_date': '20210829', + }, + }] # Trailers, Episodes, other Character profiles skipped + }] + + def _real_extract(self, url): + id = self._match_id(url) + entries = [] + json_data = self._download_json(f'https://www.planetmarathi.com/api/v1/titles/{id}/assets', id)['assets'] + for asset in json_data: + asset_title = asset['mediaAssetName']['en'] + if asset_title == 'Movie': + asset_title = id.replace('-', ' ') + asset_id = f'{asset["sk"]}_{id}'.replace('#', '-') + formats, subtitles = self._extract_m3u8_formats_and_subtitles(asset['mediaAssetURL'], asset_id) + self._sort_formats(formats) + entries.append({ + 'id': asset_id, + 'title': asset_title, + 'alt_title': try_get(asset, lambda x: x['mediaAssetName']['mr']), + 'description': try_get(asset, lambda x: x['mediaAssetDescription']['en']), + 'season_number': asset.get('mediaAssetSeason'), + 'episode_number': asset.get('mediaAssetIndexForAssetType'), + 'duration': asset.get('mediaAssetDurationInSeconds'), + 'upload_date': unified_strdate(asset.get('created')), + 'formats': formats, + 'subtitles': subtitles, + }) + return self.playlist_result(entries, playlist_id=id) From b2f25dc242616bd9eae6d5dbbe7ff56280e7d396 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Sun, 31 Oct 2021 05:10:42 +0000 Subject: [PATCH 0157/2552] [Olympics] Fix extractor (#1483) Authored by: u-spec-png --- yt_dlp/extractor/olympics.py | 73 ++++++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 29 deletions(-) diff --git a/yt_dlp/extractor/olympics.py b/yt_dlp/extractor/olympics.py index 0bc9206ed..bca1f1928 100644 --- a/yt_dlp/extractor/olympics.py +++ b/yt_dlp/extractor/olympics.py @@ -2,22 +2,27 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import unified_strdate +from ..utils import ( + int_or_none, + try_get +) class OlympicsReplayIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?olympics\.com/tokyo-2020/(?:[a-z]{2}/)?replay/(?P[^/#&?]+)' + _VALID_URL = r'https?://(?:www\.)?olympics\.com(?:/tokyo-2020)?/[a-z]{2}/(?:replay|video)/(?P[^/#&?]+)' _TESTS = [{ - 'url': 'https://olympics.com/tokyo-2020/en/replay/300622eb-abc0-43ea-b03b-c5f2d429ec7b/jumping-team-qualifier', + 'url': 'https://olympics.com/fr/video/men-s-109kg-group-a-weightlifting-tokyo-2020-replays', 'info_dict': { - 'id': '300622eb-abc0-43ea-b03b-c5f2d429ec7b', + 'id': 'f6a0753c-8e6f-4b7d-a435-027054a4f8e9', 'ext': 'mp4', - 'title': 'Jumping Team Qualifier', - 'release_date': '20210806', - 'upload_date': '20210713', + 'title': '+109kg (H) Groupe A - Haltérophilie | Replay de Tokyo 2020', + 'upload_date': '20210801', + 'timestamp': 1627783200, + 'description': 'md5:c66af4a5bc7429dbcc43d15845ff03b3', }, 'params': { - 'format': 'bv', + 'format': 'bestvideo', + 'skip_download': True, }, }, { 'url': 'https://olympics.com/tokyo-2020/en/replay/bd242924-4b22-49a5-a846-f1d4c809250d/mens-bronze-medal-match-hun-esp', @@ -26,31 +31,41 @@ class OlympicsReplayIE(InfoExtractor): def _real_extract(self, url): id = self._match_id(url) - # The parameters are hardcoded in the webpage, it's not necessary to download the webpage just for these parameters. 
- # If in downloading webpage serves other functions aswell, then extract these parameters from it. - token_url = 'https://appovptok.ovpobs.tv/api/identity/app/token?api_key=OTk5NDcxOjpvY3N3LWFwaXVzZXI%3D&api_secret=ODY4ODM2MjE3ODMwYmVjNTAxMWZlMDJiMTYxZmY0MjFiMjMwMjllMjJmNDA1YWRiYzA5ODcxYTZjZTljZDkxOTo6NTM2NWIzNjRlMTM1ZmI2YWNjNmYzMGMzOGM3NzZhZTY%3D' - token = self._download_webpage(token_url, id) - headers = {'x-obs-app-token': token} - data_json = self._download_json(f'https://appocswtok.ovpobs.tv/api/schedule-sessions/{id}?include=stream', - id, headers=headers) - meta_data = data_json['data']['attributes'] - for t_dict in data_json['included']: - if t_dict.get('type') == 'Stream': - stream_data = t_dict['attributes'] + + webpage = self._download_webpage(url, id) + title = self._html_search_meta(('title', 'og:title', 'twitter:title'), webpage) + uuid = self._html_search_meta('episode_uid', webpage) + m3u8_url = self._html_search_meta('video_url', webpage) + json_ld = self._search_json_ld(webpage, uuid) + thumbnails_list = json_ld.get('image') + if not thumbnails_list: + thumbnails_list = self._html_search_regex( + r'["\']image["\']:\s*["\']([^"\']+)["\']', webpage, 'images', default='') + thumbnails_list = thumbnails_list.replace('[', '').replace(']', '').split(',') + thumbnails_list = [thumbnail.strip() for thumbnail in thumbnails_list] + thumbnails = [] + for thumbnail in thumbnails_list: + width_a, height_a, width = self._search_regex( + r'/images/image/private/t_(?P\d+)-(?P\d+)_(?P\d+)/primary/[\W\w\d]+', + thumbnail, 'thumb', group=(1, 2, 3), default=(None, None, None)) + width_a, height_a, width = int_or_none(width_a), int_or_none(height_a), int_or_none(width) + thumbnails.append({ + 'url': thumbnail, + 'width': width, + 'height': int_or_none(try_get(width, lambda x: x * height_a / width_a)) + }) m3u8_url = self._download_json( - 'https://meteringtok.ovpobs.tv/api/playback-sessions', id, headers=headers, query={ - 'alias': stream_data['alias'], - 'stream': stream_data['stream'], - 'type': 'vod' - })['data']['attributes']['url'] - formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, id) + f'https://olympics.com/tokenGenerator?url={m3u8_url}', uuid, note='Downloading m3u8 url') + formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, uuid, m3u8_id='hls') self._sort_formats(formats) return { - 'id': id, - 'title': meta_data['title'], - 'release_date': unified_strdate(meta_data.get('start') or meta_data.get('broadcastPublished')), - 'upload_date': unified_strdate(meta_data.get('publishedAt')), + 'id': uuid, + 'title': title, + 'timestamp': json_ld.get('timestamp'), + 'description': json_ld.get('description'), + 'thumbnails': thumbnails, + 'duration': json_ld.get('duration'), 'formats': formats, 'subtitles': subtitles, } From 5b6cb5620797e745a113cfb8118ea7def1484784 Mon Sep 17 00:00:00 2001 From: kaz-us <32769754+kaz-us@users.noreply.github.com> Date: Sun, 31 Oct 2021 09:13:49 +0400 Subject: [PATCH 0158/2552] [vk] Add subtitles (#1480) Authored by: kaz-us --- yt_dlp/extractor/vk.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/yt_dlp/extractor/vk.py b/yt_dlp/extractor/vk.py index d8a9b9ab4..a8a980de6 100644 --- a/yt_dlp/extractor/vk.py +++ b/yt_dlp/extractor/vk.py @@ -471,6 +471,13 @@ class VKIE(VKBaseIE): }) self._sort_formats(formats) + subtitles = {} + for sub in data.get('subs') or {}: + subtitles.setdefault(sub.get('lang', 'en'), []).append({ + 'ext': sub.get('title', '.srt').split('.')[-1], + 'url': url_or_none(sub.get('url')), + }) 
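# --- Illustrative sketch (not part of this patch) -------------------------
# A runnable model of the mapping the loop above builds: the standard
# yt-dlp subtitles structure is language code -> list of subtitle dicts.
# The sample entry below is made up, and `url_or_none` is simplified away.
subtitles = {}
for sub in [{'lang': 'en', 'title': 'video.srt', 'url': 'https://example.com/video.srt'}]:
    subtitles.setdefault(sub.get('lang', 'en'), []).append({
        'ext': sub.get('title', '.srt').split('.')[-1],  # 'srt' taken from the filename
        'url': sub.get('url'),
    })
assert subtitles == {'en': [{'ext': 'srt', 'url': 'https://example.com/video.srt'}]}
# ---------------------------------------------------------------------------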
+ return { 'id': video_id, 'formats': formats, @@ -484,6 +491,7 @@ class VKIE(VKBaseIE): 'like_count': int_or_none(mv_data.get('likes')), 'comment_count': int_or_none(mv_data.get('commcount')), 'is_live': is_live, + 'subtitles': subtitles, } From da4832007574a60b397dff11f26cc20cace685de Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 31 Oct 2021 13:08:03 +0530 Subject: [PATCH 0159/2552] [linkedin] Don't login multiple times --- yt_dlp/extractor/linkedin.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/yt_dlp/extractor/linkedin.py b/yt_dlp/extractor/linkedin.py index 3ce906e2f..c2d347efd 100644 --- a/yt_dlp/extractor/linkedin.py +++ b/yt_dlp/extractor/linkedin.py @@ -19,6 +19,7 @@ from ..utils import ( class LinkedInLearningBaseIE(InfoExtractor): _NETRC_MACHINE = 'linkedin' _LOGIN_URL = 'https://www.linkedin.com/uas/login?trk=learning' + _logged_in = False def _call_api(self, course_slug, fields, video_slug=None, resolution=None): query = { @@ -34,6 +35,8 @@ class LinkedInLearningBaseIE(InfoExtractor): }) sub = ' %dp' % resolution api_url = 'https://www.linkedin.com/learning-api/detailedCourses' + if not self._get_cookies(api_url).get('JSESSIONID'): + self.raise_login_required() return self._download_json( api_url, video_slug, 'Downloading%s JSON metadata' % sub, headers={ 'Csrf-Token': self._get_cookies(api_url)['JSESSIONID'].value, @@ -50,6 +53,8 @@ class LinkedInLearningBaseIE(InfoExtractor): return self._get_urn_id(video_data) or '%s/%s' % (course_slug, video_slug) def _real_initialize(self): + if self._logged_in: + return email, password = self._get_login_info() if email is None: return @@ -72,6 +77,7 @@ class LinkedInLearningBaseIE(InfoExtractor): login_submit_page, 'error', default=None) if error: raise ExtractorError(error, expected=True) + LinkedInLearningBaseIE._logged_in = True class LinkedInLearningIE(LinkedInLearningBaseIE): From a0bb6ce58db5b3124962037ca12e78cbd348f56c Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 31 Oct 2021 13:26:44 +0530 Subject: [PATCH 0160/2552] [youtube] refactor itag processing --- yt_dlp/extractor/youtube.py | 56 +++++++++++++++---------------------- 1 file changed, 23 insertions(+), 33 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 56cd2ed8d..64475edec 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2434,7 +2434,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return prs, player_url def _extract_formats(self, streaming_data, video_id, player_url, is_live): - itags, stream_ids = [], [] + itags, stream_ids = {}, [] itag_qualities, res_qualities = {}, {} q = qualities([ # Normally tiny is the smallest video-only formats. 
But @@ -2498,7 +2498,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): throttled = True if itag: - itags.append(itag) + itags[itag] = 'https' stream_ids.append(stream_id) tbr = float_or_none( @@ -2548,46 +2548,36 @@ class YoutubeIE(YoutubeBaseInfoExtractor): and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)) get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True) - def guess_quality(f): - for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)): - if val in qdict: - return q(qdict[val]) - return -1 + def process_manifest_format(f, proto, itag): + if itag in itags: + if itags[itag] == proto or f'{itag}-{proto}' in itags: + return False + itag = f'{itag}-{proto}' + if itag: + f['format_id'] = itag + itags[itag] = proto + + f['quality'] = next(( + q(qdict[val]) + for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)) + if val in qdict), -1) + return True for sd in streaming_data: hls_manifest_url = get_hls and sd.get('hlsManifestUrl') if hls_manifest_url: for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False): - itag = self._search_regex( - r'/itag/(\d+)', f['url'], 'itag', default=None) - if itag in itags: - itag += '-hls' - if itag in itags: - continue - if itag: - f['format_id'] = itag - itags.append(itag) - f['quality'] = guess_quality(f) - yield f + if process_manifest_format(f, 'hls', self._search_regex( + r'/itag/(\d+)', f['url'], 'itag', default=None)): + yield f dash_manifest_url = get_dash and sd.get('dashManifestUrl') if dash_manifest_url: for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False): - itag = f['format_id'] - if itag in itags: - itag += '-dash' - if itag in itags: - continue - if itag: - f['format_id'] = itag - itags.append(itag) - f['quality'] = guess_quality(f) - filesize = int_or_none(self._search_regex( - r'/clen/(\d+)', f.get('fragment_base_url') - or f['url'], 'file size', default=None)) - if filesize: - f['filesize'] = filesize - yield f + if process_manifest_format(f, 'dash', f['format_id']): + f['filesize'] = int_or_none(self._search_regex( + r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None)) + yield f def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) From 0930b11fdaff2141ad951a8ed6d90417bfde7059 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 31 Oct 2021 14:45:59 +0530 Subject: [PATCH 0161/2552] [docs,cleanup] Improve docs and minor cleanup Closes #1387, #1404, #1408, #1485, #1415, #1450, #1492 --- .github/workflows/build.yml | 6 +- CONTRIBUTING.md | 2 +- README.md | 116 +++++++++++++++++++++------------- yt_dlp/YoutubeDL.py | 4 +- yt_dlp/__init__.py | 1 + yt_dlp/cookies.py | 4 +- yt_dlp/extractor/common.py | 4 +- yt_dlp/extractor/telemundo.py | 2 +- yt_dlp/extractor/tiktok.py | 8 +-- yt_dlp/options.py | 12 ++-- 10 files changed, 93 insertions(+), 66 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3329c141f..0fff6cae3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -115,12 +115,12 @@ jobs: release_name: yt-dlp ${{ steps.bump_version.outputs.ytdlp_version }} commitish: ${{ steps.push_update.outputs.head_sha }} body: | - ### Changelog: - ${{ env.changelog }} + #### [A description of the various files]((https://github.com/yt-dlp/yt-dlp#release-files)) are in the README --- - ### See [this](https://github.com/yt-dlp/yt-dlp#release-files) 
for a description of the release files + ### Changelog: + ${{ env.changelog }} draft: false prerelease: false - name: Upload yt-dlp Unix binary diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index fb539ec0d..249000490 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -148,7 +148,7 @@ If you want to create a build of yt-dlp yourself, you can follow the instruction Before you start writing code for implementing a new feature, open an issue explaining your feature request and atleast one use case. This allows the maintainers to decide whether such a feature is desired for the project in the first place, and will provide an avenue to discuss some implementation details. If you open a pull request for a new feature without discussing with us first, do not be surprised when we ask for large changes to the code, or even reject it outright. -The same applies for overarching changes to the architecture, documentation or code style +The same applies for changes to the documentation, code style, or overarching changes to the architecture ## Adding support for a new site diff --git a/README.md b/README.md index e2fbbf2ae..31bfca6a8 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,6 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t * [Opening an Issue](CONTRIBUTING.md#opening-an-issue) * [Developer Instructions](CONTRIBUTING.md#developer-instructions) * [MORE](#more) - # NEW FEATURES @@ -123,7 +122,7 @@ If you are coming from [youtube-dl](https://github.com/ytdl-org/youtube-dl), the ### Differences in default behavior -Some of yt-dlp's default options are different from that of youtube-dl and youtube-dlc. +Some of yt-dlp's default options are different from that of youtube-dl and youtube-dlc: * The options `--auto-number` (`-A`), `--title` (`-t`) and `--literal` (`-l`), no longer work. See [removed options](#Removed) for details * `avconv` is not supported as as an alternative to `ffmpeg` @@ -143,7 +142,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu * If `ffmpeg` is used as the downloader, the downloading and merging of formats happen in a single step when possible. Use `--compat-options no-direct-merge` to revert this * Thumbnail embedding in `mp4` is done with mutagen if possible. Use `--compat-options embed-thumbnail-atomicparsley` to force the use of AtomicParsley instead * Some private fields such as filenames are removed by default from the infojson. Use `--no-clean-infojson` or `--compat-options no-clean-infojson` to revert this -* When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the seperate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this. +* When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the seperate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. 
`--compat-options no-keep-subs` can be used to revert this For ease of use, a few more compat options are available: * `--compat-options all`: Use all compat options @@ -152,17 +151,14 @@ For ease of use, a few more compat options are available: # INSTALLATION -yt-dlp is not platform specific. So it should work on your Unix box, on Windows or on macOS You can install yt-dlp using one of the following methods: -* Download [the binary](#release-files) from the [latest release](https://github.com/yt-dlp/yt-dlp/releases/latest) -* With Homebrew, `brew install yt-dlp/taps/yt-dlp` -* Use [PyPI package](https://pypi.org/project/yt-dlp): `python3 -m pip install --upgrade yt-dlp` -* Install master branch: `python3 -m pip3 install -U https://github.com/yt-dlp/yt-dlp/archive/master.zip` -Note that on some systems, you may need to use `py` or `python` instead of `python3` +#### Using the release binary + +You can simply download the [correct binary file](#release-files) for your OS: **[[Windows](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)] [[UNIX-like](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)]** -UNIX users (Linux, macOS, BSD) can also install the [latest release](https://github.com/yt-dlp/yt-dlp/releases/latest) one of the following ways: +In UNIX-like OSes (MacOS, Linux, BSD), you can also install the same in one of the following ways: ``` sudo curl -L https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -o /usr/local/bin/yt-dlp @@ -179,16 +175,41 @@ sudo aria2c https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -o sudo chmod a+rx /usr/local/bin/yt-dlp ``` -macOS or Linux users that are using Homebrew (formerly known as Linuxbrew for Linux users) can also install it by: +PS: The manpages, shell completion files etc. are available in [yt-dlp.tar.gz](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz) + +#### With [PIP](https://pypi.org/project/pip) + +You can install the [PyPI package](https://pypi.org/project/yt-dlp) with: +``` +python3 -m pip install -U yt-dlp +``` + +On some systems (like Termux), it is not possible to install pycryptodomex. In that case, install without dependancies: +``` +python3 -m pip install --no-deps -U yt-dlp +``` + +You can also install the master branch with: +``` +python3 -m pip3 install -U https://github.com/yt-dlp/yt-dlp/archive/master.zip +``` + +Note that on some systems, you may need to use `py` or `python` instead of `python3` + +#### With [Homebrew](https://brew.sh) + +macOS or Linux users that are using Homebrew can also install it by: ``` brew install yt-dlp/taps/yt-dlp ``` ### UPDATE -You can use `yt-dlp -U` to update if you are using the provided release. -If you are using `pip`, simply re-run the same command that was used to install the program. -If you have installed using Homebrew, run `brew upgrade yt-dlp/taps/yt-dlp` +You can use `yt-dlp -U` to update if you are [using the provided release](#using-the-release-binary) + +If you [installed with pip](#with-pip), simply re-run the same command that was used to install the program + +If you [installed using Homebrew](#with-homebrew), run `brew upgrade yt-dlp/taps/yt-dlp` ### RELEASE FILES @@ -196,18 +217,18 @@ If you have installed using Homebrew, run `brew upgrade yt-dlp/taps/yt-dlp` File|Description :---|:--- -[yt-dlp](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)|Platform independant binary. 
Needs Python (Recommended for **UNIX-like systems**) -[yt-dlp.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)|Windows (Win7 SP1+) standalone x64 binary (Recommended for **Windows**) +[yt-dlp](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)|Platform-independant binary. Needs Python (recommended for **UNIX-like systems**) +[yt-dlp.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)|Windows (Win7 SP1+) standalone x64 binary (recommended for **Windows**) #### Alternatives File|Description :---|:--- [yt-dlp_macos](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)|MacOS (10.15+) standalone executable -[yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Vista SP2+) standalone x86 (32bit) binary +[yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Vista SP2+) standalone x86 (32-bit) binary [yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_min.exe)|Windows (Win7 SP1+) standalone x64 binary built with `py2exe`.
Does not contain `pycryptodomex`, needs VC++14 -[yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged windows executable (No auto-update) -[yt-dlp_macos.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos.zip)|Unpackaged MacOS (10.15+) executable (No auto-update) +[yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged Windows executable (no auto-update) +[yt-dlp_macos.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos.zip)|Unpackaged MacOS (10.15+) executable (no auto-update) #### Misc @@ -227,20 +248,20 @@ On windows, [Microsoft Visual C++ 2010 SP1 Redistributable Package (x86)](https: While all the other dependancies are optional, `ffmpeg` and `ffprobe` are highly recommended * [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging seperate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. Licence [depends on the build](https://www.ffmpeg.org/legal.html) -* [**mutagen**](https://github.com/quodlibet/mutagen) - For embedding thumbnail in certain formats. Licenced under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING) -* [**pycryptodomex**](https://github.com/Legrandin/pycryptodome) - For decrypting AES-128 HLS streams and various other data. Licenced under [BSD2](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst) -* [**websockets**](https://github.com/aaugustin/websockets) - For downloading over websocket. Licenced under [BSD3](https://github.com/aaugustin/websockets/blob/main/LICENSE) -* [**keyring**](https://github.com/jaraco/keyring) - For decrypting cookies of chromium-based browsers on Linux. Licenced under [MIT](https://github.com/jaraco/keyring/blob/main/LICENSE) -* [**AtomicParsley**](https://github.com/wez/atomicparsley) - For embedding thumbnail in mp4/m4a if mutagen is not present. Licenced under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING) -* [**rtmpdump**](http://rtmpdump.mplayerhq.hu) - For downloading `rtmp` streams. ffmpeg will be used as a fallback. Licenced under [GPLv2+](http://rtmpdump.mplayerhq.hu) -* [**mplayer**](http://mplayerhq.hu/design7/info.html) or [**mpv**](https://mpv.io) - For downloading `rstp` streams. ffmpeg will be used as a fallback. Licenced under [GPLv2+](https://github.com/mpv-player/mpv/blob/master/Copyright) -* [**phantomjs**](https://github.com/ariya/phantomjs) - Used in extractors where javascript needs to be run. Licenced under [BSD3](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD) -* [**sponskrub**](https://github.com/faissaloo/SponSkrub) - For using the now **deprecated** [sponskrub options](#sponskrub-options). Licenced under [GPLv3+](https://github.com/faissaloo/SponSkrub/blob/master/LICENCE.md) +* [**mutagen**](https://github.com/quodlibet/mutagen) - For embedding thumbnail in certain formats. Licensed under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING) +* [**pycryptodomex**](https://github.com/Legrandin/pycryptodome) - For decrypting AES-128 HLS streams and various other data. Licensed under [BSD2](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst) +* [**websockets**](https://github.com/aaugustin/websockets) - For downloading over websocket. 
Licensed under [BSD3](https://github.com/aaugustin/websockets/blob/main/LICENSE) +* [**keyring**](https://github.com/jaraco/keyring) - For decrypting cookies of chromium-based browsers on Linux. Licensed under [MIT](https://github.com/jaraco/keyring/blob/main/LICENSE) +* [**AtomicParsley**](https://github.com/wez/atomicparsley) - For embedding thumbnail in mp4/m4a if mutagen is not present. Licensed under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING) +* [**rtmpdump**](http://rtmpdump.mplayerhq.hu) - For downloading `rtmp` streams. ffmpeg will be used as a fallback. Licensed under [GPLv2+](http://rtmpdump.mplayerhq.hu) +* [**mplayer**](http://mplayerhq.hu/design7/info.html) or [**mpv**](https://mpv.io) - For downloading `rstp` streams. ffmpeg will be used as a fallback. Licensed under [GPLv2+](https://github.com/mpv-player/mpv/blob/master/Copyright) +* [**phantomjs**](https://github.com/ariya/phantomjs) - Used in extractors where javascript needs to be run. Licensed under [BSD3](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD) +* [**sponskrub**](https://github.com/faissaloo/SponSkrub) - For using the now **deprecated** [sponskrub options](#sponskrub-options). Licensed under [GPLv3+](https://github.com/faissaloo/SponSkrub/blob/master/LICENCE.md) * Any external downloader that you want to use with `--downloader` To use or redistribute the dependencies, you must agree to their respective licensing terms. -The windows releases are already built with the python interpreter, mutagen, pycryptodomex and websockets included. +The Windows and MacOS standalone release binaries are already built with the python interpreter, mutagen, pycryptodomex and websockets included. **Note**: There are some regressions in newer ffmpeg versions that causes various issues when used alongside yt-dlp. Since ffmpeg is such an important dependancy, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds/wiki/Latest#latest-autobuilds) with patches for these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specifc issues solved by these builds @@ -276,7 +297,7 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t sure that you have sufficient permissions (run with sudo if needed) -i, --ignore-errors Ignore download and postprocessing errors. - The download will be considered successfull + The download will be considered successful even if the postprocessing fails --no-abort-on-error Continue with next video on download errors; e.g. to skip unavailable videos in @@ -366,7 +387,7 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t SIZE (e.g. 50k or 44.6m) --max-filesize SIZE Do not download any videos larger than SIZE (e.g. 50k or 44.6m) - --date DATE Download only videos uploaded in this date. + --date DATE Download only videos uploaded on this date. The date can be "YYYYMMDD" or in the format "(now|today)[+-][0-9](day|week|month|year)(s)?" --datebefore DATE Download only videos uploaded on or before @@ -510,9 +531,9 @@ Then simply run `make`. 
You can also run `make yt-dlp` instead to compile only t filenames --no-restrict-filenames Allow Unicode characters, "&" and spaces in filenames (default) - --windows-filenames Force filenames to be windows compatible - --no-windows-filenames Make filenames windows compatible only if - using windows (default) + --windows-filenames Force filenames to be Windows-compatible + --no-windows-filenames Make filenames Windows-compatible only if + using Windows (default) --trim-filenames LENGTH Limit the filename length (excluding extension) to the specified number of characters @@ -608,9 +629,9 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t anything to disk --no-simulate Download the video even if printing/listing options are used - --ignore-no-formats-error Ignore "No video formats" error. Usefull - for extracting metadata even if the videos - are not actually available for download + --ignore-no-formats-error Ignore "No video formats" error. Useful for + extracting metadata even if the videos are + not actually available for download (experimental) --no-ignore-no-formats-error Throw error when no downloadable video formats are found (default) @@ -644,7 +665,7 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t "postprocess:", or "postprocess-title:". The video's fields are accessible under the "info" key and the progress attributes are - accessible under "progress" key. Eg: + accessible under "progress" key. E.g.: --console-title --progress-template "download-title:%(info.id)s-%(progress.eta)s" -v, --verbose Print various debugging information @@ -657,7 +678,7 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t ## Workarounds: --encoding ENCODING Force the specified encoding (experimental) - --no-check-certificate Suppress HTTPS certificate validation + --no-check-certificates Suppress HTTPS certificate validation --prefer-insecure Use an unencrypted connection to retrieve information about the video (Currently supported only for YouTube) @@ -706,10 +727,12 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t containers irrespective of quality --no-prefer-free-formats Don't give any special preference to free containers (default) - --check-formats Check that the formats selected are + --check-formats Check that the selected formats are actually downloadable - --no-check-formats Do not check that the formats selected are + --check-all-formats Check all formats for whether they are actually downloadable + --no-check-formats Do not check that the formats are actually + downloadable -F, --list-formats List available formats of each video. Simulate unless --no-simulate is used --merge-output-format FORMAT If a merge is required (e.g. @@ -1018,7 +1041,7 @@ The `-o` option is used to indicate a template for the output file names while ` The simplest usage of `-o` is not to set any template arguments when downloading a single file, like in `yt-dlp -o funny_video.flv "https://some/video"` (hard-coding file extension like this is _not_ recommended and could break some post-processing). -It may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. 
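As a hedged aside (not part of this patch), the same template mini-language applies when embedding yt-dlp from Python: the `outtmpl` option and the `playlist_index`/`title`/`ext` fields are real, while the URL below is a placeholder. The comment shows the `%(NAME)05d` form in use:

```python
from yt_dlp import YoutubeDL

# zero-pad the playlist index to 5 digits, i.e. the %(NAME)05d form
with YoutubeDL({'outtmpl': '%(playlist_index)05d - %(title)s.%(ext)s'}) as ydl:
    ydl.download(['https://example.com/some/playlist'])  # placeholder URL
```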
To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. +It may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [Python string formatting operations](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. The field names themselves (the part inside the parenthesis) can also have some special formatting: 1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do python slicing using `:`. Eg: `%(tags.0)s`, `%(subtitles.en.-1.ext)s`, `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. `%()s` refers to the entire infodict. Note that all the fields that become available using this method are not listed below. Use `-j` to see such fields @@ -1159,7 +1182,7 @@ Each aforementioned sequence when referenced in an output template will be repla Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default). -**Tip**: Look at the `-j` output to identify which fields are available for the purticular URL +**Tip**: Look at the `-j` output to identify which fields are available for the particular URL For numeric sequences you can use [numeric related formatting](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting), for example, `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`. @@ -1303,7 +1326,7 @@ The available fields are: - `vext`: Video Extension (`mp4` > `webm` > `flv` > other > unknown). If `--prefer-free-formats` is used, `webm` is prefered. - `aext`: Audio Extension (`m4a` > `aac` > `mp3` > `ogg` > `opus` > `webm` > other > unknown). If `--prefer-free-formats` is used, the order changes to `opus` > `ogg` > `webm` > `m4a` > `mp3` > `aac`. - `ext`: Equivalent to `vext,aext` - - `filesize`: Exact filesize, if know in advance. This will be unavailable for mu38 and DASH formats. + - `filesize`: Exact filesize, if known in advance - `fs_approx`: Approximate filesize calculated from the manifests - `size`: Exact filesize if available, otherwise approximate filesize - `height`: Height of video @@ -1506,6 +1529,9 @@ $ yt-dlp --parse-metadata '%(series)s S%(season_number)02dE%(episode_number)02d: # Set "comment" field in video metadata using description instead of webpage_url $ yt-dlp --parse-metadata 'description:(?s)(?P.+)' --add-metadata +# Remove "formats" field from the infojson by setting it to an empty string +$ yt-dlp --parse-metadata ':(?P)' -j + # Replace all spaces and "_" in title and uploader with a `-` $ yt-dlp --replace-in-metadata 'title,uploader' '[ _]' '-' @@ -1513,7 +1539,7 @@ $ yt-dlp --replace-in-metadata 'title,uploader' '[ _]' '-' # EXTRACTOR ARGUMENTS -Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) seperated string of `ARG=VAL1,VAL2`. 
Eg: `--extractor-args "youtube:player_client=android_agegate,web;include_live_dash" --extractor-args "funimation:version=uncut"` +Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. Eg: `--extractor-args "youtube:player_client=android_agegate,web;include_live_dash" --extractor-args "funimation:version=uncut"` The following extractors use this feature: diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 2c2b17b20..4a9f4775b 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -214,8 +214,8 @@ class YoutubeDL(object): ignore_no_formats_error: Ignore "No video formats" error. Usefull for extracting metadata even if the video is not actually available for download (experimental) - format_sort: How to sort the video formats. see "Sorting Formats" - for more details. + format_sort: A list of fields by which to sort the video formats. + See "Sorting Formats" for more details. format_sort_force: Force the given format_sort. see "Sorting Formats" for more details. allow_multiple_video_streams: Allow multiple video streams to be merged diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 5c3d33df0..84628bf45 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -225,6 +225,7 @@ def _real_main(argv=None): if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart: raise ValueError('Playlist end must be greater than playlist start') if opts.extractaudio: + opts.audioformat = opts.audioformat.lower() if opts.audioformat not in ['best'] + list(FFmpegExtractAudioPP.SUPPORTED_EXTS): parser.error('invalid audio format specified') if opts.audioquality: diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index c9ae9b6db..ec68a809d 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -117,7 +117,7 @@ def _extract_firefox_cookies(profile, logger): raise FileNotFoundError('could not find firefox cookies database in {}'.format(search_root)) logger.debug('Extracting cookies from: "{}"'.format(cookie_database_path)) - with tempfile.TemporaryDirectory(prefix='youtube_dl') as tmpdir: + with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir: cursor = None try: cursor = _open_database_copy(cookie_database_path, tmpdir) @@ -236,7 +236,7 @@ def _extract_chrome_cookies(browser_name, profile, logger): decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger) - with tempfile.TemporaryDirectory(prefix='youtube_dl') as tmpdir: + with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir: cursor = None try: cursor = _open_database_copy(cookie_database_path, tmpdir) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index aa98c0cc9..2bbe23699 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -441,11 +441,11 @@ class InfoExtractor(object): _WORKING = True _LOGIN_HINTS = { - 'any': 'Use --cookies, --username and --password or --netrc to provide account credentials', + 'any': 'Use --cookies, --username and --password, or --netrc to provide account credentials', 'cookies': ( 'Use --cookies-from-browser or --cookies for the authentication. 
' 'See https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl for how to manually pass cookies'), - 'password': 'Use --username and --password or --netrc to provide account credentials', + 'password': 'Use --username and --password, or --netrc to provide account credentials', } def __init__(self, downloader=None): diff --git a/yt_dlp/extractor/telemundo.py b/yt_dlp/extractor/telemundo.py index 18552a0ef..e326bbdd5 100644 --- a/yt_dlp/extractor/telemundo.py +++ b/yt_dlp/extractor/telemundo.py @@ -1,4 +1,4 @@ -# coding=utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 1db6327e2..859951637 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -38,8 +38,8 @@ class TikTokBaseIE(InfoExtractor): 'build_number': self._APP_VERSION, 'manifest_version_code': self._MANIFEST_APP_VERSION, 'update_version_code': self._MANIFEST_APP_VERSION, - 'openudid': ''.join(random.choice('0123456789abcdef') for i in range(16)), - 'uuid': ''.join([random.choice(string.digits) for num in range(16)]), + 'openudid': ''.join(random.choice('0123456789abcdef') for _ in range(16)), + 'uuid': ''.join([random.choice(string.digits) for _ in range(16)]), '_rticket': int(time.time() * 1000), 'ts': int(time.time()), 'device_brand': 'Google', @@ -66,7 +66,7 @@ class TikTokBaseIE(InfoExtractor): 'as': 'a1qwert123', 'cp': 'cbfhckdckkde1', } - self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choice('0123456789abcdef') for i in range(160))) + self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choice('0123456789abcdef') for _ in range(160))) return self._download_json( 'https://%s/aweme/v1/%s/' % (self._API_HOSTNAME, ep), video_id=video_id, fatal=fatal, note=note, errnote=errnote, headers={ @@ -416,7 +416,7 @@ class TikTokUserIE(TikTokBaseIE): 'max_cursor': 0, 'min_cursor': 0, 'retry_type': 'no_retry', - 'device_id': ''.join(random.choice(string.digits) for i in range(19)), # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api. + 'device_id': ''.join(random.choice(string.digits) for _ in range(19)), # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api. } max_retries = self.get_param('extractor_retries', 3) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 5499ab13e..a3a6c74b3 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -209,7 +209,7 @@ def parseOpts(overrideArguments=None): general.add_option( '-i', '--ignore-errors', action='store_true', dest='ignoreerrors', - help='Ignore download and postprocessing errors. The download will be considered successfull even if the postprocessing fails') + help='Ignore download and postprocessing errors. The download will be considered successful even if the postprocessing fails') general.add_option( '--no-abort-on-error', action='store_const', dest='ignoreerrors', const='only_download', @@ -383,7 +383,7 @@ def parseOpts(overrideArguments=None): '--date', metavar='DATE', dest='date', default=None, help=( - 'Download only videos uploaded in this date. ' + 'Download only videos uploaded on this date. ' 'The date can be "YYYYMMDD" or in the format ' '"(now|today)[+-][0-9](day|week|month|year)(s)?"')) selection.add_option( @@ -840,7 +840,7 @@ def parseOpts(overrideArguments=None): '--ignore-no-formats-error', action='store_true', dest='ignore_no_formats_error', default=False, help=( - 'Ignore "No video formats" error. 
Usefull for extracting metadata ' + 'Ignore "No video formats" error. Useful for extracting metadata ' 'even if the videos are not actually available for download (experimental)')) verbosity.add_option( '--no-ignore-no-formats-error', @@ -935,7 +935,7 @@ def parseOpts(overrideArguments=None): 'Template for progress outputs, optionally prefixed with one of "download:" (default), ' '"download-title:" (the console title), "postprocess:", or "postprocess-title:". ' 'The video\'s fields are accessible under the "info" key and ' - 'the progress attributes are accessible under "progress" key. Eg: ' + 'the progress attributes are accessible under "progress" key. E.g.: ' # TODO: Document the fields inside "progress" '--console-title --progress-template "download-title:%(info.id)s-%(progress.eta)s"')) verbosity.add_option( @@ -1028,11 +1028,11 @@ def parseOpts(overrideArguments=None): filesystem.add_option( '--windows-filenames', action='store_true', dest='windowsfilenames', default=False, - help='Force filenames to be windows compatible') + help='Force filenames to be Windows-compatible') filesystem.add_option( '--no-windows-filenames', action='store_false', dest='windowsfilenames', - help='Make filenames windows compatible only if using windows (default)') + help='Make filenames Windows-compatible only if using Windows (default)') filesystem.add_option( '--trim-filenames', '--trim-file-names', metavar='LENGTH', dest='trim_file_name', default=0, type=int, From f0ffaa1621fc40ba033aa3c98a14aa4c93533915 Mon Sep 17 00:00:00 2001 From: kaz-us <32769754+kaz-us@users.noreply.github.com> Date: Sun, 31 Oct 2021 18:16:12 +0400 Subject: [PATCH 0162/2552] [vk] Fix login (#1495) Closes #1459 Authored by: kaz-us --- yt_dlp/extractor/vk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/vk.py b/yt_dlp/extractor/vk.py index a8a980de6..9a5c9ee6b 100644 --- a/yt_dlp/extractor/vk.py +++ b/yt_dlp/extractor/vk.py @@ -51,7 +51,7 @@ class VKBaseIE(InfoExtractor): self._apply_first_set_cookie_header(url_handle, 'remixlhk') login_page = self._download_webpage( - 'https://login.vk.com/?act=login', None, + 'https://vk.com/login', None, note='Logging in', data=urlencode_postdata(login_form)) From c588b602d34f005dc018ae004281226741414192 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Sun, 31 Oct 2021 14:20:09 +0000 Subject: [PATCH 0163/2552] [Instagram] Fix incorrect resolution (#1494) Authored by: u-spec-png --- yt_dlp/extractor/instagram.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py index 6ed20d9c6..4eca9eb92 100644 --- a/yt_dlp/extractor/instagram.py +++ b/yt_dlp/extractor/instagram.py @@ -228,8 +228,8 @@ class InstagramIE(InstagramBaseIE): dict) if media: video_url = media.get('video_url') - height = try_get(media, lambda x: x['dimensions']['height']) - width = try_get(media, lambda x: x['dimensions']['width']) + height = int_or_none(self._html_search_meta(('og:video:height', 'video:height'), webpage)) or try_get(media, lambda x: x['dimensions']['height']) + width = int_or_none(self._html_search_meta(('og:video:width', 'video:width'), webpage)) or try_get(media, lambda x: x['dimensions']['width']) description = try_get( media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'], compat_str) or media.get('caption') From a1fc7ca0743c8df06416e68ee74b64e07dfe7135 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 3 Nov 2021 16:25:48 +0530 
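A quick aside on the Instagram fix above: it now trusts the page's `og:video:width`/`og:video:height` meta tags and only falls back to the embedded JSON `dimensions` blob. A minimal, self-contained sketch of that fallback order; `search_meta`, `extract_dimensions` and the sample markup are hypothetical stand-ins, not yt-dlp API:

```python
import re

def search_meta(names, page):
    # first matching <meta property=... content=...> value, as an int
    for name in names:
        m = re.search(
            r'<meta[^>]+property=["\']%s["\'][^>]+content=["\'](\d+)["\']'
            % re.escape(name), page)
        if m:
            return int(m.group(1))

def extract_dimensions(page, media):
    # meta tags first (what the fix switches to), JSON dimensions second
    dims = media.get('dimensions') or {}
    return (
        search_meta(('og:video:width', 'video:width'), page) or dims.get('width'),
        search_meta(('og:video:height', 'video:height'), page) or dims.get('height'),
    )

page = ('<meta property="og:video:width" content="640"/>'
        '<meta property="og:video:height" content="1138"/>')
print(extract_dimensions(page, {'dimensions': {'width': 480, 'height': 853}}))
# -> (640, 1138); without the meta tags it would print (480, 853)
```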
Subject: [PATCH 0164/2552] [jsinterp] Handle default in switch better --- test/test_jsinterp.py | 15 +++++++++++++++ test/test_youtube_signature.py | 6 +++++- yt_dlp/jsinterp.py | 22 +++++++++++++--------- 3 files changed, 33 insertions(+), 10 deletions(-) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index 380e52c33..e230b045f 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -132,6 +132,21 @@ class TestJSInterpreter(unittest.TestCase): self.assertEqual(jsi.call_function('x', 3), 6) self.assertEqual(jsi.call_function('x', 5), 0) + def test_switch_default(self): + jsi = JSInterpreter(''' + function x(f) { switch(f){ + case 2: f+=2; + default: f-=1; + case 5: + case 6: f+=6; + case 0: break; + case 1: f+=1; + } return f } + ''') + self.assertEqual(jsi.call_function('x', 1), 2) + self.assertEqual(jsi.call_function('x', 5), 11) + self.assertEqual(jsi.call_function('x', 9), 14) + def test_try(self): jsi = JSInterpreter(''' function x() { try{return 10} catch(e){return 5} } diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index f40a06952..60d8eabf5 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -69,7 +69,11 @@ _NSIG_TESTS = [ ( 'https://www.youtube.com/s/player/9216d1f7/player_ias.vflset/en_US/base.js', 'SLp9F5bwjAdhE9F-', 'gWnb9IK2DJ8Q1w', - ), # TODO: Add more tests + ), + ( + 'https://www.youtube.com/s/player/f8cb7a3b/player_ias.vflset/en_US/base.js', + 'oBo2h5euWy6osrUt', 'ivXHpm7qJjJN', + ), ] diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py index 5c79a8110..bb2a0ae0b 100644 --- a/yt_dlp/jsinterp.py +++ b/yt_dlp/jsinterp.py @@ -228,21 +228,25 @@ class JSInterpreter(object): switch_val, remaining = self._seperate_at_paren(expr[m.end() - 1:], ')') switch_val = self.interpret_expression(switch_val, local_vars, allow_recursion) body, expr = self._seperate_at_paren(remaining, '}') - body, default = body.split('default:') if 'default:' in body else (body, None) - items = body.split('case ')[1:] - if default: - items.append(f'default:{default}') - matched = False - for item in items: - case, stmt = [i.strip() for i in self._seperate(item, ':', 1)] - matched = matched or case == 'default' or switch_val == self.interpret_expression(case, local_vars, allow_recursion) - if matched: + items = body.replace('default:', 'case default:').split('case ')[1:] + for default in (False, True): + matched = False + for item in items: + case, stmt = [i.strip() for i in self._seperate(item, ':', 1)] + if default: + matched = matched or case == 'default' + elif not matched: + matched = case != 'default' and switch_val == self.interpret_expression(case, local_vars, allow_recursion) + if not matched: + continue try: ret, should_abort = self.interpret_statement(stmt, local_vars, allow_recursion - 1) if should_abort: return ret except JS_Break: break + if matched: + break return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0] # Comma seperated statements From 9bd979ca40f4f7b1f3918386b8347e03820766b4 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 3 Nov 2021 16:26:34 +0530 Subject: [PATCH 0165/2552] [utils] Parse `vp09` as vp9 --- yt_dlp/utils.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 080bf260a..2953909fc 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4656,19 +4656,18 @@ def parse_codecs(codecs_str): str.strip, codecs_str.strip().strip(',').split(',')))) vcodec, acodec, hdr = None, None, None for full_codec in 
split_codecs: - codec = full_codec.split('.')[0] - if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora', 'dvh1', 'dvhe'): + parts = full_codec.split('.') + codec = parts[0].replace('0', '') + if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', + 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'): if not vcodec: - vcodec = full_codec + vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1') else full_codec if codec in ('dvh1', 'dvhe'): hdr = 'DV' - elif codec == 'vp9' and vcodec.startswith('vp9.2'): + elif codec == 'av1' and len(parts) > 3 and parts[3] == '10': + hdr = 'HDR10' + elif full_codec.replace('0', '').startswith('vp9.2'): hdr = 'HDR10' - elif codec == 'av01': - parts = full_codec.split('.') - if len(parts) > 3 and parts[3] == '10': - hdr = 'HDR10' - vcodec = '.'.join(parts[:4]) elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'): if not acodec: acodec = full_codec From d89257f398fed8a44fae7d12d849114f9f4ca2be Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 3 Nov 2021 16:27:34 +0530 Subject: [PATCH 0166/2552] [youtube] Remove unnecessary no-playlist warning --- yt_dlp/extractor/youtube.py | 83 ++++++++++++++++++------------------- 1 file changed, 41 insertions(+), 42 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 64475edec..428414383 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2623,49 +2623,48 @@ class YoutubeIE(YoutubeBaseInfoExtractor): or search_meta(['og:title', 'twitter:title', 'title'])) video_description = get_first(video_details, 'shortDescription') - if not smuggled_data.get('force_singlefeed', False): - if not self.get_param('noplaylist'): - multifeed_metadata_list = get_first( - player_responses, - ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'), - expected_type=str) - if multifeed_metadata_list: - entries = [] - feed_ids = [] - for feed in multifeed_metadata_list.split(','): - # Unquote should take place before split on comma (,) since textual - # fields may contain comma as well (see - # https://github.com/ytdl-org/youtube-dl/issues/8536) - feed_data = compat_parse_qs( - compat_urllib_parse_unquote_plus(feed)) - - def feed_entry(name): - return try_get( - feed_data, lambda x: x[name][0], compat_str) - - feed_id = feed_entry('id') - if not feed_id: - continue - feed_title = feed_entry('title') - title = video_title - if feed_title: - title += ' (%s)' % feed_title - entries.append({ - '_type': 'url_transparent', - 'ie_key': 'Youtube', - 'url': smuggle_url( - '%swatch?v=%s' % (base_url, feed_data['id'][0]), - {'force_singlefeed': True}), - 'title': title, - }) - feed_ids.append(feed_id) - self.to_screen( - 'Downloading multifeed video (%s) - add --no-playlist to just download video %s' - % (', '.join(feed_ids), video_id)) - return self.playlist_result( - entries, video_id, video_title, video_description) - else: + multifeed_metadata_list = get_first( + player_responses, + ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'), + expected_type=str) + if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'): + if self.get_param('noplaylist'): self.to_screen('Downloading just video %s because of --no-playlist' % video_id) + else: + entries = [] + feed_ids = [] + for feed in multifeed_metadata_list.split(','): + # Unquote should take place before split on comma (,) since textual + # 
fields may contain comma as well (see + # https://github.com/ytdl-org/youtube-dl/issues/8536) + feed_data = compat_parse_qs( + compat_urllib_parse_unquote_plus(feed)) + + def feed_entry(name): + return try_get( + feed_data, lambda x: x[name][0], compat_str) + + feed_id = feed_entry('id') + if not feed_id: + continue + feed_title = feed_entry('title') + title = video_title + if feed_title: + title += ' (%s)' % feed_title + entries.append({ + '_type': 'url_transparent', + 'ie_key': 'Youtube', + 'url': smuggle_url( + '%swatch?v=%s' % (base_url, feed_data['id'][0]), + {'force_singlefeed': True}), + 'title': title, + }) + feed_ids.append(feed_id) + self.to_screen( + 'Downloading multifeed video (%s) - add --no-playlist to just download video %s' + % (', '.join(feed_ids), video_id)) + return self.playlist_result( + entries, video_id, video_title, video_description) live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails')) is_live = get_first(video_details, 'isLive') From bd93fd5d45e104561bad919d4775feba869d0145 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 3 Nov 2021 16:28:45 +0530 Subject: [PATCH 0167/2552] [fragment] Fix progress display in fragmented downloads Closes #1517 --- yt_dlp/downloader/common.py | 2 ++ yt_dlp/downloader/fragment.py | 14 +++++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index 6cfbb6657..bcf8ac955 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -319,6 +319,8 @@ class FileDownloader(object): msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s' else: msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s' + if s.get('fragment_index'): + msg_template += ' (frag %(fragment_index)s)' s['_default_template'] = msg_template % s self._report_progress_status(s) diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index a9d1471f8..d08fd52a1 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -31,6 +31,10 @@ class HttpQuietDownloader(HttpFD): def to_screen(self, *args, **kargs): pass + def report_retry(self, err, count, retries): + super().to_screen( + f'[download] Got server HTTP error: {err}. 
Retrying (attempt {count} of {self.format_retries(retries)}) ...') + class FragmentFD(FileDownloader): """ @@ -167,7 +171,7 @@ class FragmentFD(FileDownloader): self.ydl, { 'continuedl': True, - 'quiet': True, + 'quiet': self.params.get('quiet'), 'noprogress': True, 'ratelimit': self.params.get('ratelimit'), 'retries': self.params.get('retries', 0), @@ -237,6 +241,7 @@ class FragmentFD(FileDownloader): start = time.time() ctx.update({ 'started': start, + 'fragment_started': start, # Amount of fragment's bytes downloaded by the time of the previous # frag progress hook invocation 'prev_frag_downloaded_bytes': 0, @@ -267,6 +272,9 @@ class FragmentFD(FileDownloader): ctx['fragment_index'] = state['fragment_index'] state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes'] ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes'] + ctx['speed'] = state['speed'] = self.calc_speed( + ctx['fragment_started'], time_now, frag_total_bytes) + ctx['fragment_started'] = time.time() ctx['prev_frag_downloaded_bytes'] = 0 else: frag_downloaded_bytes = s['downloaded_bytes'] @@ -275,8 +283,8 @@ class FragmentFD(FileDownloader): state['eta'] = self.calc_eta( start, time_now, estimated_size - resume_len, state['downloaded_bytes'] - resume_len) - state['speed'] = s.get('speed') or ctx.get('speed') - ctx['speed'] = state['speed'] + ctx['speed'] = state['speed'] = self.calc_speed( + ctx['fragment_started'], time_now, frag_downloaded_bytes) ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes self._hook_progress(state, info_dict) From 31c49255bf647373734c2c7f917e0d24ab81ac95 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 4 Nov 2021 00:05:53 +0530 Subject: [PATCH 0168/2552] [ExtractAudio] Rescale --audio-quality correctly Authored by: CrypticSignal, pukkandan --- yt_dlp/__init__.py | 4 +++- yt_dlp/options.py | 2 +- yt_dlp/postprocessor/ffmpeg.py | 37 +++++++++++++++++++++------------- yt_dlp/utils.py | 2 +- 4 files changed, 28 insertions(+), 17 deletions(-) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 84628bf45..0070d50a8 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -29,6 +29,8 @@ from .utils import ( error_to_compat_str, ExistingVideoReached, expand_path, + float_or_none, + int_or_none, match_filter_func, MaxDownloadsReached, parse_duration, @@ -230,7 +232,7 @@ def _real_main(argv=None): parser.error('invalid audio format specified') if opts.audioquality: opts.audioquality = opts.audioquality.strip('k').strip('K') - if not opts.audioquality.isdigit(): + if int_or_none(float_or_none(opts.audioquality)) is None: # int_or_none prevents inf, nan parser.error('invalid audio quality specified') if opts.recodevideo is not None: opts.recodevideo = opts.recodevideo.replace(' ', '') diff --git a/yt_dlp/options.py b/yt_dlp/options.py index a3a6c74b3..bd9fdd37b 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1215,7 +1215,7 @@ def parseOpts(overrideArguments=None): postproc.add_option( '--audio-quality', metavar='QUALITY', dest='audioquality', default='5', - help='Specify ffmpeg audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default %default)') + help='Specify ffmpeg audio quality, insert a value between 0 (best) and 10 (worst) for VBR or a specific bitrate like 128K (default %default)') postproc.add_option( '--remux-video', metavar='FORMAT', dest='remuxvideo', default=None, diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index b7fcc569b..96f7be6ff 100644 --- 
a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -371,9 +371,29 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False): FFmpegPostProcessor.__init__(self, downloader) self._preferredcodec = preferredcodec or 'best' - self._preferredquality = preferredquality + self._preferredquality = float_or_none(preferredquality) self._nopostoverwrites = nopostoverwrites + def _quality_args(self, codec): + if self._preferredquality is None: + return [] + elif self._preferredquality > 10: + return ['-b:a', f'{self._preferredquality}k'] + + limits = { + 'libmp3lame': (10, 0), + 'aac': (0.1, 11), + 'vorbis': (0, 10), + 'opus': None, # doesn't support -q:a + 'wav': None, + 'flac': None, + }[codec] + if not limits: + return [] + + q = limits[1] + (limits[0] - limits[1]) * (self._preferredquality / 10) + return ['-q:a', f'{q}'] + def run_ffmpeg(self, path, out_path, codec, more_opts): if codec is None: acodec_opts = [] @@ -417,23 +437,12 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): # MP3 otherwise. acodec = 'libmp3lame' extension = 'mp3' - more_opts = [] - if self._preferredquality is not None: - if int(self._preferredquality) < 10: - more_opts += ['-q:a', self._preferredquality] - else: - more_opts += ['-b:a', self._preferredquality + 'k'] + more_opts = self._quality_args(acodec) else: # We convert the audio (lossy if codec is lossy) acodec = ACODECS[self._preferredcodec] extension = self._preferredcodec - more_opts = [] - if self._preferredquality is not None: - # The opus codec doesn't support the -aq option - if int(self._preferredquality) < 10 and extension != 'opus': - more_opts += ['-q:a', self._preferredquality] - else: - more_opts += ['-b:a', self._preferredquality + 'k'] + more_opts = self._quality_args(acodec) if self._preferredcodec == 'aac': more_opts += ['-f', 'adts'] if self._preferredcodec == 'm4a': diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 2953909fc..62f83c9ce 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -3871,7 +3871,7 @@ def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1): return default try: return int(v) * invscale // scale - except (ValueError, TypeError): + except (ValueError, TypeError, OverflowError): return default From 9af98e17bd2b761d304e88a359b0f7a40e6c0a67 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 4 Nov 2021 00:23:48 +0530 Subject: [PATCH 0169/2552] [ffmpeg] Framework for feature detection Related: #1502, #1237, https://github.com/ytdl-org/youtube-dl/pull/29581 --- yt_dlp/__init__.py | 3 +- yt_dlp/postprocessor/ffmpeg.py | 79 ++++++++++++++++++---------------- yt_dlp/utils.py | 15 ++++--- 3 files changed, 55 insertions(+), 42 deletions(-) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 0070d50a8..3020b6e95 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -232,7 +232,8 @@ def _real_main(argv=None): parser.error('invalid audio format specified') if opts.audioquality: opts.audioquality = opts.audioquality.strip('k').strip('K') - if int_or_none(float_or_none(opts.audioquality)) is None: # int_or_none prevents inf, nan + audioquality = int_or_none(float_or_none(opts.audioquality)) # int_or_none prevents inf, nan + if audioquality is None or audioquality < 0: parser.error('invalid audio quality specified') if opts.recodevideo is not None: opts.recodevideo = opts.recodevideo.replace(' ', '') diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 
96f7be6ff..c2415c59a 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -16,7 +16,8 @@ from ..utils import ( encodeArgument, encodeFilename, float_or_none, - get_exe_version, + _get_exe_version_output, + detect_exe_version, is_outdated_version, ISO639Utils, orderedSet, @@ -80,10 +81,10 @@ class FFmpegPostProcessor(PostProcessor): def _determine_executables(self): programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe'] - prefer_ffmpeg = True - def get_ffmpeg_version(path): - ver = get_exe_version(path, args=['-version']) + def get_ffmpeg_version(path, prog): + out = _get_exe_version_output(path, ['-bsfs']) + ver = detect_exe_version(out) if out else False if ver: regexs = [ r'(?:\d+:)?([0-9.]+)-[0-9]+ubuntu[0-9.]+$', # Ubuntu, see [1] @@ -94,42 +95,46 @@ class FFmpegPostProcessor(PostProcessor): mobj = re.match(regex, ver) if mobj: ver = mobj.group(1) - return ver + self._versions[prog] = ver + if prog != 'ffmpeg' or not out: + return + + # TODO: Feature detection self.basename = None self.probe_basename = None - self._paths = None self._versions = None - if self._downloader: - prefer_ffmpeg = self.get_param('prefer_ffmpeg', True) - location = self.get_param('ffmpeg_location') - if location is not None: - if not os.path.exists(location): - self.report_warning( - 'ffmpeg-location %s does not exist! ' - 'Continuing without ffmpeg.' % (location)) - self._versions = {} - return - elif os.path.isdir(location): - dirname, basename = location, None - else: - basename = os.path.splitext(os.path.basename(location))[0] - basename = next((p for p in programs if basename.startswith(p)), 'ffmpeg') - dirname = os.path.dirname(os.path.abspath(location)) - if basename in ('ffmpeg', 'ffprobe'): - prefer_ffmpeg = True - - self._paths = dict( - (p, os.path.join(dirname, p)) for p in programs) - if basename: - self._paths[basename] = location - self._versions = dict( - (p, get_ffmpeg_version(self._paths[p])) for p in programs) - if self._versions is None: - self._versions = dict( - (p, get_ffmpeg_version(p)) for p in programs) - self._paths = dict((p, p) for p in programs) + self._features = {} + + prefer_ffmpeg = self.get_param('prefer_ffmpeg', True) + location = self.get_param('ffmpeg_location') + if location is None: + self._paths = {p: p for p in programs} + else: + if not os.path.exists(location): + self.report_warning( + 'ffmpeg-location %s does not exist! ' + 'Continuing without ffmpeg.' % (location)) + self._versions = {} + return + elif os.path.isdir(location): + dirname, basename = location, None + else: + basename = os.path.splitext(os.path.basename(location))[0] + basename = next((p for p in programs if basename.startswith(p)), 'ffmpeg') + dirname = os.path.dirname(os.path.abspath(location)) + if basename in ('ffmpeg', 'ffprobe'): + prefer_ffmpeg = True + + self._paths = dict( + (p, os.path.join(dirname, p)) for p in programs) + if basename: + self._paths[basename] = location + + self._versions = {} + for p in programs: + get_ffmpeg_version(self._paths[p], p) if prefer_ffmpeg is False: prefs = ('avconv', 'ffmpeg') @@ -382,7 +387,9 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): limits = { 'libmp3lame': (10, 0), - 'aac': (0.1, 11), + # FFmpeg's AAC encoder does not have an upper limit for the value of -q:a. 
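+            # For reference: each (x, y) pair in this dict is rescaled as
+            # q = y + (x - y) * (quality / 10), i.e. x is the encoder value
+            # used at --audio-quality 10 and y the one used at 0; libmp3lame,
+            # for example, turns --audio-quality 5 into -q:a 5.0.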
+ # Experimentally, with values over 4, bitrate changes were minimal or non-existent + 'aac': (0.1, 4), 'vorbis': (0, 10), 'opus': None, # doesn't support -q:a 'wav': None, diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 62f83c9ce..55e452a15 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4007,10 +4007,7 @@ def check_executable(exe, args=[]): return exe -def get_exe_version(exe, args=['--version'], - version_re=None, unrecognized='present'): - """ Returns the version of the specified executable, - or False if the executable is not present """ +def _get_exe_version_output(exe, args): try: # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers # SIGTTOU if yt-dlp is run in the background. @@ -4022,7 +4019,7 @@ def get_exe_version(exe, args=['--version'], return False if isinstance(out, bytes): # Python 2.x out = out.decode('ascii', 'ignore') - return detect_exe_version(out, version_re, unrecognized) + return out def detect_exe_version(output, version_re=None, unrecognized='present'): @@ -4036,6 +4033,14 @@ def detect_exe_version(output, version_re=None, unrecognized='present'): return unrecognized +def get_exe_version(exe, args=['--version'], + version_re=None, unrecognized='present'): + """ Returns the version of the specified executable, + or False if the executable is not present """ + out = _get_exe_version_output(exe, args) + return detect_exe_version(out, version_re, unrecognized) if out else False + + class LazyList(collections.abc.Sequence): ''' Lazy immutable list from an iterable Note that slices of a LazyList are lists and not LazyList''' From 673c0057e81410b3da2b0c07ebf7abca13286eab Mon Sep 17 00:00:00 2001 From: CrypticSignal Date: Thu, 4 Nov 2021 02:23:40 +0530 Subject: [PATCH 0170/2552] [ExtractAudio] Use `libfdk_aac` if available Closes #1502 Authored by: CrypticSignal --- yt_dlp/postprocessor/ffmpeg.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index c2415c59a..3f82eabf5 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -99,7 +99,7 @@ class FFmpegPostProcessor(PostProcessor): if prog != 'ffmpeg' or not out: return - # TODO: Feature detection + self._features['fdk'] = '--enable-libfdk-aac' in out self.basename = None self.probe_basename = None @@ -391,6 +391,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): # Experimentally, with values over 4, bitrate changes were minimal or non-existent 'aac': (0.1, 4), 'vorbis': (0, 10), + 'libfdk_aac': (1, 5), 'opus': None, # doesn't support -q:a 'wav': None, 'flac': None, @@ -399,6 +400,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): return [] q = limits[1] + (limits[0] - limits[1]) * (self._preferredquality / 10) + if codec == 'libfdk_aac': + return ['-vbr', f'{int(q)}'] return ['-q:a', f'{q}'] def run_ffmpeg(self, path, out_path, codec, more_opts): @@ -448,6 +451,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): else: # We convert the audio (lossy if codec is lossy) acodec = ACODECS[self._preferredcodec] + if acodec == 'aac' and self._features.get('fdk'): + acodec = 'libfdk_aac' extension = self._preferredcodec more_opts = self._quality_args(acodec) if self._preferredcodec == 'aac': From 832e9000c71c5bbd97c93d21051044cf61a3b87f Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 4 Nov 2021 02:24:12 +0530 Subject: [PATCH 0171/2552] [ffmpeg] Accurately detect presence of setts Closes #1237 --- yt_dlp/postprocessor/ffmpeg.py | 10 ++++++---- 1 file changed, 6 
insertions(+), 4 deletions(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 3f82eabf5..139b97fb4 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -99,7 +99,10 @@ class FFmpegPostProcessor(PostProcessor): if prog != 'ffmpeg' or not out: return - self._features['fdk'] = '--enable-libfdk-aac' in out + self._features = { + 'fdk': '--enable-libfdk-aac' in out, + 'setts': 'setts' in out.splitlines(), + } self.basename = None self.probe_basename = None @@ -827,11 +830,10 @@ class FFmpegFixupTimestampPP(FFmpegFixupPostProcessor): @PostProcessor._restrict_to(images=False) def run(self, info): - required_version = '4.4' - if is_outdated_version(self._versions[self.basename], required_version): + if not self._features.get('setts'): self.report_warning( 'A re-encode is needed to fix timestamps in older versions of ffmpeg. ' - f'Please install ffmpeg {required_version} or later to fixup without re-encoding') + 'Please install ffmpeg 4.4 or later to fixup without re-encoding') opts = ['-vf', 'setpts=PTS-STARTPTS'] else: opts = ['-c', 'copy', '-bsf', 'setts=ts=TS-STARTPTS'] From 8913ef74d76d8e93e4aeaf9d2827ca950c17f8ce Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 4 Nov 2021 03:10:49 +0530 Subject: [PATCH 0172/2552] [ffmpeg] Detect libavformat version for `aac_adtstoasc` and print available features in verbose head Based on https://github.com/ytdl-org/youtube-dl/pull/29581 --- yt_dlp/YoutubeDL.py | 6 +++++- yt_dlp/downloader/external.py | 3 +-- yt_dlp/postprocessor/ffmpeg.py | 10 +++++++++- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 4a9f4775b..a866178b0 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3350,7 +3350,11 @@ class YoutubeDL(object): platform.architecture()[0], platform_name())) - exe_versions = FFmpegPostProcessor.get_versions(self) + exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self) + ffmpeg_features = {key for key, val in ffmpeg_features.items() if val} + if ffmpeg_features: + exe_versions['ffmpeg'] += f' (%s)' % ','.join(ffmpeg_features) + exe_versions['rtmpdump'] = rtmpdump_version() exe_versions['phantomjs'] = PhantomJSwrapper._version() exe_str = ', '.join( diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index ce3370fb7..1efbb2fab 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -21,7 +21,6 @@ from ..utils import ( encodeArgument, handle_youtubedl_headers, check_executable, - is_outdated_version, Popen, sanitize_open, ) @@ -459,7 +458,7 @@ class FFmpegFD(ExternalFD): args += ['-f', 'mpegts'] else: args += ['-f', 'mp4'] - if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2', False)) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')): + if (ffpp.basename == 'ffmpeg' and ffpp._features.get('needs_adtstoasc')) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')): args += ['-bsf:a', 'aac_adtstoasc'] elif protocol == 'rtmp': args += ['-f', 'flv'] diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 139b97fb4..46e87baeb 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -75,9 +75,14 @@ class FFmpegPostProcessor(PostProcessor): self.basename, self.basename, required_version) self.report_warning(warning) + @staticmethod + def get_versions_and_features(downloader=None): + pp = 
FFmpegPostProcessor(downloader) + return pp._versions, pp._features + @staticmethod def get_versions(downloader=None): - return FFmpegPostProcessor(downloader)._versions + return FFmpegPostProcessor.get_version_and_features(downloader)[0] def _determine_executables(self): programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe'] @@ -99,9 +104,12 @@ class FFmpegPostProcessor(PostProcessor): if prog != 'ffmpeg' or not out: return + mobj = re.search(r'(?m)^\s+libavformat\s+(?:[0-9. ]+)\s+/\s+(?P[0-9. ]+)', out) + lavf_runtime_version = mobj.group('runtime').replace(' ', '') if mobj else None self._features = { 'fdk': '--enable-libfdk-aac' in out, 'setts': 'setts' in out.splitlines(), + 'needs_adtstoasc': is_outdated_version(lavf_runtime_version, '57.56.100', False), } self.basename = None From a4211baff55f72bd1ca0649407c3d134bfcd2646 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 4 Nov 2021 03:40:35 +0530 Subject: [PATCH 0173/2552] [cleanup] Minor cleanup --- .github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml | 2 +- .../2_site_support_request.yml | 2 +- .github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml | 4 ++-- README.md | 24 +++++++++---------- yt_dlp/YoutubeDL.py | 4 ++-- yt_dlp/downloader/common.py | 4 +++- yt_dlp/extractor/picarto.py | 2 +- yt_dlp/extractor/youtube.py | 2 +- yt_dlp/options.py | 2 +- 9 files changed, 24 insertions(+), 22 deletions(-) diff --git a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml index fdca0e53a..e23bc4195 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml @@ -43,7 +43,7 @@ body: attributes: label: Verbose log description: | - Provide the complete verbose output of yt-dlp that clearly demonstrates the problem. + Provide the complete verbose output of yt-dlp **that clearly demonstrates the problem**. Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv `), copy the WHOLE output and insert it below. It should look similar to this: placeholder: | diff --git a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml index f7a48edc7..f35384821 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml @@ -54,7 +54,7 @@ body: attributes: label: Verbose log description: | - Provide the complete verbose output using one of the example URLs provided above. + Provide the complete verbose output **using one of the example URLs provided above**. Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv `), copy the WHOLE output and insert it below. It should look similar to this: placeholder: | diff --git a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml index e4d669bb7..8219ebfd4 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml @@ -37,8 +37,8 @@ body: attributes: label: Verbose log description: | - Provide the complete verbose output of yt-dlp that clearly demonstrates the problem. - Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv `), copy the WHOLE output and insert it below. + Provide the complete verbose output of yt-dlp **that clearly demonstrates the problem**. + Add the `-Uv` flag to **your** command line you run yt-dlp with (`yt-dlp -Uv `), copy the WHOLE output and insert it below. 
It should look similar to this: placeholder: | [debug] Command-line config: ['-Uv', 'http://www.youtube.com/watch?v=BaW_jenozKc'] diff --git a/README.md b/README.md index 31bfca6a8..ccd221bb4 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,7 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/ * Search (`ytsearch:`, `ytsearchdate:`), search URLs and in-channel search works * Mixes supports downloading multiple pages of content * Most (but not all) age-gated content can be downloaded without cookies - * Partial workaround for throttling issue + * Fix for [n-sig based throttling](https://github.com/ytdl-org/youtube-dl/issues/29326) * Redirect channel's home URL automatically to `/video` to preserve the old behaviour * `255kbps` audio is extracted (if available) from youtube music when premium cookies are given * Youtube music Albums, channels etc can be downloaded ([except self-uploaded music](https://github.com/yt-dlp/yt-dlp/issues/723)) @@ -154,7 +154,7 @@ For ease of use, a few more compat options are available: You can install yt-dlp using one of the following methods: -#### Using the release binary +### Using the release binary You can simply download the [correct binary file](#release-files) for your OS: **[[Windows](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)] [[UNIX-like](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)]** @@ -177,7 +177,7 @@ sudo chmod a+rx /usr/local/bin/yt-dlp PS: The manpages, shell completion files etc. are available in [yt-dlp.tar.gz](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz) -#### With [PIP](https://pypi.org/project/pip) +### With [PIP](https://pypi.org/project/pip) You can install the [PyPI package](https://pypi.org/project/yt-dlp) with: ``` @@ -196,7 +196,7 @@ python3 -m pip3 install -U https://github.com/yt-dlp/yt-dlp/archive/master.zip Note that on some systems, you may need to use `py` or `python` instead of `python3` -#### With [Homebrew](https://brew.sh) +### With [Homebrew](https://brew.sh) macOS or Linux users that are using Homebrew can also install it by: @@ -204,14 +204,14 @@ macOS or Linux users that are using Homebrew can also install it by: brew install yt-dlp/taps/yt-dlp ``` -### UPDATE +## UPDATE You can use `yt-dlp -U` to update if you are [using the provided release](#using-the-release-binary) If you [installed with pip](#with-pip), simply re-run the same command that was used to install the program If you [installed using Homebrew](#with-homebrew), run `brew upgrade yt-dlp/taps/yt-dlp` -### RELEASE FILES +## RELEASE FILES #### Recommended @@ -238,7 +238,7 @@ File|Description [SHA2-512SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-512SUMS)|GNU-style SHA512 sums [SHA2-256SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-256SUMS)|GNU-style SHA256 sums -### DEPENDENCIES +## DEPENDENCIES Python versions 3.6+ (CPython and PyPy) are supported. Other versions and implementations may or may not work correctly. 
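On the ffmpeg side of the dependencies above, the recent patches probe the binary's capabilities rather than just its version: `get_ffmpeg_version` now inspects the full `ffmpeg -bsfs` output. A rough standalone sketch of that probing; `detect_ffmpeg_features` is a hypothetical helper that assumes an ffmpeg binary on PATH, and only the detected keys mirror the patches:

```python
import re
import subprocess

def detect_ffmpeg_features(path='ffmpeg'):
    # the banner (version + build configuration) goes to stderr and the
    # bitstream-filter list to stdout, so merge the two streams
    out = subprocess.run(
        [path, '-bsfs'], stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT, text=True).stdout or ''
    mobj = re.search(
        r'(?m)^\s+libavformat\s+(?:[0-9. ]+)\s+/\s+([0-9. ]+)', out)
    return {
        'fdk': '--enable-libfdk-aac' in out,   # libfdk_aac was compiled in
        'setts': 'setts' in out.splitlines(),  # setts bsf exists (ffmpeg 4.4+)
        'libavformat_runtime': mobj.group(1).replace(' ', '') if mobj else None,
    }

print(detect_ffmpeg_features())
```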
', '', html) From 89fcdff5d8e62c6153763650f12ec4eb4453bdff Mon Sep 17 00:00:00 2001 From: Lauren Liberda Date: Sat, 23 Oct 2021 03:25:09 +0200 Subject: [PATCH 0205/2552] [polskieradio] Add extractors (#1386) Authored by: selfisekai --- yt_dlp/extractor/extractors.py | 4 + yt_dlp/extractor/polskieradio.py | 303 +++++++++++++++++++++++++++---- 2 files changed, 267 insertions(+), 40 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 641481d01..741b9f021 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1108,6 +1108,10 @@ from .pokemon import ( from .polskieradio import ( PolskieRadioIE, PolskieRadioCategoryIE, + PolskieRadioPlayerIE, + PolskieRadioPodcastIE, + PolskieRadioPodcastListIE, + PolskieRadioRadioKierowcowIE, ) from .popcorntimes import PopcorntimesIE from .popcorntv import PopcornTVIE diff --git a/yt_dlp/extractor/polskieradio.py b/yt_dlp/extractor/polskieradio.py index 53fe0340a..b2b3eb29c 100644 --- a/yt_dlp/extractor/polskieradio.py +++ b/yt_dlp/extractor/polskieradio.py @@ -2,6 +2,8 @@ from __future__ import unicode_literals import itertools +import json +import math import re from .common import InfoExtractor @@ -12,15 +14,45 @@ from ..compat import ( ) from ..utils import ( extract_attributes, + ExtractorError, + InAdvancePagedList, int_or_none, + js_to_json, + parse_iso8601, strip_or_none, unified_timestamp, unescapeHTML, + url_or_none, ) -class PolskieRadioIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+/\d+/Artykul/(?P[0-9]+)' +class PolskieRadioBaseExtractor(InfoExtractor): + def _extract_webpage_player_entries(self, webpage, playlist_id, base_data): + media_urls = set() + + for data_media in re.findall(r'<[^>]+data-media="?({[^>]+})"?', webpage): + media = self._parse_json(data_media, playlist_id, transform_source=unescapeHTML, fatal=False) + if not media.get('file') or not media.get('desc'): + continue + media_url = self._proto_relative_url(media['file']) + if media_url in media_urls: + continue + media_urls.add(media_url) + entry = base_data.copy() + entry.update({ + 'id': compat_str(media['id']), + 'url': media_url, + 'duration': int_or_none(media.get('length')), + 'vcodec': 'none' if media.get('provider') == 'audio' else None, + }) + entry_title = compat_urllib_parse_unquote(media['desc']) + if entry_title: + entry['title'] = entry_title + yield entry + + +class PolskieRadioIE(PolskieRadioBaseExtractor): + _VALID_URL = r'https?://(?:www\.)?polskieradio(?:24)?\.pl/\d+/\d+/Artykul/(?P[0-9]+)' _TESTS = [{ # Old-style single broadcast. 'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943,Prof-Andrzej-Nowak-o-historii-nie-da-sie-myslec-beznamietnie', 'info_dict': { @@ -59,22 +91,14 @@ class PolskieRadioIE(InfoExtractor): 'thumbnail': r're:^https?://static\.prsa\.pl/images/.*\.jpg$' }, }], - }, { # Old-style multiple broadcast playlist. - 'url': 'https://www.polskieradio.pl/8/4346/Artykul/2487823,Marek-Kondrat-czyta-Mistrza-i-Malgorzate', - 'info_dict': { - 'id': '2487823', - 'title': 'Marek Kondrat czyta "Mistrza i Małgorzatę"', - 'description': 'md5:8422a95cc83834f2aaeff9d82e9c8f39', - }, - 'playlist_mincount': 50, - }, { # New-style multiple broadcast playlist. 
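The new `PolskieRadioBaseExtractor._extract_webpage_player_entries` helper above walks every `data-media` JSON attribute on the page, skips entries without a file or description, dedupes by media URL and yields entry dicts merged onto shared base data. A simplified standalone sketch, using plain `json`/`html` in place of the extractor plumbing and omitting the proto-relative-URL and `vcodec` handling:

```python
import html
import json
import re

def webpage_player_entries(page, base_data):
    seen = set()
    for blob in re.findall(r'<[^>]+data-media="?({[^>]+})"?', page):
        media = json.loads(html.unescape(blob))
        if not media.get('file') or not media.get('desc') or media['file'] in seen:
            continue
        seen.add(media['file'])
        yield {
            **base_data,
            'id': str(media['id']),
            'url': media['file'],
            'title': media['desc'],
            'duration': media.get('length'),
        }

demo = ('<article data-media="{&quot;id&quot;:1587945,'
        '&quot;file&quot;:&quot;//static.prsa.pl/a.mp3&quot;,'
        '&quot;desc&quot;:&quot;Prof. Andrzej Nowak&quot;,&quot;length&quot;:514}">')
print(list(webpage_player_entries(demo, {'timestamp': 1463024640})))
```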
- 'url': 'https://www.polskieradio.pl/8/4346/Artykul/2541317,Czytamy-Kalendarz-i-klepsydre-Tadeusza-Konwickiego', + }, { + # PR4 audition - other frontend + 'url': 'https://www.polskieradio.pl/10/6071/Artykul/2610977,Poglos-29-pazdziernika-godz-2301', 'info_dict': { - 'id': '2541317', - 'title': 'Czytamy "Kalendarz i klepsydrę" Tadeusza Konwickiego', - 'description': 'md5:0baeaa46d877f1351fb2eeed3e871f9f', + 'id': '2610977', + 'ext': 'mp3', + 'title': 'Pogłos 29 października godz. 23:01', }, - 'playlist_mincount': 15, }, { 'url': 'http://polskieradio.pl/9/305/Artykul/1632955,Bardzo-popularne-slowo-remis', 'only_matching': True, @@ -85,6 +109,9 @@ class PolskieRadioIE(InfoExtractor): # with mp4 video 'url': 'http://www.polskieradio.pl/9/299/Artykul/1634903,Brexit-Leszek-Miller-swiat-sie-nie-zawali-Europa-bedzie-trwac-dalej', 'only_matching': True, + }, { + 'url': 'https://polskieradio24.pl/130/4503/Artykul/2621876,Narusza-nasza-suwerennosc-Publicysci-o-uzaleznieniu-funduszy-UE-od-praworzadnosci', + 'only_matching': True, }] def _real_extract(self, url): @@ -94,39 +121,37 @@ class PolskieRadioIE(InfoExtractor): content = self._search_regex( r'(?s)]+class="\s*this-article\s*"[^>]*>(.+?)]+class="tags"[^>]*>', - webpage, 'content') + webpage, 'content', default=None) timestamp = unified_timestamp(self._html_search_regex( r'(?s)]+id="datetime2"[^>]*>(.+?)', - webpage, 'timestamp', fatal=False)) + webpage, 'timestamp', default=None)) - thumbnail_url = self._og_search_thumbnail(webpage) + thumbnail_url = self._og_search_thumbnail(webpage, default=None) - entries = [] + title = self._og_search_title(webpage).strip() - media_urls = set() + description = strip_or_none(self._og_search_description(webpage, default=None)) + description = description.replace('\xa0', ' ') if description is not None else None - for data_media in re.findall(r'<[^>]+data-media="?({[^>]+})"?', content): - media = self._parse_json(data_media, playlist_id, transform_source=unescapeHTML, fatal=False) - if not media.get('file') or not media.get('desc'): - continue - media_url = self._proto_relative_url(media['file'], 'http:') - if media_url in media_urls: - continue - media_urls.add(media_url) - entries.append({ - 'id': compat_str(media['id']), - 'url': media_url, - 'title': compat_urllib_parse_unquote(media['desc']), - 'duration': int_or_none(media.get('length')), - 'vcodec': 'none' if media.get('provider') == 'audio' else None, + if not content: + return { + 'id': playlist_id, + 'url': self._proto_relative_url( + self._search_regex( + r"source:\s*'(//static\.prsa\.pl/[^']+)'", + webpage, 'audition record url')), + 'title': title, + 'description': description, 'timestamp': timestamp, - 'thumbnail': thumbnail_url - }) + 'thumbnail': thumbnail_url, + } - title = self._og_search_title(webpage).strip() - description = strip_or_none(self._og_search_description(webpage)) - description = description.replace('\xa0', ' ') if description is not None else None + entries = self._extract_webpage_player_entries(content, playlist_id, { + 'title': title, + 'timestamp': timestamp, + 'thumbnail': thumbnail_url, + }) return self.playlist_result(entries, playlist_id, title, description) @@ -207,3 +232,201 @@ class PolskieRadioCategoryIE(InfoExtractor): return self.playlist_result( self._entries(url, webpage, category_id), category_id, title) + + +class PolskieRadioPlayerIE(InfoExtractor): + IE_NAME = 'polskieradio:player' + _VALID_URL = r'https?://player\.polskieradio\.pl/anteny/(?P[^/]+)' + + _BASE_URL = 'https://player.polskieradio.pl' + 
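+    # The live-player frontend is a JS app: the channel list is embedded in
+    # the main.bundle.js file fetched below and parsed out with js_to_json,
+    # while the per-channel stream URLs come from the "stacje" API endpoint.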
_PLAYER_URL = 'https://player.polskieradio.pl/main.bundle.js' + _STATIONS_API_URL = 'https://apipr.polskieradio.pl/api/stacje' + + _TESTS = [{ + 'url': 'https://player.polskieradio.pl/anteny/trojka', + 'info_dict': { + 'id': '3', + 'ext': 'm4a', + 'title': 'Trójka', + }, + 'params': { + 'format': 'bestaudio', + 'skip_download': 'endless stream', + }, + }] + + def _get_channel_list(self, channel_url='no_channel'): + player_code = self._download_webpage( + self._PLAYER_URL, channel_url, + note='Downloading js player') + channel_list = js_to_json(self._search_regex( + r';var r="anteny",a=(\[.+?\])},', player_code, 'channel list')) + return self._parse_json(channel_list, channel_url) + + def _real_extract(self, url): + channel_url = self._match_id(url) + channel_list = self._get_channel_list(channel_url) + + channel = next((c for c in channel_list if c.get('url') == channel_url), None) + + if not channel: + raise ExtractorError('Channel not found') + + station_list = self._download_json(self._STATIONS_API_URL, channel_url, + note='Downloading stream url list', + headers={ + 'Accept': 'application/json', + 'Referer': url, + 'Origin': self._BASE_URL, + }) + station = next((s for s in station_list + if s.get('Name') == (channel.get('streamName') or channel.get('name'))), None) + if not station: + raise ExtractorError('Station not found even though we extracted channel') + + formats = [] + for stream_url in station['Streams']: + stream_url = self._proto_relative_url(stream_url) + if stream_url.endswith('/playlist.m3u8'): + formats.extend(self._extract_m3u8_formats(stream_url, channel_url, live=True)) + elif stream_url.endswith('/manifest.f4m'): + formats.extend(self._extract_mpd_formats(stream_url, channel_url)) + elif stream_url.endswith('/Manifest'): + formats.extend(self._extract_ism_formats(stream_url, channel_url)) + else: + formats.append({ + 'url': stream_url, + }) + + self._sort_formats(formats) + + return { + 'id': compat_str(channel['id']), + 'formats': formats, + 'title': channel.get('name') or channel.get('streamName'), + 'display_id': channel_url, + 'thumbnail': f'{self._BASE_URL}/images/{channel_url}-color-logo.png', + 'is_live': True, + } + + +class PolskieRadioPodcastBaseExtractor(InfoExtractor): + _API_BASE = 'https://apipodcasts.polskieradio.pl/api' + + def _parse_episode(self, data): + return { + 'id': data['guid'], + 'formats': [{ + 'url': data['url'], + 'filesize': int_or_none(data.get('fileSize')), + }], + 'title': data['title'], + 'description': data.get('description'), + 'duration': int_or_none(data.get('length')), + 'timestamp': parse_iso8601(data.get('publishDate')), + 'thumbnail': url_or_none(data.get('image')), + 'series': data.get('podcastTitle'), + 'episode': data['title'], + } + + +class PolskieRadioPodcastListIE(PolskieRadioPodcastBaseExtractor): + IE_NAME = 'polskieradio:podcast:list' + _VALID_URL = r'https?://podcasty\.polskieradio\.pl/podcast/(?P\d+)' + _TESTS = [{ + 'url': 'https://podcasty.polskieradio.pl/podcast/8/', + 'info_dict': { + 'id': '8', + 'title': 'Śniadanie w Trójce', + 'description': 'md5:57abcc27bc4c6a6b25baa3061975b9ef', + 'uploader': 'Beata Michniewicz', + }, + 'playlist_mincount': 714, + }] + _PAGE_SIZE = 10 + + def _call_api(self, podcast_id, page): + return self._download_json( + f'{self._API_BASE}/Podcasts/{podcast_id}/?pageSize={self._PAGE_SIZE}&page={page}', + podcast_id, f'Downloading page {page}') + + def _real_extract(self, url): + podcast_id = self._match_id(url) + data = self._call_api(podcast_id, 1) + + def get_page(page_num): + 
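+            # page 0 reuses the response already fetched above; later pages
+            # are downloaded on demand by InAdvancePagedList, _PAGE_SIZE at a time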
page_data = self._call_api(podcast_id, page_num + 1) if page_num else data + yield from (self._parse_episode(ep) for ep in page_data['items']) + + return { + '_type': 'playlist', + 'entries': InAdvancePagedList( + get_page, math.ceil(data['itemCount'] / self._PAGE_SIZE), self._PAGE_SIZE), + 'id': str(data['id']), + 'title': data['title'], + 'description': data.get('description'), + 'uploader': data.get('announcer'), + } + + +class PolskieRadioPodcastIE(PolskieRadioPodcastBaseExtractor): + IE_NAME = 'polskieradio:podcast' + _VALID_URL = r'https?://podcasty\.polskieradio\.pl/track/(?P[a-f\d]{8}(?:-[a-f\d]{4}){4}[a-f\d]{8})' + _TESTS = [{ + 'url': 'https://podcasty.polskieradio.pl/track/6eafe403-cb8f-4756-b896-4455c3713c32', + 'info_dict': { + 'id': '6eafe403-cb8f-4756-b896-4455c3713c32', + 'ext': 'mp3', + 'title': 'Theresa May rezygnuje. Co dalej z brexitem?', + 'description': 'md5:e41c409a29d022b70ef0faa61dbded60', + }, + }] + + def _real_extract(self, url): + podcast_id = self._match_id(url) + data = self._download_json( + f'{self._API_BASE}/audio', + podcast_id, 'Downloading podcast metadata', + data=json.dumps({ + 'guids': [podcast_id], + }).encode('utf-8'), + headers={ + 'Content-Type': 'application/json', + }) + return self._parse_episode(data[0]) + + +class PolskieRadioRadioKierowcowIE(PolskieRadioBaseExtractor): + _VALID_URL = r'https?://(?:www\.)?radiokierowcow\.pl/artykul/(?P[0-9]+)' + IE_NAME = 'polskieradio:kierowcow' + + _TESTS = [{ + 'url': 'https://radiokierowcow.pl/artykul/2694529', + 'info_dict': { + 'id': '2694529', + 'title': 'Zielona fala reliktem przeszłości?', + 'description': 'md5:343950a8717c9818fdfd4bd2b8ca9ff2', + }, + 'playlist_count': 3, + }] + + def _real_extract(self, url): + media_id = self._match_id(url) + webpage = self._download_webpage(url, media_id) + nextjs_build = self._search_nextjs_data(webpage, media_id)['buildId'] + article = self._download_json( + f'https://radiokierowcow.pl/_next/data/{nextjs_build}/artykul/{media_id}.json?articleId={media_id}', + media_id) + data = article['pageProps']['data'] + title = data['title'] + entries = self._extract_webpage_player_entries(data['content'], media_id, { + 'title': title, + }) + + return { + '_type': 'playlist', + 'id': media_id, + 'entries': entries, + 'title': title, + 'description': data.get('lead'), + } From ed76230b3f61d3440da5b71170e243cd2bfe693b Mon Sep 17 00:00:00 2001 From: Lauren Liberda Date: Sat, 23 Oct 2021 01:46:56 +0200 Subject: [PATCH 0206/2552] [polsatgo] Add extractor (#1386) Authored by: selfisekai, sdomi Co-authored-by: Dominika Liberda --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/polsatgo.py | 90 ++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 yt_dlp/extractor/polsatgo.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 741b9f021..bd0da2c38 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1105,6 +1105,7 @@ from .pokemon import ( PokemonIE, PokemonWatchIE, ) +from .polsatgo import PolsatGoIE from .polskieradio import ( PolskieRadioIE, PolskieRadioCategoryIE, diff --git a/yt_dlp/extractor/polsatgo.py b/yt_dlp/extractor/polsatgo.py new file mode 100644 index 000000000..1e3f46c07 --- /dev/null +++ b/yt_dlp/extractor/polsatgo.py @@ -0,0 +1,90 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from uuid import uuid4 +import json + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + try_get, + url_or_none, + ExtractorError, +) + + +class 
PolsatGoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?polsat(?:box)?go\.pl/.+/(?P[0-9a-fA-F]+)(?:[/#?]|$)' + _TESTS = [{ + 'url': 'https://polsatgo.pl/wideo/seriale/swiat-wedlug-kiepskich/5024045/sezon-1/5028300/swiat-wedlug-kiepskich-odcinek-88/4121', + 'info_dict': { + 'id': '4121', + 'ext': 'mp4', + 'title': 'Świat według Kiepskich - Odcinek 88', + 'age_limit': 12, + }, + }] + + def _extract_formats(self, sources, video_id): + for source in sources or []: + if not source.get('id'): + continue + url = url_or_none(self._call_api( + 'drm', video_id, 'getPseudoLicense', + {'mediaId': video_id, 'sourceId': source['id']}).get('url')) + if not url: + continue + yield { + 'url': url, + 'height': int_or_none(try_get(source, lambda x: x['quality'][:-1])) + } + + def _real_extract(self, url): + video_id = self._match_id(url) + media = self._call_api('navigation', video_id, 'prePlayData', {'mediaId': video_id})['mediaItem'] + + formats = list(self._extract_formats( + try_get(media, lambda x: x['playback']['mediaSources']), video_id)) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': media['displayInfo']['title'], + 'formats': formats, + 'age_limit': int_or_none(media['displayInfo']['ageGroup']) + } + + def _call_api(self, endpoint, media_id, method, params): + rand_uuid = str(uuid4()) + res = self._download_json( + f'https://b2c-mobile.redefine.pl/rpc/{endpoint}/', media_id, + note=f'Downloading {method} JSON metadata', + data=json.dumps({ + 'method': method, + 'id': '2137', + 'jsonrpc': '2.0', + 'params': { + **params, + 'userAgentData': { + 'deviceType': 'mobile', + 'application': 'native', + 'os': 'android', + 'build': 10003, + 'widevine': False, + 'portal': 'pg', + 'player': 'cpplayer', + }, + 'deviceId': { + 'type': 'other', + 'value': rand_uuid, + }, + 'clientId': rand_uuid, + 'cpid': 1, + }, + }).encode('utf-8'), + headers={'Content-type': 'application/json'}) + if not res.get('result'): + if res['error']['code'] == 13404: + raise ExtractorError('This video is either unavailable in your region or is DRM protected', expected=True) + raise ExtractorError(f'Solorz said: {res["error"]["message"]} - {res["error"]["data"]["userMessage"]}') + return res['result'] From 3f771f75d7277e54411a6e2ae36e74d7ddb993dd Mon Sep 17 00:00:00 2001 From: Lauren Liberda Date: Sun, 31 Oct 2021 10:58:57 +0530 Subject: [PATCH 0207/2552] [radiokapital] Add extractors (#1401) Authored by: selfisekai --- yt_dlp/extractor/extractors.py | 4 ++ yt_dlp/extractor/radiokapital.py | 99 ++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+) create mode 100644 yt_dlp/extractor/radiokapital.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index bd0da2c38..4a06ec578 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1159,6 +1159,10 @@ from .radiode import RadioDeIE from .radiojavan import RadioJavanIE from .radiobremen import RadioBremenIE from .radiofrance import RadioFranceIE +from .radiokapital import ( + RadioKapitalIE, + RadioKapitalShowIE, +) from .radlive import ( RadLiveIE, RadLiveChannelIE, diff --git a/yt_dlp/extractor/radiokapital.py b/yt_dlp/extractor/radiokapital.py new file mode 100644 index 000000000..2e93e034f --- /dev/null +++ b/yt_dlp/extractor/radiokapital.py @@ -0,0 +1,99 @@ +# coding: utf-8 + +from .common import InfoExtractor +from ..utils import ( + clean_html, + traverse_obj, + unescapeHTML, +) + +import itertools +from urllib.parse import urlencode + + +class RadioKapitalBaseIE(InfoExtractor): + def 
_call_api(self, resource, video_id, note='Downloading JSON metadata', qs={}): + return self._download_json( + f'https://www.radiokapital.pl/wp-json/kapital/v1/{resource}?{urlencode(qs)}', + video_id, note=note) + + def _parse_episode(self, data): + release = '%s%s%s' % (data['published'][6:11], data['published'][3:6], data['published'][:3]) + return { + '_type': 'url_transparent', + 'url': data['mixcloud_url'], + 'ie_key': 'Mixcloud', + 'title': unescapeHTML(data['title']), + 'description': clean_html(data.get('content')), + 'tags': traverse_obj(data, ('tags', ..., 'name')), + 'release_date': release, + 'series': traverse_obj(data, ('show', 'title')), + } + + +class RadioKapitalIE(RadioKapitalBaseIE): + IE_NAME = 'radiokapital' + _VALID_URL = r'https?://(?:www\.)?radiokapital\.pl/shows/[a-z\d-]+/(?P[a-z\d-]+)' + + _TESTS = [{ + 'url': 'https://radiokapital.pl/shows/tutaj-sa-smoki/5-its-okay-to-be-immaterial', + 'info_dict': { + 'id': 'radiokapital_radio-kapitał-tutaj-są-smoki-5-its-okay-to-be-immaterial-2021-05-20', + 'ext': 'm4a', + 'title': '#5: It’s okay to\xa0be\xa0immaterial', + 'description': 'md5:2499da5fbfb0e88333b7d37ec8e9e4c4', + 'uploader': 'Radio Kapitał', + 'uploader_id': 'radiokapital', + 'timestamp': 1621640164, + 'upload_date': '20210521', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + episode = self._call_api('episodes/%s' % video_id, video_id) + return self._parse_episode(episode) + + +class RadioKapitalShowIE(RadioKapitalBaseIE): + IE_NAME = 'radiokapital:show' + _VALID_URL = r'https?://(?:www\.)?radiokapital\.pl/shows/(?P[a-z\d-]+)/?(?:$|[?#])' + + _TESTS = [{ + 'url': 'https://radiokapital.pl/shows/wesz', + 'info_dict': { + 'id': '100', + 'title': 'WĘSZ', + 'description': 'md5:3a557a1e0f31af612b0dcc85b1e0ca5c', + }, + 'playlist_mincount': 17, + }] + + def _get_episode_list(self, series_id, page_no): + return self._call_api( + 'episodes', series_id, + f'Downloading episode list page #{page_no}', qs={ + 'show': series_id, + 'page': page_no, + }) + + def _entries(self, series_id): + for page_no in itertools.count(1): + episode_list = self._get_episode_list(series_id, page_no) + yield from (self._parse_episode(ep) for ep in episode_list['items']) + if episode_list['next'] is None: + break + + def _real_extract(self, url): + series_id = self._match_id(url) + + show = self._call_api(f'shows/{series_id}', series_id, 'Downloading show metadata') + entries = self._entries(series_id) + return { + '_type': 'playlist', + 'entries': entries, + 'id': str(show['id']), + 'title': show.get('title'), + 'description': clean_html(show.get('content')), + } From c0599d4fe493730236c7e62ed63575ea0d3f3fa2 Mon Sep 17 00:00:00 2001 From: Lauren Liberda Date: Sun, 31 Oct 2021 10:59:17 +0530 Subject: [PATCH 0208/2552] [wppilot] Add extractors (#1401) Authored by: selfisekai --- yt_dlp/extractor/extractors.py | 4 + yt_dlp/extractor/wppilot.py | 177 +++++++++++++++++++++++++++++++++ 2 files changed, 181 insertions(+) create mode 100644 yt_dlp/extractor/wppilot.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 4a06ec578..d47c06647 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1788,6 +1788,10 @@ from .wistia import ( WistiaPlaylistIE, ) from .worldstarhiphop import WorldStarHipHopIE +from .wppilot import ( + WPPilotIE, + WPPilotChannelsIE, +) from .wsj import ( WSJIE, WSJArticleIE, diff --git a/yt_dlp/extractor/wppilot.py b/yt_dlp/extractor/wppilot.py new file mode 100644 index 
000000000..3003a0f10
--- /dev/null
+++ b/yt_dlp/extractor/wppilot.py
@@ -0,0 +1,177 @@
+# coding: utf-8
+
+from .common import InfoExtractor
+from ..utils import (
+    try_get,
+    ExtractorError,
+)
+
+import json
+import random
+import re
+
+
+class WPPilotBaseIE(InfoExtractor):
+    _VIDEO_URL = 'https://pilot.wp.pl/api/v1/channel/%s'
+    _VIDEO_GUEST_URL = 'https://pilot.wp.pl/api/v1/guest/channel/%s'
+
+    _HEADERS_WEB = {
+        'Content-Type': 'application/json; charset=UTF-8',
+        'Referer': 'https://pilot.wp.pl/tv/',
+    }
+
+    def _get_channel_list(self, cache=True):
+        if cache is True:
+            cache_res = self._downloader.cache.load('wppilot', 'channel-list')
+            if cache_res:
+                return cache_res, True
+        webpage = self._download_webpage('https://pilot.wp.pl/tv/', None, 'Downloading webpage')
+        page_data_base_url = self._search_regex(
+            r' ',
+            ], webpage, 'video id', default=page_id)
         return {
             '_type': 'url_transparent',
             'url': 'tvp:' + video_id,
             'description': self._og_search_description(
-                webpage, default=None) or self._html_search_meta(
-                'description', webpage, default=None),
+                webpage, default=None) or (self._html_search_meta(
+                'description', webpage, default=None)
+                if '//s.tvp.pl/files/portal/v' in webpage else None),
             'thumbnail': self._og_search_thumbnail(webpage, default=None),
             'ie_key': 'TVPEmbed',
         }
@@ -252,18 +417,20 @@ class TVPWebsiteIE(InfoExtractor):
     _TESTS = [{
         # series
-        'url': 'https://vod.tvp.pl/website/lzy-cennet,38678312/video',
+        'url': 'https://vod.tvp.pl/website/wspaniale-stulecie,17069012/video',
         'info_dict': {
-            'id': '38678312',
+            'id': '17069012',
         },
-        'playlist_count': 115,
+        'playlist_count': 312,
     }, {
         # film
-        'url': 'https://vod.tvp.pl/website/gloria,35139666',
+        'url': 'https://vod.tvp.pl/website/krzysztof-krawczyk-cale-moje-zycie,51374466',
         'info_dict': {
-            'id': '36637049',
+            'id': '51374509',
             'ext': 'mp4',
-            'title': 'Gloria, Gloria',
+            'title': 'Krzysztof Krawczyk – całe moje życie, Krzysztof Krawczyk – całe moje życie',
+            'description': 'md5:2e80823f00f5fc263555482f76f8fa42',
+            'age_limit': 12,
         },
         'params': {
             'skip_download': True,

From ebfab36fca0901f99076158f9eb4f7fc9d87589b Mon Sep 17 00:00:00 2001
From: Lauren Liberda
Date: Sun, 31 Oct 2021 11:03:04 +0530
Subject: [PATCH 0211/2552] [tvp] Add TVPStreamIE (#1401)

Authored by: selfisekai
---
 yt_dlp/extractor/extractors.py |  1 +
 yt_dlp/extractor/tvp.py        | 46 ++++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index d47c06647..4f9de71e2 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -1571,6 +1571,7 @@ from .tvnow import (
 from .tvp import (
     TVPEmbedIE,
     TVPIE,
+    TVPStreamIE,
     TVPWebsiteIE,
 )
 from .tvplay import (
diff --git a/yt_dlp/extractor/tvp.py b/yt_dlp/extractor/tvp.py
index 22cfbd25e..48e2c6e76 100644
--- a/yt_dlp/extractor/tvp.py
+++ b/yt_dlp/extractor/tvp.py
@@ -251,6 +251,52 @@ class TVPIE(InfoExtractor):
         }


+class TVPStreamIE(InfoExtractor):
+    IE_NAME = 'tvp:stream'
+    _VALID_URL = r'(?:tvpstream:|https?://tvpstream\.vod\.tvp\.pl/(?:\?(?:[^&]+[&;])*channel_id=)?)(?P<id>\d*)'
+    _TESTS = [{
+        # untestable as "video" id changes many times across a day
+        'url': 'https://tvpstream.vod.tvp.pl/?channel_id=1455',
+        'only_matching': True,
+    }, {
+        'url': 'tvpstream:39821455',
+        'only_matching': True,
+    }, {
+        # the default stream when you provide no channel_id, most probably TVP Info
+        'url': 'tvpstream:',
+        'only_matching': True,
+    }, {
+        'url': 'https://tvpstream.vod.tvp.pl/',
+        'only_matching': 
True,
+    }]
+
+    _PLAYER_BOX_RE = r'<div[^>]*id\s*=\s*["\']?tvp_player_box["\']?[^>]+data-%s-id\s*=\s*["\']?(\d+)'
+    _BUTTON_RE = r'<div[^>]*data-channel-id=["\']?%s["\']?[^>]*\sdata-title=(?:"([^"]*)"|\'([^\']*)\')[^>]*\sdata-stationname=(?:"([^"]*)"|\'([^\']*)\')'
+
+    def _real_extract(self, url):
+        channel_id = self._match_id(url)
+        channel_url = self._proto_relative_url('//tvpstream.vod.tvp.pl/?channel_id=%s' % channel_id or 'default')
+        webpage = self._download_webpage(channel_url, channel_id, 'Downloading channel webpage')
+        if not channel_id:
+            channel_id = self._search_regex(self._PLAYER_BOX_RE % 'channel',
+                webpage, 'default channel id')
+        video_id = self._search_regex(self._PLAYER_BOX_RE % 'video',
+            webpage, 'video id')
+        audition_title, station_name = self._search_regex(
+            self._BUTTON_RE % (re.escape(channel_id)), webpage,
+            'audition title and station name',
+            group=(1, 2))
+        return {
+            '_type': 'url_transparent',
+            'id': channel_id,
+            'url': 'tvp:%s' % video_id,
+            'title': audition_title,
+            'alt_title': station_name,
+            'is_live': True,
+            'ie_key': 'TVPEmbed',
+        }
+
+
 class TVPEmbedIE(InfoExtractor):
     IE_NAME = 'tvp:embed'
     IE_DESC = 'Telewizja Polska'

From 86c1a8aae4db4a5b720cbd7c9465de350d64edef Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Sat, 6 Nov 2021 09:30:38 +0530
Subject: [PATCH 0212/2552] Release 2021.11.10

---
 .github/ISSUE_TEMPLATE/1_broken_site.yml      |  2 +-
 .../ISSUE_TEMPLATE/2_site_support_request.yml |  2 +-
 .github/ISSUE_TEMPLATE/4_bug_report.yml       |  4 +-
 CONTRIBUTORS                                  | 10 +++
 Changelog.md                                  | 85 +++++++++++++++++++
 README.md                                     | 18 ++--
 supportedsites.md                             | 24 +++++-
 7 files changed, 133 insertions(+), 12 deletions(-)

diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml
index 862e7235f..67145d8b2 100644
--- a/.github/ISSUE_TEMPLATE/1_broken_site.yml
+++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml
@@ -43,7 +43,7 @@ body:
     attributes:
       label: Verbose log
       description: |
-        Provide the complete verbose output of yt-dlp that clearly demonstrates the problem.
+        Provide the complete verbose output of yt-dlp **that clearly demonstrates the problem**.
         Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv <your command line>`), copy the WHOLE output and insert it below.
         It should look similar to this:
       placeholder: |
diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml
index aa00b8ad7..30cebec91 100644
--- a/.github/ISSUE_TEMPLATE/2_site_support_request.yml
+++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml
@@ -54,7 +54,7 @@ body:
     attributes:
       label: Verbose log
      description: |
-        Provide the complete verbose output using one of the example URLs provided above.
+        Provide the complete verbose output **using one of the example URLs provided above**.
         Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv <your command line>`), copy the WHOLE output and insert it below.
         It should look similar to this:
       placeholder: |
diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml
index 9003bb19a..445945df4 100644
--- a/.github/ISSUE_TEMPLATE/4_bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml
@@ -37,8 +37,8 @@ body:
     attributes:
       label: Verbose log
       description: |
-        Provide the complete verbose output of yt-dlp that clearly demonstrates the problem.
-        Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv <your command line>`), copy the WHOLE output and insert it below.
+        Provide the complete verbose output of yt-dlp **that clearly demonstrates the problem**. 
+        Add the `-Uv` flag to **your** command line you run yt-dlp with (`yt-dlp -Uv <your command line>`), copy the WHOLE output and insert it below.
         It should look similar to this:
       placeholder: |
         [debug] Command-line config: ['-Uv', 'http://www.youtube.com/watch?v=BaW_jenozKc']
diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index 2bf96affe..f035ce10d 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -129,3 +129,13 @@ Bojidarist
 nixklai
 smplayer-dev
 Zirro
+CrypticSignal
+flashdagger
+fractalf
+frafra
+kaz-us
+ozburo
+rhendric
+sdomi
+selfisekai
+stanoarn
diff --git a/Changelog.md b/Changelog.md
index d74237dd4..6124d6bd0 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -14,6 +14,91 @@
 -->

+### 2021.11.10
+
+* [youtube] **Fix throttling by decrypting n-sig**
+* Merging extractors from [haruhi-dl](https://git.sakamoto.pl/laudom/haruhi-dl) by [selfisekai](https://github.com/selfisekai)
+    * [extractor] Add `_search_nextjs_data`
+    * [tvp] Fix extractors
+    * [tvp] Add TVPStreamIE
+    * [wppilot] Add extractors
+    * [polskieradio] Add extractors
+    * [radiokapital] Add extractors
+    * [polsatgo] Add extractor by [selfisekai](https://github.com/selfisekai), [sdomi](https://github.com/sdomi)
+* Separate `--check-all-formats` from `--check-formats`
+* Approximate filesize from bitrate
+* Don't create console in `windows_enable_vt_mode`
+* Fix bug in `--load-infojson` of playlists
+* [minicurses] Add colors to `-F` and standardize color-printing code
+* [outtmpl] Add type `link` for internet shortcut files
+* [outtmpl] Add alternate forms for `q` and `j`
+* [outtmpl] Do not traverse `None`
+* [fragment] Fix progress display in fragmented downloads
+* [downloader/ffmpeg] Fix vtt download with ffmpeg
+* [ffmpeg] Detect presence of setts and libavformat version
+* [ExtractAudio] Rescale --audio-quality correctly by [CrypticSignal](https://github.com/CrypticSignal), [pukkandan](https://github.com/pukkandan)
+* [ExtractAudio] Use `libfdk_aac` if available by [CrypticSignal](https://github.com/CrypticSignal)
+* [FormatSort] `eac3` is better than `ac3`
+* [FormatSort] Fix some fields' defaults
+* [generic] Detect more json_ld
+* [generic] parse jwplayer with only the json URL
+* [extractor] Add keyword automatically to SearchIE descriptions
+* [extractor] Fix some errors being converted to `ExtractorError`
+* [utils] Add `join_nonempty`
+* [utils] Add `jwt_decode_hs256` by [Ashish0804](https://github.com/Ashish0804)
+* [utils] Create `DownloadCancelled` exception
+* [utils] Parse `vp09` as vp9
+* [utils] Sanitize URL when determining protocol
+* [test/download] Fallback test to `bv`
+* [docs] Minor documentation improvements
+* [cleanup] Improvements to error and debug messages
+* [cleanup] Minor fixes and cleanup
+* [3speak] Add extractors by [Ashish0804](https://github.com/Ashish0804)
+* [AmazonStore] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [Gab] Add extractor by [u-spec-png](https://github.com/u-spec-png)
+* [mediaset] Add playlist support by [nixxo](https://github.com/nixxo)
+* [MLSScoccer] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [N1] Add support for nova.rs by [u-spec-png](https://github.com/u-spec-png)
+* [PlanetMarathi] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [RaiplayRadio] Add extractors by [frafra](https://github.com/frafra)
+* [roosterteeth] Add series extractor
+* [sky] Add `SkyNewsStoryIE` by [ajj8](https://github.com/ajj8)
+* [youtube] Fix sorting for some videos
+* [youtube] Populate `thumbnail` with the best "known" thumbnail
+* [youtube] Refactor itag 
processing +* [youtube] Remove unnecessary no-playlist warning +* [youtube:tab] Add Invidious list for playlists/channels by [rhendric](https://github.com/rhendric) +* [Bilibili:comments] Fix infinite loop by [u-spec-png](https://github.com/u-spec-png) +* [ceskatelevize] Fix extractor by [flashdagger](https://github.com/flashdagger) +* [Coub] Fix media format identification by [wlritchi](https://github.com/wlritchi) +* [crunchyroll] Add extractor-args `language` and `hardsub` +* [DiscoveryPlus] Allow language codes in URL +* [imdb] Fix thumbnail by [ozburo](https://github.com/ozburo) +* [instagram] Add IOS URL support by [u-spec-png](https://github.com/u-spec-png) +* [instagram] Improve login code by [u-spec-png](https://github.com/u-spec-png) +* [Instagram] Improve metadata extraction by [u-spec-png](https://github.com/u-spec-png) +* [iPrima] Fix extractor by [stanoarn](https://github.com/stanoarn) +* [itv] Add support for ITV News by [ajj8](https://github.com/ajj8) +* [la7] Fix extractor by [nixxo](https://github.com/nixxo) +* [linkedin] Don't login multiple times +* [mtv] Fix some videos by [Sipherdrakon](https://github.com/Sipherdrakon) +* [Newgrounds] Fix description by [u-spec-png](https://github.com/u-spec-png) +* [Nrk] Minor fixes by [fractalf](https://github.com/fractalf) +* [Olympics] Fix extractor by [u-spec-png](https://github.com/u-spec-png) +* [piksel] Fix sorting +* [twitter] Do not sort by codec +* [viewlift] Add cookie-based login and series support by [Ashish0804](https://github.com/Ashish0804), [pukkandan](https://github.com/pukkandan) +* [vimeo] Detect source extension and misc cleanup by [flashdagger](https://github.com/flashdagger) +* [vimeo] Fix ondemand videos and direct URLs with hash +* [vk] Fix login and add subtitles by [kaz-us](https://github.com/kaz-us) +* [VLive] Add upload_date and thumbnail by [Ashish0804](https://github.com/Ashish0804) +* [VRT] Fix login by [pgaig](https://github.com/pgaig) +* [Vupload] Fix extractor by [u-spec-png](https://github.com/u-spec-png) +* [wakanim] Add support for MPD manifests by [nyuszika7h](https://github.com/nyuszika7h) +* [wakanim] Detect geo-restriction by [nyuszika7h](https://github.com/nyuszika7h) +* [ZenYandex] Fix extractor by [u-spec-png](https://github.com/u-spec-png) + + ### 2021.10.22 * [build] Improvements diff --git a/README.md b/README.md index 713c2c4a0..24975ad6f 100644 --- a/README.md +++ b/README.md @@ -78,7 +78,7 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/ * All Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`) and private playlists supports downloading multiple pages of content * Search (`ytsearch:`, `ytsearchdate:`), search URLs and in-channel search works * Mixes supports downloading multiple pages of content - * Most (but not all) age-gated content can be downloaded without cookies + * Some (but not all) age-gated content can be downloaded without cookies * Fix for [n-sig based throttling](https://github.com/ytdl-org/youtube-dl/issues/29326) * Redirect channel's home URL automatically to `/video` to preserve the old behaviour * `255kbps` audio is extracted (if available) from youtube music when premium cookies are given @@ -92,9 +92,13 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/ * **Aria2c with HLS/DASH**: You can use `aria2c` as the external downloader for DASH(mpd) and HLS(m3u8) formats -* **New extractors**: AnimeLab, Philo MSO, Spectrum MSO, SlingTV MSO, Cablevision MSO, RCN MSO, Rcs, Gedi, bitwave.tv, 
mildom, audius, zee5, mtv.it, wimtv, pluto.tv, niconico users, discoveryplus.in, mediathek, NFHSNetwork, nebula, ukcolumn, whowatch, MxplayerShow, parlview (au), YoutubeWebArchive, fancode, Saitosan, ShemarooMe, telemundo, VootSeries, SonyLIVSeries, HotstarSeries, VidioPremier, VidioLive, RCTIPlus, TBS Live, douyin, pornflip, ParamountPlusSeries, ScienceChannel, Utreon, OpenRec, BandcampMusic, blackboardcollaborate, eroprofile albums, mirrativ, BannedVideo, bilibili categories, Epicon, filmmodu, GabTV, HungamaAlbum, ManotoTV, Niconico search, Patreon User, peloton, ProjectVeritas, radiko, StarTV, tiktok user, Tokentube, voicy, TV2HuSeries, biliintl, 17live, NewgroundsUser, peertube channel/playlist, ZenYandex, CAM4, CGTN, damtomo, gotostage, Koo, Mediaite, Mediaklikk, MuseScore, nzherald, Olympics replay, radlive, SovietsCloset, Streamanity, Theta, Chingari, ciscowebex, Gettr, GoPro, N1, Theta, Veo, Vupload, NovaPlay, SkyNewsAU, EUScreen, Gronkh, microsoftstream, on24, trovo channels +* **New extractors**: 17live, 3speak, amazonstore, animelab, audius, bandcampmusic, bannedvideo, biliintl, bitwave.tv, blackboardcollaborate, cam4, cgtn, chingari, ciscowebex, damtomo, discoveryplus.in, douyin, epicon, euscreen, fancode, filmmodu, gab, gedi, gettr, gopro, gotostage, gronkh, koo, manototv, mediaite, mediaklikk, mediasetshow, mediathek, microsoftstream, mildom, mirrativ, mlsscoccer, mtv.it, musescore, mxplayershow, n1, nebula, nfhsnetwork, novaplay, nzherald, olympics replay, on24, openrec, parlview-AU, peloton, planetmarathi, pluto.tv, polsatgo, polskieradio, pornflip, projectveritas, radiko, radiokapital, radlive, raiplayradio, rcs, rctiplus, saitosan, sciencechannel, shemaroome, skynews-AU, skynews-story, sovietscloset, startv, streamanity, telemundo, theta, theta, tokentube, tv2huseries, ukcolumn, utreon, veo, vidiolive, vidiopremier, voicy, vupload, whowatch, wim.tv, wppilot, youtube webarchive, zee5, zen.yandex -* **Fixed/improved extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, akamai, ina, rumble, tennistv, amcnetworks, la7 podcasts, linuxacadamy, nitter, twitcasting, viu, crackle, curiositystream, mediasite, rmcdecouverte, sonyliv, tubi, tenplay, patreon, videa, yahoo, BravoTV, crunchyroll, RTP, viki, Hotstar, vidio, vimeo, mediaset, Mxplayer, nbcolympics, ParamountPlus, Newgrounds, SAML Verizon login, Hungama, afreecatv, aljazeera, ATV, bitchute, camtube, CDA, eroprofile, facebook, HearThisAtIE, iwara, kakao, Motherless, Nova, peertube, pornhub, reddit, tiktok, TV2, TV2Hu, tv5mondeplus, VH1, Viafree, XHamster, 9Now, AnimalPlanet, Arte, CBC, Chingari, comedycentral, DIYNetwork, niconico, dw, funimation, globo, HiDive, NDR, Nuvid, Oreilly, pbs, plutotv, reddit, redtube, soundcloud, SpankBang, VrtNU, bbc, Bilibili, LinkedInLearning, parliamentlive, PolskieRadio, Streamable, vidme, francetv, 7plus, tagesschau +* **New playlist extractors**: bilibili categories, eroprofile albums, hotstar series, hungama albums, newgrounds user, niconico search/users, paramountplus series, patreon user, peertube playlist/channels, roosterteeth series, sonyliv series, tiktok user, trovo channels, voot series + +* **Fixed/improved extractors**: 7plus, 9now, afreecatv, akamai, aljazeera, amcnetworks, animalplanet, archive.org, arte, atv, bbc, bilibili, bitchute, bravotv, camtube, cbc, cda, ceskatelevize, chingari, comedycentral, coub, crackle, crunchyroll, curiositystream, diynetwork, dw, eroprofile, facebook, francetv, funimation, globo, hearthisatie, hidive, 
hotstar, hungama, imdb, ina, instagram, iprima, itv, iwara, kakao, la7, linkedinlearning, linuxacadamy, mediaset, mediasite, motherless, mxplayer, nbcolympics, ndr, newgrounds, niconico, nitter, nova, nrk, nuvid, oreilly, paramountplus, parliamentlive, patreon, pbs, peertube, plutotv, polskieradio, pornhub, reddit, reddit, redtube, rmcdecouverte, roosterteeth, rtp, rumble, saml verizon login, skyit, sonyliv, soundcloud, southparkde, spankbang, spreaker, streamable, tagesschau, tbs, tennistv, tenplay, tiktok, tubi, tv2, tv2hu, tv5mondeplus, tvp, twitcasting, vh1, viafree, videa, vidio, vidme, viewlift, viki, vimeo, viu, vk, vlive, vrt, wakanim, xhamster, yahoo + +* **New MSOs**: Philo, Spectrum, SlingTV, Cablevision, RCN * **Subtitle extraction from manifests**: Subtitles can be extracted from streaming media manifests. See [commit/be6202f](https://github.com/yt-dlp/yt-dlp/commit/be6202f12b97858b9d716e608394b51065d0419f) for details @@ -108,7 +112,7 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/ * **Improvements**: Regex and other operators in `--match-filter`, multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection) etc -* **Plugin extractors**: Extractors can be loaded from an external file. See [plugins](#plugins) for details +* **Plugins**: Extractors and PostProcessors can be loaded from an external file. See [plugins](#plugins) for details * **Self-updater**: The releases can be updated using `yt-dlp -U` @@ -184,12 +188,12 @@ You can install the [PyPI package](https://pypi.org/project/yt-dlp) with: python3 -m pip install -U yt-dlp ``` -You can also install without any dependencies using: +You can install without any of the optional dependencies using: ``` python3 -m pip install --no-deps -U yt-dlp ``` -You can also install the master branch with: +If you want to be on the cutting edge, you can also install the master branch with: ``` python3 -m pip3 install --force-reinstall https://github.com/yt-dlp/yt-dlp/archive/master.zip ``` @@ -790,7 +794,7 @@ You can also fork the project on github and push it to a release branch in your formats are: best (default) or one of best|aac|flac|mp3|m4a|opus|vorbis|wav --audio-quality QUALITY Specify ffmpeg audio quality, insert a - value between 0 (better) and 9 (worse) for + value between 0 (best) and 10 (worst) for VBR or a specific bitrate like 128K (default 5) --remux-video FORMAT Remux the video into another container if diff --git a/supportedsites.md b/supportedsites.md index 01c3f43a9..50fa7f9f1 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -48,6 +48,7 @@ - **Alura** - **AluraCourse** - **Amara** + - **AmazonStore** - **AMCNetworks** - **AmericasTestKitchen** - **AmericasTestKitchenSeason** @@ -184,7 +185,6 @@ - **CCTV**: 央视网 - **CDA** - **CeskaTelevize** - - **CeskaTelevizePorady** - **CGTN** - **channel9**: Channel 9 - **CharlieRose** @@ -366,6 +366,7 @@ - **Funk** - **Fusion** - **Fux** + - **Gab** - **GabTV** - **Gaia** - **GameInformer** @@ -449,9 +450,11 @@ - **Instagram** - **instagram:tag**: Instagram hashtag search - **instagram:user**: Instagram user profile + - **InstagramIOS**: IOS instagram:// URL - **Internazionale** - **InternetVideoArchive** - **IPrima** + - **IPrimaCNN** - **iqiyi**: 爱奇艺 - **Ir90Tv** - **ITTF** @@ -560,6 +563,7 @@ - **MediaKlikk** - **Medialaan** - **Mediaset** + - **MediasetShow** - **Mediasite** - **MediasiteCatalog** - **MediasiteNamedCatalog** @@ -592,6 +596,7 @@ - **mixcloud:user** 
- **MLB** - **MLBVideo** + - **MLSSoccer** - **Mnet** - **MNetTV** - **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net @@ -801,6 +806,7 @@ - **Pinterest** - **PinterestCollection** - **Pladform** + - **PlanetMarathi** - **Platzi** - **PlatziCourse** - **play.fm** @@ -817,7 +823,12 @@ - **podomatic** - **Pokemon** - **PokemonWatch** + - **PolsatGo** - **PolskieRadio** + - **polskieradio:kierowcow** + - **polskieradio:player** + - **polskieradio:podcast** + - **polskieradio:podcast:list** - **PolskieRadioCategory** - **Popcorntimes** - **PopcornTV** @@ -860,6 +871,8 @@ - **radiocanada:audiovideo** - **radiofrance** - **RadioJavan** + - **radiokapital** + - **radiokapital:show** - **radlive** - **radlive:channel** - **radlive:season** @@ -867,6 +880,8 @@ - **RaiPlay** - **RaiPlayLive** - **RaiPlayPlaylist** + - **RaiPlayRadio** + - **RaiPlayRadioPlaylist** - **RayWenderlich** - **RayWenderlichCourse** - **RBMARadio** @@ -894,6 +909,7 @@ - **RMCDecouverte** - **RockstarGames** - **RoosterTeeth** + - **RoosterTeethSeries** - **RottenTomatoes** - **Roxwel** - **Rozhlas** @@ -961,6 +977,7 @@ - **Sina** - **sky.it** - **sky:news** + - **sky:news:story** - **sky:sports** - **sky:sports:news** - **skyacademy.it** @@ -1079,6 +1096,8 @@ - **ThisAmericanLife** - **ThisAV** - **ThisOldHouse** + - **ThreeSpeak** + - **ThreeSpeakUser** - **TikTok** - **tiktok:user** - **tinypic**: tinypic.com videos @@ -1142,6 +1161,7 @@ - **tvp**: Telewizja Polska - **tvp:embed**: Telewizja Polska - **tvp:series** + - **tvp:stream** - **TVPlayer** - **TVPlayHome** - **Tweakers** @@ -1296,6 +1316,8 @@ - **WistiaPlaylist** - **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **WorldStarHipHop** + - **wppilot** + - **wppilot:channels** - **WSJ**: Wall Street Journal - **WSJArticle** - **WWE** From 2e9a445bc34e79182f900909d727ba87f8487522 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 10 Nov 2021 01:14:33 +0000 Subject: [PATCH 0213/2552] [version] update :ci skip all --- .github/ISSUE_TEMPLATE/1_broken_site.yml | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.yml | 6 +++--- .github/ISSUE_TEMPLATE/3_site_feature_request.yml | 2 +- .github/ISSUE_TEMPLATE/4_bug_report.yml | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.yml | 2 +- yt_dlp/version.py | 2 +- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml index 67145d8b2..8200bdeb4 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a broken site required: true - - label: I've verified that I'm running yt-dlp version **2021.10.22**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.11.10**. 
([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -51,12 +51,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2021.10.22 (exe) + [debug] yt-dlp version 2021.11.10 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2021.10.22) + yt-dlp is up to date (2021.11.10) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml index 30cebec91..8736184a3 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a new site support request required: true - - label: I've verified that I'm running yt-dlp version **2021.10.22**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.11.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -62,12 +62,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2021.10.22 (exe) + [debug] yt-dlp version 2021.11.10 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2021.10.22) + yt-dlp is up to date (2021.11.10) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml index 59578b712..a8576e21c 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a site feature request required: true - - label: I've verified that I'm running yt-dlp version **2021.10.22**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.11.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml index 445945df4..56b233ce7 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a bug unrelated to a specific site required: true - - label: I've verified that I'm running yt-dlp version **2021.10.22**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.11.10**. 
([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -45,12 +45,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2021.10.22 (exe) + [debug] yt-dlp version 2021.11.10 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2021.10.22) + yt-dlp is up to date (2021.11.10) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml index 134416f4e..0937f09ce 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/5_feature_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a feature request required: true - - label: I've verified that I'm running yt-dlp version **2021.10.22**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.11.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates required: true diff --git a/yt_dlp/version.py b/yt_dlp/version.py index e7203be6b..197e7389c 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2021.10.22' +__version__ = '2021.11.10' From 7144b697fc20d6615690e5ec63e6c134ddb7aa5e Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 10 Nov 2021 06:58:42 +0530 Subject: [PATCH 0214/2552] Release 2021.11.10.1 :ci skip all --- .github/workflows/build.yml | 11 ++++++----- Changelog.md | 4 ++++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0fff6cae3..f75b11700 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -146,6 +146,7 @@ jobs: build_macos: runs-on: macos-11 needs: build_unix + if: False outputs: sha256_macos: ${{ steps.sha256_macos.outputs.sha256_macos }} sha512_macos: ${{ steps.sha512_macos.outputs.sha512_macos }} @@ -344,7 +345,7 @@ jobs: finish: runs-on: ubuntu-latest - needs: [build_unix, build_windows, build_windows32, build_macos] + needs: [build_unix, build_windows, build_windows32] steps: - name: Make SHA2-256SUMS file @@ -364,8 +365,8 @@ jobs: echo "${{ env.SHA256_PY2EXE }} yt-dlp_min.exe" >> SHA2-256SUMS echo "${{ env.SHA256_WIN32 }} yt-dlp_x86.exe" >> SHA2-256SUMS echo "${{ env.SHA256_WIN_ZIP }} yt-dlp_win.zip" >> SHA2-256SUMS - echo "${{ env.SHA256_MACOS }} yt-dlp_macos" >> SHA2-256SUMS - echo "${{ env.SHA256_MACOS_ZIP }} yt-dlp_macos.zip" >> SHA2-256SUMS + # echo "${{ env.SHA256_MACOS }} yt-dlp_macos" >> SHA2-256SUMS + # echo "${{ env.SHA256_MACOS_ZIP }} yt-dlp_macos.zip" >> SHA2-256SUMS - name: Upload 256SUMS file id: upload-sums uses: actions/upload-release-asset@v1 @@ -393,8 +394,8 @@ jobs: echo "${{ env.SHA512_WIN_ZIP }} yt-dlp_win.zip" >> SHA2-512SUMS echo "${{ env.SHA512_PY2EXE }} yt-dlp_min.exe" >> SHA2-512SUMS echo "${{ env.SHA512_WIN32 }} yt-dlp_x86.exe" >> SHA2-512SUMS - echo "${{ env.SHA512_MACOS }} yt-dlp_macos" >> SHA2-512SUMS - 
echo "${{ env.SHA512_MACOS_ZIP }} yt-dlp_macos.zip" >> SHA2-512SUMS + # echo "${{ env.SHA512_MACOS }} yt-dlp_macos" >> SHA2-512SUMS + # echo "${{ env.SHA512_MACOS_ZIP }} yt-dlp_macos.zip" >> SHA2-512SUMS - name: Upload 512SUMS file id: upload-512sums uses: actions/upload-release-asset@v1 diff --git a/Changelog.md b/Changelog.md index 6124d6bd0..5ac2aa615 100644 --- a/Changelog.md +++ b/Changelog.md @@ -14,6 +14,10 @@ --> +### 2021.11.10.1 + +* Temporarily disable MacOS Build + ### 2021.11.10 * [youtube] **Fix throttling by decrypting n-sig** From 9ebf3c6ab97c29b2d5872122e532bc98b93ad8b3 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 10 Nov 2021 01:47:10 +0000 Subject: [PATCH 0215/2552] [version] update :ci skip all --- .github/ISSUE_TEMPLATE/1_broken_site.yml | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.yml | 6 +++--- .github/ISSUE_TEMPLATE/3_site_feature_request.yml | 2 +- .github/ISSUE_TEMPLATE/4_bug_report.yml | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.yml | 2 +- yt_dlp/version.py | 2 +- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml index 8200bdeb4..27e07fb18 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a broken site required: true - - label: I've verified that I'm running yt-dlp version **2021.11.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.11.10.1**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -51,12 +51,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2021.11.10 (exe) + [debug] yt-dlp version 2021.11.10.1 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2021.11.10) + yt-dlp is up to date (2021.11.10.1) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml index 8736184a3..b27418544 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a new site support request required: true - - label: I've verified that I'm running yt-dlp version **2021.11.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.11.10.1**. 
([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -62,12 +62,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2021.11.10 (exe) + [debug] yt-dlp version 2021.11.10.1 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2021.11.10) + yt-dlp is up to date (2021.11.10.1) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml index a8576e21c..9df0902f4 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a site feature request required: true - - label: I've verified that I'm running yt-dlp version **2021.11.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.11.10.1**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml index 56b233ce7..14cc17ac9 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a bug unrelated to a specific site required: true - - label: I've verified that I'm running yt-dlp version **2021.11.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.11.10.1**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -45,12 +45,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2021.11.10 (exe) + [debug] yt-dlp version 2021.11.10.1 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2021.11.10) + yt-dlp is up to date (2021.11.10.1) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml index 0937f09ce..ae0c277b3 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/5_feature_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a feature request required: true - - label: I've verified that I'm running yt-dlp version **2021.11.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.11.10.1**. 
([update instructions](https://github.com/yt-dlp/yt-dlp#update))
       required: true
     - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates
       required: true
diff --git a/yt_dlp/version.py b/yt_dlp/version.py
index 197e7389c..5290afa2d 100644
--- a/yt_dlp/version.py
+++ b/yt_dlp/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2021.11.10'
+__version__ = '2021.11.10.1'

From b47d236d724f7a129c7ed0792fb847eb12e6f8a5 Mon Sep 17 00:00:00 2001
From: u-spec-png <54671367+u-spec-png@users.noreply.github.com>
Date: Wed, 10 Nov 2021 15:28:38 +0000
Subject: [PATCH 0216/2552] [Tokentube] Fix description (#1578)

Authored by: u-spec-png
---
 yt_dlp/extractor/tokentube.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/yt_dlp/extractor/tokentube.py b/yt_dlp/extractor/tokentube.py
index d6362117f..579623fed 100644
--- a/yt_dlp/extractor/tokentube.py
+++ b/yt_dlp/extractor/tokentube.py
@@ -6,7 +6,10 @@ import re

 from .common import InfoExtractor
 from ..utils import (
+    clean_html,
+    get_element_by_class,
     parse_count,
+    remove_end,
     unified_strdate,
     js_to_json,
     OnDemandPagedList,
@@ -35,7 +38,7 @@ class TokentubeIE(InfoExtractor):
             'id': '3950239124',
             'ext': 'mp4',
             'title': 'Linux Ubuntu Studio perus käyttö',
-            'description': 'md5:854ff1dc732ff708976de2880ea32050',
+            'description': 'md5:46077d0045c78cbf0d5bb19a55ce199fb'.replace('46077d0045c78cbf0d5bb19a55ce199fb', '46077d0daaba1974f2dc381257f9d64c'),
             'uploader': 'jyrilehtonen',
             'upload_date': '20210825',
         },
@@ -45,7 +48,7 @@ class TokentubeIE(InfoExtractor):
             'id': '3582463289',
             'ext': 'mp4',
             'title': 'Police for Freedom - toiminta aloitetaan Suomessa ❤️??',
-            'description': 'md5:cd92e620d7f5fa162e8410d0fc9a08be',
+            'description': 'md5:37ebf1cb44264e0bf23ed98b337ee63e',
             'uploader': 'Voitontie',
             'upload_date': '20210428',
         }
@@ -90,7 +93,10 @@ class TokentubeIE(InfoExtractor):
             r'<a[^>]+>(.+?)</a>', webpage, 'uploader', fatal=False)

-        description = self._html_search_meta('description', webpage)
+        description = (clean_html(get_element_by_class('p-d-txt', webpage))
+                       or self._html_search_meta(('og:description', 'description', 'twitter:description'), webpage))
+
+        description = remove_end(description, 'Category')

         self._sort_formats(formats)

From 013ae2e5038178420966fa7e029908b37ecda821 Mon Sep 17 00:00:00 2001
From: makeworld <25111343+makeworld-the-better-one@users.noreply.github.com>
Date: Wed, 10 Nov 2021 14:37:05 -0500
Subject: [PATCH 0217/2552] [CBC Gem] Fix for shows that don't have all seasons
 (#1621)

Closes #1594
Authored by: makeworld-the-better-one
---
 yt_dlp/extractor/cbc.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py
index 4fcf2a9c1..413053499 100644
--- a/yt_dlp/extractor/cbc.py
+++ b/yt_dlp/extractor/cbc.py
@@ -390,7 +390,8 @@ class CBCGemPlaylistIE(InfoExtractor):
         show = match.group('show')
         show_info = self._download_json(self._API_BASE + show, season_id)
         season = int(match.group('season'))
-        season_info = try_get(show_info, lambda x: x['seasons'][season - 1])
+
+        season_info = next((s for s in show_info['seasons'] if s.get('season') == season), None)

         if season_info is None:
             raise ExtractorError(f'Couldn\'t find season {season} of {show}')

From 44bcb8d1225c2fcfb9b1814282b74f0563ee26d1 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Wed, 10 Nov 2021 18:33:37 +0530
Subject: [PATCH 0218/2552] Fix bug in parsing `--add-header`

Closes #1614
---
 yt_dlp/options.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index 89a1a8637..89401910e 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -151,25 +151,25 @@ def parseOpts(overrideArguments=None):

     def _dict_from_options_callback(
             option, opt_str, value, parser,
-            allowed_keys=r'[\w-]+', delimiter=':', default_key=None, process=None, multiple_keys=True):
+            allowed_keys=r'[\w-]+', delimiter=':', default_key=None, process=None, multiple_keys=True,
+            process_key=str.lower):

         out_dict = getattr(parser.values, option.dest)
         if multiple_keys:
             allowed_keys = r'(%s)(,(%s))*' % (allowed_keys, allowed_keys)
         mobj = re.match(r'(?i)(?P<keys>%s)%s(?P<val>.*)$' % (allowed_keys, delimiter), value)
         if mobj is not None:
-            keys = [k.strip() for k in mobj.group('keys').lower().split(',')]
-            val = mobj.group('val')
+            keys, val = mobj.group('keys').split(','), mobj.group('val')
         elif default_key is not None:
             keys, val = [default_key], value
         else:
             raise optparse.OptionValueError(
                 'wrong %s formatting; it should be %s, not "%s"' % (opt_str, option.metavar, value))
         try:
+            keys = map(process_key, keys) if process_key else keys
             val = process(val) if process else val
         except Exception as err:
-            raise optparse.OptionValueError(
-                'wrong %s formatting; %s' % (opt_str, err))
+            raise optparse.OptionValueError(f'wrong {opt_str} formatting; {err}')
         for key in keys:
             out_dict[key] = val
@@ -792,7 +792,7 @@ def parseOpts(overrideArguments=None):
         '--add-header',
         metavar='FIELD:VALUE', dest='headers', default={}, type='str',
         action='callback', callback=_dict_from_options_callback,
-        callback_kwargs={'multiple_keys': False},
+        callback_kwargs={'multiple_keys': False, 'process_key': None},
        help='Specify a custom HTTP header and its value, separated by a colon ":". 
You can use this option multiple times',
    )
    workarounds.add_option(

From 093a17107ea5e375ba606ed1c31d1c259f93e0df Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Wed, 10 Nov 2021 21:41:41 +0530
Subject: [PATCH 0219/2552] Allow using a custom format selector through API

Closes #1619, #1464
---
 README.md           | 51 ++++++++++++++++++++++++++++++++++++++-------
 yt_dlp/YoutubeDL.py | 13 +++++++++---
 2 files changed, 53 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index 24975ad6f..7a4ec55bb 100644
--- a/README.md
+++ b/README.md
@@ -1600,14 +1600,14 @@ From a Python program, you can embed yt-dlp in a more powerful fashion, like thi
 ```python
 from yt_dlp import YoutubeDL

-ydl_opts = {}
+ydl_opts = {'format': 'bestaudio'}
 with YoutubeDL(ydl_opts) as ydl:
     ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
 ```

 Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L154-L452).

-Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), converts the video to an mp3 file, implements a custom postprocessor and prints the final info_dict as json:
+Here's a more complete example demonstrating various functionality:

 ```python
 import json
@@ -1633,23 +1633,56 @@ class MyLogger:
         print(msg)


+# ℹ️ See the docstring of yt_dlp.postprocessor.common.PostProcessor
 class MyCustomPP(yt_dlp.postprocessor.PostProcessor):
+    # ℹ️ See docstring of yt_dlp.postprocessor.common.PostProcessor.run
     def run(self, info):
         self.to_screen('Doing stuff')
         return [], info


+# ℹ️ See "progress_hooks" in the docstring of yt_dlp.YoutubeDL
 def my_hook(d):
     if d['status'] == 'finished':
         print('Done downloading, now converting ...')


+def format_selector(ctx):
+    """ Select the best video and the best audio that won't result in an mkv.
+    This is just an example and does not handle all cases """
+
+    # formats are already sorted worst to best
+    formats = ctx.get('formats')[::-1]
+
+    # acodec='none' means there is no audio
+    best_video = next(f for f in formats
+                      if f['vcodec'] != 'none' and f['acodec'] == 'none')
+
+    # find compatible audio extension
+    audio_ext = {'mp4': 'm4a', 'webm': 'webm'}[best_video['ext']]
+    # vcodec='none' means there is no video
+    best_audio = next(f for f in formats if (
+        f['acodec'] != 'none' and f['vcodec'] == 'none' and f['ext'] == audio_ext))
+
+    yield {
+        # These are the minimum required fields for a merged format
+        'format_id': f'{best_video["format_id"]}+{best_audio["format_id"]}',
+        'ext': best_video['ext'],
+        'requested_formats': [best_video, best_audio],
+        # Must be + separated list of protocols
+        'protocol': f'{best_video["protocol"]}+{best_audio["protocol"]}'
+    }
+
+
+# ℹ️ See docstring of yt_dlp.YoutubeDL for a description of the options
 ydl_opts = {
-    'format': 'bestaudio/best',
+    'format': format_selector,
     'postprocessors': [{
-        'key': 'FFmpegExtractAudio',
-        'preferredcodec': 'mp3',
-        'preferredquality': '192',
+        # Embed metadata in video using ffmpeg.
+        # ℹ️ See yt_dlp.postprocessor.FFmpegMetadataPP for the arguments it accepts
+        'key': 'FFmpegMetadata',
+        'add_chapters': True,
+        'add_metadata': True,
     }],
     'logger': MyLogger(),
     'progress_hooks': [my_hook],
 }
@@ -1659,14 +1692,16 @@ ydl_opts = {
 # Add custom headers
 yt_dlp.utils.std_headers.update({'Referer': 'https://www.google.com'})

+# ℹ️ See the public functions in yt_dlp.YoutubeDL for other available functions. 
+# Eg: "ydl.download", "ydl.download_with_info_file" with yt_dlp.YoutubeDL(ydl_opts) as ydl: ydl.add_post_processor(MyCustomPP()) info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc') + + # ℹ️ ydl.sanitize_info makes the info json-serializable print(json.dumps(ydl.sanitize_info(info))) ``` -See the public functions in [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py) for other available functions. Eg: `ydl.download`, `ydl.download_with_info_file` - **Tip**: If you are porting your code from youtube-dl to yt-dlp, one important point to look out for is that we do not guarantee the return value of `YoutubeDL.extract_info` to be json serializable, or even be a dictionary. It will be dictionary-like, but if you want to ensure it is a serializable dictionary, pass it through `YoutubeDL.sanitize_info` as shown in the example above diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 2439fc82b..5d6b1d5b2 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -211,6 +211,9 @@ class YoutubeDL(object): simulate: Do not download the video files. If unset (or None), simulate only if listsubtitles, listformats or list_thumbnails is used format: Video format code. see "FORMAT SELECTION" for more details. + You can also pass a function. The function takes 'ctx' as + argument and returns the formats to download. + See "build_format_selector" for an implementation allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded. ignore_no_formats_error: Ignore "No video formats" error. Usefull for extracting metadata even if the video is not actually @@ -613,6 +616,7 @@ class YoutubeDL(object): # Creating format selector here allows us to catch syntax errors before the extraction self.format_selector = ( None if self.params.get('format') is None + else self.params['format'] if callable(self.params['format']) else self.build_format_selector(self.params['format'])) self._setup_opener() @@ -1927,9 +1931,9 @@ class YoutubeDL(object): 'format_id': '+'.join(filtered('format_id')), 'ext': output_ext, 'protocol': '+'.join(map(determine_protocol, formats_info)), - 'language': '+'.join(orderedSet(filtered('language'))), - 'format_note': '+'.join(orderedSet(filtered('format_note'))), - 'filesize_approx': sum(filtered('filesize', 'filesize_approx')), + 'language': '+'.join(orderedSet(filtered('language'))) or None, + 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None, + 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None, 'tbr': sum(filtered('tbr', 'vbr', 'abr')), } @@ -2357,6 +2361,9 @@ class YoutubeDL(object): info_dict, _ = self.pre_process(info_dict) + # The pre-processors may have modified the formats + formats = info_dict.get('formats', [info_dict]) + if self.params.get('list_thumbnails'): self.list_thumbnails(info_dict) if self.params.get('listformats'): From e08a85d86595705126d1304eafd3829e6f3811d0 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 11 Nov 2021 08:00:43 +0530 Subject: [PATCH 0220/2552] Fix writing playlist infojson with `--no-clean-infojson` --- yt_dlp/YoutubeDL.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 5d6b1d5b2..4699e58b1 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1583,10 +1583,11 @@ class YoutubeDL(object): if entry is not None] n_entries = len(entries) - if not playlistitems and (playliststart or playlistend): + if not playlistitems and (playliststart != 1 or playlistend): playlistitems = 
list(range(playliststart, playliststart + n_entries)) ie_result['requested_entries'] = playlistitems + _infojson_written = False if not self.params.get('simulate') and self.params.get('allow_playlist_files', True): ie_copy = { 'playlist': playlist, @@ -1599,8 +1600,9 @@ class YoutubeDL(object): } ie_copy.update(dict(ie_result)) - if self._write_info_json('playlist', ie_result, - self.prepare_filename(ie_copy, 'pl_infojson')) is None: + _infojson_written = self._write_info_json( + 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson')) + if _infojson_written is None: return if self._write_description('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_description')) is None: @@ -1656,6 +1658,12 @@ class YoutubeDL(object): # TODO: skip failed (empty) entries? playlist_results.append(entry_result) ie_result['entries'] = playlist_results + + # Write the updated info to json + if _infojson_written and self._write_info_json( + 'updated playlist', ie_result, + self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None: + return self.to_screen('[download] Finished downloading playlist: %s' % playlist) return ie_result @@ -3472,8 +3480,10 @@ class YoutubeDL(object): encoding = preferredencoding() return encoding - def _write_info_json(self, label, ie_result, infofn): + def _write_info_json(self, label, ie_result, infofn, overwrite=None): ''' Write infojson and returns True = written, False = skip, None = error ''' + if overwrite is None: + overwrite = self.params.get('overwrites', True) if not self.params.get('writeinfojson'): return False elif not infofn: @@ -3481,7 +3491,7 @@ class YoutubeDL(object): return False elif not self._ensure_dir_exists(infofn): return None - elif not self.params.get('overwrites', True) and os.path.exists(infofn): + elif not overwrite and os.path.exists(infofn): self.to_screen(f'[info] {label.title()} metadata is already present') else: self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}') From bf5f605e7674c96d752aabb102cf627f5d7258ae Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 11 Nov 2021 08:44:54 +0530 Subject: [PATCH 0221/2552] bugfix for e08a85d86595705126d1304eafd3829e6f3811d0 --- yt_dlp/YoutubeDL.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 4699e58b1..1b3873254 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1506,9 +1506,9 @@ class YoutubeDL(object): raise EntryNotInPlaylist('There are no entries') incomplete_entries = bool(ie_result.get('requested_entries')) if incomplete_entries: - def fill_missing_entries(entries, indexes): - ret = [None] * max(*indexes) - for i, entry in zip(indexes, entries): + def fill_missing_entries(entries, indices): + ret = [None] * max(indices) + for i, entry in zip(indices, entries): ret[i - 1] = entry return ret ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries']) @@ -2991,7 +2991,8 @@ class YoutubeDL(object): try: self.__download_wrapper(self.process_ie_result)(info, download=True) except (DownloadError, EntryNotInPlaylist, ThrottledDownload) as e: - self.to_stderr('\r') + if not isinstance(e, EntryNotInPlaylist): + self.to_stderr('\r') webpage_url = info.get('webpage_url') if webpage_url is not None: self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}') From c1dc0ee56e0d29cefe6948621d253385fff3e20f Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 12 Nov 2021 03:12:53 +0530 Subject: [PATCH 0222/2552] 
[NovaEmbed] Fix extractor

Closes #1570
---
 yt_dlp/extractor/nova.py | 33 +++++++++++++++++++++++++++------
 1 file changed, 27 insertions(+), 6 deletions(-)

diff --git a/yt_dlp/extractor/nova.py b/yt_dlp/extractor/nova.py
index 3acb88121..0007b6b12 100644
--- a/yt_dlp/extractor/nova.py
+++ b/yt_dlp/extractor/nova.py
@@ -10,6 +10,7 @@ from ..utils import (
     int_or_none,
     js_to_json,
     qualities,
+    traverse_obj,
     unified_strdate,
     url_or_none,
 )
@@ -17,30 +18,44 @@ from ..utils import (

 class NovaEmbedIE(InfoExtractor):
     _VALID_URL = r'https?://media\.cms\.nova\.cz/embed/(?P<id>[^/?#&]+)'
-    _TEST = {
+    _TESTS = [{
         'url': 'https://media.cms.nova.cz/embed/8o0n0r?autoplay=1',
-        'md5': 'ee009bafcc794541570edd44b71cbea3',
         'info_dict': {
             'id': '8o0n0r',
-            'ext': 'mp4',
             'title': '2180. díl',
             'thumbnail': r're:^https?://.*\.jpg',
             'duration': 2578,
         },
-    }
+        'params': {
+            'skip_download': True,
+            'ignore_no_formats_error': True,
+        },
+        'expected_warnings': ['DRM protected', 'Requested format is not available'],
+    }, {
+        'url': 'https://media.cms.nova.cz/embed/KybpWYvcgOa',
+        'info_dict': {
+            'id': 'KybpWYvcgOa',
+            'ext': 'mp4',
+            'title': 'Borhyová oslavila 60? Soutěžící z pořadu odboural moderátora Ondřeje Sokola',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'duration': 114,
+        },
+        'params': {'skip_download': 'm3u8'},
+    }]

     def _real_extract(self, url):
         video_id = self._match_id(url)

         webpage = self._download_webpage(url, video_id)

+        has_drm = False
         duration = None
         formats = []

         player = self._parse_json(
             self._search_regex(
-                r'Player\.init\s*\([^,]+,\s*(?:\w+\s*\?\s*{.+?}\s*:\s*)?({.+})\s*,\s*{.+?}\s*\)\s*;',
-                webpage, 'player', default='{}'), video_id, fatal=False)
+                r'Player\.init\s*\([^,]+,(?P<cndn>\s*\w+\s*\?)?\s*(?P<json>{(?(cndn).+?|.+)})\s*(?(cndn):|,\s*{.+?}\s*\)\s*;)',
+                webpage, 'player', default='{}', group='json'), video_id, fatal=False)
         if player:
             for format_id, format_list in player['tracks'].items():
                 if not isinstance(format_list, list):
@@ -48,6 +63,10 @@ class NovaEmbedIE(InfoExtractor):
                 for format_dict in format_list:
                     if not isinstance(format_dict, dict):
                         continue
+                    if (not self.get_param('allow_unplayable_formats')
+                            and traverse_obj(format_dict, ('drm', 'keySystem'))):
+                        has_drm = True
+                        continue
                     format_url = url_or_none(format_dict.get('src'))
                     format_type = format_dict.get('type')
                     ext = determine_ext(format_url)
@@ -104,6 +123,8 @@ class NovaEmbedIE(InfoExtractor):
                     f['format_id'] = f_id
                     formats.append(f)

+        if not formats and has_drm:
+            self.report_drm(video_id)
         self._sort_formats(formats)

         title = self._og_search_title(

From 48e931066091fba7af1c447787685bbf7c889a25 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Fri, 12 Nov 2021 03:59:32 +0530
Subject: [PATCH 0223/2552] [nexx] Better error message for unsupported format

Related: #1637
---
 yt_dlp/extractor/nexx.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/nexx.py b/yt_dlp/extractor/nexx.py
index a30108483..8aceebd49 100644
--- a/yt_dlp/extractor/nexx.py
+++ b/yt_dlp/extractor/nexx.py
@@ -385,8 +385,7 @@ class NexxIE(InfoExtractor):
         elif cdn == 'free':
             formats = self._extract_free_formats(video, video_id)
         else:
-            # TODO: reverse more cdns
-            assert False
+            self.raise_no_formats(f'{cdn} formats are currently not supported', video_id)

         self._sort_formats(formats)

From df03de2c02192e43e5b51c8708619179a268b4cf Mon Sep 17 00:00:00 2001
From: MinePlayersPE
Date: Fri, 12 Nov 2021 20:46:19 +0700
Subject: [PATCH 0224/2552] [RoosterTeethSeries] Fix for multiple pages (#1642)

Authored by: MinePlayersPE
--- 
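The Rooster Teeth episodes API pages its results, so joining `data['links']['episodes']` onto the API base fetched only the first page of a season, and long seasons came back truncated. The fix below keeps the join but bolts a large `per_page` value onto the query string. A minimal sketch of the helper in use, assuming `yt_dlp.utils.update_url_query` (which merges a dict of parameters into a URL's existing query string); the endpoint path here is illustrative, not the documented API:

    from yt_dlp.utils import update_url_query

    # Merge extra query parameters into a URL without disturbing its other components
    season_url = update_url_query(
        'https://svod-be.roosterteeth.com/api/v1/seasons/example/episodes',  # hypothetical endpoint
        {'per_page': 1000})
    # season_url == 'https://svod-be.roosterteeth.com/api/v1/seasons/example/episodes?per_page=1000'

The page size of 1000 is an assumption that no single season exceeds it; if the API capped `per_page` lower, real pagination would still be needed.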
 yt_dlp/extractor/roosterteeth.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/roosterteeth.py b/yt_dlp/extractor/roosterteeth.py
index be796804c..18672b2e3 100644
--- a/yt_dlp/extractor/roosterteeth.py
+++ b/yt_dlp/extractor/roosterteeth.py
@@ -12,6 +12,7 @@ from ..utils import (
     url_or_none,
     urlencode_postdata,
     urljoin,
+    update_url_query,
 )
@@ -182,6 +183,13 @@ class RoosterTeethSeriesIE(RoosterTeethBaseIE):
             'id': 'role-initiative',
             'title': 'Role Initiative',
         }
+    }, {
+        'url': 'https://roosterteeth.com/series/let-s-play-minecraft?season=9',
+        'playlist_mincount': 50,
+        'info_dict': {
+            'id': 'let-s-play-minecraft-9',
+            'title': 'Let\'s Play Minecraft - Season 9',
+        }
     }]

     def _entries(self, series_id, season_number):
@@ -192,7 +200,7 @@ class RoosterTeethSeriesIE(RoosterTeethBaseIE):
             idx = traverse_obj(data, ('attributes', 'number'))
             if season_number and idx != season_number:
                 continue
-            season_url = urljoin(self._API_BASE, data['links']['episodes'])
+            season_url = update_url_query(urljoin(self._API_BASE, data['links']['episodes']), {'per_page': 1000})
             season = self._download_json(season_url, display_id, f'Downloading season {idx} JSON metadata')['data']
             for episode in season:
                 yield self.url_result(

From 92775d8a40728fe045af000755f1c3eeffb2089d Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Sat, 13 Nov 2021 15:07:48 +0530
Subject: [PATCH 0225/2552] [CuriosityStream] Fix series

Bug introduced in ed807c18376ecb61c2219b506040bc3e9464bde9
---
 yt_dlp/extractor/curiositystream.py | 56 +++++++++++++++++------------
 yt_dlp/extractor/extractors.py      |  3 +-
 2 files changed, 35 insertions(+), 24 deletions(-)

diff --git a/yt_dlp/extractor/curiositystream.py b/yt_dlp/extractor/curiositystream.py
index 41c0f845a..628c83631 100644
--- a/yt_dlp/extractor/curiositystream.py
+++ b/yt_dlp/extractor/curiositystream.py
@@ -44,7 +44,7 @@ class CuriosityStreamBaseIE(InfoExtractor):
                 'password': password,
             }))
         self._handle_errors(result)
-        self._auth_token = result['message']['auth_token']
+        CuriosityStreamBaseIE._auth_token = result['message']['auth_token']


 class CuriosityStreamIE(CuriosityStreamBaseIE):
@@ -142,9 +142,26 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
         }


-class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
-    IE_NAME = 'curiositystream:collection'
-    _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collections?|series)/(?P<id>\d+)'
+class CuriosityStreamCollectionBaseIE(CuriosityStreamBaseIE):
+
+    def _real_extract(self, url):
+        collection_id = self._match_id(url)
+        collection = self._call_api(collection_id, collection_id)
+        entries = []
+        for media in collection.get('media', []):
+            media_id = compat_str(media.get('id'))
+            media_type, ie = ('series', CuriosityStreamSeriesIE) if media.get('is_collection') else ('video', CuriosityStreamIE)
+            entries.append(self.url_result(
+                'https://curiositystream.com/%s/%s' % (media_type, media_id),
+                ie=ie.ie_key(), video_id=media_id))
+        return self.playlist_result(
+            entries, collection_id,
+            collection.get('title'), collection.get('description'))
+
+
+class CuriosityStreamCollectionsIE(CuriosityStreamCollectionBaseIE):
+    IE_NAME = 'curiositystream:collections'
+    _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/collections/(?P<id>\d+)'
     _API_BASE_URL = 'https://api.curiositystream.com/v2/collections/'
     _TESTS = [{
         'url': 'https://curiositystream.com/collections/86',
         'info_dict': {
             'id': '86',
         },
         'playlist_mincount': 7,
     }, {
-        'url': 
'https://app.curiositystream.com/collection/2', + 'url': 'https://curiositystream.com/collections/36', + 'only_matching': True, + }] + + +class CuriosityStreamSeriesIE(CuriosityStreamCollectionBaseIE): + IE_NAME = 'curiositystream:series' + _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:series|collection)/(?P\d+)' + _API_BASE_URL = 'https://api.curiositystream.com/v2/series/' + _TESTS = [{ + 'url': 'https://curiositystream.com/series/2', 'info_dict': { 'id': '2', 'title': 'Curious Minds: The Internet', @@ -163,23 +190,6 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE): }, 'playlist_mincount': 16, }, { - 'url': 'https://curiositystream.com/series/2', - 'only_matching': True, - }, { - 'url': 'https://curiositystream.com/collections/36', + 'url': 'https://curiositystream.com/collection/2', 'only_matching': True, }] - - def _real_extract(self, url): - collection_id = self._match_id(url) - collection = self._call_api(collection_id, collection_id) - entries = [] - for media in collection.get('media', []): - media_id = compat_str(media.get('id')) - media_type, ie = ('series', CuriosityStreamCollectionIE) if media.get('is_collection') else ('video', CuriosityStreamIE) - entries.append(self.url_result( - 'https://curiositystream.com/%s/%s' % (media_type, media_id), - ie=ie.ie_key(), video_id=media_id)) - return self.playlist_result( - entries, collection_id, - collection.get('title'), collection.get('description')) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 4f9de71e2..2eee2a864 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -307,7 +307,8 @@ from .ctvnews import CTVNewsIE from .cultureunplugged import CultureUnpluggedIE from .curiositystream import ( CuriosityStreamIE, - CuriosityStreamCollectionIE, + CuriosityStreamCollectionsIE, + CuriosityStreamSeriesIE, ) from .cwtv import CWTVIE from .dailymail import DailyMailIE From 39c04074e7e108bc6e36f3a34ef08a163663144a Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 13 Nov 2021 15:11:33 +0530 Subject: [PATCH 0226/2552] [ExtractAudio] Fix conversion to `wav` Closes #1645 --- yt_dlp/postprocessor/ffmpeg.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 46e87baeb..b2f28d658 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -403,10 +403,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): 'aac': (0.1, 4), 'vorbis': (0, 10), 'libfdk_aac': (1, 5), - 'opus': None, # doesn't support -q:a - 'wav': None, - 'flac': None, - }[codec] + }.get(codec) if not limits: return [] From e339d25a0d0d5de7e237e6ff8c7676aaa2cbb8a8 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 13 Nov 2021 15:11:59 +0530 Subject: [PATCH 0227/2552] [youtube] Minor improvement to format sorting --- yt_dlp/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 7bcd6e7dc..3ae0f5a27 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2563,7 +2563,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): f['quality'] = next(( q(qdict[val]) - for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)) + for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities)) if val in qdict), -1) return True From 7c7f7161fc0d778cd74d8b89162ba9df3d4e5da8 Mon Sep 17 00:00:00 2001 From: pukkandan 
Date: Sat, 13 Nov 2021 17:30:33 +0530 Subject: [PATCH 0228/2552] Fix `--load-info-json` of playlists with failed entries --- yt_dlp/YoutubeDL.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 1b3873254..70106db7e 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1504,10 +1504,12 @@ class YoutubeDL(object): if 'entries' not in ie_result: raise EntryNotInPlaylist('There are no entries') + + MissingEntry = object() incomplete_entries = bool(ie_result.get('requested_entries')) if incomplete_entries: def fill_missing_entries(entries, indices): - ret = [None] * max(indices) + ret = [MissingEntry] * max(indices) for i, entry in zip(indices, entries): ret[i - 1] = entry return ret @@ -1561,7 +1563,7 @@ class YoutubeDL(object): entry = None try: entry = get_entry(i) - if entry is None: + if entry is MissingEntry: raise EntryNotInPlaylist() except (IndexError, EntryNotInPlaylist): if incomplete_entries: @@ -1655,7 +1657,6 @@ class YoutubeDL(object): self.report_error( 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures)) break - # TODO: skip failed (empty) entries? playlist_results.append(entry_result) ie_result['entries'] = playlist_results From 9ac24e235ea9ef91c711c35b0f793d17ea284a54 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 13 Nov 2021 23:49:14 +0530 Subject: [PATCH 0229/2552] [curiositystream] Add more metadata Closes #1568 --- yt_dlp/extractor/common.py | 1 + yt_dlp/extractor/curiositystream.py | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 5c6e59901..6f0650296 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -342,6 +342,7 @@ class InfoExtractor(object): series, programme or podcast: series: Title of the series or programme the video episode belongs to. + series_id: Id of the series or programme the video episode belongs to, as a unicode string. season: Title of the season the video episode belongs to. season_number: Number of the season the video episode belongs to, as an integer. season_id: Id of the season the video episode belongs to, as a unicode string. 
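Note: `series_id` slots in alongside the existing `series`, `season_id` and `episode_id` keys, and the curiositystream.py hunk below is its first consumer. As a minimal sketch of how an extractor surfaces the new key (values borrowed from the CuriosityStream tests in this series, everything else omitted):

    # Sketch only; real extractors return many more fields.
    info_dict = {
        'id': '2',
        'title': 'How Did You Develop The Internet?',
        'series': 'Curious Minds: The Internet',
        'series_id': '2',  # the key documented in common.py above
    }
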
diff --git a/yt_dlp/extractor/curiositystream.py b/yt_dlp/extractor/curiositystream.py index 628c83631..286a4c6af 100644 --- a/yt_dlp/extractor/curiositystream.py +++ b/yt_dlp/extractor/curiositystream.py @@ -50,19 +50,23 @@ class CuriosityStreamBaseIE(InfoExtractor): class CuriosityStreamIE(CuriosityStreamBaseIE): IE_NAME = 'curiositystream' _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P\d+)' - _TEST = { + _TESTS = [{ 'url': 'https://app.curiositystream.com/video/2', 'info_dict': { 'id': '2', 'ext': 'mp4', 'title': 'How Did You Develop The Internet?', 'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.', + 'channel': 'Curiosity Stream', + 'categories': ['Technology', 'Interview'], + 'average_rating': 96.79, + 'series_id': '2', }, 'params': { # m3u8 download 'skip_download': True, }, - } + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -139,6 +143,10 @@ class CuriosityStreamIE(CuriosityStreamBaseIE): 'duration': int_or_none(media.get('duration')), 'tags': media.get('tags'), 'subtitles': subtitles, + 'channel': media.get('producer'), + 'categories': [media.get('primary_category'), media.get('type')], + 'average_rating': media.get('rating_percentage'), + 'series_id': str(media.get('collection_id') or '') or None, } From d0e6121adf4f82b266c82d7e632f7fe79f05096c Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 13 Nov 2021 23:55:12 +0530 Subject: [PATCH 0230/2552] [curiositystream] Fix login Bug from 92775d8a40728fe045af000755f1c3eeffb2089d --- yt_dlp/extractor/curiositystream.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/curiositystream.py b/yt_dlp/extractor/curiositystream.py index 286a4c6af..485b6031f 100644 --- a/yt_dlp/extractor/curiositystream.py +++ b/yt_dlp/extractor/curiositystream.py @@ -15,7 +15,6 @@ from ..utils import ( class CuriosityStreamBaseIE(InfoExtractor): _NETRC_MACHINE = 'curiositystream' _auth_token = None - _API_BASE_URL = 'https://api.curiositystream.com/v1/' def _handle_errors(self, result): error = result.get('error', {}).get('message') @@ -39,7 +38,8 @@ class CuriosityStreamBaseIE(InfoExtractor): if email is None: return result = self._download_json( - self._API_BASE_URL + 'login', None, data=urlencode_postdata({ + 'https://api.curiositystream.com/v1/login', None, + note='Logging in', data=urlencode_postdata({ 'email': email, 'password': password, })) @@ -68,12 +68,14 @@ class CuriosityStreamIE(CuriosityStreamBaseIE): }, }] + _API_BASE_URL = 'https://api.curiositystream.com/v1/media/' + def _real_extract(self, url): video_id = self._match_id(url) formats = [] for encoding_format in ('m3u8', 'mpd'): - media = self._call_api('media/' + video_id, video_id, query={ + media = self._call_api(video_id, video_id, query={ 'encodingsNew': 'true', 'encodingsFormat': encoding_format, }) From f279aaee8e246f510e56fe35b163520f35085338 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 15 Nov 2021 01:25:47 +0530 Subject: [PATCH 0231/2552] Add compat-option embed-metadata --- README.md | 1 + yt_dlp/options.py | 2 +- yt_dlp/postprocessor/ffmpeg.py | 3 +++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 7a4ec55bb..1612bda5a 100644 --- a/README.md +++ b/README.md @@ -137,6 +137,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu * `--ignore-errors` is enabled by default. 
Use `--abort-on-error` or `--compat-options abort-on-error` to abort on errors instead * When writing metadata files such as thumbnails, description or infojson, the same information (if available) is also written for playlists. Use `--no-write-playlist-metafiles` or `--compat-options no-playlist-metafiles` to not write these files * `--add-metadata` attaches the `infojson` to `mkv` files in addition to writing the metadata when used with `--write-infojson`. Use `--compat-options no-attach-info-json` to revert this +* Some metadata are embedded into different fields when using `--add-metadata` as compared to youtube-dl. Most notably, `comment` field contains the `webpage_url` and `synopsis` contains the `description`. You can [use `--parse-metadata`](https://github.com/yt-dlp/yt-dlp#modifying-metadata) to modify this to your liking or use `--compat-options embed-metadata` to revert this * `playlist_index` behaves differently when used with options like `--playlist-reverse` and `--playlist-items`. See [#302](https://github.com/yt-dlp/yt-dlp/issues/302) for details. You can use `--compat-options playlist-index` if you want to keep the earlier behavior * The output of `-F` is listed in a new format. Use `--compat-options list-formats` to revert this * All *experiences* of a funimation episode are considered as a single video. This behavior breaks existing archives. Use `--compat-options seperate-video-versions` to extract information from only the default player diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 89401910e..209f199bd 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -278,7 +278,7 @@ def parseOpts(overrideArguments=None): 'allowed_values': { 'filename', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles', 'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge', - 'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-attach-info-json', + 'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-attach-info-json', 'embed-metadata', 'embed-thumbnail-atomicparsley', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', }, 'aliases': { 'youtube-dl': ['-multistreams', 'all'], diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index b2f28d658..d6734e8d9 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -721,6 +721,9 @@ class FFmpegMetadataPP(FFmpegPostProcessor): add('season_number') add('episode_id', ('episode', 'episode_id')) add('episode_sort', 'episode_number') + if 'embed-metadata' in self.get_param('compat_opts', []): + add('comment', 'description') + metadata.pop('synopsis', None) for key, value in info.items(): if value is not None and key != meta_prefix and key.startswith(meta_prefix): From dac5df5a988a75ed12343e4ee8fcafbc76ae847d Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 15 Nov 2021 04:03:41 +0530 Subject: [PATCH 0232/2552] Add option `--embed-info-json` to embed info-json in mkv Closes #1644 --- yt_dlp/YoutubeDL.py | 8 +++--- yt_dlp/__init__.py | 10 +++++++- yt_dlp/options.py | 12 ++++++++- yt_dlp/postprocessor/ffmpeg.py | 47 +++++++++++++++++++++++++--------- 4 files changed, 60 insertions(+), 17 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 70106db7e..a102ecc32 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -431,7 +431,7 @@ class YoutubeDL(object): compat_opts: Compatibility options. See "Differences in default behavior". 
The following options do not work when used through the API: filename, abort-on-error, multistreams, no-live-chat, format-sort - no-clean-infojson, no-playlist-metafiles, no-keep-subs. + no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json. Refer __init__.py for their implementation progress_template: Dictionary of templates for progress outputs. Allowed keys are 'download', 'postprocess', @@ -2654,6 +2654,8 @@ class YoutubeDL(object): infofn = self.prepare_filename(info_dict, 'infojson') _infojson_written = self._write_info_json('video', info_dict, infofn) if _infojson_written: + info_dict['infojson_filename'] = infofn + # For backward compatability, even though it was a private field info_dict['__infojson_filename'] = infofn elif _infojson_written is None: return @@ -3012,8 +3014,8 @@ class YoutubeDL(object): keep_keys = ['_type'] # Always keep this to facilitate load-info-json if remove_private_keys: remove_keys |= { - 'requested_formats', 'requested_subtitles', 'requested_entries', - 'filepath', 'entries', 'original_url', 'playlist_autonumber', + 'requested_formats', 'requested_subtitles', 'requested_entries', 'entries', + 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber', } empty_values = (None, {}, [], set(), tuple()) reject = lambda k, v: k not in keep_keys and ( diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index d72e08b35..63b9b6e2f 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -290,6 +290,11 @@ def _real_main(argv=None): set_default_compat('abort-on-error', 'ignoreerrors', 'only_download') set_default_compat('no-playlist-metafiles', 'allow_playlist_files') set_default_compat('no-clean-infojson', 'clean_infojson') + if 'no-attach-info-json' in compat_opts: + if opts.embed_infojson: + _unused_compat_opt('no-attach-info-json') + else: + opts.embed_infojson = False if 'format-sort' in compat_opts: opts.format_sort.extend(InfoExtractor.FormatSort.ytdl_default) _video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False) @@ -526,11 +531,14 @@ def _real_main(argv=None): # By default ffmpeg preserves metadata applicable for both # source and target containers. From this point the container won't change, # so metadata can be added here. - if opts.addmetadata or opts.addchapters: + if opts.addmetadata or opts.addchapters or opts.embed_infojson: + if opts.embed_infojson is None: + opts.embed_infojson = 'if_exists' postprocessors.append({ 'key': 'FFmpegMetadata', 'add_chapters': opts.addchapters, 'add_metadata': opts.addmetadata, + 'add_infojson': opts.embed_infojson, }) # Note: Deprecated # This should be above EmbedThumbnail since sponskrub removes the thumbnail attachment diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 209f199bd..0843d5ff7 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1287,7 +1287,9 @@ def parseOpts(overrideArguments=None): postproc.add_option( '--embed-metadata', '--add-metadata', action='store_true', dest='addmetadata', default=False, - help='Embed metadata to the video file. Also adds chapters to file unless --no-add-chapters is used (Alias: --add-metadata)') + help=( + 'Embed metadata to the video file. 
Also embeds chapters/infojson if present ' + 'unless --no-embed-chapters/--no-embed-info-json are used (Alias: --add-metadata)')) postproc.add_option( '--no-embed-metadata', '--no-add-metadata', action='store_false', dest='addmetadata', @@ -1300,6 +1302,14 @@ def parseOpts(overrideArguments=None): '--no-embed-chapters', '--no-add-chapters', action='store_false', dest='addchapters', help='Do not add chapter markers (default) (Alias: --no-add-chapters)') + postproc.add_option( + '--embed-info-json', + action='store_true', dest='embed_infojson', default=None, + help='Embed the infojson as an attachment to mkv/mka video files') + postproc.add_option( + '--no-embed-info-json', + action='store_false', dest='embed_infojson', + help='Do not embed the infojson as an attachment to the video file') postproc.add_option( '--metadata-from-title', metavar='FORMAT', dest='metafromtitle', diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index d6734e8d9..eacee8ee9 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -28,6 +28,7 @@ from ..utils import ( shell_quote, traverse_obj, variadic, + write_json_file, ) @@ -636,10 +637,11 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): class FFmpegMetadataPP(FFmpegPostProcessor): - def __init__(self, downloader, add_metadata=True, add_chapters=True): + def __init__(self, downloader, add_metadata=True, add_chapters=True, add_infojson='if_exists'): FFmpegPostProcessor.__init__(self, downloader) self._add_metadata = add_metadata self._add_chapters = add_chapters + self._add_infojson = add_infojson @staticmethod def _options(target_ext): @@ -652,13 +654,23 @@ class FFmpegMetadataPP(FFmpegPostProcessor): @PostProcessor._restrict_to(images=False) def run(self, info): filename, metadata_filename = info['filepath'], None - options = [] + files_to_delete, options = [], [] if self._add_chapters and info.get('chapters'): metadata_filename = replace_extension(filename, 'meta') options.extend(self._get_chapter_opts(info['chapters'], metadata_filename)) + files_to_delete.append(metadata_filename) if self._add_metadata: options.extend(self._get_metadata_opts(info)) + if self._add_infojson: + if info['ext'] in ('mkv', 'mka'): + infojson_filename = info.get('infojson_filename') + options.extend(self._get_infojson_opts(info, infojson_filename)) + if not infojson_filename: + files_to_delete.append(info.get('infojson_filename')) + elif self._add_infojson is True: + self.to_screen('The info-json can only be attached to mkv/mka files') + if not options: self.to_screen('There isn\'t any metadata to add') return [], info @@ -668,8 +680,8 @@ class FFmpegMetadataPP(FFmpegPostProcessor): self.run_ffmpeg_multiple_files( (filename, metadata_filename), temp_filename, itertools.chain(self._options(info['ext']), *options)) - if metadata_filename: - os.remove(metadata_filename) + for file in filter(None, files_to_delete): + os.remove(file) # Don't obey --keep-files os.replace(temp_filename, filename) return [], info @@ -741,15 +753,26 @@ class FFmpegMetadataPP(FFmpegPostProcessor): yield ('-metadata:s:%d' % (stream_idx + i), 'language=%s' % lang) stream_idx += stream_count - if ('no-attach-info-json' not in self.get_param('compat_opts', []) - and '__infojson_filename' in info and info['ext'] in ('mkv', 'mka')): - old_stream, new_stream = self.get_stream_number(info['filepath'], ('tags', 'mimetype'), 'application/json') - if old_stream is not None: - yield ('-map', '-0:%d' % old_stream) - new_stream -= 1 + def _get_infojson_opts(self, 
info, infofn): + if not infofn or not os.path.exists(infofn): + if self._add_infojson is not True: + return + infofn = infofn or '%s.temp' % ( + self._downloader.prepare_filename(info, 'infojson') + or replace_extension(self._downloader.prepare_filename(info), 'info.json', info['ext'])) + if not self._downloader._ensure_dir_exists(infofn): + return + self.write_debug(f'Writing info-json to: {infofn}') + write_json_file(self._downloader.sanitize_info(info, self.get_param('clean_infojson', True)), infofn) + info['infojson_filename'] = infofn + + old_stream, new_stream = self.get_stream_number(info['filepath'], ('tags', 'mimetype'), 'application/json') + if old_stream is not None: + yield ('-map', '-0:%d' % old_stream) + new_stream -= 1 - yield ('-attach', info['__infojson_filename'], - '-metadata:s:%d' % new_stream, 'mimetype=application/json') + yield ('-attach', infofn, + '-metadata:s:%d' % new_stream, 'mimetype=application/json') class FFmpegMergerPP(FFmpegPostProcessor): From 013b50b7949563e445936302d6e486bab7100018 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 15 Nov 2021 04:50:11 +0530 Subject: [PATCH 0233/2552] Fix 'postprocessor_hooks` Closes #1650 --- yt_dlp/YoutubeDL.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index a102ecc32..197ec11e6 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -633,11 +633,14 @@ class YoutubeDL(object): pp = pp_class(self, **compat_kwargs(pp_def)) self.add_post_processor(pp, when=when) - for ph in self.params.get('post_hooks', []): - self.add_post_hook(ph) - - for ph in self.params.get('progress_hooks', []): - self.add_progress_hook(ph) + hooks = { + 'post_hooks': self.add_post_hook, + 'progress_hooks': self.add_progress_hook, + 'postprocessor_hooks': self.add_postprocessor_hook, + } + for opt, fn in hooks.items(): + for ph in self.params.get(opt, []): + fn(ph) register_socks_protocols() From d0d012d4e79cd1420e96ce5c3d509771110d3ea1 Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Tue, 16 Nov 2021 14:22:01 +1300 Subject: [PATCH 0234/2552] [youtube] Add `default` player client (#1685) Authored-by: coletdjnz --- README.md | 2 +- yt_dlp/extractor/youtube.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1612bda5a..96f5d7ecb 100644 --- a/README.md +++ b/README.md @@ -1552,7 +1552,7 @@ The following extractors use this feature: #### youtube * `skip`: `hls` or `dash` (or both) to skip download of the respective manifests -* `player_client`: Clients to extract video data from. The main clients are `web`, `android`, `ios`, `mweb`. These also have `_music`, `_embedded`, `_agegate`, and `_creator` variants (Eg: `web_embedded`) (`mweb` has only `_agegate`). By default, `android,web` is used, but the agegate and creator variants are added as required for age-gated videos. Similarly the music variants are added for `music.youtube.com` urls. You can also use `all` to use all the clients +* `player_client`: Clients to extract video data from. The main clients are `web`, `android`, `ios`, `mweb`. These also have `_music`, `_embedded`, `_agegate`, and `_creator` variants (Eg: `web_embedded`) (`mweb` has only `_agegate`). By default, `android,web` is used, but the agegate and creator variants are added as required for age-gated videos. Similarly the music variants are added for `music.youtube.com` urls. You can also use `all` to use all the clients, and `default` for the default clients. 
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details * `include_live_dash`: Include live dash formats (These formats don't download properly) * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 3ae0f5a27..203f4a92a 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2339,18 +2339,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _get_requested_clients(self, url, smuggled_data): requested_clients = [] + default = ['android', 'web'] allowed_clients = sorted( [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'], key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True) for client in self._configuration_arg('player_client'): if client in allowed_clients: requested_clients.append(client) + elif client == 'default': + requested_clients.extend(default) elif client == 'all': requested_clients.extend(allowed_clients) else: self.report_warning(f'Skipping unsupported client {client}') if not requested_clients: - requested_clients = ['android', 'web'] + requested_clients = default if smuggled_data.get('is_music_url') or self.is_music_url(url): requested_clients.extend( From d8cf8d97a8dbc9602556de474af133b5ab0e0a29 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 16 Nov 2021 21:14:02 +0530 Subject: [PATCH 0235/2552] [utils] Fix `PagedList` --- yt_dlp/utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index f07eef61f..a9e066257 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4179,7 +4179,9 @@ class PagedList: self._cache = {} def getpage(self, pagenum): - page_results = self._cache.get(pagenum) or list(self._pagefunc(pagenum)) + page_results = self._cache.get(pagenum) + if page_results is None: + page_results = list(self._pagefunc(pagenum)) if self._use_cache: self._cache[pagenum] = page_results return page_results @@ -4195,7 +4197,9 @@ class PagedList: if not isinstance(idx, int) or idx < 0: raise TypeError('indices must be non-negative integers') entries = self.getslice(idx, idx + 1) - return entries[0] if entries else None + if not entries: + raise IndexError() + return entries[0] class OnDemandPagedList(PagedList): From 720c309932ea6724223d0a6b7781a0e92a74262c Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 17 Nov 2021 01:26:23 +0530 Subject: [PATCH 0236/2552] [youtube] Add storyboard formats Closes: #1553, https://github.com/ytdl-org/youtube-dl/issues/9868 Related: https://github.com/ytdl-org/youtube-dl/pull/14951 --- yt_dlp/extractor/youtube.py | 53 ++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 203f4a92a..41e7fce10 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -9,6 +9,7 @@ import datetime import hashlib import itertools import json +import math import os.path import random import re @@ -28,6 +29,7 @@ from ..compat import ( ) from ..jsinterp import JSInterpreter from ..utils import ( + bug_reports_message, bytes_to_intlist, clean_html, datetime_from_str, @@ -66,6 
+68,10 @@ from ..utils import ( ) +def get_first(obj, keys, **kwargs): + return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False) + + # any clients starting with _ cannot be explicity requested by the user INNERTUBE_CLIENTS = { 'web': { @@ -2586,6 +2592,41 @@ class YoutubeIE(YoutubeBaseInfoExtractor): r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None)) yield f + def _extract_storyboard(self, player_responses, duration): + spec = get_first( + player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1] + if not spec: + return + base_url = spec.pop() + L = len(spec) - 1 + for i, args in enumerate(spec): + args = args.split('#') + counts = list(map(int_or_none, args[:5])) + if len(args) != 8 or not all(counts): + self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}') + continue + width, height, frame_count, cols, rows = counts + N, sigh = args[6:] + + url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}' + fragment_count = frame_count / (cols * rows) + fragment_duration = duration / fragment_count + yield { + 'format_id': f'sb{i}', + 'format_note': 'storyboard', + 'ext': 'mhtml', + 'protocol': 'mhtml', + 'acodec': 'none', + 'vcodec': 'none', + 'url': url, + 'width': width, + 'height': height, + 'fragments': [{ + 'path': url.replace('$M', str(j)), + 'duration': min(fragment_duration, duration - (j * fragment_duration)), + } for j in range(math.ceil(fragment_count))], + } + def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) video_id = self._match_id(url) @@ -2603,8 +2644,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self._get_requested_clients(url, smuggled_data), video_id, webpage, master_ytcfg) - get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False) - playability_statuses = traverse_obj( player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[]) @@ -2700,10 +2739,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if reason: self.raise_no_formats(reason, expected=True) - # Source is given priority since formats that throttle are given lower source_preference - # When throttling issue is fully fixed, remove this - self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto')) - keywords = get_first(video_details, 'keywords', expected_type=list) or [] if not keywords and webpage: keywords = [ @@ -2791,6 +2826,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if not duration and live_endtime and live_starttime: duration = live_endtime - live_starttime + formats.extend(self._extract_storyboard(player_responses, duration)) + + # Source is given priority since formats that throttle are given lower source_preference + # When throttling issue is fully fixed, remove this + self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto')) + info = { 'id': video_id, 'title': self._live_title(video_title) if is_live else video_title, From 450bdf69bc080d882cb4db26cde8c2f9681b7e18 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Thu, 18 Nov 2021 21:27:50 +0530 Subject: [PATCH 0237/2552] [OneFootball] Add extractor (#1613) Closes: #1598 Authored by: Ashish0804 --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/onefootball.py | 51 +++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 
yt_dlp/extractor/onefootball.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 2eee2a864..a60e27186 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1000,6 +1000,7 @@ from .oktoberfesttv import OktoberfestTVIE from .olympics import OlympicsReplayIE from .on24 import On24IE from .ondemandkorea import OnDemandKoreaIE +from .onefootball import OneFootballIE from .onet import ( OnetIE, OnetChannelIE, diff --git a/yt_dlp/extractor/onefootball.py b/yt_dlp/extractor/onefootball.py new file mode 100644 index 000000000..79501003d --- /dev/null +++ b/yt_dlp/extractor/onefootball.py @@ -0,0 +1,51 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class OneFootballIE(InfoExtractor): + _VALID_URL = r'(?:https?://)(?:www\.)?onefootball\.com/[a-z]{2}/video/[^/&?#]+-(?P\d+)' + + _TESTS = [{ + 'url': 'https://onefootball.com/en/video/highlights-fc-zuerich-3-3-fc-basel-34012334', + 'info_dict': { + 'id': '34012334', + 'ext': 'mp4', + 'title': 'Highlights: FC Zürich 3-3 FC Basel', + 'description': 'md5:33d9855cb790702c4fe42a513700aba8', + 'thumbnail': 'https://photobooth-api.onefootball.com/api/screenshot/https:%2F%2Fperegrine-api.onefootball.com%2Fv2%2Fphotobooth%2Fcms%2Fen%2F34012334', + 'timestamp': 1635874604, + 'upload_date': '20211102' + }, + 'params': {'skip_download': True} + }, { + 'url': 'https://onefootball.com/en/video/klopp-fumes-at-var-decisions-in-west-ham-defeat-34041020', + 'info_dict': { + 'id': '34041020', + 'ext': 'mp4', + 'title': 'Klopp fumes at VAR decisions in West Ham defeat', + 'description': 'md5:9c50371095a01ad3f63311c73d8f51a5', + 'thumbnail': 'https://photobooth-api.onefootball.com/api/screenshot/https:%2F%2Fperegrine-api.onefootball.com%2Fv2%2Fphotobooth%2Fcms%2Fen%2F34041020', + 'timestamp': 1636314103, + 'upload_date': '20211107' + }, + 'params': {'skip_download': True} + }] + + def _real_extract(self, url): + id = self._match_id(url) + webpage = self._download_webpage(url, id) + data_json = self._search_json_ld(webpage, id) + m3u8_url = self._html_search_regex(r'(https://cdn\.jwplayer\.com/manifests/.+\.m3u8)', webpage, 'm3u8_url') + formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, id) + self._sort_formats(formats) + return { + 'id': id, + 'title': data_json.get('title'), + 'description': data_json.get('description'), + 'thumbnail': data_json.get('thumbnail'), + 'timestamp': data_json.get('timestamp'), + 'formats': formats, + 'subtitles': subtitles, + } From 266a1b5d52d4a48a966d0a0b6286ca2740482409 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Thu, 18 Nov 2021 21:28:51 +0530 Subject: [PATCH 0238/2552] [ESPNCricInfo] Add extractor (#1652) Closes: #1635 Authored by: Ashish0804 --- yt_dlp/extractor/espn.py | 43 ++++++++++++++++++++++++++++++++++ yt_dlp/extractor/extractors.py | 1 + 2 files changed, 44 insertions(+) diff --git a/yt_dlp/extractor/espn.py b/yt_dlp/extractor/espn.py index d4a66c29f..dc50f3b8b 100644 --- a/yt_dlp/extractor/espn.py +++ b/yt_dlp/extractor/espn.py @@ -7,7 +7,9 @@ from .once import OnceIE from ..compat import compat_str from ..utils import ( determine_ext, + dict_get, int_or_none, + unified_strdate, unified_timestamp, ) @@ -236,3 +238,44 @@ class FiveThirtyEightIE(InfoExtractor): webpage, 'embed url') return self.url_result(embed_url, 'AbcNewsVideo') + + +class ESPNCricInfoIE(InfoExtractor): + _VALID_URL = 
r'https?://(?:www\.)?espncricinfo\.com/video/[^#$&?/]+-(?P\d+)' + _TESTS = [{ + 'url': 'https://www.espncricinfo.com/video/finch-chasing-comes-with-risks-despite-world-cup-trend-1289135', + 'info_dict': { + 'id': '1289135', + 'ext': 'mp4', + 'title': 'Finch: Chasing comes with \'risks\' despite World Cup trend', + 'description': 'md5:ea32373303e25efbb146efdfc8a37829', + 'upload_date': '20211113', + 'duration': 96, + }, + 'params': {'skip_download': True} + }] + + def _real_extract(self, url): + id = self._match_id(url) + data_json = self._download_json(f'https://hs-consumer-api.espncricinfo.com/v1/pages/video/video-details?videoId={id}', id)['video'] + formats, subtitles = [], {} + for item in data_json.get('playbacks') or []: + if item.get('type') == 'HLS' and item.get('url'): + m3u8_frmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(item['url'], id) + formats.extend(m3u8_frmts) + subtitles = self._merge_subtitles(subtitles, m3u8_subs) + elif item.get('type') == 'AUDIO' and item.get('url'): + formats.append({ + 'url': item['url'], + 'vcodec': 'none', + }) + self._sort_formats(formats) + return { + 'id': id, + 'title': data_json.get('title'), + 'description': data_json.get('summary'), + 'upload_date': unified_strdate(dict_get(data_json, ('publishedAt', 'recordedAt'))), + 'duration': data_json.get('duration'), + 'formats': formats, + 'subtitles': subtitles, + } diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index a60e27186..a3674d836 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -418,6 +418,7 @@ from .espn import ( ESPNIE, ESPNArticleIE, FiveThirtyEightIE, + ESPNCricInfoIE, ) from .esri import EsriVideoIE from .europa import EuropaIE From 9d63137eac4a5753dae775712599dc5c7adb0e8c Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Thu, 18 Nov 2021 21:29:53 +0530 Subject: [PATCH 0239/2552] [CanalAlpha] Add extractor (#1655) Closes: #1528 Authored by: Ashish0804 --- yt_dlp/extractor/canalalpha.py | 98 ++++++++++++++++++++++++++++++++++ yt_dlp/extractor/extractors.py | 1 + 2 files changed, 99 insertions(+) create mode 100644 yt_dlp/extractor/canalalpha.py diff --git a/yt_dlp/extractor/canalalpha.py b/yt_dlp/extractor/canalalpha.py new file mode 100644 index 000000000..7287677c1 --- /dev/null +++ b/yt_dlp/extractor/canalalpha.py @@ -0,0 +1,98 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + clean_html, + dict_get, + try_get, + unified_strdate, +) + + +class CanalAlphaIE(InfoExtractor): + _VALID_URL = r'(?:https?://)(?:www\.)?canalalpha\.ch/play/[^/]+/[^/]+/(?P\d+)/?.*' + + _TESTS = [{ + 'url': 'https://www.canalalpha.ch/play/le-journal/episode/24520/jeudi-28-octobre-2021', + 'info_dict': { + 'id': '24520', + 'ext': 'mp4', + 'title': 'Jeudi 28 octobre 2021', + 'description': 'md5:d30c6c3e53f8ad40d405379601973b30', + 'thumbnail': 'https://static.canalalpha.ch/poster/journal/journal_20211028.jpg', + 'upload_date': '20211028', + 'duration': 1125, + }, + 'params': {'skip_download': True} + }, { + 'url': 'https://www.canalalpha.ch/play/le-journal/topic/24512/la-poste-fait-de-neuchatel-un-pole-cryptographique', + 'info_dict': { + 'id': '24512', + 'ext': 'mp4', + 'title': 'La Poste fait de Neuchâtel un pôle cryptographique', + 'description': 'md5:4ba63ae78a0974d1a53d6703b6e1dedf', + 'thumbnail': 'https://static.canalalpha.ch/poster/news/news_39712.jpg', + 'upload_date': '20211028', + 'duration': 138, + }, + 
'params': {'skip_download': True} + }, { + 'url': 'https://www.canalalpha.ch/play/eureka/episode/24484/ces-innovations-qui-veulent-rendre-lagriculture-plus-durable', + 'info_dict': { + 'id': '24484', + 'ext': 'mp4', + 'title': 'Ces innovations qui veulent rendre l’agriculture plus durable', + 'description': 'md5:3de3f151180684621e85be7c10e4e613', + 'thumbnail': 'https://static.canalalpha.ch/poster/magazine/magazine_10236.jpg', + 'upload_date': '20211026', + 'duration': 360, + }, + 'params': {'skip_download': True} + }, { + 'url': 'https://www.canalalpha.ch/play/avec-le-temps/episode/23516/redonner-de-leclat-grace-au-polissage', + 'info_dict': { + 'id': '23516', + 'ext': 'mp4', + 'title': 'Redonner de l\'éclat grâce au polissage', + 'description': 'md5:0d8fbcda1a5a4d6f6daa3165402177e1', + 'thumbnail': 'https://static.canalalpha.ch/poster/magazine/magazine_9990.png', + 'upload_date': '20210726', + 'duration': 360, + }, + 'params': {'skip_download': True} + }] + + def _real_extract(self, url): + id = self._match_id(url) + webpage = self._download_webpage(url, id) + data_json = self._parse_json(self._search_regex( + r'window\.__SERVER_STATE__\s?=\s?({(?:(?!};)[^"]|"([^"]|\\")*")+})\s?;', + webpage, 'data_json'), id)['1']['data']['data'] + manifests = try_get(data_json, lambda x: x['video']['manifests'], expected_type=dict) or {} + subtitles = {} + formats = [{ + 'url': video['$url'], + 'ext': 'mp4', + 'width': try_get(video, lambda x: x['res']['width'], expected_type=int), + 'height': try_get(video, lambda x: x['res']['height'], expected_type=int), + } for video in try_get(data_json, lambda x: x['video']['mp4'], expected_type=list) or [] if video.get('$url')] + if manifests.get('hls'): + m3u8_frmts, m3u8_subs = self._parse_m3u8_formats_and_subtitles(manifests['hls'], id) + formats.extend(m3u8_frmts) + subtitles = self._merge_subtitles(subtitles, m3u8_subs) + if manifests.get('dash'): + dash_frmts, dash_subs = self._parse_mpd_formats_and_subtitles(manifests['dash'], id) + formats.extend(dash_frmts) + subtitles = self._merge_subtitles(subtitles, dash_subs) + self._sort_formats(formats) + return { + 'id': id, + 'title': data_json.get('title').strip(), + 'description': clean_html(dict_get(data_json, ('longDesc', 'shortDesc'))), + 'thumbnail': data_json.get('poster'), + 'upload_date': unified_strdate(dict_get(data_json, ('webPublishAt', 'featuredAt', 'diffusionDate'))), + 'duration': try_get(data_json, lambda x: x['video']['duration'], expected_type=int), + 'formats': formats, + 'subtitles': subtitles, + } diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index a3674d836..2c0a885b9 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -193,6 +193,7 @@ from .camdemy import ( ) from .cammodels import CamModelsIE from .camwithher import CamWithHerIE +from .canalalpha import CanalAlphaIE from .canalplus import CanalplusIE from .canalc2 import Canalc2IE from .canvas import ( From 525d9e0c7d4e8e1ad121d75f14ae40e8ee023079 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Thu, 18 Nov 2021 21:30:48 +0530 Subject: [PATCH 0240/2552] [HotStar] Set language field from tags (#1700) Authored by: Ashish0804 --- yt_dlp/extractor/hotstar.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index 12e6c53d4..0bdf772a1 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -230,6 +230,11 @@ class HotStarIE(HotStarBaseIE): if tags 
and 'encryption:plain' not in tags: for f in current_formats: f['has_drm'] = True + if tags and 'language' in tags: + lang = re.search(r'language:(?P[a-z]+)', tags).group('lang') + for f in current_formats: + if not f.get('langauge'): + f['language'] = lang formats.extend(current_formats) subs = self._merge_subtitles(subs, current_subs) if not formats and geo_restricted: From 11852843e738bfdb01e1c65d3466629dc9645813 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Thu, 18 Nov 2021 21:43:39 +0530 Subject: [PATCH 0241/2552] [AmazonStoreIE] Fix regex to not match vdp urls (#1699) Closes: #1698 Authored by: Ashish0804 --- yt_dlp/extractor/amazon.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/amazon.py b/yt_dlp/extractor/amazon.py index 01d6f2a54..7c5d35f47 100644 --- a/yt_dlp/extractor/amazon.py +++ b/yt_dlp/extractor/amazon.py @@ -4,7 +4,7 @@ from ..utils import int_or_none class AmazonStoreIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?amazon\.(?:[a-z]{2,3})(?:\.[a-z]{2})?/[^/]*/?(?:dp|gp/product)/(?P[^/&#$?]+)' + _VALID_URL = r'(?:https?://)(?:www\.)?amazon\.(?:[a-z]{2,3})(?:\.[a-z]{2})?/(?:[^/]+/)?(?:dp|gp/product)/(?P[^/&#$?]+)' _TESTS = [{ 'url': 'https://www.amazon.co.uk/dp/B098XNCHLD/', From 61be785a6700be8b9e064572ddfb6546b20cb8f9 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Thu, 18 Nov 2021 21:20:45 +0000 Subject: [PATCH 0242/2552] [peer.tv] Add extractor (#1499) Closes #1388 Authored by: u-spec-png --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/peertv.py | 57 ++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 yt_dlp/extractor/peertv.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 2c0a885b9..458e6e2c8 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1063,6 +1063,7 @@ from .peertube import ( PeerTubeIE, PeerTubePlaylistIE, ) +from .peertv import PeerTVIE from .peloton import ( PelotonIE, PelotonLiveIE diff --git a/yt_dlp/extractor/peertv.py b/yt_dlp/extractor/peertv.py new file mode 100644 index 000000000..002d33a88 --- /dev/null +++ b/yt_dlp/extractor/peertv.py @@ -0,0 +1,57 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import js_to_json + + +class PeerTVIE(InfoExtractor): + IE_NAME = 'peer.tv' + _VALID_URL = r'https?://(?:www\.)?peer\.tv/(?:de|it|en)/(?P\d+)' + _TESTS = [{ + 'url': 'https://www.peer.tv/de/841', + 'info_dict': { + 'id': '841', + 'ext': 'mp4', + 'title': 'Die Brunnenburg', + 'description': 'md5:4395f6142b090338340ab88a3aae24ed', + }, + }, { + 'url': 'https://www.peer.tv/it/404', + 'info_dict': { + 'id': '404', + 'ext': 'mp4', + 'title': 'Cascate di ghiaccio in Val Gardena', + 'description': 'md5:e8e5907f236171842674e8090e3577b8', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + video_key = self._html_search_regex(r'player\.peer\.tv/js/([a-zA-Z0-9]+)', webpage, 'video key') + + js = self._download_webpage(f'https://player.peer.tv/js/{video_key}/', video_id, + headers={'Referer': 'https://www.peer.tv/'}, note='Downloading session id') + + session_id = self._search_regex(r'["\']session_id["\']:\s*["\']([a-zA-Z0-9]+)["\']', js, 'session id') + + player_webpage = self._download_webpage( + 
f'https://player.peer.tv/jsc/{video_key}/{session_id}?jsr=aHR0cHM6Ly93d3cucGVlci50di9kZS84NDE=&cs=UTF-8&mq=2&ua=0&webm=p&mp4=p&hls=1', + video_id, note='Downloading player webpage') + + m3u8_url = self._search_regex(r'["\']playlist_url["\']:\s*(["\'][^"\']+["\'])', player_webpage, 'm3u8 url') + m3u8_url = self._parse_json(m3u8_url, video_id, transform_source=js_to_json) + + formats = self._extract_m3u8_formats(m3u8_url, video_id, m3u8_id='hls') + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': self._html_search_regex(r'
<h1>(.+?)</h1>
', webpage, 'title').replace('\xa0', ' '), + 'formats': formats, + 'description': self._html_search_meta(('og:description', 'description'), webpage), + 'thumbnail': self._html_search_meta(('og:image', 'image'), webpage) + } From 22a510ff447a5d0e4c023b810d434611521b777c Mon Sep 17 00:00:00 2001 From: The Hatsune Daishi Date: Fri, 19 Nov 2021 06:43:22 +0900 Subject: [PATCH 0243/2552] [mixch] add support for mixch.tv (#1586) Authored by: nao20010128nao --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/mixch.py | 55 ++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 yt_dlp/extractor/mixch.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 458e6e2c8..200c59bbe 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -795,6 +795,7 @@ from .mirrativ import ( ) from .mit import TechTVMITIE, OCWMITIE from .mitele import MiTeleIE +from .mixch import MixchIE from .mixcloud import ( MixcloudIE, MixcloudUserIE, diff --git a/yt_dlp/extractor/mixch.py b/yt_dlp/extractor/mixch.py new file mode 100644 index 000000000..a99ddd172 --- /dev/null +++ b/yt_dlp/extractor/mixch.py @@ -0,0 +1,55 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + traverse_obj, +) + + +class MixchIE(InfoExtractor): + IE_NAME = 'mixch' + _VALID_URL = r'https?://(?:www\.)?mixch\.tv/u/(?P\d+)' + + TESTS = [{ + 'url': 'https://mixch.tv/u/16236849/live', + 'skip': 'don\'t know if this live persists', + 'info_dict': { + 'id': '16236849', + 'title': '24配信シェア⭕️投票🙏💦', + 'comment_count': 13145, + 'view_count': 28348, + 'timestamp': 1636189377, + 'uploader': '🦥伊咲👶🏻#フレアワ', + 'uploader_id': '16236849', + } + }, { + 'url': 'https://mixch.tv/u/16137876/live', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(f'https://mixch.tv/u/{video_id}/live', video_id) + + initial_js_state = self._parse_json(self._search_regex( + r'(?m)^\s*window\.__INITIAL_JS_STATE__\s*=\s*(\{.+?\});\s*$', webpage, 'initial JS state'), video_id) + if not initial_js_state.get('liveInfo'): + raise ExtractorError('Livestream has ended.', expected=True) + + return { + 'id': video_id, + 'title': traverse_obj(initial_js_state, ('liveInfo', 'title')), + 'comment_count': traverse_obj(initial_js_state, ('liveInfo', 'comments')), + 'view_count': traverse_obj(initial_js_state, ('liveInfo', 'visitor')), + 'timestamp': traverse_obj(initial_js_state, ('liveInfo', 'created')), + 'uploader': traverse_obj(initial_js_state, ('broadcasterInfo', 'name')), + 'uploader_id': video_id, + 'formats': [{ + 'format_id': 'hls', + 'url': traverse_obj(initial_js_state, ('liveInfo', 'hls')) or 'https://d1hd0ww6piyb43.cloudfront.net/hls/torte_%s.m3u8' % video_id, + 'ext': 'mp4', + 'protocol': 'm3u8', + }], + 'is_live': True, + } From 402cd603a40c2115413f914ebb4dd43d9bf2449a Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Thu, 18 Nov 2021 21:57:40 +0000 Subject: [PATCH 0244/2552] [LinkedIn] Add extractor (#1597) Closes #1206 Authored by: u-spec-png --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/linkedin.py | 105 ++++++++++++++++++++++++--------- 2 files changed, 78 insertions(+), 28 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 200c59bbe..106006671 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -704,6 +704,7 @@ 
from .line import ( LineLiveChannelIE, ) from .linkedin import ( + LinkedInIE, LinkedInLearningIE, LinkedInLearningCourseIE, ) diff --git a/yt_dlp/extractor/linkedin.py b/yt_dlp/extractor/linkedin.py index c2d347efd..9255b3301 100644 --- a/yt_dlp/extractor/linkedin.py +++ b/yt_dlp/extractor/linkedin.py @@ -6,21 +6,56 @@ import re from .common import InfoExtractor from ..utils import ( + clean_html, + extract_attributes, ExtractorError, float_or_none, + get_element_by_class, int_or_none, srt_subtitles_timecode, + strip_or_none, + mimetype2ext, try_get, urlencode_postdata, urljoin, ) -class LinkedInLearningBaseIE(InfoExtractor): +class LinkedInBaseIE(InfoExtractor): _NETRC_MACHINE = 'linkedin' - _LOGIN_URL = 'https://www.linkedin.com/uas/login?trk=learning' _logged_in = False + def _real_initialize(self): + if self._logged_in: + return + email, password = self._get_login_info() + if email is None: + return + + login_page = self._download_webpage( + self._LOGIN_URL, None, 'Downloading login page') + action_url = urljoin(self._LOGIN_URL, self._search_regex( + r']+action=(["\'])(?P.+?)\1', login_page, 'post url', + default='https://www.linkedin.com/uas/login-submit', group='url')) + data = self._hidden_inputs(login_page) + data.update({ + 'session_key': email, + 'session_password': password, + }) + login_submit_page = self._download_webpage( + action_url, None, 'Logging in', + data=urlencode_postdata(data)) + error = self._search_regex( + r']+class="error"[^>]*>\s*(.+?)\s*', + login_submit_page, 'error', default=None) + if error: + raise ExtractorError(error, expected=True) + LinkedInBaseIE._logged_in = True + + +class LinkedInLearningBaseIE(LinkedInBaseIE): + _LOGIN_URL = 'https://www.linkedin.com/uas/login?trk=learning' + def _call_api(self, course_slug, fields, video_slug=None, resolution=None): query = { 'courseSlug': course_slug, @@ -52,32 +87,47 @@ class LinkedInLearningBaseIE(InfoExtractor): def _get_video_id(self, video_data, course_slug, video_slug): return self._get_urn_id(video_data) or '%s/%s' % (course_slug, video_slug) - def _real_initialize(self): - if self._logged_in: - return - email, password = self._get_login_info() - if email is None: - return - login_page = self._download_webpage( - self._LOGIN_URL, None, 'Downloading login page') - action_url = urljoin(self._LOGIN_URL, self._search_regex( - r']+action=(["\'])(?P.+?)\1', login_page, 'post url', - default='https://www.linkedin.com/uas/login-submit', group='url')) - data = self._hidden_inputs(login_page) - data.update({ - 'session_key': email, - 'session_password': password, - }) - login_submit_page = self._download_webpage( - action_url, None, 'Logging in', - data=urlencode_postdata(data)) - error = self._search_regex( - r']+class="error"[^>]*>\s*(.+?)\s*', - login_submit_page, 'error', default=None) - if error: - raise ExtractorError(error, expected=True) - LinkedInLearningBaseIE._logged_in = True +class LinkedInIE(LinkedInBaseIE): + _VALID_URL = r'https?://(?:www\.)?linkedin\.com/posts/.+?(?P\d+)' + _TESTS = [{ + 'url': 'https://www.linkedin.com/posts/mishalkhawaja_sendinblueviews-toronto-digitalmarketing-ugcPost-6850898786781339649-mM20', + 'info_dict': { + 'id': '6850898786781339649', + 'ext': 'mp4', + 'title': 'Mishal K. on LinkedIn: #sendinblueviews #toronto #digitalmarketing', + 'description': 'md5:be125430bab1c574f16aeb186a4d5b19', + 'creator': 'Mishal K.' 
+ }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex(r'([^<]+)', webpage, 'title') + description = clean_html(get_element_by_class('share-update-card__update-text', webpage)) + like_count = int_or_none(get_element_by_class('social-counts-reactions__social-counts-numRections', webpage)) + creator = strip_or_none(clean_html(get_element_by_class('comment__actor-name', webpage))) + + sources = self._parse_json(extract_attributes(self._search_regex(r'(]+>)', webpage, 'video'))['data-sources'], video_id) + formats = [{ + 'url': source['src'], + 'ext': mimetype2ext(source.get('type')), + 'tbr': float_or_none(source.get('data-bitrate'), scale=1000), + } for source in sources] + + self._sort_formats(formats) + + return { + 'id': video_id, + 'formats': formats, + 'title': title, + 'like_count': like_count, + 'creator': creator, + 'thumbnail': self._og_search_thumbnail(webpage), + 'description': description, + } class LinkedInLearningIE(LinkedInLearningBaseIE): @@ -108,7 +158,6 @@ class LinkedInLearningIE(LinkedInLearningBaseIE): def _real_extract(self, url): course_slug, video_slug = self._match_valid_url(url).groups() - video_data = None formats = [] for width, height in ((640, 360), (960, 540), (1280, 720)): video_data = self._call_api( From cfcaf64a4b10400964606804085eb975cfd2a401 Mon Sep 17 00:00:00 2001 From: Paul Wise Date: Fri, 19 Nov 2021 06:14:38 +0800 Subject: [PATCH 0245/2552] [rtrfm] Add extractor (#1628) Authored by: pabs3 --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/rtrfm.py | 67 ++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 yt_dlp/extractor/rtrfm.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 106006671..89c61312d 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1235,6 +1235,7 @@ from .rtl2 import ( RTL2YouSeriesIE, ) from .rtp import RTPIE +from .rtrfm import RTRFMIE from .rts import RTSIE from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE from .rtvnh import RTVNHIE diff --git a/yt_dlp/extractor/rtrfm.py b/yt_dlp/extractor/rtrfm.py new file mode 100644 index 000000000..93d51e8ed --- /dev/null +++ b/yt_dlp/extractor/rtrfm.py @@ -0,0 +1,67 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class RTRFMIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rtrfm\.com\.au/(?:shows|show-episode)/(?P[^/?\#&]+)' + _TESTS = [ + { + 'url': 'https://rtrfm.com.au/shows/breakfast/', + 'md5': '46168394d3a5ce237cf47e85d0745413', + 'info_dict': { + 'id': 'breakfast-2021-11-16', + 'ext': 'mp3', + 'series': 'Breakfast with Taylah', + 'title': r're:^Breakfast with Taylah \d{4}-\d{2}-\d{2}$', + 'description': 'md5:0979c3ab1febfbec3f1ccb743633c611', + }, + 'skip': 'ID and md5 changes daily', + }, + { + 'url': 'https://rtrfm.com.au/show-episode/breakfast-2021-11-11/', + 'md5': '396bedf1e40f96c62b30d4999202a790', + 'info_dict': { + 'id': 'breakfast-2021-11-11', + 'ext': 'mp3', + 'series': 'Breakfast with Taylah', + 'title': 'Breakfast with Taylah 2021-11-11', + 'description': 'md5:0979c3ab1febfbec3f1ccb743633c611', + }, + }, + { + 'url': 'https://rtrfm.com.au/show-episode/breakfast-2020-06-01/', + 'md5': '594027f513ec36a24b15d65007a24dff', + 'info_dict': { + 'id': 'breakfast-2020-06-01', + 'ext': 'mp3', + 'series': 'Breakfast with Taylah', + 'title': 'Breakfast with Taylah 2020-06-01', + 
'description': r're:^Breakfast with Taylah ', + }, + 'skip': 'This audio has expired', + }, + ] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + show, date, title = self._search_regex( + r'''\.playShow(?:From)?\(['"](?P[^'"]+)['"],\s*['"](?P[0-9]{4}-[0-9]{2}-[0-9]{2})['"],\s*['"](?P[^'"]+)['"]''', + webpage, 'details', group=('show', 'date', 'title')) + url = self._download_json( + 'https://restreams.rtrfm.com.au/rzz', + show, 'Downloading MP3 URL', query={'n': show, 'd': date})['u'] + # This is the only indicator of an error until trying to download the URL and + # downloads of mp4 URLs always fail (403 for current episodes, 404 for missing). + if '.mp4' in url: + url = None + self.raise_no_formats('Expired or no episode on this date', expected=True) + return { + 'id': '%s-%s' % (show, date), + 'title': '%s %s' % (title, date), + 'series': title, + 'url': url, + 'release_date': date, + 'description': self._og_search_description(webpage), + } From 764f5de2f48a523394558b10006b97cd0b6c7acf Mon Sep 17 00:00:00 2001 From: Paul Wise <pabs3@bonedaddy.net> Date: Fri, 19 Nov 2021 06:15:41 +0800 Subject: [PATCH 0246/2552] [blogger] Add extractor (#1629) Authored by: pabs3 --- yt_dlp/extractor/blogger.py | 54 ++++++++++++++++++++++++++++++++++ yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/generic.py | 17 +++++++++++ 3 files changed, 72 insertions(+) create mode 100644 yt_dlp/extractor/blogger.py diff --git a/yt_dlp/extractor/blogger.py b/yt_dlp/extractor/blogger.py new file mode 100644 index 000000000..dba131cb0 --- /dev/null +++ b/yt_dlp/extractor/blogger.py @@ -0,0 +1,54 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from ..utils import ( + mimetype2ext, + parse_duration, + parse_qs, + str_or_none, + traverse_obj, +) +from .common import InfoExtractor + + +class BloggerIE(InfoExtractor): + IE_NAME = 'blogger.com' + _VALID_URL = r'https?://(?:www\.)?blogger\.com/video\.g\?token=(?P<id>.+)' + _VALID_EMBED = r'''<iframe[^>]+src=["']((?:https?:)?//(?:www\.)?blogger\.com/video\.g\?token=[^"']+)["']''' + _TESTS = [{ + 'url': 'https://www.blogger.com/video.g?token=AD6v5dzEe9hfcARr5Hlq1WTkYy6t-fXH3BBahVhGvVHe5szdEUBEloSEDSTA8-b111089KbfWuBvTN7fnbxMtymsHhXAXwVvyzHH4Qch2cfLQdGxKQrrEuFpC1amSl_9GuLWODjPgw', + 'md5': 'f1bc19b6ea1b0fd1d81e84ca9ec467ac', + 'info_dict': { + 'id': 'BLOGGER-video-3c740e3a49197e16-796', + 'title': 'BLOGGER-video-3c740e3a49197e16-796', + 'ext': 'mp4', + 'thumbnail': r're:^https?://.*', + 'duration': 76.068, + } + }] + + @staticmethod + def _extract_urls(webpage): + return re.findall(BloggerIE._VALID_EMBED, webpage) + + def _real_extract(self, url): + token_id = self._match_id(url) + webpage = self._download_webpage(url, token_id) + data_json = self._search_regex(r'var\s+VIDEO_CONFIG\s*=\s*(\{.*)', webpage, 'JSON data') + data = self._parse_json(data_json.encode('utf-8').decode('unicode_escape'), token_id) + streams = data['streams'] + formats = [{ + 'ext': mimetype2ext(traverse_obj(parse_qs(stream['play_url']), ('mime', 0))), + 'url': stream['play_url'], + 'format_id': str_or_none(stream.get('format_id')), + } for stream in streams] + + return { + 'id': data.get('iframe_id', token_id), + 'title': data.get('iframe_id', token_id), + 'formats': formats, + 'thumbnail': data.get('thumbnail'), + 'duration': parse_duration(traverse_obj(parse_qs(streams[0]['play_url']), ('dur', 0))), + } diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 
89c61312d..75cb0b2ab 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -166,6 +166,7 @@ from .bleacherreport import ( BleacherReportIE, BleacherReportCMSIE, ) +from .blogger import BloggerIE from .bloomberg import BloombergIE from .bokecc import BokeCCIE from .bongacams import BongaCamsIE diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 3374c1c20..d6631e2f3 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -136,6 +136,7 @@ from .medialaan import MedialaanIE from .simplecast import SimplecastIE from .wimtv import WimTVIE from .tvp import TVPEmbedIE +from .blogger import BloggerIE class GenericIE(InfoExtractor): @@ -2173,6 +2174,17 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, }, + { + # blogger embed + 'url': 'https://blog.tomeuvizoso.net/2019/01/a-panfrost-milestone.html', + 'md5': 'f1bc19b6ea1b0fd1d81e84ca9ec467ac', + 'info_dict': { + 'id': 'BLOGGER-video-3c740e3a49197e16-796', + 'ext': 'mp4', + 'title': 'Blogger', + 'thumbnail': r're:^https?://.*', + }, + }, # { # # TODO: find another test # # http://schema.org/VideoObject @@ -3216,6 +3228,11 @@ class GenericIE(InfoExtractor): if onionstudios_url: return self.url_result(onionstudios_url) + # Look for Blogger embeds + blogger_urls = BloggerIE._extract_urls(webpage) + if blogger_urls: + return self.playlist_from_matches(blogger_urls, video_id, video_title, ie=BloggerIE.ie_key()) + # Look for ViewLift embeds viewlift_url = ViewLiftEmbedIE._extract_url(webpage) if viewlift_url: From c6118ca2ccf41663e14f353a6f7e6a306525e190 Mon Sep 17 00:00:00 2001 From: zulaport <70630440+zulaport@users.noreply.github.com> Date: Thu, 18 Nov 2021 14:45:13 -0800 Subject: [PATCH 0247/2552] [Stripchat] Add extractor (#1668) Authored by: zulaport --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/stripchat.py | 66 ++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 yt_dlp/extractor/stripchat.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 75cb0b2ab..6bad1f40c 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1407,6 +1407,7 @@ from .streamcloud import StreamcloudIE from .streamcz import StreamCZIE from .streetvoice import StreetVoiceIE from .stretchinternet import StretchInternetIE +from .stripchat import StripchatIE from .stv import STVPlayerIE from .sunporno import SunPornoIE from .sverigesradio import ( diff --git a/yt_dlp/extractor/stripchat.py b/yt_dlp/extractor/stripchat.py new file mode 100644 index 000000000..efd0afc75 --- /dev/null +++ b/yt_dlp/extractor/stripchat.py @@ -0,0 +1,66 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import ( + compat_str, +) +from ..utils import ( + ExtractorError, + lowercase_escape, + try_get, +) + + +class StripchatIE(InfoExtractor): + _VALID_URL = r'https?://stripchat\.com/(?P<id>[0-9A-Za-z-_]+)' + _TESTS = [{ + 'url': 'https://stripchat.com/feel_me', + 'info_dict': { + 'id': 'feel_me', + 'ext': 'mp4', + 'title': 're:^feel_me [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': str, + 'is_live': True, + 'age_limit': 18, + }, + 'skip': 'Room is offline', + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage( + 'https://stripchat.com/%s/' % video_id, video_id, + headers=self.geo_verification_headers()) + + data = self._parse_json( + self._search_regex( + 
r'<script\b[^>]*>\s*window\.__PRELOADED_STATE__\s*=(?P<value>.*?)<\/script>', + webpage, 'data', default='{}', group='value'), + video_id, transform_source=lowercase_escape, fatal=False) + if not data: + raise ExtractorError('Unable to find configuration for stream.') + + if try_get(data, lambda x: x['viewCam']['show'], dict): + raise ExtractorError('Model is in private show', expected=True) + elif not try_get(data, lambda x: x['viewCam']['model']['isLive'], bool): + raise ExtractorError('Model is offline', expected=True) + + server = try_get(data, lambda x: x['viewCam']['viewServers']['flashphoner-hls'], compat_str) + host = try_get(data, lambda x: x['config']['data']['hlsStreamHost'], compat_str) + model_id = try_get(data, lambda x: x['viewCam']['model']['id'], int) + + formats = self._extract_m3u8_formats( + 'https://b-%s.%s/hls/%d/%d.m3u8' % (server, host, model_id, model_id), + video_id, ext='mp4', m3u8_id='hls', fatal=False, live=True) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': self._live_title(video_id), + 'description': self._og_search_description(webpage), + 'is_live': True, + 'formats': formats, + # Stripchat declares the RTA meta-tag, but in an non-standard format so _rta_search() can't be used + 'age_limit': 18, + } From e16fefd8699c56d7a565e933ed1f55112ad399b4 Mon Sep 17 00:00:00 2001 From: Joshua Lochner <admin@xenova.com> Date: Fri, 19 Nov 2021 00:48:48 +0200 Subject: [PATCH 0248/2552] [Reddit] Add support for 1080p videos (#1682) Fixes: https://github.com/ytdl-org/youtube-dl/issues/29565 Authored by: xenova --- yt_dlp/extractor/extractors.py | 5 +-- yt_dlp/extractor/generic.py | 28 ++++++++++++ yt_dlp/extractor/reddit.py | 82 +++++++++++++++++----------------- 3 files changed, 71 insertions(+), 44 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 6bad1f40c..d19c67243 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1206,10 +1206,7 @@ from .redbulltv import ( RedBullTVRrnContentIE, RedBullIE, ) -from .reddit import ( - RedditIE, - RedditRIE, -) +from .reddit import RedditIE from .redtube import RedTubeIE from .regiotv import RegioTVIE from .rentv import ( diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index d6631e2f3..9c7fa4a21 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -2344,6 +2344,34 @@ class GenericIE(InfoExtractor): 'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg', } }, + { + # Reddit-hosted video that will redirect and be processed by RedditIE + # Redirects to https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/ + 'url': 'https://v.redd.it/zv89llsvexdz', + 'md5': '87f5f02f6c1582654146f830f21f8662', + 'info_dict': { + 'id': 'zv89llsvexdz', + 'ext': 'mp4', + 'timestamp': 1501941939.0, + 'title': 'That small heart attack.', + 'upload_date': '20170805', + 'uploader': 'Antw87' + } + }, + { + # 1080p Reddit-hosted video that will redirect and be processed by RedditIE + 'url': 'https://v.redd.it/33hgok7dfbz71/', + 'md5': '7a1d587940242c9bb3bd6eb320b39258', + 'info_dict': { + 'id': '33hgok7dfbz71', + 'ext': 'mp4', + 'title': "The game Didn't want me to Knife that Guy I guess", + 'uploader': 'paraf1ve', + 'timestamp': 1636788683.0, + 'upload_date': '20211113' + } + } + # ] def report_following_redirect(self, new_url): diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py index 3ea750aeb..a042a59cc 100644 --- 
a/yt_dlp/extractor/reddit.py +++ b/yt_dlp/extractor/reddit.py @@ -8,43 +8,11 @@ from ..utils import ( try_get, unescapeHTML, url_or_none, + traverse_obj ) class RedditIE(InfoExtractor): - _VALID_URL = r'https?://v\.redd\.it/(?P<id>[^/?#&]+)' - _TEST = { - # from https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/ - 'url': 'https://v.redd.it/zv89llsvexdz', - 'md5': '0a070c53eba7ec4534d95a5a1259e253', - 'info_dict': { - 'id': 'zv89llsvexdz', - 'ext': 'mp4', - 'title': 'zv89llsvexdz', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - formats = self._extract_m3u8_formats( - 'https://v.redd.it/%s/HLSPlaylist.m3u8' % video_id, video_id, - 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) - - formats.extend(self._extract_mpd_formats( - 'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id, video_id, - mpd_id='dash', fatal=False)) - - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': video_id, - 'formats': formats, - } - - -class RedditRIE(InfoExtractor): _VALID_URL = r'https?://(?P<subdomain>[^/]+\.)?reddit(?:media)?\.com/r/(?P<slug>[^/]+/comments/(?P<id>[^/?#&]+))' _TESTS = [{ 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/', @@ -147,19 +115,53 @@ class RedditRIE(InfoExtractor): for resolution in resolutions: add_thumbnail(resolution) - return { - '_type': 'url_transparent', - 'url': video_url, + info = { 'title': data.get('title'), 'thumbnails': thumbnails, 'timestamp': float_or_none(data.get('created_utc')), 'uploader': data.get('author'), - 'duration': int_or_none(try_get( - data, - (lambda x: x['media']['reddit_video']['duration'], - lambda x: x['secure_media']['reddit_video']['duration']))), 'like_count': int_or_none(data.get('ups')), 'dislike_count': int_or_none(data.get('downs')), 'comment_count': int_or_none(data.get('num_comments')), 'age_limit': age_limit, } + + # Check if media is hosted on reddit: + reddit_video = traverse_obj(data, (('media', 'secure_media'), 'reddit_video'), get_all=False) + if reddit_video: + playlist_urls = [ + try_get(reddit_video, lambda x: unescapeHTML(x[y])) + for y in ('dash_url', 'hls_url') + ] + + # Update video_id + display_id = video_id + video_id = self._search_regex( + r'https?://v\.redd\.it/(?P<id>[^/?#&]+)', reddit_video['fallback_url'], + 'video_id', default=display_id) + + dash_playlist_url = playlist_urls[0] or f'https://v.redd.it/{video_id}/DASHPlaylist.mpd' + hls_playlist_url = playlist_urls[1] or f'https://v.redd.it/{video_id}/HLSPlaylist.m3u8' + + formats = self._extract_m3u8_formats( + hls_playlist_url, display_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) + formats.extend(self._extract_mpd_formats( + dash_playlist_url, display_id, mpd_id='dash', fatal=False)) + self._sort_formats(formats) + + return { + **info, + 'id': video_id, + 'display_id': display_id, + 'formats': formats, + 'duration': int_or_none(reddit_video.get('duration')), + } + + # Not hosted on reddit, must continue extraction + return { + **info, + 'display_id': video_id, + '_type': 'url_transparent', + 'url': video_url, + } From 8863c8f09ee0bf36a83f428adca58b373d2c8358 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Thu, 18 Nov 2021 22:38:00 +0530 Subject: [PATCH 0249/2552] [soundcloud:search] Fix pagination --- yt_dlp/extractor/soundcloud.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index 824528474..2bb449220 100644 --- 
a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -893,5 +893,6 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): break def _get_n_results(self, query, n): - tracks = self._get_collection('search/tracks', query, limit=n, q=query) - return self.playlist_result(tracks, query, query) + return self.playlist_result(itertools.islice( + self._get_collection('search/tracks', query, limit=n, q=query), + 0, None if n == float('inf') else n), query, query) From 467b6b838737c0907bbc331f96352dda3019afb7 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Fri, 19 Nov 2021 05:20:13 +0530 Subject: [PATCH 0250/2552] [ExtractAudio] Support `alac` Closes #1707 --- yt_dlp/postprocessor/ffmpeg.py | 44 +++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index eacee8ee9..1bde170ce 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -53,6 +53,7 @@ ACODECS = { 'opus': 'libopus', 'vorbis': 'libvorbis', 'wav': None, + 'alac': None, } @@ -383,7 +384,7 @@ class FFmpegPostProcessor(PostProcessor): class FFmpegExtractAudioPP(FFmpegPostProcessor): COMMON_AUDIO_EXTS = ('wav', 'flac', 'm4a', 'aiff', 'mp3', 'ogg', 'mka', 'opus', 'wma') - SUPPORTED_EXTS = ('best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav') + SUPPORTED_EXTS = ('best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav', 'alac') def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False): FFmpegPostProcessor.__init__(self, downloader) @@ -399,10 +400,10 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): limits = { 'libmp3lame': (10, 0), + 'libvorbis': (0, 10), # FFmpeg's AAC encoder does not have an upper limit for the value of -q:a. # Experimentally, with values over 4, bitrate changes were minimal or non-existent 'aac': (0.1, 4), - 'vorbis': (0, 10), 'libfdk_aac': (1, 5), }.get(codec) if not limits: @@ -426,7 +427,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): @PostProcessor._restrict_to(images=False) def run(self, information): - path = information['filepath'] + orig_path = path = information['filepath'] orig_ext = information['ext'] if self._preferredcodec == 'best' and orig_ext in self.COMMON_AUDIO_EXTS: @@ -452,6 +453,10 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): more_opts = ['-f', 'adts'] if filecodec == 'vorbis': extension = 'ogg' + elif filecodec == 'alac': + acodec = None + extension = 'm4a' + more_opts += ['-acodec', 'alac'] else: # MP3 otherwise. acodec = 'libmp3lame' @@ -466,42 +471,49 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): more_opts = self._quality_args(acodec) if self._preferredcodec == 'aac': more_opts += ['-f', 'adts'] - if self._preferredcodec == 'm4a': + elif self._preferredcodec == 'm4a': more_opts += ['-bsf:a', 'aac_adtstoasc'] - if self._preferredcodec == 'vorbis': + elif self._preferredcodec == 'vorbis': extension = 'ogg' - if self._preferredcodec == 'wav': + elif self._preferredcodec == 'wav': extension = 'wav' more_opts += ['-f', 'wav'] + elif self._preferredcodec == 'alac': + extension = 'm4a' + more_opts += ['-acodec', 'alac'] prefix, sep, ext = path.rpartition('.') # not os.path.splitext, since the latter does not work on unicode in all setups - new_path = prefix + sep + extension - - information['filepath'] = new_path - information['ext'] = extension + temp_path = new_path = prefix + sep + extension - # If we download foo.mp3 and convert it to... 
foo.mp3, then don't delete foo.mp3, silly. - if (new_path == path - or (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))): + if new_path == path: + orig_path = prepend_extension(path, 'orig') + temp_path = prepend_extension(path, 'temp') + if (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)) + and os.path.exists(encodeFilename(orig_path))): self.to_screen('Post-process file %s exists, skipping' % new_path) return [], information try: - self.to_screen('Destination: ' + new_path) - self.run_ffmpeg(path, new_path, acodec, more_opts) + self.to_screen(f'Destination: {new_path}') + self.run_ffmpeg(path, temp_path, acodec, more_opts) except AudioConversionError as e: raise PostProcessingError( 'audio conversion failed: ' + e.msg) except Exception: raise PostProcessingError('error running ' + self.basename) + os.replace(path, orig_path) + os.replace(temp_path, new_path) + information['filepath'] = new_path + information['ext'] = extension + # Try to update the date time for extracted audio file. if information.get('filetime') is not None: self.try_utime( new_path, time.time(), information['filetime'], errnote='Cannot update utime of audio file') - return [path], information + return [orig_path], information class FFmpegVideoConvertorPP(FFmpegPostProcessor): From 9222c38182604d0a9826291509e0719b45b3faac Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Fri, 19 Nov 2021 05:36:28 +0530 Subject: [PATCH 0251/2552] [cleanup] Minor cleanup Closes #1696, Closes #1673 --- CONTRIBUTING.md | 2 +- Changelog.md | 2 +- README.md | 26 ++++++++++------ test/test_youtube_signature.py | 4 +++ yt_dlp/YoutubeDL.py | 7 ++--- yt_dlp/__init__.py | 56 ++++++++++++++-------------------- yt_dlp/extractor/francetv.py | 2 +- yt_dlp/extractor/funimation.py | 2 +- yt_dlp/extractor/linkedin.py | 2 +- yt_dlp/extractor/pbs.py | 2 +- yt_dlp/extractor/tenplay.py | 2 +- yt_dlp/extractor/youtube.py | 27 +++++++++++----- 12 files changed, 74 insertions(+), 60 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cd22afed9..8a0178d94 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -209,7 +209,7 @@ After you have ensured this site is distributing its content legally, you can fo ``` 1. Add an import in [`yt_dlp/extractor/extractors.py`](yt_dlp/extractor/extractors.py). 1. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, the tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. You can also run all the tests in one go with `TestDownload.test_YourExtractor_all` -1. Make sure you have atleast one test for your extractor. Even if all videos covered by the extractor are expected to be inaccessible for automated testing, tests should still be added with a `skip` parameter indicating why the purticular test is disabled from running. +1. Make sure you have atleast one test for your extractor. Even if all videos covered by the extractor are expected to be inaccessible for automated testing, tests should still be added with a `skip` parameter indicating why the particular test is disabled from running. 1. 
Have a look at [`yt_dlp/extractor/common.py`](yt_dlp/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](yt_dlp/extractor/common.py#L91-L426). Add tests and code for as many as you want. 1. Make sure your code follows [yt-dlp coding conventions](#yt-dlp-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart): diff --git a/Changelog.md b/Changelog.md index 5ac2aa615..7bb8c7888 100644 --- a/Changelog.md +++ b/Changelog.md @@ -40,7 +40,7 @@ * [fragment] Fix progress display in fragmented downloads * [downloader/ffmpeg] Fix vtt download with ffmpeg * [ffmpeg] Detect presence of setts and libavformat version -* [ExtractAudio] Rescale --audio-quality correctly by [CrypticSignal](https://github.com/CrypticSignal), [pukkandan](https://github.com/pukkandan) +* [ExtractAudio] Rescale `--audio-quality` correctly by [CrypticSignal](https://github.com/CrypticSignal), [pukkandan](https://github.com/pukkandan) * [ExtractAudio] Use `libfdk_aac` if available by [CrypticSignal](https://github.com/CrypticSignal) * [FormatSort] `eac3` is better than `ac3` * [FormatSort] Fix some fields' defaults diff --git a/README.md b/README.md index 96f5d7ecb..1a5f84cc9 100644 --- a/README.md +++ b/README.md @@ -96,7 +96,7 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/ * **New playlist extractors**: bilibili categories, eroprofile albums, hotstar series, hungama albums, newgrounds user, niconico search/users, paramountplus series, patreon user, peertube playlist/channels, roosterteeth series, sonyliv series, tiktok user, trovo channels, voot series -* **Fixed/improved extractors**: 7plus, 9now, afreecatv, akamai, aljazeera, amcnetworks, animalplanet, archive.org, arte, atv, bbc, bilibili, bitchute, bravotv, camtube, cbc, cda, ceskatelevize, chingari, comedycentral, coub, crackle, crunchyroll, curiositystream, diynetwork, dw, eroprofile, facebook, francetv, funimation, globo, hearthisatie, hidive, hotstar, hungama, imdb, ina, instagram, iprima, itv, iwara, kakao, la7, linkedinlearning, linuxacadamy, mediaset, mediasite, motherless, mxplayer, nbcolympics, ndr, newgrounds, niconico, nitter, nova, nrk, nuvid, oreilly, paramountplus, parliamentlive, patreon, pbs, peertube, plutotv, polskieradio, pornhub, reddit, reddit, redtube, rmcdecouverte, roosterteeth, rtp, rumble, saml verizon login, skyit, sonyliv, soundcloud, southparkde, spankbang, spreaker, streamable, tagesschau, tbs, tennistv, tenplay, tiktok, tubi, tv2, tv2hu, tv5mondeplus, tvp, twitcasting, vh1, viafree, videa, vidio, vidme, viewlift, viki, vimeo, viu, vk, vlive, vrt, wakanim, xhamster, yahoo +* **Fixed/improved extractors**: 7plus, 9now, afreecatv, akamai, aljazeera, amcnetworks, animalplanet, archive.org, arte, atv, bbc, bilibili, bitchute, bravotv, camtube, cbc, cda, ceskatelevize, chingari, comedycentral, coub, crackle, crunchyroll, curiositystream, diynetwork, dw, eroprofile, facebook, francetv, funimation, globo, hearthisatie, hidive, hotstar, hungama, imdb, ina, instagram, iprima, itv, iwara, kakao, la7, linkedinlearning, linuxacadamy, mediaset, mediasite, motherless, mxplayer, nbcolympics, ndr, newgrounds, niconico, nitter, nova, nrk, nuvid, oreilly, paramountplus, parliamentlive, patreon, pbs, peertube, plutotv, polskieradio, pornhub, reddit, redtube, rmcdecouverte, roosterteeth, rtp, rumble, saml verizon login, skyit, sonyliv, soundcloud, southparkde, spankbang, spreaker, streamable, 
tagesschau, tbs, tennistv, tenplay, tiktok, tubi, tv2, tv2hu, tv5mondeplus, tvp, twitcasting, vh1, viafree, videa, vidio, vidme, viewlift, viki, vimeo, viu, vk, vlive, vrt, wakanim, xhamster, yahoo * **New MSOs**: Philo, Spectrum, SlingTV, Cablevision, RCN @@ -136,7 +136,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu * Unlike youtube-dlc, yt-dlp does not allow merging multiple audio/video streams into one file by default (since this conflicts with the use of `-f bv*+ba`). If needed, this feature must be enabled using `--audio-multistreams` and `--video-multistreams`. You can also use `--compat-options multistreams` to enable both * `--ignore-errors` is enabled by default. Use `--abort-on-error` or `--compat-options abort-on-error` to abort on errors instead * When writing metadata files such as thumbnails, description or infojson, the same information (if available) is also written for playlists. Use `--no-write-playlist-metafiles` or `--compat-options no-playlist-metafiles` to not write these files -* `--add-metadata` attaches the `infojson` to `mkv` files in addition to writing the metadata when used with `--write-infojson`. Use `--compat-options no-attach-info-json` to revert this +* `--add-metadata` attaches the `infojson` to `mkv` files in addition to writing the metadata when used with `--write-info-json`. Use `--no-embed-info-json` or `--compat-options no-attach-info-json` to revert this * Some metadata are embedded into different fields when using `--add-metadata` as compared to youtube-dl. Most notably, `comment` field contains the `webpage_url` and `synopsis` contains the `description`. You can [use `--parse-metadata`](https://github.com/yt-dlp/yt-dlp#modifying-metadata) to modify this to your liking or use `--compat-options embed-metadata` to revert this * `playlist_index` behaves differently when used with options like `--playlist-reverse` and `--playlist-items`. See [#302](https://github.com/yt-dlp/yt-dlp/issues/302) for details. You can use `--compat-options playlist-index` if you want to keep the earlier behavior * The output of `-F` is listed in a new format. Use `--compat-options list-formats` to revert this @@ -196,7 +196,7 @@ python3 -m pip install --no-deps -U yt-dlp If you want to be on the cutting edge, you can also install the master branch with: ``` -python3 -m pip3 install --force-reinstall https://github.com/yt-dlp/yt-dlp/archive/master.zip +python3 -m pip install --force-reinstall https://github.com/yt-dlp/yt-dlp/archive/master.zip ``` Note that on some systems, you may need to use `py` or `python` instead of `python3` @@ -793,7 +793,7 @@ You can also fork the project on github and push it to a release branch in your --audio-format FORMAT Specify audio format to convert the audio to when -x is used. Currently supported formats are: best (default) or one of - best|aac|flac|mp3|m4a|opus|vorbis|wav + best|aac|flac|mp3|m4a|opus|vorbis|wav|alac --audio-quality QUALITY Specify ffmpeg audio quality, insert a value between 0 (best) and 10 (worst) for VBR or a specific bitrate like 128K @@ -844,15 +844,20 @@ You can also fork the project on github and push it to a release branch in your --no-embed-subs Do not embed subtitles (default) --embed-thumbnail Embed thumbnail in the video as cover art --no-embed-thumbnail Do not embed thumbnail (default) - --embed-metadata Embed metadata to the video file. 
Also adds - chapters to file unless --no-add-chapters - is used (Alias: --add-metadata) + --embed-metadata Embed metadata to the video file. Also + embeds chapters/infojson if present unless + --no-embed-chapters/--no-embed-info-json + are used (Alias: --add-metadata) --no-embed-metadata Do not add metadata to file (default) (Alias: --no-add-metadata) --embed-chapters Add chapter markers to the video file (Alias: --add-chapters) --no-embed-chapters Do not add chapter markers (default) (Alias: --no-add-chapters) + --embed-info-json Embed the infojson as an attachment to + mkv/mka video files + --no-embed-info-json Do not embed the infojson as an attachment + to the video file --parse-metadata FROM:TO Parse additional metadata like title/artist from other fields; see "MODIFYING METADATA" for details @@ -1210,11 +1215,14 @@ If you are using an output template inside a Windows batch file then you must es Note that on Windows you need to use double quotes instead of single. ```bash +$ yt-dlp --get-filename -o 'test video.%(ext)s' BaW_jenozKc +test video.webm # Literal name with correct extension + $ yt-dlp --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc -youtube-dl test video ''_ä↭𝕐.mp4 # All kinds of weird characters +youtube-dl test video ''_ä↭𝕐.webm # All kinds of weird characters $ yt-dlp --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc --restrict-filenames -youtube-dl_test_video_.mp4 # A simple file name +youtube-dl_test_video_.webm # Restricted file name # Download YouTube playlist videos in separate directory indexed by video order in a playlist $ yt-dlp -o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 60d8eabf5..df4c36047 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -74,6 +74,10 @@ _NSIG_TESTS = [ 'https://www.youtube.com/s/player/f8cb7a3b/player_ias.vflset/en_US/base.js', 'oBo2h5euWy6osrUt', 'ivXHpm7qJjJN', ), + ( + 'https://www.youtube.com/s/player/2dfe380c/player_ias.vflset/en_US/base.js', + 'oBo2h5euWy6osrUt', '3DIBbn3qdQ', + ), ] diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 197ec11e6..e078e62ef 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -528,7 +528,6 @@ class YoutubeDL(object): self.cache = Cache(self) windows_enable_vt_mode() - # FIXME: This will break if we ever print color to stdout self._allow_colors = { 'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file), 'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file), @@ -2012,10 +2011,10 @@ class YoutubeDL(object): # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector if format_spec == 'all': def selector_function(ctx): - yield from _check_formats(ctx['formats']) + yield from _check_formats(ctx['formats'][::-1]) elif format_spec == 'mergeall': def selector_function(ctx): - formats = list(_check_formats(ctx['formats'])) + formats = list(_check_formats(ctx['formats'][::-1])) if not formats: return merged_format = formats[-1] @@ -3163,7 +3162,7 @@ class YoutubeDL(object): return 'images' else: return default - return f'{res} images' if is_images else res + return f'img {res}' if is_images else res def _format_note(self, fdict): res = '' diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 63b9b6e2f..7960d3b03 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -378,8 +378,6 @@ def 
_real_main(argv=None): opts.sponsorblock_remove = set() sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove - if (opts.addmetadata or opts.sponsorblock_mark) and opts.addchapters is None: - opts.addchapters = True opts.remove_chapters = opts.remove_chapters or [] if (opts.remove_chapters or sponsorblock_query) and opts.sponskrub is not False: @@ -400,40 +398,32 @@ def _real_main(argv=None): opts.remuxvideo = False if opts.allow_unplayable_formats: - if opts.extractaudio: - report_conflict('--allow-unplayable-formats', '--extract-audio') - opts.extractaudio = False - if opts.remuxvideo: - report_conflict('--allow-unplayable-formats', '--remux-video') - opts.remuxvideo = False - if opts.recodevideo: - report_conflict('--allow-unplayable-formats', '--recode-video') - opts.recodevideo = False - if opts.addmetadata: - report_conflict('--allow-unplayable-formats', '--add-metadata') - opts.addmetadata = False - if opts.embedsubtitles: - report_conflict('--allow-unplayable-formats', '--embed-subs') - opts.embedsubtitles = False - if opts.embedthumbnail: - report_conflict('--allow-unplayable-formats', '--embed-thumbnail') - opts.embedthumbnail = False - if opts.xattrs: - report_conflict('--allow-unplayable-formats', '--xattrs') - opts.xattrs = False - if opts.fixup and opts.fixup.lower() not in ('never', 'ignore'): - report_conflict('--allow-unplayable-formats', '--fixup') + def report_unplayable_conflict(opt_name, arg, default=False, allowed=None): + val = getattr(opts, opt_name) + if (not allowed and val) or not allowed(val): + report_conflict('--allow-unplayable-formats', arg) + setattr(opts, opt_name, default) + + report_unplayable_conflict('extractaudio', '--extract-audio') + report_unplayable_conflict('remuxvideo', '--remux-video') + report_unplayable_conflict('recodevideo', '--recode-video') + report_unplayable_conflict('addmetadata', '--embed-metadata') + report_unplayable_conflict('addchapters', '--embed-chapters') + report_unplayable_conflict('embed_infojson', '--embed-info-json') + opts.embed_infojson = False + report_unplayable_conflict('embedsubtitles', '--embed-subs') + report_unplayable_conflict('embedthumbnail', '--embed-thumbnail') + report_unplayable_conflict('xattrs', '--xattrs') + report_unplayable_conflict('fixup', '--fixup', default='never', allowed=lambda x: x in (None, 'never', 'ignore')) opts.fixup = 'never' - if opts.remove_chapters: - report_conflict('--allow-unplayable-formats', '--remove-chapters') - opts.remove_chapters = [] - if opts.sponsorblock_remove: - report_conflict('--allow-unplayable-formats', '--sponsorblock-remove') - opts.sponsorblock_remove = set() - if opts.sponskrub: - report_conflict('--allow-unplayable-formats', '--sponskrub') + report_unplayable_conflict('remove_chapters', '--remove-chapters', default=[]) + report_unplayable_conflict('sponsorblock_remove', '--sponsorblock-remove', default=set()) + report_unplayable_conflict('sponskrub', '--sponskrub', default=set()) opts.sponskrub = False + if (opts.addmetadata or opts.sponsorblock_mark) and opts.addchapters is None: + opts.addchapters = True + # PostProcessors postprocessors = list(opts.add_postprocessors) if sponsorblock_query: diff --git a/yt_dlp/extractor/francetv.py b/yt_dlp/extractor/francetv.py index 3bbab69e6..bc5ef4df9 100644 --- a/yt_dlp/extractor/francetv.py +++ b/yt_dlp/extractor/francetv.py @@ -185,7 +185,7 @@ class FranceTVIE(InfoExtractor): 'vcodec': 'none', 'ext': 'mhtml', 'protocol': 'mhtml', - 'url': 'about:dummy', + 'url': 'about:invalid', 'fragments': [{ 'path': 
sheet, # XXX: not entirely accurate; each spritesheet seems to be diff --git a/yt_dlp/extractor/funimation.py b/yt_dlp/extractor/funimation.py index 42711083e..96dad2ca3 100644 --- a/yt_dlp/extractor/funimation.py +++ b/yt_dlp/extractor/funimation.py @@ -276,7 +276,7 @@ class FunimationIE(FunimationBaseIE): def _get_subtitles(self, subtitles, experience_id, episode, display_id, format_name): if isinstance(episode, str): webpage = self._download_webpage( - f'https://www.funimation.com/player/{experience_id}', display_id, + f'https://www.funimation.com/player/{experience_id}/', display_id, fatal=False, note=f'Downloading player webpage for {format_name}') episode, _, _ = self._get_episode(webpage, episode_id=episode, fatal=False) diff --git a/yt_dlp/extractor/linkedin.py b/yt_dlp/extractor/linkedin.py index 9255b3301..bd76ae166 100644 --- a/yt_dlp/extractor/linkedin.py +++ b/yt_dlp/extractor/linkedin.py @@ -109,7 +109,7 @@ class LinkedInIE(LinkedInBaseIE): description = clean_html(get_element_by_class('share-update-card__update-text', webpage)) like_count = int_or_none(get_element_by_class('social-counts-reactions__social-counts-numRections', webpage)) creator = strip_or_none(clean_html(get_element_by_class('comment__actor-name', webpage))) - + sources = self._parse_json(extract_attributes(self._search_regex(r'(<video[^>]+>)', webpage, 'video'))['data-sources'], video_id) formats = [{ 'url': source['src'], diff --git a/yt_dlp/extractor/pbs.py b/yt_dlp/extractor/pbs.py index 0eabf9bee..ffaa6bf92 100644 --- a/yt_dlp/extractor/pbs.py +++ b/yt_dlp/extractor/pbs.py @@ -193,7 +193,7 @@ class PBSIE(InfoExtractor): # Article with embedded player (or direct video) (?:www\.)?pbs\.org/(?:[^/]+/){1,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) | # Player - (?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/ + (?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+) ) ''' % '|'.join(list(zip(*_STATIONS))[0]) diff --git a/yt_dlp/extractor/tenplay.py b/yt_dlp/extractor/tenplay.py index c810cfd0d..5b3222ecf 100644 --- a/yt_dlp/extractor/tenplay.py +++ b/yt_dlp/extractor/tenplay.py @@ -58,7 +58,7 @@ class TenPlayIE(InfoExtractor): 'email': username, 'password': password, })) - return "Bearer " + data['jwt']['accessToken'] + return 'Bearer ' + data['jwt']['accessToken'] def _real_extract(self, url): content_id = self._match_id(url) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 41e7fce10..1fbdcd98b 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -508,9 +508,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor): Extracts visitorData from an API response or ytcfg Appears to be used to track session state """ - return traverse_obj( - args, (..., ('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))), - expected_type=compat_str, get_all=False) + return get_first( + args, (('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))), + expected_type=str) @property def is_authenticated(self): @@ -1674,7 +1674,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # shorts 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY', 'only_matching': True, - }, + }, { + 'note': 'Storyboards', + 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8', + 'info_dict': { + 'id': '5KLPxDtMqe8', + 'ext': 'mhtml', + 'format_id': 'sb0', + 'title': 'Your Brain is Plastic', + 'uploader_id': 'scishow', + 'description': 
'md5:89cd86034bdb5466cd87c6ba206cd2bc', + 'upload_date': '20140324', + 'uploader': 'SciShow', + }, 'params': {'format': 'mhtml', 'skip_download': True} + } ] @classmethod @@ -1920,9 +1933,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return sts def _mark_watched(self, video_id, player_responses): - playback_url = traverse_obj( - player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'), - expected_type=url_or_none, get_all=False) + playback_url = get_first( + player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'), + expected_type=url_or_none) if not playback_url: self.report_warning('Unable to mark watched') return From dd2a987d3f412dc61422ad13cf7b60920be8af6e Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Fri, 19 Nov 2021 06:30:25 +0530 Subject: [PATCH 0252/2552] [tests] Fix tests --- test/test_YoutubeDL.py | 4 ++-- test/test_all_urls.py | 1 - test/test_youtube_lists.py | 22 ++++++++++++---------- yt_dlp/YoutubeDL.py | 2 +- yt_dlp/extractor/youtube.py | 3 --- 5 files changed, 15 insertions(+), 17 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 5a0dabeb6..63ef50e1a 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -137,7 +137,7 @@ class TestFormatSelection(unittest.TestCase): test('webm/mp4', '47') test('3gp/40/mp4', '35') test('example-with-dashes', 'example-with-dashes') - test('all', '35', 'example-with-dashes', '45', '47', '2') # Order doesn't actually matter for this + test('all', '2', '47', '45', 'example-with-dashes', '35') test('mergeall', '2+47+45+example-with-dashes+35', multi=True) def test_format_selection_audio(self): @@ -520,7 +520,7 @@ class TestFormatSelection(unittest.TestCase): ydl = YDL({'format': 'all[width>=400][width<=600]'}) ydl.process_ie_result(info_dict) downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts] - self.assertEqual(downloaded_ids, ['B', 'C', 'D']) + self.assertEqual(downloaded_ids, ['D', 'C', 'B']) ydl = YDL({'format': 'best[height<40]'}) try: diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 68c1c68d3..2d89366d4 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -38,7 +38,6 @@ class TestAllURLsMatching(unittest.TestCase): assertTab('https://www.youtube.com/AsapSCIENCE') assertTab('https://www.youtube.com/embedded') assertTab('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') - assertTab('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') assertTab('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') assertTab('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668 self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M')) diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index e831393e4..d9638658d 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -26,29 +26,31 @@ class TestYoutubeLists(unittest.TestCase): def test_youtube_playlist_noplaylist(self): dl = FakeYDL() dl.params['noplaylist'] = True - ie = YoutubePlaylistIE(dl) + ie = YoutubeTabIE(dl) result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re') self.assertEqual(result['_type'], 'url') - self.assertEqual(YoutubeIE().extract_id(result['url']), 'FXxLjLQi3Fg') + self.assertEqual(YoutubeIE.extract_id(result['url']), 'FXxLjLQi3Fg') def test_youtube_course(self): + print('Skipping: Course URLs no longer exists') + return dl = FakeYDL() 
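        # A minimal, self-contained sketch (illustrative only, not part of the
        # original patch) of the classmethod pattern these tests were updated to
        # use -- YoutubeIE.extract_id() and suitable() no longer need an instance:
        from yt_dlp.extractor.youtube import YoutubeIE as _YoutubeIE
        _url = 'https://www.youtube.com/watch?v=BaW_jenozKc'  # the test video asserted on below
        assert _YoutubeIE.suitable(_url)
        assert _YoutubeIE.extract_id(_url) == 'BaW_jenozKc'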
ie = YoutubePlaylistIE(dl) # TODO find a > 100 (paginating?) videos course result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') entries = list(result['entries']) - self.assertEqual(YoutubeIE().extract_id(entries[0]['url']), 'j9WZyLZCBzs') + self.assertEqual(YoutubeIE.extract_id(entries[0]['url']), 'j9WZyLZCBzs') self.assertEqual(len(entries), 25) - self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0') + self.assertEqual(YoutubeIE.extract_id(entries[-1]['url']), 'rYefUsYuEp0') def test_youtube_mix(self): dl = FakeYDL() - ie = YoutubePlaylistIE(dl) - result = ie.extract('https://www.youtube.com/watch?v=W01L70IGBgE&index=2&list=RDOQpdSVF_k_w') - entries = result['entries'] + ie = YoutubeTabIE(dl) + result = ie.extract('https://www.youtube.com/watch?v=tyITL_exICo&list=RDCLAK5uy_kLWIr9gv1XLlPbaDS965-Db4TrBoUTxQ8') + entries = list(result['entries']) self.assertTrue(len(entries) >= 50) original_video = entries[0] - self.assertEqual(original_video['id'], 'OQpdSVF_k_w') + self.assertEqual(original_video['id'], 'tyITL_exICo') def test_youtube_toptracks(self): print('Skipping: The playlist page gives error 500') @@ -68,10 +70,10 @@ class TestYoutubeLists(unittest.TestCase): entries = list(result['entries']) self.assertTrue(len(entries) == 1) video = entries[0] - self.assertEqual(video['_type'], 'url_transparent') + self.assertEqual(video['_type'], 'url') self.assertEqual(video['ie_key'], 'Youtube') self.assertEqual(video['id'], 'BaW_jenozKc') - self.assertEqual(video['url'], 'BaW_jenozKc') + self.assertEqual(video['url'], 'https://www.youtube.com/watch?v=BaW_jenozKc') self.assertEqual(video['title'], 'youtube-dl test video "\'/\\ä↭𝕐') self.assertEqual(video['duration'], 10) self.assertEqual(video['uploader'], 'Philipp Hagemeister') diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index e078e62ef..1f1b4ccd4 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2014,7 +2014,7 @@ class YoutubeDL(object): yield from _check_formats(ctx['formats'][::-1]) elif format_spec == 'mergeall': def selector_function(ctx): - formats = list(_check_formats(ctx['formats'][::-1])) + formats = list(_check_formats(ctx['formats'])) if not formats: return merged_format = formats[-1] diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 1fbdcd98b..632129bc6 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3467,9 +3467,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): 'note': 'inline playlist with not always working continuations', 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C', 'only_matching': True, - }, { - 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8', - 'only_matching': True, }, { 'url': 'https://www.youtube.com/course', 'only_matching': True, From 6b993ca765753e0b04d65ec70cf787a2e9f94639 Mon Sep 17 00:00:00 2001 From: nyuszika7h <nyuszika7h@gmail.com> Date: Fri, 19 Nov 2021 02:49:51 +0100 Subject: [PATCH 0253/2552] [hls] Better FairPlay DRM detection (#1661) Authored by: nyuszika7h --- yt_dlp/downloader/hls.py | 9 +++++++++ yt_dlp/extractor/common.py | 8 ++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index 61312c5ba..e932fd6ae 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -77,6 +77,15 @@ class HlsFD(FragmentFD): message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodomex are available; ' 'Decryption 
will be performed natively, but will be extremely slow') if not can_download: + has_drm = re.search('|'.join([ + r'#EXT-X-FAXS-CM:', # Adobe Flash Access + r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay + ]), s) + if has_drm and not self.params.get('allow_unplayable_formats'): + self.report_error( + 'This video is DRM protected; Try selecting another format with --format or ' + 'add --check-formats to automatically fallback to the next best format') + return False message = message or 'Unsupported features have been detected' fd = FFmpegFD(self.ydl, self.params) self.report_warning(f'{message}; extraction will be delegated to {fd.get_basename()}') diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 6f0650296..a47364d07 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -2035,10 +2035,10 @@ class InfoExtractor(object): video_id=None): formats, subtitles = [], {} - if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access - return formats, subtitles - - has_drm = re.search(r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', m3u8_doc) + has_drm = re.search('|'.join([ + r'#EXT-X-FAXS-CM:', # Adobe Flash Access + r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay + ]), m3u8_doc) def format_url(url): return url if re.match(r'^https?://', url) else compat_urlparse.urljoin(m3u8_url, url) From a04e005521ecf2eb0c4979e234ff0c4f23a3caa0 Mon Sep 17 00:00:00 2001 From: The Hatsune Daishi <nao20010128@gmail.com> Date: Fri, 19 Nov 2021 10:54:10 +0900 Subject: [PATCH 0254/2552] [AES] Add ECB mode (#1686) Needed for #1688 Authored by: nao20010128nao --- test/test_aes.py | 18 +++++++++++++++++- yt_dlp/aes.py | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/test/test_aes.py b/test/test_aes.py index 46db59e57..5c9273f8a 100644 --- a/test/test_aes.py +++ b/test/test_aes.py @@ -10,6 +10,8 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from yt_dlp.aes import ( aes_decrypt, aes_encrypt, + aes_ecb_encrypt, + aes_ecb_decrypt, aes_cbc_decrypt, aes_cbc_decrypt_bytes, aes_cbc_encrypt, @@ -17,7 +19,8 @@ from yt_dlp.aes import ( aes_ctr_encrypt, aes_gcm_decrypt_and_verify, aes_gcm_decrypt_and_verify_bytes, - aes_decrypt_text + aes_decrypt_text, + BLOCK_SIZE_BYTES, ) from yt_dlp.compat import compat_pycrypto_AES from yt_dlp.utils import bytes_to_intlist, intlist_to_bytes @@ -94,6 +97,19 @@ class TestAES(unittest.TestCase): decrypted = (aes_decrypt_text(encrypted, password, 32)) self.assertEqual(decrypted, self.secret_msg) + def test_ecb_encrypt(self): + data = bytes_to_intlist(self.secret_msg) + data += [0x08] * (BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES) + encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key, self.iv)) + self.assertEqual( + encrypted, + b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:') + + def test_ecb_decrypt(self): + data = bytes_to_intlist(b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:') + decrypted = intlist_to_bytes(aes_ecb_decrypt(data, self.key, self.iv)) + self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) + if __name__ == '__main__': unittest.main() diff --git a/yt_dlp/aes.py b/yt_dlp/aes.py index 60cdeb74e..8503e3dfd 100644 --- a/yt_dlp/aes.py +++ b/yt_dlp/aes.py @@ -28,6 +28,48 @@ else: BLOCK_SIZE_BYTES = 16 +def aes_ecb_encrypt(data, key, iv=None): + """ + Encrypt with aes in ECB mode + + @param {int[]} data cleartext + @param 
{int[]} key 16/24/32-Byte cipher key + @param {int[]} iv Unused for this mode + @returns {int[]} encrypted data + """ + expanded_key = key_expansion(key) + block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + + encrypted_data = [] + for i in range(block_count): + block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] + encrypted_data += aes_encrypt(block, expanded_key) + encrypted_data = encrypted_data[:len(data)] + + return encrypted_data + + +def aes_ecb_decrypt(data, key, iv=None): + """ + Decrypt with aes in ECB mode + + @param {int[]} data cleartext + @param {int[]} key 16/24/32-Byte cipher key + @param {int[]} iv Unused for this mode + @returns {int[]} decrypted data + """ + expanded_key = key_expansion(key) + block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + + encrypted_data = [] + for i in range(block_count): + block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] + encrypted_data += aes_decrypt(block, expanded_key) + encrypted_data = encrypted_data[:len(data)] + + return encrypted_data + + def aes_ctr_decrypt(data, key, iv): """ Decrypt with aes in counter mode From 7333296ff5386efcd13a9db780170350e1924389 Mon Sep 17 00:00:00 2001 From: Paper <37962225+mrpapersonic@users.noreply.github.com> Date: Fri, 19 Nov 2021 01:11:36 -0500 Subject: [PATCH 0255/2552] [VidLii] Add 720p support (#1681) Authored by: mrpapersonic --- yt_dlp/extractor/vidlii.py | 47 +++++++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/vidlii.py b/yt_dlp/extractor/vidlii.py index f4774256b..ce7487ec1 100644 --- a/yt_dlp/extractor/vidlii.py +++ b/yt_dlp/extractor/vidlii.py @@ -5,9 +5,11 @@ import re from .common import InfoExtractor from ..utils import ( + HEADRequest, float_or_none, get_element_by_id, int_or_none, + str_to_int, strip_or_none, unified_strdate, urljoin, @@ -35,6 +37,25 @@ class VidLiiIE(InfoExtractor): 'categories': ['News & Politics'], 'tags': ['Vidlii', 'Jan', 'Videogames'], } + }, { + 'url': 'https://www.vidlii.com/watch?v=zTAtaAgOLKt', + 'md5': '5778f7366aa4c569b77002f8bf6b614f', + 'info_dict': { + 'id': 'zTAtaAgOLKt', + 'ext': 'mp4', + 'title': 'FULPTUBE SUCKS.', + 'description': 'md5:087b2ca355d4c8f8f77e97c43e72d711', + 'thumbnail': 'https://www.vidlii.com/usfi/thmp/zTAtaAgOLKt.jpg', + 'uploader': 'Homicide', + 'uploader_url': 'https://www.vidlii.com/user/Homicide', + 'upload_date': '20210612', + 'duration': 89, + 'view_count': int, + 'comment_count': int, + 'average_rating': float, + 'categories': ['News & Politics'], + 'tags': ['fulp', 'tube', 'sucks', 'bad', 'fulptube'], + }, }, { 'url': 'https://www.vidlii.com/embed?v=tJluaH4BJ3v&a=0', 'only_matching': True, @@ -45,10 +66,20 @@ class VidLiiIE(InfoExtractor): webpage = self._download_webpage( 'https://www.vidlii.com/watch?v=%s' % video_id, video_id) - - video_url = self._search_regex( - r'src\s*:\s*(["\'])(?P<url>(?:https?://)?(?:(?!\1).)+)\1', webpage, - 'video url', group='url') + formats = [] + + sources = [source[1] for source in re.findall( + r'src\s*:\s*(["\'])(?P<url>(?:https?://)?(?:(?!\1).)+)\1', + webpage) or []] + for source in sources: + height = int(self._search_regex(r'(\d+).mp4', source, 'height', default=360)) + if self._request_webpage(HEADRequest(source), video_id, f'Checking {height}p url', errnote=False): + formats.append({ + 'url': source, + 'format_id': f'{height}p', + 'height': height, + }) + self._sort_formats(formats) title = self._search_regex( (r'<h1>([^<]+)</h1>', r'<title>([^<]+) - VidLii<'), webpage, @@ -82,9 
+113,9 @@ class VidLiiIE(InfoExtractor): default=None) or self._search_regex( r'duration\s*:\s*(\d+)', webpage, 'duration', fatal=False)) - view_count = int_or_none(self._search_regex( - (r'<strong>(\d+)</strong> views', - r'Views\s*:\s*<strong>(\d+)</strong>'), + view_count = str_to_int(self._search_regex( + (r'<strong>([,0-9]+)</strong> views', + r'Views\s*:\s*<strong>([,0-9]+)</strong>'), webpage, 'view count', fatal=False)) comment_count = int_or_none(self._search_regex( @@ -109,11 +140,11 @@ class VidLiiIE(InfoExtractor): return { 'id': video_id, - 'url': video_url, 'title': title, 'description': description, 'thumbnail': thumbnail, 'uploader': uploader, + 'formats': formats, 'uploader_url': uploader_url, 'upload_date': upload_date, 'duration': duration, From c45b87419f86b5c513a3135ea17e93b3deea6e29 Mon Sep 17 00:00:00 2001 From: nyuszika7h <nyuszika7h@gmail.com> Date: Fri, 19 Nov 2021 15:57:01 +0100 Subject: [PATCH 0256/2552] [bbc] Get all available formats (#1717) Authored by: nyuszika7h --- yt_dlp/extractor/bbc.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py index 4e2dcd76b..672ed1ffe 100644 --- a/yt_dlp/extractor/bbc.py +++ b/yt_dlp/extractor/bbc.py @@ -451,9 +451,10 @@ class BBCCoUkIE(InfoExtractor): playlist = self._download_json( 'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id, playlist_id, 'Downloading playlist JSON') + formats = [] + subtitles = {} - version = playlist.get('defaultAvailableVersion') - if version: + for version in playlist.get('allAvailableVersions', []): smp_config = version['smpConfig'] title = smp_config['title'] description = smp_config['summary'] @@ -463,8 +464,18 @@ class BBCCoUkIE(InfoExtractor): continue programme_id = item.get('vpid') duration = int_or_none(item.get('duration')) - formats, subtitles = self._download_media_selector(programme_id) - return programme_id, title, description, duration, formats, subtitles + version_formats, version_subtitles = self._download_media_selector(programme_id) + types = version['types'] + for f in version_formats: + f['format_note'] = ', '.join(types) + if any('AudioDescribed' in x for x in types): + f['language_preference'] = -10 + formats += version_formats + for tag, subformats in (version_subtitles or {}).items(): + subtitles.setdefault(tag, []) + subtitles[tag] += subformats + + return programme_id, title, description, duration, formats, subtitles except ExtractorError as ee: if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404): raise From c5e3f84972f19e8f5c99ca358cf30bb105294e20 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Sat, 20 Nov 2021 08:33:51 +0530 Subject: [PATCH 0257/2552] [utils] Allow alignment in `render_table` and add tests --- test/test_utils.py | 39 ++++++++++++++++++++++++++++++++++++++- yt_dlp/YoutubeDL.py | 35 +++++++++++++++++------------------ yt_dlp/utils.py | 22 +++++++++++++--------- 3 files changed, 68 insertions(+), 28 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 810ed3de4..b918ae2b6 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1220,14 +1220,51 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') b'\xFF\xFE\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4\x00\x00\x00')) def test_render_table(self): + self.assertEqual( + render_table( + ['a', 'empty', 'bcd'], + [[123, '', 4], [9999, '', 51]]), + 'a empty bcd\n' + '123 4\n' + 
'9999 51') + + self.assertEqual( + render_table( + ['a', 'empty', 'bcd'], + [[123, '', 4], [9999, '', 51]], + hide_empty=True), + 'a bcd\n' + '123 4\n' + '9999 51') + + self.assertEqual( + render_table( + ['\ta', 'bcd'], + [['1\t23', 4], ['\t9999', 51]]), + ' a bcd\n' + '1 23 4\n' + '9999 51') + self.assertEqual( render_table( ['a', 'bcd'], - [[123, 4], [9999, 51]]), + [[123, 4], [9999, 51]], + delim='-'), 'a bcd\n' + '--------\n' '123 4\n' '9999 51') + self.assertEqual( + render_table( + ['a', 'bcd'], + [[123, 4], [9999, 51]], + delim='-', extra_gap=2), + 'a bcd\n' + '----------\n' + '123 4\n' + '9999 51') + def test_match_str(self): # Unary self.assertFalse(match_str('xy', {'x': 1200})) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 1f1b4ccd4..4bd6dcc4c 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3229,37 +3229,36 @@ class YoutubeDL(object): formats = info_dict.get('formats', [info_dict]) new_format = self.params.get('listformats_table', True) is not False if new_format: - tbr_digits = number_of_digits(max(f.get('tbr') or 0 for f in formats)) - vbr_digits = number_of_digits(max(f.get('vbr') or 0 for f in formats)) - abr_digits = number_of_digits(max(f.get('abr') or 0 for f in formats)) delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True) table = [ [ self._format_screen(format_field(f, 'format_id'), self.Styles.ID), format_field(f, 'ext'), self.format_resolution(f), - format_field(f, 'fps', '%3d'), + format_field(f, 'fps', '\t%d'), format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''), delim, - format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes), - format_field(f, 'tbr', f'%{tbr_digits}dk'), - shorten_protocol_name(f.get('protocol', '').replace("native", "n")), + format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes), + format_field(f, 'tbr', '\t%dk'), + shorten_protocol_name(f.get('protocol', '').replace('native', 'n')), delim, format_field(f, 'vcodec', default='unknown').replace('none', ''), - format_field(f, 'vbr', f'%{vbr_digits}dk'), + format_field(f, 'vbr', '\t%dk'), format_field(f, 'acodec', default='unknown').replace('none', ''), - format_field(f, 'abr', f'%{abr_digits}dk'), - format_field(f, 'asr', '%5dHz'), + format_field(f, 'abr', '\t%dk'), + format_field(f, 'asr', '\t%dHz'), join_nonempty( self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None, format_field(f, 'language', '[%s]'), - format_field(f, 'format_note'), - format_field(f, 'container', ignore=(None, f.get('ext'))), - delim=', '), + join_nonempty( + format_field(f, 'format_note'), + format_field(f, 'container', ignore=(None, f.get('ext'))), + delim=', '), + delim=' '), ] for f in formats if f.get('preference') is None or f['preference'] >= -1000] header_line = self._list_format_headers( - 'ID', 'EXT', 'RESOLUTION', 'FPS', 'HDR', delim, ' FILESIZE', ' TBR', 'PROTO', - delim, 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO') + 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO', + delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO') else: table = [ [ @@ -3275,8 +3274,8 @@ class YoutubeDL(object): '[info] Available formats for %s:' % info_dict['id']) self.to_stdout(render_table( header_line, table, - extraGap=(0 if new_format else 1), - hideEmpty=new_format, + extra_gap=(0 if new_format else 1), + hide_empty=new_format, 
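            # render_table() right-aligns any cell text placed after a '\t' (see the
            # utils change in this patch) -- hence the '\t%dk' style templates above.
            # An illustrative stand-alone call (sample values are made up):
            #
            #     from yt_dlp.utils import render_table
            #     print(render_table(['ID', '\tTBR'], [['137', '\t4400k'], ['22', '\t192k']]))
            #
            # prints the TBR column right-aligned:
            #
            #     ID    TBR
            #     137 4400k
            #     22   192k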
delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))) def list_thumbnails(self, info_dict): @@ -3307,7 +3306,7 @@ class YoutubeDL(object): self.to_stdout(render_table( self._list_format_headers('Language', 'Name', 'Formats'), [_row(lang, formats) for lang, formats in subtitles.items()], - hideEmpty=True)) + hide_empty=True)) def urlopen(self, req): """ Start an HTTP download """ diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index a9e066257..282ed1f93 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4805,10 +4805,11 @@ def determine_protocol(info_dict): return compat_urllib_parse_urlparse(url).scheme -def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False): - """ Render a list of rows, each as a list of values """ +def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False): + """ Render a list of rows, each as a list of values. + Text after a \t will be right aligned """ def width(string): - return len(remove_terminal_sequences(string)) + return len(remove_terminal_sequences(string).replace('\t', '')) def get_max_lens(table): return [max(width(str(v)) for v in col) for col in zip(*table)] @@ -4816,21 +4817,24 @@ def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False): def filter_using_list(row, filterArray): return [col for (take, col) in zip(filterArray, row) if take] - if hideEmpty: + if hide_empty: max_lens = get_max_lens(data) header_row = filter_using_list(header_row, max_lens) data = [filter_using_list(row, max_lens) for row in data] table = [header_row] + data max_lens = get_max_lens(table) - extraGap += 1 + extra_gap += 1 if delim: - table = [header_row] + [[delim * (ml + extraGap) for ml in max_lens]] + data - max_lens[-1] = 0 + table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data + table[1][-1] = table[1][-1][:-extra_gap] # Remove extra_gap from end of delimiter for row in table: for pos, text in enumerate(map(str, row)): - row[pos] = text + (' ' * (max_lens[pos] - width(text) + extraGap)) - ret = '\n'.join(''.join(row) for row in table) + if '\t' in text: + row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap + else: + row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap) + ret = '\n'.join(''.join(row).rstrip() for row in table) return ret From c07a39ae8e3e3b71ec8c7c0fa3e91b6908584316 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Fri, 19 Nov 2021 20:45:52 +0530 Subject: [PATCH 0258/2552] [utils] Fix `PagedList` Bug in d8cf8d97a8dbc9602556de474af133b5ab0e0a29 --- yt_dlp/YoutubeDL.py | 2 +- yt_dlp/utils.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 4bd6dcc4c..62ec087b8 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1328,7 +1328,7 @@ class YoutubeDL(object): self.to_stderr('\r') self.report_warning(f'{e}; Re-extracting data') return wrapper(self, *args, **kwargs) - except (DownloadCancelled, LazyList.IndexError): + except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError): raise except Exception as e: if self.params.get('ignoreerrors'): diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 282ed1f93..2d5b9892d 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4168,6 +4168,10 @@ class LazyList(collections.abc.Sequence): class PagedList: + + class IndexError(IndexError): + pass + def __len__(self): # This is only useful for tests return len(self.getslice()) @@ -4198,7 
+4202,7 @@ class PagedList: raise TypeError('indices must be non-negative integers') entries = self.getslice(idx, idx + 1) if not entries: - raise IndexError() + raise self.IndexError() return entries[0] From 282f570918f936a3aa9f57d4c85de4693da882c9 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Sat, 20 Nov 2021 08:05:57 +0530 Subject: [PATCH 0259/2552] [utils] Fix error when copying `LazyList` --- test/test_utils.py | 10 +++++----- yt_dlp/YoutubeDL.py | 4 ++-- yt_dlp/utils.py | 20 ++++++++++++++------ 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index b918ae2b6..22dda4f37 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1657,9 +1657,9 @@ Line 1 self.assertEqual(repr(LazyList(it)), repr(it)) self.assertEqual(str(LazyList(it)), str(it)) - self.assertEqual(list(LazyList(it).reverse()), it[::-1]) - self.assertEqual(list(LazyList(it).reverse()[1:3:7]), it[::-1][1:3:7]) - self.assertEqual(list(LazyList(it).reverse()[::-1]), it) + self.assertEqual(list(LazyList(it, reverse=True)), it[::-1]) + self.assertEqual(list(reversed(LazyList(it))[::-1]), it) + self.assertEqual(list(reversed(LazyList(it))[1:3:7]), it[::-1][1:3:7]) def test_LazyList_laziness(self): @@ -1672,13 +1672,13 @@ Line 1 test(ll, 5, 5, range(6)) test(ll, -3, 7, range(10)) - ll = LazyList(range(10)).reverse() + ll = LazyList(range(10), reverse=True) test(ll, -1, 0, range(1)) test(ll, 3, 6, range(10)) ll = LazyList(itertools.count()) test(ll, 10, 10, range(11)) - ll.reverse() + ll = reversed(ll) test(ll, -15, 14, range(15)) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 62ec087b8..fb7e12624 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2166,7 +2166,7 @@ class YoutubeDL(object): t['url'] = sanitize_url(t['url']) if self.params.get('check_formats') is True: - info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1])).reverse() + info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True) else: info_dict['thumbnails'] = thumbnails @@ -2361,7 +2361,7 @@ class YoutubeDL(object): # TODO Central sorting goes here if self.params.get('check_formats') is True: - formats = LazyList(self._check_formats(formats[::-1])).reverse() + formats = LazyList(self._check_formats(formats[::-1]), reverse=True) if not formats or formats[0] is not info_dict: # only set the 'formats' fields if the original info_dict list them diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 2d5b9892d..ade2bbff1 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4086,10 +4086,10 @@ class LazyList(collections.abc.Sequence): class IndexError(IndexError): pass - def __init__(self, iterable): + def __init__(self, iterable, *, reverse=False, _cache=None): self.__iterable = iter(iterable) - self.__cache = [] - self.__reversed = False + self.__cache = [] if _cache is None else _cache + self.__reversed = reverse def __iter__(self): if self.__reversed: @@ -4155,9 +4155,17 @@ class LazyList(collections.abc.Sequence): self.__exhaust() return len(self.__cache) - def reverse(self): - self.__reversed = not self.__reversed - return self + def __reversed__(self): + return type(self)(self.__iterable, reverse=not self.__reversed, _cache=self.__cache) + + def __copy__(self): + return type(self)(self.__iterable, reverse=self.__reversed, _cache=self.__cache) + + def __deepcopy__(self, memo): + # FIXME: This is actually just a shallow copy + id_ = id(self) + memo[id_] = self.__copy__() + return memo[id_] def __repr__(self): # 
repr and str should mimic a list. So we exhaust the iterable From d76991ab0743a1e855bd44be597a40c89d5a814a Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Sat, 20 Nov 2021 08:27:47 +0530 Subject: [PATCH 0260/2552] Fix `--check-formats` for `mhtml` Closes #1709 --- yt_dlp/downloader/mhtml.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/downloader/mhtml.py b/yt_dlp/downloader/mhtml.py index b75db18a8..1477f65a6 100644 --- a/yt_dlp/downloader/mhtml.py +++ b/yt_dlp/downloader/mhtml.py @@ -114,8 +114,8 @@ body > figure > img { fragment_base_url = info_dict.get('fragment_base_url') fragments = info_dict['fragments'][:1] if self.params.get( 'test', False) else info_dict['fragments'] - title = info_dict['title'] - origin = info_dict['webpage_url'] + title = info_dict.get('title', info_dict['format_id']) + origin = info_dict.get('webpage_url', info_dict['url']) ctx = { 'filename': filename, From 545ad64988d03b8c38e51004cd6941236f529e66 Mon Sep 17 00:00:00 2001 From: aarubui <aarubui@users.noreply.github.com> Date: Sat, 20 Nov 2021 15:03:43 +1100 Subject: [PATCH 0261/2552] [willow] Add extractor (#1723) Authored by: aarubui --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/willow.py | 58 ++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 yt_dlp/extractor/willow.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index d19c67243..fdcd60e2d 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1789,6 +1789,7 @@ from .weibo import ( WeiboMobileIE ) from .weiqitv import WeiqiTVIE +from .willow import WillowIE from .wimtv import WimTVIE from .whowatch import WhoWatchIE from .wistia import ( diff --git a/yt_dlp/extractor/willow.py b/yt_dlp/extractor/willow.py new file mode 100644 index 000000000..4d3d62f95 --- /dev/null +++ b/yt_dlp/extractor/willow.py @@ -0,0 +1,58 @@ +# coding: utf-8 +from ..utils import ExtractorError +from .common import InfoExtractor + + +class WillowIE(InfoExtractor): + _VALID_URL = r'https?://(www\.)?willow\.tv/videos/(?P<id>[0-9a-z-_]+)' + _GEO_COUNTRIES = ['US'] + + _TESTS = [{ + 'url': 'http://willow.tv/videos/d5winning-moment-eng-vs-ind-streaming-online-4th-test-india-tour-of-england-2021', + 'info_dict': { + 'id': '169662', + 'display_id': 'd5winning-moment-eng-vs-ind-streaming-online-4th-test-india-tour-of-england-2021', + 'ext': 'mp4', + 'title': 'Winning Moment: 4th Test, England vs India', + 'thumbnail': 'https://aimages.willow.tv/ytThumbnails/6748_D5winning_moment.jpg', + 'duration': 233, + 'timestamp': 1630947954, + 'upload_date': '20210906', + 'location': 'Kennington Oval, London', + 'series': 'India tour of England 2021', + }, + 'params': { + 'skip_download': True, # AES-encrypted m3u8 + }, + }, { + 'url': 'http://willow.tv/videos/highlights-short-ind-vs-nz-streaming-online-2nd-t20i-new-zealand-tour-of-india-2021', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + video_data = self._parse_json(self._html_search_regex( + r'var\s+data_js\s*=\s*JSON\.parse\(\'(.+)\'\)', webpage, + 'data_js'), video_id) + + video = next((v for v in video_data.get('trending_videos') or [] + if v.get('secureurl')), None) + if not video: + raise ExtractorError('No videos found') + + formats = self._extract_m3u8_formats(video['secureurl'], video_id, 'mp4') + self._sort_formats(formats) + + return { + 'id': str(video.get('content_id')), 
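+            # content_id appears to be numeric in the page JSON, while
+            # yt-dlp expects the id field to be a string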
+ 'display_id': video.get('video_slug'), + 'title': video.get('video_name') or self._html_search_meta('twitter:title', webpage), + 'formats': formats, + 'thumbnail': video.get('yt_thumb_url') or self._html_search_meta( + 'twitter:image', webpage, default=None), + 'duration': video.get('duration_seconds'), + 'timestamp': video.get('created_date'), + 'location': video.get('venue'), + 'series': video.get('series_name'), + } From 77fcc6515852bc2e1c6960a6e010ab2ff1caf1ee Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Sat, 20 Nov 2021 14:55:14 +0530 Subject: [PATCH 0262/2552] [CozyTV] Add extractor (#1727) Authored by: Ashish0804 --- yt_dlp/extractor/cozytv.py | 40 ++++++++++++++++++++++++++++++++++ yt_dlp/extractor/extractors.py | 1 + 2 files changed, 41 insertions(+) create mode 100644 yt_dlp/extractor/cozytv.py diff --git a/yt_dlp/extractor/cozytv.py b/yt_dlp/extractor/cozytv.py new file mode 100644 index 000000000..868d8d27d --- /dev/null +++ b/yt_dlp/extractor/cozytv.py @@ -0,0 +1,40 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import unified_strdate + + +class CozyTVIE(InfoExtractor): + _VALID_URL = r'(?:https?://)(?:www\.)?cozy\.tv/(?P<uploader>[^/]+)/replays/(?P<id>[^/$#&?]+)' + + _TESTS = [{ + 'url': 'https://cozy.tv/beardson/replays/2021-11-19_1', + 'info_dict': { + 'id': 'beardson-2021-11-19_1', + 'ext': 'mp4', + 'title': 'pokemon pt2', + 'uploader': 'beardson', + 'upload_date': '20211119', + 'was_live': True, + 'duration': 7981, + }, + 'params': {'skip_download': True} + }] + + def _real_extract(self, url): + uploader, date = self._match_valid_url(url).groups() + id = f'{uploader}-{date}' + data_json = self._download_json(f'https://api.cozy.tv/cache/{uploader}/replay/{date}', id) + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + f'https://cozycdn.foxtrotstream.xyz/replays/{uploader}/{date}/index.m3u8', id, ext='mp4') + return { + 'id': id, + 'title': data_json.get('title'), + 'uploader': data_json.get('user') or uploader, + 'upload_date': unified_strdate(data_json.get('date')), + 'was_live': True, + 'duration': data_json.get('duration'), + 'formats': formats, + 'subtitles': subtitles, + } diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index fdcd60e2d..a0f4908f0 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -293,6 +293,7 @@ from .commonprotocols import ( from .condenast import CondeNastIE from .contv import CONtvIE from .corus import CorusIE +from .cozytv import CozyTVIE from .cracked import CrackedIE from .crackle import CrackleIE from .crooksandliars import CrooksAndLiarsIE From 849d699a8b2d36a9aab6c3a34073c9d1c5088a29 Mon Sep 17 00:00:00 2001 From: 4a1e2y5 <66421735+4a1e2y5@users.noreply.github.com> Date: Sun, 21 Nov 2021 00:24:05 +0100 Subject: [PATCH 0263/2552] [xvideos] Detect embed URLs (#1729) Authored by: 4a1e2y5 --- yt_dlp/extractor/xvideos.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/xvideos.py b/yt_dlp/extractor/xvideos.py index 8fc64914c..ef45eb929 100644 --- a/yt_dlp/extractor/xvideos.py +++ b/yt_dlp/extractor/xvideos.py @@ -19,7 +19,7 @@ class XVideosIE(InfoExtractor): (?: (?:[^/]+\.)?xvideos2?\.com/video| (?:www\.)?xvideos\.es/video| - flashservice\.xvideos\.com/embedframe/| + (?:www|flashservice)\.xvideos\.com/embedframe/| static-hw\.xvideos\.com/swf/xv-player\.swf\?.*?\bid_video= ) (?P<id>[0-9]+) @@ -37,6 +37,9 @@ class 
XVideosIE(InfoExtractor): }, { 'url': 'https://flashservice.xvideos.com/embedframe/4588838', 'only_matching': True, + }, { + 'url': 'https://www.xvideos.com/embedframe/4588838', + 'only_matching': True, }, { 'url': 'http://static-hw.xvideos.com/swf/xv-player.swf?id_video=4588838', 'only_matching': True, From c98d4df23bfba30fc38f2614bd96db67644e7ddf Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Mon, 22 Nov 2021 13:41:57 +0530 Subject: [PATCH 0264/2552] [WDR] Expand valid URL Closes #1749 --- yt_dlp/extractor/wdr.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/wdr.py b/yt_dlp/extractor/wdr.py index f54aa6ff9..d3229d8af 100644 --- a/yt_dlp/extractor/wdr.py +++ b/yt_dlp/extractor/wdr.py @@ -22,7 +22,11 @@ from ..utils import ( class WDRIE(InfoExtractor): - _VALID_URL = r'https?://deviceids-medp\.wdr\.de/ondemand/\d+/(?P<id>\d+)\.js' + _VALID_URL = r'''(?x)https?:// + (?:deviceids-medp\.wdr\.de/ondemand/\d+/| + kinder\.wdr\.de/(?!mediathek/)[^#?]+-) + (?P<id>\d+)\.(?:js|assetjsonp) + ''' _GEO_COUNTRIES = ['DE'] _TEST = { 'url': 'http://deviceids-medp.wdr.de/ondemand/155/1557833.js', From 234416e4bf39d442e7abd036b7c59b8934a4086b Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Mon, 22 Nov 2021 23:32:14 +0530 Subject: [PATCH 0265/2552] [downloader/ffmpeg] Fix for direct videos inside mpd manifests Closes #1751 --- yt_dlp/downloader/external.py | 3 +-- yt_dlp/extractor/common.py | 9 +++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 1efbb2fab..da69423f7 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -443,8 +443,7 @@ class FFmpegFD(ExternalFD): if info_dict.get('requested_formats') or protocol == 'http_dash_segments': for (i, fmt) in enumerate(info_dict.get('requested_formats') or [info_dict]): stream_number = fmt.get('manifest_stream_number', 0) - a_or_v = 'a' if fmt.get('acodec') != 'none' else 'v' - args.extend(['-map', f'{i}:{a_or_v}:{stream_number}']) + args.extend(['-map', f'{i}:{stream_number}']) if self.params.get('test', False): args += ['-fs', compat_str(self._TEST_FILE_SIZE)] diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index a47364d07..1565ba5c3 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import base64 +import collections import datetime import hashlib import itertools @@ -2649,7 +2650,7 @@ class InfoExtractor(object): mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration')) formats, subtitles = [], {} - stream_numbers = {'audio': 0, 'video': 0} + stream_numbers = collections.defaultdict(int) for period in mpd_doc.findall(_add_ns('Period')): period_duration = parse_duration(period.get('duration')) or mpd_duration period_ms_info = extract_multisegment_info(period, { @@ -2715,10 +2716,8 @@ class InfoExtractor(object): 'format_note': 'DASH %s' % content_type, 'filesize': filesize, 'container': mimetype2ext(mime_type) + '_dash', - 'manifest_stream_number': stream_numbers[content_type] } f.update(parse_codecs(codecs)) - stream_numbers[content_type] += 1 elif content_type == 'text': f = { 'ext': mimetype2ext(mime_type), @@ -2885,7 +2884,9 @@ class InfoExtractor(object): else: # Assuming direct URL to unfragmented media. 
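+                # the per-URL stream_numbers counter below gives each format
+                # sharing a manifest URL a consecutive manifest_stream_number;
+                # FFmpegFD turns that into `-map <input>:<stream_number>`
+                # (see the external.py hunk above)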
f['url'] = base_url - if content_type in ('video', 'audio') or mime_type == 'image/jpeg': + if content_type in ('video', 'audio', 'image/jpeg'): + f['manifest_stream_number'] = stream_numbers[f['url']] + stream_numbers[f['url']] += 1 formats.append(f) elif content_type == 'text': subtitles.setdefault(lang or 'und', []).append(f) From 1ee34c76bb6e3a74d5a4d76475469e64dc201063 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 23 Nov 2021 01:09:17 +0530 Subject: [PATCH 0266/2552] [vimeo] Add fallback for config URL Closes #1662 --- yt_dlp/extractor/vimeo.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index 7df4116f3..e2b86662b 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -604,6 +604,20 @@ class VimeoIE(VimeoBaseInfoExtractor): 'format': 'Original', }, }, + { + 'url': 'https://vimeo.com/channels/staffpicks/143603739', + 'info_dict': { + 'id': '143603739', + 'ext': 'mp4', + 'uploader': 'Karim Huu Do', + 'timestamp': 1445846953, + 'upload_date': '20151026', + 'title': 'The Shoes - Submarine Feat. Blaine Harrison', + 'uploader_id': 'karimhd', + 'description': 'md5:8e2eea76de4504c2e8020a9bcfa1e843', + }, + 'params': {'skip_download': 'm3u8'}, + }, { # requires passing unlisted_hash(a52724358e) to load_download_config request 'url': 'https://vimeo.com/392479337/a52724358e', @@ -798,18 +812,19 @@ class VimeoIE(VimeoBaseInfoExtractor): timestamp = None video_description = None info_dict = {} + config_url = None channel_id = self._search_regex( r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None) if channel_id: config_url = self._html_search_regex( - r'\bdata-config-url="([^"]+)"', webpage, 'config URL') + r'\bdata-config-url="([^"]+)"', webpage, 'config URL', default=None) video_description = clean_html(get_element_by_class('description', webpage)) info_dict.update({ 'channel_id': channel_id, 'channel_url': 'https://vimeo.com/channels/' + channel_id, }) - else: + if not config_url: page_config = self._parse_json(self._search_regex( r'vimeo\.(?:clip|vod_title)_page_config\s*=\s*({.+?});', webpage, 'page config', default='{}'), video_id, fatal=False) From f7b558df4d76fae77a5bbac62364195891673738 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 23 Nov 2021 01:14:25 +0530 Subject: [PATCH 0267/2552] [mediaklikk] Expand valid URL Partial fix for #1409 --- yt_dlp/extractor/mediaklikk.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/mediaklikk.py b/yt_dlp/extractor/mediaklikk.py index b9b6d739f..18ff3befa 100644 --- a/yt_dlp/extractor/mediaklikk.py +++ b/yt_dlp/extractor/mediaklikk.py @@ -12,8 +12,8 @@ from ..compat import ( class MediaKlikkIE(InfoExtractor): - _VALID_URL = r'''(?x)^https?:\/\/(?:www\.)? - (?:mediaklikk|m4sport|hirado|petofilive)\.hu\/.*?videok?\/ + _VALID_URL = r'''(?x)https?://(?:www\.)? + (?:mediaklikk|m4sport|hirado|petofilive)\.hu/.*?(?:videok?|cikk)/ (?:(?P<year>[0-9]{4})/(?P<month>[0-9]{1,2})/(?P<day>[0-9]{1,2})/)? 
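+        # optional yyyy/mm/dd path segment seen on some video and article URLs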
(?P<id>[^/#?_]+)''' From 0e6b018a10e751bc6da59cdf5d55e61cdf975efa Mon Sep 17 00:00:00 2001 From: Zirro <code@zirro.se> Date: Tue, 23 Nov 2021 01:40:53 +0530 Subject: [PATCH 0268/2552] Ensure path for link files exists (#1755) Authored by: Zirro --- yt_dlp/YoutubeDL.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index fb7e12624..5c2d64598 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2690,6 +2690,8 @@ class YoutubeDL(object): self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information') return False linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext')) + if not self._ensure_dir_exists(encodeFilename(linkfn)): + return False if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)): self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present') return True From 14a086058a30a0748b5b716e9b21481f993518f3 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 23 Nov 2021 02:33:41 +0530 Subject: [PATCH 0269/2552] [ARDBetaMediathek] Handle new URLs Adapted from https://github.com/ytdl-org/youtube-dl/commit/8562218350a79d4709da8593bb0c538aa0824acf Closes #1601 --- yt_dlp/extractor/ard.py | 48 +++++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/yt_dlp/extractor/ard.py b/yt_dlp/extractor/ard.py index 048d30f27..f8d57109e 100644 --- a/yt_dlp/extractor/ard.py +++ b/yt_dlp/extractor/ard.py @@ -388,7 +388,13 @@ class ARDIE(InfoExtractor): class ARDBetaMediathekIE(ARDMediathekBaseIE): - _VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?P<client>[^/]+)/(?P<mode>player|live|video|sendung|sammlung)/(?P<display_id>(?:[^/]+/)*)(?P<video_id>[a-zA-Z0-9]+)' + _VALID_URL = r'''(?x)https:// + (?:(?:beta|www)\.)?ardmediathek\.de/ + (?:(?P<client>[^/]+)/)? + (?:player|live|video|(?P<playlist>sendung|sammlung))/ + (?:(?P<display_id>[^?#]+)/)? 
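+        # ids are base64-encoded "crid://" URIs, so for non-playlist URLs
+        # the id must start with the Y3JpZDovL prefix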
+ (?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)''' + _TESTS = [{ 'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/', 'md5': 'a1dc75a39c61601b980648f7c9f9f71d', @@ -403,6 +409,18 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE): 'upload_date': '20200805', 'ext': 'mp4', }, + 'skip': 'Error', + }, { + 'url': 'https://www.ardmediathek.de/video/tagesschau-oder-tagesschau-20-00-uhr/das-erste/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll', + 'md5': 'f1837e563323b8a642a8ddeff0131f51', + 'info_dict': { + 'id': '10049223', + 'ext': 'mp4', + 'title': 'tagesschau, 20:00 Uhr', + 'timestamp': 1636398000, + 'description': 'md5:39578c7b96c9fe50afdf5674ad985e6b', + 'upload_date': '20211108', + }, }, { 'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE', 'only_matching': True, @@ -426,6 +444,12 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE): # playlist of type 'sammlung' 'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/', 'only_matching': True, + }, { + 'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/', + 'only_matching': True, + }, { + 'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3dkci5kZS9CZWl0cmFnLWQ2NDJjYWEzLTMwZWYtNGI4NS1iMTI2LTU1N2UxYTcxOGIzOQ/tatort-duo-koeln-leipzig-ihr-kinderlein-kommet', + 'only_matching': True, }] def _ARD_load_playlist_snipped(self, playlist_id, display_id, client, mode, pageNumber): @@ -525,20 +549,12 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE): return self.playlist_result(entries, playlist_title=display_id) def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('video_id') - display_id = mobj.group('display_id') - if display_id: - display_id = display_id.rstrip('/') - if not display_id: - display_id = video_id - - if mobj.group('mode') in ('sendung', 'sammlung'): - # this is a playlist-URL - return self._ARD_extract_playlist( - url, video_id, display_id, - mobj.group('client'), - mobj.group('mode')) + video_id, display_id, playlist_type, client = self._match_valid_url(url).group( + 'id', 'display_id', 'playlist', 'client') + display_id, client = display_id or video_id, client or 'ard' + + if playlist_type: + return self._ARD_extract_playlist(url, video_id, display_id, client, playlist_type) player_page = self._download_json( 'https://api.ardmediathek.de/public-gateway', @@ -574,7 +590,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE): } } } -}''' % (mobj.group('client'), video_id), +}''' % (client, video_id), }).encode(), headers={ 'Content-Type': 'application/json' })['data']['playerPage'] From 8f122fa070dee737077059747731896a603c9e0b Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 23 Nov 2021 13:11:28 +0530 Subject: [PATCH 0270/2552] [extractor] Extract `average_rating` from JSON-LD Eg: Crunchyroll --- yt_dlp/extractor/common.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 1565ba5c3..fc28bca2e 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1452,6 +1452,9 @@ class InfoExtractor(object): item_type = e.get('@type') if expected_type is not None and expected_type != item_type: continue + rating = 
traverse_obj(e, ('aggregateRating', 'ratingValue'), expected_type=float_or_none) + if rating is not None: + info['average_rating'] = rating if item_type in ('TVEpisode', 'Episode'): episode_name = unescapeHTML(e.get('name')) info.update({ From bc8ab44ea08995bd4345c9ca149ba82591b600bb Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 23 Nov 2021 13:13:27 +0530 Subject: [PATCH 0271/2552] [itv] Fix for Python 3.6/3.7 Closes #1758 --- yt_dlp/extractor/itv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/itv.py b/yt_dlp/extractor/itv.py index 6e6a3673c..5f1d306f6 100644 --- a/yt_dlp/extractor/itv.py +++ b/yt_dlp/extractor/itv.py @@ -117,7 +117,7 @@ class ITVIE(InfoExtractor): # See: https://github.com/yt-dlp/yt-dlp/issues/986 platform_tag_subs, featureset_subs = next( ((platform_tag, featureset) - for platform_tag, featuresets in reversed(variants.items()) for featureset in featuresets + for platform_tag, featuresets in reversed(list(variants.items())) for featureset in featuresets if try_get(featureset, lambda x: x[2]) == 'outband-webvtt'), (None, None)) @@ -146,7 +146,7 @@ class ITVIE(InfoExtractor): # See: https://github.com/yt-dlp/yt-dlp/issues/986 platform_tag_video, featureset_video = next( ((platform_tag, featureset) - for platform_tag, featuresets in reversed(variants.items()) for featureset in featuresets + for platform_tag, featuresets in reversed(list(variants.items())) for featureset in featuresets if try_get(featureset, lambda x: x[:2]) == ['hls', 'aes']), (None, None)) if not platform_tag_video or not featureset_video: From d52cd2f5cd54bd100a51fca8e4044b4f2a89fade Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 23 Nov 2021 13:15:49 +0530 Subject: [PATCH 0272/2552] [sbs] Fix for movies and livestreams Closes #1640 --- yt_dlp/extractor/sbs.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/sbs.py b/yt_dlp/extractor/sbs.py index 0a806ee4e..4090f6385 100644 --- a/yt_dlp/extractor/sbs.py +++ b/yt_dlp/extractor/sbs.py @@ -10,7 +10,14 @@ from ..utils import ( class SBSIE(InfoExtractor): IE_DESC = 'sbs.com.au' - _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand(?:/video/(?:single/)?|.*?\bplay=|/watch/)|news/(?:embeds/)?video/)(?P<id>[0-9]+)' + _VALID_URL = r'''(?x) + https?://(?:www\.)?sbs\.com\.au/(?: + ondemand(?: + /video/(?:single/)?| + /movie/[^/]+/| + .*?\bplay=|/watch/ + )|news/(?:embeds/)?video/ + )(?P<id>[0-9]+)''' _TESTS = [{ # Original URL is handled by the generic IE which finds the iframe: @@ -46,6 +53,13 @@ class SBSIE(InfoExtractor): }, { 'url': 'https://www.sbs.com.au/ondemand/watch/1698704451971', 'only_matching': True, + }, { + 'url': 'https://www.sbs.com.au/ondemand/movie/coherence/1469404227931', + 'only_matching': True, + }, { + 'note': 'Live stream', + 'url': 'https://www.sbs.com.au/ondemand/video/1726824003663/sbs-24x7-live-stream-nsw', + 'only_matching': True, }] def _real_extract(self, url): @@ -75,4 +89,5 @@ class SBSIE(InfoExtractor): 'ie_key': 'ThePlatform', 'id': video_id, 'url': smuggle_url(self._proto_relative_url(theplatform_url), {'force_smil_url': True}), + 'is_live': player_params.get('streamType') == 'live', } From e5d731f35dce2e0eb82d7877d6e1001d5e18ced9 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 23 Nov 2021 17:15:41 +0530 Subject: [PATCH 0273/2552] [tv2] Expand valid URL Closes #1764 --- yt_dlp/extractor/tv2.py | 5 ++++- 1 file changed, 4 insertions(+), 
1 deletion(-) diff --git a/yt_dlp/extractor/tv2.py b/yt_dlp/extractor/tv2.py index e0851531c..da351eeb0 100644 --- a/yt_dlp/extractor/tv2.py +++ b/yt_dlp/extractor/tv2.py @@ -19,7 +19,7 @@ from ..utils import ( class TV2IE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tv2\.no/v/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?tv2\.no/v\d*/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.tv2.no/v/916509/', 'info_dict': { @@ -33,6 +33,9 @@ class TV2IE(InfoExtractor): 'view_count': int, 'categories': list, }, + }, { + 'url': 'http://www.tv2.no/v2/916509', + 'only_matching': True, }] _PROTOCOLS = ('HLS', 'DASH') _GEO_COUNTRIES = ['NO'] From 57dbe8077f8d00e0fffac53669f40cd7d584474f Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 23 Nov 2021 20:33:55 +0530 Subject: [PATCH 0274/2552] [jsinterp] Fix splice to handle float Needed for new youtube js player f1ca6900 Closes #1767 --- test/test_youtube_signature.py | 4 ++++ yt_dlp/jsinterp.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index df4c36047..3359ac457 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -78,6 +78,10 @@ _NSIG_TESTS = [ 'https://www.youtube.com/s/player/2dfe380c/player_ias.vflset/en_US/base.js', 'oBo2h5euWy6osrUt', '3DIBbn3qdQ', ), + ( + 'https://www.youtube.com/s/player/f1ca6900/player_ias.vflset/en_US/base.js', + 'cu3wyu6LQn2hse', 'jvxetvmlI9AN9Q', + ), ] diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py index bb2a0ae0b..a6084ab82 100644 --- a/yt_dlp/jsinterp.py +++ b/yt_dlp/jsinterp.py @@ -397,7 +397,7 @@ class JSInterpreter(object): elif member == 'splice': assertion(isinstance(obj, list), 'must be applied on a list') assertion(argvals, 'takes one or more arguments') - index, howMany = (argvals + [len(obj)])[:2] + index, howMany = map(int, (argvals + [len(obj)])[:2]) if index < 0: index += len(obj) add_items = argvals[2:] From ff51ed588fa75256b98ead67bdef7edda08b66f0 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 23 Nov 2021 20:38:30 +0530 Subject: [PATCH 0275/2552] Clarify video/audio-only formats in -F Related: #1759 --- yt_dlp/YoutubeDL.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 5c2d64598..b983b1775 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -847,6 +847,7 @@ class YoutubeDL(object): DELIM = 'blue' ERROR = 'red' WARNING = 'yellow' + SUPPRESS = 'light black' def __format_text(self, out, text, f, fallback=None, *, test_encoding=False): assert out in ('screen', 'err') @@ -3149,22 +3150,17 @@ class YoutubeDL(object): @staticmethod def format_resolution(format, default='unknown'): - is_images = format.get('vcodec') == 'none' and format.get('acodec') == 'none' if format.get('vcodec') == 'none' and format.get('acodec') != 'none': return 'audio only' if format.get('resolution') is not None: return format['resolution'] if format.get('width') and format.get('height'): - res = '%dx%d' % (format['width'], format['height']) + return '%dx%d' % (format['width'], format['height']) elif format.get('height'): - res = '%sp' % format['height'] + return '%sp' % format['height'] elif format.get('width'): - res = '%dx?' % format['width'] - elif is_images: - return 'images' - else: - return default - return f'img {res}' if is_images else res + return '%dx?' 
% format['width'] + return default def _format_note(self, fdict): res = '' @@ -3236,7 +3232,7 @@ class YoutubeDL(object): [ self._format_screen(format_field(f, 'format_id'), self.Styles.ID), format_field(f, 'ext'), - self.format_resolution(f), + format_field(f, func=self.format_resolution, ignore=('audio only', 'images')), format_field(f, 'fps', '\t%d'), format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''), delim, @@ -3244,9 +3240,15 @@ class YoutubeDL(object): format_field(f, 'tbr', '\t%dk'), shorten_protocol_name(f.get('protocol', '').replace('native', 'n')), delim, - format_field(f, 'vcodec', default='unknown').replace('none', ''), + format_field(f, 'vcodec', default='unknown').replace( + 'none', + 'images' if f.get('acodec') == 'none' + else self._format_screen('audio only', self.Styles.SUPPRESS)), format_field(f, 'vbr', '\t%dk'), - format_field(f, 'acodec', default='unknown').replace('none', ''), + format_field(f, 'acodec', default='unknown').replace( + 'none', + '' if f.get('vcodec') == 'none' + else self._format_screen('video only', self.Styles.SUPPRESS)), format_field(f, 'abr', '\t%dk'), format_field(f, 'asr', '\t%dHz'), join_nonempty( From 9941a1e12750c3df1350c505250ee88a230a208c Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Wed, 24 Nov 2021 08:28:36 +0530 Subject: [PATCH 0276/2552] [PatreonUser] Do not capture RSS URLs Closes #1777 --- yt_dlp/extractor/patreon.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py index c7d316efc..d3ee071e0 100644 --- a/yt_dlp/extractor/patreon.py +++ b/yt_dlp/extractor/patreon.py @@ -191,7 +191,7 @@ class PatreonIE(InfoExtractor): class PatreonUserIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?P<id>[-_\w\d]+)/?(?:posts/?)?' + _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?!rss)(?P<id>[-\w]+)' _TESTS = [{ 'url': 'https://www.patreon.com/dissonancepod/', From a6213a49250129f25e8f435ff3fadf4a3237f6e1 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Wed, 24 Nov 2021 08:31:52 +0530 Subject: [PATCH 0277/2552] [cleanup,youtube] Reorganize Tab and Search extractor inheritances --- yt_dlp/extractor/youtube.py | 1219 ++++++++++++++++++----------------- 1 file changed, 610 insertions(+), 609 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 632129bc6..a8d515f5c 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -44,6 +44,7 @@ from ..utils import ( join_nonempty, mimetype2ext, network_exceptions, + NO_DEFAULT, orderedSet, parse_codecs, parse_count, @@ -3116,508 +3117,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return info +class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): -class YoutubeTabIE(YoutubeBaseInfoExtractor): - IE_DESC = 'YouTube Tabs' - _VALID_URL = r'''(?x) - https?:// - (?:\w+\.)? 
- (?: - youtube(?:kids)?\.com| - %(invidious)s - )/ - (?: - (?P<channel_type>channel|c|user|browse)/| - (?P<not_channel> - feed/|hashtag/| - (?:playlist|watch)\?.*?\blist= - )| - (?!(?:%(reserved_names)s)\b) # Direct URLs - ) - (?P<id>[^/?\#&]+) - ''' % { - 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES, - 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES), - } - IE_NAME = 'youtube:tab' - - _TESTS = [{ - 'note': 'playlists, multipage', - 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid', - 'playlist_mincount': 94, - 'info_dict': { - 'id': 'UCqj7Cz7revf5maW9g5pgNcg', - 'title': 'Игорь Клейнер - Playlists', - 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2', - 'uploader': 'Игорь Клейнер', - 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg', - }, - }, { - 'note': 'playlists, multipage, different order', - 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd', - 'playlist_mincount': 94, - 'info_dict': { - 'id': 'UCqj7Cz7revf5maW9g5pgNcg', - 'title': 'Игорь Клейнер - Playlists', - 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2', - 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg', - 'uploader': 'Игорь Клейнер', - }, - }, { - 'note': 'playlists, series', - 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3', - 'playlist_mincount': 5, - 'info_dict': { - 'id': 'UCYO_jab_esuFRV4b17AJtAw', - 'title': '3Blue1Brown - Playlists', - 'description': 'md5:e1384e8a133307dd10edee76e875d62f', - 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw', - 'uploader': '3Blue1Brown', - }, - }, { - 'note': 'playlists, singlepage', - 'url': 'https://www.youtube.com/user/ThirstForScience/playlists', - 'playlist_mincount': 4, - 'info_dict': { - 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ', - 'title': 'ThirstForScience - Playlists', - 'description': 'md5:609399d937ea957b0f53cbffb747a14c', - 'uploader': 'ThirstForScience', - 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ', - } - }, { - 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists', - 'only_matching': True, - }, { - 'note': 'basic, single video playlist', - 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', - 'info_dict': { - 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA', - 'uploader': 'Sergey M.', - 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', - 'title': 'youtube-dl public playlist', - }, - 'playlist_count': 1, - }, { - 'note': 'empty playlist', - 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf', - 'info_dict': { - 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA', - 'uploader': 'Sergey M.', - 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf', - 'title': 'youtube-dl empty playlist', - }, - 'playlist_count': 0, - }, { - 'note': 'Home tab', - 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured', - 'info_dict': { - 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', - 'title': 'lex will - Home', - 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', - 'uploader': 'lex will', - 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', - }, - 'playlist_mincount': 2, - }, { - 'note': 'Videos tab', - 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos', - 'info_dict': { - 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', - 'title': 'lex will - Videos', - 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', - 'uploader': 'lex will', - 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', - }, - 'playlist_mincount': 975, - }, { - 'note': 'Videos tab, sorted by popular', - 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid', - 
'info_dict': { - 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', - 'title': 'lex will - Videos', - 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', - 'uploader': 'lex will', - 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', - }, - 'playlist_mincount': 199, - }, { - 'note': 'Playlists tab', - 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists', - 'info_dict': { - 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', - 'title': 'lex will - Playlists', - 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', - 'uploader': 'lex will', - 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', - }, - 'playlist_mincount': 17, - }, { - 'note': 'Community tab', - 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community', - 'info_dict': { - 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', - 'title': 'lex will - Community', - 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', - 'uploader': 'lex will', - 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', - }, - 'playlist_mincount': 18, - }, { - 'note': 'Channels tab', - 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels', - 'info_dict': { - 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', - 'title': 'lex will - Channels', - 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', - 'uploader': 'lex will', - 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', - }, - 'playlist_mincount': 12, - }, { - 'note': 'Search tab', - 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra', - 'playlist_mincount': 40, - 'info_dict': { - 'id': 'UCYO_jab_esuFRV4b17AJtAw', - 'title': '3Blue1Brown - Search - linear algebra', - 'description': 'md5:e1384e8a133307dd10edee76e875d62f', - 'uploader': '3Blue1Brown', - 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw', - }, - }, { - 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA', - 'only_matching': True, - }, { - 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA', - 'only_matching': True, - }, { - 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA', - 'only_matching': True, - }, { - 'note': 'Playlist with deleted videos (#651). 
As a bonus, the video #51 is also twice in this list.', - 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', - 'info_dict': { - 'title': '29C3: Not my department', - 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', - 'uploader': 'Christiaan008', - 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg', - 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268', - }, - 'playlist_count': 96, - }, { - 'note': 'Large playlist', - 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q', - 'info_dict': { - 'title': 'Uploads from Cauchemar', - 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q', - 'uploader': 'Cauchemar', - 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q', - }, - 'playlist_mincount': 1123, - }, { - 'note': 'even larger playlist, 8832 videos', - 'url': 'http://www.youtube.com/user/NASAgovVideo/videos', - 'only_matching': True, - }, { - 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos', - 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA', - 'info_dict': { - 'title': 'Uploads from Interstellar Movie', - 'id': 'UUXw-G3eDE9trcvY2sBMM_aA', - 'uploader': 'Interstellar Movie', - 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA', - }, - 'playlist_mincount': 21, - }, { - 'note': 'Playlist with "show unavailable videos" button', - 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q', - 'info_dict': { - 'title': 'Uploads from Phim Siêu Nhân Nhật Bản', - 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q', - 'uploader': 'Phim Siêu Nhân Nhật Bản', - 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q', - }, - 'playlist_mincount': 200, - }, { - 'note': 'Playlist with unavailable videos in page 7', - 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w', - 'info_dict': { - 'title': 'Uploads from BlankTV', - 'id': 'UU8l9frL61Yl5KFOl87nIm2w', - 'uploader': 'BlankTV', - 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w', - }, - 'playlist_mincount': 1000, - }, { - 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844', - 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba', - 'info_dict': { - 'title': 'Data Analysis with Dr Mike Pound', - 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba', - 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA', - 'uploader': 'Computerphile', - 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487', - }, - 'playlist_mincount': 11, - }, { - 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', - 'only_matching': True, - }, { - 'note': 'Playlist URL that does not actually serve a playlist', - 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4', - 'info_dict': { - 'id': 'FqZTN594JQw', - 'ext': 'webm', - 'title': "Smiley's People 01 detective, Adventure Series, Action", - 'uploader': 'STREEM', - 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng', - 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng', - 'upload_date': '20150526', - 'license': 'Standard YouTube License', - 'description': 'md5:507cdcb5a49ac0da37a920ece610be80', - 'categories': ['People & Blogs'], - 'tags': list, - 'view_count': int, - 'like_count': int, - 'dislike_count': int, - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'This video is not available.', - 'add_ie': [YoutubeIE.ie_key()], - }, { - 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g', - 'only_matching': True, - }, { - 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM', - 'only_matching': True, - }, { - 'url': 
'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live', - 'info_dict': { - 'id': '3yImotZU3tw', # This will keep changing - 'ext': 'mp4', - 'title': compat_str, - 'uploader': 'Sky News', - 'uploader_id': 'skynews', - 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews', - 'upload_date': r're:\d{8}', - 'description': compat_str, - 'categories': ['News & Politics'], - 'tags': list, - 'like_count': int, - 'dislike_count': int, - }, - 'params': { - 'skip_download': True, - }, - 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '], - }, { - 'url': 'https://www.youtube.com/user/TheYoungTurks/live', - 'info_dict': { - 'id': 'a48o2S1cPoo', - 'ext': 'mp4', - 'title': 'The Young Turks - Live Main Show', - 'uploader': 'The Young Turks', - 'uploader_id': 'TheYoungTurks', - 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks', - 'upload_date': '20150715', - 'license': 'Standard YouTube License', - 'description': 'md5:438179573adcdff3c97ebb1ee632b891', - 'categories': ['News & Politics'], - 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'], - 'like_count': int, - 'dislike_count': int, - }, - 'params': { - 'skip_download': True, - }, - 'only_matching': True, - }, { - 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live', - 'only_matching': True, - }, { - 'url': 'https://www.youtube.com/c/CommanderVideoHq/live', - 'only_matching': True, - }, { - 'note': 'A channel that is not live. Should raise error', - 'url': 'https://www.youtube.com/user/numberphile/live', - 'only_matching': True, - }, { - 'url': 'https://www.youtube.com/feed/trending', - 'only_matching': True, - }, { - 'url': 'https://www.youtube.com/feed/library', - 'only_matching': True, - }, { - 'url': 'https://www.youtube.com/feed/history', - 'only_matching': True, - }, { - 'url': 'https://www.youtube.com/feed/subscriptions', - 'only_matching': True, - }, { - 'url': 'https://www.youtube.com/feed/watch_later', - 'only_matching': True, - }, { - 'note': 'Recommended - redirects to home page.', - 'url': 'https://www.youtube.com/feed/recommended', - 'only_matching': True, - }, { - 'note': 'inline playlist with not always working continuations', - 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C', - 'only_matching': True, - }, { - 'url': 'https://www.youtube.com/course', - 'only_matching': True, - }, { - 'url': 'https://www.youtube.com/zsecurity', - 'only_matching': True, - }, { - 'url': 'http://www.youtube.com/NASAgovVideo/videos', - 'only_matching': True, - }, { - 'url': 'https://www.youtube.com/TheYoungTurks/live', - 'only_matching': True, - }, { - 'url': 'https://www.youtube.com/hashtag/cctv9', - 'info_dict': { - 'id': 'cctv9', - 'title': '#cctv9', - }, - 'playlist_mincount': 350, - }, { - 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU', - 'only_matching': True, - }, { - 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist', - 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', - 'only_matching': True - }, { - 'note': '/browse/ should redirect to /channel/', - 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng', - 'only_matching': True - }, { - 'note': 'VLPL, should redirect to playlist?list=PL...', - 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', - 'info_dict': { - 'id': 
'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', - 'uploader': 'NoCopyrightSounds', - 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!', - 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg', - 'title': 'NCS Releases', - }, - 'playlist_mincount': 166, - }, { - 'note': 'Topic, should redirect to playlist?list=UU...', - 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw', - 'info_dict': { - 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw', - 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw', - 'title': 'Uploads from Royalty Free Music - Topic', - 'uploader': 'Royalty Free Music - Topic', - }, - 'expected_warnings': [ - 'A channel/user page was given', - 'The URL does not have a videos tab', - ], - 'playlist_mincount': 101, - }, { - 'note': 'Topic without a UU playlist', - 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg', - 'info_dict': { - 'id': 'UCtFRv9O2AHqOZjjynzrv-xg', - 'title': 'UCtFRv9O2AHqOZjjynzrv-xg', - }, - 'expected_warnings': [ - 'A channel/user page was given', - 'The URL does not have a videos tab', - 'Falling back to channel URL', - ], - 'playlist_mincount': 9, - }, { - 'note': 'Youtube music Album', - 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE', - 'info_dict': { - 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0', - 'title': 'Album - Royalty Free Music Library V2 (50 Songs)', - }, - 'playlist_count': 50, - }, { - 'note': 'unlisted single video playlist', - 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf', - 'info_dict': { - 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q', - 'uploader': 'colethedj', - 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf', - 'title': 'yt-dlp unlisted playlist test', - 'availability': 'unlisted' - }, - 'playlist_count': 1, - }, { - 'note': 'API Fallback: Recommended - redirects to home page. 
Requires visitorData', - 'url': 'https://www.youtube.com/feed/recommended', - 'info_dict': { - 'id': 'recommended', - 'title': 'recommended', - }, - 'playlist_mincount': 50, - 'params': { - 'skip_download': True, - 'extractor_args': {'youtubetab': {'skip': ['webpage']}} - }, - }, { - 'note': 'API Fallback: /videos tab, sorted by oldest first', - 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid', - 'info_dict': { - 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw', - 'title': 'Cody\'sLab - Videos', - 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa', - 'uploader': 'Cody\'sLab', - 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw', - }, - 'playlist_mincount': 650, - 'params': { - 'skip_download': True, - 'extractor_args': {'youtubetab': {'skip': ['webpage']}} - }, - }, { - 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...', - 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw', - 'info_dict': { - 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw', - 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw', - 'title': 'Uploads from Royalty Free Music - Topic', - 'uploader': 'Royalty Free Music - Topic', - }, - 'expected_warnings': [ - 'A channel/user page was given', - 'The URL does not have a videos tab', - ], - 'playlist_mincount': 101, - 'params': { - 'skip_download': True, - 'extractor_args': {'youtubetab': {'skip': ['webpage']}} - }, - }] - - @classmethod - def suitable(cls, url): - return False if YoutubeIE.suitable(url) else super( - YoutubeTabIE, cls).suitable(url) - - def _extract_channel_id(self, webpage): - channel_id = self._html_search_meta( - 'channelId', webpage, 'channel id', default=None) - if channel_id: - return channel_id - channel_url = self._html_search_meta( - ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url', - 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad', - 'twitter:app:url:googleplay'), webpage, 'channel url') - return self._search_regex( - r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+', - channel_url, 'channel id') + def _extract_channel_id(self, webpage): + channel_id = self._html_search_meta( + 'channelId', webpage, 'channel id', default=None) + if channel_id: + return channel_id + channel_url = self._html_search_meta( + ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url', + 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad', + 'twitter:app:url:googleplay'), webpage, 'channel url') + return self._search_regex( + r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+', + channel_url, 'channel id') @staticmethod def _extract_basic_item_renderer(item): @@ -3787,49 +3300,51 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): if entry: yield entry ''' - def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data): - - def extract_entries(parent_renderer): # this needs to called again for continuation to work with feeds - contents = try_get(parent_renderer, lambda x: x['contents'], list) or [] - for content in contents: - if not isinstance(content, dict): - continue - is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict) - if not is_renderer: - renderer = content.get('richItemRenderer') - if renderer: - for entry in self._rich_entries(renderer): - yield entry - continuation_list[0] = self._extract_continuation(parent_renderer) + def _extract_entries(self, parent_renderer, continuation_list): + # continuation_list is modified in-place with continuation_list = [continuation_token] + continuation_list[:] = [None] + contents = try_get(parent_renderer, lambda x: x['contents'], list) or [] + 
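+        # NB: continuation_list is a shared single-element list used as an
+        # "out" parameter: the slice assignment above and the index writes
+        # below mutate it in place without rebinding the name, so callers
+        # can read continuation_list[0] once iteration finishes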
for content in contents: + if not isinstance(content, dict): + continue + is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict) + if not is_renderer: + renderer = content.get('richItemRenderer') + if renderer: + for entry in self._rich_entries(renderer): + yield entry + continuation_list[0] = self._extract_continuation(parent_renderer) + continue + isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or [] + for isr_content in isr_contents: + if not isinstance(isr_content, dict): continue - isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or [] - for isr_content in isr_contents: - if not isinstance(isr_content, dict): - continue - known_renderers = { - 'playlistVideoListRenderer': self._playlist_entries, - 'gridRenderer': self._grid_entries, - 'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'), - 'backstagePostThreadRenderer': self._post_thread_entries, - 'videoRenderer': lambda x: [self._video_entry(x)], - } - for key, renderer in isr_content.items(): - if key not in known_renderers: - continue - for entry in known_renderers[key](renderer): - if entry: - yield entry - continuation_list[0] = self._extract_continuation(renderer) - break - - if not continuation_list[0]: - continuation_list[0] = self._extract_continuation(is_renderer) + known_renderers = { + 'playlistVideoListRenderer': self._playlist_entries, + 'gridRenderer': self._grid_entries, + 'shelfRenderer': lambda x: self._shelf_entries(x), + 'backstagePostThreadRenderer': self._post_thread_entries, + 'videoRenderer': lambda x: [self._video_entry(x)], + } + for key, renderer in isr_content.items(): + if key not in known_renderers: + continue + for entry in known_renderers[key](renderer): + if entry: + yield entry + continuation_list[0] = self._extract_continuation(renderer) + break if not continuation_list[0]: - continuation_list[0] = self._extract_continuation(parent_renderer) + continuation_list[0] = self._extract_continuation(is_renderer) + + if not continuation_list[0]: + continuation_list[0] = self._extract_continuation(parent_renderer) - continuation_list = [None] # Python 2 does not support nonlocal + def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data): + continuation_list = [None] + extract_entries = lambda x: self._extract_entries(x, continuation_list) tab_content = try_get(tab, lambda x: x['content'], dict) if not tab_content: return @@ -4214,12 +3729,556 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): raise ExtractorError(err_note, expected=True) self.report_warning(err_note, item_id) - @staticmethod - def _smuggle_data(entries, data): - for entry in entries: - if data: - entry['url'] = smuggle_url(entry['url'], data) - yield entry + @staticmethod + def _smuggle_data(entries, data): + for entry in entries: + if data: + entry['url'] = smuggle_url(entry['url'], data) + yield entry + + _SEARCH_PARAMS = None + + def _search_results(self, query, params=NO_DEFAULT): + data = {'query': query} + if params is NO_DEFAULT: + params = self._SEARCH_PARAMS + if params: + data['params'] = params + continuation = {} + for page_num in itertools.count(1): + data.update(continuation) + search = self._extract_response( + item_id='query "%s" page %s' % (query, page_num), ep='search', query=data, + check_get_keys=('contents', 'onResponseReceivedCommands') + ) + if not search: + break + slr_contents = try_get( + search, + (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'], + lambda x: 
x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']), + list) + if not slr_contents: + break + + # Youtube sometimes adds promoted content to searches, + # changing the index location of videos and token. + # So we search through all entries till we find them. + continuation = None + for slr_content in slr_contents: + if not continuation: + continuation = self._extract_continuation({'contents': [slr_content]}) + + isr_contents = try_get( + slr_content, + lambda x: x['itemSectionRenderer']['contents'], + list) + if not isr_contents: + continue + for content in isr_contents: + if not isinstance(content, dict): + continue + video = content.get('videoRenderer') + if not isinstance(video, dict): + continue + video_id = video.get('videoId') + if not video_id: + continue + + yield self._extract_video(video) + + if not continuation: + break + + +class YoutubeTabIE(YoutubeTabBaseInfoExtractor): + IE_DESC = 'YouTube Tabs' + _VALID_URL = r'''(?x: + https?:// + (?:\w+\.)? + (?: + youtube(?:kids)?\.com| + %(invidious)s + )/ + (?: + (?P<channel_type>channel|c|user|browse)/| + (?P<not_channel> + feed/|hashtag/| + (?:playlist|watch)\?.*?\blist= + )| + (?!(?:%(reserved_names)s)\b) # Direct URLs + ) + (?P<id>[^/?\#&]+) + )''' % { + 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES, + 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES), + } + IE_NAME = 'youtube:tab' + + _TESTS = [{ + 'note': 'playlists, multipage', + 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid', + 'playlist_mincount': 94, + 'info_dict': { + 'id': 'UCqj7Cz7revf5maW9g5pgNcg', + 'title': 'Игорь Клейнер - Playlists', + 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2', + 'uploader': 'Игорь Клейнер', + 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg', + }, + }, { + 'note': 'playlists, multipage, different order', + 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd', + 'playlist_mincount': 94, + 'info_dict': { + 'id': 'UCqj7Cz7revf5maW9g5pgNcg', + 'title': 'Игорь Клейнер - Playlists', + 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2', + 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg', + 'uploader': 'Игорь Клейнер', + }, + }, { + 'note': 'playlists, series', + 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3', + 'playlist_mincount': 5, + 'info_dict': { + 'id': 'UCYO_jab_esuFRV4b17AJtAw', + 'title': '3Blue1Brown - Playlists', + 'description': 'md5:e1384e8a133307dd10edee76e875d62f', + 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw', + 'uploader': '3Blue1Brown', + }, + }, { + 'note': 'playlists, singlepage', + 'url': 'https://www.youtube.com/user/ThirstForScience/playlists', + 'playlist_mincount': 4, + 'info_dict': { + 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ', + 'title': 'ThirstForScience - Playlists', + 'description': 'md5:609399d937ea957b0f53cbffb747a14c', + 'uploader': 'ThirstForScience', + 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ', + } + }, { + 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists', + 'only_matching': True, + }, { + 'note': 'basic, single video playlist', + 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', + 'info_dict': { + 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA', + 'uploader': 'Sergey M.', + 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', + 'title': 'youtube-dl public playlist', + }, + 'playlist_count': 1, + }, { + 'note': 'empty playlist', + 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf', + 'info_dict': { + 'uploader_id': 
'UCmlqkdCBesrv2Lak1mF_MxA', + 'uploader': 'Sergey M.', + 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf', + 'title': 'youtube-dl empty playlist', + }, + 'playlist_count': 0, + }, { + 'note': 'Home tab', + 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured', + 'info_dict': { + 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', + 'title': 'lex will - Home', + 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', + 'uploader': 'lex will', + 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', + }, + 'playlist_mincount': 2, + }, { + 'note': 'Videos tab', + 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos', + 'info_dict': { + 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', + 'title': 'lex will - Videos', + 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', + 'uploader': 'lex will', + 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', + }, + 'playlist_mincount': 975, + }, { + 'note': 'Videos tab, sorted by popular', + 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid', + 'info_dict': { + 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', + 'title': 'lex will - Videos', + 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', + 'uploader': 'lex will', + 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', + }, + 'playlist_mincount': 199, + }, { + 'note': 'Playlists tab', + 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists', + 'info_dict': { + 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', + 'title': 'lex will - Playlists', + 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', + 'uploader': 'lex will', + 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', + }, + 'playlist_mincount': 17, + }, { + 'note': 'Community tab', + 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community', + 'info_dict': { + 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', + 'title': 'lex will - Community', + 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', + 'uploader': 'lex will', + 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', + }, + 'playlist_mincount': 18, + }, { + 'note': 'Channels tab', + 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels', + 'info_dict': { + 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', + 'title': 'lex will - Channels', + 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', + 'uploader': 'lex will', + 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', + }, + 'playlist_mincount': 12, + }, { + 'note': 'Search tab', + 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra', + 'playlist_mincount': 40, + 'info_dict': { + 'id': 'UCYO_jab_esuFRV4b17AJtAw', + 'title': '3Blue1Brown - Search - linear algebra', + 'description': 'md5:e1384e8a133307dd10edee76e875d62f', + 'uploader': '3Blue1Brown', + 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw', + }, + }, { + 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA', + 'only_matching': True, + }, { + 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA', + 'only_matching': True, + }, { + 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA', + 'only_matching': True, + }, { + 'note': 'Playlist with deleted videos (#651). 
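# The 'md5:...' strings in the test dicts above are not literal descriptions:
# by youtube-dl/yt-dlp test convention, an expected string field starting with
# 'md5:' is compared against the MD5 of the extracted value. How such an
# expectation is produced:
import hashlib

def md5_field(value):
    return 'md5:' + hashlib.md5(value.encode('utf-8')).hexdigest()

assert md5_field('hello') == 'md5:5d41402abc4b2a76b9719d911017c592'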
As a bonus, the video #51 is also twice in this list.', + 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', + 'info_dict': { + 'title': '29C3: Not my department', + 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', + 'uploader': 'Christiaan008', + 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg', + 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268', + }, + 'playlist_count': 96, + }, { + 'note': 'Large playlist', + 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q', + 'info_dict': { + 'title': 'Uploads from Cauchemar', + 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q', + 'uploader': 'Cauchemar', + 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q', + }, + 'playlist_mincount': 1123, + }, { + 'note': 'even larger playlist, 8832 videos', + 'url': 'http://www.youtube.com/user/NASAgovVideo/videos', + 'only_matching': True, + }, { + 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos', + 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA', + 'info_dict': { + 'title': 'Uploads from Interstellar Movie', + 'id': 'UUXw-G3eDE9trcvY2sBMM_aA', + 'uploader': 'Interstellar Movie', + 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA', + }, + 'playlist_mincount': 21, + }, { + 'note': 'Playlist with "show unavailable videos" button', + 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q', + 'info_dict': { + 'title': 'Uploads from Phim Siêu Nhân Nhật Bản', + 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q', + 'uploader': 'Phim Siêu Nhân Nhật Bản', + 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q', + }, + 'playlist_mincount': 200, + }, { + 'note': 'Playlist with unavailable videos in page 7', + 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w', + 'info_dict': { + 'title': 'Uploads from BlankTV', + 'id': 'UU8l9frL61Yl5KFOl87nIm2w', + 'uploader': 'BlankTV', + 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w', + }, + 'playlist_mincount': 1000, + }, { + 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844', + 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba', + 'info_dict': { + 'title': 'Data Analysis with Dr Mike Pound', + 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba', + 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA', + 'uploader': 'Computerphile', + 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487', + }, + 'playlist_mincount': 11, + }, { + 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', + 'only_matching': True, + }, { + 'note': 'Playlist URL that does not actually serve a playlist', + 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4', + 'info_dict': { + 'id': 'FqZTN594JQw', + 'ext': 'webm', + 'title': "Smiley's People 01 detective, Adventure Series, Action", + 'uploader': 'STREEM', + 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng', + 'upload_date': '20150526', + 'license': 'Standard YouTube License', + 'description': 'md5:507cdcb5a49ac0da37a920ece610be80', + 'categories': ['People & Blogs'], + 'tags': list, + 'view_count': int, + 'like_count': int, + 'dislike_count': int, + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'This video is not available.', + 'add_ie': [YoutubeIE.ie_key()], + }, { + 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g', + 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM', + 'only_matching': True, + }, { + 'url': 
'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live', + 'info_dict': { + 'id': '3yImotZU3tw', # This will keep changing + 'ext': 'mp4', + 'title': compat_str, + 'uploader': 'Sky News', + 'uploader_id': 'skynews', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews', + 'upload_date': r're:\d{8}', + 'description': compat_str, + 'categories': ['News & Politics'], + 'tags': list, + 'like_count': int, + 'dislike_count': int, + }, + 'params': { + 'skip_download': True, + }, + 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '], + }, { + 'url': 'https://www.youtube.com/user/TheYoungTurks/live', + 'info_dict': { + 'id': 'a48o2S1cPoo', + 'ext': 'mp4', + 'title': 'The Young Turks - Live Main Show', + 'uploader': 'The Young Turks', + 'uploader_id': 'TheYoungTurks', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks', + 'upload_date': '20150715', + 'license': 'Standard YouTube License', + 'description': 'md5:438179573adcdff3c97ebb1ee632b891', + 'categories': ['News & Politics'], + 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'], + 'like_count': int, + 'dislike_count': int, + }, + 'params': { + 'skip_download': True, + }, + 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live', + 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/c/CommanderVideoHq/live', + 'only_matching': True, + }, { + 'note': 'A channel that is not live. Should raise error', + 'url': 'https://www.youtube.com/user/numberphile/live', + 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/feed/trending', + 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/feed/library', + 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/feed/history', + 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/feed/subscriptions', + 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/feed/watch_later', + 'only_matching': True, + }, { + 'note': 'Recommended - redirects to home page.', + 'url': 'https://www.youtube.com/feed/recommended', + 'only_matching': True, + }, { + 'note': 'inline playlist with not always working continuations', + 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C', + 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/course', + 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/zsecurity', + 'only_matching': True, + }, { + 'url': 'http://www.youtube.com/NASAgovVideo/videos', + 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/TheYoungTurks/live', + 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/hashtag/cctv9', + 'info_dict': { + 'id': 'cctv9', + 'title': '#cctv9', + }, + 'playlist_mincount': 350, + }, { + 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU', + 'only_matching': True, + }, { + 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist', + 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', + 'only_matching': True + }, { + 'note': '/browse/ should redirect to /channel/', + 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng', + 'only_matching': True + }, { + 'note': 'VLPL, should redirect to playlist?list=PL...', + 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', + 'info_dict': { + 'id': 
'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', + 'uploader': 'NoCopyrightSounds', + 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!', + 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg', + 'title': 'NCS Releases', + }, + 'playlist_mincount': 166, + }, { + 'note': 'Topic, should redirect to playlist?list=UU...', + 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw', + 'info_dict': { + 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw', + 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw', + 'title': 'Uploads from Royalty Free Music - Topic', + 'uploader': 'Royalty Free Music - Topic', + }, + 'expected_warnings': [ + 'A channel/user page was given', + 'The URL does not have a videos tab', + ], + 'playlist_mincount': 101, + }, { + 'note': 'Topic without a UU playlist', + 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg', + 'info_dict': { + 'id': 'UCtFRv9O2AHqOZjjynzrv-xg', + 'title': 'UCtFRv9O2AHqOZjjynzrv-xg', + }, + 'expected_warnings': [ + 'A channel/user page was given', + 'The URL does not have a videos tab', + 'Falling back to channel URL', + ], + 'playlist_mincount': 9, + }, { + 'note': 'Youtube music Album', + 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE', + 'info_dict': { + 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0', + 'title': 'Album - Royalty Free Music Library V2 (50 Songs)', + }, + 'playlist_count': 50, + }, { + 'note': 'unlisted single video playlist', + 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf', + 'info_dict': { + 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q', + 'uploader': 'colethedj', + 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf', + 'title': 'yt-dlp unlisted playlist test', + 'availability': 'unlisted' + }, + 'playlist_count': 1, + }, { + 'note': 'API Fallback: Recommended - redirects to home page. 
Requires visitorData', + 'url': 'https://www.youtube.com/feed/recommended', + 'info_dict': { + 'id': 'recommended', + 'title': 'recommended', + }, + 'playlist_mincount': 50, + 'params': { + 'skip_download': True, + 'extractor_args': {'youtubetab': {'skip': ['webpage']}} + }, + }, { + 'note': 'API Fallback: /videos tab, sorted by oldest first', + 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid', + 'info_dict': { + 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw', + 'title': 'Cody\'sLab - Videos', + 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa', + 'uploader': 'Cody\'sLab', + 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw', + }, + 'playlist_mincount': 650, + 'params': { + 'skip_download': True, + 'extractor_args': {'youtubetab': {'skip': ['webpage']}} + }, + }, { + 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...', + 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw', + 'info_dict': { + 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw', + 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw', + 'title': 'Uploads from Royalty Free Music - Topic', + 'uploader': 'Royalty Free Music - Topic', + }, + 'expected_warnings': [ + 'A channel/user page was given', + 'The URL does not have a videos tab', + ], + 'playlist_mincount': 101, + 'params': { + 'skip_download': True, + 'extractor_args': {'youtubetab': {'skip': ['webpage']}} + }, + }] + + @classmethod + def suitable(cls, url): + return False if YoutubeIE.suitable(url) else super( + YoutubeTabIE, cls).suitable(url) def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) @@ -4506,77 +4565,24 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): ie=YoutubeTabIE.ie_key()) -class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE): - IE_DESC = 'YouTube searches' +class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor): + IE_DESC = 'YouTube search' IE_NAME = 'youtube:search' _SEARCH_KEY = 'ytsearch' _SEARCH_PARAMS = None _TESTS = [] - def _search_results(self, query): - data = {'query': query} - if self._SEARCH_PARAMS: - data['params'] = self._SEARCH_PARAMS - continuation = {} - for page_num in itertools.count(1): - data.update(continuation) - search = self._extract_response( - item_id='query "%s" page %s' % (query, page_num), ep='search', query=data, - check_get_keys=('contents', 'onResponseReceivedCommands') - ) - if not search: - break - slr_contents = try_get( - search, - (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'], - lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']), - list) - if not slr_contents: - break - - # Youtube sometimes adds promoted content to searches, - # changing the index location of videos and token. - # So we search through all entries till we find them. 
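# The comment above explains why the continuation token is taken from the first
# section that yields one: promoted results shift positions, so every entry is
# scanned until a token appears. The same first-hit scan in isolation
# (token_of stands in for _extract_continuation):
def first_continuation(contents, token_of):
    for item in contents:
        token = token_of(item)
        if token:
            return token
    return None

assert first_continuation(
    [{'promo': True}, {'token': 'abc'}], lambda it: it.get('token')) == 'abc'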
- continuation = None - for slr_content in slr_contents: - if not continuation: - continuation = self._extract_continuation({'contents': [slr_content]}) - - isr_contents = try_get( - slr_content, - lambda x: x['itemSectionRenderer']['contents'], - list) - if not isr_contents: - continue - for content in isr_contents: - if not isinstance(content, dict): - continue - video = content.get('videoRenderer') - if not isinstance(video, dict): - continue - video_id = video.get('videoId') - if not video_id: - continue - - yield self._extract_video(video) - - if not continuation: - break - - -class YoutubeSearchDateIE(YoutubeSearchIE): +class YoutubeSearchDateIE(SearchInfoExtractor, YoutubeTabBaseInfoExtractor): IE_NAME = YoutubeSearchIE.IE_NAME + ':date' _SEARCH_KEY = 'ytsearchdate' - IE_DESC = 'YouTube searches, newest videos first' + IE_DESC = 'YouTube search, newest videos first' _SEARCH_PARAMS = 'CAI%3D' -class YoutubeSearchURLIE(YoutubeSearchIE): +class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor): IE_DESC = 'YouTube search URLs with sorting and filter support' IE_NAME = YoutubeSearchIE.IE_NAME + '_url' - _SEARCH_KEY = None _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)' - # _MAX_RESULTS = 100 _TESTS = [{ 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', 'playlist_mincount': 5, @@ -4589,15 +4595,10 @@ class YoutubeSearchURLIE(YoutubeSearchIE): 'only_matching': True, }] - @classmethod - def _make_valid_url(cls): - return cls._VALID_URL - def _real_extract(self, url): qs = parse_qs(url) query = (qs.get('search_query') or qs.get('q'))[0] - self._SEARCH_PARAMS = qs.get('sp', ('',))[0] - return self._get_n_results(query, self._MAX_RESULTS) + return self.playlist_result(self._search_results(query, qs.get('sp', (None,))[0]), query, query) class YoutubeFeedsInfoExtractor(YoutubeTabIE): From a61fd4cf6fa23b05729396ae342a5fe9785c231f Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Wed, 24 Nov 2021 09:27:59 +0530 Subject: [PATCH 0278/2552] [youtube:search_url] Add playlist/channel support Closes #1213, #1214 --- yt_dlp/extractor/youtube.py | 57 +++++++++++++------------------------ 1 file changed, 19 insertions(+), 38 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index a8d515f5c..ba135613b 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3117,6 +3117,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return info + class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): def _extract_channel_id(self, webpage): @@ -3326,6 +3327,8 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): 'shelfRenderer': lambda x: self._shelf_entries(x), 'backstagePostThreadRenderer': self._post_thread_entries, 'videoRenderer': lambda x: [self._video_entry(x)], + 'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}), + 'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}), } for key, renderer in isr_content.items(): if key not in known_renderers: @@ -3744,50 +3747,19 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): params = self._SEARCH_PARAMS if params: data['params'] = params - continuation = {} + continuation_list = [None] for page_num in itertools.count(1): - data.update(continuation) + data.update(continuation_list[0] or {}) search = self._extract_response( item_id='query "%s" page %s' % (query, page_num), ep='search', 
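# YoutubeSearchURLIE._real_extract above reads the query and the optional 'sp'
# filter straight from the URL's query string; yt-dlp's parse_qs is essentially
# this standard-library combination:
from urllib.parse import parse_qs, urlparse

url = 'https://www.youtube.com/results?search_query=test&sp=EgQIBBgB'
qs = parse_qs(urlparse(url).query)
query = (qs.get('search_query') or qs.get('q'))[0]
assert (query, qs.get('sp', (None,))[0]) == ('test', 'EgQIBBgB')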
query=data, - check_get_keys=('contents', 'onResponseReceivedCommands') - ) - if not search: - break + check_get_keys=('contents', 'onResponseReceivedCommands')) slr_contents = try_get( search, (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'], lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']), list) - if not slr_contents: - break - - # Youtube sometimes adds promoted content to searches, - # changing the index location of videos and token. - # So we search through all entries till we find them. - continuation = None - for slr_content in slr_contents: - if not continuation: - continuation = self._extract_continuation({'contents': [slr_content]}) - - isr_contents = try_get( - slr_content, - lambda x: x['itemSectionRenderer']['contents'], - list) - if not isr_contents: - continue - for content in isr_contents: - if not isinstance(content, dict): - continue - video = content.get('videoRenderer') - if not isinstance(video, dict): - continue - video_id = video.get('videoId') - if not video_id: - continue - - yield self._extract_video(video) - - if not continuation: + yield from self._extract_entries({'contents': slr_contents}, continuation_list) + if not continuation_list[0]: break @@ -4569,14 +4541,15 @@ class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor): IE_DESC = 'YouTube search' IE_NAME = 'youtube:search' _SEARCH_KEY = 'ytsearch' - _SEARCH_PARAMS = None + _SEARCH_PARAMS = 'EgIQAQ%3D%3D' # Videos only _TESTS = [] + class YoutubeSearchDateIE(SearchInfoExtractor, YoutubeTabBaseInfoExtractor): IE_NAME = YoutubeSearchIE.IE_NAME + ':date' _SEARCH_KEY = 'ytsearchdate' IE_DESC = 'YouTube search, newest videos first' - _SEARCH_PARAMS = 'CAI%3D' + _SEARCH_PARAMS = 'CAISAhAB' # Videos only, sorted by date class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor): @@ -4590,6 +4563,14 @@ class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor): 'id': 'youtube-dl test video', 'title': 'youtube-dl test video', } + }, { + 'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D', + 'playlist_mincount': 5, + 'info_dict': { + 'id': 'python', + 'title': 'python', + } + }, { 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB', 'only_matching': True, From fec41d17a587ff18f375c9ec96ee8bc748b57236 Mon Sep 17 00:00:00 2001 From: Sipherdrakon <64430430+Sipherdrakon@users.noreply.github.com> Date: Wed, 24 Nov 2021 03:01:49 -0500 Subject: [PATCH 0279/2552] [MTV] Improve mgid extraction (#1713) Original PR: https://github.com/ytdl-org/youtube-dl/pull/30149 Fixes: #713, #1580, https://github.com/ytdl-org/youtube-dl/issues/30139 Authored by: Sipherdrakon, kikuyan --- yt_dlp/extractor/mtv.py | 20 +++++++++++--------- yt_dlp/extractor/southpark.py | 17 ++++++++--------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/yt_dlp/extractor/mtv.py b/yt_dlp/extractor/mtv.py index 4812f11cc..be5de0a70 100644 --- a/yt_dlp/extractor/mtv.py +++ b/yt_dlp/extractor/mtv.py @@ -306,21 +306,23 @@ class MTVServicesInfoExtractor(InfoExtractor): if not mgid: mgid = self._extract_triforce_mgid(webpage) - if not mgid: - mgid = self._search_regex( - r'"videoConfig":{"videoId":"(mgid:.*?)"', webpage, 'mgid', default=None) - - if not mgid: - mgid = self._search_regex( - r'"media":{"video":{"config":{"uri":"(mgid:.*?)"', webpage, 'mgid', default=None) - if not mgid: data = self._parse_json(self._search_regex( r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None) main_container = 
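# The continuation_list = [None] used in these search changes is the mutable
# single-element-list trick: a nested generator cannot rebind its caller's
# local (the codebase still avoided `nonlocal` for Python 2 compatibility, as
# the comment in _extract_entries notes), but it can write into a shared list.
def consume(entries, out):
    yield from entries
    out[0] = 'next-page-token'   # side channel back to the caller

box = [None]
assert list(consume(['a', 'b'], box)) == ['a', 'b'] and box[0] == 'next-page-token'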
self._extract_child_with_type(data, 'MainContainer') ab_testing = self._extract_child_with_type(main_container, 'ABTesting') video_player = self._extract_child_with_type(ab_testing or main_container, 'VideoPlayer') - mgid = video_player['props']['media']['video']['config']['uri'] + if video_player: + mgid = try_get(video_player, lambda x: x['props']['media']['video']['config']['uri']) + else: + flex_wrapper = self._extract_child_with_type(ab_testing or main_container, 'FlexWrapper') + auth_suite_wrapper = self._extract_child_with_type(flex_wrapper, 'AuthSuiteWrapper') + player = self._extract_child_with_type(auth_suite_wrapper or flex_wrapper, 'Player') + if player: + mgid = try_get(player, lambda x: x['props']['videoDetail']['mgid']) + + if not mgid: + raise ExtractorError('Could not extract mgid') return mgid diff --git a/yt_dlp/extractor/southpark.py b/yt_dlp/extractor/southpark.py index d49749467..942a52dcf 100644 --- a/yt_dlp/extractor/southpark.py +++ b/yt_dlp/extractor/southpark.py @@ -6,19 +6,18 @@ from .mtv import MTVServicesInfoExtractor class SouthParkIE(MTVServicesInfoExtractor): IE_NAME = 'southpark.cc.com' - _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark(?:\.cc|studios)\.com/(?:clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))' + _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark(?:\.cc|studios)\.com/((?:video-)?clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))' _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed' _TESTS = [{ - 'url': 'http://southpark.cc.com/clips/104437/bat-daded#tab=featured', + 'url': 'https://southpark.cc.com/video-clips/d7wr06/south-park-you-all-agreed-to-counseling', 'info_dict': { - 'id': 'a7bff6c2-ed00-11e0-aca6-0026b9414f30', 'ext': 'mp4', - 'title': 'South Park|Bat Daded', - 'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.', - 'timestamp': 1112760000, - 'upload_date': '20050406', + 'title': 'You All Agreed to Counseling', + 'description': 'Kenny, Cartman, Stan, and Kyle visit Mr. Mackey and ask for his help getting Mrs. Nelson to come back. Mr. 
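# The mgid lookup above chains _extract_child_with_type calls through the
# page's __DATA__ tree, falling back from the VideoPlayer branch to the
# FlexWrapper/Player branch. A plausible standalone equivalent of that helper
# (an illustration of the idea, not MTVServicesInfoExtractor's actual code):
def child_with_type(node, child_type):
    for child in (node or {}).get('children') or []:
        if child.get('type') == child_type:
            return child
    return None

data = {'children': [{'type': 'ABTesting'}, {'type': 'VideoPlayer', 'props': {'media': {}}}]}
assert child_with_type(data, 'VideoPlayer')['props'] == {'media': {}}
assert child_with_type(data, 'FlexWrapper') is None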
Mackey reveals the only way to get things back to normal is to get the teachers vaccinated.', + 'timestamp': 1615352400, + 'upload_date': '20210310', }, }, { 'url': 'http://southpark.cc.com/collections/7758/fan-favorites/1', @@ -40,11 +39,11 @@ class SouthParkIE(MTVServicesInfoExtractor): class SouthParkEsIE(SouthParkIE): IE_NAME = 'southpark.cc.com:español' - _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/episodios-en-espanol/(?P<id>.+?)(\?|#|$))' + _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/es/episodios/(?P<id>.+?)(\?|#|$))' _LANG = 'es' _TESTS = [{ - 'url': 'http://southpark.cc.com/episodios-en-espanol/s01e01-cartman-consigue-una-sonda-anal#source=351c1323-0b96-402d-a8b9-40d01b2e9bde&position=1&sort=!airdate', + 'url': 'http://southpark.cc.com/es/episodios/s01e01-cartman-consigue-una-sonda-anal#source=351c1323-0b96-402d-a8b9-40d01b2e9bde&position=1&sort=!airdate', 'info_dict': { 'title': 'Cartman Consigue Una Sonda Anal', 'description': 'Cartman Consigue Una Sonda Anal', From da27aeea5c4eb8e381b8cb34d3ead8c6487d1e67 Mon Sep 17 00:00:00 2001 From: Tim <staubichsauger@t-online.de> Date: Wed, 24 Nov 2021 11:08:58 +0100 Subject: [PATCH 0280/2552] [ITV] Fix extractor (#1776) Closes #1775 Authored by: staubichsauger --- yt_dlp/extractor/itv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/itv.py b/yt_dlp/extractor/itv.py index 5f1d306f6..bdd6af688 100644 --- a/yt_dlp/extractor/itv.py +++ b/yt_dlp/extractor/itv.py @@ -147,7 +147,7 @@ class ITVIE(InfoExtractor): platform_tag_video, featureset_video = next( ((platform_tag, featureset) for platform_tag, featuresets in reversed(list(variants.items())) for featureset in featuresets - if try_get(featureset, lambda x: x[:2]) == ['hls', 'aes']), + if set(try_get(featureset, lambda x: x[:2]) or []) == {'aes', 'hls'}), (None, None)) if not platform_tag_video or not featureset_video: raise ExtractorError('No downloads available', expected=True, video_id=video_id) From 17b454066224453b0adc795c5a990b35b97c9ffb Mon Sep 17 00:00:00 2001 From: Aurora <nyaurora@disroot.org> Date: Wed, 24 Nov 2021 10:47:53 +0000 Subject: [PATCH 0281/2552] [radiozet] Add extractor (#1593) Authored by: 0xA7404A (Aurora) --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/radiozet.py | 51 ++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 yt_dlp/extractor/radiozet.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index a0f4908f0..4dda3705a 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1168,6 +1168,7 @@ from .radiode import RadioDeIE from .radiojavan import RadioJavanIE from .radiobremen import RadioBremenIE from .radiofrance import RadioFranceIE +from .radiozet import RadioZetPodcastIE from .radiokapital import ( RadioKapitalIE, RadioKapitalShowIE, diff --git a/yt_dlp/extractor/radiozet.py b/yt_dlp/extractor/radiozet.py new file mode 100644 index 000000000..2e1ff36c2 --- /dev/null +++ b/yt_dlp/extractor/radiozet.py @@ -0,0 +1,51 @@ +# coding: utf-8 +from .common import InfoExtractor +from ..utils import ( + traverse_obj, + strip_or_none, +) + + +class RadioZetPodcastIE(InfoExtractor): + _VALID_URL = r'https?://player\.radiozet\.pl\/Podcasty/.*?/(?P<id>.+)' + _TEST = { + 'url': 'https://player.radiozet.pl/Podcasty/Nie-Ma-Za-Co/O-przedmiotach-szkolnych-ktore-przydaja-sie-w-zyciu', + 'md5': 'e03665c316b4fbc5f6a8f232948bbba3', + 'info_dict': { + 'id': '42154', + 'display_id': 
'O-przedmiotach-szkolnych-ktore-przydaja-sie-w-zyciu', + 'title': 'O przedmiotach szkolnych, które przydają się w życiu', + 'description': 'md5:fa72bed49da334b09e5b2f79851f185c', + 'release_timestamp': 1592985480, + 'ext': 'mp3', + 'thumbnail': r're:^https?://.*\.png$', + 'duration': 83, + 'series': 'Nie Ma Za Co', + 'creator': 'Katarzyna Pakosińska', + } + } + + def _call_api(self, podcast_id, display_id): + return self._download_json( + f'https://player.radiozet.pl/api/podcasts/getPodcast/(node)/{podcast_id}/(station)/radiozet', + display_id) + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + podcast_id = self._html_search_regex(r'<div.*?\sid="player".*?\sdata-id=[\'"]([^\'"]+)[\'"]', + webpage, 'podcast id') + data = self._call_api(podcast_id, display_id)['data'][0] + + return { + 'id': podcast_id, + 'display_id': display_id, + 'title': strip_or_none(data.get('title')), + 'description': strip_or_none(traverse_obj(data, ('program', 'desc'))), + 'release_timestamp': data.get('published_date'), + 'url': traverse_obj(data, ('player', 'stream')), + 'thumbnail': traverse_obj(data, ('program', 'image', 'original')), + 'duration': traverse_obj(data, ('player', 'duration')), + 'series': strip_or_none(traverse_obj(data, ('program', 'title'))), + 'creator': strip_or_none(traverse_obj(data, ('presenter', 0, 'title'))), + } From eb56d132d21752fa50e0dd2c3bfa3d983ad48655 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Wed, 24 Nov 2021 18:22:42 +0530 Subject: [PATCH 0282/2552] [cleanup,instagram] Refactor extractors Closes #1561 --- yt_dlp/extractor/instagram.py | 285 ++++++++++++++-------------------- 1 file changed, 114 insertions(+), 171 deletions(-) diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py index 0e726423e..1fcf97a19 100644 --- a/yt_dlp/extractor/instagram.py +++ b/yt_dlp/extractor/instagram.py @@ -1,5 +1,4 @@ # coding: utf-8 -from __future__ import unicode_literals import itertools import hashlib @@ -9,7 +8,6 @@ import time from .common import InfoExtractor from ..compat import ( - compat_str, compat_HTTPError, ) from ..utils import ( @@ -19,9 +17,8 @@ from ..utils import ( int_or_none, lowercase_escape, std_headers, - try_get, + traverse_obj, url_or_none, - variadic, urlencode_postdata, ) @@ -72,6 +69,58 @@ class InstagramBaseIE(InfoExtractor): def _real_initialize(self): self._login() + def _get_count(self, media, kind, *keys): + return traverse_obj( + media, (kind, 'count'), *((f'edge_media_{key}', 'count') for key in keys), + expected_type=int_or_none) + + def _get_dimension(self, name, media, webpage=None): + return ( + traverse_obj(media, ('dimensions', name), expected_type=int_or_none) + or int_or_none(self._html_search_meta( + (f'og:video:{name}', f'video:{name}'), webpage or '', default=None))) + + def _extract_nodes(self, nodes, is_direct=False): + for idx, node in enumerate(nodes, start=1): + if node.get('__typename') != 'GraphVideo' and node.get('is_video') is not True: + continue + + video_id = node.get('shortcode') + + if is_direct: + info = { + 'id': video_id or node['id'], + 'url': node.get('video_url'), + 'width': self._get_dimension('width', node), + 'height': self._get_dimension('height', node), + 'http_headers': { + 'Referer': 'https://www.instagram.com/', + } + } + elif not video_id: + continue + else: + info = { + '_type': 'url', + 'ie_key': 'Instagram', + 'id': video_id, + 'url': f'https://instagram.com/p/{video_id}', + } + + yield { + **info, + 
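# traverse_obj, leaned on by both the RadioZet and Instagram code above,
# resolves a tuple path through nested dicts/lists and returns None instead of
# raising. A minimal reimplementation of the core idea (the real utility also
# handles alternative paths, `...` branching and expected_type coercion):
def traverse(obj, path):
    for key in path:
        if isinstance(obj, dict):
            obj = obj.get(key)
        elif isinstance(obj, (list, tuple)) and isinstance(key, int) and -len(obj) <= key < len(obj):
            obj = obj[key]
        else:
            return None
    return obj

data = {'program': {'image': {'original': 'https://example.com/a.png'}}}
assert traverse(data, ('program', 'image', 'original')) == 'https://example.com/a.png'
assert traverse(data, ('presenter', 0, 'title')) is None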
'title': node.get('title') or (f'Video {idx}' if is_direct else None), + 'description': traverse_obj( + node, ('edge_media_to_caption', 'edges', 0, 'node', 'text'), expected_type=str), + 'thumbnail': traverse_obj( + node, 'display_url', 'thumbnail_src', 'display_src', expected_type=url_or_none), + 'duration': float_or_none(node.get('video_duration')), + 'timestamp': int_or_none(node.get('taken_at_timestamp')), + 'view_count': int_or_none(node.get('video_view_count')), + 'comment_count': self._get_count(node, 'comments', 'preview_comment', 'to_comment', 'to_parent_comment'), + 'like_count': self._get_count(node, 'likes', 'preview_like'), + } + class InstagramIOSIE(InfoExtractor): IE_DESC = 'IOS instagram:// URL' @@ -234,29 +283,22 @@ class InstagramIE(InstagramBaseIE): return mobj.group('link') def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - url = mobj.group('url') - + video_id, url = self._match_valid_url(url).group('id', 'url') webpage, urlh = self._download_webpage_handle(url, video_id) - if 'www.instagram.com/accounts/login' in urlh.geturl().rstrip('/'): + if 'www.instagram.com/accounts/login' in urlh.geturl(): self.raise_login_required('You need to log in to access this content') - (media, video_url, description, thumbnails, timestamp, uploader, - uploader_id, like_count, comment_count, comments, height, - width) = [None] * 12 - shared_data = self._parse_json( self._search_regex( r'window\._sharedData\s*=\s*({.+?});', webpage, 'shared data', default='{}'), video_id, fatal=False) - if shared_data: - media = try_get( - shared_data, - (lambda x: x['entry_data']['PostPage'][0]['graphql']['shortcode_media'], - lambda x: x['entry_data']['PostPage'][0]['media']), - dict) + media = traverse_obj( + shared_data, + ('entry_data', 'PostPage', 0, 'graphql', 'shortcode_media'), + ('entry_data', 'PostPage', 0, 'media'), + expected_type=dict) + # _sharedData.entry_data.PostPage is empty when authenticated (see # https://github.com/ytdl-org/youtube-dl/pull/22880) if not media: @@ -265,125 +307,71 @@ class InstagramIE(InstagramBaseIE): r'window\.__additionalDataLoaded\s*\(\s*[^,]+,\s*({.+?})\s*\)\s*;', webpage, 'additional data', default='{}'), video_id, fatal=False) - if additional_data: - media = try_get( - additional_data, lambda x: x['graphql']['shortcode_media'], - dict) - if media: - video_url = media.get('video_url') - height = int_or_none(self._html_search_meta(('og:video:height', 'video:height'), webpage)) or try_get(media, lambda x: x['dimensions']['height']) - width = int_or_none(self._html_search_meta(('og:video:width', 'video:width'), webpage)) or try_get(media, lambda x: x['dimensions']['width']) - description = try_get( - media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'], - compat_str) or media.get('caption') - title = media.get('title') - display_resources = media.get('display_resources') - if not display_resources: - display_resources = [{'src': media.get('display_src')}, {'src': media.get('display_url')}] - duration = float_or_none(media.get('video_duration')) - timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date')) - uploader = try_get(media, lambda x: x['owner']['full_name']) - uploader_id = try_get(media, lambda x: x['owner']['username']) - - def get_count(keys, kind): - for key in variadic(keys): - count = int_or_none(try_get( - media, (lambda x: x['edge_media_%s' % key]['count'], - lambda x: x['%ss' % kind]['count']))) - if count is not None: - return count - - like_count = 
get_count('preview_like', 'like') - comment_count = get_count( - ('preview_comment', 'to_comment', 'to_parent_comment'), 'comment') - - thumbnails = [{ - 'url': thumbnail['src'], - 'width': thumbnail.get('config_width'), - 'height': thumbnail.get('config_height'), - } for thumbnail in display_resources if thumbnail.get('src')] - - comments = [] - for comment in try_get(media, lambda x: x['edge_media_to_parent_comment']['edges']): - comment_dict = comment.get('node', {}) - comment_text = comment_dict.get('text') - if comment_text: - comments.append({ - 'author': try_get(comment_dict, lambda x: x['owner']['username']), - 'author_id': try_get(comment_dict, lambda x: x['owner']['id']), - 'id': comment_dict.get('id'), - 'text': comment_text, - 'timestamp': int_or_none(comment_dict.get('created_at')), - }) - if not video_url: - edges = try_get( - media, lambda x: x['edge_sidecar_to_children']['edges'], - list) or [] - if edges: - entries = [] - for edge_num, edge in enumerate(edges, start=1): - node = try_get(edge, lambda x: x['node'], dict) - if not node: - continue - node_video_url = url_or_none(node.get('video_url')) - if not node_video_url: - continue - entries.append({ - 'id': node.get('shortcode') or node['id'], - 'title': node.get('title') or 'Video %d' % edge_num, - 'url': node_video_url, - 'thumbnail': node.get('display_url'), - 'duration': float_or_none(node.get('video_duration')), - 'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])), - 'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])), - 'view_count': int_or_none(node.get('video_view_count')), - }) - return self.playlist_result( - entries, video_id, - 'Post by %s' % uploader_id if uploader_id else None, - description) + media = traverse_obj(additional_data, ('graphql', 'shortcode_media'), expected_type=dict) or {} + + uploader_id = traverse_obj(media, ('owner', 'username')) or self._search_regex( + r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', webpage, 'uploader id', fatal=False) + + description = ( + traverse_obj(media, ('edge_media_to_caption', 'edges', 0, 'node', 'text'), expected_type=str) + or media.get('caption')) + if not description: + description = self._search_regex( + r'"caption"\s*:\s*"(.+?)"', webpage, 'description', default=None) + if description is not None: + description = lowercase_escape(description) + video_url = media.get('video_url') if not video_url: + nodes = traverse_obj(media, ('edge_sidecar_to_children', 'edges', ..., 'node'), expected_type=dict) or [] + if nodes: + return self.playlist_result( + self._extract_nodes(nodes, True), video_id, + 'Post by %s' % uploader_id if uploader_id else None, description) + video_url = self._og_search_video_url(webpage, secure=False) formats = [{ 'url': video_url, - 'width': width, - 'height': height, + 'width': self._get_dimension('width', media, webpage), + 'height': self._get_dimension('height', media, webpage), }] - dash = try_get(media, lambda x: x['dash_info']['video_dash_manifest']) + dash = traverse_obj(media, ('dash_info', 'video_dash_manifest')) if dash: formats.extend(self._parse_mpd_formats(self._parse_xml(dash, video_id), mpd_id='dash')) self._sort_formats(formats) - if not uploader_id: - uploader_id = self._search_regex( - r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', - webpage, 'uploader id', fatal=False) - - if not description: - description = self._search_regex( - r'"caption"\s*:\s*"(.+?)"', webpage, 'description', default=None) - if description is not None: - description = lowercase_escape(description) - - if 
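# The single _get_count helper introduced by this patch tries several
# edge_media_* paths and keeps the first count that parses, replacing the old
# per-field get_count closures. The "first non-None across candidate paths"
# shape on its own:
def first_count(media, *paths):
    for path in paths:
        node = media
        for key in path:
            node = node.get(key) if isinstance(node, dict) else None
        if isinstance(node, int):
            return node
    return None

media = {'edge_media_preview_like': {'count': 42}}
assert first_count(media, ('likes', 'count'), ('edge_media_preview_like', 'count')) == 42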
not thumbnails: - thumbnails = self._og_search_thumbnail(webpage) + comments = [{ + 'author': traverse_obj(comment_dict, ('node', 'owner', 'username')), + 'author_id': traverse_obj(comment_dict, ('node', 'owner', 'id')), + 'id': traverse_obj(comment_dict, ('node', 'id')), + 'text': traverse_obj(comment_dict, ('node', 'text')), + 'timestamp': traverse_obj(comment_dict, ('node', 'created_at'), expected_type=int_or_none), + } for comment_dict in traverse_obj(media, ('edge_media_to_parent_comment', 'edges'))] + + display_resources = ( + media.get('display_resources') + or [{'src': media.get(key)} for key in ('display_src', 'display_url')] + or [{'src': self._og_search_thumbnail(webpage)}]) + thumbnails = [{ + 'url': thumbnail['src'], + 'width': thumbnail.get('config_width'), + 'height': thumbnail.get('config_height'), + } for thumbnail in display_resources if thumbnail.get('src')] return { 'id': video_id, 'formats': formats, - 'ext': 'mp4', - 'title': title or 'Video by %s' % uploader_id, + 'title': media.get('title') or 'Video by %s' % uploader_id, 'description': description, - 'duration': duration, - 'thumbnails': thumbnails, - 'timestamp': timestamp, + 'duration': float_or_none(media.get('video_duration')), + 'timestamp': traverse_obj(media, 'taken_at_timestamp', 'date', expected_type=int_or_none), 'uploader_id': uploader_id, - 'uploader': uploader, - 'like_count': like_count, - 'comment_count': comment_count, + 'uploader': traverse_obj(media, ('owner', 'full_name')), + 'like_count': self._get_count(media, 'likes', 'preview_like'), + 'comment_count': self._get_count(media, 'comments', 'preview_comment', 'to_comment', 'to_parent_comment'), 'comments': comments, + 'thumbnails': thumbnails, 'http_headers': { 'Referer': 'https://www.instagram.com/', } @@ -402,10 +390,6 @@ class InstagramPlaylistBaseIE(InstagramBaseIE): def _extract_graphql(self, data, url): # Parses GraphQL queries containing videos and generates a playlist. 
- def get_count(suffix): - return int_or_none(try_get( - node, lambda x: x['edge_media_' + suffix]['count'])) - uploader_id = self._match_id(url) csrf_token = data['config']['csrf_token'] rhx_gis = data.get('rhx_gis') or '3c7ca9dcefcf966d11dacf1f151335e8' @@ -454,55 +438,14 @@ class InstagramPlaylistBaseIE(InstagramBaseIE): continue raise - edges = media.get('edges') - if not edges or not isinstance(edges, list): - break - - for edge in edges: - node = edge.get('node') - if not node or not isinstance(node, dict): - continue - if node.get('__typename') != 'GraphVideo' and node.get('is_video') is not True: - continue - video_id = node.get('shortcode') - if not video_id: - continue - - info = self.url_result( - 'https://instagram.com/p/%s/' % video_id, - ie=InstagramIE.ie_key(), video_id=video_id) - - description = try_get( - node, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'], - compat_str) - thumbnail = node.get('thumbnail_src') or node.get('display_src') - timestamp = int_or_none(node.get('taken_at_timestamp')) - - comment_count = get_count('to_comment') - like_count = get_count('preview_like') - view_count = int_or_none(node.get('video_view_count')) - - info.update({ - 'description': description, - 'thumbnail': thumbnail, - 'timestamp': timestamp, - 'comment_count': comment_count, - 'like_count': like_count, - 'view_count': view_count, - }) - - yield info - - page_info = media.get('page_info') - if not page_info or not isinstance(page_info, dict): - break - - has_next_page = page_info.get('has_next_page') - if not has_next_page: + nodes = traverse_obj(media, ('edges', ..., 'node'), expected_type=dict) or [] + if not nodes: break + yield from self._extract_nodes(nodes) - cursor = page_info.get('end_cursor') - if not cursor or not isinstance(cursor, compat_str): + has_next_page = traverse_obj(media, ('page_info', 'has_next_page')) + cursor = traverse_obj(media, ('page_info', 'end_cursor'), expected_type=str) + if not has_next_page or not cursor: break def _real_extract(self, url): From 883ecd54949fa90174094628bf002f179edf6767 Mon Sep 17 00:00:00 2001 From: cntrl-s <65956966+cntrl-s@users.noreply.github.com> Date: Sat, 27 Nov 2021 00:05:39 +0530 Subject: [PATCH 0283/2552] Streamff extractor (#1736) Closes #1359 Authored by: cntrl-s --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/streamff.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 yt_dlp/extractor/streamff.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 4dda3705a..163efc748 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1404,6 +1404,7 @@ from .streamable import StreamableIE from .streamanity import StreamanityIE from .streamcloud import StreamcloudIE from .streamcz import StreamCZIE +from .streamff import StreamFFIE from .streetvoice import StreetVoiceIE from .stretchinternet import StretchInternetIE from .stripchat import StripchatIE diff --git a/yt_dlp/extractor/streamff.py b/yt_dlp/extractor/streamff.py new file mode 100644 index 000000000..6b190bb3b --- /dev/null +++ b/yt_dlp/extractor/streamff.py @@ -0,0 +1,31 @@ +# coding: utf-8 +from .common import InfoExtractor +from ..utils import int_or_none, parse_iso8601 + + +class StreamFFIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?streamff\.com/v/(?P<id>[a-zA-Z0-9]+)' + + _TESTS = [{ + 'url': 'https://streamff.com/v/55cc94', + 'md5': '8745a67bb5e5c570738efe7983826370', + 'info_dict': { + 'id': '55cc94', + 'ext': 'mp4', + 'title': 
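# The Instagram playlist loop above pages GraphQL results by cursor: yield the
# nodes, then continue only while page_info reports has_next_page and a
# non-empty end_cursor. The loop shape in isolation (fetch is a hypothetical
# stand-in for the signed GraphQL request):
def iter_graphql(fetch):
    cursor = None
    while True:
        media = fetch(cursor)
        yield from media.get('edges') or []
        page_info = media.get('page_info') or {}
        cursor = page_info.get('end_cursor')
        if not page_info.get('has_next_page') or not cursor:
            break

pages = [{'edges': [1, 2], 'page_info': {'has_next_page': True, 'end_cursor': 'c1'}},
         {'edges': [3], 'page_info': {'has_next_page': False}}]
assert list(iter_graphql(lambda cursor: pages.pop(0))) == [1, 2, 3]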
'55cc94', + 'timestamp': 1634764643, + 'upload_date': '20211020', + 'view_count': int, + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + json_data = self._download_json(f'https://streamff.com/api/videos/{video_id}', video_id) + return { + 'id': video_id, + 'title': json_data.get('name') or video_id, + 'url': 'https://streamff.com/%s' % json_data['videoLink'], + 'view_count': int_or_none(json_data.get('views')), + 'timestamp': parse_iso8601(json_data.get('date')), + } From 18d6dd4e0194211c4f3238fe441ebe0c1fdbc167 Mon Sep 17 00:00:00 2001 From: Grabien <60237587+Grabien@users.noreply.github.com> Date: Fri, 26 Nov 2021 21:00:04 +0200 Subject: [PATCH 0284/2552] [extractor/breitbart] Breitbart.com website support (#1434) Authored by: Grabien --- yt_dlp/extractor/breitbart.py | 39 ++++++++++++++++++++++++++++++++++ yt_dlp/extractor/extractors.py | 1 + 2 files changed, 40 insertions(+) create mode 100644 yt_dlp/extractor/breitbart.py diff --git a/yt_dlp/extractor/breitbart.py b/yt_dlp/extractor/breitbart.py new file mode 100644 index 000000000..f50f719dc --- /dev/null +++ b/yt_dlp/extractor/breitbart.py @@ -0,0 +1,39 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class BreitBartIE(InfoExtractor): + _VALID_URL = r'https?:\/\/(?:www\.)breitbart.com/videos/v/(?P<id>[^/]+)' + _TESTS = [{ + 'url': 'https://www.breitbart.com/videos/v/5cOz1yup/?pl=Ij6NDOji', + 'md5': '0aa6d1d6e183ac5ca09207fe49f17ade', + 'info_dict': { + 'id': '5cOz1yup', + 'ext': 'mp4', + 'title': 'Watch \u2013 Clyburn: Statues in Congress Have to Go Because they Are Honoring Slavery', + 'description': 'md5:bac35eb0256d1cb17f517f54c79404d5', + 'thumbnail': 'https://cdn.jwplayer.com/thumbs/5cOz1yup-1920.jpg', + 'age_limit': 0, + } + }, { + 'url': 'https://www.breitbart.com/videos/v/eaiZjVOn/', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + formats = self._extract_m3u8_formats(f'https://cdn.jwplayer.com/manifests/{video_id}.m3u8', video_id, ext='mp4') + self._sort_formats(formats) + return { + 'id': video_id, + 'title': self._og_search_title( + webpage, default=None) or self._html_search_regex( + r'(?s)<title>(.*?)', webpage, 'video title'), + 'description': self._og_search_description(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + 'age_limit': self._rta_search(webpage), + 'formats': formats + } diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 163efc748..ed8a23e72 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -179,6 +179,7 @@ from .br import ( ) from .bravotv import BravoTVIE from .breakcom import BreakIE +from .breitbart import BreitBartIE from .brightcove import ( BrightcoveLegacyIE, BrightcoveNewIE, From cf1f13b817d88eb7d4b449f20cbad3215030e35f Mon Sep 17 00:00:00 2001 From: shirt <2660574+shirt-dev@users.noreply.github.com> Date: Sat, 27 Nov 2021 00:15:59 -0500 Subject: [PATCH 0285/2552] [generic] Support mpd manifests without extension (#1806) Authored by: shirt-dev --- yt_dlp/extractor/generic.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 9c7fa4a21..ae0ebb14a 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -2601,6 +2601,8 @@ class GenericIE(InfoExtractor): subtitles = {} if format_id.endswith('mpegurl'): formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, 
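# parse_iso8601 above turns the Streamff API's ISO 8601 'date' into the unix
# timestamp the test checks. Roughly equivalent stdlib handling (the exact
# wire format of 'date' is an assumption; this sample value reproduces the
# test's 1634764643):
from datetime import datetime

ts = int(datetime.fromisoformat('2021-10-20T21:17:23Z'.replace('Z', '+00:00')).timestamp())
assert ts == 1634764643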
video_id, 'mp4') + elif format_id.endswith('mpd') or format_id.endswith('dash+xml'): + formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id) elif format_id == 'f4m': formats = self._extract_f4m_formats(url, video_id) else: From 3938a9212c3d1aa30a7f6db12b997d94afd8b646 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Sat, 27 Nov 2021 12:01:42 +0530 Subject: [PATCH 0286/2552] [CPTwentyFour] Add extractor (#1769) Closes #1768 Authored by: Ashish0804 --- yt_dlp/extractor/extractors.py | 5 ++++- yt_dlp/extractor/ninecninemedia.py | 35 +++++++++++++++++++++++++++++- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index ed8a23e72..a277bf722 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -943,7 +943,10 @@ from .niconico import ( NicovideoSearchIE, NicovideoSearchURLIE, ) -from .ninecninemedia import NineCNineMediaIE +from .ninecninemedia import ( + NineCNineMediaIE, + CPTwentyFourIE, +) from .ninegag import NineGagIE from .ninenow import NineNowIE from .nintendo import NintendoIE diff --git a/yt_dlp/extractor/ninecninemedia.py b/yt_dlp/extractor/ninecninemedia.py index 4aaf21a12..781842721 100644 --- a/yt_dlp/extractor/ninecninemedia.py +++ b/yt_dlp/extractor/ninecninemedia.py @@ -1,7 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( float_or_none, @@ -99,3 +98,37 @@ class NineCNineMediaIE(InfoExtractor): } return info + + +class CPTwentyFourIE(InfoExtractor): + IE_NAME = 'cp24' + _GEO_COUNTRIES = ['CA'] + _VALID_URL = r'https?://(?:www\.)?cp24\.com/news/(?P[^?#]+)' + + _TESTS = [{ + 'url': 'https://www.cp24.com/news/video-shows-atm-being-ripped-out-of-business-by-pickup-truck-driver-in-mississauga-1.5676877', + 'info_dict': { + 'id': '2328005', + 'ext': 'mp4', + 'title': 'WATCH: Truck rips ATM from Mississauga business', + 'description': 'md5:cf7498480885f080a754389a2b2f7073', + 'timestamp': 1637618377, + 'episode_number': None, + 'season': 'Season 0', + 'season_number': 0, + 'season_id': 57974, + 'series': 'CTV News Toronto', + 'duration': 26.86, + 'thumbnail': 'http://images2.9c9media.com/image_asset/2014_11_5_2eb609a0-475b-0132-fbd6-34b52f6f1279_jpg_2000x1125.jpg', + 'upload_date': '20211122', + }, + 'params': {'skip_download': True, 'format': 'bv'} + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + id, destination = self._search_regex( + r'getAuthStates\("(?P[^"]+)",\s?"(?P[^"]+)"\);', + webpage, 'video id and destination', group=('id', 'destination')) + return self.url_result(f'9c9media:{destination}:{id}', ie=NineCNineMediaIE.ie_key(), video_id=id) From 359df0fc423b4a5d5af8113d42648fdea22e81ea Mon Sep 17 00:00:00 2001 From: Henrik Heimbuerger Date: Sat, 27 Nov 2021 07:51:32 +0100 Subject: [PATCH 0287/2552] [nebula] Add NebulaCollectionIE and rewrite extractor (#1694) Closes #1690 Authored by: hheimbuerger --- yt_dlp/extractor/extractors.py | 5 +- yt_dlp/extractor/nebula.py | 370 +++++++++++++++++++-------------- 2 files changed, 215 insertions(+), 160 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index a277bf722..2fb9515c0 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -889,7 +889,10 @@ from .ndr import ( NJoyEmbedIE, ) from .ndtv import NDTVIE -from .nebula import NebulaIE +from 
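# CPTwentyFourIE above extracts two values with one regex by passing
# group=('id', 'destination') to _search_regex. The plain-`re` equivalent of
# that multi-group pull (toy page content):
import re

webpage = 'getAuthStates("2328005", "destination-x");'
m = re.search(r'getAuthStates\("(?P<id>[^"]+)",\s?"(?P<destination>[^"]+)"\);', webpage)
video_id, destination = m.group('id', 'destination')
assert f'9c9media:{destination}:{video_id}' == '9c9media:destination-x:2328005'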
.nebula import ( + NebulaIE, + NebulaCollectionIE, +) from .nerdcubed import NerdCubedFeedIE from .netzkino import NetzkinoIE from .neteasemusic import ( diff --git a/yt_dlp/extractor/nebula.py b/yt_dlp/extractor/nebula.py index 9698a358e..d235805c3 100644 --- a/yt_dlp/extractor/nebula.py +++ b/yt_dlp/extractor/nebula.py @@ -1,22 +1,163 @@ # coding: utf-8 from __future__ import unicode_literals +import itertools import json import time +import urllib -from urllib.error import HTTPError -from .common import InfoExtractor -from ..compat import compat_str, compat_urllib_parse_unquote, compat_urllib_parse_quote from ..utils import ( ExtractorError, parse_iso8601, try_get, - urljoin, ) +from .common import InfoExtractor + + +class NebulaBaseIE(InfoExtractor): + _NETRC_MACHINE = 'watchnebula' + + _nebula_api_token = None + _nebula_bearer_token = None + _zype_access_token = None + + def _perform_nebula_auth(self): + username, password = self._get_login_info() + if not (username and password): + self.raise_login_required() + + data = json.dumps({'email': username, 'password': password}).encode('utf8') + response = self._download_json( + 'https://api.watchnebula.com/api/v1/auth/login/', + data=data, fatal=False, video_id=None, + headers={ + 'content-type': 'application/json', + # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint + 'cookie': '' + }, + note='Logging in to Nebula with supplied credentials', + errnote='Authentication failed or rejected') + if not response or not response.get('key'): + self.raise_login_required() + + # save nebula token as cookie + self._set_cookie( + 'nebula.app', 'nebula-auth', + urllib.parse.quote( + json.dumps({ + "apiToken": response["key"], + "isLoggingIn": False, + "isLoggingOut": False, + }, separators=(",", ":"))), + expire_time=int(time.time()) + 86400 * 365, + ) + + return response['key'] + + def _retrieve_nebula_api_token(self): + """ + Check cookie jar for valid token. Try to authenticate using credentials if no valid token + can be found in the cookie jar. + """ + nebula_cookies = self._get_cookies('https://nebula.app') + nebula_cookie = nebula_cookies.get('nebula-auth') + if nebula_cookie: + self.to_screen('Authenticating to Nebula with token from cookie jar') + nebula_cookie_value = urllib.parse.unquote(nebula_cookie.value) + nebula_api_token = self._parse_json(nebula_cookie_value, None).get('apiToken') + if nebula_api_token: + return nebula_api_token + + return self._perform_nebula_auth() + def _call_nebula_api(self, url, video_id=None, method='GET', auth_type='api', note=''): + assert method in ('GET', 'POST',) + assert auth_type in ('api', 'bearer',) -class NebulaIE(InfoExtractor): + def inner_call(): + authorization = f'Token {self._nebula_api_token}' if auth_type == 'api' else f'Bearer {self._nebula_bearer_token}' + return self._download_json( + url, video_id, note=note, headers={'Authorization': authorization}, + data=b'' if method == 'POST' else None) + + try: + return inner_call() + except ExtractorError as exc: + # if 401 or 403, attempt credential re-auth and retry + if exc.cause and isinstance(exc.cause, urllib.error.HTTPError) and exc.cause.code in (401, 403): + self.to_screen(f'Reauthenticating to Nebula and retrying, because last {auth_type} call resulted in error {exc.cause.code}') + self._login() + return inner_call() + else: + raise + + def _fetch_nebula_bearer_token(self): + """ + Get a Bearer token for the Nebula API. This will be required to fetch video meta data. 
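# _call_nebula_api above wraps the request so that a 401/403 triggers exactly
# one re-login and retry. The shape of that pattern, with hypothetical
# do_request/relogin callables standing in for the real HTTP and auth calls:
import urllib.error

def call_with_reauth(do_request, relogin):
    try:
        return do_request()
    except urllib.error.HTTPError as exc:
        if exc.code not in (401, 403):
            raise
        relogin()                # refresh tokens, then retry once
        return do_request()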
+ """ + response = self._call_nebula_api('https://api.watchnebula.com/api/v1/authorization/', + method='POST', + note='Authorizing to Nebula') + return response['token'] + def _fetch_zype_access_token(self): + """ + Get a Zype access token, which is required to access video streams -- in our case: to + generate video URLs. + """ + user_object = self._call_nebula_api('https://api.watchnebula.com/api/v1/auth/user/', note='Retrieving Zype access token') + + access_token = try_get(user_object, lambda x: x['zype_auth_info']['access_token'], str) + if not access_token: + if try_get(user_object, lambda x: x['is_subscribed'], bool): + # TODO: Reimplement the same Zype token polling the Nebula frontend implements + # see https://github.com/ytdl-org/youtube-dl/pull/24805#issuecomment-749231532 + raise ExtractorError( + 'Unable to extract Zype access token from Nebula API authentication endpoint. ' + 'Open an arbitrary video in a browser with this account to generate a token', + expected=True) + raise ExtractorError('Unable to extract Zype access token from Nebula API authentication endpoint') + return access_token + + def _build_video_info(self, episode): + zype_id = episode['zype_id'] + zype_video_url = f'https://player.zype.com/embed/{zype_id}.html?access_token={self._zype_access_token}' + channel_slug = episode['channel_slug'] + return { + 'id': episode['zype_id'], + 'display_id': episode['slug'], + '_type': 'url_transparent', + 'ie_key': 'Zype', + 'url': zype_video_url, + 'title': episode['title'], + 'description': episode['description'], + 'timestamp': parse_iso8601(episode['published_at']), + 'thumbnails': [{ + # 'id': tn.get('name'), # this appears to be null + 'url': tn['original'], + 'height': key, + } for key, tn in episode['assets']['thumbnail'].items()], + 'duration': episode['duration'], + 'channel': episode['channel_title'], + 'channel_id': channel_slug, + 'channel_url': f'https://nebula.app/{channel_slug}', + 'uploader': episode['channel_title'], + 'uploader_id': channel_slug, + 'uploader_url': f'https://nebula.app/{channel_slug}', + 'series': episode['channel_title'], + 'creator': episode['channel_title'], + } + + def _login(self): + self._nebula_api_token = self._retrieve_nebula_api_token() + self._nebula_bearer_token = self._fetch_nebula_bearer_token() + self._zype_access_token = self._fetch_zype_access_token() + + def _real_initialize(self): + self._login() + + +class NebulaIE(NebulaBaseIE): _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/videos/(?P[-\w]+)' _TESTS = [ { @@ -30,12 +171,13 @@ class NebulaIE(InfoExtractor): 'upload_date': '20180731', 'timestamp': 1533009600, 'channel': 'Lindsay Ellis', + 'channel_id': 'lindsayellis', 'uploader': 'Lindsay Ellis', + 'uploader_id': 'lindsayellis', }, 'params': { 'usenetrc': True, }, - 'skip': 'All Nebula content requires authentication', }, { 'url': 'https://nebula.app/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore', @@ -47,13 +189,14 @@ class NebulaIE(InfoExtractor): 'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.', 'upload_date': '20200327', 'timestamp': 1585348140, - 'channel': 'The Logistics of D-Day', - 'uploader': 'The Logistics of D-Day', + 'channel': 'Real Engineering', + 'channel_id': 'realengineering', + 'uploader': 'Real Engineering', + 'uploader_id': 'realengineering', }, 'params': { 'usenetrc': True, }, - 'skip': 'All Nebula content requires authentication', }, { 'url': 'https://nebula.app/videos/money-episode-1-the-draw', @@ -66,173 
+209,82 @@ class NebulaIE(InfoExtractor): 'upload_date': '20200323', 'timestamp': 1584980400, 'channel': 'Tom Scott Presents: Money', + 'channel_id': 'tom-scott-presents-money', 'uploader': 'Tom Scott Presents: Money', + 'uploader_id': 'tom-scott-presents-money', }, 'params': { 'usenetrc': True, }, - 'skip': 'All Nebula content requires authentication', }, { 'url': 'https://watchnebula.com/videos/money-episode-1-the-draw', 'only_matching': True, }, ] - _NETRC_MACHINE = 'watchnebula' - _nebula_token = None + def _fetch_video_metadata(self, slug): + return self._call_nebula_api(f'https://content.watchnebula.com/video/{slug}/', + video_id=slug, + auth_type='bearer', + note='Fetching video meta data') - def _retrieve_nebula_auth(self): - """ - Log in to Nebula, and returns a Nebula API token - """ + def _real_extract(self, url): + slug = self._match_id(url) + video = self._fetch_video_metadata(slug) + return self._build_video_info(video) - username, password = self._get_login_info() - if not (username and password): - self.raise_login_required() - self.report_login() - data = json.dumps({'email': username, 'password': password}).encode('utf8') - response = self._download_json( - 'https://api.watchnebula.com/api/v1/auth/login/', - data=data, fatal=False, video_id=None, - headers={ - 'content-type': 'application/json', - # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint - 'cookie': '' +class NebulaCollectionIE(NebulaBaseIE): + IE_NAME = 'nebula:collection' + _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/(?!videos/)(?P[-\w]+)' + _TESTS = [ + { + 'url': 'https://nebula.app/tom-scott-presents-money', + 'info_dict': { + 'id': 'tom-scott-presents-money', + 'title': 'Tom Scott Presents: Money', + 'description': 'Tom Scott hosts a series all about trust, negotiation and money.', }, - note='Authenticating to Nebula with supplied credentials', - errnote='Authentication failed or rejected') - if not response or not response.get('key'): - self.raise_login_required() - - # save nebula token as cookie - self._set_cookie( - 'nebula.app', 'nebula-auth', - compat_urllib_parse_quote( - json.dumps({ - "apiToken": response["key"], - "isLoggingIn": False, - "isLoggingOut": False, - }, separators=(",", ":"))), - expire_time=int(time.time()) + 86400 * 365, - ) - - return response['key'] - - def _retrieve_zype_api_key(self, page_url, display_id): - """ - Retrieves the Zype API key - """ - - # Find the js that has the API key from the webpage and download it - webpage = self._download_webpage(page_url, video_id=display_id) - main_script_relpath = self._search_regex( - r']*src="(?P[^"]*main.[0-9a-f]*.chunk.js)"[^>]*>', webpage, - group='script_relpath', name='script relative path', fatal=True) - main_script_abspath = urljoin(page_url, main_script_relpath) - main_script = self._download_webpage(main_script_abspath, video_id=display_id, - note='Retrieving Zype API key') - - api_key = self._search_regex( - r'REACT_APP_ZYPE_API_KEY\s*:\s*"(?P[\w-]*)"', main_script, - group='api_key', name='API key', fatal=True) - - return api_key - - def _call_zype_api(self, path, params, video_id, api_key, note): - """ - A helper for making calls to the Zype API. - """ - query = {'api_key': api_key, 'per_page': 1} - query.update(params) - return self._download_json('https://api.zype.com' + path, video_id, query=query, note=note) - - def _call_nebula_api(self, path, video_id, access_token, note): - """ - A helper for making calls to the Nebula API. 
- """ - return self._download_json('https://api.watchnebula.com/api/v1' + path, video_id, headers={ - 'Authorization': 'Token {access_token}'.format(access_token=access_token) - }, note=note) - - def _fetch_zype_access_token(self, video_id): - try: - user_object = self._call_nebula_api('/auth/user/', video_id, self._nebula_token, note='Retrieving Zype access token') - except ExtractorError as exc: - # if 401, attempt credential auth and retry - if exc.cause and isinstance(exc.cause, HTTPError) and exc.cause.code == 401: - self._nebula_token = self._retrieve_nebula_auth() - user_object = self._call_nebula_api('/auth/user/', video_id, self._nebula_token, note='Retrieving Zype access token') - else: - raise - - access_token = try_get(user_object, lambda x: x['zype_auth_info']['access_token'], compat_str) - if not access_token: - if try_get(user_object, lambda x: x['is_subscribed'], bool): - # TODO: Reimplement the same Zype token polling the Nebula frontend implements - # see https://github.com/ytdl-org/youtube-dl/pull/24805#issuecomment-749231532 - raise ExtractorError( - 'Unable to extract Zype access token from Nebula API authentication endpoint. ' - 'Open an arbitrary video in a browser with this account to generate a token', - expected=True) - raise ExtractorError('Unable to extract Zype access token from Nebula API authentication endpoint') - return access_token - - def _extract_channel_title(self, video_meta): - # TODO: Implement the API calls giving us the channel list, - # so that we can do the title lookup and then figure out the channel URL - categories = video_meta.get('categories', []) if video_meta else [] - # the channel name is the value of the first category - for category in categories: - if category.get('value'): - return category['value'][0] - - def _real_initialize(self): - # check cookie jar for valid token - nebula_cookies = self._get_cookies('https://nebula.app') - nebula_cookie = nebula_cookies.get('nebula-auth') - if nebula_cookie: - self.to_screen('Authenticating to Nebula with token from cookie jar') - nebula_cookie_value = compat_urllib_parse_unquote(nebula_cookie.value) - self._nebula_token = self._parse_json(nebula_cookie_value, None).get('apiToken') + 'playlist_count': 5, + 'params': { + 'usenetrc': True, + }, + }, { + 'url': 'https://nebula.app/lindsayellis', + 'info_dict': { + 'id': 'lindsayellis', + 'title': 'Lindsay Ellis', + 'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.', + }, + 'playlist_mincount': 100, + 'params': { + 'usenetrc': True, + }, + }, + ] - # try to authenticate using credentials if no valid token has been found - if not self._nebula_token: - self._nebula_token = self._retrieve_nebula_auth() + def _generate_playlist_entries(self, collection_id, channel): + episodes = channel['episodes']['results'] + for page_num in itertools.count(2): + for episode in episodes: + yield self._build_video_info(episode) + next_url = channel['episodes']['next'] + if not next_url: + break + channel = self._call_nebula_api(next_url, collection_id, auth_type='bearer', + note=f'Retrieving channel page {page_num}') + episodes = channel['episodes']['results'] def _real_extract(self, url): - display_id = self._match_id(url) - api_key = self._retrieve_zype_api_key(url, display_id) - - response = self._call_zype_api('/videos', {'friendly_title': display_id}, - display_id, api_key, note='Retrieving metadata from Zype') - if len(response.get('response') or []) != 1: - raise ExtractorError('Unable to find video on Zype API') - video_meta 
= response['response'][0] - - video_id = video_meta['_id'] - zype_access_token = self._fetch_zype_access_token(display_id) + collection_id = self._match_id(url) + channel_url = f'https://content.watchnebula.com/video/channels/{collection_id}/' + channel = self._call_nebula_api(channel_url, collection_id, auth_type='bearer', note='Retrieving channel') + channel_details = channel['details'] - channel_title = self._extract_channel_title(video_meta) - - return { - 'id': video_id, - 'display_id': display_id, - '_type': 'url_transparent', - 'ie_key': 'Zype', - 'url': 'https://player.zype.com/embed/%s.html?access_token=%s' % (video_id, zype_access_token), - 'title': video_meta.get('title'), - 'description': video_meta.get('description'), - 'timestamp': parse_iso8601(video_meta.get('published_at')), - 'thumbnails': [{ - 'id': tn.get('name'), # this appears to be null - 'url': tn['url'], - 'width': tn.get('width'), - 'height': tn.get('height'), - } for tn in video_meta.get('thumbnails', [])], - 'duration': video_meta.get('duration'), - 'channel': channel_title, - 'uploader': channel_title, # we chose uploader = channel name - # TODO: uploader_url, channel_id, channel_url - } + return self.playlist_result( + entries=self._generate_playlist_entries(collection_id, channel), + playlist_id=collection_id, + playlist_title=channel_details['title'], + playlist_description=channel_details['description'] + ) From 2abf0815542dd44724b577752fb9339e76816057 Mon Sep 17 00:00:00 2001 From: Yakabuff Date: Sat, 27 Nov 2021 02:04:51 -0500 Subject: [PATCH 0288/2552] [xvideos] Fix extractor (#1799) Closes #1788 Authored by: Yakabuff --- yt_dlp/extractor/xvideos.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/yt_dlp/extractor/xvideos.py b/yt_dlp/extractor/xvideos.py index ef45eb929..ab07f01af 100644 --- a/yt_dlp/extractor/xvideos.py +++ b/yt_dlp/extractor/xvideos.py @@ -83,9 +83,7 @@ class XVideosIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - - webpage = self._download_webpage( - 'https://www.xvideos.com/video%s/' % video_id, video_id) + webpage = self._download_webpage(url, video_id) mobj = re.search(r'
<h1 class="inlineError">(.+?)</h1>
', webpage) if mobj: From 4e4ba1d75f250240725c0012edbd88cc0a7ead4b Mon Sep 17 00:00:00 2001 From: chio0hai <94094996+chio0hai@users.noreply.github.com> Date: Sat, 27 Nov 2021 02:10:29 -0500 Subject: [PATCH 0289/2552] [redgifs] Add extractor (#1631) Closes #1504 Authored by: chio0hai --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/redgifs.py | 94 ++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 yt_dlp/extractor/redgifs.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 2fb9515c0..dd9edff0e 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1216,6 +1216,7 @@ from .redbulltv import ( RedBullIE, ) from .reddit import RedditIE +from .redgifs import RedGifsIE from .redtube import RedTubeIE from .regiotv import RegioTVIE from .rentv import ( diff --git a/yt_dlp/extractor/redgifs.py b/yt_dlp/extractor/redgifs.py new file mode 100644 index 000000000..1257d1344 --- /dev/null +++ b/yt_dlp/extractor/redgifs.py @@ -0,0 +1,94 @@ +# coding: utf-8 + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + qualities, + try_get, +) + + +class RedGifsIE(InfoExtractor): + _VALID_URL = r'https?://(?:(?:www|thumbs2?)\.)?redgifs\.com/(?:watch/)?(?P[^-/?#\.]+)' + _FORMATS = { + 'gif': 250, + 'sd': 480, + 'hd': None, + } + _TESTS = [{ + 'url': 'https://www.redgifs.com/watch/squeakyhelplesswisent', + 'info_dict': { + 'id': 'squeakyhelplesswisent', + 'ext': 'mp4', + 'title': 'Hotwife Legs Thick', + 'timestamp': 1636287915, + 'upload_date': '20211107', + 'uploader': 'ignored52', + 'duration': 16, + 'view_count': int, + 'like_count': int, + 'categories': list, + 'age_limit': 18, + } + }, { + 'url': 'https://thumbs2.redgifs.com/SqueakyHelplessWisent-mobile.mp4#t=0', + 'info_dict': { + 'id': 'squeakyhelplesswisent', + 'ext': 'mp4', + 'title': 'Hotwife Legs Thick', + 'timestamp': 1636287915, + 'upload_date': '20211107', + 'uploader': 'ignored52', + 'duration': 16, + 'view_count': int, + 'like_count': int, + 'categories': list, + 'age_limit': 18, + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url).lower() + + video_info = self._download_json( + 'https://api.redgifs.com/v2/gifs/%s' % video_id, + video_id, 'Downloading video info') + if 'error' in video_info: + raise ExtractorError(f'RedGifs said: {video_info["error"]}', expected=True) + + gif = video_info['gif'] + urls = gif['urls'] + + quality = qualities(tuple(self._FORMATS.keys())) + + orig_height = int_or_none(gif.get('height')) + aspect_ratio = try_get(gif, lambda x: orig_height / x['width']) + + formats = [] + for format_id, height in self._FORMATS.items(): + video_url = urls.get(format_id) + if not video_url: + continue + height = min(orig_height, height or orig_height) + formats.append({ + 'url': video_url, + 'format_id': format_id, + 'width': height * aspect_ratio if aspect_ratio else None, + 'height': height, + 'quality': quality(format_id), + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': ' '.join(gif.get('tags') or []) or 'RedGifs', + 'timestamp': int_or_none(gif.get('createDate')), + 'uploader': gif.get('userName'), + 'duration': int_or_none(gif.get('duration')), + 'view_count': int_or_none(gif.get('views')), + 'like_count': int_or_none(gif.get('likes')), + 'categories': gif.get('tags') or [], + 'age_limit': 18, + 'formats': formats, + } From 896a88c5c61a5431222a9b3a75c2c9c5129b1bbe Mon Sep 17 00:00:00 2001 From: gustaf 
<86112802+18928172992817182@users.noreply.github.com> Date: Sat, 27 Nov 2021 08:24:48 +0100 Subject: [PATCH 0290/2552] [Tvplayhome] Fix extractor (#1357) Authored by: pukkandan, 18928172992817182 (gustaf) --- yt_dlp/extractor/tvplay.py | 113 ++++++++++++++++++++++--------------- 1 file changed, 66 insertions(+), 47 deletions(-) diff --git a/yt_dlp/extractor/tvplay.py b/yt_dlp/extractor/tvplay.py index 9771d9108..b5dbc5526 100644 --- a/yt_dlp/extractor/tvplay.py +++ b/yt_dlp/extractor/tvplay.py @@ -12,9 +12,9 @@ from ..utils import ( determine_ext, ExtractorError, int_or_none, - parse_duration, parse_iso8601, qualities, + traverse_obj, try_get, update_url_query, url_or_none, @@ -431,77 +431,96 @@ class ViafreeIE(InfoExtractor): class TVPlayHomeIE(InfoExtractor): - _VALID_URL = r'https?://(?:tv3?)?play\.(?:tv3\.lt|skaties\.lv|tv3\.ee)/(?:[^/]+/)*[^/?#&]+-(?P\d+)' + _VALID_URL = r'''(?x) + https?:// + (?:tv3?)? + play\.(?:tv3|skaties)\.(?Plv|lt|ee)/ + (?Plives/)? + [^?#&]+(?:episode|programme|clip)-(?P\d+) + ''' _TESTS = [{ - 'url': 'https://tvplay.tv3.lt/aferistai-n-7/aferistai-10047125/', + 'url': 'https://play.tv3.lt/series/gauju-karai-karveliai,serial-2343791/serija-8,episode-2343828', 'info_dict': { - 'id': '366367', + 'id': '2343828', 'ext': 'mp4', - 'title': 'Aferistai', - 'description': 'Aferistai. Kalėdinė pasaka.', - 'series': 'Aferistai [N-7]', - 'season': '1 sezonas', + 'title': 'Gaujų karai. Karveliai (2021) | S01E08: Serija 8', + 'description': 'md5:f6fcfbb236429f05531131640dfa7c81', + 'duration': 2710, + 'season': 'Gaujų karai. Karveliai', 'season_number': 1, - 'duration': 464, - 'timestamp': 1394209658, - 'upload_date': '20140307', - 'age_limit': 18, + 'release_year': 2021, + 'episode': 'Serija 8', + 'episode_number': 8, }, 'params': { - 'skip_download': True, + 'skip_download': 'm3u8', }, }, { - 'url': 'https://tvplay.skaties.lv/vinas-melo-labak/vinas-melo-labak-10280317/', - 'only_matching': True, + 'url': 'https://play.tv3.lt/series/moterys-meluoja-geriau-n-7,serial-2574652/serija-25,episode-3284937', + 'info_dict': { + 'id': '3284937', + 'ext': 'mp4', + 'season': 'Moterys meluoja geriau [N-7]', + 'season_number': 14, + 'release_year': 2021, + 'episode': 'Serija 25', + 'episode_number': 25, + 'title': 'Moterys meluoja geriau [N-7] (2021) | S14|E25: Serija 25', + 'description': 'md5:c6926e9710f1a126f028fbe121eddb79', + 'duration': 2440, + }, + 'skip': '404' }, { - 'url': 'https://tvplay.tv3.ee/cool-d-ga-mehhikosse/cool-d-ga-mehhikosse-10044354/', + 'url': 'https://play.tv3.lt/lives/tv6-lt,live-2838694/optibet-a-lygos-rungtynes-marijampoles-suduva--vilniaus-riteriai,programme-3422014', 'only_matching': True, }, { - 'url': 'https://play.tv3.lt/aferistai-10047125', + 'url': 'https://tv3play.skaties.lv/series/women-lie-better-lv,serial-1024464/women-lie-better-lv,episode-1038762', 'only_matching': True, }, { - 'url': 'https://tv3play.skaties.lv/vinas-melo-labak-10280317', + 'url': 'https://play.tv3.ee/series/_,serial-2654462/_,episode-2654474', 'only_matching': True, }, { - 'url': 'https://play.tv3.ee/cool-d-ga-mehhikosse-10044354', + 'url': 'https://tv3play.skaties.lv/clips/tv3-zinas-valsti-lidz-15novembrim-bus-majsede,clip-3464509', 'only_matching': True, }] def _real_extract(self, url): - video_id = self._match_id(url) + country, is_live, video_id = self._match_valid_url(url).groups() - asset = self._download_json( - urljoin(url, '/sb/public/asset/' + video_id), video_id) + api_path = 'lives/programmes' if is_live else 'vods' + data = self._download_json( + urljoin(url, 
f'/api/products/{api_path}/{video_id}?platform=BROWSER&lang={country.upper()}'), + video_id) - m3u8_url = asset['movie']['contentUrl'] - video_id = asset['assetId'] - asset_title = asset['title'] - title = asset_title['title'] - - formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls') + video_type = 'CATCHUP' if is_live else 'MOVIE' + stream_id = data['programRecordingId'] if is_live else video_id + stream = self._download_json( + urljoin(url, f'/api/products/{stream_id}/videos/playlist?videoType={video_type}&platform=BROWSER'), video_id) + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + stream['sources']['HLS'][0]['src'], video_id, 'mp4', 'm3u8_native', m3u8_id='hls') self._sort_formats(formats) - thumbnails = None - image_url = asset.get('imageUrl') - if image_url: - thumbnails = [{ - 'url': urljoin(url, image_url), - 'ext': 'jpg', - }] - - metadata = asset.get('metadata') or {} + thumbnails = set(traverse_obj( + data, (('galary', 'images', 'artworks'), ..., ..., ('miniUrl', 'mainUrl')), expected_type=url_or_none)) return { 'id': video_id, - 'title': title, - 'description': asset_title.get('summaryLong') or asset_title.get('summaryShort'), - 'thumbnails': thumbnails, - 'duration': parse_duration(asset_title.get('runTime')), - 'series': asset.get('tvSeriesTitle'), - 'season': asset.get('tvSeasonTitle'), - 'season_number': int_or_none(metadata.get('seasonNumber')), - 'episode': asset_title.get('titleBrief'), - 'episode_number': int_or_none(metadata.get('episodeNumber')), + 'title': self._resolve_title(data), + 'description': traverse_obj(data, 'description', 'lead'), + 'duration': int_or_none(data.get('duration')), + 'season': traverse_obj(data, ('season', 'serial', 'title')), + 'season_number': int_or_none(traverse_obj(data, ('season', 'number'))), + 'episode': data.get('title'), + 'episode_number': int_or_none(data.get('episode')), + 'release_year': int_or_none(traverse_obj(data, ('season', 'serial', 'year'))), + 'thumbnails': [{'url': url, 'ext': 'jpg'} for url in thumbnails], 'formats': formats, + 'subtitles': subtitles, } + + @staticmethod + def _resolve_title(data): + return try_get(data, lambda x: ( + f'{data["season"]["serial"]["title"]} ({data["season"]["serial"]["year"]}) | ' + f'S{data["season"]["number"]:02d}E{data["episode"]:02d}: {data["title"]}' + )) or data.get('title') From 639f80c1f9feca69509ede153c28f8651213f7fc Mon Sep 17 00:00:00 2001 From: mpeter50 <83356418+mpeter50@users.noreply.github.com> Date: Sat, 27 Nov 2021 09:00:58 +0100 Subject: [PATCH 0291/2552] [Twitch:vod] Add chapters (#1515) Authored by: mpeter50 --- yt_dlp/extractor/twitch.py | 71 ++++++++++++++++++++++++++++++++++---- 1 file changed, 65 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py index be70beed4..c5b16f2b0 100644 --- a/yt_dlp/extractor/twitch.py +++ b/yt_dlp/extractor/twitch.py @@ -24,6 +24,8 @@ from ..utils import ( parse_iso8601, parse_qs, qualities, + str_or_none, + traverse_obj, try_get, unified_timestamp, update_url_query, @@ -52,6 +54,7 @@ class TwitchBaseIE(InfoExtractor): 'VideoAccessToken_Clip': '36b89d2507fce29e5ca551df756d27c1cfe079e2609642b4390aa4c35796eb11', 'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c', 'VideoMetadata': '226edb3e692509f727fd56821f5653c05740242c82b0388883e0c0e75dcbf687', + 'VideoPlayer_ChapterSelectButtonVideo': '8d2793384aac3773beab5e59bd5d6f585aedb923d292800119e03d40cd0f9b41', } def _real_initialize(self): @@ 
-249,6 +252,38 @@ class TwitchVodIE(TwitchBaseIE): }, { 'url': 'https://player.twitch.tv/?video=480452374', 'only_matching': True, + }, { + 'url': 'https://www.twitch.tv/videos/635475444', + 'info_dict': { + 'id': 'v635475444', + 'ext': 'mp4', + 'title': 'Riot Games', + 'duration': 11643, + 'uploader': 'Riot Games', + 'uploader_id': 'riotgames', + 'timestamp': 1590770569, + 'upload_date': '20200529', + 'chapters': [ + { + 'start_time': 0, + 'end_time': 573, + 'title': 'League of Legends' + }, + { + 'start_time': 573, + 'end_time': 3922, + 'title': 'Legends of Runeterra' + }, + { + 'start_time': 3922, + 'end_time': 11643, + 'title': 'Art' + } + ], + }, + 'params': { + 'skip_download': True + } }] def _download_info(self, item_id): @@ -259,16 +294,24 @@ class TwitchVodIE(TwitchBaseIE): 'channelLogin': '', 'videoID': item_id, }, + }, { + 'operationName': 'VideoPlayer_ChapterSelectButtonVideo', + 'variables': { + 'includePrivate': False, + 'videoID': item_id, + }, }], - 'Downloading stream metadata GraphQL')[0]['data'] - video = data.get('video') + 'Downloading stream metadata GraphQL') + + video = traverse_obj(data, (0, 'data', 'video')) + video['moments'] = traverse_obj(data, (1, 'data', 'video', 'moments', 'edges', ..., 'node')) + if video is None: raise ExtractorError( 'Video %s does not exist' % item_id, expected=True) return self._extract_info_gql(video, item_id) - @staticmethod - def _extract_info(info): + def _extract_info(self, info): status = info.get('status') if status == 'recording': is_live = True @@ -304,8 +347,22 @@ class TwitchVodIE(TwitchBaseIE): 'is_live': is_live, } - @staticmethod - def _extract_info_gql(info, item_id): + def _extract_moments(self, info, item_id): + for moment in info.get('moments') or []: + start_time = int_or_none(moment.get('positionMilliseconds'), 1000) + duration = int_or_none(moment.get('durationMilliseconds'), 1000) + name = str_or_none(moment.get('description')) + + if start_time is None or duration is None: + self.report_warning(f'Important chapter information missing for chapter {name}', item_id) + continue + yield { + 'start_time': start_time, + 'end_time': start_time + duration, + 'title': name, + } + + def _extract_info_gql(self, info, item_id): vod_id = info.get('id') or item_id # id backward compatibility for download archives if vod_id[0] != 'v': @@ -314,6 +371,7 @@ class TwitchVodIE(TwitchBaseIE): if thumbnail: for p in ('width', 'height'): thumbnail = thumbnail.replace('{%s}' % p, '0') + return { 'id': vod_id, 'title': info.get('title') or 'Untitled Broadcast', @@ -324,6 +382,7 @@ class TwitchVodIE(TwitchBaseIE): 'uploader_id': try_get(info, lambda x: x['owner']['login'], compat_str), 'timestamp': unified_timestamp(info.get('publishedAt')), 'view_count': int_or_none(info.get('viewCount')), + 'chapters': list(self._extract_moments(info, item_id)), } def _real_extract(self, url): From dfd78699f59d66fe7cd109c2534240ea0254426c Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Sat, 27 Nov 2021 08:12:56 +0000 Subject: [PATCH 0292/2552] [Aljazeera] Fix extractor (#1577) Closes #1518 Authored by: u-spec-png --- yt_dlp/extractor/aljazeera.py | 87 ++++++++++++++++++++++++----------- 1 file changed, 59 insertions(+), 28 deletions(-) diff --git a/yt_dlp/extractor/aljazeera.py b/yt_dlp/extractor/aljazeera.py index e829b45e4..7bcdb7afb 100644 --- a/yt_dlp/extractor/aljazeera.py +++ b/yt_dlp/extractor/aljazeera.py @@ -1,55 +1,86 @@ +# coding: utf-8 from __future__ import unicode_literals import json from 
.common import InfoExtractor +from ..utils import ( + try_get, +) class AlJazeeraIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?Pprogram/[^/]+|(?:feature|video)s)/\d{4}/\d{1,2}/\d{1,2}/(?P[^/?&#]+)' + _VALID_URL = r'https?://(?P\w+\.aljazeera\.\w+)/(?Pprograms?/[^/]+|(?:feature|video|new)s)?/\d{4}/\d{1,2}/\d{1,2}/(?P[^/?&#]+)' _TESTS = [{ - 'url': 'https://www.aljazeera.com/program/episode/2014/9/19/deliverance', + 'url': 'https://balkans.aljazeera.net/videos/2021/11/6/pojedini-domovi-u-sarajevu-jos-pod-vodom-mjestanima-se-dostavlja-hrana', 'info_dict': { - 'id': '3792260579001', + 'id': '6280641530001', 'ext': 'mp4', - 'title': 'The Slum - Episode 1: Deliverance', - 'description': 'As a birth attendant advocating for family planning, Remy is on the frontline of Tondo\'s battle with overcrowding.', - 'uploader_id': '665003303001', - 'timestamp': 1411116829, - 'upload_date': '20140919', - }, - 'add_ie': ['BrightcoveNew'], - 'skip': 'Not accessible from Travis CI server', - }, { - 'url': 'https://www.aljazeera.com/videos/2017/5/11/sierra-leone-709-carat-diamond-to-be-auctioned-off', - 'only_matching': True, + 'title': 'Pojedini domovi u Sarajevu još pod vodom, mještanima se dostavlja hrana', + 'timestamp': 1636219149, + 'description': 'U sarajevskim naseljima Rajlovac i Reljevo stambeni objekti, ali i industrijska postrojenja i dalje su pod vodom.', + 'upload_date': '20211106', + } }, { - 'url': 'https://www.aljazeera.com/features/2017/8/21/transforming-pakistans-buses-into-art', - 'only_matching': True, + 'url': 'https://balkans.aljazeera.net/videos/2021/11/6/djokovic-usao-u-finale-mastersa-u-parizu', + 'info_dict': { + 'id': '6280654936001', + 'ext': 'mp4', + 'title': 'Đoković ušao u finale Mastersa u Parizu', + 'timestamp': 1636221686, + 'description': 'Novak Đoković je u polufinalu Mastersa u Parizu nakon preokreta pobijedio Poljaka Huberta Hurkacza.', + 'upload_date': '20211106', + }, }] - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s' + BRIGHTCOVE_URL_RE = r'https?://players.brightcove.net/(?P\d+)/(?P[a-zA-Z0-9]+)_(?P[^/]+)/index.html\?videoId=(?P\d+)' def _real_extract(self, url): - post_type, name = self._match_valid_url(url).groups() + base, post_type, id = self._match_valid_url(url).groups() + wp = { + 'balkans.aljazeera.net': 'ajb', + 'chinese.aljazeera.net': 'chinese', + 'mubasher.aljazeera.net': 'ajm', + }.get(base) or 'aje' post_type = { 'features': 'post', 'program': 'episode', + 'programs': 'episode', 'videos': 'video', + 'news': 'news', }[post_type.split('/')[0]] video = self._download_json( - 'https://www.aljazeera.com/graphql', name, query={ + f'https://{base}/graphql', id, query={ + 'wp-site': wp, 'operationName': 'ArchipelagoSingleArticleQuery', 'variables': json.dumps({ - 'name': name, + 'name': id, 'postType': post_type, }), }, headers={ - 'wp-site': 'aje', - })['data']['article']['video'] - video_id = video['id'] - account_id = video.get('accountId') or '665003303001' - player_id = video.get('playerId') or 'BkeSH5BDb' - return self.url_result( - self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id), - 'BrightcoveNew', video_id) + 'wp-site': wp, + }) + video = try_get(video, lambda x: x['data']['article']['video']) or {} + video_id = video.get('id') + account = video.get('accountId') or '911432371001' + player_id = video.get('playerId') or 'csvTfAlKW' + embed = 'default' + + if video_id is None: + webpage = self._download_webpage(url, id) + + account, player_id, embed, video_id = 
self._search_regex(self.BRIGHTCOVE_URL_RE, webpage, 'video id', + group=(1, 2, 3, 4), default=(None, None, None, None)) + + if video_id is None: + return { + '_type': 'url_transparent', + 'url': url, + 'ie_key': 'Generic' + } + + return { + '_type': 'url_transparent', + 'url': f'https://players.brightcove.net/{account}/{player_id}_{embed}/index.html?videoId={video_id}', + 'ie_key': 'BrightcoveNew' + } From 909b0d66f47c4fb73ee320f512f0c12502f16294 Mon Sep 17 00:00:00 2001 From: Grabien <60237587+Grabien@users.noreply.github.com> Date: Sat, 27 Nov 2021 12:37:45 +0200 Subject: [PATCH 0293/2552] [Senate.gov] Add SenateGovIE and fix SenateISVPIE (#1435) Authored by: Grabien, pukkandan --- yt_dlp/extractor/cspan.py | 2 +- yt_dlp/extractor/extractors.py | 2 +- yt_dlp/extractor/generic.py | 2 +- yt_dlp/extractor/senategov.py | 213 +++++++++++++++++++++++++++++++++ yt_dlp/extractor/senateisvp.py | 153 ----------------------- 5 files changed, 216 insertions(+), 156 deletions(-) create mode 100644 yt_dlp/extractor/senategov.py delete mode 100644 yt_dlp/extractor/senateisvp.py diff --git a/yt_dlp/extractor/cspan.py b/yt_dlp/extractor/cspan.py index 2e01aff48..c717aec3a 100644 --- a/yt_dlp/extractor/cspan.py +++ b/yt_dlp/extractor/cspan.py @@ -18,7 +18,7 @@ from ..utils import ( str_to_int, unescapeHTML, ) -from .senateisvp import SenateISVPIE +from .senategov import SenateISVPIE from .ustream import UstreamIE diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index dd9edff0e..a4baad2da 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1285,7 +1285,7 @@ from .scte import ( SCTECourseIE, ) from .seeker import SeekerIE -from .senateisvp import SenateISVPIE +from .senategov import SenateISVPIE, SenateGovIE from .sendtonews import SendtoNewsIE from .servus import ServusIE from .sevenplus import SevenPlusIE diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index ae0ebb14a..51557f0f1 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -56,7 +56,7 @@ from .sportbox import SportBoxIE from .myvi import MyviIE from .condenast import CondeNastIE from .udn import UDNEmbedIE -from .senateisvp import SenateISVPIE +from .senategov import SenateISVPIE from .svt import SVTIE from .pornhub import PornHubIE from .xhamster import XHamsterEmbedIE diff --git a/yt_dlp/extractor/senategov.py b/yt_dlp/extractor/senategov.py new file mode 100644 index 000000000..6f4240422 --- /dev/null +++ b/yt_dlp/extractor/senategov.py @@ -0,0 +1,213 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, + compat_urlparse, +) +from ..utils import ( + ExtractorError, + parse_qs, + unsmuggle_url, +) + +_COMMITTEES = { + 'ag': ('76440', 'http://ag-f.akamaihd.net'), + 'aging': ('76442', 'http://aging-f.akamaihd.net'), + 'approps': ('76441', 'http://approps-f.akamaihd.net'), + 'arch': ('', 'http://ussenate-f.akamaihd.net'), + 'armed': ('76445', 'http://armed-f.akamaihd.net'), + 'banking': ('76446', 'http://banking-f.akamaihd.net'), + 'budget': ('76447', 'http://budget-f.akamaihd.net'), + 'cecc': ('76486', 'http://srs-f.akamaihd.net'), + 'commerce': ('80177', 'http://commerce1-f.akamaihd.net'), + 'csce': ('75229', 'http://srs-f.akamaihd.net'), + 'dpc': ('76590', 'http://dpc-f.akamaihd.net'), + 'energy': ('76448', 'http://energy-f.akamaihd.net'), + 'epw': ('76478', 'http://epw-f.akamaihd.net'), + 'ethics': ('76449', 
'http://ethics-f.akamaihd.net'), + 'finance': ('76450', 'http://finance-f.akamaihd.net'), + 'foreign': ('76451', 'http://foreign-f.akamaihd.net'), + 'govtaff': ('76453', 'http://govtaff-f.akamaihd.net'), + 'help': ('76452', 'http://help-f.akamaihd.net'), + 'indian': ('76455', 'http://indian-f.akamaihd.net'), + 'intel': ('76456', 'http://intel-f.akamaihd.net'), + 'intlnarc': ('76457', 'http://intlnarc-f.akamaihd.net'), + 'jccic': ('85180', 'http://jccic-f.akamaihd.net'), + 'jec': ('76458', 'http://jec-f.akamaihd.net'), + 'judiciary': ('76459', 'http://judiciary-f.akamaihd.net'), + 'rpc': ('76591', 'http://rpc-f.akamaihd.net'), + 'rules': ('76460', 'http://rules-f.akamaihd.net'), + 'saa': ('76489', 'http://srs-f.akamaihd.net'), + 'smbiz': ('76461', 'http://smbiz-f.akamaihd.net'), + 'srs': ('75229', 'http://srs-f.akamaihd.net'), + 'uscc': ('76487', 'http://srs-f.akamaihd.net'), + 'vetaff': ('76462', 'http://vetaff-f.akamaihd.net'), +} + + +class SenateISVPIE(InfoExtractor): + _IE_NAME = 'senate.gov:isvp' + _VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P.+)' + + _TESTS = [{ + 'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png', + 'info_dict': { + 'id': 'judiciary031715', + 'ext': 'mp4', + 'title': 'Integrated Senate Video Player', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)$', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false', + 'info_dict': { + 'id': 'commerce011514', + 'ext': 'mp4', + 'title': 'Integrated Senate Video Player' + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'http://www.senate.gov/isvp/?type=arch&comm=intel&filename=intel090613&hc_location=ufi', + # checksum differs each time + 'info_dict': { + 'id': 'intel090613', + 'ext': 'mp4', + 'title': 'Integrated Senate Video Player' + } + }, { + # From http://www.c-span.org/video/?96791-1 + 'url': 'http://www.senate.gov/isvp?type=live&comm=banking&filename=banking012715', + 'only_matching': True, + }] + + @staticmethod + def _search_iframe_url(webpage): + mobj = re.search( + r"]+src=['\"](?Phttps?://www\.senate\.gov/isvp/?\?[^'\"]+)['\"]", + webpage) + if mobj: + return mobj.group('url') + + def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + + qs = compat_parse_qs(self._match_valid_url(url).group('qs')) + if not qs.get('filename') or not qs.get('type') or not qs.get('comm'): + raise ExtractorError('Invalid URL', expected=True) + + video_id = re.sub(r'.mp4$', '', qs['filename'][0]) + + webpage = self._download_webpage(url, video_id) + + if smuggled_data.get('force_title'): + title = smuggled_data['force_title'] + else: + title = self._html_search_regex(r'([^<]+)', webpage, video_id) + poster = qs.get('poster') + thumbnail = poster[0] if poster else None + + video_type = qs['type'][0] + committee = video_type if video_type == 'arch' else qs['comm'][0] + + stream_num, domain = _COMMITTEES[committee] + + formats = [] + if video_type == 'arch': + filename = video_id if '.' 
in video_id else video_id + '.mp4' + m3u8_url = compat_urlparse.urljoin(domain, 'i/' + filename + '/master.m3u8') + formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8') + else: + hdcore_sign = 'hdcore=3.1.0' + url_params = (domain, video_id, stream_num) + f4m_url = f'%s/z/%s_1@%s/manifest.f4m?{hdcore_sign}' % url_params + m3u8_url = '%s/i/%s_1@%s/master.m3u8' % url_params + for entry in self._extract_f4m_formats(f4m_url, video_id, f4m_id='f4m'): + # URLs without the extra param induce an 404 error + entry.update({'extra_param_to_segment_url': hdcore_sign}) + formats.append(entry) + for entry in self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8'): + mobj = re.search(r'(?P(?:-p|-b)).m3u8', entry['url']) + if mobj: + entry['format_id'] += mobj.group('tag') + formats.append(entry) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': thumbnail, + } + + +class SenateGovIE(InfoExtractor): + _IE_NAME = 'senate.gov' + _VALID_URL = r'https?:\/\/(?:www\.)?(help|appropriations|judiciary|banking|armed-services|finance)\.senate\.gov' + _TESTS = [{ + 'url': 'https://www.help.senate.gov/hearings/vaccines-saving-lives-ensuring-confidence-and-protecting-public-health', + 'info_dict': { + 'id': 'help090920', + 'display_id': 'vaccines-saving-lives-ensuring-confidence-and-protecting-public-health', + 'title': 'Vaccines: Saving Lives, Ensuring Confidence, and Protecting Public Health', + 'description': 'The U.S. Senate Committee on Health, Education, Labor & Pensions', + 'ext': 'mp4', + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.appropriations.senate.gov/hearings/watch?hearingid=B8A25434-5056-A066-6020-1F68CB75F0CD', + 'info_dict': { + 'id': 'appropsA051518', + 'display_id': 'watch?hearingid=B8A25434-5056-A066-6020-1F68CB75F0CD', + 'title': 'Review of the FY2019 Budget Request for the U.S. 
Army', + 'ext': 'mp4', + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.banking.senate.gov/hearings/21st-century-communities-public-transportation-infrastructure-investment-and-fast-act-reauthorization', + 'info_dict': { + 'id': 'banking041521', + 'display_id': '21st-century-communities-public-transportation-infrastructure-investment-and-fast-act-reauthorization', + 'title': '21st Century Communities: Public Transportation Infrastructure Investment and FAST Act Reauthorization', + 'description': 'The Official website of The United States Committee on Banking, Housing, and Urban Affairs', + 'ext': 'mp4', + }, + 'params': {'skip_download': 'm3u8'}, + }] + + def _real_extract(self, url): + display_id = self._generic_id(url) + webpage = self._download_webpage(url, display_id) + parse_info = parse_qs(self._search_regex( + r'', - webpage, 'embed url')) + + title = self._html_search_meta('name', webpage, 'Title', fatal=False) + timestamp = unified_timestamp(self._html_search_meta('uploadDate', webpage, 'Upload Date', default=None)) + description = self._html_search_meta('description', webpage, 'Description', default=None) + + global_embed_url = self._search_regex( + r'', webpage, 'Hash') + + embed_url = global_embed_url + hash + if VKIE.suitable(embed_url): return self.url_result(embed_url, VKIE.ie_key(), video_id) embed_page = self._download_webpage( - embed_url, video_id, headers={'Referer': url}) - video_ext = self._get_cookies(embed_url).get('video_ext') - if video_ext: - video_ext = compat_urllib_parse_unquote(video_ext.value) - if not video_ext: - video_ext = compat_b64decode(self._search_regex( - r'video_ext\s*:\s*[\'"]([A-Za-z0-9+/=]+)', - embed_page, 'video_ext')).decode() - video_id, sig, _, access_token = video_ext.split(':') + embed_url, video_id, 'Downloading embed webpage', headers={'Referer': url}) + + glob_params = self._parse_json(self._search_regex( + r'', + embed_page, 'Global Parameters'), video_id, transform_source=js_to_json) + host_name = compat_b64decode(glob_params['server'][::-1]).decode() + item = self._download_json( - 'https://api.vk.com/method/video.get', video_id, - headers={'User-Agent': 'okhttp/3.4.1'}, query={ - 'access_token': access_token, - 'sig': sig, - 'v': 5.44, + f'https://{host_name}/method/video.get/{video_id}', video_id, + headers={'Referer': url}, query={ + 'token': glob_params['video']['access_token'], 'videos': video_id, + 'ckey': glob_params['c_key'], + 'credentials': glob_params['video']['credentials'], })['response']['items'][0] - title = item['title'] formats = [] for f_id, f_url in item.get('files', {}).items(): if f_id == 'external': return self.url_result(f_url) ext, height = f_id.split('_') - formats.append({ - 'format_id': height + 'p', - 'url': f_url, - 'height': int_or_none(height), - 'ext': ext, - }) + height_extra_key = traverse_obj(glob_params, ('video', 'partial', 'quality', height)) + if height_extra_key: + formats.append({ + 'format_id': f'{height}p', + 'url': f'https://{host_name}/{f_url[8:]}&videos={video_id}&extra_key={height_extra_key}', + 'height': int_or_none(height), + 'ext': ext, + }) self._sort_formats(formats) thumbnails = [] @@ -96,10 +106,9 @@ class BIQLEIE(InfoExtractor): 'title': title, 'formats': formats, 'comment_count': int_or_none(item.get('comments')), - 'description': item.get('description'), + 'description': description, 'duration': int_or_none(item.get('duration')), 'thumbnails': thumbnails, - 'timestamp': int_or_none(item.get('date')), - 'uploader': item.get('owner_id'), + 'timestamp': 
timestamp, 'view_count': int_or_none(item.get('views')), } From ed66a17ef0b18159dda901f0122520c25ea95d6b Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 18 Feb 2022 23:16:16 +0530 Subject: [PATCH 0708/2552] [FFmpegConcat] Abort on `--simulate` --- yt_dlp/postprocessor/common.py | 4 +++- yt_dlp/postprocessor/ffmpeg.py | 7 +++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/yt_dlp/postprocessor/common.py b/yt_dlp/postprocessor/common.py index f2467c542..d761c9303 100644 --- a/yt_dlp/postprocessor/common.py +++ b/yt_dlp/postprocessor/common.py @@ -103,12 +103,14 @@ class PostProcessor(metaclass=PostProcessorMetaClass): return getattr(self._downloader, '_copy_infodict', dict)(info_dict) @staticmethod - def _restrict_to(*, video=True, audio=True, images=True): + def _restrict_to(*, video=True, audio=True, images=True, simulated=True): allowed = {'video': video, 'audio': audio, 'images': images} def decorator(func): @functools.wraps(func) def wrapper(self, info): + if not simulated and (self.get_param('simulate') or self.get_param('skip_download')): + return [], info format_type = ( 'video' if info.get('vcodec') != 'none' else 'audio' if info.get('acodec') != 'none' diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 05eeee2d7..d4495b4a2 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -1145,16 +1145,15 @@ class FFmpegConcatPP(FFmpegPostProcessor): super().concat_files(in_files, out_file) return in_files - @PostProcessor._restrict_to(images=False) + @PostProcessor._restrict_to(images=False, simulated=False) def run(self, info): entries = info.get('entries') or [] - if (self.get_param('skip_download') or not any(entries) - or self._only_multi_video and info['_type'] != 'multi_video'): + if not any(entries) or (self._only_multi_video and info['_type'] != 'multi_video'): return [], info elif any(len(entry) > 1 for entry in traverse_obj(entries, (..., 'requested_downloads')) or []): raise PostProcessingError('Concatenation is not supported when downloading multiple separate formats') - in_files = traverse_obj(entries, (..., 'requested_downloads', 0, 'filepath')) + in_files = traverse_obj(entries, (..., 'requested_downloads', 0, 'filepath')) or [] if len(in_files) < len(entries): raise PostProcessingError('Aborting concatenation because some downloads failed') From febff4c1194de0528c087274bc17e3a8be3296ba Mon Sep 17 00:00:00 2001 From: Bepis <36346617+bbepis@users.noreply.github.com> Date: Sat, 19 Feb 2022 23:00:51 +1100 Subject: [PATCH 0709/2552] [tubitv] Fix/improve TV series extraction (#2829) Authored by: bbepis --- yt_dlp/extractor/tubitv.py | 12 ++++++++++-- yt_dlp/utils.py | 2 ++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/tubitv.py b/yt_dlp/extractor/tubitv.py index 2e9b325ba..e9b66ec77 100644 --- a/yt_dlp/extractor/tubitv.py +++ b/yt_dlp/extractor/tubitv.py @@ -107,6 +107,9 @@ class TubiTvIE(InfoExtractor): 'url': self._proto_relative_url(sub_url), }) + season_number, episode_number, episode_title = self._search_regex( + r'^S(\d+):E(\d+) - (.+)', title, 'episode info', fatal=False, group=(1, 2, 3), default=(None, None, None)) + return { 'id': video_id, 'title': title, @@ -117,6 +120,9 @@ class TubiTvIE(InfoExtractor): 'duration': int_or_none(video_data.get('duration')), 'uploader_id': video_data.get('publisher_id'), 'release_year': int_or_none(video_data.get('year')), + 'season_number': int_or_none(season_number), + 'episode_number': int_or_none(episode_number), + 
'episode_title': episode_title } @@ -132,9 +138,11 @@ class TubiTvShowIE(InfoExtractor): def _entries(self, show_url, show_name): show_webpage = self._download_webpage(show_url, show_name) + show_json = self._parse_json(self._search_regex( - r"window\.__data\s*=\s*({.+?});\s*", - show_webpage, 'data',), show_name, transform_source=js_to_json)['video'] + r'window\.__data\s*=\s*({[^<]+});\s*', + show_webpage, 'data'), show_name, transform_source=js_to_json)['video'] + for episode_id in show_json['fullContentById'].keys(): yield self.url_result( 'tubitv:%s' % episode_id, diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index c5489d494..f5cad0e54 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -3143,6 +3143,8 @@ def js_to_json(code, vars={}): return '"%s"' % v + code = re.sub(r'new Date\((".+")\)', r'\g<1>', code) + return re.sub(r'''(?sx) "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| From a7d4acc018378b30188685776e954168e041fa4f Mon Sep 17 00:00:00 2001 From: Lesmiscore Date: Sun, 20 Feb 2022 17:33:58 +0900 Subject: [PATCH 0710/2552] [youtube] Escape possible `$` in `_extract_n_function_name` regex --- yt_dlp/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index c03637f5f..4d9815eb3 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2418,7 +2418,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if not idx: return nfunc return json.loads(js_to_json(self._search_regex( - rf'var {nfunc}\s*=\s*(\[.+?\]);', jscode, + rf'var {re.escape(nfunc)}\s*=\s*(\[.+?\]);', jscode, f'Initial JS player n function list ({nfunc}.{idx})')))[int(idx)] def _extract_n_function(self, video_id, player_url): From be8d623455e28c69254230a73c0b87ba87ea1652 Mon Sep 17 00:00:00 2001 From: "Daniel.Zeng" Date: Sun, 20 Feb 2022 19:27:02 +0800 Subject: [PATCH 0711/2552] [Bilibili] Pass referer for all formats (#2834) Authored by: blackgear --- yt_dlp/extractor/bilibili.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index a775aa97f..1bbf7ca1c 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -225,10 +225,6 @@ class BiliBiliIE(InfoExtractor): 'quality': -2 if 'hd.mp4' in backup_url else -3, }) - for a_format in formats: - a_format.setdefault('http_headers', {}).update({ - 'Referer': url, - }) for audio in audios: formats.append({ 'url': audio.get('baseUrl') or audio.get('base_url') or audio.get('url'), @@ -252,6 +248,9 @@ class BiliBiliIE(InfoExtractor): 'id': video_id, 'duration': float_or_none(durl.get('length'), 1000), 'formats': formats, + 'http_headers': { + 'Referer': url, + }, }) break From 3f4faff74873003a07303fc1faff3e5969b9c508 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 20 Feb 2022 17:13:13 +0530 Subject: [PATCH 0712/2552] [generic] Pass referer to extracted formats Closes #2839 --- yt_dlp/extractor/generic.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index baedd7948..d975e4bdb 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -3991,12 +3991,16 @@ class GenericIE(InfoExtractor): # here's a fun little line of code for you: video_id = os.path.splitext(video_id)[0] + headers = { + 'referer': full_response.geturl() + } entry_info_dict = { 'id': video_id, 'uploader': video_uploader, 'title': video_title, 
'age_limit': age_limit, + 'http_headers': headers, } if RtmpIE.suitable(video_url): @@ -4014,11 +4018,11 @@ class GenericIE(InfoExtractor): elif ext == 'xspf': return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id) elif ext == 'm3u8': - entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4') + entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4', headers=headers) elif ext == 'mpd': - entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_mpd_formats_and_subtitles(video_url, video_id) + entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_mpd_formats_and_subtitles(video_url, video_id, headers=headers) elif ext == 'f4m': - entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id) + entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id, headers=headers) elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url: # Just matching .ism/manifest is not enough to be reliably sure # whether it's actually an ISM manifest or some other streaming From f099df1463705a37c5aec0c8108b2b00750c9428 Mon Sep 17 00:00:00 2001 From: "Lesmiscore (Naoya Ozaki)" Date: Sun, 20 Feb 2022 20:48:26 +0900 Subject: [PATCH 0713/2552] [TwitCasting] Check for password protection (#2838) Authored by: Lesmiscore --- yt_dlp/extractor/twitcasting.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/yt_dlp/extractor/twitcasting.py b/yt_dlp/extractor/twitcasting.py index 08222df95..af911de98 100644 --- a/yt_dlp/extractor/twitcasting.py +++ b/yt_dlp/extractor/twitcasting.py @@ -94,6 +94,9 @@ class TwitCastingIE(InfoExtractor): urlh.geturl(), video_id, data=request_data, headers={'Origin': 'https://twitcasting.tv'}, note='Retrying authentication') + # has to check here as the first request can contain password input form even if the password is correct + if re.search(r'\s*]+?name="password"', webpage): + raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True) title = (clean_html(get_element_by_id('movietitle', webpage)) or self._html_search_meta(['og:title', 'twitter:title'], webpage, fatal=True)) From 4d4f9a029f8aebd06bcd98ca61e7629de47e0018 Mon Sep 17 00:00:00 2001 From: Aniruddh Joshi Date: Mon, 21 Feb 2022 13:37:36 +0530 Subject: [PATCH 0714/2552] [zee5] Support web-series (#2827) Authored by: Aniruddh-J --- yt_dlp/extractor/zee5.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/zee5.py b/yt_dlp/extractor/zee5.py index 55c225d85..ebe393ec7 100644 --- a/yt_dlp/extractor/zee5.py +++ b/yt_dlp/extractor/zee5.py @@ -23,7 +23,7 @@ class Zee5IE(InfoExtractor): zee5:| https?://(?:www\.)?zee5\.com/(?:[^#?]+/)? 
(?: - (?:tv-shows|kids|zee5originals)(?:/[^#/?]+){3} + (?:tv-shows|kids|web-series|zee5originals)(?:/[^#/?]+){3} |movies/[^#/?]+ )/(?P[^#/?]+)/ ) @@ -82,6 +82,9 @@ class Zee5IE(InfoExtractor): }, { 'url': 'https://www.zee5.com/global/hi/tv-shows/details/kundali-bhagya/0-6-366/kundali-bhagya-march-08-2021/0-1-manual_7g9jv1os7730', 'only_matching': True + }, { + 'url': 'https://www.zee5.com/web-series/details/mithya/0-6-4z587408/maine-dekhi-hai-uski-mrityu/0-1-6z587412', + 'only_matching': True }] _DETAIL_API_URL = 'https://spapi.zee5.com/singlePlayback/getDetails?content_id={}&device_id={}&platform_name=desktop_web&country=IN&check_parental_control=false' _DEVICE_ID = 'iIxsxYf40cqO3koIkwzKHZhnJzHN13zb' @@ -179,7 +182,7 @@ class Zee5SeriesIE(InfoExtractor): (?: zee5:series:| https?://(?:www\.)?zee5\.com/(?:[^#?]+/)? - (?:tv-shows|kids|zee5originals)(?:/[^#/?]+){2}/ + (?:tv-shows|web-series|kids|zee5originals)(?:/[^#/?]+){2}/ ) (?P[^#/?]+)(?:/episodes)?/?(?:$|[?#]) ''' @@ -216,6 +219,9 @@ class Zee5SeriesIE(InfoExtractor): }, { 'url': 'https://www.zee5.com/tv-shows/details/chala-hawa-yeu-dya-ladies-zindabaad/0-6-2943/episodes', 'only_matching': True, + }, { + 'url': 'https://www.zee5.com/web-series/details/mithya/0-6-4z587408', + 'only_matching': True, }] def _entries(self, show_id): From 7a34b5d628ad3b92a14ee7f51daa1cab9e326615 Mon Sep 17 00:00:00 2001 From: Nil Admirari <50202386+nihil-admirari@users.noreply.github.com> Date: Tue, 22 Feb 2022 14:18:44 +0000 Subject: [PATCH 0715/2552] [SponsorBlock] Fixes for highlight and "full video labels" (#2849) Authored by: nihil-admirari --- yt_dlp/postprocessor/sponsorblock.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/yt_dlp/postprocessor/sponsorblock.py b/yt_dlp/postprocessor/sponsorblock.py index cd48b15ae..e7e04e86e 100644 --- a/yt_dlp/postprocessor/sponsorblock.py +++ b/yt_dlp/postprocessor/sponsorblock.py @@ -49,6 +49,9 @@ class SponsorBlockPP(FFmpegPostProcessor): def duration_filter(s): start_end = s['segment'] + # Ignore entire video segments (https://wiki.sponsor.ajay.app/w/Types). + if start_end == (0, 0): + return False # Ignore milliseconds difference at the start. if start_end[0] <= 1: start_end[0] = 0 @@ -89,6 +92,7 @@ class SponsorBlockPP(FFmpegPostProcessor): url = f'{self._API_URL}/api/skipSegments/{hash[:4]}?' 
+ compat_urllib_parse_urlencode({ 'service': service, 'categories': json.dumps(self._categories), + 'actionTypes': json.dumps(['skip', 'poi']) }) self.write_debug(f'SponsorBlock query: {url}') for d in self._get_json(url): From 971c4847d7834421e6752dc9999f59296a1f62e6 Mon Sep 17 00:00:00 2001 From: i6t <62123048+i6t@users.noreply.github.com> Date: Tue, 22 Feb 2022 23:24:36 +0900 Subject: [PATCH 0716/2552] [Gettr] Fix formats order (#2832) Closes #2557 Authored by: i6t --- yt_dlp/extractor/gettr.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/gettr.py b/yt_dlp/extractor/gettr.py index 179bd7c47..9842edd81 100644 --- a/yt_dlp/extractor/gettr.py +++ b/yt_dlp/extractor/gettr.py @@ -39,6 +39,7 @@ class GettrIE(GettrBaseIE): 'thumbnail': r're:^https?://.+/out\.jpg', 'timestamp': 1632782451058, 'duration': 58.5585, + 'tags': ['hornofafrica', 'explorations'], } }, { 'url': 'https://gettr.com/post/p4iahp', @@ -52,6 +53,7 @@ class GettrIE(GettrBaseIE): 'thumbnail': r're:^https?://.+/out\.jpg', 'timestamp': 1626594455017, 'duration': 23, + 'tags': 'count:12', } }] @@ -84,7 +86,7 @@ class GettrIE(GettrBaseIE): formats = self._extract_m3u8_formats( urljoin(self._MEDIA_BASE_URL, vid), post_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls') if vid else [] + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) if vid else [] if ovid: formats.append({ @@ -93,8 +95,6 @@ class GettrIE(GettrBaseIE): 'ext': 'mp4', 'width': int_or_none(post_data.get('vid_wid')), 'height': int_or_none(post_data.get('vid_hgt')), - 'source_preference': 1, - 'quality': 1, }) self._sort_formats(formats) From 77cc7c6e6093c7b16a96870098edec70f943c62c Mon Sep 17 00:00:00 2001 From: "Lesmiscore (Naoya Ozaki)" Date: Wed, 23 Feb 2022 01:15:08 +0900 Subject: [PATCH 0717/2552] [nhk] Add support for NHK for School (#2850) Authored by: Lesmiscore --- yt_dlp/extractor/common.py | 4 + yt_dlp/extractor/extractors.py | 3 + yt_dlp/extractor/nhk.py | 149 ++++++++++++++++++++++++++++++++- 3 files changed, 155 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 04d4c0733..d8bb21137 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1307,6 +1307,10 @@ class InfoExtractor(object): def _og_search_url(self, html, **kargs): return self._og_search_property('url', html, **kargs) + def _html_extract_title(self, html, name, **kwargs): + return self._html_search_regex( + r'(?s)(.*?)', html, name, **kwargs) + def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs): name = variadic(name) if display_name is None: diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 15bc74915..7d4262acf 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -994,6 +994,9 @@ from .nfl import ( from .nhk import ( NhkVodIE, NhkVodProgramIE, + NhkForSchoolBangumiIE, + NhkForSchoolSubjectIE, + NhkForSchoolProgramListIE, ) from .nhl import NHLIE from .nick import ( diff --git a/yt_dlp/extractor/nhk.py b/yt_dlp/extractor/nhk.py index 4998fed83..626c6379b 100644 --- a/yt_dlp/extractor/nhk.py +++ b/yt_dlp/extractor/nhk.py @@ -1,8 +1,15 @@ from __future__ import unicode_literals +import re from .common import InfoExtractor -from ..utils import urljoin +from ..utils import ( + parse_duration, + traverse_obj, + unescapeHTML, + unified_timestamp, + urljoin +) class NhkBaseIE(InfoExtractor): @@ -176,3 +183,143 @@ class NhkVodProgramIE(NhkBaseIE): program_title = 
entries[0].get('series') return self.playlist_result(entries, program_id, program_title) + + +class NhkForSchoolBangumiIE(InfoExtractor): + _VALID_URL = r'https?://www2\.nhk\.or\.jp/school/movie/(?Pbangumi|clip)\.cgi\?das_id=(?P[a-zA-Z0-9_-]+)' + _TESTS = [{ + 'url': 'https://www2.nhk.or.jp/school/movie/bangumi.cgi?das_id=D0005150191_00000', + 'info_dict': { + 'id': 'D0005150191_00003', + 'title': 'にている かな', + 'duration': 599.999, + 'timestamp': 1396414800, + + 'upload_date': '20140402', + 'ext': 'mp4', + + 'chapters': 'count:12' + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }] + + def _real_extract(self, url): + program_type, video_id = self._match_valid_url(url).groups() + + webpage = self._download_webpage( + f'https://www2.nhk.or.jp/school/movie/{program_type}.cgi?das_id={video_id}', video_id) + + # searches all variables + base_values = {g.group(1): g.group(2) for g in re.finditer(r'var\s+([a-zA-Z_]+)\s*=\s*"([^"]+?)";', webpage)} + # and programObj values too + program_values = {g.group(1): g.group(3) for g in re.finditer(r'(?:program|clip)Obj\.([a-zA-Z_]+)\s*=\s*(["\'])([^"]+?)\2;', webpage)} + # extract all chapters + chapter_durations = [parse_duration(g.group(1)) for g in re.finditer(r'chapterTime\.push\(\'([0-9:]+?)\'\);', webpage)] + chapter_titles = [' '.join([g.group(1) or '', unescapeHTML(g.group(2))]).strip() for g in re.finditer(r'
<div class="cpTitle"><span>(scene\s*\d+)?</span>([^<]+?)</div>
', webpage)] + + # this is how player_core.js is actually doing (!) + version = base_values.get('r_version') or program_values.get('version') + if version: + video_id = f'{video_id.split("_")[0]}_{version}' + + formats = self._extract_m3u8_formats( + f'https://nhks-vh.akamaihd.net/i/das/{video_id[0:8]}/{video_id}_V_000.f4v/master.m3u8', + video_id, ext='mp4', m3u8_id='hls') + self._sort_formats(formats) + + duration = parse_duration(base_values.get('r_duration')) + + chapters = None + if chapter_durations and chapter_titles and len(chapter_durations) == len(chapter_titles): + start_time = chapter_durations + end_time = chapter_durations[1:] + [duration] + chapters = [{ + 'start_time': s, + 'end_time': e, + 'title': t, + } for s, e, t in zip(start_time, end_time, chapter_titles)] + + return { + 'id': video_id, + 'title': program_values.get('name'), + 'duration': parse_duration(base_values.get('r_duration')), + 'timestamp': unified_timestamp(base_values['r_upload']), + 'formats': formats, + 'chapters': chapters, + } + + +class NhkForSchoolSubjectIE(InfoExtractor): + IE_DESC = 'Portal page for each school subjects, like Japanese (kokugo, 国語) or math (sansuu/suugaku or 算数・数学)' + KNOWN_SUBJECTS = ( + 'rika', 'syakai', 'kokugo', + 'sansuu', 'seikatsu', 'doutoku', + 'ongaku', 'taiiku', 'zukou', + 'gijutsu', 'katei', 'sougou', + 'eigo', 'tokkatsu', + 'tokushi', 'sonota', + ) + _VALID_URL = r'https?://www\.nhk\.or\.jp/school/(?P%s)/?(?:[\?#].*)?$' % '|'.join(re.escape(s) for s in KNOWN_SUBJECTS) + + _TESTS = [{ + 'url': 'https://www.nhk.or.jp/school/sougou/', + 'info_dict': { + 'id': 'sougou', + 'title': '総合的な学習の時間', + }, + 'playlist_mincount': 16, + }, { + 'url': 'https://www.nhk.or.jp/school/rika/', + 'info_dict': { + 'id': 'rika', + 'title': '理科', + }, + 'playlist_mincount': 15, + }] + + def _real_extract(self, url): + subject_id = self._match_id(url) + webpage = self._download_webpage(url, subject_id) + + return self.playlist_from_matches( + re.finditer(rf'href="((?:https?://www\.nhk\.or\.jp)?/school/{re.escape(subject_id)}/[^/]+/)"', webpage), + subject_id, + self._html_search_regex(r'(?s)\s*\s*([^<]+?)', webpage, 'title', fatal=False), + lambda g: urljoin(url, g.group(1))) + + +class NhkForSchoolProgramListIE(InfoExtractor): + _VALID_URL = r'https?://www\.nhk\.or\.jp/school/(?P(?:%s)/[a-zA-Z0-9_-]+)' % ( + '|'.join(re.escape(s) for s in NhkForSchoolSubjectIE.KNOWN_SUBJECTS) + ) + _TESTS = [{ + 'url': 'https://www.nhk.or.jp/school/sougou/q/', + 'info_dict': { + 'id': 'sougou/q', + 'title': 'Q~こどものための哲学', + }, + 'playlist_mincount': 20, + }] + + def _real_extract(self, url): + program_id = self._match_id(url) + + webpage = self._download_webpage(f'https://www.nhk.or.jp/school/{program_id}/', program_id) + + title = self._og_search_title(webpage, fatal=False) or self._html_extract_title(webpage, fatal=False) or self._html_search_regex(r'

([^<]+?)とは?\s*

', webpage, 'title', fatal=False) + title = re.sub(r'\s*\|\s*NHK\s+for\s+School\s*$', '', title) if title else None + description = self._html_search_regex( + r'(?s)\s*

[^<]+

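A quick aside on the chapter handling in NhkForSchoolBangumiIE above: each cue's start time is paired with the next cue's start, and the final chapter ends at the total duration. A minimal standalone sketch of the same pairing, using hypothetical `starts`/`titles`/`total` inputs in place of the extractor's parsed values:

    def build_chapters(starts, titles, total):
        # Pair each start time with the next one; the last chapter ends at `total`
        ends = starts[1:] + [total]
        return [{'start_time': s, 'end_time': e, 'title': t}
                for s, e, t in zip(starts, ends, titles)]

    print(build_chapters([0.0, 12.5, 40.0], ['scene 1', 'scene 2', 'scene 3'], 599.999))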
From 72073451be806a845caefaf4d6bea2219d45cefd Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Mon, 21 Feb 2022 20:31:49 +0530
Subject: [PATCH 0718/2552] [ThumbnailsConvertor] Support `webp`

Closes #2226
---
 README.md                      | 2 +-
 yt_dlp/postprocessor/ffmpeg.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 7446cc2c2..7f7e94289 100644
--- a/README.md
+++ b/README.md
@@ -951,7 +951,7 @@ You can also fork the project on github and run your fork's [build workflow](.gi
                                      (currently supported: srt|vtt|ass|lrc)
                                      (Alias: --convert-subtitles)
 --convert-thumbnails FORMAT          Convert the thumbnails to another format
-                                     (currently supported: jpg|png)
+                                     (currently supported: jpg|png|webp)
 --split-chapters                     Split video into multiple files based on
                                      internal chapters. The "chapter:" prefix
                                      can be used with "--paths" and "--output"
diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py
index d4495b4a2..907627381 100644
--- a/yt_dlp/postprocessor/ffmpeg.py
+++ b/yt_dlp/postprocessor/ffmpeg.py
@@ -1055,7 +1055,7 @@ class FFmpegSplitChaptersPP(FFmpegPostProcessor):
 
 
 class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor):
-    SUPPORTED_EXTS = ('jpg', 'png')
+    SUPPORTED_EXTS = ('jpg', 'png', 'webp')
 
     def __init__(self, downloader=None, format=None):
         super(FFmpegThumbnailsConvertorPP, self).__init__(downloader)

From 65d151d58f6c01f15aecfb2bef81b2a754295b7f Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Wed, 23 Feb 2022 01:37:21 +0530
Subject: [PATCH 0719/2552] [spiegel] Fix `_VALID_URL`

Closes #2842
---
 yt_dlp/extractor/spiegel.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/spiegel.py b/yt_dlp/extractor/spiegel.py
index 2da32b9b2..58f2ed353 100644
--- a/yt_dlp/extractor/spiegel.py
+++ b/yt_dlp/extractor/spiegel.py
@@ -7,7 +7,7 @@ from .jwplatform import JWPlatformIE
 
 class SpiegelIE(InfoExtractor):
     _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
-    _VALID_URL = r'https?://(?:www\.)?(?:spiegel|manager-magazin)\.de(?:/[^/]+)+/[^/]*-(?P<id>[0-9]+|%s)(?:-embed|-iframe)?(?:\.html)?(?:#.*)?$' % _UUID_RE
+    _VALID_URL = r'https?://(?:www\.)?(?:spiegel|manager-magazin)\.de(?:/[^/]+)+/[^/]*-(?P<id>[0-9]+|%s)(?:-embed|-iframe)?(?:\.html)?(?:$|[#?])' % _UUID_RE
     _TESTS = [{
         'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
         'md5': '50c7948883ec85a3e431a0a44b7ad1d6',

From a30a6ed3e49a0e037af6d5e26ecef3f3eba67d33 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Wed, 23 Feb 2022 01:33:36 +0530
Subject: [PATCH 0720/2552] [youtube:tab] Add `approximate_date` extractor-arg

---
 README.md                   | 1 +
 yt_dlp/extractor/youtube.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 7f7e94289..88ddb2f3b 100644
--- a/README.md
+++ b/README.md
@@ -1663,6 +1663,7 @@ The following extractors use this feature:
 
 #### youtubetab (YouTube playlists, channels, feeds, etc.)
* `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details) +* `approximate_date`: Extract approximate `upload_date` in flat-playlist. This may cause date-based filters to be slightly off #### funimation * `language`: Languages to extract. Eg: `funimation:language=english,japanese` diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 4d9815eb3..fbf9581c6 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -844,7 +844,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'uploader': uploader, 'channel_id': channel_id, 'thumbnails': thumbnails, - # 'upload_date': strftime_or_none(timestamp, '%Y%m%d'), + 'upload_date': strftime_or_none(timestamp, '%Y%m%d') if self._configuration_arg('approximate_date', ie_key='youtubetab') else None, 'live_status': ('is_upcoming' if scheduled_timestamp is not None else 'was_live' if 'streamed' in time_text.lower() else 'is_live' if overlay_style is not None and overlay_style == 'LIVE' or 'live now' in badges From 1108613f021eea0f6d4c5786c94db98641af6d59 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 23 Feb 2022 03:07:47 +0530 Subject: [PATCH 0721/2552] [youtube:tab] Reject webpage data if redirected to home page Closes #2660 --- yt_dlp/extractor/youtube.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index fbf9581c6..636bf42b6 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -4244,6 +4244,16 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): if 'webpage' not in self._configuration_arg('skip'): webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal) ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage) + # Reject webpage data if redirected to home page without explicitly requesting + selected_tab = self._extract_selected_tab(traverse_obj( + data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])) or {} + if (url != 'https://www.youtube.com/feed/recommended' + and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch' # Home page + and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', [])): + msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page' + if fatal: + raise ExtractorError(msg, expected=True) + self.report_warning(msg, only_once=True) if not data: if not ytcfg and self.is_authenticated: msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.' From 09b49e1f688831c3ad7181decf38c90f8451e6c4 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 22 Feb 2022 17:13:30 +0530 Subject: [PATCH 0722/2552] Add pre-processor stage `after_filter` * Move `_match_entry` and `post_extract` to `process_video_result`. It is also left in `process_info` for API compat * `--list-...` options and `--force-write-archive` now obey filtering options * Move `SponsorBlockPP` to `after_filter`. 
Closes https://github.com/yt-dlp/yt-dlp/issues/2536 * Reverts 4ec82a72bbf7ff0066edb50dcad20aa77ac2fe09 since this commit addresses the issue it was solving --- README.md | 20 +++++++++++--------- test/test_YoutubeDL.py | 20 ++------------------ yt_dlp/YoutubeDL.py | 43 +++++++++++++++++++----------------------- yt_dlp/__init__.py | 4 ++-- yt_dlp/options.py | 8 ++++---- yt_dlp/utils.py | 2 +- 6 files changed, 39 insertions(+), 58 deletions(-) diff --git a/README.md b/README.md index 88ddb2f3b..70b2e202f 100644 --- a/README.md +++ b/README.md @@ -982,15 +982,17 @@ You can also fork the project on github and run your fork's [build workflow](.gi semicolon ";" delimited list of NAME=VALUE. The "when" argument determines when the postprocessor is invoked. It can be one of - "pre_process" (after extraction), - "before_dl" (before video download), - "post_process" (after video download; - default), "after_move" (after moving file - to their final locations), "after_video" - (after downloading and processing all - formats of a video), or "playlist" (end of - playlist). This option can be used multiple - times to add different postprocessors + "pre_process" (after video extraction), + "after_filter" (after video passes filter), + "before_dl" (before each video download), + "post_process" (after each video download; + default), "after_move" (after moving video + file to it's final locations), + "after_video" (after downloading and + processing all formats of a video), or + "playlist" (at end of playlist). This + option can be used multiple times to add + different postprocessors ## SponsorBlock Options: Make chapter entries for, or remove various segments (sponsor, diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 34ed814b4..7637297be 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -30,9 +30,7 @@ class YDL(FakeYDL): self.msgs = [] def process_info(self, info_dict): - info_dict = info_dict.copy() - info_dict.pop('__original_infodict', None) - self.downloaded_info_dicts.append(info_dict) + self.downloaded_info_dicts.append(info_dict.copy()) def to_screen(self, msg): self.msgs.append(msg) @@ -898,20 +896,6 @@ class TestYoutubeDL(unittest.TestCase): os.unlink(filename) def test_match_filter(self): - class FilterYDL(YDL): - def __init__(self, *args, **kwargs): - super(FilterYDL, self).__init__(*args, **kwargs) - self.params['simulate'] = True - - def process_info(self, info_dict): - super(YDL, self).process_info(info_dict) - - def _match_entry(self, info_dict, incomplete=False): - res = super(FilterYDL, self)._match_entry(info_dict, incomplete) - if res is None: - self.downloaded_info_dicts.append(info_dict.copy()) - return res - first = { 'id': '1', 'url': TEST_URL, @@ -939,7 +923,7 @@ class TestYoutubeDL(unittest.TestCase): videos = [first, second] def get_videos(filter_=None): - ydl = FilterYDL({'match_filter': filter_}) + ydl = YDL({'match_filter': filter_, 'simulate': True}) for v in videos: ydl.process_ie_result(v, download=True) return [v['id'] for v in ydl.downloaded_info_dicts] diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 36b2b37c0..d9a3c0bce 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1037,8 +1037,7 @@ class YoutubeDL(object): @staticmethod def _copy_infodict(info_dict): info_dict = dict(info_dict) - for key in ('__original_infodict', '__postprocessors'): - info_dict.pop(key, None) + info_dict.pop('__postprocessors', None) return info_dict def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False): @@ -2512,8 +2511,6 @@ 
class YoutubeDL(object): if '__x_forwarded_for_ip' in info_dict: del info_dict['__x_forwarded_for_ip'] - # TODO Central sorting goes here - if self.params.get('check_formats') is True: formats = LazyList(self._check_formats(formats[::-1]), reverse=True) @@ -2526,6 +2523,12 @@ class YoutubeDL(object): info_dict, _ = self.pre_process(info_dict) + if self._match_entry(info_dict) is not None: + return info_dict + + self.post_extract(info_dict) + info_dict, _ = self.pre_process(info_dict, 'after_filter') + # The pre-processors may have modified the formats formats = info_dict.get('formats', [info_dict]) @@ -2610,15 +2613,12 @@ class YoutubeDL(object): + ', '.join([f['format_id'] for f in formats_to_download])) max_downloads_reached = False for i, fmt in enumerate(formats_to_download): - formats_to_download[i] = new_info = dict(info_dict) - # Save a reference to the original info_dict so that it can be modified in process_info if needed + formats_to_download[i] = new_info = self._copy_infodict(info_dict) new_info.update(fmt) - new_info['__original_infodict'] = info_dict try: self.process_info(new_info) except MaxDownloadsReached: max_downloads_reached = True - new_info.pop('__original_infodict') # Remove copied info for key, val in tuple(new_info.items()): if info_dict.get(key) == val: @@ -2826,7 +2826,7 @@ class YoutubeDL(object): return None def process_info(self, info_dict): - """Process a single resolved IE result. (Modified it in-place)""" + """Process a single resolved IE result. (Modifies it in-place)""" assert info_dict.get('_type', 'video') == 'video' original_infodict = info_dict @@ -2834,18 +2834,22 @@ class YoutubeDL(object): if 'format' not in info_dict and 'ext' in info_dict: info_dict['format'] = info_dict['ext'] + # This is mostly just for backward compatibility of process_info + # As a side-effect, this allows for format-specific filters if self._match_entry(info_dict) is not None: info_dict['__write_download_archive'] = 'ignore' return + # Does nothing under normal operation - for backward compatibility of process_info self.post_extract(info_dict) - self._num_downloads += 1 # info_dict['_filename'] needs to be set for backward compatibility info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True) temp_filename = self.prepare_filename(info_dict, 'temp') files_to_move = {} + self._num_downloads += 1 + # Forced printings self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict)) @@ -3259,17 +3263,14 @@ class YoutubeDL(object): return info_dict info_dict.setdefault('epoch', int(time.time())) info_dict.setdefault('_type', 'video') - remove_keys = {'__original_infodict'} # Always remove this since this may contain a copy of the entire dict - keep_keys = ['_type'] # Always keep this to facilitate load-info-json + if remove_private_keys: - remove_keys |= { + reject = lambda k, v: v is None or (k.startswith('_') and k != '_type') or k in { 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries', 'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber', } - reject = lambda k, v: k not in keep_keys and ( - k.startswith('_') or k in remove_keys or v is None) else: - reject = lambda k, v: k in remove_keys + reject = lambda k, v: False def filter_fn(obj): if isinstance(obj, dict): @@ -3296,14 +3297,8 @@ class YoutubeDL(object): actual_post_extract(video_dict or {}) return - post_extractor = info_dict.get('__post_extractor') or (lambda: {}) - extra = post_extractor().items() - 
info_dict.update(extra) - info_dict.pop('__post_extractor', None) - - original_infodict = info_dict.get('__original_infodict') or {} - original_infodict.update(extra) - original_infodict.pop('__post_extractor', None) + post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {}) + info_dict.update(post_extractor()) actual_post_extract(info_dict or {}) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index b93f47ecc..c87c5b6df 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -474,8 +474,8 @@ def _real_main(argv=None): 'key': 'SponsorBlock', 'categories': sponsorblock_query, 'api': opts.sponsorblock_api, - # Run this immediately after extraction is complete - 'when': 'pre_process' + # Run this after filtering videos + 'when': 'after_filter' }) if opts.parse_metadata: postprocessors.append({ diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 2ba7d2601..6fcef98cd 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1550,11 +1550,11 @@ def create_parser(): 'and (optionally) arguments to be passed to it, separated by a colon ":". ' 'ARGS are a semicolon ";" delimited list of NAME=VALUE. ' 'The "when" argument determines when the postprocessor is invoked. ' - 'It can be one of "pre_process" (after extraction), ' - '"before_dl" (before video download), "post_process" (after video download; default), ' - '"after_move" (after moving file to their final locations), ' + 'It can be one of "pre_process" (after video extraction), "after_filter" (after video passes filter), ' + '"before_dl" (before each video download), "post_process" (after each video download; default), ' + '"after_move" (after moving video file to it\'s final locations), ' '"after_video" (after downloading and processing all formats of a video), ' - 'or "playlist" (end of playlist). ' + 'or "playlist" (at end of playlist). 
' 'This option can be used multiple times to add different postprocessors')) sponsorblock = optparse.OptionGroup(parser, 'SponsorBlock Options', description=( diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index f5cad0e54..8b0d95efa 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -3166,7 +3166,7 @@ def qualities(quality_ids): return q -POSTPROCESS_WHEN = {'pre_process', 'before_dl', 'after_move', 'post_process', 'after_video', 'playlist'} +POSTPROCESS_WHEN = {'pre_process', 'after_filter', 'before_dl', 'after_move', 'post_process', 'after_video', 'playlist'} DEFAULT_OUTTMPL = { From 03f830040ae92af369ee046b082b1683ddf1539f Mon Sep 17 00:00:00 2001 From: "Lesmiscore (Naoya Ozaki)" Date: Thu, 24 Feb 2022 18:42:53 +0900 Subject: [PATCH 0723/2552] [YoutubeDL] Fill more fields for playlists (#2824) --- yt_dlp/YoutubeDL.py | 101 +++++++++++++++++++++++--------------------- 1 file changed, 54 insertions(+), 47 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index d9a3c0bce..10eebecf2 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1584,6 +1584,7 @@ class YoutubeDL(object): self._playlist_level += 1 self._playlist_urls.add(webpage_url) + self._fill_common_fields(ie_result, False) self._sanitize_thumbnails(ie_result) try: return self.__process_playlist(ie_result, download) @@ -2308,6 +2309,58 @@ class YoutubeDL(object): else: info_dict['thumbnails'] = thumbnails + def _fill_common_fields(self, info_dict, is_video=True): + # TODO: move sanitization here + if is_video: + # playlists are allowed to lack "title" + info_dict['fulltitle'] = info_dict.get('title') + if 'title' not in info_dict: + raise ExtractorError('Missing "title" field in extractor result', + video_id=info_dict['id'], ie=info_dict['extractor']) + elif not info_dict.get('title'): + self.report_warning('Extractor failed to obtain "title". Creating a generic title instead') + info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}' + + if info_dict.get('duration') is not None: + info_dict['duration_string'] = formatSeconds(info_dict['duration']) + + for ts_key, date_key in ( + ('timestamp', 'upload_date'), + ('release_timestamp', 'release_date'), + ('modified_timestamp', 'modified_date'), + ): + if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None: + # Working around out-of-range timestamp values (e.g. negative ones on Windows, + # see http://bugs.python.org/issue1646728) + try: + upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key]) + info_dict[date_key] = upload_date.strftime('%Y%m%d') + except (ValueError, OverflowError, OSError): + pass + + live_keys = ('is_live', 'was_live') + live_status = info_dict.get('live_status') + if live_status is None: + for key in live_keys: + if info_dict.get(key) is False: + continue + if info_dict.get(key): + live_status = key + break + if all(info_dict.get(key) is False for key in live_keys): + live_status = 'not_live' + if live_status: + info_dict['live_status'] = live_status + for key in live_keys: + if info_dict.get(key) is None: + info_dict[key] = (live_status == key) + + # Auto generate title fields corresponding to the *_number fields when missing + # in order to always have clean titles. This is very common for TV series. 
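+        # e.g. an entry with episode_number=3 but no episode gets episode='Episode 3'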
+ for field in ('chapter', 'season', 'episode'): + if info_dict.get('%s_number' % field) is not None and not info_dict.get(field): + info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field]) + def process_video_result(self, info_dict, download=True): assert info_dict.get('_type', 'video') == 'video' self._num_videos += 1 @@ -2317,14 +2370,6 @@ class YoutubeDL(object): elif not info_dict.get('id'): raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor']) - info_dict['fulltitle'] = info_dict.get('title') - if 'title' not in info_dict: - raise ExtractorError('Missing "title" field in extractor result', - video_id=info_dict['id'], ie=info_dict['extractor']) - elif not info_dict.get('title'): - self.report_warning('Extractor failed to obtain "title". Creating a generic title instead') - info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}' - def report_force_conversion(field, field_not, conversion): self.report_warning( '"%s" field is not %s - forcing %s conversion, there is an error in extractor' @@ -2365,45 +2410,7 @@ class YoutubeDL(object): if info_dict.get('display_id') is None and 'id' in info_dict: info_dict['display_id'] = info_dict['id'] - if info_dict.get('duration') is not None: - info_dict['duration_string'] = formatSeconds(info_dict['duration']) - - for ts_key, date_key in ( - ('timestamp', 'upload_date'), - ('release_timestamp', 'release_date'), - ('modified_timestamp', 'modified_date'), - ): - if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None: - # Working around out-of-range timestamp values (e.g. negative ones on Windows, - # see http://bugs.python.org/issue1646728) - try: - upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key]) - info_dict[date_key] = upload_date.strftime('%Y%m%d') - except (ValueError, OverflowError, OSError): - pass - - live_keys = ('is_live', 'was_live') - live_status = info_dict.get('live_status') - if live_status is None: - for key in live_keys: - if info_dict.get(key) is False: - continue - if info_dict.get(key): - live_status = key - break - if all(info_dict.get(key) is False for key in live_keys): - live_status = 'not_live' - if live_status: - info_dict['live_status'] = live_status - for key in live_keys: - if info_dict.get(key) is None: - info_dict[key] = (live_status == key) - - # Auto generate title fields corresponding to the *_number fields when missing - # in order to always have clean titles. This is very common for TV series. 
-        for field in ('chapter', 'season', 'episode'):
-            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
-                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
+        self._fill_common_fields(info_dict)
 
         for cc_kind in ('subtitles', 'automatic_captions'):
             cc = info_dict.get(cc_kind)

From b440e1bb2211918ef2b34138a65e0cb6c3a66057 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Thu, 24 Feb 2022 17:02:19 +0530
Subject: [PATCH 0724/2552] [devscripts] Improve `prepare_manpage`

Closes #2873
---
 devscripts/prepare_manpage.py | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py
index 069d99eeb..6612723f7 100644
--- a/devscripts/prepare_manpage.py
+++ b/devscripts/prepare_manpage.py
@@ -75,21 +75,17 @@ def filter_options(readme):
     section = re.search(r'(?sm)^# USAGE AND OPTIONS\n.+?(?=^# )', readme).group(0)
     options = '# OPTIONS\n'
     for line in section.split('\n')[1:]:
-        if line.lstrip().startswith('-'):
-            split = re.split(r'\s{2,}', line.lstrip())
-            # Description string may start with `-` as well. If there is
-            # only one piece then it's a description bit not an option.
-            if len(split) > 1:
-                option, description = split
-                split_option = option.split(' ')
-
-                if not split_option[-1].startswith('-'):  # metavar
-                    option = ' '.join(split_option[:-1] + [f'*{split_option[-1]}*'])
-
-                # Pandoc's definition_lists. See http://pandoc.org/README.html
-                options += f'\n{option}\n: {description}\n'
-                continue
-        options += line.lstrip() + '\n'
+        mobj = re.fullmatch(r'\s{4}(?P<opt>-(?:,\s|[^\s])+)(?:\s(?P<meta>([^\s]|\s(?!\s))+))?(\s{2,}(?P<desc>.+))?', line)
+        if not mobj:
+            options += f'{line.lstrip()}\n'
+            continue
+        option, metavar, description = mobj.group('opt', 'meta', 'desc')
+
+        # Pandoc's definition_lists. See http://pandoc.org/README.html
+        option = f'{option} *{metavar}*' if metavar else option
+        description = f'{description}\n' if description else ''
+        options += f'\n{option}\n: {description}'
+        continue
 
     return readme.replace(section, options, 1)

From a539f06570e89742d641fe53328e2beea51937aa Mon Sep 17 00:00:00 2001
From: Lesmiscore (Naoya Ozaki)
Date: Fri, 25 Feb 2022 02:00:46 +0900
Subject: [PATCH 0725/2552] [downloader/fragment] Improve `--live-from-start`
 for YouTube livestreams (#2870)

---
 yt_dlp/downloader/fragment.py | 40 ++++++++++++++++++++++------------
 yt_dlp/extractor/youtube.py   | 14 ++++++------
 2 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py
index 19c0990d3..082581b54 100644
--- a/yt_dlp/downloader/fragment.py
+++ b/yt_dlp/downloader/fragment.py
@@ -25,6 +25,7 @@ from ..utils import (
     error_to_compat_str,
     encodeFilename,
     sanitized_Request,
+    traverse_obj,
 )
 
@@ -382,6 +383,7 @@ class FragmentFD(FileDownloader):
         max_workers = self.params.get('concurrent_fragment_downloads', 1)
         if max_progress > 1:
             self._prepare_multiline_status(max_progress)
+        is_live = any(traverse_obj(args, (..., 2, 'is_live'), default=[]))
 
         def thread_func(idx, ctx, fragments, info_dict, tpe):
             ctx['max_progress'] = max_progress
@@ -395,25 +397,44 @@ class FragmentFD(FileDownloader):
             def __exit__(self, exc_type, exc_val, exc_tb):
                 pass
 
-        spins = []
         if compat_os_name == 'nt':
-            self.report_warning('Ctrl+C does not work on Windows when used with parallel threads. '
-                                'This is a known issue and patches are welcome')
+            def bindoj_result(future):
+                while True:
+                    try:
+                        return future.result(0.1)
+                    except KeyboardInterrupt:
+                        raise
+                    except concurrent.futures.TimeoutError:
+                        continue
+        else:
+            def bindoj_result(future):
+                return future.result()
+
+        spins = []
         for idx, (ctx, fragments, info_dict) in enumerate(args):
             tpe = FTPE(math.ceil(max_workers / max_progress))
+
+            def interrupt_trigger_iter():
+                for f in fragments:
+                    if not interrupt_trigger[0]:
+                        break
+                    yield f
+
+            job = tpe.submit(thread_func, idx, ctx, interrupt_trigger_iter(), info_dict, tpe)
             spins.append((tpe, job))
 
         result = True
         for tpe, job in spins:
             try:
-                result = result and job.result()
+                result = result and bindoj_result(job)
             except KeyboardInterrupt:
                 interrupt_trigger[0] = False
             finally:
                 tpe.shutdown(wait=True)
 
-        if not interrupt_trigger[0]:
+        if not interrupt_trigger[0] and not is_live:
             raise KeyboardInterrupt()
+        # we expect the user wants to stop, but DOES want the preceding postprocessors to run;
+        # so return an intermediate result here instead of KeyboardInterrupt on live
         return result
 
     def download_and_append_fragments(
@@ -431,10 +452,11 @@ class FragmentFD(FileDownloader):
             pack_func = lambda frag_content, _: frag_content
 
         def download_fragment(fragment, ctx):
+            if not interrupt_trigger[0]:
+                return False, fragment['frag_index']
+
             frag_index = ctx['fragment_index'] = fragment['frag_index']
             ctx['last_error'] = None
-            if not interrupt_trigger[0]:
-                return False, frag_index
             headers = info_dict.get('http_headers', {}).copy()
             byte_range = fragment.get('byte_range')
             if byte_range:
@@ -500,8 +522,6 @@ class FragmentFD(FileDownloader):
             self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome')
         with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
             for fragment, frag_content, frag_index, frag_filename in pool.map(_download_fragment, fragments):
-                if not interrupt_trigger[0]:
-                    break
                 ctx['fragment_filename_sanitized'] = frag_filename
                 ctx['fragment_index'] = frag_index
                 result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx)
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 636bf42b6..47b3c5a85 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -2135,6 +2135,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             return f['manifest_url'], f['manifest_stream_number'], is_live
 
         for f in formats:
+            f['is_live'] = True
             f['protocol'] = 'http_dash_segments_generator'
             f['fragments'] = functools.partial(
                 self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)
@@ -2157,12 +2158,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
         fragments, fragment_base_url = None, None
 
-        def _extract_sequence_from_mpd(refresh_sequence):
+        def _extract_sequence_from_mpd(refresh_sequence, immediate):
             nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
             # Obtain from MPD's maximum seq value
             old_mpd_url = mpd_url
             last_error = ctx.pop('last_error', None)
-            expire_fast = last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403
+            expire_fast = immediate or last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403
             mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                                or (mpd_url, stream_number, False))
             if not refresh_sequence:
@@ -2176,7 +2177,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             except ExtractorError:
                 fmts = None
             if not fmts:
-                no_fragment_score += 1
+                no_fragment_score += 2
                 return False, last_seq
             fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
             fragments = fmt_info['fragments']
@@ -2199,11 +2200,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     urlh = None
                 last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
                 if last_seq is None:
-                    no_fragment_score += 1
+                    no_fragment_score += 2
                     last_segment_url = None
                     continue
             else:
-                should_continue, last_seq = _extract_sequence_from_mpd(True)
+                should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
+                no_fragment_score += 2
                 if not should_continue:
                     continue
 
@@ -2221,7 +2223,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             try:
                 for idx in range(known_idx, last_seq):
                     # do not update sequence here or you may skip some part of it
-                    should_continue, _ = _extract_sequence_from_mpd(False)
+                    should_continue, _ = _extract_sequence_from_mpd(False, False)
                     if not should_continue:
                         known_idx = idx - 1
                         raise ExtractorError('breaking out of outer loop')
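A note on the patch above: the fragment generator keeps re-fetching the manifest while the stream is live and only gives up after repeated empty polls (tracked by `no_fragment_score`). A simplified sketch of that polling idea, assuming a hypothetical `fetch_last_seq` callback in place of the real MPD/HEAD-request logic:

    import time

    def iter_live_fragment_indices(fetch_last_seq, begin_index=0, poll_interval=5, max_idle=5):
        # Yield new fragment indices as the server advertises them
        # (e.g. via an X-Head-Seqnum style header); stop after several idle polls.
        known_idx, idle = begin_index, 0
        while idle < max_idle:
            last_seq = fetch_last_seq()
            if last_seq is None or last_seq <= known_idx:
                idle += 1
            else:
                idle = 0
                yield from range(known_idx + 1, last_seq + 1)
                known_idx = last_seq
            time.sleep(poll_interval)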
From 3e9b66d761048d568ed0da40e43d02e1bf02f759 Mon Sep 17 00:00:00 2001
From: Lesmiscore (Naoya Ozaki)
Date: Fri, 25 Feb 2022 11:14:04 +0900
Subject: [PATCH 0726/2552] [AbemaTV] Add extractors (#1688)

Authored by: Lesmiscore
---
 yt_dlp/extractor/abematv.py    | 488 +++++++++++++++++++++++++++++++++
 yt_dlp/extractor/extractors.py |   4 +
 yt_dlp/utils.py                |  23 ++
 3 files changed, 515 insertions(+)
 create mode 100644 yt_dlp/extractor/abematv.py

diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py
new file mode 100644
index 000000000..66b12c72f
--- /dev/null
+++ b/yt_dlp/extractor/abematv.py
@@ -0,0 +1,488 @@
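+# NOTE: AbemaLicenseHandler below registers a custom 'abematv-license://' URL scheme that returns decrypted HLS video keys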
+import io +import json +import time +import hashlib +import hmac +import re +import struct +from base64 import urlsafe_b64encode +from binascii import unhexlify + +import typing +if typing.TYPE_CHECKING: + from ..YoutubeDL import YoutubeDL + +from .common import InfoExtractor +from ..aes import aes_ecb_decrypt +from ..compat import ( + compat_urllib_response, + compat_urllib_parse_urlparse, + compat_urllib_request, +) +from ..utils import ( + ExtractorError, + decode_base, + int_or_none, + random_uuidv4, + request_to_url, + time_seconds, + update_url_query, + traverse_obj, + intlist_to_bytes, + bytes_to_intlist, + urljoin, +) + + +# NOTE: network handler related code is temporary thing until network stack overhaul PRs are merged (#2861/#2862) + +def add_opener(self: 'YoutubeDL', handler): + ''' Add a handler for opening URLs, like _download_webpage ''' + # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426 + # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605 + assert isinstance(self._opener, compat_urllib_request.OpenerDirector) + self._opener.add_handler(handler) + + +def remove_opener(self: 'YoutubeDL', handler): + ''' + Remove handler(s) for opening URLs + @param handler Either handler object itself or handler type. + Specifying handler type will remove all handler which isinstance returns True. + ''' + # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426 + # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605 + opener = self._opener + assert isinstance(self._opener, compat_urllib_request.OpenerDirector) + if isinstance(handler, (type, tuple)): + find_cp = lambda x: isinstance(x, handler) + else: + find_cp = lambda x: x is handler + + removed = [] + for meth in dir(handler): + if meth in ["redirect_request", "do_open", "proxy_open"]: + # oops, coincidental match + continue + + i = meth.find("_") + protocol = meth[:i] + condition = meth[i + 1:] + + if condition.startswith("error"): + j = condition.find("_") + i + 1 + kind = meth[j + 1:] + try: + kind = int(kind) + except ValueError: + pass + lookup = opener.handle_error.get(protocol, {}) + opener.handle_error[protocol] = lookup + elif condition == "open": + kind = protocol + lookup = opener.handle_open + elif condition == "response": + kind = protocol + lookup = opener.process_response + elif condition == "request": + kind = protocol + lookup = opener.process_request + else: + continue + + handlers = lookup.setdefault(kind, []) + if handlers: + handlers[:] = [x for x in handlers if not find_cp(x)] + + removed.append(x for x in handlers if find_cp(x)) + + if removed: + for x in opener.handlers: + if find_cp(x): + x.add_parent(None) + opener.handlers[:] = [x for x in opener.handlers if not find_cp(x)] + + +class AbemaLicenseHandler(compat_urllib_request.BaseHandler): + handler_order = 499 + STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz' + HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E' + + def __init__(self, ie: 'AbemaTVIE'): + # the protcol that this should really handle is 'abematv-license://' + # abematv_license_open is just a placeholder for development purposes + # ref. 
https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
+        setattr(self, 'abematv-license_open', getattr(self, 'abematv_license_open'))
+        self.ie = ie
+
+    def _get_videokey_from_ticket(self, ticket):
+        to_show = self.ie._downloader.params.get('verbose', False)
+        media_token = self.ie._get_media_token(to_show=to_show)
+
+        license_response = self.ie._download_json(
+            'https://license.abema.io/abematv-hls', None, note='Requesting playback license' if to_show else False,
+            query={'t': media_token},
+            data=json.dumps({
+                'kv': 'a',
+                'lt': ticket
+            }).encode('utf-8'),
+            headers={
+                'Content-Type': 'application/json',
+            })
+
+        res = decode_base(license_response['k'], self.STRTABLE)
+        encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff))
+
+        h = hmac.new(
+            unhexlify(self.HKEY),
+            (license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'),
+            digestmod=hashlib.sha256)
+        enckey = bytes_to_intlist(h.digest())
+
+        return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))
+
+    def abematv_license_open(self, url):
+        url = request_to_url(url)
+        ticket = compat_urllib_parse_urlparse(url).netloc
+        response_data = self._get_videokey_from_ticket(ticket)
+        return compat_urllib_response.addinfourl(io.BytesIO(response_data), headers={
+            'Content-Length': len(response_data),
+        }, url=url, code=200)
+
+
+class AbemaTVBaseIE(InfoExtractor):
+    def _extract_breadcrumb_list(self, webpage, video_id):
+        for jld in re.finditer(
+                r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
+                webpage):
+            jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
+            if jsonld:
+                if jsonld.get('@type') != 'BreadcrumbList':
+                    continue
+                trav = traverse_obj(jsonld, ('itemListElement', ..., 'name'))
+                if trav:
+                    return trav
+        return []
+
+
+class AbemaTVIE(AbemaTVBaseIE):
+    _VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air|video/episode|channels/.+?/slots)/(?P<id>[^?/]+)'
+    _NETRC_MACHINE = 'abematv'
+    _TESTS = [{
+        'url': 'https://abema.tv/video/episode/194-25_s2_p1',
+        'info_dict': {
+            'id': '194-25_s2_p1',
+            'title': '第1話 「チーズケーキ」 「モーニング再び」',
+            'series': '異世界食堂2',
+            'series_number': 2,
+            'episode': '第1話 「チーズケーキ」 「モーニング再び」',
+            'episode_number': 1,
+        },
+        'skip': 'expired',
+    }, {
+        'url': 'https://abema.tv/channels/anime-live2/slots/E8tvAnMJ7a9a5d',
+        'info_dict': {
+            'id': 'E8tvAnMJ7a9a5d',
+            'title': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
+            'series': 'ゆるキャン△ SEASON2',
+            'episode': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
+            'series_number': 2,
+            'episode_number': 1,
+            'description': 'md5:9c5a3172ae763278f9303922f0ea5b17',
+        },
+        'skip': 'expired',
+    }, {
+        'url': 'https://abema.tv/video/episode/87-877_s1282_p31047',
+        'info_dict': {
+            'id': 'E8tvAnMJ7a9a5d',
+            'title': '第5話『光射す』',
+            'description': 'md5:56d4fc1b4f7769ded5f923c55bb4695d',
+            'thumbnail': r're:https://hayabusa\.io/.+',
+            'series': '相棒',
+            'episode': '第5話『光射す』',
+        },
+        'skip': 'expired',
+    }, {
+        'url': 'https://abema.tv/now-on-air/abema-anime',
+        'info_dict': {
+            'id': 'abema-anime',
+            # this varies
+            # 'title': '女子高生の無駄づかい 全話一挙【無料ビデオ72時間】',
+            'description': 'md5:55f2e61f46a17e9230802d7bcc913d5f',
+            'is_live': True,
+        },
+        'skip': 'Not supported until yt-dlp implements native live downloader OR AbemaTV can start a local HTTP server',
+    }]
+    _USERTOKEN = None
+    _DEVICE_ID = None
+    _TIMETABLE = None
+    _MEDIATOKEN = None
+
+    _SECRETKEY = b'v+Gjs=25Aw5erR!J8ZuvRrCx*rGswhB&qdHd_SYerEWdU&a?3DzN9BRbp5KwY4hEmcj5#fykMjJ=AuWz5GSMY-d@H7DMEh3M@9n2G552Us$$k9cD=3TxwWe86!x#Zyhe'
+
+    def 
_generate_aks(self, deviceid): + deviceid = deviceid.encode('utf-8') + # add 1 hour and then drop minute and secs + ts_1hour = int((time_seconds(hours=9) // 3600 + 1) * 3600) + time_struct = time.gmtime(ts_1hour) + ts_1hour_str = str(ts_1hour).encode('utf-8') + + tmp = None + + def mix_once(nonce): + nonlocal tmp + h = hmac.new(self._SECRETKEY, digestmod=hashlib.sha256) + h.update(nonce) + tmp = h.digest() + + def mix_tmp(count): + nonlocal tmp + for i in range(count): + mix_once(tmp) + + def mix_twist(nonce): + nonlocal tmp + mix_once(urlsafe_b64encode(tmp).rstrip(b'=') + nonce) + + mix_once(self._SECRETKEY) + mix_tmp(time_struct.tm_mon) + mix_twist(deviceid) + mix_tmp(time_struct.tm_mday % 5) + mix_twist(ts_1hour_str) + mix_tmp(time_struct.tm_hour % 5) + + return urlsafe_b64encode(tmp).rstrip(b'=').decode('utf-8') + + def _get_device_token(self): + if self._USERTOKEN: + return self._USERTOKEN + + self._DEVICE_ID = random_uuidv4() + aks = self._generate_aks(self._DEVICE_ID) + user_data = self._download_json( + 'https://api.abema.io/v1/users', None, note='Authorizing', + data=json.dumps({ + 'deviceId': self._DEVICE_ID, + 'applicationKeySecret': aks, + }).encode('utf-8'), + headers={ + 'Content-Type': 'application/json', + }) + self._USERTOKEN = user_data['token'] + + # don't allow adding it 2 times or more, though it's guarded + remove_opener(self._downloader, AbemaLicenseHandler) + add_opener(self._downloader, AbemaLicenseHandler(self)) + + return self._USERTOKEN + + def _get_media_token(self, invalidate=False, to_show=True): + if not invalidate and self._MEDIATOKEN: + return self._MEDIATOKEN + + self._MEDIATOKEN = self._download_json( + 'https://api.abema.io/v1/media/token', None, note='Fetching media token' if to_show else False, + query={ + 'osName': 'android', + 'osVersion': '6.0.1', + 'osLang': 'ja_JP', + 'osTimezone': 'Asia/Tokyo', + 'appId': 'tv.abema', + 'appVersion': '3.27.1' + }, headers={ + 'Authorization': 'bearer ' + self._get_device_token() + })['token'] + + return self._MEDIATOKEN + + def _real_initialize(self): + self._login() + + def _login(self): + username, password = self._get_login_info() + # No authentication to be performed + if not username: + return True + + if '@' in username: # don't strictly check if it's email address or not + ep, method = 'user/email', 'email' + else: + ep, method = 'oneTimePassword', 'userId' + + login_response = self._download_json( + f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in', + data=json.dumps({ + method: username, + 'password': password + }).encode('utf-8'), headers={ + 'Authorization': 'bearer ' + self._get_device_token(), + 'Origin': 'https://abema.tv', + 'Referer': 'https://abema.tv/', + 'Content-Type': 'application/json', + }) + + self._USERTOKEN = login_response['token'] + self._get_media_token(True) + + def _real_extract(self, url): + # starting download using infojson from this extractor is undefined behavior, + # and never be fixed in the future; you must trigger downloads by directly specifing URL. 
+        # (unless there's a way to hook before downloading by extractor)
+        video_id, video_type = self._match_valid_url(url).group('id', 'type')
+        headers = {
+            'Authorization': 'Bearer ' + self._get_device_token(),
+        }
+        video_type = video_type.split('/')[-1]
+
+        webpage = self._download_webpage(url, video_id)
+        canonical_url = self._search_regex(
+            r'<link\s+rel="canonical"\s*href="(.+?)"', webpage, 'canonical URL', default=url)
+        info = self._search_json_ld(webpage, video_id, default={})
+
+        title = self._search_regex(
+            r'<span\s*class=".+?EpisodeTitleBlock__title">(.+?)</span>', webpage, 'title', default=None)
+        if not title:
+            jsonld = None
+            for jld in re.finditer(
+                    r'(?is)<span\s*class="com-m-Thumbnail__image">(?:</span>)?<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
+                    webpage):
+                jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
+                if jsonld:
+                    break
+            if jsonld:
+                title = jsonld.get('caption')
+        if not title and video_type == 'now-on-air':
+            if not self._TIMETABLE:
+                # cache the timetable because it goes to 5MiB in size (!!)
+                self._TIMETABLE = self._download_json(
+                    'https://api.abema.io/v1/timetable/dataSet?debug=false', video_id,
+                    headers=headers)
+            now = time_seconds(hours=9)
+            for slot in self._TIMETABLE.get('slots', []):
+                if slot.get('channelId') != video_id:
+                    continue
+                if slot['startAt'] <= now and now < slot['endAt']:
+                    title = slot['title']
+                    break
+
+        # read breadcrumb on top of page
+        breadcrumb = self._extract_breadcrumb_list(webpage, video_id)
+        if breadcrumb:
+            # breadcrumb list translates to: (example is 1st test for this IE)
+            # Home > Anime (genre) > Isekai Shokudo 2 (series name) > Episode 1 "Cheese cakes" "Morning again" (episode title)
+            # hence this works
+            info['series'] = breadcrumb[-2]
+            info['episode'] = breadcrumb[-1]
+        if not title:
+            title = info['episode']
+
+        description = self._html_search_regex(
+            (r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)
</span></p><div',
+             r'<span\s+class=".+?SlotSummary.+?">
(.+?)[^?/]+)' + + _TESTS = [{ + 'url': 'https://abema.tv/video/title/90-1597', + 'info_dict': { + 'id': '90-1597', + 'title': 'シャッフルアイランド', + }, + 'playlist_mincount': 2, + }, { + 'url': 'https://abema.tv/video/title/193-132', + 'info_dict': { + 'id': '193-132', + 'title': '真心が届く~僕とスターのオフィス・ラブ!?~', + }, + 'playlist_mincount': 16, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + playlist_title, breadcrumb = None, self._extract_breadcrumb_list(webpage, video_id) + if breadcrumb: + playlist_title = breadcrumb[-1] + + playlist = [ + self.url_result(urljoin('https://abema.tv/', mobj.group(1))) + for mobj in re.finditer(r' Date: Fri, 25 Feb 2022 11:16:23 +0900 Subject: [PATCH 0727/2552] [fc2:live] Add extractor (#2418) Authored by: Lesmiscore --- yt_dlp/downloader/__init__.py | 2 + yt_dlp/downloader/fc2.py | 41 +++++++++ yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/fc2.py | 150 +++++++++++++++++++++++++++++++++ yt_dlp/utils.py | 4 +- 5 files changed, 197 insertions(+), 1 deletion(-) create mode 100644 yt_dlp/downloader/fc2.py diff --git a/yt_dlp/downloader/__init__.py b/yt_dlp/downloader/__init__.py index 76841993b..96d484dee 100644 --- a/yt_dlp/downloader/__init__.py +++ b/yt_dlp/downloader/__init__.py @@ -30,6 +30,7 @@ def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=N from .common import FileDownloader from .dash import DashSegmentsFD from .f4m import F4mFD +from .fc2 import FC2LiveFD from .hls import HlsFD from .http import HttpFD from .rtmp import RtmpFD @@ -58,6 +59,7 @@ PROTOCOL_MAP = { 'ism': IsmFD, 'mhtml': MhtmlFD, 'niconico_dmc': NiconicoDmcFD, + 'fc2_live': FC2LiveFD, 'websocket_frag': WebSocketFragmentFD, 'youtube_live_chat': YoutubeLiveChatFD, 'youtube_live_chat_replay': YoutubeLiveChatFD, diff --git a/yt_dlp/downloader/fc2.py b/yt_dlp/downloader/fc2.py new file mode 100644 index 000000000..157bcf23e --- /dev/null +++ b/yt_dlp/downloader/fc2.py @@ -0,0 +1,41 @@ +from __future__ import division, unicode_literals + +import threading + +from .common import FileDownloader +from .external import FFmpegFD + + +class FC2LiveFD(FileDownloader): + """ + Downloads FC2 live without being stopped.
Note, this is not a part of public API, and will be removed without notice.
+    DO NOT USE
+    """
+
+    def real_download(self, filename, info_dict):
+        ws = info_dict['ws']
+
+        heartbeat_lock = threading.Lock()
+        heartbeat_state = [None, 1]
+
+        def heartbeat():
+            try:
+                heartbeat_state[1] += 1
+                ws.send('{"name":"heartbeat","arguments":{},"id":%d}' % heartbeat_state[1])
+            except Exception:
+                self.to_screen('[fc2:live] Heartbeat failed')
+
+            with heartbeat_lock:
+                heartbeat_state[0] = threading.Timer(30, heartbeat)
+                heartbeat_state[0]._daemonic = True
+                heartbeat_state[0].start()
+
+        heartbeat()
+
+        new_info_dict = info_dict.copy()
+        new_info_dict.update({
+            'ws': None,
+            'protocol': 'live_ffmpeg',
+        })
+        return FFmpegFD(self.ydl, self.params or {}).download(filename, new_info_dict)
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index fafa56d7b..ef1d6c14d 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -478,6 +478,7 @@ from .faz import FazIE
 from .fc2 import (
     FC2IE,
     FC2EmbedIE,
+    FC2LiveIE,
 )
 from .fczenit import FczenitIE
 from .filmmodu import FilmmoduIE
diff --git a/yt_dlp/extractor/fc2.py b/yt_dlp/extractor/fc2.py
index 2c19a0c6e..7fc6b0e3d 100644
--- a/yt_dlp/extractor/fc2.py
+++ b/yt_dlp/extractor/fc2.py
@@ -1,14 +1,21 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..compat import (
     compat_parse_qs,
 )
 from ..utils import (
     ExtractorError,
+    WebSocketsWrapper,
+    has_websockets,
+    js_to_json,
     sanitized_Request,
+    std_headers,
     traverse_obj,
+    update_url_query,
     urlencode_postdata,
     urljoin,
 )
@@ -147,3 +154,146 @@ class FC2EmbedIE(InfoExtractor):
             'title': title,
             'thumbnail': thumbnail,
         }
+
+
+class FC2LiveIE(InfoExtractor):
+    _VALID_URL = r'https?://live\.fc2\.com/(?P<id>\d+)'
+    IE_NAME = 'fc2:live'
+
+    _TESTS = [{
+        'url': 'https://live.fc2.com/57892267/',
+        'info_dict': {
+            'id': '57892267',
+            'title': 'どこまで・・・',
+            'uploader': 'あつあげ',
+            'uploader_id': '57892267',
+            'thumbnail': r're:https?://.+fc2.+',
+        },
+        'skip': 'livestream',
+    }]
+
+    def _real_extract(self, url):
+        if not has_websockets:
+            raise ExtractorError('websockets library is not available. 
Please install it.', expected=True) + video_id = self._match_id(url) + webpage = self._download_webpage('https://live.fc2.com/%s/' % video_id, video_id) + + self._set_cookie('live.fc2.com', 'js-player_size', '1') + + member_api = self._download_json( + 'https://live.fc2.com/api/memberApi.php', video_id, data=urlencode_postdata({ + 'channel': '1', + 'profile': '1', + 'user': '1', + 'streamid': video_id + }), note='Requesting member info') + + control_server = self._download_json( + 'https://live.fc2.com/api/getControlServer.php', video_id, note='Downloading ControlServer data', + data=urlencode_postdata({ + 'channel_id': video_id, + 'mode': 'play', + 'orz': '', + 'channel_version': member_api['data']['channel_data']['version'], + 'client_version': '2.1.0\n [1]', + 'client_type': 'pc', + 'client_app': 'browser_hls', + 'ipv6': '', + }), headers={'X-Requested-With': 'XMLHttpRequest'}) + self._set_cookie('live.fc2.com', 'l_ortkn', control_server['orz_raw']) + + ws_url = update_url_query(control_server['url'], {'control_token': control_server['control_token']}) + playlist_data = None + + self.to_screen('%s: Fetching HLS playlist info via WebSocket' % video_id) + ws = WebSocketsWrapper(ws_url, { + 'Cookie': str(self._get_cookies('https://live.fc2.com/'))[12:], + 'Origin': 'https://live.fc2.com', + 'Accept': '*/*', + 'User-Agent': std_headers['User-Agent'], + }) + ws.__enter__() + + self.write_debug('[debug] Sending HLS server request') + + while True: + recv = ws.recv() + if not recv: + continue + data = self._parse_json(recv, video_id, fatal=False) + if not data or not isinstance(data, dict): + continue + + if data.get('name') == 'connect_complete': + break + ws.send(r'{"name":"get_hls_information","arguments":{},"id":1}') + + while True: + recv = ws.recv() + if not recv: + continue + data = self._parse_json(recv, video_id, fatal=False) + if not data or not isinstance(data, dict): + continue + if data.get('name') == '_response_' and data.get('id') == 1: + self.write_debug('[debug] Goodbye.') + playlist_data = data + break + elif self._downloader.params.get('verbose', False): + if len(recv) > 100: + recv = recv[:100] + '...' 
+                self.to_screen('[debug] Server said: %s' % recv)
+
+        if not playlist_data:
+            raise ExtractorError('Unable to fetch HLS playlist info via WebSocket')
+
+        formats = []
+        for name, playlists in playlist_data['arguments'].items():
+            if not isinstance(playlists, list):
+                continue
+            for pl in playlists:
+                if pl.get('status') == 0 and 'master_playlist' in pl.get('url'):
+                    formats.extend(self._extract_m3u8_formats(
+                        pl['url'], video_id, ext='mp4', m3u8_id=name, live=True,
+                        headers={
+                            'Origin': 'https://live.fc2.com',
+                            'Referer': url,
+                        }))
+
+        self._sort_formats(formats)
+        for fmt in formats:
+            fmt.update({
+                'protocol': 'fc2_live',
+                'ws': ws,
+            })
+
+        title = self._html_search_meta(('og:title', 'twitter:title'), webpage, 'live title', fatal=False)
+        if not title:
+            title = self._html_extract_title(webpage, 'html title', fatal=False)
+        if title:
+            # remove service name in <title>
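+            # the regex below strips everything from the first " - " separator onwards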
+++ b/yt_dlp/downloader/fragment.py @@ -410,17 +410,16 @@ class FragmentFD(FileDownloader): def bindoj_result(future): return future.result() + def interrupt_trigger_iter(fg): + for f in fg: + if not interrupt_trigger[0]: + break + yield f + spins = [] for idx, (ctx, fragments, info_dict) in enumerate(args): tpe = FTPE(math.ceil(max_workers / max_progress)) - - def interrupt_trigger_iter(): - for f in fragments: - if not interrupt_trigger[0]: - break - yield f - - job = tpe.submit(thread_func, idx, ctx, interrupt_trigger_iter(), info_dict, tpe) + job = tpe.submit(thread_func, idx, ctx, interrupt_trigger_iter(fragments), info_dict, tpe) spins.append((tpe, job)) result = True From 195c22840c594c8f9229cb47ffec2a8984c53a0c Mon Sep 17 00:00:00 2001 From: Lesmiscore <nao20010128@gmail.com> Date: Sat, 26 Feb 2022 12:34:36 +0900 Subject: [PATCH 0729/2552] [downloader/fragment] Ignore `FileNotFoundError` when downloading livestreams when `--live-from-start` is used for YouTube and the live ends, request for the last segment prematurely ends (or 404, 403). this is causing lack of the file and `FileNotFoundError` lacking segment doesn't have any data, so it's safe to ignore --- yt_dlp/downloader/fragment.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index 7b213cd5f..24f4ec959 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -137,7 +137,12 @@ class FragmentFD(FileDownloader): if fragment_info_dict.get('filetime'): ctx['fragment_filetime'] = fragment_info_dict.get('filetime') ctx['fragment_filename_sanitized'] = fragment_filename - return True, self._read_fragment(ctx) + try: + return True, self._read_fragment(ctx) + except FileNotFoundError: + if not info_dict.get('is_live'): + raise + return False, None def _read_fragment(self, ctx): down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb') From 93c8410d333c9a61488448c29aabb6fa831e2991 Mon Sep 17 00:00:00 2001 From: "Lesmiscore (Naoya Ozaki)" <nao20010128@gmail.com> Date: Mon, 28 Feb 2022 13:10:54 +0900 Subject: [PATCH 0730/2552] [downloader/fragment] Fix bugs around resuming with Range (#2901) Authored by: Lesmiscore --- yt_dlp/downloader/fragment.py | 2 +- yt_dlp/downloader/http.py | 48 ++++++++++++++++++++++------------- yt_dlp/utils.py | 10 ++++++++ 3 files changed, 41 insertions(+), 19 deletions(-) diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index 24f4ec959..83a9f81b6 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -178,7 +178,7 @@ class FragmentFD(FileDownloader): dl = HttpQuietDownloader( self.ydl, { - 'continuedl': True, + 'continuedl': self.params.get('continuedl', True), 'quiet': self.params.get('quiet'), 'noprogress': True, 'ratelimit': self.params.get('ratelimit'), diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index 34a1eb59b..10ba61024 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -5,7 +5,6 @@ import os import socket import time import random -import re from .common import FileDownloader from ..compat import ( @@ -16,6 +15,7 @@ from ..utils import ( ContentTooShortError, encodeFilename, int_or_none, + parse_http_range, sanitized_Request, ThrottledDownload, write_xattr, @@ -59,6 +59,9 @@ class HttpFD(FileDownloader): ctx.chunk_size = None throttle_start = None + # parse given Range + req_start, req_end, _ = parse_http_range(headers.get('Range')) + if self.params.get('continuedl', 
True): # Establish possible resume length if os.path.isfile(encodeFilename(ctx.tmpfilename)): @@ -91,6 +94,9 @@ class HttpFD(FileDownloader): if not is_test and chunk_size else chunk_size) if ctx.resume_len > 0: range_start = ctx.resume_len + if req_start is not None: + # offset the beginning of Range to be within request + range_start += req_start if ctx.is_resume: self.report_resuming_byte(ctx.resume_len) ctx.open_mode = 'ab' @@ -99,7 +105,17 @@ class HttpFD(FileDownloader): else: range_start = None ctx.is_resume = False - range_end = range_start + ctx.chunk_size - 1 if ctx.chunk_size else None + + if ctx.chunk_size: + chunk_aware_end = range_start + ctx.chunk_size - 1 + # we're not allowed to download outside Range + range_end = chunk_aware_end if req_end is None else min(chunk_aware_end, req_end) + elif req_end is not None: + # there's no need for chunked downloads, so download until the end of Range + range_end = req_end + else: + range_end = None + if range_end and ctx.data_len is not None and range_end >= ctx.data_len: range_end = ctx.data_len - 1 has_range = range_start is not None @@ -124,23 +140,19 @@ class HttpFD(FileDownloader): # https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799) if has_range: content_range = ctx.data.headers.get('Content-Range') - if content_range: - content_range_m = re.search(r'bytes (\d+)-(\d+)?(?:/(\d+))?', content_range) + content_range_start, content_range_end, content_len = parse_http_range(content_range) + if content_range_start is not None and range_start == content_range_start: # Content-Range is present and matches requested Range, resume is possible - if content_range_m: - if range_start == int(content_range_m.group(1)): - content_range_end = int_or_none(content_range_m.group(2)) - content_len = int_or_none(content_range_m.group(3)) - accept_content_len = ( - # Non-chunked download - not ctx.chunk_size - # Chunked download and requested piece or - # its part is promised to be served - or content_range_end == range_end - or content_len < range_end) - if accept_content_len: - ctx.data_len = content_len - return + accept_content_len = ( + # Non-chunked download + not ctx.chunk_size + # Chunked download and requested piece or + # its part is promised to be served + or content_range_end == range_end + or content_len < range_end) + if accept_content_len: + ctx.data_len = content_len + return # Content-Range is either not present or invalid. Assuming remote webserver is # trying to send the whole file, resume is not possible, so wiping the local file # and performing entire redownload diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 6ec8da11b..cc08bd130 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -5252,6 +5252,16 @@ def join_nonempty(*values, delim='-', from_dict=None): return delim.join(map(str, filter(None, values))) +def parse_http_range(range): + """ Parse value of "Range" or "Content-Range" HTTP header into tuple. 
""" + if not range: + return None, None, None + crg = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range) + if not crg: + return None, None, None + return int(crg.group(1)), int_or_none(crg.group(2)), int_or_none(crg.group(3)) + + class Config: own_args = None filename = None From 7c219ea60112bc79a3f4306c29c0c7942b19f26b Mon Sep 17 00:00:00 2001 From: coletdev <coletdjnz@protonmail.com> Date: Tue, 1 Mar 2022 18:08:19 +1300 Subject: [PATCH 0731/2552] [youtube:tab] Follow redirect to regional channel (#2884) Closes #2694 Authored by: coletdjnz --- yt_dlp/extractor/youtube.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 47b3c5a85..602d48e3c 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3950,13 +3950,14 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): break @staticmethod - def _extract_selected_tab(tabs): + def _extract_selected_tab(tabs, fatal=True): for tab in tabs: renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {} if renderer.get('selected') is True: return renderer else: - raise ExtractorError('Unable to find selected tab') + if fatal: + raise ExtractorError('Unable to find selected tab') @classmethod def _extract_uploader(cls, data): @@ -4229,7 +4230,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): self.report_warning(error_to_compat_str(e)) break - if dict_get(data, ('contents', 'currentVideoEndpoint')): + if dict_get(data, ('contents', 'currentVideoEndpoint', 'onResponseReceivedActions')): break last_error = 'Incomplete yt initial data received' @@ -4248,7 +4249,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage) # Reject webpage data if redirected to home page without explicitly requesting selected_tab = self._extract_selected_tab(traverse_obj( - data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])) or {} + data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[]), fatal=False) or {} if (url != 'https://www.youtube.com/feed/recommended' and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch' # Home page and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', [])): @@ -4280,7 +4281,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): return self._extract_response( item_id=item_id, query=params, ep=ep, headers=headers, ytcfg=ytcfg, fatal=fatal, default_client=default_client, - check_get_keys=('contents', 'currentVideoEndpoint')) + check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions')) err_note = 'Failed to resolve url (does the playlist exist?)' if fatal: raise ExtractorError(err_note, expected=True) @@ -4981,6 +4982,10 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'skip_download': True, 'extractor_args': {'youtubetab': {'skip': ['webpage']}} }, + }, { + 'note': 'non-standard redirect to regional channel', + 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ', + 'only_matching': True }] @classmethod @@ -5053,6 +5058,16 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): data, ytcfg = self._extract_data(url, item_id) + # YouTube may provide a non-standard redirect to the regional channel + # See: https://github.com/yt-dlp/yt-dlp/issues/2694 + redirect_url = traverse_obj( + data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 
'webCommandMetadata', 'url'), get_all=False) + if redirect_url and 'no-youtube-channel-redirect' not in compat_opts: + redirect_url = ''.join(( + urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post'])) + self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}') + return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key()) + tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list) if tabs: selected_tab = self._extract_selected_tab(tabs) From 334b1c480037ed5559ad53b435f13fbf70db5dd0 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Wed, 2 Mar 2022 19:16:58 +0530 Subject: [PATCH 0732/2552] [rokfin] Add extractor (#1534) Authored by: P-reducible, pukkandan --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/rokfin.py | 158 +++++++++++++++++++++++++++++++++ 2 files changed, 159 insertions(+) create mode 100644 yt_dlp/extractor/rokfin.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index ef1d6c14d..9cb1702f1 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1333,6 +1333,7 @@ from .reverbnation import ReverbNationIE from .rice import RICEIE from .rmcdecouverte import RMCDecouverteIE from .rockstargames import RockstarGamesIE +from .rokfin import RokfinIE from .roosterteeth import RoosterTeethIE, RoosterTeethSeriesIE from .rottentomatoes import RottenTomatoesIE from .rozhlas import RozhlasIE diff --git a/yt_dlp/extractor/rokfin.py b/yt_dlp/extractor/rokfin.py new file mode 100644 index 000000000..8b609cfb9 --- /dev/null +++ b/yt_dlp/extractor/rokfin.py @@ -0,0 +1,158 @@ +# coding: utf-8 + +import itertools +from datetime import datetime + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + float_or_none, + format_field, + int_or_none, + str_or_none, + traverse_obj, + unified_timestamp, + url_or_none, +) + + +class RokfinIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rokfin\.com/(?P<id>(?P<type>post|stream)/\d+)' + _TESTS = [{ + 'url': 'https://www.rokfin.com/post/57548/Mitt-Romneys-Crazy-Solution-To-Climate-Change', + 'info_dict': { + 'id': 'post/57548', + 'ext': 'mp4', + 'title': 'Mitt Romney\'s Crazy Solution To Climate Change', + 'thumbnail': r're:https://img\.production\.rokfin\.com/.+', + 'upload_date': '20211023', + 'timestamp': 1634998029, + 'channel': 'Jimmy Dore', + 'channel_id': 65429, + 'channel_url': 'https://rokfin.com/TheJimmyDoreShow', + 'duration': 213.0, + 'availability': 'public', + 'live_status': 'not_live', + 'dislike_count': int, + 'like_count': int, + } + }, { + 'url': 'https://rokfin.com/post/223/Julian-Assange-Arrested-Streaming-In-Real-Time', + 'info_dict': { + 'id': 'post/223', + 'ext': 'mp4', + 'title': 'Julian Assange Arrested: Streaming In Real Time', + 'thumbnail': r're:https://img\.production\.rokfin\.com/.+', + 'upload_date': '20190412', + 'timestamp': 1555052644, + 'channel': 'Ron Placone', + 'channel_id': 10, + 'channel_url': 'https://rokfin.com/RonPlacone', + 'availability': 'public', + 'live_status': 'not_live', + 'dislike_count': int, + 'like_count': int, + 'tags': ['FreeThinkingMedia^', 'RealProgressives^'], + } + }, { + 'url': 'https://www.rokfin.com/stream/10543/Its-A-Crazy-Mess-Regional-Director-Blows-Whistle-On-Pfizers-Vaccine-Trial-Data', + 'info_dict': { + 'id': 'stream/10543', + 'ext': 'mp4', + 'title': '"It\'s A Crazy Mess" Regional Director Blows Whistle On Pfizer\'s Vaccine Trial 
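The redirect detection in the YouTube hunk above leans on traverse_obj to walk a deeply nested response. A quick illustration of that exact path (the sample payload is invented; only the path mirrors the real code):

    from yt_dlp.utils import traverse_obj

    data = {'onResponseReceivedActions': [
        {'navigateAction': {'endpoint': {'commandMetadata': {
            'webCommandMetadata': {'url': '/channel/UCxxxx'}}}}},
    ]}
    traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint',
               'commandMetadata', 'webCommandMetadata', 'url'),
        get_all=False)
    # -> '/channel/UCxxxx'; `...` fans out over every list element, and
    # get_all=False returns the first match instead of a list of matches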
Data', + 'thumbnail': r're:https://img\.production\.rokfin\.com/.+', + 'description': 'md5:324ce2d3e3b62e659506409e458b9d8e', + 'channel': 'Ryan Cristián', + 'channel_id': 53856, + 'channel_url': 'https://rokfin.com/TLAVagabond', + 'availability': 'public', + 'is_live': False, + 'was_live': True, + 'live_status': 'was_live', + 'timestamp': 1635874720, + 'release_timestamp': 1635874720, + 'release_date': '20211102', + 'upload_date': '20211102', + 'dislike_count': int, + 'like_count': int, + 'tags': ['FreeThinkingMedia^'], + } + }] + + def _real_extract(self, url): + video_id, video_type = self._match_valid_url(url).group('id', 'type') + + metadata = self._download_json(f'https://prod-api-v2.production.rokfin.com/api/v2/public/{video_id}', + video_id, fatal=False) or {} + + scheduled = unified_timestamp(metadata.get('scheduledAt')) + live_status = ('was_live' if metadata.get('stoppedAt') + else 'is_upcoming' if scheduled + else 'is_live' if video_type == 'stream' + else 'not_live') + + video_url = traverse_obj(metadata, 'url', ('content', 'contentUrl'), expected_type=url_or_none) + formats, subtitles = [{'url': video_url}] if video_url else [], {} + if determine_ext(video_url) == 'm3u8': + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + video_url, video_id, fatal=False, live=live_status == 'is_live') + + if not formats: + if metadata.get('premiumPlan'): + self.raise_login_required('This video is only available to premium users', True, method='cookies') + elif scheduled: + self.raise_no_formats( + f'Stream is offline; sheduled for {datetime.fromtimestamp(scheduled).strftime("%Y-%m-%d %H:%M:%S")}', + video_id=video_id, expected=True) + self._sort_formats(formats) + + uploader = traverse_obj(metadata, ('createdBy', 'username'), ('creator', 'username')) + timestamp = (scheduled or float_or_none(metadata.get('postedAtMilli'), 1000) + or unified_timestamp(metadata.get('creationDateTime'))) + return { + 'id': video_id, + 'formats': formats, + 'subtitles': subtitles, + 'title': str_or_none(traverse_obj(metadata, 'title', ('content', 'contentTitle'))), + 'duration': float_or_none(traverse_obj(metadata, ('content', 'duration'))), + 'thumbnail': url_or_none(traverse_obj(metadata, 'thumbnail', ('content', 'thumbnailUrl1'))), + 'description': str_or_none(traverse_obj(metadata, 'description', ('content', 'contentDescription'))), + 'like_count': int_or_none(metadata.get('likeCount')), + 'dislike_count': int_or_none(metadata.get('dislikeCount')), + 'channel': str_or_none(traverse_obj(metadata, ('createdBy', 'name'), ('creator', 'name'))), + 'channel_id': traverse_obj(metadata, ('createdBy', 'id'), ('creator', 'id')), + 'channel_url': url_or_none(f'https://rokfin.com/{uploader}') if uploader else None, + 'timestamp': timestamp, + 'release_timestamp': timestamp if live_status != 'not_live' else None, + 'tags': traverse_obj(metadata, ('tags', ..., 'title'), expected_type=str_or_none), + 'live_status': live_status, + 'availability': self._availability( + needs_premium=bool(metadata.get('premiumPlan')), + is_private=False, needs_subscription=False, needs_auth=False, is_unlisted=False), + # 'comment_count': metadata.get('numComments'), # Data provided by website is wrong + '__post_extractor': self.extract_comments(video_id) if video_type == 'post' else None, + } + + def _get_comments(self, video_id): + pages_total = None + for page_n in itertools.count(): + raw_comments = self._download_json( + 
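The chained conditional that derives live_status above packs four states into one expression. Restated as a plain function with the same precedence (the field values here are hypothetical; the real code reads them from the API response):

    def classify(stopped_at, scheduled_at, video_type):
        if stopped_at:                 # the stream happened and has ended
            return 'was_live'
        if scheduled_at:               # a start time is announced
            return 'is_upcoming'
        return 'is_live' if video_type == 'stream' else 'not_live'

    classify(1635874720, None, 'stream')   # -> 'was_live'
    classify(None, 1635874720, 'stream')   # -> 'is_upcoming'
    classify(None, None, 'post')           # -> 'not_live'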
f'https://prod-api-v2.production.rokfin.com/api/v2/public/comment?postId={video_id[5:]}&page={page_n}&size=50', + video_id, note=f'Downloading viewer comments page {page_n + 1}{format_field(pages_total, template=" of %s")}', + fatal=False) or {} + + for comment in raw_comments.get('content') or []: + yield { + 'text': str_or_none(comment.get('comment')), + 'author': str_or_none(comment.get('name')), + 'id': comment.get('commentId'), + 'author_id': comment.get('userId'), + 'parent': 'root', + 'like_count': int_or_none(comment.get('numLikes')), + 'dislike_count': int_or_none(comment.get('numDislikes')), + 'timestamp': unified_timestamp(comment.get('postedAt')) + } + + pages_total = int_or_none(raw_comments.get('totalPages')) + if not raw_comments.get('content') or raw_comments.get('last') is not False or page_n > (pages_total or 0): + return From 7eaf7f9abafb287e7c0e0e019430f4f8ed860e05 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Wed, 2 Mar 2022 21:16:25 +0530 Subject: [PATCH 0733/2552] [rokfin] Add stack and channel extractors (#1534) Authored by: P-reducible, pukkandan --- README.md | 4 ++ yt_dlp/extractor/extractors.py | 6 +- yt_dlp/extractor/rokfin.py | 110 +++++++++++++++++++++++++++++++-- 3 files changed, 113 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 70b2e202f..1aab0ba94 100644 --- a/README.md +++ b/README.md @@ -1698,6 +1698,10 @@ The following extractors use this feature: * `app_version`: App version to call mobile APIs with - should be set along with `manifest_app_version`. (e.g. `20.2.1`) * `manifest_app_version`: Numeric app version to call mobile APIs with. (e.g. `221`) +#### rokfinchannel +* `tab`: Which tab to download. One of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`. (E.g. 
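The comment fetcher above is the usual shape for paged APIs: count pages upward, yield each page's items, and stop on an empty page, a "last" marker, or the reported page total. A self-contained sketch of the same loop, with fetch_page standing in for the real HTTP call (note that the next commit in this series tightens this stop condition):

    import itertools

    def paged_items(fetch_page):
        total_pages = None
        for page in itertools.count():
            resp = fetch_page(page) or {}
            yield from resp.get('content') or []
            total_pages = resp.get('totalPages')
            if not resp.get('content') or resp.get('last') is not False or page > (total_pages or 0):
                return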
`rokfinchannel:tab=streams`) + + NOTE: These options may be changed/removed in the future without concern for backward compatibility <!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE --> diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 9cb1702f1..1a94c8b75 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1333,7 +1333,11 @@ from .reverbnation import ReverbNationIE from .rice import RICEIE from .rmcdecouverte import RMCDecouverteIE from .rockstargames import RockstarGamesIE -from .rokfin import RokfinIE +from .rokfin import ( + RokfinIE, + RokfinStackIE, + RokfinChannelIE, +) from .roosterteeth import RoosterTeethIE, RoosterTeethSeriesIE from .rottentomatoes import RottenTomatoesIE from .rozhlas import RozhlasIE diff --git a/yt_dlp/extractor/rokfin.py b/yt_dlp/extractor/rokfin.py index 8b609cfb9..79a5b2336 100644 --- a/yt_dlp/extractor/rokfin.py +++ b/yt_dlp/extractor/rokfin.py @@ -1,11 +1,11 @@ # coding: utf-8 - import itertools from datetime import datetime from .common import InfoExtractor from ..utils import ( determine_ext, + ExtractorError, float_or_none, format_field, int_or_none, @@ -16,6 +16,9 @@ from ..utils import ( ) +_API_BASE_URL = 'https://prod-api-v2.production.rokfin.com/api/v2/public/' + + class RokfinIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?rokfin\.com/(?P<id>(?P<type>post|stream)/\d+)' _TESTS = [{ @@ -82,8 +85,7 @@ class RokfinIE(InfoExtractor): def _real_extract(self, url): video_id, video_type = self._match_valid_url(url).group('id', 'type') - metadata = self._download_json(f'https://prod-api-v2.production.rokfin.com/api/v2/public/{video_id}', - video_id, fatal=False) or {} + metadata = self._download_json(f'{_API_BASE_URL}{video_id}', video_id) scheduled = unified_timestamp(metadata.get('scheduledAt')) live_status = ('was_live' if metadata.get('stoppedAt') @@ -137,7 +139,7 @@ class RokfinIE(InfoExtractor): pages_total = None for page_n in itertools.count(): raw_comments = self._download_json( - f'https://prod-api-v2.production.rokfin.com/api/v2/public/comment?postId={video_id[5:]}&page={page_n}&size=50', + f'{_API_BASE_URL}comment?postId={video_id[5:]}&page={page_n}&size=50', video_id, note=f'Downloading viewer comments page {page_n + 1}{format_field(pages_total, template=" of %s")}', fatal=False) or {} @@ -153,6 +155,102 @@ class RokfinIE(InfoExtractor): 'timestamp': unified_timestamp(comment.get('postedAt')) } - pages_total = int_or_none(raw_comments.get('totalPages')) - if not raw_comments.get('content') or raw_comments.get('last') is not False or page_n > (pages_total or 0): + pages_total = int_or_none(raw_comments.get('totalPages')) or None + is_last = raw_comments.get('last') + if not raw_comments.get('content') or is_last or (page_n > pages_total if pages_total else is_last is not False): return + + +class RokfinPlaylistBaseIE(InfoExtractor): + _TYPES = { + 'video': 'post', + 'audio': 'post', + 'stream': 'stream', + 'dead_stream': 'stream', + 'stack': 'stack', + } + + def _get_video_data(self, metadata): + for content in metadata.get('content') or []: + media_type = self._TYPES.get(content.get('mediaType')) + video_id = content.get('id') if media_type == 'post' else content.get('mediaId') + if not media_type or not video_id: + continue + + yield self.url_result(f'https://rokfin.com/{media_type}/{video_id}', video_id=f'{media_type}/{video_id}', + video_title=str_or_none(traverse_obj(content, ('content', 'contentTitle')))) + + +class RokfinStackIE(RokfinPlaylistBaseIE): + IE_NAME = 
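On the command line, the extractor argument documented above is passed like any other; for example, to fetch only the streams tab of the channel used in the tests below:

    yt-dlp --extractor-args "rokfinchannel:tab=streams" https://rokfin.com/TheConvoCouch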
'rokfin:stack' + _VALID_URL = r'https?://(?:www\.)?rokfin\.com/stack/(?P<id>[^/]+)' + _TESTS = [{ + 'url': 'https://www.rokfin.com/stack/271/Tulsi-Gabbard-Portsmouth-Townhall-FULL--Feb-9-2020', + 'playlist_count': 8, + 'info_dict': { + 'id': '271', + }, + }] + + def _real_extract(self, url): + list_id = self._match_id(url) + return self.playlist_result(self._get_video_data( + self._download_json(f'{_API_BASE_URL}stack/{list_id}', list_id)), list_id) + + +class RokfinChannelIE(RokfinPlaylistBaseIE): + IE_NAME = 'rokfin:channel' + _VALID_URL = r'https?://(?:www\.)?rokfin\.com/(?!((feed/?)|(discover/?)|(channels/?))$)(?P<id>[^/]+)/?$' + _TESTS = [{ + 'url': 'https://rokfin.com/TheConvoCouch', + 'playlist_mincount': 100, + 'info_dict': { + 'id': '12071-new', + 'title': 'TheConvoCouch - New', + 'description': 'md5:bb622b1bca100209b91cd685f7847f06', + }, + }] + + _TABS = { + 'new': 'posts', + 'top': 'top', + 'videos': 'video', + 'podcasts': 'audio', + 'streams': 'stream', + 'stacks': 'stack', + } + + def _real_initialize(self): + self._validate_extractor_args() + + def _validate_extractor_args(self): + requested_tabs = self._configuration_arg('tab', None) + if requested_tabs is not None and (len(requested_tabs) > 1 or requested_tabs[0] not in self._TABS): + raise ExtractorError(f'Invalid extractor-arg "tab". Must be one of {", ".join(self._TABS)}', expected=True) + + def _entries(self, channel_id, channel_name, tab): + pages_total = None + for page_n in itertools.count(0): + if tab in ('posts', 'top'): + data_url = f'{_API_BASE_URL}user/{channel_name}/{tab}?page={page_n}&size=50' + else: + data_url = f'{_API_BASE_URL}post/search/{tab}?page={page_n}&size=50&creator={channel_id}' + metadata = self._download_json( + data_url, channel_name, + note=f'Downloading video metadata page {page_n + 1}{format_field(pages_total, template=" of %s")}') + + yield from self._get_video_data(metadata) + pages_total = int_or_none(metadata.get('totalPages')) or None + is_last = metadata.get('last') + if is_last or (page_n > pages_total if pages_total else is_last is not False): + return + + def _real_extract(self, url): + channel_name = self._match_id(url) + channel_info = self._download_json(f'{_API_BASE_URL}user/{channel_name}', channel_name) + channel_id = channel_info['id'] + tab = self._configuration_arg('tab', default=['new'])[0] + + return self.playlist_result( + self._entries(channel_id, channel_name, self._TABS[tab]), + f'{channel_id}-{tab}', f'{channel_name} - {tab.title()}', str_or_none(channel_info.get('description'))) From 02fc6feb6e9b83d8756886efb91c0bf61b4c4de7 Mon Sep 17 00:00:00 2001 From: "Lesmiscore (Naoya Ozaki)" <nao20010128@gmail.com> Date: Thu, 3 Mar 2022 02:06:34 +0900 Subject: [PATCH 0734/2552] [mirrativ] Cleanup extractor code (#2925) Authored by: Lesmiscore --- yt_dlp/extractor/mirrativ.py | 83 +++++++++++++++--------------------- 1 file changed, 35 insertions(+), 48 deletions(-) diff --git a/yt_dlp/extractor/mirrativ.py b/yt_dlp/extractor/mirrativ.py index 81aea54f6..2111de615 100644 --- a/yt_dlp/extractor/mirrativ.py +++ b/yt_dlp/extractor/mirrativ.py @@ -19,9 +19,25 @@ class MirrativBaseIE(InfoExtractor): class MirrativIE(MirrativBaseIE): IE_NAME = 'mirrativ' _VALID_URL = r'https?://(?:www\.)?mirrativ\.com/live/(?P<id>[^/?#&]+)' - LIVE_API_URL = 'https://www.mirrativ.com/api/live/live?live_id=%s' TESTS = [{ + 'url': 'https://mirrativ.com/live/UQomuS7EMgHoxRHjEhNiHw', + 'info_dict': { + 'id': 'UQomuS7EMgHoxRHjEhNiHw', + 'title': 'ねむいぃ、。『参加型』🔰jcが初めてやるCOD✨初見さん大歓迎💗', + 'is_live': True, + 
'description': 'md5:bfcd8f77f2fab24c3c672e5620f3f16e', + 'thumbnail': r're:https?://.+', + 'uploader': '# あ ち ゅ 。💡', + 'uploader_id': '118572165', + 'duration': None, + 'view_count': 1241, + 'release_timestamp': 1646229192, + 'timestamp': 1646229167, + 'was_live': False, + }, + 'skip': 'livestream', + }, { 'url': 'https://mirrativ.com/live/POxyuG1KmW2982lqlDTuPw', 'only_matching': True, }] @@ -29,12 +45,11 @@ class MirrativIE(MirrativBaseIE): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage('https://www.mirrativ.com/live/%s' % video_id, video_id) - live_response = self._download_json(self.LIVE_API_URL % video_id, video_id) + live_response = self._download_json(f'https://www.mirrativ.com/api/live/live?live_id={video_id}', video_id) self.assert_error(live_response) hls_url = dict_get(live_response, ('archive_url_hls', 'streaming_url_hls')) is_live = bool(live_response.get('is_live')) - was_live = bool(live_response.get('is_archive')) if not hls_url: raise ExtractorError('Neither archive nor live is available.', expected=True) @@ -42,55 +57,29 @@ class MirrativIE(MirrativBaseIE): hls_url, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls', live=is_live) - rtmp_url = live_response.get('streaming_url_edge') - if rtmp_url: - keys_to_copy = ('width', 'height', 'vcodec', 'acodec', 'tbr') - fmt = { - 'format_id': 'rtmp', - 'url': rtmp_url, - 'protocol': 'rtmp', - 'ext': 'mp4', - } - fmt.update({k: traverse_obj(formats, (0, k)) for k in keys_to_copy}) - formats.append(fmt) self._sort_formats(formats) - title = self._og_search_title(webpage, default=None) or self._search_regex( - r'<title>\s*(.+?) - Mirrativ\s*', webpage) or live_response.get('title') - description = live_response.get('description') - thumbnail = live_response.get('image_url') - - duration = try_get(live_response, lambda x: x['ended_at'] - x['started_at']) - view_count = live_response.get('total_viewer_num') - release_timestamp = live_response.get('started_at') - timestamp = live_response.get('created_at') - - owner = live_response.get('owner', {}) - uploader = owner.get('name') - uploader_id = owner.get('user_id') - return { 'id': video_id, - 'title': title, + 'title': self._og_search_title(webpage, default=None) or self._search_regex( + r'\s*(.+?) 
- Mirrativ\s*', webpage) or live_response.get('title'), 'is_live': is_live, - 'description': description, + 'description': live_response.get('description'), 'formats': formats, - 'thumbnail': thumbnail, - 'uploader': uploader, - 'uploader_id': uploader_id, - 'duration': duration, - 'view_count': view_count, - 'release_timestamp': release_timestamp, - 'timestamp': timestamp, - 'was_live': was_live, + 'thumbnail': live_response.get('image_url'), + 'uploader': traverse_obj(live_response, ('owner', 'name')), + 'uploader_id': traverse_obj(live_response, ('owner', 'user_id')), + 'duration': try_get(live_response, lambda x: x['ended_at'] - x['started_at']) if not is_live else None, + 'view_count': live_response.get('total_viewer_num'), + 'release_timestamp': live_response.get('started_at'), + 'timestamp': live_response.get('created_at'), + 'was_live': bool(live_response.get('is_archive')), } class MirrativUserIE(MirrativBaseIE): IE_NAME = 'mirrativ:user' _VALID_URL = r'https?://(?:www\.)?mirrativ\.com/user/(?P\d+)' - LIVE_HISTORY_API_URL = 'https://www.mirrativ.com/api/live/live_history?user_id=%s&page=%d' - USER_INFO_API_URL = 'https://www.mirrativ.com/api/user/profile?user_id=%s' _TESTS = [{ # Live archive is available up to 3 days @@ -104,8 +93,8 @@ class MirrativUserIE(MirrativBaseIE): page = 1 while page is not None: api_response = self._download_json( - self.LIVE_HISTORY_API_URL % (user_id, page), user_id, - note='Downloading page %d' % page) + f'https://www.mirrativ.com/api/live/live_history?user_id={user_id}&page={page}', user_id, + note=f'Downloading page {page}') self.assert_error(api_response) lives = api_response.get('lives') if not lives: @@ -123,12 +112,10 @@ class MirrativUserIE(MirrativBaseIE): def _real_extract(self, url): user_id = self._match_id(url) user_info = self._download_json( - self.USER_INFO_API_URL % user_id, user_id, + f'https://www.mirrativ.com/api/user/profile?user_id={user_id}', user_id, note='Downloading user info', fatal=False) self.assert_error(user_info) - uploader = user_info.get('name') - description = user_info.get('description') - - entries = self._entries(user_id) - return self.playlist_result(entries, user_id, uploader, description) + return self.playlist_result( + self._entries(user_id), user_id, + user_info.get('name'), user_info.get('description')) From f494ddada865afc8af916225325d2dd00c5ca233 Mon Sep 17 00:00:00 2001 From: "Lesmiscore (Naoya Ozaki)" Date: Thu, 3 Mar 2022 02:08:13 +0900 Subject: [PATCH 0735/2552] [niconico] Add playlist extractors and refactor (#2915) Authored by: Lesmiscore --- yt_dlp/extractor/extractors.py | 3 +- yt_dlp/extractor/niconico.py | 187 +++++++++++++++++++++++++++------ 2 files changed, 155 insertions(+), 35 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 1a94c8b75..0f26dc24f 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1011,11 +1011,12 @@ from .nick import ( NickNightIE, NickRuIE, ) - from .niconico import ( NiconicoIE, NiconicoPlaylistIE, NiconicoUserIE, + NiconicoSeriesIE, + NiconicoHistoryIE, NicovideoSearchDateIE, NicovideoSearchIE, NicovideoSearchURLIE, diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py index 6e561bee5..8f56fc95b 100644 --- a/yt_dlp/extractor/niconico.py +++ b/yt_dlp/extractor/niconico.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import datetime import itertools +import functools import json import re @@ -12,6 +13,7 @@ from ..compat import ( compat_str, compat_parse_qs, 
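The duration line above shows why try_get takes a lambda: while a stream is still live, 'ended_at' is absent, and try_get converts the resulting KeyError into None instead of aborting extraction. Sample dicts invented for illustration:

    from yt_dlp.utils import try_get

    live = {'started_at': 1646229167}      # no 'ended_at' yet
    try_get(live, lambda x: x['ended_at'] - x['started_at'])    # -> None

    ended = {'started_at': 1646229167, 'ended_at': 1646229380}
    try_get(ended, lambda x: x['ended_at'] - x['started_at'])   # -> 213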
compat_urllib_parse_urlparse, + compat_HTTPError, ) from ..utils import ( ExtractorError, @@ -24,7 +26,9 @@ from ..utils import ( PostProcessingError, remove_start, str_or_none, + traverse_obj, try_get, + unescapeHTML, unified_timestamp, urlencode_postdata, xpath_text, @@ -606,8 +610,61 @@ class NiconicoIE(InfoExtractor): } -class NiconicoPlaylistIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/|my/)?mylist/(?P\d+)' +class NiconicoPlaylistBaseIE(InfoExtractor): + _PAGE_SIZE = 100 + + _API_HEADERS = { + 'X-Frontend-ID': '6', + 'X-Frontend-Version': '0', + 'X-Niconico-Language': 'en-us' + } + + def _call_api(self, list_id, resource, query): + "Implement this in child class" + pass + + @staticmethod + def _parse_owner(item): + return { + 'uploader': traverse_obj(item, ('owner', 'name')), + 'uploader_id': traverse_obj(item, ('owner', 'id')), + } + + def _fetch_page(self, list_id, page): + page += 1 + resp = self._call_api(list_id, 'page %d' % page, { + 'page': page, + 'pageSize': self._PAGE_SIZE, + }) + # this is needed to support both mylist and user + for video in traverse_obj(resp, ('items', ..., ('video', None))) or []: + video_id = video.get('id') + if not video_id: + # skip {"video": {"id": "blablabla", ...}} + continue + count = video.get('count') or {} + get_count = lambda x: int_or_none(count.get(x)) + yield { + '_type': 'url', + 'id': video_id, + 'title': video.get('title'), + 'url': f'https://www.nicovideo.jp/watch/{video_id}', + 'description': video.get('shortDescription'), + 'duration': int_or_none(video.get('duration')), + 'view_count': get_count('view'), + 'comment_count': get_count('comment'), + 'thumbnail': traverse_obj(video, ('thumbnail', ('nHdUrl', 'largeUrl', 'listingUrl', 'url'))), + 'ie_key': NiconicoIE.ie_key(), + **self._parse_owner(video), + } + + def _entries(self, list_id): + return OnDemandPagedList(functools.partial(self._fetch_page, list_id), self._PAGE_SIZE) + + +class NiconicoPlaylistIE(NiconicoPlaylistBaseIE): + IE_NAME = 'niconico:playlist' + _VALID_URL = r'https?://(?:(?:www\.|sp\.)?nicovideo\.jp|nico\.ms)/(?:user/\d+/)?(?:my/)?mylist/(?:#/)?(?P\d+)' _TESTS = [{ 'url': 'http://www.nicovideo.jp/mylist/27411728', @@ -618,48 +675,110 @@ class NiconicoPlaylistIE(InfoExtractor): 'uploader': 'のっく', 'uploader_id': '805442', }, - 'playlist_mincount': 225, + 'playlist_mincount': 291, }, { 'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728', 'only_matching': True, + }, { + 'url': 'https://www.nicovideo.jp/my/mylist/#/68048635', + 'only_matching': True, }] - _API_HEADERS = { - 'X-Frontend-ID': '6', - 'X-Frontend-Version': '0' - } + def _call_api(self, list_id, resource, query): + return self._download_json( + f'https://nvapi.nicovideo.jp/v2/mylists/{list_id}', list_id, + f'Downloading {resource}', query=query, + headers=self._API_HEADERS)['data']['mylist'] def _real_extract(self, url): list_id = self._match_id(url) + mylist = self._call_api(list_id, 'list', { + 'pageSize': 1, + }) + return self.playlist_result( + self._entries(list_id), list_id, + mylist.get('name'), mylist.get('description'), **self._parse_owner(mylist)) - def get_page_data(pagenum, pagesize): - return self._download_json( - 'http://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id, - query={'page': 1 + pagenum, 'pageSize': pagesize}, - headers=self._API_HEADERS).get('data').get('mylist') - - data = get_page_data(0, 1) - title = data.get('name') - description = data.get('description') - uploader = data.get('owner').get('name') - uploader_id = 
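The _entries helper above plugs a page-fetching function into OnDemandPagedList, which only invokes it for the pages a requested slice actually touches. A toy version against an in-memory "API" (the page size and data are made up):

    import functools
    from yt_dlp.utils import OnDemandPagedList

    DATA = [f'sm{n}' for n in range(1, 251)]

    def fetch_page(page_size, page):        # page is 0-based
        start = page * page_size
        return DATA[start:start + page_size]

    pages = OnDemandPagedList(functools.partial(fetch_page, 100), 100)
    list(pages.getslice(0, 3))              # -> ['sm1', 'sm2', 'sm3'], one page fetched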
data.get('owner').get('id') - - def pagefunc(pagenum): - data = get_page_data(pagenum, 25) - return ({ - '_type': 'url', - 'url': 'http://www.nicovideo.jp/watch/' + item.get('watchId'), - } for item in data.get('items')) - return { - '_type': 'playlist', - 'id': list_id, - 'title': title, - 'description': description, - 'uploader': uploader, - 'uploader_id': uploader_id, - 'entries': OnDemandPagedList(pagefunc, 25), - } +class NiconicoSeriesIE(InfoExtractor): + IE_NAME = 'niconico:series' + _VALID_URL = r'https?://(?:(?:www\.|sp\.)?nicovideo\.jp|nico\.ms)/series/(?P\d+)' + + _TESTS = [{ + 'url': 'https://www.nicovideo.jp/series/110226', + 'info_dict': { + 'id': '110226', + 'title': 'ご立派ァ!のシリーズ', + }, + 'playlist_mincount': 10, # as of 2021/03/17 + }, { + 'url': 'https://www.nicovideo.jp/series/12312/', + 'info_dict': { + 'id': '12312', + 'title': 'バトルスピリッツ お勧めカード紹介(調整中)', + }, + 'playlist_mincount': 97, # as of 2021/03/17 + }, { + 'url': 'https://nico.ms/series/203559', + 'only_matching': True, + }] + + def _real_extract(self, url): + list_id = self._match_id(url) + webpage = self._download_webpage(f'https://www.nicovideo.jp/series/{list_id}', list_id) + + title = self._search_regex( + (r'「(.+)(全', + r'<div class="TwitterShareButton"\s+data-text="(.+)\s+https:'), + webpage, 'title', fatal=False) + if title: + title = unescapeHTML(title) + playlist = [ + self.url_result(f'https://www.nicovideo.jp/watch/{v_id}', video_id=v_id) + for v_id in re.findall(r'href="/watch/([a-z0-9]+)" data-href="/watch/\1', webpage)] + return self.playlist_result(playlist, list_id, title) + + +class NiconicoHistoryIE(NiconicoPlaylistBaseIE): + IE_NAME = 'niconico:history' + IE_DESC = 'NicoNico user history. Requires cookies.' + _VALID_URL = r'https?://(?:www\.|sp\.)?nicovideo\.jp/my/history' + + _TESTS = [{ + 'note': 'PC page, with /video', + 'url': 'https://www.nicovideo.jp/my/history/video', + 'only_matching': True, + }, { + 'note': 'PC page, without /video', + 'url': 'https://www.nicovideo.jp/my/history', + 'only_matching': True, + }, { + 'note': 'mobile page, with /video', + 'url': 'https://sp.nicovideo.jp/my/history/video', + 'only_matching': True, + }, { + 'note': 'mobile page, without /video', + 'url': 'https://sp.nicovideo.jp/my/history', + 'only_matching': True, + }] + + def _call_api(self, list_id, resource, query): + return self._download_json( + 'https://nvapi.nicovideo.jp/v1/users/me/watch/history', 'history', + f'Downloading {resource}', query=query, + headers=self._API_HEADERS)['data'] + + def _real_extract(self, url): + list_id = 'history' + try: + mylist = self._call_api(list_id, 'list', { + 'pageSize': 1, + }) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + self.raise_login_required('You have to be logged in to get your watch history') + raise + return self.playlist_result(self._entries(list_id), list_id, **self._parse_owner(mylist)) class NicovideoSearchBaseIE(InfoExtractor): From a5c0c202528be023761d8e3a9a2e9006a7df832e Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Wed, 2 Mar 2022 22:59:01 +0530 Subject: [PATCH 0736/2552] [cleanup] Don't pass protocol to `_extract_m3u8_formats` for live videos `live` parameter already handles changing the protocol --- yt_dlp/extractor/arcpublishing.py | 3 +-- yt_dlp/extractor/dailymotion.py | 4 +--- yt_dlp/extractor/imggaming.py | 2 +- yt_dlp/extractor/threeqsdn.py | 3 +-- yt_dlp/extractor/tv2.py | 8 ++------ yt_dlp/extractor/tvnet.py | 4 +--- yt_dlp/extractor/vgtv.py | 4 +--- 
yt_dlp/extractor/vimeo.py | 3 +-- 8 files changed, 9 insertions(+), 22 deletions(-) diff --git a/yt_dlp/extractor/arcpublishing.py b/yt_dlp/extractor/arcpublishing.py index 1943fd5f8..8880e5c95 100644 --- a/yt_dlp/extractor/arcpublishing.py +++ b/yt_dlp/extractor/arcpublishing.py @@ -124,8 +124,7 @@ class ArcPublishingIE(InfoExtractor): formats.extend(smil_formats) elif stream_type in ('ts', 'hls'): m3u8_formats = self._extract_m3u8_formats( - s_url, uuid, 'mp4', 'm3u8' if is_live else 'm3u8_native', - m3u8_id='hls', fatal=False) + s_url, uuid, 'mp4', live=is_live, m3u8_id='hls', fatal=False) if all([f.get('acodec') == 'none' for f in m3u8_formats]): continue for f in m3u8_formats: diff --git a/yt_dlp/extractor/dailymotion.py b/yt_dlp/extractor/dailymotion.py index e71462061..95589d53a 100644 --- a/yt_dlp/extractor/dailymotion.py +++ b/yt_dlp/extractor/dailymotion.py @@ -259,9 +259,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): continue if media_type == 'application/x-mpegURL': formats.extend(self._extract_m3u8_formats( - media_url, video_id, 'mp4', - 'm3u8' if is_live else 'm3u8_native', - m3u8_id='hls', fatal=False)) + media_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False)) else: f = { 'url': media_url, diff --git a/yt_dlp/extractor/imggaming.py b/yt_dlp/extractor/imggaming.py index bae74b290..230dc86d3 100644 --- a/yt_dlp/extractor/imggaming.py +++ b/yt_dlp/extractor/imggaming.py @@ -96,7 +96,7 @@ class ImgGamingBaseIE(InfoExtractor): continue if proto == 'hls': m3u8_formats = self._extract_m3u8_formats( - media_url, media_id, 'mp4', 'm3u8' if is_live else 'm3u8_native', + media_url, media_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False, headers=self._MANIFEST_HEADERS) for f in m3u8_formats: f.setdefault('http_headers', {}).update(self._MANIFEST_HEADERS) diff --git a/yt_dlp/extractor/threeqsdn.py b/yt_dlp/extractor/threeqsdn.py index 22b4fe7c8..00a51dccd 100644 --- a/yt_dlp/extractor/threeqsdn.py +++ b/yt_dlp/extractor/threeqsdn.py @@ -111,8 +111,7 @@ class ThreeQSDNIE(InfoExtractor): subtitles = self._merge_subtitles(subtitles, subs) elif source_type == 'hls': fmts, subs = self._extract_m3u8_formats_and_subtitles( - source, video_id, 'mp4', 'm3u8' if live else 'm3u8_native', - m3u8_id='hls', fatal=False) + source, video_id, 'mp4', live=live, m3u8_id='hls', fatal=False) formats.extend(fmts) subtitles = self._merge_subtitles(subtitles, subs) elif source_type == 'progressive': diff --git a/yt_dlp/extractor/tv2.py b/yt_dlp/extractor/tv2.py index b48dfe389..977da30fe 100644 --- a/yt_dlp/extractor/tv2.py +++ b/yt_dlp/extractor/tv2.py @@ -81,9 +81,7 @@ class TV2IE(InfoExtractor): elif ext == 'm3u8': if not data.get('drmProtected'): formats.extend(self._extract_m3u8_formats( - video_url, video_id, 'mp4', - 'm3u8' if is_live else 'm3u8_native', - m3u8_id=format_id, fatal=False)) + video_url, video_id, 'mp4', live=is_live, m3u8_id=format_id, fatal=False)) elif ext == 'mpd': formats.extend(self._extract_mpd_formats( video_url, video_id, format_id, fatal=False)) @@ -244,9 +242,7 @@ class KatsomoIE(InfoExtractor): elif ext == 'm3u8': if not data.get('drmProtected'): formats.extend(self._extract_m3u8_formats( - video_url, video_id, 'mp4', - 'm3u8' if is_live else 'm3u8_native', - m3u8_id=format_id, fatal=False)) + video_url, video_id, 'mp4', live=is_live, m3u8_id=format_id, fatal=False)) elif ext == 'mpd': formats.extend(self._extract_mpd_formats( video_url, video_id, format_id, fatal=False)) diff --git a/yt_dlp/extractor/tvnet.py b/yt_dlp/extractor/tvnet.py index 
4fe8dfb6c..aa1e9d923 100644 --- a/yt_dlp/extractor/tvnet.py +++ b/yt_dlp/extractor/tvnet.py @@ -111,9 +111,7 @@ class TVNetIE(InfoExtractor): continue stream_urls.add(stream_url) formats.extend(self._extract_m3u8_formats( - stream_url, video_id, 'mp4', - entry_protocol='m3u8' if is_live else 'm3u8_native', - m3u8_id='hls', fatal=False)) + stream_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False)) self._sort_formats(formats) # better support for radio streams diff --git a/yt_dlp/extractor/vgtv.py b/yt_dlp/extractor/vgtv.py index 10083cd24..9d6090b08 100644 --- a/yt_dlp/extractor/vgtv.py +++ b/yt_dlp/extractor/vgtv.py @@ -195,9 +195,7 @@ class VGTVIE(XstreamIE): hls_url = streams.get('hls') if hls_url: formats.extend(self._extract_m3u8_formats( - hls_url, video_id, 'mp4', - entry_protocol='m3u8' if is_live else 'm3u8_native', - m3u8_id='hls', fatal=False)) + hls_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False)) hds_url = streams.get('hds') if hds_url: diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index c2dec244f..1a9fd00e4 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -166,8 +166,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): for f_id, m_url in sep_manifest_urls: if files_type == 'hls': fmts, subs = self._extract_m3u8_formats_and_subtitles( - m_url, video_id, 'mp4', - 'm3u8' if is_live else 'm3u8_native', m3u8_id=f_id, + m_url, video_id, 'mp4', live=is_live, m3u8_id=f_id, note='Downloading %s m3u8 information' % cdn_name, fatal=False) formats.extend(fmts) From c2ae48dbd5142d91122c7f0233a720a9dc4348b1 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Thu, 3 Mar 2022 01:20:31 +0530 Subject: [PATCH 0737/2552] [manyvids] Extract `uploader` (#2913) Authored by: regarten --- yt_dlp/extractor/manyvids.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/manyvids.py b/yt_dlp/extractor/manyvids.py index e8d7163e4..bd24f8853 100644 --- a/yt_dlp/extractor/manyvids.py +++ b/yt_dlp/extractor/manyvids.py @@ -89,4 +89,5 @@ class ManyVidsIE(InfoExtractor): 'view_count': view_count, 'like_count': like_count, 'formats': formats, + 'uploader': self._html_search_regex(r'<meta[^>]+name="author"[^>]*>([^<]+)', webpage, 'uploader'), } From f1d130902b4ae57f8223798a2472bfedea4203b8 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Thu, 3 Mar 2022 02:42:52 +0530 Subject: [PATCH 0738/2552] [utils] OnDemandPagedList: Do not download pages after error --- yt_dlp/utils.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index cc08bd130..1532210f3 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2799,13 +2799,14 @@ class PagedList: def __init__(self, pagefunc, pagesize, use_cache=True): self._pagefunc = pagefunc self._pagesize = pagesize + self._pagecount = float('inf') self._use_cache = use_cache self._cache = {} def getpage(self, pagenum): page_results = self._cache.get(pagenum) if page_results is None: - page_results = list(self._pagefunc(pagenum)) + page_results = [] if pagenum > self._pagecount else list(self._pagefunc(pagenum)) if self._use_cache: self._cache[pagenum] = page_results return page_results @@ -2817,7 +2818,7 @@ class PagedList: raise NotImplementedError('This method must be implemented by subclasses') def __getitem__(self, idx): - # NOTE: cache must be enabled if this is used + assert self._use_cache, 'Indexing PagedList requires cache' if not isinstance(idx, int) or idx < 0: raise 
TypeError('indices must be non-negative integers') entries = self.getslice(idx, idx + 1) @@ -2843,7 +2844,11 @@ class OnDemandPagedList(PagedList): if (end is not None and firstid <= end <= nextfirstid) else None) - page_results = self.getpage(pagenum) + try: + page_results = self.getpage(pagenum) + except Exception: + self._pagecount = pagenum - 1 + raise if startv != 0 or endv is not None: page_results = page_results[startv:endv] yield from page_results @@ -2863,8 +2868,8 @@ class OnDemandPagedList(PagedList): class InAdvancePagedList(PagedList): def __init__(self, pagefunc, pagecount, pagesize): - self._pagecount = pagecount PagedList.__init__(self, pagefunc, pagesize, True) + self._pagecount = pagecount def _getslice(self, start, end): start_page = start // self._pagesize From acea8d7cfb60d9f9c33ed7662f72110531a54e6d Mon Sep 17 00:00:00 2001 From: Justin Keogh <github.com@v6y.net> Date: Thu, 3 Mar 2022 13:09:00 +0000 Subject: [PATCH 0739/2552] [utils] Fix file locking for AOSP (#2714) Closes #2080, #2670 Authored by: jakeogh --- yt_dlp/utils.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 1532210f3..8e9a7dbc8 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2141,18 +2141,28 @@ if sys.platform == 'win32': raise OSError('Unlocking file failed: %r' % ctypes.FormatError()) else: - # Some platforms, such as Jython, is missing fcntl try: import fcntl def _lock_file(f, exclusive, block): - fcntl.flock(f, - fcntl.LOCK_SH if not exclusive - else fcntl.LOCK_EX if block - else fcntl.LOCK_EX | fcntl.LOCK_NB) + try: + fcntl.flock(f, + fcntl.LOCK_SH if not exclusive + else fcntl.LOCK_EX if block + else fcntl.LOCK_EX | fcntl.LOCK_NB) + except BlockingIOError: + raise + except OSError: # AOSP does not have flock() + fcntl.lockf(f, + fcntl.LOCK_SH if not exclusive + else fcntl.LOCK_EX if block + else fcntl.LOCK_EX | fcntl.LOCK_NB) def _unlock_file(f): - fcntl.flock(f, fcntl.LOCK_UN) + try: + fcntl.flock(f, fcntl.LOCK_UN) + except OSError: + fcntl.lockf(f, fcntl.LOCK_UN) except ImportError: UNSUPPORTED_MSG = 'file locking is not supported on this platform' From 747c0bd127ebd205278d31ec7216ffa02fe96734 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Thu, 3 Mar 2022 18:57:38 +0530 Subject: [PATCH 0740/2552] [utils] Improve file locking * Implement non-blocking locks for windows * Don't raise error when closing a closed file --- yt_dlp/utils.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 8e9a7dbc8..5eb049ab7 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2122,22 +2122,22 @@ if sys.platform == 'win32': whole_low = 0xffffffff whole_high = 0x7fffffff - def _lock_file(f, exclusive, block): # todo: block unused on win32 + def _lock_file(f, exclusive, block): overlapped = OVERLAPPED() overlapped.Offset = 0 overlapped.OffsetHigh = 0 overlapped.hEvent = 0 f._lock_file_overlapped_p = ctypes.pointer(overlapped) - handle = msvcrt.get_osfhandle(f.fileno()) - if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0, - whole_low, whole_high, f._lock_file_overlapped_p): - raise OSError('Locking file failed: %r' % ctypes.FormatError()) + + if not LockFileEx(msvcrt.get_osfhandle(f.fileno()), + (0x2 if exclusive else 0x0) | (0x0 if block else 0x1), + 0, whole_low, whole_high, f._lock_file_overlapped_p): + raise BlockingIOError('Locking file failed: %r' % ctypes.FormatError()) def _unlock_file(f): assert 
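The magic numbers passed to LockFileEx in the Windows hunk above are standard Win32 flags. With the names spelled out (the constants are from the Windows API; the helper itself is just a sketch):

    LOCKFILE_FAIL_IMMEDIATELY = 0x1   # non-blocking: fail instead of waiting
    LOCKFILE_EXCLUSIVE_LOCK = 0x2     # writer lock; omit it for a shared reader lock

    def lockfileex_flags(exclusive, block):
        return ((LOCKFILE_EXCLUSIVE_LOCK if exclusive else 0)
                | (0 if block else LOCKFILE_FAIL_IMMEDIATELY))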
f._lock_file_overlapped_p handle = msvcrt.get_osfhandle(f.fileno()) - if not UnlockFileEx(handle, 0, - whole_low, whole_high, f._lock_file_overlapped_p): + if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p): raise OSError('Unlocking file failed: %r' % ctypes.FormatError()) else: @@ -2175,6 +2175,8 @@ else: class locked_file(object): + _closed = False + def __init__(self, filename, mode, block=True, encoding=None): assert mode in ['r', 'rb', 'a', 'ab', 'w', 'wb'] self.f = io.open(filename, mode, encoding=encoding) @@ -2192,9 +2194,11 @@ class locked_file(object): def __exit__(self, etype, value, traceback): try: - _unlock_file(self.f) + if not self._closed: + _unlock_file(self.f) finally: self.f.close() + self._closed = True def __iter__(self): return iter(self.f) From 45806d44a7bc96dc0199410fec81cdce6015a546 Mon Sep 17 00:00:00 2001 From: Emanuel Hoogeveen <55082669+ehoogeveen-medweb@users.noreply.github.com> Date: Thu, 3 Mar 2022 15:33:32 +0100 Subject: [PATCH 0741/2552] [downloader] Obey `--file-access-retries` when deleting/renaming (#2224) Authored by: ehoogeveen-medweb --- yt_dlp/downloader/common.py | 47 ++++++++++++++++++++++------------- yt_dlp/downloader/external.py | 4 +-- yt_dlp/downloader/fragment.py | 4 +-- yt_dlp/options.py | 2 +- 4 files changed, 35 insertions(+), 22 deletions(-) diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index 37321e34b..3a949d38a 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -210,28 +210,41 @@ class FileDownloader(object): def ytdl_filename(self, filename): return filename + '.ytdl' + def wrap_file_access(action, *, fatal=False): + def outer(func): + def inner(self, *args, **kwargs): + file_access_retries = self.params.get('file_access_retries', 0) + retry = 0 + while True: + try: + return func(self, *args, **kwargs) + except (IOError, OSError) as err: + retry = retry + 1 + if retry > file_access_retries or err.errno not in (errno.EACCES, errno.EINVAL): + if not fatal: + self.report_error(f'unable to {action} file: {err}') + return + raise + self.to_screen( + f'[download] Unable to {action} file due to file access error. ' + f'Retrying (attempt {retry} of {self.format_retries(file_access_retries)}) ...') + time.sleep(0.01) + return inner + return outer + + @wrap_file_access('open', fatal=True) def sanitize_open(self, filename, open_mode): - file_access_retries = self.params.get('file_access_retries', 10) - retry = 0 - while True: - try: - return sanitize_open(filename, open_mode) - except (IOError, OSError) as err: - retry = retry + 1 - if retry > file_access_retries or err.errno not in (errno.EACCES,): - raise - self.to_screen( - '[download] Got file access error. Retrying (attempt %d of %s) ...' 
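wrap_file_access above is a decorator factory: `action` and `fatal` configure the wrapper before it ever sees the decorated method. A stripped-down sketch of the same shape, with the retried error narrowed to PermissionError (roughly EACCES, which the real code checks by errno):

    import functools
    import os
    import time

    def retrying(attempts=3, retry_on=(PermissionError,)):
        def outer(func):
            @functools.wraps(func)
            def inner(*args, **kwargs):
                for attempt in range(attempts + 1):
                    try:
                        return func(*args, **kwargs)
                    except retry_on:
                        if attempt == attempts:
                            raise           # out of retries: propagate
                        time.sleep(0.01)    # brief pause before the next attempt
            return inner
        return outer

    @retrying(attempts=3)
    def replace_file(src, dst):
        os.replace(src, dst)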
- % (retry, self.format_retries(file_access_retries))) - time.sleep(0.01) + return sanitize_open(filename, open_mode) + + @wrap_file_access('remove') + def try_remove(self, filename): + os.remove(filename) + @wrap_file_access('rename') def try_rename(self, old_filename, new_filename): if old_filename == new_filename: return - try: - os.replace(old_filename, new_filename) - except (IOError, OSError) as err: - self.report_error(f'unable to rename file: {err}') + os.replace(old_filename, new_filename) def try_utime(self, filename, last_modified_hdr): """Try to set the last-modified time of the given file.""" diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 03ae3a00e..be6202eef 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -159,9 +159,9 @@ class ExternalFD(FragmentFD): dest.write(decrypt_fragment(fragment, src.read())) src.close() if not self.params.get('keep_fragments', False): - os.remove(encodeFilename(fragment_filename)) + self.try_remove(encodeFilename(fragment_filename)) dest.close() - os.remove(encodeFilename('%s.frag.urls' % tmpfilename)) + self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename)) return 0 diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index 83a9f81b6..95fb2f9e7 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -159,7 +159,7 @@ class FragmentFD(FileDownloader): if self.__do_ytdl_file(ctx): self._write_ytdl_file(ctx) if not self.params.get('keep_fragments', False): - os.remove(encodeFilename(ctx['fragment_filename_sanitized'])) + self.try_remove(encodeFilename(ctx['fragment_filename_sanitized'])) del ctx['fragment_filename_sanitized'] def _prepare_frag_download(self, ctx): @@ -305,7 +305,7 @@ class FragmentFD(FileDownloader): if self.__do_ytdl_file(ctx): ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename'])) if os.path.isfile(ytdl_filename): - os.remove(ytdl_filename) + self.try_remove(ytdl_filename) elapsed = time.time() - ctx['started'] if ctx['tmpfilename'] == '-': diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 6fcef98cd..9908f3975 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -727,7 +727,7 @@ def create_parser(): help='Number of retries (default is %default), or "infinite"') downloader.add_option( '--file-access-retries', - dest='file_access_retries', metavar='RETRIES', default=10, + dest='file_access_retries', metavar='RETRIES', default=3, help='Number of times to retry on file access error (default is %default), or "infinite"') downloader.add_option( '--fragment-retries', From ded9f32667d7fc8db672b9360162bc2ec6b095f7 Mon Sep 17 00:00:00 2001 From: coletdev <coletdjnz@protonmail.com> Date: Fri, 4 Mar 2022 20:27:09 +1300 Subject: [PATCH 0742/2552] [extractor] Support `--mark-watched` without `_NETRC_MACHINE` (#2939) Authored by: coletdjnz --- yt_dlp/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index d8bb21137..53b21ece6 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -3678,7 +3678,7 @@ class InfoExtractor(object): def mark_watched(self, *args, **kwargs): if not self.get_param('mark_watched', False): return - if (self._get_login_info()[0] is not None + if (hasattr(self, '_NETRC_MACHINE') and self._get_login_info()[0] is not None or self.get_param('cookiefile') or self.get_param('cookiesfrombrowser')): self._mark_watched(*args, **kwargs) From 
5bcccbfec3de4bc69d529016f784d04db7b11e04 Mon Sep 17 00:00:00 2001 From: Ha Tien Loi <loiht.b17vt220@stu.ptit.edu.vn> Date: Fri, 4 Mar 2022 18:18:46 +0700 Subject: [PATCH 0743/2552] [telegram] Add extractor (#2922) Closes #2910 Authored by: hatienl0i261299 --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/telegram.py | 37 ++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 yt_dlp/extractor/telegram.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 0f26dc24f..c9e1a3f4e 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1593,6 +1593,7 @@ from .tele13 import Tele13IE from .telebruxelles import TeleBruxellesIE from .telecinco import TelecincoIE from .telegraaf import TelegraafIE +from .telegram import TelegramEmbedIE from .telemb import TeleMBIE from .telemundo import TelemundoIE from .telequebec import ( diff --git a/yt_dlp/extractor/telegram.py b/yt_dlp/extractor/telegram.py new file mode 100644 index 000000000..2dfa261e9 --- /dev/null +++ b/yt_dlp/extractor/telegram.py @@ -0,0 +1,37 @@ +from .common import InfoExtractor + + +class TelegramEmbedIE(InfoExtractor): + IE_NAME = 'telegram:embed' + _VALID_URL = r'https?://t\.me/(?P<channel_name>[^/]+)/(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://t.me/europa_press/613', + 'info_dict': { + 'id': '613', + 'ext': 'mp4', + 'title': 'Europa Press', + 'description': '6ce2d7e8d56eda16d80607b23db7b252', + 'thumbnail': r're:^https?:\/\/cdn.*?telesco\.pe\/file\/\w+', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + webpage_embed = self._download_webpage(f'{url}?embed=1', video_id) + + formats = [{ + 'url': self._proto_relative_url(self._search_regex( + '<video[^>]+src="([^"]+)"', webpage_embed, 'source')), + 'ext': 'mp4', + }] + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, fatal=True), + 'description': self._html_search_meta(['og:description', 'twitter:description'], webpage, fatal=True), + 'thumbnail': self._search_regex(r'tgme_widget_message_video_thumb"[^>]+background-image:url\(\'([^\']+)\'\)', + webpage_embed, 'thumbnail'), + 'formats': formats, + } From 761fba6d229d1dfa69325de1d6236ae40864b3ca Mon Sep 17 00:00:00 2001 From: Ha Tien Loi <loiht.b17vt220@stu.ptit.edu.vn> Date: Fri, 4 Mar 2022 18:19:57 +0700 Subject: [PATCH 0744/2552] [daystar] Add extractor (#2924) Closes #2887 Authored by; hatienl0i261299 --- yt_dlp/extractor/daystar.py | 48 ++++++++++++++++++++++++++++++++++ yt_dlp/extractor/extractors.py | 1 + 2 files changed, 49 insertions(+) create mode 100644 yt_dlp/extractor/daystar.py diff --git a/yt_dlp/extractor/daystar.py b/yt_dlp/extractor/daystar.py new file mode 100644 index 000000000..4f59d904f --- /dev/null +++ b/yt_dlp/extractor/daystar.py @@ -0,0 +1,48 @@ +from .common import InfoExtractor +from ..utils import js_to_json, urljoin + + +class DaystarClipIE(InfoExtractor): + IE_NAME = 'daystar:clip' + _VALID_URL = r'https?://player\.daystar\.tv/(?P<id>\w+)' + _TESTS = [{ + 'url': 'https://player.daystar.tv/0MTO2ITM', + 'info_dict': { + 'id': '0MTO2ITM', + 'ext': 'mp4', + 'title': 'The Dark World of COVID Pt. 
1 | Aaron Siri', + 'description': 'a420d320dda734e5f29458df3606c5f4', + 'thumbnail': r're:^https?://.+\.jpg', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + src_iframe = self._search_regex(r'\<iframe[^>]+src="([^"]+)"', webpage, 'src iframe') + webpage_iframe = self._download_webpage( + src_iframe.replace('player.php', 'config2.php'), video_id, headers={'Referer': src_iframe}) + + sources = self._parse_json(self._search_regex( + r'sources\:\s*(\[.*?\])', webpage_iframe, 'm3u8 source'), video_id, transform_source=js_to_json) + + formats, subtitles = [], {} + for source in sources: + file = source.get('file') + if file and source.get('type') == 'm3u8': + fmts, subs = self._extract_m3u8_formats_and_subtitles( + urljoin('https://www.lightcast.com/embed/', file), + video_id, 'mp4', fatal=False, headers={'Referer': src_iframe}) + formats.extend(fmts) + subtitles = self._merge_subtitles(subtitles, subs) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': self._html_search_meta(['og:title', 'twitter:title'], webpage), + 'description': self._html_search_meta(['og:description', 'twitter:description'], webpage), + 'thumbnail': self._search_regex(r'image:\s*"([^"]+)', webpage_iframe, 'thumbnail'), + 'formats': formats, + 'subtitles': subtitles, + } diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index c9e1a3f4e..caab2d8ef 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -348,6 +348,7 @@ from .daum import ( DaumPlaylistIE, DaumUserIE, ) +from .daystar import DaystarClipIE from .dbtv import DBTVIE from .dctp import DctpTvIE from .deezer import ( From ecca4519b7b684446f5da7ba86c9f1b5f0eedf5d Mon Sep 17 00:00:00 2001 From: Ha Tien Loi <loiht.b17vt220@stu.ptit.edu.vn> Date: Fri, 4 Mar 2022 18:22:45 +0700 Subject: [PATCH 0745/2552] [zingmp3] Fix extractor (#2889) Authored by: hatienl0i261299 --- yt_dlp/extractor/zingmp3.py | 135 ++++++++++++++++++++++++------------ 1 file changed, 89 insertions(+), 46 deletions(-) diff --git a/yt_dlp/extractor/zingmp3.py b/yt_dlp/extractor/zingmp3.py index a3edc158f..f84ba5cff 100644 --- a/yt_dlp/extractor/zingmp3.py +++ b/yt_dlp/extractor/zingmp3.py @@ -1,22 +1,47 @@ # coding: utf-8 from __future__ import unicode_literals +import hashlib +import hmac +import urllib.parse + from .common import InfoExtractor from ..utils import ( int_or_none, + traverse_obj, + HEADRequest, ) class ZingMp3BaseIE(InfoExtractor): - _VALID_URL_TMPL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?:%s)/[^/]+/(?P<id>\w+)\.html' + _VALID_URL_TMPL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<type>(?:%s))/[^/]+/(?P<id>\w+)(?:\.html|\?)' _GEO_COUNTRIES = ['VN'] + _DOMAIN = 'https://zingmp3.vn' + _SLUG_API = { + 'bai-hat': '/api/v2/page/get/song', + 'embed': '/api/v2/page/get/song', + 'video-clip': '/api/v2/page/get/video', + 'playlist': '/api/v2/page/get/playlist', + 'album': '/api/v2/page/get/playlist', + 'lyric': '/api/v2/lyric/get/lyric', + 'song_streaming': '/api/v2/song/get/streaming', + } + + _API_KEY = '88265e23d4284f25963e6eedac8fbfa3' + _SECRET_KEY = b'2aa2d1c561e809b267f3638c4a307aab' - def _extract_item(self, item, fatal): - item_id = item['id'] - title = item.get('name') or item['title'] + def _extract_item(self, item, song_id, type_url, fatal): + item_id = item.get('encodeId') or song_id + title = item.get('title') or item.get('alias') + + if type_url == 'video-clip': + source = item.get('streaming') + else: + api = 
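The sources blob matched above is a JavaScript literal (bare keys, single quotes), which is why it goes through js_to_json before being parsed. What that transformation buys, on an invented sample:

    import json
    from yt_dlp.utils import js_to_json

    js = "[{file: 'hls/master.m3u8', type: 'm3u8'}]"
    json.loads(js_to_json(js))   # -> [{'file': 'hls/master.m3u8', 'type': 'm3u8'}]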
self.get_api_with_signature(name_api=self._SLUG_API.get('song_streaming'), param={'id': item_id}) + source = self._download_json(api, video_id=item_id).get('data') formats = [] - for k, v in (item.get('source') or {}).items(): + for k, v in (source or {}).items(): if not v: continue if k in ('mp4', 'hls'): @@ -34,31 +59,35 @@ class ZingMp3BaseIE(InfoExtractor): 'height': int_or_none(self._search_regex( r'^(\d+)p', res, 'resolution', default=None)), }) - else: - formats.append({ - 'ext': 'mp3', - 'format_id': k, - 'tbr': int_or_none(k), - 'url': self._proto_relative_url(v), - 'vcodec': 'none', - }) + continue + elif v == 'VIP': + continue + formats.append({ + 'ext': 'mp3', + 'format_id': k, + 'tbr': int_or_none(k), + 'url': self._proto_relative_url(v), + 'vcodec': 'none', + }) if not formats: if not fatal: return - msg = item['msg'] + msg = item.get('msg') if msg == 'Sorry, this content is not available in your country.': self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True) self.raise_no_formats(msg, expected=True) self._sort_formats(formats) - subtitles = None lyric = item.get('lyric') - if lyric: - subtitles = { - 'origin': [{ - 'url': lyric, - }], - } + if not lyric: + api = self.get_api_with_signature(name_api=self._SLUG_API.get("lyric"), param={'id': item_id}) + info_lyric = self._download_json(api, video_id=item_id) + lyric = traverse_obj(info_lyric, ('data', 'file')) + subtitles = { + 'origin': [{ + 'url': lyric, + }], + } if lyric else None album = item.get('album') or {} @@ -66,30 +95,41 @@ class ZingMp3BaseIE(InfoExtractor): 'id': item_id, 'title': title, 'formats': formats, - 'thumbnail': item.get('thumbnail'), + 'thumbnail': traverse_obj(item, 'thumbnail', 'thumbnailM'), 'subtitles': subtitles, 'duration': int_or_none(item.get('duration')), 'track': title, - 'artist': item.get('artists_names'), - 'album': album.get('name') or album.get('title'), - 'album_artist': album.get('artists_names'), + 'artist': traverse_obj(item, 'artistsNames', 'artists_names'), + 'album': traverse_obj(album, 'name', 'title'), + 'album_artist': traverse_obj(album, 'artistsNames', 'artists_names'), } + def _real_initialize(self): + if not self.get_param('cookiefile') and not self.get_param('cookiesfrombrowser'): + self._request_webpage(HEADRequest(self._DOMAIN), None, note='Updating cookies') + def _real_extract(self, url): - page_id = self._match_id(url) - webpage = self._download_webpage( - url.replace('://zingmp3.vn/', '://mp3.zing.vn/'), - page_id, query={'play_song': 1}) - data_path = self._search_regex( - r'data-xml="([^"]+)', webpage, 'data path') - return self._process_data(self._download_json( - 'https://mp3.zing.vn/xhr' + data_path, page_id)['data']) + song_id, type_url = self._match_valid_url(url).group('id', 'type') + + api = self.get_api_with_signature(name_api=self._SLUG_API[type_url], param={'id': song_id}) + + return self._process_data(self._download_json(api, song_id)['data'], song_id, type_url) + + def get_api_with_signature(self, name_api, param): + sha256 = hashlib.sha256(''.join(f'{k}={v}' for k, v in param.items()).encode('utf-8')).hexdigest() + + data = { + 'apiKey': self._API_KEY, + 'sig': hmac.new(self._SECRET_KEY, f'{name_api}{sha256}'.encode('utf-8'), hashlib.sha512).hexdigest(), + **param, + } + return f'{self._DOMAIN}{name_api}?{urllib.parse.urlencode(data)}' class ZingMp3IE(ZingMp3BaseIE): - _VALID_URL = ZingMp3BaseIE._VALID_URL_TMPL % 'bai-hat|video-clip' + _VALID_URL = ZingMp3BaseIE._VALID_URL_TMPL % 'bai-hat|video-clip|embed' _TESTS = [{ - 
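The request signing implemented in get_api_with_signature above, pulled out as a standalone function for clarity (same scheme and the same key material as the class constants: SHA-256 over the concatenated query params, then HMAC-SHA512 over the API path plus that digest):

    import hashlib
    import hmac
    import urllib.parse

    API_KEY = '88265e23d4284f25963e6eedac8fbfa3'
    SECRET_KEY = b'2aa2d1c561e809b267f3638c4a307aab'

    def signed_url(domain, name_api, param):
        sha256 = hashlib.sha256(
            ''.join(f'{k}={v}' for k, v in param.items()).encode('utf-8')).hexdigest()
        sig = hmac.new(SECRET_KEY, f'{name_api}{sha256}'.encode('utf-8'),
                       hashlib.sha512).hexdigest()
        query = urllib.parse.urlencode({'apiKey': API_KEY, 'sig': sig, **param})
        return f'{domain}{name_api}?{query}'

    signed_url('https://zingmp3.vn', '/api/v2/page/get/song', {'id': 'ZWZB9WAB'})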
'url': 'http://mp3.zing.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html', + 'url': 'https://mp3.zing.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html', 'md5': 'ead7ae13693b3205cbc89536a077daed', 'info_dict': { 'id': 'ZWZB9WAB', @@ -108,7 +148,7 @@ class ZingMp3IE(ZingMp3BaseIE): 'album_artist': 'Bảo Thy', }, }, { - 'url': 'https://mp3.zing.vn/video-clip/Suong-Hoa-Dua-Loi-K-ICM-RYO/ZO8ZF7C7.html', + 'url': 'https://zingmp3.vn/video-clip/Suong-Hoa-Dua-Loi-K-ICM-RYO/ZO8ZF7C7.html', 'md5': 'e9c972b693aa88301ef981c8151c4343', 'info_dict': { 'id': 'ZO8ZF7C7', @@ -119,15 +159,18 @@ class ZingMp3IE(ZingMp3BaseIE): 'track': 'Sương Hoa Đưa Lối', 'artist': 'K-ICM, RYO', }, + }, { + 'url': 'https://zingmp3.vn/embed/song/ZWZEI76B?start=false', + 'only_matching': True, }, { 'url': 'https://zingmp3.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html', 'only_matching': True, }] IE_NAME = 'zingmp3' - IE_DESC = 'mp3.zing.vn' + IE_DESC = 'zingmp3.vn' - def _process_data(self, data): - return self._extract_item(data, True) + def _process_data(self, data, song_id, type_url): + return self._extract_item(data, song_id, type_url, True) class ZingMp3AlbumIE(ZingMp3BaseIE): @@ -139,7 +182,7 @@ class ZingMp3AlbumIE(ZingMp3BaseIE): 'id': 'ZWZBWDAF', 'title': 'Lâu Đài Tình Ái', }, - 'playlist_count': 10, + 'playlist_count': 9, }, { 'url': 'http://mp3.zing.vn/playlist/Duong-Hong-Loan-apollobee/IWCAACCB.html', 'only_matching': True, @@ -149,12 +192,12 @@ class ZingMp3AlbumIE(ZingMp3BaseIE): }] IE_NAME = 'zingmp3:album' - def _process_data(self, data): + def _process_data(self, data, song_id, type_url): def entries(): - for item in (data.get('items') or []): - entry = self._extract_item(item, False) + for item in traverse_obj(data, ('song', 'items')) or []: + entry = self._extract_item(item, song_id, type_url, False) if entry: yield entry - info = data.get('info') or {} - return self.playlist_result( - entries(), info.get('id'), info.get('name') or info.get('title')) + + return self.playlist_result(entries(), traverse_obj(data, 'id', 'encodeId'), + traverse_obj(data, 'name', 'title')) From 97bef011ee3a26909b58db864c94104fb7b91b66 Mon Sep 17 00:00:00 2001 From: Jeff Huffman <tejing@tejing.com> Date: Fri, 4 Mar 2022 03:27:35 -0800 Subject: [PATCH 0746/2552] [crunchyroll] Better error reporting on login failure (#2938) Authored by: tejing1 --- yt_dlp/extractor/crunchyroll.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py index ffe291098..b6ba5ef56 100644 --- a/yt_dlp/extractor/crunchyroll.py +++ b/yt_dlp/extractor/crunchyroll.py @@ -85,7 +85,7 @@ class CrunchyrollBaseIE(InfoExtractor): 'session_id': session_id }).encode('ascii')) if login_response['code'] != 'ok': - raise ExtractorError('Login failed. Bad username or password?', expected=True) + raise ExtractorError('Login failed. 
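The reworked `ZingMp3AlbumIE._process_data` above yields album entries lazily, so a single song that fails to extract is skipped instead of aborting the whole playlist. A minimal sketch of that pattern, with a stub standing in for the real per-item extraction:

```python
def playlist_result(entries, playlist_id, title):
    return {'_type': 'playlist', 'id': playlist_id, 'title': title, 'entries': entries}

def extract_item(item):
    # stand-in for the real _extract_item(..., fatal=False); None means "skip"
    return {'id': item['encodeId']} if item.get('encodeId') else None

def extract_album(data):
    def entries():
        for item in (data.get('song') or {}).get('items') or []:
            entry = extract_item(item)
            if entry:
                yield entry
    return playlist_result(entries(), data.get('id'), data.get('title'))

album = extract_album({'id': 'ZWZBWDAF', 'title': 'Lâu Đài Tình Ái',
                       'song': {'items': [{'encodeId': 'A'}, {}, {'encodeId': 'B'}]}})
print(album['title'], [e['id'] for e in album['entries']])  # Lâu Đài Tình Ái ['A', 'B']
```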
Server message: %s' % login_response['message'], expected=True) if not self._get_cookies(self._LOGIN_URL).get('etp_rt'): raise ExtractorError('Login succeeded but did not set etp_rt cookie') From b90dbe6c198f51921d137c3c5cd517d4222bca18 Mon Sep 17 00:00:00 2001 From: i6t <62123048+i6t@users.noreply.github.com> Date: Fri, 4 Mar 2022 22:53:43 +0900 Subject: [PATCH 0747/2552] [Gettr] Improve extractor (#2920) Authored by: i6t --- yt_dlp/extractor/gettr.py | 82 ++++++++++++++++++++++++++------------- 1 file changed, 55 insertions(+), 27 deletions(-) diff --git a/yt_dlp/extractor/gettr.py b/yt_dlp/extractor/gettr.py index 9842edd81..327a4d0b8 100644 --- a/yt_dlp/extractor/gettr.py +++ b/yt_dlp/extractor/gettr.py @@ -8,8 +8,8 @@ from ..utils import ( dict_get, float_or_none, int_or_none, - remove_end, str_or_none, + traverse_obj, try_get, url_or_none, urljoin, @@ -36,8 +36,9 @@ class GettrIE(GettrBaseIE): 'ext': 'mp4', 'uploader': 'EpochTV', 'uploader_id': 'epochtv', + 'upload_date': '20210927', 'thumbnail': r're:^https?://.+/out\.jpg', - 'timestamp': 1632782451058, + 'timestamp': 1632782451.058, 'duration': 58.5585, 'tags': ['hornofafrica', 'explorations'], } @@ -50,43 +51,69 @@ class GettrIE(GettrBaseIE): 'ext': 'mp4', 'uploader': 'Neues Forum Freiheit', 'uploader_id': 'nf_freiheit', + 'upload_date': '20210718', 'thumbnail': r're:^https?://.+/out\.jpg', - 'timestamp': 1626594455017, + 'timestamp': 1626594455.017, 'duration': 23, 'tags': 'count:12', } + }, { + # quote post + 'url': 'https://gettr.com/post/pxn5b743a9', + 'only_matching': True, + }, { + # quote with video + 'url': 'https://gettr.com/post/pxtiiz5ca2', + 'only_matching': True, + }, { + # streaming embed + 'url': 'https://gettr.com/post/pxlu8p3b13', + 'only_matching': True, + }, { + # youtube embed + 'url': 'https://gettr.com/post/pv6wp9e24c', + 'only_matching': True, + 'add_ie': ['Youtube'], }] def _real_extract(self, url): post_id = self._match_id(url) webpage = self._download_webpage(url, post_id) - api_data = self._call_api('post/%s?incl="poststats|userinfo"' % post_id, post_id) post_data = api_data.get('data') - user_data = try_get(api_data, lambda x: x['aux']['uinf'][post_data['uid']]) or {} + user_data = try_get(api_data, lambda x: x['aux']['uinf'][post_data['uid']], dict) or {} - if post_data.get('nfound'): - raise ExtractorError(post_data.get('txt'), expected=True) + vid = post_data.get('vid') + ovid = post_data.get('ovid') + + if post_data.get('p_type') == 'stream': + return self.url_result(f'https://gettr.com/streaming/{post_id}', ie='GettrStreaming', video_id=post_id) + + if not (ovid or vid): + embed_url = url_or_none(post_data.get('prevsrc')) + shared_post_id = traverse_obj(api_data, ('aux', 'shrdpst', '_id'), ('data', 'rpstIds', 0), expected_type=str) + + if embed_url: + return self.url_result(embed_url) + elif shared_post_id: + return self.url_result(f'https://gettr.com/post/{shared_post_id}', ie='Gettr', video_id=shared_post_id) + else: + raise ExtractorError('There\'s no video in this post.') title = description = str_or_none( post_data.get('txt') or self._og_search_description(webpage)) uploader = str_or_none( user_data.get('nickname') - or remove_end(self._og_search_title(webpage), ' on GETTR')) + or self._search_regex(r'^(.+?) 
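For quote posts, the Gettr patch above has to find the shared post's id at one of two places in the API response, which is what `traverse_obj(api_data, ('aux', 'shrdpst', '_id'), ('data', 'rpstIds', 0), expected_type=str)` does. A simplified model of that multi-path lookup (dict/list nesting only, ignoring the `expected_type` filtering):

```python
def traverse_first(obj, *paths):
    # return the first value that resolves along any of the given key paths
    for path in paths:
        cur = obj
        for key in path:
            if isinstance(cur, dict):
                cur = cur.get(key)
            elif isinstance(cur, list) and isinstance(key, int) and -len(cur) <= key < len(cur):
                cur = cur[key]
            else:
                cur = None
            if cur is None:
                break
        if cur is not None:
            return cur

api_data = {'data': {'rpstIds': ['pxn5b743a9']}}
print(traverse_first(api_data, ('aux', 'shrdpst', '_id'), ('data', 'rpstIds', 0)))  # pxn5b743a9
```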
on GETTR', self._og_search_title(webpage, default=''), 'uploader', fatal=False)) + if uploader: title = '%s - %s' % (uploader, title) - if not dict_get(post_data, ['vid', 'ovid']): - raise ExtractorError('There\'s no video in this post.') - - vid = post_data.get('vid') - ovid = post_data.get('ovid') - - formats = self._extract_m3u8_formats( + formats, subtitles = self._extract_m3u8_formats_and_subtitles( urljoin(self._MEDIA_BASE_URL, vid), post_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) if vid else [] + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) if vid else ([], {}) if ovid: formats.append({ @@ -103,15 +130,16 @@ class GettrIE(GettrBaseIE): 'id': post_id, 'title': title, 'description': description, - 'thumbnail': url_or_none( - urljoin(self._MEDIA_BASE_URL, post_data.get('main')) - or self._og_search_thumbnail(webpage)), - 'timestamp': int_or_none(post_data.get('cdate')), + 'formats': formats, + 'subtitles': subtitles, + 'uploader': uploader, 'uploader_id': str_or_none( dict_get(user_data, ['_id', 'username']) or post_data.get('uid')), - 'uploader': uploader, - 'formats': formats, + 'thumbnail': url_or_none( + urljoin(self._MEDIA_BASE_URL, post_data.get('main')) + or self._html_search_meta(['og:image', 'image'], webpage, 'thumbnail', fatal=False)), + 'timestamp': float_or_none(dict_get(post_data, ['cdate', 'udate']), scale=1000), 'duration': float_or_none(post_data.get('vid_dur')), 'tags': post_data.get('htgs'), } @@ -165,19 +193,19 @@ class GettrStreamingIE(GettrBaseIE): thumbnails = [{ 'url': urljoin(self._MEDIA_BASE_URL, thumbnail), - } for thumbnail in try_get(video_info, lambda x: x['postData']['imgs']) or []] + } for thumbnail in try_get(video_info, lambda x: x['postData']['imgs'], list) or []] self._sort_formats(formats) return { 'id': video_id, - 'title': try_get(video_info, lambda x: x['postData']['ttl']), - 'description': try_get(video_info, lambda x: x['postData']['dsc']), + 'title': try_get(video_info, lambda x: x['postData']['ttl'], str), + 'description': try_get(video_info, lambda x: x['postData']['dsc'], str), 'formats': formats, 'subtitles': subtitles, 'thumbnails': thumbnails, - 'uploader': try_get(video_info, lambda x: x['liveHostInfo']['nickname']), - 'uploader_id': try_get(video_info, lambda x: x['liveHostInfo']['_id']), + 'uploader': try_get(video_info, lambda x: x['liveHostInfo']['nickname'], str), + 'uploader_id': try_get(video_info, lambda x: x['liveHostInfo']['_id'], str), 'view_count': int_or_none(live_info.get('viewsCount')), 'timestamp': float_or_none(live_info.get('startAt'), scale=1000), 'duration': float_or_none(live_info.get('duration'), scale=1000), From 3a4bb9f75134a8b9055612e50464f6e42f7386c8 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Fri, 4 Mar 2022 19:18:21 +0530 Subject: [PATCH 0748/2552] [lbry] Fix `--ignore-no-formats-error` Closes #2942 --- yt_dlp/extractor/lbry.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/lbry.py b/yt_dlp/extractor/lbry.py index 1405ce0c7..5d5457c53 100644 --- a/yt_dlp/extractor/lbry.py +++ b/yt_dlp/extractor/lbry.py @@ -17,6 +17,7 @@ from ..utils import ( parse_qs, OnDemandPagedList, try_get, + UnsupportedError, urljoin, ) @@ -196,11 +197,11 @@ class LBRYIE(LBRYBaseIE): live_data = self._download_json( f'https://api.live.odysee.com/v1/odysee/live/{claim_id}', claim_id, note='Downloading livestream JSON metadata')['data'] - if not live_data['live']: - raise ExtractorError('This stream is not live', 
expected=True) - streaming_url = final_url = live_data['url'] + streaming_url = final_url = live_data.get('url') + if not final_url and not live_data.get('live'): + self.raise_no_formats('This stream is not live', True, claim_id) else: - raise ExtractorError('Unsupported URL', expected=True) + raise UnsupportedError(url) info = self._parse_stream(result, url) if determine_ext(final_url) == 'm3u8': From 7265a2190c46b7312669d613898055c2b080d26e Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Fri, 4 Mar 2022 19:37:43 +0530 Subject: [PATCH 0749/2552] Fix doubling of `video_id` in `ExtractorError` --- yt_dlp/extractor/common.py | 3 ++- yt_dlp/utils.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 53b21ece6..6c1fe55f8 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -639,7 +639,7 @@ class InfoExtractor(object): } if hasattr(e, 'countries'): kwargs['countries'] = e.countries - raise type(e)(e.msg, **kwargs) + raise type(e)(e.orig_msg, **kwargs) except compat_http_client.IncompleteRead as e: raise ExtractorError('A network error has occurred.', cause=e, expected=True, video_id=self.get_temp_id(url)) except (KeyError, StopIteration) as e: @@ -1101,6 +1101,7 @@ class InfoExtractor(object): if metadata_available and ( self.get_param('ignore_no_formats_error') or self.get_param('wait_for_video')): self.report_warning(msg) + return if method is not None: msg = '%s. %s' % (msg, self._LOGIN_HINTS[method]) raise ExtractorError(msg, expected=True) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 5eb049ab7..4134acfdc 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -1060,7 +1060,7 @@ class ExtractorError(YoutubeDLError): if sys.exc_info()[0] in network_exceptions: expected = True - self.msg = str(msg) + self.orig_msg = str(msg) self.traceback = tb self.expected = expected self.cause = cause @@ -1071,7 +1071,7 @@ class ExtractorError(YoutubeDLError): super(ExtractorError, self).__init__(''.join(( format_field(ie, template='[%s] '), format_field(video_id, template='%s: '), - self.msg, + msg, format_field(cause, template=' (caused by %r)'), '' if expected else bug_reports_message()))) From 4c3f8c3fb68637d80acc58f908b1511f9160bdbc Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Fri, 4 Mar 2022 19:40:10 +0530 Subject: [PATCH 0750/2552] Handle negative duration from extractor Closes #2921 --- yt_dlp/YoutubeDL.py | 2 ++ yt_dlp/utils.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 10eebecf2..9672d0cd3 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2392,6 +2392,8 @@ class YoutubeDL(object): sanitize_string_field(info_dict, 'id') sanitize_numeric_fields(info_dict) + if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None): + self.report_warning('"duration" field is negative, there is an error in extractor') if 'playlist' not in info_dict: # It isn't part of a playlist diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 4134acfdc..ef2c6bb24 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2257,7 +2257,7 @@ def unsmuggle_url(smug_url, default=None): def format_decimal_suffix(num, fmt='%d%s', *, factor=1000): """ Formats numbers with decimal sufixes like K, M, etc """ num, factor = float_or_none(num), float(factor) - if num is None: + if num is None or num < 0: return None exponent = 0 if num == 0 else int(math.log(num, factor)) 
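A worked example of the helper patched here: with the new `num < 0` guard, negative counts (which can reach this helper from buggy extractor data) return None instead of crashing `math.log`. A simplified, self-contained rendition, assuming the tail of the function keeps its usual shape:

```python
import math

def format_decimal_suffix(num, fmt='%d%s', factor=1000):
    if num is None or num < 0:
        return None
    exponent = 0 if num == 0 else int(math.log(num, factor))
    suffix = ['', *'kMGTPEZY'][exponent]
    return fmt % (num / factor ** exponent, suffix)

print(format_decimal_suffix(1_234_567, '%.2f%s'))  # 1.23M
print(format_decimal_suffix(-42))                  # None (previously a ValueError)
```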
suffix = ['', *'kMGTPEZY'][exponent] From 319b6059d2e4ae7bbcd6389667b99eba63ebd98c Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Fri, 4 Mar 2022 19:40:42 +0530 Subject: [PATCH 0751/2552] Better error message when no --live-from-start format --- yt_dlp/YoutubeDL.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 9672d0cd3..23e42f740 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -954,13 +954,13 @@ class YoutubeDL(object): except UnicodeEncodeError: self.to_screen('Deleting existing file') - def raise_no_formats(self, info, forced=False): + def raise_no_formats(self, info, forced=False, *, msg=None): has_drm = info.get('__has_drm') - msg = 'This video is DRM protected' if has_drm else 'No video formats found!' - expected = self.params.get('ignore_no_formats_error') - if forced or not expected: + ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg) + msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!' + if forced or not ignored: raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'], - expected=has_drm or expected) + expected=has_drm or ignored or expected) else: self.report_warning(msg) @@ -2440,11 +2440,14 @@ class YoutubeDL(object): if not self.params.get('allow_unplayable_formats'): formats = [f for f in formats if not f.get('has_drm')] - if info_dict.get('is_live'): - get_from_start = bool(self.params.get('live_from_start')) + get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start')) + if not get_from_start: + info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M') + if info_dict.get('is_live') and formats: formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start] - if not get_from_start: - info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M') + if get_from_start and not formats: + self.raise_no_formats(info_dict, msg='--live-from-start is passed, but there are no formats that can be downloaded from the start. 
' + 'If you want to download from the current time, pass --no-live-from-start') if not formats: self.raise_no_formats(info_dict) From be8cd3cb1d013754950907904c52ae401c6e84fc Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Fri, 4 Mar 2022 19:41:01 +0530 Subject: [PATCH 0752/2552] [twitch] Fix field name of `view_count` --- yt_dlp/extractor/twitch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py index 54e500edd..bee26c3a3 100644 --- a/yt_dlp/extractor/twitch.py +++ b/yt_dlp/extractor/twitch.py @@ -1048,7 +1048,7 @@ class TwitchClipsIE(TwitchBaseIE): 'title': clip.get('title') or video_id, 'formats': formats, 'duration': int_or_none(clip.get('durationSeconds')), - 'views': int_or_none(clip.get('viewCount')), + 'view_count': int_or_none(clip.get('viewCount')), 'timestamp': unified_timestamp(clip.get('createdAt')), 'thumbnails': thumbnails, 'creator': try_get(clip, lambda x: x['broadcaster']['displayName'], compat_str), From a10aa588b061cd50024f6c0a02e66af640801d20 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Fri, 4 Mar 2022 19:41:44 +0530 Subject: [PATCH 0753/2552] [FormatSort] Consider `acodec`=`ogg` as `vorbis` --- yt_dlp/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 6c1fe55f8..dbf5ef8d4 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1618,7 +1618,7 @@ class InfoExtractor(object): 'vcodec': {'type': 'ordered', 'regex': True, 'order': ['av0?1', 'vp0?9.2', 'vp0?9', '[hx]265|he?vc?', '[hx]264|avc', 'vp0?8', 'mp4v|h263', 'theora', '', None, 'none']}, 'acodec': {'type': 'ordered', 'regex': True, - 'order': ['[af]lac', 'wav|aiff', 'opus', 'vorbis', 'aac', 'mp?4a?', 'mp3', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']}, + 'order': ['[af]lac', 'wav|aiff', 'opus', 'vorbis|ogg', 'aac', 'mp?4a?', 'mp3', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']}, 'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range', 'order': ['dv', '(hdr)?12', r'(hdr)?10\+', '(hdr)?10', 'hlg', '', 'sdr', None]}, 'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol', From 933dbf5a558602278a79d55fe1aaf8f66ea54e6b Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Fri, 4 Mar 2022 19:42:11 +0530 Subject: [PATCH 0754/2552] [bandcamp] Detect acodec --- yt_dlp/extractor/bandcamp.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/bandcamp.py b/yt_dlp/extractor/bandcamp.py index 42223dab7..745055e2d 100644 --- a/yt_dlp/extractor/bandcamp.py +++ b/yt_dlp/extractor/bandcamp.py @@ -183,6 +183,7 @@ class BandcampIE(InfoExtractor): 'format_note': f.get('description'), 'filesize': parse_filesize(f.get('size_mb')), 'vcodec': 'none', + 'acodec': format_id.split('-')[0], }) self._sort_formats(formats) From 2a938746f3db75fc160aab055e889f1e31ccabed Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Fri, 4 Mar 2022 19:45:05 +0530 Subject: [PATCH 0755/2552] Fix verbose log when stdout/stderr encoding is `None` See: 5c104538270e5fc5ff8cf1007c34c0bf1e82e003 --- yt_dlp/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 23e42f740..0aee3b122 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3588,7 +3588,7 @@ class YoutubeDL(object): return def get_encoding(stream): - ret = getattr(stream, 'encoding', 'missing (%s)' % 
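The `acodec` ordering change above works because each entry in an 'ordered' field is a regex alternative and a format is ranked by the first pattern its codec matches; extending the `vorbis` entry to `vorbis|ogg` therefore slots `ogg`-tagged audio in at vorbis priority instead of the unknown-codec bucket. A toy model of that ranking:

```python
import re

ACODEC_ORDER = ['[af]lac', 'wav|aiff', 'opus', 'vorbis|ogg', 'aac', 'mp?4a?',
                'mp3', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']

def acodec_rank(codec):
    # lower rank = preferred; a crude stand-in for FormatSort's ordered-regex matching
    for i, pattern in enumerate(ACODEC_ORDER):
        if pattern is None:
            if codec is None:
                return i
        elif codec is not None and re.fullmatch(pattern, codec):
            return i
    return len(ACODEC_ORDER)

formats = [{'acodec': 'mp3'}, {'acodec': 'ogg'}, {'acodec': 'opus'}]
print(sorted(formats, key=lambda f: acodec_rank(f['acodec'])))
# opus first, then ogg (ranked alongside vorbis), then mp3
```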
type(stream).__name__) + ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)) if not supports_terminal_sequences(stream): from .compat import WINDOWS_VT_MODE ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)' From e48b3875ec4426ab9437fd06b857266d6e15bb55 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Fri, 4 Mar 2022 19:53:18 +0530 Subject: [PATCH 0756/2552] Revert 2e4cacd038b8543ccecfa9081dff6f656c979548 Closes #2923 --- yt_dlp/extractor/youtube.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 602d48e3c..4e812af99 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -240,13 +240,15 @@ def build_innertube_clients(): base_client, *variant = client.split('_') ytcfg['priority'] = 10 * priority(base_client) - if variant == ['embedded']: - ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY - INNERTUBE_CLIENTS[f'{base_client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg) + if not variant: + INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg) agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED' + agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY agegate_ytcfg['priority'] -= 1 + elif variant == ['embedded']: + ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY ytcfg['priority'] -= 2 - elif variant: + else: ytcfg['priority'] -= 3 From 8b7539d27c0a47d8d08e0522bdb66c571483377b Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Sat, 29 Jan 2022 03:25:35 +0530 Subject: [PATCH 0757/2552] Implement `--add-header` without modifying `std_headers` Closes #2526, #1614 --- README.md | 5 ++--- yt_dlp/YoutubeDL.py | 8 ++++++-- yt_dlp/__init__.py | 15 ++++++--------- yt_dlp/extractor/instagram.py | 3 +-- yt_dlp/extractor/mildom.py | 3 +-- yt_dlp/extractor/openload.py | 3 +-- yt_dlp/extractor/rtve.py | 3 +-- yt_dlp/extractor/vimeo.py | 3 +-- yt_dlp/options.py | 7 +++---- yt_dlp/utils.py | 7 ++++++- 10 files changed, 28 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index 1aab0ba94..ce5af129e 100644 --- a/README.md +++ b/README.md @@ -737,9 +737,6 @@ You can also fork the project on github and run your fork's [build workflow](.gi --prefer-insecure Use an unencrypted connection to retrieve information about the video (Currently supported only for YouTube) - --user-agent UA Specify a custom user agent - --referer URL Specify a custom referer, use if the video - access is restricted to one domain --add-header FIELD:VALUE Specify a custom HTTP header and its value, separated by a colon ":". You can use this option multiple times @@ -1866,6 +1863,8 @@ While these options are redundant, they are still expected to be used due to the --reject-title REGEX --match-filter "title !~= (?i)REGEX" --min-views COUNT --match-filter "view_count >=? COUNT" --max-views COUNT --match-filter "view_count <=? 
COUNT" + --user-agent UA --add-header "User-Agent:UA" + --referer URL --add-header "Referer:URL" #### Not recommended diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 0aee3b122..49143cb16 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -83,6 +83,7 @@ from .utils import ( make_dir, make_HTTPS_handler, MaxDownloadsReached, + merge_headers, network_exceptions, number_of_digits, orderedSet, @@ -332,6 +333,7 @@ class YoutubeDL(object): nocheckcertificate: Do not verify SSL certificates prefer_insecure: Use HTTP instead of HTTPS to retrieve information. At the moment, this is only supported by YouTube. + http_headers: A dictionary of custom headers to be used for all requests proxy: URL of the proxy server to use geo_verification_proxy: URL of the proxy to use for IP address verification on geo-restricted sites. @@ -647,6 +649,9 @@ class YoutubeDL(object): else self.params['format'] if callable(self.params['format']) else self.build_format_selector(self.params['format'])) + # Set http_headers defaults according to std_headers + self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {})) + self._setup_opener() if auto_init: @@ -2250,8 +2255,7 @@ class YoutubeDL(object): return _build_selector_function(parsed_selector) def _calc_headers(self, info_dict): - res = std_headers.copy() - res.update(info_dict.get('http_headers') or {}) + res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {}) cookies = self._calc_cookies(info_dict) if cookies: diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index c87c5b6df..926b5cad3 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -41,6 +41,7 @@ from .utils import ( SameFileError, setproctitle, std_headers, + traverse_obj, write_string, ) from .update import run_update @@ -75,20 +76,15 @@ def _real_main(argv=None): parser, opts, args = parseOpts(argv) warnings, deprecation_warnings = [], [] - # Set user agent if opts.user_agent is not None: - std_headers['User-Agent'] = opts.user_agent - - # Set referer + opts.headers.setdefault('User-Agent', opts.user_agent) if opts.referer is not None: - std_headers['Referer'] = opts.referer - - # Custom HTTP headers - std_headers.update(opts.headers) + opts.headers.setdefault('Referer', opts.referer) # Dump user agent if opts.dump_user_agent: - write_string(std_headers['User-Agent'] + '\n', out=sys.stdout) + ua = traverse_obj(opts.headers, 'User-Agent', casesense=False, default=std_headers['User-Agent']) + write_string(f'{ua}\n', out=sys.stdout) sys.exit(0) # Batch file verification @@ -767,6 +763,7 @@ def _real_main(argv=None): 'legacyserverconnect': opts.legacy_server_connect, 'nocheckcertificate': opts.no_check_certificate, 'prefer_insecure': opts.prefer_insecure, + 'http_headers': opts.headers, 'proxy': opts.proxy, 'socket_timeout': opts.socket_timeout, 'bidi_workaround': opts.bidi_workaround, diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py index a2cc9f748..3bb786d6a 100644 --- a/yt_dlp/extractor/instagram.py +++ b/yt_dlp/extractor/instagram.py @@ -17,7 +17,6 @@ from ..utils import ( get_element_by_attribute, int_or_none, lowercase_escape, - std_headers, str_or_none, str_to_int, traverse_obj, @@ -503,7 +502,7 @@ class InstagramPlaylistBaseIE(InstagramBaseIE): '%s' % rhx_gis, '', '%s:%s' % (rhx_gis, csrf_token), - '%s:%s:%s' % (rhx_gis, csrf_token, std_headers['User-Agent']), + '%s:%s:%s' % (rhx_gis, csrf_token, self.get_param('http_headers')['User-Agent']), ] # try all of the ways to generate a GIS 
query, and not only use the diff --git a/yt_dlp/extractor/mildom.py b/yt_dlp/extractor/mildom.py index ddeaa7021..b5a2e17f2 100644 --- a/yt_dlp/extractor/mildom.py +++ b/yt_dlp/extractor/mildom.py @@ -8,7 +8,6 @@ import json from .common import InfoExtractor from ..utils import ( - std_headers, update_url_query, random_uuidv4, try_get, @@ -70,7 +69,7 @@ class MildomBaseIE(InfoExtractor): 'clu': '', 'wh': '1919*810', 'rtm': self.iso_timestamp(), - 'ua': std_headers['User-Agent'], + 'ua': self.get_param('http_headers')['User-Agent'], }).encode('utf8')).decode('utf8').replace('\n', ''), }).encode('utf8')) self._DISPATCHER_CONFIG = self._parse_json(base64.b64decode(tmp['data']), 'initialization') diff --git a/yt_dlp/extractor/openload.py b/yt_dlp/extractor/openload.py index 6ec54509b..36927009d 100644 --- a/yt_dlp/extractor/openload.py +++ b/yt_dlp/extractor/openload.py @@ -16,7 +16,6 @@ from ..utils import ( ExtractorError, get_exe_version, is_outdated_version, - std_headers, Popen, ) @@ -208,7 +207,7 @@ class PhantomJSwrapper(object): replaces = self.options replaces['url'] = url - user_agent = headers.get('User-Agent') or std_headers['User-Agent'] + user_agent = headers.get('User-Agent') or self.get_param('http_headers')['User-Agent'] replaces['ua'] = user_agent.replace('"', '\\"') replaces['jscode'] = jscode diff --git a/yt_dlp/extractor/rtve.py b/yt_dlp/extractor/rtve.py index af1bb943d..7a1dc6f32 100644 --- a/yt_dlp/extractor/rtve.py +++ b/yt_dlp/extractor/rtve.py @@ -17,7 +17,6 @@ from ..utils import ( qualities, remove_end, remove_start, - std_headers, try_get, ) @@ -71,7 +70,7 @@ class RTVEALaCartaIE(InfoExtractor): }] def _real_initialize(self): - user_agent_b64 = base64.b64encode(std_headers['User-Agent'].encode('utf-8')).decode('utf-8') + user_agent_b64 = base64.b64encode(self.get_param('http_headers')['User-Agent'].encode('utf-8')).decode('utf-8') self._manager = self._download_json( 'http://www.rtve.es/odin/loki/' + user_agent_b64, None, 'Fetching manager info')['manager'] diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index 1a9fd00e4..77ffb4bfb 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -28,7 +28,6 @@ from ..utils import ( parse_qs, sanitized_Request, smuggle_url, - std_headers, str_or_none, try_get, unified_timestamp, @@ -758,7 +757,7 @@ class VimeoIE(VimeoBaseInfoExtractor): def _real_extract(self, url): url, data = unsmuggle_url(url, {}) - headers = std_headers.copy() + headers = self.get_param('http_headers').copy() if 'http_headers' in data: headers.update(data['http_headers']) if 'Referer' not in headers: diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 9908f3975..17d8d5da6 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -860,17 +860,16 @@ def create_parser(): workarounds.add_option( '--user-agent', metavar='UA', dest='user_agent', - help='Specify a custom user agent') + help=optparse.SUPPRESS_HELP) workarounds.add_option( '--referer', metavar='URL', dest='referer', default=None, - help='Specify a custom referer, use if the video access is restricted to one domain', - ) + help=optparse.SUPPRESS_HELP) workarounds.add_option( '--add-header', metavar='FIELD:VALUE', dest='headers', default={}, type='str', action='callback', callback=_dict_from_options_callback, - callback_kwargs={'multiple_keys': False, 'process_key': None}, + callback_kwargs={'multiple_keys': False}, help='Specify a custom HTTP header and its value, separated by a colon ":". 
You can use this option multiple times', ) workarounds.add_option( diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index ef2c6bb24..be0c69d8f 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -1372,7 +1372,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): if url != url_escaped: req = update_Request(req, url=url_escaped) - for h, v in std_headers.items(): + for h, v in self._params.get('http_headers', std_headers).items(): # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275 # The dict keys are capitalized because of this bug by urllib if h.capitalize() not in req.headers: @@ -5436,3 +5436,8 @@ class WebSocketsWrapper(): has_websockets = bool(compat_websockets) + + +def merge_headers(*dicts): + """Merge dicts of network headers case insensitively, prioritizing the latter ones""" + return {k.capitalize(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))} From 72e995f1226a2fce3cd7f5fda50ebe024fe2a57e Mon Sep 17 00:00:00 2001 From: FestplattenSchnitzel <45077355+FestplattenSchnitzel@users.noreply.github.com> Date: Fri, 4 Mar 2022 17:19:07 +0100 Subject: [PATCH 0758/2552] [VideocampusSachsen] Add extractors (#2787) Authored by: FestplattenSchnitzel --- yt_dlp/extractor/extractors.py | 4 ++ yt_dlp/extractor/videocampus_sachsen.py | 96 +++++++++++++++++++++++++ 2 files changed, 100 insertions(+) create mode 100644 yt_dlp/extractor/videocampus_sachsen.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index caab2d8ef..5ef1901e4 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1835,6 +1835,10 @@ from .vice import ( from .vidbit import VidbitIE from .viddler import ViddlerIE from .videa import VideaIE +from .videocampus_sachsen import ( + VideocampusSachsenIE, + VideocampusSachsenEmbedIE, +) from .videodetective import VideoDetectiveIE from .videofyme import VideofyMeIE from .videomore import ( diff --git a/yt_dlp/extractor/videocampus_sachsen.py b/yt_dlp/extractor/videocampus_sachsen.py new file mode 100644 index 000000000..96e98573f --- /dev/null +++ b/yt_dlp/extractor/videocampus_sachsen.py @@ -0,0 +1,96 @@ +# coding: utf-8 +from .common import InfoExtractor + + +class VideocampusSachsenIE(InfoExtractor): + _VALID_URL = r'''(?x)https?://videocampus\.sachsen\.de/(?: + m/(?P<tmp_id>[0-9a-f]+)| + (?:category/)?video/(?P<display_id>[\w-]+)/(?P<id>[0-9a-f]{32}) + )''' + + _TESTS = [ + { + 'url': 'https://videocampus.sachsen.de/m/e0d6c8ce6e394c188f1342f1ab7c50ed6fc4490b808699801def5cb2e46d76ca7367f622a9f516c542ffb805b24d6b643bd7c81f385acaac4c59081b87a2767b', + 'info_dict': { + 'id': 'e6b9349905c1628631f175712250f2a1', + 'title': 'Konstruktiver Entwicklungsprozess Vorlesung 7', + 'ext': 'mp4', + }, + }, + { + 'url': 'https://videocampus.sachsen.de/video/Was-ist-selbstgesteuertes-Lernen/fc99c527e4205b121cb7c74433469262', + 'info_dict': { + 'id': 'fc99c527e4205b121cb7c74433469262', + 'title': 'Was ist selbstgesteuertes Lernen?', + 'display_id': 'Was-ist-selbstgesteuertes-Lernen', + 'ext': 'mp4', + }, + }, + { + 'url': 'https://videocampus.sachsen.de/category/video/Tutorial-zur-Nutzung-von-Adobe-Connect-aus-Veranstalter-Sicht/09d4ed029002eb1bdda610f1103dd54c/100', + 'info_dict': { + 'id': '09d4ed029002eb1bdda610f1103dd54c', + 'title': 'Tutorial zur Nutzung von Adobe Connect aus Veranstalter-Sicht', + 'display_id': 'Tutorial-zur-Nutzung-von-Adobe-Connect-aus-Veranstalter-Sicht', + 'ext': 'mp4', + }, + }, + ] + + def _real_extract(self, url): + video_id, tmp_id, display_id = 
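The `merge_headers` helper added to utils.py above is terse but subtle: `str.capitalize()` lowercases everything after the first character, so `User-Agent`, `user-agent` and `USER-AGENT` all collapse to `User-agent`, and because later dicts are iterated last, their values win. For example:

```python
import itertools

def merge_headers(*dicts):
    """Merge dicts of network headers case insensitively, prioritizing the latter ones"""
    return {k.capitalize(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}

std = {'User-Agent': 'Mozilla/5.0', 'Accept-Language': 'en-us,en;q=0.5'}
cli = {'user-agent': 'curl/8.0', 'Referer': 'https://example.com/'}
print(merge_headers(std, cli))
# {'User-agent': 'curl/8.0', 'Accept-language': 'en-us,en;q=0.5', 'Referer': 'https://example.com/'}
```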
self._match_valid_url(url).group('id', 'tmp_id', 'display_id') + webpage = self._download_webpage(url, video_id or tmp_id, fatal=False) or '' + + if not tmp_id: + video_id = self._html_search_regex( + r'src="https?://videocampus\.sachsen\.de/media/embed\?key=([0-9a-f]+)&', + webpage, 'video_id') + + title = self._html_search_regex( + (r'<h1>(?P<content>[^<]+)</h1>', *self._meta_regex('title')), + webpage, 'title', group='content', fatal=False) + + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + f'https://videocampus.sachsen.de/media/hlsMedium/key/{video_id}/format/auto/ext/mp4/learning/0/path/m3u8', + video_id, 'mp4', 'm3u8_native', m3u8_id='hls') + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'display_id': display_id, + 'formats': formats, + 'subtitles': subtitles + } + + +class VideocampusSachsenEmbedIE(InfoExtractor): + _VALID_URL = r'https?://videocampus.sachsen.de/media/embed\?key=(?P<id>[0-9a-f]+)' + + _TESTS = [ + { + 'url': 'https://videocampus.sachsen.de/media/embed?key=fc99c527e4205b121cb7c74433469262', + 'info_dict': { + 'id': 'fc99c527e4205b121cb7c74433469262', + 'title': 'Was ist selbstgesteuertes Lernen?', + 'ext': 'mp4', + }, + } + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + title = self._html_search_regex(r'<img[^>]*title="([^"<]+)"', webpage, 'title', fatal=False) + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + f'https://videocampus.sachsen.de/media/hlsMedium/key/{video_id}/format/auto/ext/mp4/learning/0/path/m3u8', + video_id, 'mp4', 'm3u8_native', m3u8_id='hls') + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'subtitles': subtitles, + } From 50e93e03a7ca6ae35a319ea310104f7d6d91eee3 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Fri, 4 Mar 2022 22:31:04 +0530 Subject: [PATCH 0759/2552] Update to ytdl-commit-6508688 Make default upload_/release_date a compat_str https://github.com/ytdl-org/youtube-dl/commit/6508688e88c83bb811653083db9351702cd39a6a Except: * "[NDR] Overhaul NDR and NJoy extractors" https://github.com/ytdl-org/youtube-dl/pull/30531 - https://github.com/ytdl-org/youtube-dl/commit/01824d275bfa7efbaca274b38c1ddc2b03f12f5d - https://github.com/ytdl-org/youtube-dl/commit/39a98b09a2acf50dc64bc41185be723b98e740b9 - https://github.com/ytdl-org/youtube-dl/commit/f0a05a55c2ee512880546c056cfbec5ad3399798 - https://github.com/ytdl-org/youtube-dl/commit/4186e817772d49d6f66b07c5ac8c248f026a6446 --- yt_dlp/YoutubeDL.py | 2 +- yt_dlp/extractor/aliexpress.py | 2 +- yt_dlp/extractor/alsace20tv.py | 87 ++++++++++++++++++ yt_dlp/extractor/applepodcasts.py | 48 +++++++--- yt_dlp/extractor/arte.py | 42 +++++++++ yt_dlp/extractor/audiomack.py | 1 + yt_dlp/extractor/bbc.py | 57 +++++++++--- yt_dlp/extractor/bigo.py | 6 +- yt_dlp/extractor/cpac.py | 148 ++++++++++++++++++++++++++++++ yt_dlp/extractor/extractors.py | 9 ++ yt_dlp/extractor/nuvid.py | 49 ++++++++-- yt_dlp/extractor/rutv.py | 5 +- yt_dlp/extractor/streamcz.py | 18 +++- yt_dlp/extractor/tele5.py | 87 +++++++----------- yt_dlp/extractor/tv2dk.py | 17 +++- yt_dlp/extractor/uol.py | 1 - yt_dlp/extractor/urplay.py | 53 ++++++++--- yt_dlp/extractor/videa.py | 9 +- yt_dlp/extractor/vimeo.py | 18 ++++ yt_dlp/extractor/wdr.py | 57 ++++++++---- yt_dlp/extractor/zdf.py | 45 +++++++-- 21 files changed, 621 insertions(+), 140 deletions(-) create mode 100644 yt_dlp/extractor/alsace20tv.py create mode 
100644 yt_dlp/extractor/cpac.py diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 49143cb16..57201b6dc 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2397,7 +2397,7 @@ class YoutubeDL(object): sanitize_string_field(info_dict, 'id') sanitize_numeric_fields(info_dict) if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None): - self.report_warning('"duration" field is negative, there is an error in extractor') + self.report_warning('"duration" field is negative, there is an error in extractor') if 'playlist' not in info_dict: # It isn't part of a playlist diff --git a/yt_dlp/extractor/aliexpress.py b/yt_dlp/extractor/aliexpress.py index 6f241e683..9722fe9ac 100644 --- a/yt_dlp/extractor/aliexpress.py +++ b/yt_dlp/extractor/aliexpress.py @@ -18,7 +18,7 @@ class AliExpressLiveIE(InfoExtractor): 'id': '2800002704436634', 'ext': 'mp4', 'title': 'CASIMA7.22', - 'thumbnail': r're:http://.*\.jpg', + 'thumbnail': r're:https?://.*\.jpg', 'uploader': 'CASIMA Official Store', 'timestamp': 1500717600, 'upload_date': '20170722', diff --git a/yt_dlp/extractor/alsace20tv.py b/yt_dlp/extractor/alsace20tv.py new file mode 100644 index 000000000..4aae6fe74 --- /dev/null +++ b/yt_dlp/extractor/alsace20tv.py @@ -0,0 +1,87 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + clean_html, + dict_get, + get_element_by_class, + int_or_none, + unified_strdate, + url_or_none, +) + + +class Alsace20TVBaseIE(InfoExtractor): + def _extract_video(self, video_id, url=None): + info = self._download_json( + 'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key=%s&habillage=0&mode=html' % (video_id, ), + video_id) or {} + title = info.get('titre') + + formats = [] + for res, fmt_url in (info.get('files') or {}).items(): + formats.extend( + self._extract_smil_formats(fmt_url, video_id, fatal=False) + if '/smil:_' in fmt_url + else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False)) + self._sort_formats(formats) + + webpage = (url and self._download_webpage(url, video_id, fatal=False)) or '' + thumbnail = url_or_none(dict_get(info, ('image', 'preview', )) or self._og_search_thumbnail(webpage)) + upload_date = self._search_regex(r'/(\d{6})_', thumbnail, 'upload_date', default=None) + upload_date = unified_strdate('20%s-%s-%s' % (upload_date[:2], upload_date[2:4], upload_date[4:])) if upload_date else None + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'description': clean_html(get_element_by_class('wysiwyg', webpage)), + 'upload_date': upload_date, + 'thumbnail': thumbnail, + 'duration': int_or_none(self._og_search_property('video:duration', webpage) if webpage else None), + 'view_count': int_or_none(info.get('nb_vues')), + } + + +class Alsace20TVIE(Alsace20TVBaseIE): + _VALID_URL = r'https?://(?:www\.)?alsace20\.tv/(?:[\w-]+/)+[\w-]+-(?P<id>[\w]+)' + _TESTS = [{ + 'url': 'https://www.alsace20.tv/VOD/Actu/JT/Votre-JT-jeudi-3-fevrier-lyNHCXpYJh.html', + 'info_dict': { + 'id': 'lyNHCXpYJh', + 'ext': 'mp4', + 'description': 'md5:fc0bc4a0692d3d2dba4524053de4c7b7', + 'title': 'Votre JT du jeudi 3 février', + 'upload_date': '20220203', + 'thumbnail': r're:https?://.+\.jpg', + 'duration': 1073, + 'view_count': int, + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + return self._extract_video(video_id, url) + + +class Alsace20TVEmbedIE(Alsace20TVBaseIE): + _VALID_URL = r'https?://(?:www\.)?alsace20\.tv/emb/(?P<id>[\w]+)' + _TESTS = [{ + 'url': 
'https://www.alsace20.tv/emb/lyNHCXpYJh', + # 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb', + 'info_dict': { + 'id': 'lyNHCXpYJh', + 'ext': 'mp4', + 'title': 'Votre JT du jeudi 3 février', + 'upload_date': '20220203', + 'thumbnail': r're:https?://.+\.jpg', + 'view_count': int, + }, + 'params': { + 'format': 'bestvideo', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + return self._extract_video(video_id) diff --git a/yt_dlp/extractor/applepodcasts.py b/yt_dlp/extractor/applepodcasts.py index 6a74de758..9139ff777 100644 --- a/yt_dlp/extractor/applepodcasts.py +++ b/yt_dlp/extractor/applepodcasts.py @@ -3,7 +3,9 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( + clean_html, clean_podcast_url, + get_element_by_class, int_or_none, parse_iso8601, try_get, @@ -14,16 +16,17 @@ class ApplePodcastsIE(InfoExtractor): _VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)' _TESTS = [{ 'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777', - 'md5': 'df02e6acb11c10e844946a39e7222b08', + 'md5': '41dc31cd650143e530d9423b6b5a344f', 'info_dict': { 'id': '1000482637777', 'ext': 'mp3', 'title': '207 - Whitney Webb Returns', - 'description': 'md5:13a73bade02d2e43737751e3987e1399', + 'description': 'md5:75ef4316031df7b41ced4e7b987f79c6', 'upload_date': '20200705', - 'timestamp': 1593921600, - 'duration': 6425, + 'timestamp': 1593932400, + 'duration': 6454, 'series': 'The Tim Dillon Show', + 'thumbnail': 're:.+[.](png|jpe?g|webp)', } }, { 'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777', @@ -39,24 +42,47 @@ class ApplePodcastsIE(InfoExtractor): def _real_extract(self, url): episode_id = self._match_id(url) webpage = self._download_webpage(url, episode_id) - ember_data = self._parse_json(self._search_regex( - r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<', - webpage, 'ember data'), episode_id) - ember_data = ember_data.get(episode_id) or ember_data - episode = ember_data['data']['attributes'] + episode_data = {} + ember_data = {} + # new page type 2021-11 + amp_data = self._parse_json(self._search_regex( + r'(?s)id="shoebox-media-api-cache-amp-podcasts"[^>]*>\s*({.+?})\s*<', + webpage, 'AMP data', default='{}'), episode_id, fatal=False) or {} + amp_data = try_get(amp_data, + lambda a: self._parse_json( + next(a[x] for x in iter(a) if episode_id in x), + episode_id), + dict) or {} + amp_data = amp_data.get('d') or [] + episode_data = try_get( + amp_data, + lambda a: next(x for x in a + if x['type'] == 'podcast-episodes' and x['id'] == episode_id), + dict) + if not episode_data: + # try pre 2021-11 page type: TODO: consider deleting if no longer used + ember_data = self._parse_json(self._search_regex( + r'(?s)id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<', + webpage, 'ember data'), episode_id) or {} + ember_data = ember_data.get(episode_id) or ember_data + episode_data = try_get(ember_data, lambda x: x['data'], dict) + episode = episode_data['attributes'] description = episode.get('description') or {} series = None - for inc in (ember_data.get('included') or []): + for inc in (amp_data or ember_data.get('included') or []): if inc.get('type') == 'media/podcast': series = try_get(inc, lambda x: x['attributes']['name']) + series = series or clean_html(get_element_by_class('podcast-header__identity', webpage)) return { 'id': episode_id, - 'title': episode['name'], + 'title': episode.get('name'), 
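The new Apple Podcasts code path above decodes the AMP cache twice: the shoebox blob is a JSON object keyed by request URL, and each value is itself a JSON-encoded string. A mock round-trip showing why (the cache key and payload are invented for illustration; the episode id is the one from the test above):

```python
import json

episode_id = '1000482637777'
shoebox = json.dumps({
    f'/v1/catalog/us/podcast-episodes/{episode_id}': json.dumps(
        {'d': [{'type': 'podcast-episodes', 'id': episode_id,
                'attributes': {'name': '207 - Whitney Webb Returns'}}]}),
})

cache = json.loads(shoebox)  # first decode: the outer object
inner = json.loads(next(v for k, v in cache.items() if episode_id in k))  # second decode
episode = next(x for x in inner['d']
               if x['type'] == 'podcast-episodes' and x['id'] == episode_id)
print(episode['attributes']['name'])  # 207 - Whitney Webb Returns
```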
'url': clean_podcast_url(episode['assetUrl']), 'description': description.get('standard') or description.get('short'), 'timestamp': parse_iso8601(episode.get('releaseDateTime')), 'duration': int_or_none(episode.get('durationInMilliseconds'), 1000), 'series': series, + 'thumbnail': self._og_search_thumbnail(webpage), + 'vcodec': 'none', } diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index 296b169d2..a7ffdc24c 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -12,6 +12,7 @@ from ..utils import ( int_or_none, parse_qs, qualities, + strip_or_none, try_get, unified_strdate, url_or_none, @@ -253,3 +254,44 @@ class ArteTVPlaylistIE(ArteTVBaseIE): title = collection.get('title') description = collection.get('shortDescription') or collection.get('teaserText') return self.playlist_result(entries, playlist_id, title, description) + + +class ArteTVCategoryIE(ArteTVBaseIE): + _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$' % ArteTVBaseIE._ARTE_LANGUAGES + _TESTS = [{ + 'url': 'https://www.arte.tv/en/videos/politics-and-society/', + 'info_dict': { + 'id': 'politics-and-society', + 'title': 'Politics and society', + 'description': 'Investigative documentary series, geopolitical analysis, and international commentary', + }, + 'playlist_mincount': 13, + }, + ] + + @classmethod + def suitable(cls, url): + return ( + not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE, )) + and super(ArteTVCategoryIE, cls).suitable(url)) + + def _real_extract(self, url): + lang, playlist_id = self._match_valid_url(url).groups() + webpage = self._download_webpage(url, playlist_id) + + items = [] + for video in re.finditer( + r'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/%s/videos/[\w/-]+)(?P=q)' % lang, + webpage): + video = video.group('url') + if video == url: + continue + if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )): + items.append(video) + + title = (self._og_search_title(webpage, default=None) + or self._html_search_regex(r'<title\b[^>]*>([^<]+)', default=None)) + title = strip_or_none(title.rsplit('|', 1)[0]) or self._generic_title(url) + + return self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=title, + description=self._og_search_description(webpage, default=None)) diff --git a/yt_dlp/extractor/audiomack.py b/yt_dlp/extractor/audiomack.py index 31fb859ae..19775cf0f 100644 --- a/yt_dlp/extractor/audiomack.py +++ b/yt_dlp/extractor/audiomack.py @@ -29,6 +29,7 @@ class AudiomackIE(InfoExtractor): } }, # audiomack wrapper around soundcloud song + # Needs new test URL. 
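`ArteTVCategoryIE.suitable` in the Arte hunk above is the usual answer to overlapping `_VALID_URL`s: the broader category pattern defers whenever a more specific extractor claims the URL. A self-contained sketch with hypothetical patterns:

```python
import re

class Extractor:
    _VALID_URL = None

    @classmethod
    def suitable(cls, url):
        return re.match(cls._VALID_URL, url) is not None

class VideoIE(Extractor):
    _VALID_URL = r'https?://example\.com/videos/\w+/[\w-]+-\d+'

class CategoryIE(Extractor):
    _VALID_URL = r'https?://example\.com/videos/[\w-]+(?:/[\w-]+)*/?$'

    @classmethod
    def suitable(cls, url):
        # yield to the more specific extractor, as ArteTVCategoryIE does
        return not VideoIE.suitable(url) and super().suitable(url)

print(CategoryIE.suitable('https://example.com/videos/politics-and-society/'))       # True
print(CategoryIE.suitable('https://example.com/videos/politics/some-video-123456'))  # False
```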
{ 'add_ie': ['Soundcloud'], 'url': 'http://www.audiomack.com/song/hip-hop-daily/black-mamba-freestyle', diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py index 199a3f8e2..b664a7007 100644 --- a/yt_dlp/extractor/bbc.py +++ b/yt_dlp/extractor/bbc.py @@ -11,6 +11,7 @@ from ..compat import ( compat_etree_Element, compat_HTTPError, compat_str, + compat_urllib_error, compat_urlparse, ) from ..utils import ( @@ -38,7 +39,7 @@ from ..utils import ( class BBCCoUkIE(InfoExtractor): IE_NAME = 'bbc.co.uk' IE_DESC = 'BBC iPlayer' - _ID_REGEX = r'(?:[pbm][\da-z]{7}|w[\da-z]{7,14})' + _ID_REGEX = r'(?:[pbml][\da-z]{7}|w[\da-z]{7,14})' _VALID_URL = r'''(?x) https?:// (?:www\.)?bbc\.co\.uk/ @@ -394,9 +395,17 @@ class BBCCoUkIE(InfoExtractor): formats.extend(self._extract_mpd_formats( href, programme_id, mpd_id=format_id, fatal=False)) elif transfer_format == 'hls': - formats.extend(self._extract_m3u8_formats( - href, programme_id, ext='mp4', entry_protocol='m3u8_native', - m3u8_id=format_id, fatal=False)) + # TODO: let expected_status be passed into _extract_xxx_formats() instead + try: + fmts = self._extract_m3u8_formats( + href, programme_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id=format_id, fatal=False) + except ExtractorError as e: + if not (isinstance(e.exc_info[1], compat_urllib_error.HTTPError) + and e.exc_info[1].code in (403, 404)): + raise + fmts = [] + formats.extend(fmts) elif transfer_format == 'hds': formats.extend(self._extract_f4m_formats( href, programme_id, f4m_id=format_id, fatal=False)) @@ -784,21 +793,33 @@ class BBCIE(BBCCoUkIE): 'timestamp': 1437785037, 'upload_date': '20150725', }, + }, { + # video with window.__INITIAL_DATA__ and value as JSON string + 'url': 'https://www.bbc.com/news/av/world-europe-59468682', + 'info_dict': { + 'id': 'p0b71qth', + 'ext': 'mp4', + 'title': 'Why France is making this woman a national hero', + 'description': 'md5:7affdfab80e9c3a1f976230a1ff4d5e4', + 'thumbnail': r're:https?://.+/.+\.jpg', + 'timestamp': 1638230731, + 'upload_date': '20211130', + }, }, { # single video article embedded with data-media-vpid 'url': 'http://www.bbc.co.uk/sport/rowing/35908187', 'only_matching': True, }, { + # bbcthreeConfig 'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1', 'info_dict': { 'id': 'p06556y7', 'ext': 'mp4', - 'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?', - 'description': 'md5:4b7dfd063d5a789a1512e99662be3ddd', + 'title': 'Things Not To Say to people that live on council estates', + 'description': "From being labelled a 'chav', to the presumption that they're 'scroungers', people who live on council estates encounter all kinds of prejudices and false assumptions about themselves, their families, and their lifestyles. 
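The try/except added around `_extract_m3u8_formats` in the BBC hunk above narrows the failure handling: a 403 or 404 on one delivery method simply contributes no formats, while anything else still propagates (the real code has to inspect `e.exc_info[1]` because the HTTP error arrives wrapped in an ExtractorError). The same shape with plain urllib:

```python
import urllib.error
import urllib.request

def fetch_optional_playlist(url):
    # geo-blocked or withdrawn renditions yield None; other errors are real failures
    try:
        with urllib.request.urlopen(url) as resp:
            return resp.read().decode('utf-8', 'replace')
    except urllib.error.HTTPError as e:
        if e.code in (403, 404):
            return None
        raise
```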
Here, eight people discuss the common statements, misconceptions, and clichés that they're tired of hearing.", + 'duration': 360, + 'thumbnail': r're:https?://.+/.+\.jpg', }, - 'params': { - 'skip_download': True, - } }, { # window.__PRELOADED_STATE__ 'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl', @@ -1171,9 +1192,16 @@ class BBCIE(BBCCoUkIE): return self.playlist_result( entries, playlist_id, playlist_title, playlist_description) - initial_data = self._parse_json(self._parse_json(self._search_regex( - r'window\.__INITIAL_DATA__\s*=\s*("{.+?}");', webpage, - 'preload state', default='"{}"'), playlist_id, fatal=False), playlist_id, fatal=False) + initial_data = self._search_regex( + r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage, + 'quoted preload state', default=None) + if initial_data is None: + initial_data = self._search_regex( + r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage, + 'preload state', default={}) + else: + initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False) + initial_data = self._parse_json(initial_data, playlist_id, fatal=False) if initial_data: def parse_media(media): if not media: @@ -1214,7 +1242,10 @@ class BBCIE(BBCCoUkIE): if name == 'media-experience': parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict)) elif name == 'article': - for block in (try_get(resp, lambda x: x['data']['content']['model']['blocks'], list) or []): + for block in (try_get(resp, + (lambda x: x['data']['blocks'], + lambda x: x['data']['content']['model']['blocks'],), + list) or []): if block.get('type') != 'media': continue parse_media(block.get('model')) diff --git a/yt_dlp/extractor/bigo.py b/yt_dlp/extractor/bigo.py index 6e38ecc1d..ddf76ac55 100644 --- a/yt_dlp/extractor/bigo.py +++ b/yt_dlp/extractor/bigo.py @@ -34,9 +34,11 @@ class BigoIE(InfoExtractor): 'https://bigo.tv/studio/getInternalStudioInfo', user_id, data=urlencode_postdata({'siteId': user_id})) + if not isinstance(info_raw, dict): + raise ExtractorError('Received invalid JSON data') if info_raw.get('code'): raise ExtractorError( - f'{info_raw["msg"]} (code {info_raw["code"]})', expected=True) + 'Bigo says: %s (code %s)' % (info_raw.get('msg'), info_raw.get('code')), expected=True) info = info_raw.get('data') or {} if not info.get('alive'): @@ -44,7 +46,7 @@ class BigoIE(InfoExtractor): return { 'id': info.get('roomId') or user_id, - 'title': info.get('roomTopic'), + 'title': info.get('roomTopic') or info.get('nick_name') or user_id, 'formats': [{ 'url': info.get('hls_src'), 'ext': 'mp4', diff --git a/yt_dlp/extractor/cpac.py b/yt_dlp/extractor/cpac.py new file mode 100644 index 000000000..22741152c --- /dev/null +++ b/yt_dlp/extractor/cpac.py @@ -0,0 +1,148 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + int_or_none, + str_or_none, + try_get, + unified_timestamp, + update_url_query, + urljoin, +) + +# compat_range +try: + if callable(xrange): + range = xrange +except (NameError, TypeError): + pass + + +class CPACIE(InfoExtractor): + IE_NAME = 'cpac' + _VALID_URL = r'https?://(?:www\.)?cpac\.ca/(?Pl-)?episode\?id=(?P[\da-f]{8}(?:-[\da-f]{4}){3}-[\da-f]{12})' + _TEST = { + # 'url': 'http://www.cpac.ca/en/programs/primetime-politics/episodes/65490909', + 'url': 'https://www.cpac.ca/episode?id=fc7edcae-4660-47e1-ba61-5b7f29a9db0f', + 'md5': 'e46ad699caafd7aa6024279f2614e8fa', + 'info_dict': { + 'id': 'fc7edcae-4660-47e1-ba61-5b7f29a9db0f', + 'ext': 
'mp4', + 'upload_date': '20220215', + 'title': 'News Conference to Celebrate National Kindness Week – February 15, 2022', + 'description': 'md5:466a206abd21f3a6f776cdef290c23fb', + 'timestamp': 1644901200, + }, + 'params': { + 'format': 'bestvideo', + 'hls_prefer_native': True, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + url_lang = 'fr' if '/l-episode?' in url else 'en' + + content = self._download_json( + 'https://www.cpac.ca/api/1/services/contentModel.json?url=/site/website/episode/index.xml&crafterSite=cpacca&id=' + video_id, + video_id) + video_url = try_get(content, lambda x: x['page']['details']['videoUrl'], compat_str) + formats = [] + if video_url: + content = content['page'] + title = str_or_none(content['details']['title_%s_t' % (url_lang, )]) + formats = self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', ext='mp4') + for fmt in formats: + # prefer language to match URL + fmt_lang = fmt.get('language') + if fmt_lang == url_lang: + fmt['language_preference'] = 10 + elif not fmt_lang: + fmt['language_preference'] = -1 + else: + fmt['language_preference'] = -10 + + self._sort_formats(formats) + + category = str_or_none(content['details']['category_%s_t' % (url_lang, )]) + + def is_live(v_type): + return (v_type == 'live') if v_type is not None else None + + return { + 'id': video_id, + 'formats': formats, + 'title': title, + 'description': str_or_none(content['details'].get('description_%s_t' % (url_lang, ))), + 'timestamp': unified_timestamp(content['details'].get('liveDateTime')), + 'category': [category] if category else None, + 'thumbnail': urljoin(url, str_or_none(content['details'].get('image_%s_s' % (url_lang, )))), + 'is_live': is_live(content['details'].get('type')), + } + + +class CPACPlaylistIE(InfoExtractor): + IE_NAME = 'cpac:playlist' + _VALID_URL = r'(?i)https?://(?:www\.)?cpac\.ca/(?:program|search|(?Pemission|rechercher))\?(?:[^&]+&)*?(?P(?:id=\d+|programId=\d+|key=[^&]+))' + + _TESTS = [{ + 'url': 'https://www.cpac.ca/program?id=6', + 'info_dict': { + 'id': 'id=6', + 'title': 'Headline Politics', + 'description': 'Watch CPAC’s signature long-form coverage of the day’s pressing political events as they unfold.', + }, + 'playlist_count': 10, + }, { + 'url': 'https://www.cpac.ca/search?key=hudson&type=all&order=desc', + 'info_dict': { + 'id': 'key=hudson', + 'title': 'hudson', + }, + 'playlist_count': 22, + }, { + 'url': 'https://www.cpac.ca/search?programId=50', + 'info_dict': { + 'id': 'programId=50', + 'title': '50', + }, + 'playlist_count': 9, + }, { + 'url': 'https://www.cpac.ca/emission?id=6', + 'only_matching': True, + }, { + 'url': 'https://www.cpac.ca/rechercher?key=hudson&type=all&order=desc', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + url_lang = 'fr' if any(x in url for x in ('/emission?', '/rechercher?')) else 'en' + pl_type, list_type = ('program', 'itemList') if any(x in url for x in ('/program?', '/emission?')) else ('search', 'searchResult') + api_url = ( + 'https://www.cpac.ca/api/1/services/contentModel.json?url=/site/website/%s/index.xml&crafterSite=cpacca&%s' + % (pl_type, video_id, )) + content = self._download_json(api_url, video_id) + entries = [] + total_pages = int_or_none(try_get(content, lambda x: x['page'][list_type]['totalPages']), default=1) + for page in range(1, total_pages + 1): + if page > 1: + api_url = update_url_query(api_url, {'page': '%d' % (page, ), }) + content = self._download_json( + api_url, video_id, + note='Downloading 
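The playlist loop here fetches page 1 once (that response also reports `totalPages`) and then re-queries with a `page` parameter for the remaining pages. Reduced to its logic, with a stub in place of the JSON download:

```python
def fetch_page(page):
    # stand-in for the paged contentModel.json request
    data = {1: ['a', 'b'], 2: ['c'], 3: ['d', 'e']}
    return {'totalPages': 3, 'item': data[page]}

first = fetch_page(1)
entries = list(first['item'])
for page in range(2, int(first.get('totalPages') or 1) + 1):
    entries.extend(fetch_page(page)['item'])
print(entries)  # ['a', 'b', 'c', 'd', 'e']
```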
continuation - %d' % (page, ), + fatal=False) + + for item in try_get(content, lambda x: x['page'][list_type]['item'], list) or []: + episode_url = urljoin(url, try_get(item, lambda x: x['url_%s_s' % (url_lang, )])) + if episode_url: + entries.append(episode_url) + + return self.playlist_result( + (self.url_result(entry) for entry in entries), + playlist_id=video_id, + playlist_title=try_get(content, lambda x: x['page']['program']['title_%s_t' % (url_lang, )]) or video_id.split('=')[-1], + playlist_description=try_get(content, lambda x: x['page']['program']['description_%s_t' % (url_lang, )]), + ) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 5ef1901e4..9e84655d6 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -68,6 +68,10 @@ from .anvato import AnvatoIE from .aol import AolIE from .allocine import AllocineIE from .aliexpress import AliExpressLiveIE +from .alsace20tv import ( + Alsace20TVIE, + Alsace20TVEmbedIE, +) from .apa import APAIE from .aparat import AparatIE from .appleconnect import AppleConnectIE @@ -91,6 +95,7 @@ from .arte import ( ArteTVIE, ArteTVEmbedIE, ArteTVPlaylistIE, + ArteTVCategoryIE, ) from .arnes import ArnesIE from .asiancrush import ( @@ -306,6 +311,10 @@ from .commonprotocols import ( from .condenast import CondeNastIE from .contv import CONtvIE from .corus import CorusIE +from .cpac import ( + CPACIE, + CPACPlaylistIE, +) from .cozytv import CozyTVIE from .cracked import CrackedIE from .crackle import CrackleIE diff --git a/yt_dlp/extractor/nuvid.py b/yt_dlp/extractor/nuvid.py index 7487824f9..84fb97d6a 100644 --- a/yt_dlp/extractor/nuvid.py +++ b/yt_dlp/extractor/nuvid.py @@ -1,11 +1,14 @@ # coding: utf-8 from __future__ import unicode_literals +import re from .common import InfoExtractor from ..utils import ( parse_duration, int_or_none, - try_get, + strip_or_none, + traverse_obj, + url_or_none, ) @@ -20,14 +23,30 @@ class NuvidIE(InfoExtractor): 'title': 'italian babe', 'duration': 321.0, 'age_limit': 18, + 'thumbnail': r're:https?://.+\.jpg', } }, { 'url': 'https://m.nuvid.com/video/6523263', + 'md5': 'ebd22ce8e47e1d9a4d0756a15c67da52', 'info_dict': { 'id': '6523263', 'ext': 'mp4', - 'age_limit': 18, 'title': 'Slut brunette college student anal dorm', + 'duration': 421.0, + 'age_limit': 18, + 'thumbnail': r're:https?://.+\.jpg', + 'thumbnails': list, + } + }, { + 'url': 'http://m.nuvid.com/video/6415801/', + 'md5': '638d5ececb138d5753593f751ae3f697', + 'info_dict': { + 'id': '6415801', + 'ext': 'mp4', + 'title': 'My best friend wanted to fuck my wife for a long time', + 'duration': 1882, + 'age_limit': 18, + 'thumbnail': r're:https?://.+\.jpg', } }] @@ -46,6 +65,16 @@ class NuvidIE(InfoExtractor): 'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8', }) + webpage = self._download_webpage( + 'http://m.nuvid.com/video/%s' % (video_id, ), + video_id, 'Downloading video page', fatal=False) or '' + + title = strip_or_none(video_data.get('title') or self._html_search_regex( + (r''']*?\btitle\s*=\s*(?P"|'|\b)(?P[^"]+)(?P=q)\s*>''', + r'''<div\s[^>]*?\bclass\s*=\s*(?P<q>"|'|\b)thumb-holder video(?P=q)>\s*<h5\b[^>]*>(?P<title>[^<]+)</h5''', + r'''<span\s[^>]*?\bclass\s*=\s*(?P<q>"|'|\b)title_thumb(?P=q)>(?P<title>[^<]+)</span'''), + webpage, 'title', group='title')) + formats = [{ 'url': source, 'format_id': qualities.get(quality), @@ -55,19 +84,19 @@ class NuvidIE(InfoExtractor): self._check_formats(formats, video_id) self._sort_formats(formats) - title = video_data.get('title') - 
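The Nuvid title extraction above passes a tuple of patterns, tried from most to least specific, because the site serves several page layouts. Equivalent standalone logic (patterns simplified from the ones in the hunk):

```python
import re

def first_match(patterns, html):
    for pattern in patterns:
        m = re.search(pattern, html)
        if m:
            return m.group('title').strip()

html = '<div class="thumb-holder video"><h5>My best friend wanted ...</h5></div>'
print(first_match((
    r'<span[^>]*\bclass\s*=\s*"title_thumb"[^>]*>(?P<title>[^<]+)</span>',
    r'<div[^>]*\bclass\s*=\s*"thumb-holder video"[^>]*>\s*<h5[^>]*>(?P<title>[^<]+)</h5>',
), html))  # My best friend wanted ...
```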
thumbnail_base_url = try_get(video_data, lambda x: x['thumbs']['url']) - thumbnail_extension = try_get(video_data, lambda x: x['thumbs']['extension']) - thumbnail_id = self._search_regex( - r'/media/videos/tmb/6523263/preview/(/d+)' + thumbnail_extension, video_data.get('poster', ''), 'thumbnail id', default=19) - thumbnail = f'{thumbnail_base_url}player/{thumbnail_id}{thumbnail_extension}' - duration = parse_duration(video_data.get('duration') or video_data.get('duration_format')) + duration = parse_duration(traverse_obj(video_data, 'duration', 'duration_format')) + thumbnails = [ + {'url': thumb_url} for thumb_url in re.findall( + r'<div\s+class\s*=\s*"video-tmb-wrap"\s*>\s*<img\s+src\s*=\s*"([^"]+)"\s*/>', webpage) + if url_or_none(thumb_url)] + if url_or_none(video_data.get('poster')): + thumbnails.append({'url': video_data['poster'], 'preference': 1}) return { 'id': video_id, 'formats': formats, 'title': title, - 'thumbnail': thumbnail, + 'thumbnails': thumbnails, 'duration': duration, 'age_limit': 18, } diff --git a/yt_dlp/extractor/rutv.py b/yt_dlp/extractor/rutv.py index 3de86b232..66ac32deb 100644 --- a/yt_dlp/extractor/rutv.py +++ b/yt_dlp/extractor/rutv.py @@ -6,7 +6,8 @@ import re from .common import InfoExtractor from ..utils import ( ExtractorError, - int_or_none + int_or_none, + str_to_int ) @@ -179,7 +180,7 @@ class RUTVIE(InfoExtractor): 'player_url': 'http://player.rutv.ru/flash3v/osmf.swf?i=22', 'rtmp_live': True, 'ext': 'flv', - 'vbr': int(quality), + 'vbr': str_to_int(quality), 'quality': preference, } elif transport == 'm3u8': diff --git a/yt_dlp/extractor/streamcz.py b/yt_dlp/extractor/streamcz.py index 0191c77de..4cb9923e2 100644 --- a/yt_dlp/extractor/streamcz.py +++ b/yt_dlp/extractor/streamcz.py @@ -22,6 +22,20 @@ class StreamCZIE(InfoExtractor): 'title': 'Bůh', 'display_id': 'buh', 'description': 'md5:8f5f09b9b7bc67df910486cdd88f7165', + 'duration': 1369.6, + 'view_count': int, + } + }, { + 'url': 'https://www.stream.cz/kdo-to-mluvi/kdo-to-mluvi-velke-odhaleni-prinasi-novy-porad-uz-od-25-srpna-64087937', + 'md5': '41fd358000086a1ccdb068c77809b158', + 'info_dict': { + 'id': '64087937', + 'ext': 'mp4', + 'title': 'Kdo to mluví? Velké odhalení přináší nový pořad už od 25. srpna', + 'display_id': 'kdo-to-mluvi-velke-odhaleni-prinasi-novy-porad-uz-od-25-srpna', + 'description': 'md5:97a811000a6460266029d6c1c2ebcd59', + 'duration': 50.2, + 'view_count': int, } }, { 'url': 'https://www.stream.cz/tajemno/znicehonic-jim-skrz-strechu-prolitnul-zahadny-predmet-badatele-vse-objasnili-64147267', @@ -31,7 +45,9 @@ class StreamCZIE(InfoExtractor): 'ext': 'mp4', 'title': 'Zničehonic jim skrz střechu prolítnul záhadný předmět. 
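Note: the rutv hunk swaps a bare int(quality) for str_to_int(quality) so a non-numeric variant label degrades vbr to None instead of raising ValueError and aborting extraction. A simplified stand-in for the helper (sample inputs are illustrative):

    import re

    def str_to_int(v):  # simplified stand-in for yt_dlp.utils.str_to_int
        if v is None:
            return None
        v = re.sub(r'[,.+]', '', str(v))
        try:
            return int(v)
        except ValueError:
            return None

    print(str_to_int('900'))    # 900
    print(str_to_int('1,920'))  # 1920
    print(str_to_int('auto'))   # None, where int('auto') would raise ValueError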
Badatelé vše objasnili', 'display_id': 'znicehonic-jim-skrz-strechu-prolitnul-zahadny-predmet-badatele-vse-objasnili', - 'description': 'md5:1dcb5e010eb697dedc5942f76c5b3744', + 'description': 'md5:4b8ada6718d34bb011c4e04ca4bc19bf', + 'duration': 442.84, + 'view_count': int, } }] diff --git a/yt_dlp/extractor/tele5.py b/yt_dlp/extractor/tele5.py index 0d9cf75ca..c7beee153 100644 --- a/yt_dlp/extractor/tele5.py +++ b/yt_dlp/extractor/tele5.py @@ -1,19 +1,15 @@ # coding: utf-8 from __future__ import unicode_literals -import re - -from .common import InfoExtractor -from .jwplatform import JWPlatformIE -from .nexx import NexxIE +from .dplay import DPlayIE +from ..compat import compat_urlparse from ..utils import ( - NO_DEFAULT, - parse_qs, - smuggle_url, + ExtractorError, + extract_attributes, ) -class Tele5IE(InfoExtractor): +class Tele5IE(DPlayIE): _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)' _GEO_COUNTRIES = ['DE'] _TESTS = [{ @@ -28,6 +24,7 @@ class Tele5IE(InfoExtractor): 'params': { 'skip_download': True, }, + 'skip': 'No longer available: "404 Seite nicht gefunden"', }, { # jwplatform, nexx unavailable 'url': 'https://www.tele5.de/filme/ghoul-das-geheimnis-des-friedhofmonsters/', @@ -42,7 +39,20 @@ class Tele5IE(InfoExtractor): 'params': { 'skip_download': True, }, - 'add_ie': [JWPlatformIE.ie_key()], + 'skip': 'No longer available, redirects to Filme page', + }, { + 'url': 'https://tele5.de/mediathek/angel-of-mine/', + 'info_dict': { + 'id': '1252360', + 'ext': 'mp4', + 'upload_date': '20220109', + 'timestamp': 1641762000, + 'title': 'Angel of Mine', + 'description': 'md5:a72546a175e1286eb3251843a52d1ad7', + }, + 'params': { + 'format': 'bestvideo', + }, }, { 'url': 'https://www.tele5.de/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191', 'only_matching': True, @@ -64,45 +74,18 @@ class Tele5IE(InfoExtractor): }] def _real_extract(self, url): - qs = parse_qs(url) - video_id = (qs.get('vid') or qs.get('ve_id') or [None])[0] - - NEXX_ID_RE = r'\d{6,}' - JWPLATFORM_ID_RE = r'[a-zA-Z0-9]{8}' - - def nexx_result(nexx_id): - return self.url_result( - 'https://api.nexx.cloud/v3/759/videos/byid/%s' % nexx_id, - ie=NexxIE.ie_key(), video_id=nexx_id) - - nexx_id = jwplatform_id = None - - if video_id: - if re.match(NEXX_ID_RE, video_id): - return nexx_result(video_id) - elif re.match(JWPLATFORM_ID_RE, video_id): - jwplatform_id = video_id - - if not nexx_id: - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - def extract_id(pattern, name, default=NO_DEFAULT): - return self._html_search_regex( - (r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](%s)' % pattern, - r'\s+id\s*=\s*["\']player_(%s)' % pattern, - r'\bdata-id\s*=\s*["\'](%s)' % pattern), webpage, name, - default=default) - - nexx_id = extract_id(NEXX_ID_RE, 'nexx id', default=None) - if nexx_id: - return nexx_result(nexx_id) - - if not jwplatform_id: - jwplatform_id = extract_id(JWPLATFORM_ID_RE, 'jwplatform id') - - return self.url_result( - smuggle_url( - 'jwplatform:%s' % jwplatform_id, - {'geo_countries': self._GEO_COUNTRIES}), - ie=JWPlatformIE.ie_key(), video_id=jwplatform_id) + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + player_element = self._search_regex(r'(<hyoga-player\b[^>]+?>)', webpage, 'video player') + player_info = extract_attributes(player_element) + asset_id, country, realm = (player_info[x] for x in ('assetid', 'locale', 'realm', )) + endpoint = 
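Note: the reworked Tele5 extractor here reads everything it needs from the <hyoga-player> tag attributes and then derives the disco API endpoint hostname. The shape of that flow, with a toy parser standing in for yt_dlp.utils.extract_attributes (every attribute value except the asset id is invented):

    import re
    from urllib.parse import urlparse

    def extract_attributes(tag):  # toy stand-in; the real helper handles far more HTML
        return dict(re.findall(r'(\w+)="([^"]*)"', tag))

    player = ('<hyoga-player assetid="1252360" locale="de" realm="dmaxde" '
              'endpoint="https://eu1-prod.disco-api.com" sourcetype="video">')
    info = extract_attributes(player)
    asset_id, country, realm = (info[x] for x in ('assetid', 'locale', 'realm'))
    endpoint = urlparse(info['endpoint']).hostname
    if info.get('sourcetype'):
        endpoint = '%s-%s' % (info['sourcetype'], endpoint)
    print(asset_id, country, realm, endpoint)
    # 1252360 de dmaxde video-eu1-prod.disco-api.com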
compat_urlparse.urlparse(player_info['endpoint']).hostname + source_type = player_info.get('sourcetype') + if source_type: + endpoint = '%s-%s' % (source_type, endpoint) + try: + return self._get_disco_api_info(url, asset_id, endpoint, realm, country) + except ExtractorError as e: + if getattr(e, 'message', '') == 'Missing deviceId in context': + self.report_drm(video_id) + raise diff --git a/yt_dlp/extractor/tv2dk.py b/yt_dlp/extractor/tv2dk.py index 8bd5fd640..ec5cbdf03 100644 --- a/yt_dlp/extractor/tv2dk.py +++ b/yt_dlp/extractor/tv2dk.py @@ -41,8 +41,16 @@ class TV2DKIE(InfoExtractor): 'duration': 1347, 'view_count': int, }, - 'params': { - 'skip_download': True, + 'add_ie': ['Kaltura'], + }, { + 'url': 'https://www.tv2lorry.dk/gadekamp/gadekamp-6-hoejhuse-i-koebenhavn', + 'info_dict': { + 'id': '1_7iwll9n0', + 'ext': 'mp4', + 'upload_date': '20211027', + 'title': 'Gadekamp #6 - Højhuse i København', + 'uploader_id': 'tv2lorry', + 'timestamp': 1635345229, }, 'add_ie': ['Kaltura'], }, { @@ -91,11 +99,14 @@ class TV2DKIE(InfoExtractor): add_entry(partner_id, kaltura_id) if not entries: kaltura_id = self._search_regex( - r'entry_id\s*:\s*["\']([0-9a-z_]+)', webpage, 'kaltura id') + (r'entry_id\s*:\s*["\']([0-9a-z_]+)', + r'\\u002FentryId\\u002F(\w+)\\u002F'), webpage, 'kaltura id') partner_id = self._search_regex( (r'\\u002Fp\\u002F(\d+)\\u002F', r'/p/(\d+)/'), webpage, 'partner id') add_entry(partner_id, kaltura_id) + if len(entries) == 1: + return entries[0] return self.playlist_result(entries) diff --git a/yt_dlp/extractor/uol.py b/yt_dlp/extractor/uol.py index 4a2a97fa4..1baee0b10 100644 --- a/yt_dlp/extractor/uol.py +++ b/yt_dlp/extractor/uol.py @@ -95,7 +95,6 @@ class UOLIE(InfoExtractor): if v: query[k] = v f_url = update_url_query(f_url, query) - format_id = format_id if format_id == 'HLS': m3u8_formats = self._extract_m3u8_formats( f_url, media_id, 'mp4', 'm3u8_native', diff --git a/yt_dlp/extractor/urplay.py b/yt_dlp/extractor/urplay.py index 753ffa49c..eb2ab26e1 100644 --- a/yt_dlp/extractor/urplay.py +++ b/yt_dlp/extractor/urplay.py @@ -4,7 +4,11 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( dict_get, + ExtractorError, int_or_none, + ISO639Utils, + parse_age_limit, + try_get, unified_timestamp, ) @@ -23,9 +27,10 @@ class URPlayIE(InfoExtractor): 'upload_date': '20171214', 'series': 'UR Samtiden - Livet, universum och rymdens märkliga musik', 'duration': 2269, - 'categories': ['Kultur & historia'], + 'categories': ['Vetenskap & teknik'], 'tags': ['Kritiskt tänkande', 'Vetenskap', 'Vetenskaplig verksamhet'], 'episode': 'Om vetenskap, kritiskt tänkande och motstånd', + 'age_limit': 15, }, }, { 'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde', @@ -50,11 +55,16 @@ class URPlayIE(InfoExtractor): video_id = self._match_id(url) url = url.replace('skola.se/Produkter', 'play.se/program') webpage = self._download_webpage(url, video_id) - vid = int(video_id) - accessible_episodes = self._parse_json(self._html_search_regex( - r'data-react-class="routes/Product/components/ProgramContainer/ProgramContainer"[^>]+data-react-props="({.+?})"', - webpage, 'urplayer data'), video_id)['accessibleEpisodes'] - urplayer_data = next(e for e in accessible_episodes if e.get('id') == vid) + urplayer_data = self._search_nextjs_data(webpage, video_id, fatal=False) or {} + if urplayer_data: + urplayer_data = try_get(urplayer_data, lambda x: x['props']['pageProps']['program'], dict) + if not urplayer_data: + raise 
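Note: the urplay change above prefers _search_nextjs_data over the fragile react-props regex. A minimal sketch of what that helper does, assuming the standard <script id="__NEXT_DATA__"> tag that Next.js sites embed:

    import json
    import re

    def search_nextjs_data(webpage):
        mobj = re.search(
            r'<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>(?P<json>[^<]+)</script>',
            webpage)
        return json.loads(mobj.group('json')) if mobj else {}

    page = ('<script id="__NEXT_DATA__" type="application/json">'
            '{"props": {"pageProps": {"program": {"title": "demo"}}}}</script>')
    print(search_nextjs_data(page)['props']['pageProps']['program'])  # {'title': 'demo'}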
ExtractorError('Unable to parse __NEXT_DATA__') + else: + accessible_episodes = self._parse_json(self._html_search_regex( + r'data-react-class="routes/Product/components/ProgramContainer/ProgramContainer"[^>]+data-react-props="({.+?})"', + webpage, 'urplayer data'), video_id)['accessibleEpisodes'] + urplayer_data = next(e for e in accessible_episodes if e.get('id') == int_or_none(video_id)) episode = urplayer_data['title'] host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect'] @@ -72,11 +82,28 @@ class URPlayIE(InfoExtractor): self._sort_formats(formats) subtitles = {} - subs = urplayer_streams.get("sweComplete", {}).get("tt", {}).get("location") - if subs: - subtitles.setdefault('Svenska', []).append({ - 'url': subs, - }) + + def parse_lang_code(code): + "3-character language code or None (utils candidate)" + if code is None: + return + lang = code.lower() + if not ISO639Utils.long2short(lang): + lang = ISO639Utils.short2long(lang) + return lang or None + + for k, v in (urplayer_data['streamingInfo'].get('sweComplete') or {}).items(): + if (k in ('sd', 'hd') or not isinstance(v, dict)): + continue + lang, sttl_url = (v.get(kk) for kk in ('language', 'location', )) + if not sttl_url: + continue + lang = parse_lang_code(lang) + if not lang: + continue + sttl = subtitles.get(lang) or [] + sttl.append({'ext': k, 'url': sttl_url, }) + subtitles[lang] = sttl image = urplayer_data.get('image') or {} thumbnails = [] @@ -98,7 +125,6 @@ class URPlayIE(InfoExtractor): return { 'id': video_id, - 'subtitles': subtitles, 'title': '%s : %s' % (series_title, episode) if series_title else episode, 'description': urplayer_data.get('description'), 'thumbnails': thumbnails, @@ -111,4 +137,7 @@ class URPlayIE(InfoExtractor): 'season': series.get('label'), 'episode': episode, 'episode_number': int_or_none(urplayer_data.get('episodeNumber')), + 'age_limit': parse_age_limit(min(try_get(a, lambda x: x['from'], int) or 0 + for a in urplayer_data.get('ageRanges', []))), + 'subtitles': subtitles, } diff --git a/yt_dlp/extractor/videa.py b/yt_dlp/extractor/videa.py index 512ade7af..90d705092 100644 --- a/yt_dlp/extractor/videa.py +++ b/yt_dlp/extractor/videa.py @@ -111,7 +111,6 @@ class VideaIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - video_page = self._download_webpage(url, video_id) if 'videa.hu/player' in url: @@ -146,7 +145,7 @@ class VideaIE(InfoExtractor): compat_b64decode(b64_info), key), video_id) video = xpath_element(info, './video', 'video') - if not video: + if video is None: raise ExtractorError(xpath_element( info, './error', fatal=True), expected=True) sources = xpath_element( @@ -163,9 +162,9 @@ class VideaIE(InfoExtractor): source_exp = source.get('exp') if not (source_url and source_name): continue - hash_value = None - if hash_values: - hash_value = xpath_text(hash_values, 'hash_value_' + source_name) + hash_value = ( + xpath_text(hash_values, 'hash_value_' + source_name) + if hash_values is not None else None) if hash_value and source_exp: source_url = update_url_query(source_url, { 'md5': hash_value, diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index 77ffb4bfb..458a751fe 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -636,6 +636,24 @@ class VimeoIE(VimeoBaseInfoExtractor): 'url': 'https://vimeo.com/392479337/a52724358e', 'only_matching': True, }, + { + # similar, but all numeric: ID must be 581039021, not 9603038895 + # issue #29690 + 'url': 
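Note: the videa switch to `video is None` / `hash_values is not None` guards against an xml.etree quirk: a found element with no children is falsy, so `if not video:` misreports a present but childless <video> node as missing. Demonstration:

    import xml.etree.ElementTree as ET

    info = ET.fromstring('<info><video src="v.mp4"/></info>')
    video = info.find('./video')
    print(video is None)  # False: the element was found
    print(len(video))     # 0: childless, so truth-testing it would be misleading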
'https://vimeo.com/581039021/9603038895', + 'info_dict': { + 'id': '581039021', + # these have to be provided but we don't care + 'ext': 'mp4', + 'timestamp': 1627621014, + 'title': 're:.+', + 'uploader_id': 're:.+', + 'uploader': 're:.+', + 'upload_date': r're:\d+', + }, + 'params': { + 'skip_download': True, + }, + } # https://gettingthingsdone.com/workflowmap/ # vimeo embed with check-password page protected by Referer header ] diff --git a/yt_dlp/extractor/wdr.py b/yt_dlp/extractor/wdr.py index 45bfe5f3a..ef58a66c3 100644 --- a/yt_dlp/extractor/wdr.py +++ b/yt_dlp/extractor/wdr.py @@ -10,6 +10,7 @@ from ..compat import ( ) from ..utils import ( determine_ext, + dict_get, ExtractorError, js_to_json, strip_jsonp, @@ -22,13 +23,14 @@ from ..utils import ( class WDRIE(InfoExtractor): + __API_URL_TPL = '//deviceids-medp.wdr.de/ondemand/%s/%s' _VALID_URL = r'''(?x)https?:// (?:deviceids-medp\.wdr\.de/ondemand/\d+/| kinder\.wdr\.de/(?!mediathek/)[^#?]+-) (?P<id>\d+)\.(?:js|assetjsonp) ''' _GEO_COUNTRIES = ['DE'] - _TEST = { + _TESTS = [{ 'url': 'http://deviceids-medp.wdr.de/ondemand/155/1557833.js', 'info_dict': { 'id': 'mdb-1557833', @@ -36,11 +38,19 @@ class WDRIE(InfoExtractor): 'title': 'Biathlon-Staffel verpasst Podest bei Olympia-Generalprobe', 'upload_date': '20180112', }, - } + }] + + def _asset_url(self, wdr_id): + id_len = max(len(wdr_id), 5) + return ''.join(('https:', self.__API_URL_TPL % (wdr_id[:id_len - 4], wdr_id, ), '.js')) def _real_extract(self, url): video_id = self._match_id(url) + if url.startswith('wdr:'): + video_id = url[4:] + url = self._asset_url(video_id) + metadata = self._download_json( url, video_id, transform_source=strip_jsonp) @@ -126,10 +136,10 @@ class WDRIE(InfoExtractor): } -class WDRPageIE(InfoExtractor): - _CURRENT_MAUS_URL = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/){1,2}[^/?#]+\.php5' +class WDRPageIE(WDRIE): + _MAUS_REGEX = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/)*?(?P<maus_id>[^/?#.]+)(?:/?|/index\.php5|\.php5)$' _PAGE_REGEX = r'/(?:mediathek/)?(?:[^/]+/)*(?P<display_id>[^/]+)\.html' - _VALID_URL = r'https?://(?:www\d?\.)?(?:(?:kinder\.)?wdr\d?|sportschau)\.de' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL + _VALID_URL = r'https?://(?:www\d?\.)?(?:(?:kinder\.)?wdr\d?|sportschau)\.de' + _PAGE_REGEX + '|' + _MAUS_REGEX _TESTS = [ { @@ -170,11 +180,11 @@ class WDRPageIE(InfoExtractor): { 'url': 'http://www1.wdr.de/mediathek/video/live/index.html', 'info_dict': { - 'id': 'mdb-1406149', + 'id': 'mdb-2296252', 'ext': 'mp4', - 'title': r're:^WDR Fernsehen im Livestream \(nur in Deutschland erreichbar\) [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'title': r're:^WDR Fernsehen im Livestream (?:\(nur in Deutschland erreichbar\) )?[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'alt_title': 'WDR Fernsehen Live', - 'upload_date': '20150101', + 'upload_date': '20201112', 'is_live': True, }, 'params': { @@ -183,7 +193,7 @@ class WDRPageIE(InfoExtractor): }, { 'url': 'http://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html', - 'playlist_mincount': 7, + 'playlist_mincount': 6, 'info_dict': { 'id': 'aktuelle-stunde-120', }, @@ -191,10 +201,10 @@ class WDRPageIE(InfoExtractor): { 'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5', 'info_dict': { - 'id': 'mdb-1552552', + 'id': 'mdb-2627637', 'ext': 'mp4', 'upload_date': 're:^[0-9]{8}$', - 'title': 're:^Die Sendung mit der Maus vom [0-9.]{10}$', + 'title': 're:^Die Sendung (?:mit der Maus )?vom [0-9.]{10}$', }, 'skip': 'The id changes from week to week because of the new episode' }, 
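Note: the new WDRIE._asset_url above rebuilds the deviceids metadata URL from a bare media id by using all but the last four digits as the path bucket; tracing it with the id from the existing test reproduces the test URL:

    API_URL_TPL = '//deviceids-medp.wdr.de/ondemand/%s/%s'

    def asset_url(wdr_id):
        id_len = max(len(wdr_id), 5)
        return ''.join(('https:', API_URL_TPL % (wdr_id[:id_len - 4], wdr_id), '.js'))

    print(asset_url('1557833'))
    # https://deviceids-medp.wdr.de/ondemand/155/1557833.js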
@@ -207,6 +217,7 @@ class WDRPageIE(InfoExtractor): 'upload_date': '20130919', 'title': 'Sachgeschichte - Achterbahn ', }, + 'skip': 'HTTP Error 404: Not Found', }, { 'url': 'http://www1.wdr.de/radio/player/radioplayer116~_layout-popupVersion.html', @@ -232,6 +243,7 @@ class WDRPageIE(InfoExtractor): 'params': { 'skip_download': True, }, + 'skip': 'HTTP Error 404: Not Found', }, { 'url': 'http://www.sportschau.de/handballem2018/audio-vorschau---die-handball-em-startet-mit-grossem-favoritenfeld-100.html', @@ -245,7 +257,7 @@ class WDRPageIE(InfoExtractor): def _real_extract(self, url): mobj = self._match_valid_url(url) - display_id = mobj.group('display_id') + display_id = dict_get(mobj.groupdict(), ('display_id', 'maus_id'), 'wdrmaus') webpage = self._download_webpage(url, display_id) entries = [] @@ -271,6 +283,14 @@ class WDRPageIE(InfoExtractor): jsonp_url = try_get( media_link_obj, lambda x: x['mediaObj']['url'], compat_str) if jsonp_url: + # metadata, or player JS with ['ref'] giving WDR id, or just media, perhaps + clip_id = media_link_obj['mediaObj'].get('ref') + if jsonp_url.endswith('.assetjsonp'): + asset = self._download_json( + jsonp_url, display_id, fatal=False, transform_source=strip_jsonp) + clip_id = try_get(asset, lambda x: x['trackerData']['trackerClipId'], compat_str) + if clip_id: + jsonp_url = self._asset_url(clip_id[4:]) entries.append(self.url_result(jsonp_url, ie=WDRIE.ie_key())) # Playlist (e.g. https://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html) @@ -290,16 +310,14 @@ class WDRPageIE(InfoExtractor): class WDRElefantIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)wdrmaus\.de/elefantenseite/#(?P<id>.+)' _TEST = { - 'url': 'http://www.wdrmaus.de/elefantenseite/#folge_ostern_2015', + 'url': 'http://www.wdrmaus.de/elefantenseite/#elefantenkino_wippe', + # adaptive stream: unstable file MD5 'info_dict': { - 'title': 'Folge Oster-Spezial 2015', - 'id': 'mdb-1088195', + 'title': 'Wippe', + 'id': 'mdb-1198320', 'ext': 'mp4', 'age_limit': None, - 'upload_date': '20150406' - }, - 'params': { - 'skip_download': True, + 'upload_date': '20071003' }, } @@ -334,6 +352,7 @@ class WDRMobileIE(InfoExtractor): /[0-9]+/[0-9]+/ (?P<id>[0-9]+)_(?P<title>[0-9]+)''' IE_NAME = 'wdr:mobile' + _WORKING = False # no such domain _TEST = { 'url': 'http://mobile-ondemand.wdr.de/CMS2010/mdb/ondemand/weltweit/fsk0/42/421735/421735_4283021.mp4', 'info_dict': { diff --git a/yt_dlp/extractor/zdf.py b/yt_dlp/extractor/zdf.py index 0aa5184f7..5f4d26622 100644 --- a/yt_dlp/extractor/zdf.py +++ b/yt_dlp/extractor/zdf.py @@ -136,6 +136,34 @@ class ZDFBaseIE(InfoExtractor): class ZDFIE(ZDFBaseIE): _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html' _TESTS = [{ + # Same as https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html + 'url': 'https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html', + 'md5': '34ec321e7eb34231fd88616c65c92db0', + 'info_dict': { + 'id': '210222_phx_nachgehakt_corona_protest', + 'ext': 'mp4', + 'title': 'Wohin führt der Protest in der Pandemie?', + 'description': 'md5:7d643fe7f565e53a24aac036b2122fbd', + 'duration': 1691, + 'timestamp': 1613948400, + 'upload_date': '20210221', + }, + 'skip': 'No longer available: "Diese Seite wurde leider nicht gefunden"', + }, { + # Same as https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html + 'url': 'https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html', + 'md5': 
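Note: with the Maus URLs folded into _VALID_URL, only one of the display_id/maus_id groups ever matches, so the code above picks whichever is set via dict_get and falls back to the literal 'wdrmaus'. Simplified stand-in for that helper:

    def dict_get(d, keys, default=None):  # simplified: the real helper also skips falsy values
        for key in keys:
            if d.get(key) is not None:
                return d[key]
        return default

    groups = {'display_id': None, 'maus_id': 'aktuelle-sendung'}
    print(dict_get(groups, ('display_id', 'maus_id'), 'wdrmaus'))  # aktuelle-sendung
    print(dict_get({'display_id': None, 'maus_id': None},
                   ('display_id', 'maus_id'), 'wdrmaus'))          # wdrmaus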
'0aff3e7bc72c8813f5e0fae333316a1d', + 'info_dict': { + 'id': '141007_ab18_10wochensommer_film', + 'ext': 'mp4', + 'title': 'Ab 18! - 10 Wochen Sommer', + 'description': 'md5:8253f41dc99ce2c3ff892dac2d65fe26', + 'duration': 2660, + 'timestamp': 1608604200, + 'upload_date': '20201222', + }, + 'skip': 'No longer available: "Diese Seite wurde leider nicht gefunden"', + }, { 'url': 'https://www.zdf.de/nachrichten/heute-journal/heute-journal-vom-30-12-2021-100.html', 'info_dict': { 'id': '211230_sendung_hjo', @@ -195,13 +223,16 @@ class ZDFIE(ZDFBaseIE): 'url': 'https://www.zdf.de/dokumentation/planet-e/planet-e-uebersichtsseite-weitere-dokumentationen-von-planet-e-100.html', 'only_matching': True, }, { - # Same as https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html - 'url': 'https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html', - 'only_matching': True - }, { - # Same as https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html - 'url': 'https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html', - 'only_matching': True + 'url': 'https://www.zdf.de/arte/todliche-flucht/page-video-artede-toedliche-flucht-16-100.html', + 'info_dict': { + 'id': 'video_artede_083871-001-A', + 'ext': 'mp4', + 'title': 'Tödliche Flucht (1/6)', + 'description': 'md5:e34f96a9a5f8abd839ccfcebad3d5315', + 'duration': 3193.0, + 'timestamp': 1641355200, + 'upload_date': '20220105', + }, }] def _extract_entry(self, url, player, content, video_id): From 27231526ae4dd3b0619d25a2e9d73186c1197c2f Mon Sep 17 00:00:00 2001 From: Zenon Mousmoulas <zmousm@users.noreply.github.com> Date: Fri, 4 Mar 2022 23:52:48 +0200 Subject: [PATCH 0760/2552] [ant1newsgr] Add extractor (#1982) Authored by: zmousm --- yt_dlp/extractor/ant1newsgr.py | 143 +++++++++++++++++++++++++++++++++ yt_dlp/extractor/common.py | 4 +- yt_dlp/extractor/extractors.py | 5 ++ yt_dlp/extractor/generic.py | 7 ++ yt_dlp/extractor/tvopengr.py | 19 +---- yt_dlp/utils.py | 22 +++++ 6 files changed, 181 insertions(+), 19 deletions(-) create mode 100644 yt_dlp/extractor/ant1newsgr.py diff --git a/yt_dlp/extractor/ant1newsgr.py b/yt_dlp/extractor/ant1newsgr.py new file mode 100644 index 000000000..7d70e0427 --- /dev/null +++ b/yt_dlp/extractor/ant1newsgr.py @@ -0,0 +1,143 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +import urllib.parse + +from .common import InfoExtractor +from ..utils import ( + HEADRequest, + ExtractorError, + determine_ext, + scale_thumbnails_to_max_format_width, + unescapeHTML, +) + + +class Ant1NewsGrBaseIE(InfoExtractor): + def _download_and_extract_api_data(self, video_id, netloc, cid=None): + url = f'{self.http_scheme()}//{netloc}{self._API_PATH}' + info = self._download_json(url, video_id, query={'cid': cid or video_id}) + try: + source = info['url'] + except KeyError: + raise ExtractorError('no source found for %s' % video_id) + formats, subs = (self._extract_m3u8_formats_and_subtitles(source, video_id, 'mp4') + if determine_ext(source) == 'm3u8' else ([{'url': source}], {})) + self._sort_formats(formats) + thumbnails = scale_thumbnails_to_max_format_width( + formats, [{'url': info['thumb']}], r'(?<=/imgHandler/)\d+') + return { + 'id': video_id, + 'title': info.get('title'), + 'thumbnails': thumbnails, + 'formats': formats, + 'subtitles': subs, + } + + +class Ant1NewsGrWatchIE(Ant1NewsGrBaseIE): + IE_NAME = 'ant1newsgr:watch' + IE_DESC = 'ant1news.gr videos' + _VALID_URL = 
r'https?://(?P<netloc>(?:www\.)?ant1news\.gr)/watch/(?P<id>\d+)/' + _API_PATH = '/templates/data/player' + + _TESTS = [{ + 'url': 'https://www.ant1news.gr/watch/1506168/ant1-news-09112021-stis-18-45', + 'md5': '95925e6b32106754235f2417e0d2dfab', + 'info_dict': { + 'id': '1506168', + 'ext': 'mp4', + 'title': 'md5:0ad00fa66ecf8aa233d26ab0dba7514a', + 'description': 'md5:18665af715a6dcfeac1d6153a44f16b0', + 'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/26d46bf6-8158-4f02-b197-7096c714b2de.jpg', + }, + }] + + def _real_extract(self, url): + video_id, netloc = self._match_valid_url(url).group('id', 'netloc') + webpage = self._download_webpage(url, video_id) + info = self._download_and_extract_api_data(video_id, netloc) + info['description'] = self._og_search_description(webpage) + return info + + +class Ant1NewsGrArticleIE(Ant1NewsGrBaseIE): + IE_NAME = 'ant1newsgr:article' + IE_DESC = 'ant1news.gr articles' + _VALID_URL = r'https?://(?:www\.)?ant1news\.gr/[^/]+/article/(?P<id>\d+)/' + + _TESTS = [{ + 'url': 'https://www.ant1news.gr/afieromata/article/549468/o-tzeims-mpont-sta-meteora-oi-apeiles-kai-o-xesikomos-ton-kalogeron', + 'md5': '294f18331bb516539d72d85a82887dcc', + 'info_dict': { + 'id': '_xvg/m_cmbatw=', + 'ext': 'mp4', + 'title': 'md5:a93e8ecf2e4073bfdffcb38f59945411', + 'timestamp': 1603092840, + 'upload_date': '20201019', + 'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/756206d2-d640-40e2-b201-3555abdfc0db.jpg', + }, + }, { + 'url': 'https://ant1news.gr/Society/article/620286/symmoria-anilikon-dikigoros-thymaton-ithelan-na-toys-apoteleiosoyn', + 'info_dict': { + 'id': '620286', + 'title': 'md5:91fe569e952e4d146485740ae927662b', + }, + 'playlist_mincount': 2, + 'params': { + 'skip_download': True, + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + info = self._search_json_ld(webpage, video_id, expected_type='NewsArticle') + embed_urls = list(Ant1NewsGrEmbedIE._extract_urls(webpage)) + if not embed_urls: + raise ExtractorError('no videos found for %s' % video_id, expected=True) + return self.url_result_or_playlist_from_matches( + embed_urls, video_id, info['title'], ie=Ant1NewsGrEmbedIE.ie_key(), + video_kwargs={'url_transparent': True, 'timestamp': info.get('timestamp')}) + + +class Ant1NewsGrEmbedIE(Ant1NewsGrBaseIE): + IE_NAME = 'ant1newsgr:embed' + IE_DESC = 'ant1news.gr embedded videos' + _BASE_PLAYER_URL_RE = r'(?:https?:)?//(?:[a-zA-Z0-9\-]+\.)?(?:antenna|ant1news)\.gr/templates/pages/player' + _VALID_URL = rf'{_BASE_PLAYER_URL_RE}\?([^#]+&)?cid=(?P<id>[^#&]+)' + _API_PATH = '/news/templates/data/jsonPlayer' + + _TESTS = [{ + 'url': 'https://www.antenna.gr/templates/pages/player?cid=3f_li_c_az_jw_y_u=&w=670&h=377', + 'md5': 'dfc58c3a11a5a9aad2ba316ed447def3', + 'info_dict': { + 'id': '3f_li_c_az_jw_y_u=', + 'ext': 'mp4', + 'title': 'md5:a30c93332455f53e1e84ae0724f0adf7', + 'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/bbe31201-3f09-4a4e-87f5-8ad2159fffe2.jpg', + }, + }] + + @classmethod + def _extract_urls(cls, webpage): + _EMBED_URL_RE = rf'{cls._BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+' + _EMBED_RE = rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{_EMBED_URL_RE})(?P=_q1)' + for mobj in re.finditer(_EMBED_RE, webpage): + url = unescapeHTML(mobj.group('url')) + if not cls.suitable(url): + continue + yield url + + def _real_extract(self, url): + video_id = self._match_id(url) + + canonical_url = self._request_webpage( + HEADRequest(url), video_id, + note='Resolve 
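Note: _extract_urls above pairs a named quote group with a tempered pattern, (?:(?!(?P=_q1)).)+, so the URL match stops exactly at whichever quote character opened the src attribute. The trick in isolation (the iframe markup is invented):

    import re

    EMBED_RE = r'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>(?:(?!(?P=_q1)).)+)(?P=_q1)'
    for html in ('<iframe src="https://www.antenna.gr/templates/pages/player?cid=abc&w=670">',
                 "<iframe src='https://www.antenna.gr/templates/pages/player?cid=xyz'>"):
        print(re.search(EMBED_RE, html).group('url'))
    # https://www.antenna.gr/templates/pages/player?cid=abc&w=670
    # https://www.antenna.gr/templates/pages/player?cid=xyz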
canonical player URL', + errnote='Could not resolve canonical player URL').geturl() + _, netloc, _, _, query, _ = urllib.parse.urlparse(canonical_url) + cid = urllib.parse.parse_qs(query)['cid'][0] + + return self._download_and_extract_api_data(video_id, netloc, cid=cid) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index dbf5ef8d4..f86e7cb3e 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1140,8 +1140,8 @@ class InfoExtractor(object): 'url': url, } - def playlist_from_matches(self, matches, playlist_id=None, playlist_title=None, getter=None, ie=None, **kwargs): - urls = (self.url_result(self._proto_relative_url(m), ie) + def playlist_from_matches(self, matches, playlist_id=None, playlist_title=None, getter=None, ie=None, video_kwargs=None, **kwargs): + urls = (self.url_result(self._proto_relative_url(m), ie, **(video_kwargs or {})) for m in orderedSet(map(getter, matches) if getter else matches)) return self.playlist_result(urls, playlist_id, playlist_title, **kwargs) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 9e84655d6..5448acf01 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1401,6 +1401,11 @@ from .megatvcom import ( MegaTVComIE, MegaTVComEmbedIE, ) +from .ant1newsgr import ( + Ant1NewsGrWatchIE, + Ant1NewsGrArticleIE, + Ant1NewsGrEmbedIE, +) from .rutv import RUTVIE from .ruutu import RuutuIE from .ruv import ( diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index d975e4bdb..0ddd050ff 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -103,6 +103,7 @@ from .videopress import VideoPressIE from .rutube import RutubeIE from .glomex import GlomexEmbedIE from .megatvcom import MegaTVComEmbedIE +from .ant1newsgr import Ant1NewsGrEmbedIE from .limelight import LimelightBaseIE from .anvato import AnvatoIE from .washingtonpost import WashingtonPostIE @@ -3544,6 +3545,12 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( megatvcom_urls, video_id, video_title, ie=MegaTVComEmbedIE.ie_key()) + # Look for ant1news.gr embeds + ant1newsgr_urls = list(Ant1NewsGrEmbedIE._extract_urls(webpage)) + if ant1newsgr_urls: + return self.playlist_from_matches( + ant1newsgr_urls, video_id, video_title, ie=Ant1NewsGrEmbedIE.ie_key()) + # Look for WashingtonPost embeds wapo_urls = WashingtonPostIE._extract_urls(webpage) if wapo_urls: diff --git a/yt_dlp/extractor/tvopengr.py b/yt_dlp/extractor/tvopengr.py index 667f6660f..a11cdc6b0 100644 --- a/yt_dlp/extractor/tvopengr.py +++ b/yt_dlp/extractor/tvopengr.py @@ -7,7 +7,7 @@ from .common import InfoExtractor from ..utils import ( determine_ext, get_elements_text_and_html_by_attribute, - merge_dicts, + scale_thumbnails_to_max_format_width, unescapeHTML, ) @@ -78,21 +78,6 @@ class TVOpenGrWatchIE(TVOpenGrBaseIE): self._sort_formats(formats) return formats, subs - @staticmethod - def _scale_thumbnails_to_max_width(formats, thumbnails, url_width_re): - _keys = ('width', 'height') - max_dimensions = max( - [tuple(format.get(k) or 0 for k in _keys) for format in formats], - default=(0, 0)) - if not max_dimensions[0]: - return thumbnails - return [ - merge_dicts( - {'url': re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])}, - dict(zip(_keys, max_dimensions)), thumbnail) - for thumbnail in thumbnails - ] - def _real_extract(self, url): netloc, video_id, display_id = self._match_valid_url(url).group('netloc', 'id', 'slug') if netloc.find('tvopen.gr') == -1: @@ -102,7 
+87,7 @@ class TVOpenGrWatchIE(TVOpenGrBaseIE): info['formats'], info['subtitles'] = self._extract_formats_and_subs( self._download_json(self._API_ENDPOINT, video_id, query={'cid': video_id}), video_id) - info['thumbnails'] = self._scale_thumbnails_to_max_width( + info['thumbnails'] = scale_thumbnails_to_max_format_width( info['formats'], info['thumbnails'], r'(?<=/imgHandler/)\d+') description, _html = next(get_elements_text_and_html_by_attribute('class', 'description', webpage)) if description and _html.startswith('<span '): diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index be0c69d8f..87463c999 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -5271,6 +5271,28 @@ def join_nonempty(*values, delim='-', from_dict=None): return delim.join(map(str, filter(None, values))) +def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re): + """ + Find the largest format dimensions in terms of video width and, for each thumbnail: + * Modify the URL: Match the width with the provided regex and replace with the former width + * Update dimensions + + This function is useful with video services that scale the provided thumbnails on demand + """ + _keys = ('width', 'height') + max_dimensions = max( + [tuple(format.get(k) or 0 for k in _keys) for format in formats], + default=(0, 0)) + if not max_dimensions[0]: + return thumbnails + return [ + merge_dicts( + {'url': re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])}, + dict(zip(_keys, max_dimensions)), thumbnail) + for thumbnail in thumbnails + ] + + def parse_http_range(range): """ Parse value of "Range" or "Content-Range" HTTP header into tuple. """ if not range: From bed30106f544fb3ae995f0e3e73cf39789edeecc Mon Sep 17 00:00:00 2001 From: foghawk <scimitarfawkes@gmail.com> Date: Fri, 4 Mar 2022 21:24:49 -0600 Subject: [PATCH 0761/2552] [tumblr] Fix extractor (#2883) Authored by: foghawk --- yt_dlp/extractor/tumblr.py | 350 ++++++++++++++++++++++++++++--------- 1 file changed, 264 insertions(+), 86 deletions(-) diff --git a/yt_dlp/extractor/tumblr.py b/yt_dlp/extractor/tumblr.py index a9ad2e513..a3e0e15f2 100644 --- a/yt_dlp/extractor/tumblr.py +++ b/yt_dlp/extractor/tumblr.py @@ -6,6 +6,7 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, int_or_none, + traverse_obj, urlencode_postdata ) @@ -14,31 +15,130 @@ class TumblrIE(InfoExtractor): _VALID_URL = r'https?://(?P<blog_name>[^/?#&]+)\.tumblr\.com/(?:post|video)/(?P<id>[0-9]+)(?:$|[/?#])' _NETRC_MACHINE = 'tumblr' _LOGIN_URL = 'https://www.tumblr.com/login' + _OAUTH_URL = 'https://www.tumblr.com/api/v2/oauth2/token' _TESTS = [{ 'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', 'md5': '479bb068e5b16462f5176a6828829767', 'info_dict': { 'id': '54196191430', 'ext': 'mp4', - 'title': 'tatiana maslany news, Orphan Black || DVD extra - behind the scenes ↳...', + 'title': 'md5:dfac39636969fe6bf1caa2d50405f069', 'description': 'md5:390ab77358960235b6937ab3b8528956', + 'uploader_id': 'tatianamaslanydaily', + 'uploader_url': 'https://tatianamaslanydaily.tumblr.com/', 'thumbnail': r're:^https?://.*\.jpg', 'duration': 127, + 'like_count': int, + 'repost_count': int, + 'age_limit': 0, + 'tags': ['Orphan Black', 'Tatiana Maslany', 'Interview', 'Video', 'OB S1 DVD Extras'], } }, { + 'note': 'multiple formats', 'url': 'https://maskofthedragon.tumblr.com/post/626907179849564160/mona-talking-in-english', 'md5': 'f43ff8a8861712b6cf0e0c2bd84cfc68', 'info_dict': { 'id': '626907179849564160', 'ext': 'mp4', - 
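Note: the helper promoted into utils above rewrites each thumbnail URL so the site's on-demand scaler serves the same width as the best video format. Its behaviour on a toy input (the imgHandler URL shape comes from the extractors above; the dimensions are invented):

    import re

    formats = [{'width': 640, 'height': 360}, {'width': 1280, 'height': 720}]
    thumbnails = [{'url': 'https://example.com/imgHandler/320/abc.jpg'}]

    max_w, max_h = max((f.get('width') or 0, f.get('height') or 0) for f in formats)
    print([dict(t, url=re.sub(r'(?<=/imgHandler/)\d+', str(max_w), t['url']),
                width=max_w, height=max_h) for t in thumbnails])
    # [{'url': 'https://example.com/imgHandler/1280/abc.jpg', 'width': 1280, 'height': 720}]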
'title': 'Me roast is buggered!, Mona\xa0“talking” in\xa0“english”', + 'title': 'Mona\xa0“talking” in\xa0“english”', 'description': 'md5:082a3a621530cb786ad2b7592a6d9e2c', + 'uploader_id': 'maskofthedragon', + 'uploader_url': 'https://maskofthedragon.tumblr.com/', 'thumbnail': r're:^https?://.*\.jpg', 'duration': 7, + 'like_count': int, + 'repost_count': int, + 'age_limit': 0, + 'tags': 'count:19', }, 'params': { 'format': 'hd', }, + }, { + 'note': 'non-iframe video (with related posts)', + 'url': 'https://shieldfoss.tumblr.com/post/675519763813908480', + 'md5': '12bdb75661ef443bffe5a4dac1dbf118', + 'info_dict': { + 'id': '675519763813908480', + 'ext': 'mp4', + 'title': 'Shieldfoss', + 'uploader_id': 'nerviovago', + 'uploader_url': 'https://nerviovago.tumblr.com/', + 'thumbnail': r're:^https?://.*\.jpg', + 'like_count': int, + 'repost_count': int, + 'age_limit': 0, + 'tags': [], + } + }, { + 'note': 'dashboard only (original post)', + 'url': 'https://jujanon.tumblr.com/post/159704441298/my-baby-eating', + 'md5': '029f7c91ab386701b211e3d494d2d95e', + 'info_dict': { + 'id': '159704441298', + 'ext': 'mp4', + 'title': 'md5:ba79365861101f4911452728d2950561', + 'description': 'md5:773738196cea76b6996ec71e285bdabc', + 'uploader_id': 'jujanon', + 'uploader_url': 'https://jujanon.tumblr.com/', + 'thumbnail': r're:^https?://.*\.jpg', + 'like_count': int, + 'repost_count': int, + 'age_limit': 0, + 'tags': ['crabs', 'my video', 'my pets'], + } + }, { + 'note': 'dashboard only (reblog)', + 'url': 'https://bartlebyshop.tumblr.com/post/180294460076/duality-of-bird', + 'md5': '04334e7cadb1af680d162912559f51a5', + 'info_dict': { + 'id': '180294460076', + 'ext': 'mp4', + 'title': 'duality of bird', + 'description': 'duality of bird', + 'uploader_id': 'todaysbird', + 'uploader_url': 'https://todaysbird.tumblr.com/', + 'thumbnail': r're:^https?://.*\.jpg', + 'like_count': int, + 'repost_count': int, + 'age_limit': 0, + 'tags': [], + } + }, { + 'note': 'dashboard only (external)', + 'url': 'https://afloweroutofstone.tumblr.com/post/675661759168823296/the-blues-remembers-everything-the-country-forgot', + 'info_dict': { + 'id': 'q67_fd7b8SU', + 'ext': 'mp4', + 'title': 'The Blues Remembers Everything the Country Forgot', + 'alt_title': 'The Blues Remembers Everything the Country Forgot', + 'description': 'md5:1a6b4097e451216835a24c1023707c79', + 'release_date': '20201224', + 'creator': 'md5:c2239ba15430e87c3b971ba450773272', + 'uploader': 'Moor Mother - Topic', + 'upload_date': '20201223', + 'uploader_id': 'UCxrMtFBRkFvQJ_vVM4il08w', + 'uploader_url': 'http://www.youtube.com/channel/UCxrMtFBRkFvQJ_vVM4il08w', + 'thumbnail': r're:^https?://i.ytimg.com/.*', + 'channel': 'Moor Mother - Topic', + 'channel_id': 'UCxrMtFBRkFvQJ_vVM4il08w', + 'channel_url': 'https://www.youtube.com/channel/UCxrMtFBRkFvQJ_vVM4il08w', + 'channel_follower_count': int, + 'duration': 181, + 'view_count': int, + 'like_count': int, + 'age_limit': 0, + 'categories': ['Music'], + 'tags': 'count:7', + 'live_status': 'not_live', + 'playable_in_embed': True, + 'availability': 'public', + 'track': 'The Blues Remembers Everything the Country Forgot', + 'artist': 'md5:c2239ba15430e87c3b971ba450773272', + 'album': 'Brass', + 'release_year': 2020, + }, + 'add_ie': ['Youtube'], }, { 'url': 'http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching', 'md5': 'de07e5211d60d4f3a2c3df757ea9f6ab', @@ -54,17 +154,48 @@ class TumblrIE(InfoExtractor): }, # 'add_ie': ['Vidme'], 'skip': 'dead embedded video host' + }, { + 'url': 
'https://prozdvoices.tumblr.com/post/673201091169681408/what-recording-voice-acting-sounds-like', + 'md5': 'a0063fc8110e6c9afe44065b4ea68177', + 'info_dict': { + 'id': 'eomhW5MLGWA', + 'ext': 'mp4', + 'title': 'what recording voice acting sounds like', + 'description': 'md5:1da3faa22d0e0b1d8b50216c284ee798', + 'uploader': 'ProZD', + 'upload_date': '20220112', + 'uploader_id': 'ProZD', + 'uploader_url': 'http://www.youtube.com/user/ProZD', + 'thumbnail': r're:^https?://i.ytimg.com/.*', + 'channel': 'ProZD', + 'channel_id': 'UC6MFZAOHXlKK1FI7V0XQVeA', + 'channel_url': 'https://www.youtube.com/channel/UC6MFZAOHXlKK1FI7V0XQVeA', + 'channel_follower_count': int, + 'duration': 20, + 'view_count': int, + 'like_count': int, + 'age_limit': 0, + 'categories': ['Film & Animation'], + 'tags': [], + 'live_status': 'not_live', + 'playable_in_embed': True, + 'availability': 'public', + }, + 'add_ie': ['Youtube'], }, { 'url': 'https://dominustempori.tumblr.com/post/673572712813297664/youtubes-all-right-for-some-pretty-cool', - 'md5': '5e45724c70b748f64f5a1731ac72c84a', + 'md5': '203e9eb8077e3f45bfaeb4c86c1467b8', 'info_dict': { 'id': '87816359', - 'ext': 'mp4', + 'ext': 'mov', 'title': 'Harold Ramis', + 'description': 'md5:be8e68cbf56ce0785c77f0c6c6dfaf2c', 'uploader': 'Resolution Productions Group', 'uploader_id': 'resolutionproductions', 'uploader_url': 'https://vimeo.com/resolutionproductions', + 'upload_date': '20140227', 'thumbnail': r're:^https?://i.vimeocdn.com/video/.*', + 'timestamp': 1393523719, 'duration': 291, }, 'add_ie': ['Vimeo'], @@ -107,116 +238,163 @@ class TumblrIE(InfoExtractor): 'add_ie': ['Instagram'], }] + _providers = { + 'instagram': 'Instagram', + 'vimeo': 'Vimeo', + 'vine': 'Vine', + 'youtube': 'Youtube', + } + + _ACCESS_TOKEN = None + def _real_initialize(self): + self.get_access_token() self._login() + def get_access_token(self): + login_page = self._download_webpage( + self._LOGIN_URL, None, 'Downloading login page', fatal=False) + if login_page: + self._ACCESS_TOKEN = self._search_regex( + r'"API_TOKEN":\s*"(\w+)"', login_page, 'API access token', fatal=False) + if not self._ACCESS_TOKEN: + self.report_warning('Failed to get access token; metadata will be missing and some videos may not work') + def _login(self): username, password = self._get_login_info() - if username is None: + if not username: return - login_page = self._download_webpage( - self._LOGIN_URL, None, 'Downloading login page') - - login_form = self._hidden_inputs(login_page) - login_form.update({ - 'user[email]': username, - 'user[password]': password - }) - - response, urlh = self._download_webpage_handle( - self._LOGIN_URL, None, 'Logging in', - data=urlencode_postdata(login_form), headers={ - 'Content-Type': 'application/x-www-form-urlencoded', - 'Referer': self._LOGIN_URL, - }) - - # Successful login - if '/dashboard' in urlh.geturl(): + if not self._ACCESS_TOKEN: return - login_errors = self._parse_json( - self._search_regex( - r'RegistrationForm\.errors\s*=\s*(\[.+?\])\s*;', response, - 'login errors', default='[]'), - None, fatal=False) - if login_errors: - raise ExtractorError( - 'Unable to login: %s' % login_errors[0], expected=True) - - self.report_warning('Login has probably failed') + self._download_json( + self._OAUTH_URL, None, 'Logging in', + data=urlencode_postdata({ + 'password': password, + 'grant_type': 'password', + 'username': username, + }), headers={ + 'Content-Type': 'application/x-www-form-urlencoded', + 'Authorization': f'Bearer {self._ACCESS_TOKEN}', + }, + errnote='Login failed', 
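Note: the new tumblr login is a plain OAuth2 password grant authorized by the API token scraped from the login page. Its raw HTTP shape as a urllib sketch, with placeholder credentials (the real code routes this through _download_json and urlencode_postdata):

    import urllib.parse
    import urllib.request

    access_token = 'token-scraped-via-API_TOKEN-regex'  # placeholder
    req = urllib.request.Request(
        'https://www.tumblr.com/api/v2/oauth2/token',
        data=urllib.parse.urlencode({
            'password': 'hunter2',            # placeholder credentials
            'grant_type': 'password',
            'username': 'user@example.com',
        }).encode(),
        headers={
            'Content-Type': 'application/x-www-form-urlencoded',
            'Authorization': 'Bearer %s' % access_token,
        })
    # urllib.request.urlopen(req) would perform the grant; not executed here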
fatal=False) def _real_extract(self, url): - m_url = self._match_valid_url(url) - video_id = m_url.group('id') - blog = m_url.group('blog_name') + blog, video_id = self._match_valid_url(url).groups() - url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id) + url = f'http://{blog}.tumblr.com/post/{video_id}/' webpage, urlh = self._download_webpage_handle(url, video_id) redirect_url = urlh.geturl() - if 'tumblr.com/safe-mode' in redirect_url or redirect_url.startswith('/safe-mode'): - raise ExtractorError( - 'This Tumblr may contain sensitive media. ' - 'Disable safe mode in your account settings ' - 'at https://www.tumblr.com/settings/account#safe_mode', - expected=True) + api_only = bool(self._search_regex( + r'(tumblr.com|^)/(safe-mode|login_required|blog/view)', + redirect_url, 'redirect', default=None)) + + if api_only and not self._ACCESS_TOKEN: + raise ExtractorError('Cannot get data for dashboard-only post without access token') + + post_json = {} + if self._ACCESS_TOKEN: + post_json = traverse_obj( + self._download_json( + f'https://www.tumblr.com/api/v2/blog/{blog}/posts/{video_id}/permalink', + video_id, headers={'Authorization': f'Bearer {self._ACCESS_TOKEN}'}, fatal=False), + ('response', 'timeline', 'elements', 0)) or {} + content_json = traverse_obj(post_json, ('trail', 0, 'content'), ('content')) or [] + video_json = next( + (item for item in content_json if item.get('type') == 'video'), {}) + media_json = video_json.get('media') or {} + if api_only and not media_json.get('url') and not video_json.get('url'): + raise ExtractorError('Failed to find video data for dashboard-only post') + + if not media_json.get('url') and video_json.get('url'): + # external video host + return self.url_result( + video_json['url'], + self._providers.get(video_json.get('provider'), 'Generic')) + + video_url = self._og_search_video_url(webpage, default=None) + duration = None + formats = [] + + # iframes can supply duration and sometimes additional formats, so check for one iframe_url = self._search_regex( - r'src=\'(https?://www\.tumblr\.com/video/[^\']+)\'', + fr'src=\'(https?://www\.tumblr\.com/video/{blog}/{video_id}/[^\']+)\'', webpage, 'iframe url', default=None) - if iframe_url is None: + if iframe_url: + iframe = self._download_webpage( + iframe_url, video_id, 'Downloading iframe page', + headers={'Referer': redirect_url}) + + options = self._parse_json( + self._search_regex( + r'data-crt-options=(["\'])(?P<options>.+?)\1', iframe, + 'hd video url', default='', group='options'), + video_id, fatal=False) + if options: + duration = int_or_none(options.get('duration')) + + hd_url = options.get('hdUrl') + if hd_url: + # there are multiple formats; extract them + # ignore other sources of width/height data as they may be wrong + sources = [] + sd_url = self._search_regex( + r'<source[^>]+src=(["\'])(?P<url>.+?)\1', iframe, + 'sd video url', default=None, group='url') + if sd_url: + sources.append((sd_url, 'sd')) + sources.append((hd_url, 'hd')) + + formats = [{ + 'url': video_url, + 'format_id': format_id, + 'height': int_or_none(self._search_regex( + r'_(\d+)\.\w+$', video_url, 'height', default=None)), + 'quality': quality, + } for quality, (video_url, format_id) in enumerate(sources)] + + if not media_json.get('url') and not video_url and not iframe_url: + # external video host (but we weren't able to figure it out from the api) iframe_url = self._search_regex( r'src=["\'](https?://safe\.txmblr\.com/svc/embed/inline/[^"\']+)["\']', webpage, 'embed iframe url', default=None) return 
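Note: traverse_obj(post_json, ('trail', 0, 'content'), ('content')) above takes several paths and returns the first one that resolves, so a reblog yields the original post from 'trail' before the post's own 'content'. A stand-in showing just that behaviour (the real helper also accepts a bare string key, as in the hunk above):

    def traverse_first(obj, *paths):  # simplified stand-in for yt_dlp.utils.traverse_obj
        for path in paths:
            cur = obj
            try:
                for key in path:
                    cur = cur[key]
            except (KeyError, IndexError, TypeError):
                continue
            return cur

    reblog = {'trail': [{'content': [{'type': 'video'}]}], 'content': []}
    original = {'content': [{'type': 'text', 'text': 'hi'}]}
    print(traverse_first(reblog, ('trail', 0, 'content'), ('content',)))
    # [{'type': 'video'}], not the reblogger's empty content
    print(traverse_first(original, ('trail', 0, 'content'), ('content',)))
    # [{'type': 'text', 'text': 'hi'}]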
self.url_result(iframe_url or redirect_url, 'Generic') - iframe = self._download_webpage( - iframe_url, video_id, 'Downloading iframe page', - headers={'Referer': redirect_url}) - - duration = None - sources = [] - - sd_url = self._search_regex( - r'<source[^>]+src=(["\'])(?P<url>.+?)\1', iframe, - 'sd video url', default=None, group='url') - if sd_url: - sources.append((sd_url, 'sd')) - - options = self._parse_json( - self._search_regex( - r'data-crt-options=(["\'])(?P<options>.+?)\1', iframe, - 'hd video url', default='', group='options'), - video_id, fatal=False) - if options: - duration = int_or_none(options.get('duration')) - hd_url = options.get('hdUrl') - if hd_url: - sources.append((hd_url, 'hd')) - - formats = [{ - 'url': video_url, - 'ext': 'mp4', - 'format_id': format_id, - 'height': int_or_none(self._search_regex( - r'/(\d{3,4})$', video_url, 'height', default=None)), - 'quality': quality, - } for quality, (video_url, format_id) in enumerate(sources)] - + formats = formats or [{ + 'url': media_json.get('url') or video_url, + 'width': int_or_none( + media_json.get('width') or self._og_search_property('video:width', webpage, default=None)), + 'height': int_or_none( + media_json.get('height') or self._og_search_property('video:height', webpage, default=None)), + }] self._sort_formats(formats) - # The only place where you can get a title, it's not complete, - # but searching in other places doesn't work for all videos - video_title = self._html_search_regex( - r'(?s)<title>(?P<title>.*?)(?: \| Tumblr)?', - webpage, 'title') + # the url we're extracting from might be an original post or it might be a reblog. + # if it's a reblog, og:description will be the reblogger's comment, not the uploader's. + # content_json is always the op, so if it exists but has no text, there's no description + if content_json: + description = '\n\n'.join(( + item.get('text') for item in content_json if item.get('type') == 'text')) or None + else: + description = self._og_search_description(webpage, default=None) + uploader_id = traverse_obj(post_json, 'reblogged_root_name', 'blog_name') return { 'id': video_id, - 'title': video_title, - 'description': self._og_search_description(webpage, default=None), - 'thumbnail': self._og_search_thumbnail(webpage, default=None), + 'title': post_json.get('summary') or (blog if api_only else self._html_search_regex( + r'(?s)(?P<title>.*?)(?: \| Tumblr)?', webpage, 'title')), + 'description': description, + 'thumbnail': (traverse_obj(video_json, ('poster', 0, 'url')) + or self._og_search_thumbnail(webpage, default=None)), + 'uploader_id': uploader_id, + 'uploader_url': f'https://{uploader_id}.tumblr.com/' if uploader_id else None, 'duration': duration, + 'like_count': post_json.get('like_count'), + 'repost_count': post_json.get('reblog_count'), + 'age_limit': {True: 18, False: 0}.get(post_json.get('is_nsfw')), + 'tags': post_json.get('tags'), 'formats': formats, } From d49669acad71f640ffd8b78f0ea7911ae1f67720 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 5 Mar 2022 09:38:03 +0530 Subject: [PATCH 0762/2552] [youtube] Fix automatic captions Closes #2956 --- yt_dlp/extractor/youtube.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 4e812af99..ee0277fd7 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3410,11 +3410,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if caption_track.get('kind') != 'asr': trans_code += f'-{lang_code}' trans_name += 
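Note: the height sniffing switched from r'/(\d{3,4})$' to r'_(\d+)\.\w+$' because current tumblr media URLs carry the height as a filename suffix rather than a bare trailing path segment (the sample URL is invented):

    import re

    url = 'https://vt.media.tumblr.com/tumblr_abcdef_480.mp4'
    print(re.search(r'_(\d+)\.\w+$', url).group(1))  # 480
    print(re.search(r'/(\d{3,4})$', url))            # None: the old pattern no longer matches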
format_field(lang_name, template=' from %s') - process_language( - automatic_captions, base_url, trans_code, trans_name, {'tlang': trans_code}) + # Add an "-orig" label to the original language so that it can be distinguished. + # The subs are returned without "-orig" as well for compatibility if lang_code == f'a-{trans_code}': process_language( - automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {'tlang': trans_code}) + automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {}) + # Setting tlang=lang returns damaged subtitles. + # Not using lang_code == f'a-{trans_code}' here for future-proofing + orig_lang = parse_qs(base_url).get('lang', [None])[-1] + process_language(automatic_captions, base_url, trans_code, trans_name, + {} if orig_lang == trans_code else {'tlang': trans_code}) info['automatic_captions'] = automatic_captions info['subtitles'] = subtitles From 1ed7953a7405a8613b4a6d9ada1f91c04edb83c0 Mon Sep 17 00:00:00 2001 From: Lesmiscore Date: Sun, 6 Mar 2022 17:11:10 +0900 Subject: [PATCH 0763/2552] [utils] render_table: Fix character calculation for removing extra gap without this fix, the column next to delimiter will lack leading spaces on terminal (see https://github.com/yt-dlp/yt-dlp/pull/920#issuecomment-1059914615 for the situation) --- yt_dlp/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 87463c999..8256d543e 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -3485,7 +3485,7 @@ def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False): extra_gap += 1 if delim: table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data - table[1][-1] = table[1][-1][:-extra_gap] # Remove extra_gap from end of delimiter + table[1][-1] = table[1][-1][:-extra_gap * len(delim)] # Remove extra_gap from end of delimiter for row in table: for pos, text in enumerate(map(str, row)): if '\t' in text: From b46ccbc6d41cc8e1845d35b0600c78b7288d9aa4 Mon Sep 17 00:00:00 2001 From: shirt <2660574+shirt-dev@users.noreply.github.com> Date: Mon, 7 Mar 2022 23:02:27 -0500 Subject: [PATCH 0764/2552] [build] Update pyinstaller to 4.10 --- .github/workflows/build.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1ca67034f..6820889e2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -164,7 +164,7 @@ jobs: - name: Install Requirements run: | brew install coreutils - /usr/bin/python3 -m pip install -U --user pip Pyinstaller==4.9 -r requirements.txt + /usr/bin/python3 -m pip install -U --user pip Pyinstaller==4.10 -r requirements.txt - name: Bump version id: bump_version run: /usr/bin/python3 devscripts/update-version.py @@ -233,7 +233,7 @@ jobs: # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds run: | python -m pip install --upgrade pip setuptools wheel py2exe - pip install "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-4.9-py3-none-any.whl" -r requirements.txt + pip install "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-4.10-py3-none-any.whl" -r requirements.txt - name: Bump version id: bump_version env: @@ -320,7 +320,7 @@ jobs: - name: Install Requirements run: | python -m pip install --upgrade pip setuptools wheel - pip install "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-4.9-py3-none-any.whl" -r requirements.txt + pip install 
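Note: the render_table fix above matters when the delimiter is longer than one character: the gap row is built as delim * (width + extra_gap), so trimming it back must drop extra_gap * len(delim) characters. With a single-character delimiter both slices agree, which is how the bug hid; a two-character delimiter exposes it:

    delim, width, extra_gap = '=-', 3, 2
    cell = delim * (width + extra_gap)
    print(cell[:-extra_gap])               # '=-=-=-=-': old slice keeps part of the gap
    print(cell[:-extra_gap * len(delim)])  # '=-=-=-':   exactly `width` delimiters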
"https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-4.10-py3-none-any.whl" -r requirements.txt - name: Bump version id: bump_version env: From 2807d1709bb5b0d79d210145cb213242b2dbf41a Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 8 Mar 2022 07:25:00 +0530 Subject: [PATCH 0765/2552] [nrk] Add fallback API Closes #1891 --- yt_dlp/extractor/nrk.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/nrk.py b/yt_dlp/extractor/nrk.py index 49d58a685..4d723e886 100644 --- a/yt_dlp/extractor/nrk.py +++ b/yt_dlp/extractor/nrk.py @@ -8,6 +8,7 @@ import re from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + compat_HTTPError, determine_ext, ExtractorError, int_or_none, @@ -147,10 +148,14 @@ class NRKIE(NRKBaseIE): def _real_extract(self, url): video_id = self._match_id(url).split('/')[-1] - path_templ = 'playback/%s/program/' + video_id - def call_playback_api(item, query=None): - return self._call_api(path_templ % item, video_id, item, query=query) + try: + return self._call_api(f'playback/{item}/program/{video_id}', video_id, item, query=query) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: + return self._call_api(f'playback/{item}/{video_id}', video_id, item, query=query) + raise + # known values for preferredCdn: akamai, iponly, minicdn and telenor manifest = call_playback_api('manifest', {'preferredCdn': 'akamai'}) From 497d2fab6c25df04362e5fac8383acd28030a97b Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 8 Mar 2022 12:04:49 +0530 Subject: [PATCH 0766/2552] [utils] Better traceback for `ExtractorError` --- yt_dlp/extractor/common.py | 2 +- yt_dlp/utils.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index f86e7cb3e..5b7de1296 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -749,7 +749,7 @@ class InfoExtractor(object): errmsg = '%s: %s' % (errnote, error_to_compat_str(err)) if fatal: - raise ExtractorError(errmsg, sys.exc_info()[2], cause=err) + raise ExtractorError(errmsg, cause=err) else: self.report_warning(errmsg) return False diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 8256d543e..9406eb834 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -1076,9 +1076,10 @@ class ExtractorError(YoutubeDLError): '' if expected else bug_reports_message()))) def format_traceback(self): - if self.traceback is None: - return None - return ''.join(traceback.format_tb(self.traceback)) + return join_nonempty( + self.traceback and ''.join(traceback.format_tb(self.traceback)), + self.cause and ''.join(traceback.format_exception(self.cause)[1:]), + delim='\n') or None class UnsupportedError(ExtractorError): From 992f9a730b49fd36fc422be8d802f98ebcdce418 Mon Sep 17 00:00:00 2001 From: coletdev Date: Tue, 8 Mar 2022 20:28:00 +1300 Subject: [PATCH 0767/2552] [youtube] Prefer UTC upload date for videos (#2223) Except for live/scheduled streams/premieres. 
Closes #1881 Related: #2402 Authored-by: coletdjnz --- yt_dlp/extractor/youtube.py | 103 ++++++++++++++++++++++++++++++++++-- 1 file changed, 99 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index ee0277fd7..041815a19 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2081,7 +2081,93 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'age_limit': 0, 'channel_follower_count': int }, 'params': {'format': 'mhtml', 'skip_download': True} - } + }, { + # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939) + 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4', + 'info_dict': { + 'id': '2NUZ8W2llS4', + 'ext': 'mp4', + 'title': 'The NP that test your phone performance 🙂', + 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d', + 'uploader': 'Leon Nguyen', + 'uploader_id': 'VNSXIII', + 'uploader_url': 'http://www.youtube.com/user/VNSXIII', + 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA', + 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA', + 'duration': 21, + 'view_count': int, + 'age_limit': 0, + 'categories': ['Gaming'], + 'tags': 'count:23', + 'playable_in_embed': True, + 'live_status': 'not_live', + 'upload_date': '20220103', + 'like_count': int, + 'availability': 'public', + 'channel': 'Leon Nguyen', + 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp', + 'channel_follower_count': int + } + }, { + # date text is premiered video, ensure upload date in UTC (published 1641172509) + 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM', + 'info_dict': { + 'id': 'mzZzzBU6lrM', + 'ext': 'mp4', + 'title': 'I Met GeorgeNotFound In Real Life...', + 'description': 'md5:cca98a355c7184e750f711f3a1b22c84', + 'uploader': 'Quackity', + 'uploader_id': 'QuackityHQ', + 'uploader_url': 'http://www.youtube.com/user/QuackityHQ', + 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q', + 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q', + 'duration': 955, + 'view_count': int, + 'age_limit': 0, + 'categories': ['Entertainment'], + 'tags': 'count:26', + 'playable_in_embed': True, + 'live_status': 'not_live', + 'release_timestamp': 1641172509, + 'release_date': '20220103', + 'upload_date': '20220103', + 'like_count': int, + 'availability': 'public', + 'channel': 'Quackity', + 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg', + 'channel_follower_count': int + } + }, + { # continuous livestream. Microformat upload date should be preferred. 
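Note: the tests below pin upload_date to the UTC calendar day of the upload timestamp; 1641170939 (quoted in the first test's comment) falls on 2022-01-02 evening in US timezones but already on 2022-01-03 in UTC, hence '20220103':

    from datetime import datetime, timezone

    ts = 1641170939
    print(datetime.fromtimestamp(ts, tz=timezone.utc).strftime('%Y%m%d'))  # 20220103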
+            # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
+            'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
+            'info_dict': {
+                'id': 'kgx4WGK0oNU',
+                'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
+                'ext': 'mp4',
+                'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
+                'availability': 'public',
+                'age_limit': 0,
+                'release_timestamp': 1637975704,
+                'upload_date': '20210619',
+                'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
+                'live_status': 'is_live',
+                'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
+                'uploader': '阿鲍Abao',
+                'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
+                'channel': 'Abao in Tokyo',
+                'channel_follower_count': int,
+                'release_date': '20211127',
+                'tags': 'count:39',
+                'categories': ['People & Blogs'],
+                'like_count': int,
+                'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',
+                'view_count': int,
+                'playable_in_embed': True,
+                'description': 'md5:2ef1d002cad520f65825346e2084e49d',
+            },
+            'params': {'skip_download': True}
+        },
     ]
 
     @classmethod
@@ -3336,9 +3422,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             # URL checking if user don't care about getting the best possible thumbnail
             'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
             'description': video_description,
-            'upload_date': unified_strdate(
-                get_first(microformats, 'uploadDate')
-                or search_meta('uploadDate')),
             'uploader': get_first(video_details, 'author'),
             'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
             'uploader_url': owner_profile_url,
@@ -3489,6 +3572,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         for content in contents:
             vpir = content.get('videoPrimaryInfoRenderer')
             if vpir:
+                info['upload_date'] = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d')
                 stl = vpir.get('superTitleLink')
                 if stl:
                     stl = self._get_text(stl)
@@ -3567,6 +3651,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'channel_id': 'uploader_id',
             'channel_url': 'uploader_url',
         }
+
+        # The upload date for scheduled and current live streams / premieres in microformats
+        # is generally the true upload date. Although not in UTC, we will prefer that in this case.
+        # Note this changes to the published date when the stream/premiere has finished.
+        # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
+        if not info.get('upload_date') or info.get('is_live') or info.get('live_status') == 'is_upcoming':
+            info['upload_date'] = (
+                unified_strdate(get_first(microformats, 'uploadDate'))
+                or unified_strdate(search_meta('uploadDate'))
+                or info.get('upload_date'))
+
         for to, frm in fallbacks.items():
             if not info.get(to):
                 info[to] = info.get(frm)

From 409cdd1ec9659e06e67da05c3e9ffba0ce05f727 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Tue, 8 Mar 2022 12:48:50 +0530
Subject: [PATCH 0768/2552] [ard] Fix valid URL

Partial fix for #2975
---
 yt_dlp/extractor/ard.py | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/yt_dlp/extractor/ard.py b/yt_dlp/extractor/ard.py
index 4ad5d6ddd..7ea339b39 100644
--- a/yt_dlp/extractor/ard.py
+++ b/yt_dlp/extractor/ard.py
@@ -407,8 +407,9 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
         (?:(?:beta|www)\.)?ardmediathek\.de/
         (?:(?P<client>[^/]+)/)?
         (?:player|live|video|(?P<playlist>sendung|sammlung))/
-        (?:(?P<display_id>[^?#]+)/)?
-        (?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)'''
+        (?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)?
+        (?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)
+        (?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))'''

     _TESTS = [{
         'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
@@ -436,6 +437,13 @@
             'description': 'md5:39578c7b96c9fe50afdf5674ad985e6b',
             'upload_date': '20211108',
         },
+    }, {
+        'url': 'https://www.ardmediathek.de/sendung/beforeigners/beforeigners/staffel-1/Y3JpZDovL2Rhc2Vyc3RlLmRlL2JlZm9yZWlnbmVycw/1',
+        'playlist_count': 6,
+        'info_dict': {
+            'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL2JlZm9yZWlnbmVycw',
+            'title': 'beforeigners/beforeigners/staffel-1',
+        },
     }, {
         'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
         'only_matching': True,
@@ -561,14 +569,15 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
                 break
             pageNumber = pageNumber + 1
 
-        return self.playlist_result(entries, playlist_title=display_id)
+        return self.playlist_result(entries, playlist_id, playlist_title=display_id)
 
     def _real_extract(self, url):
-        video_id, display_id, playlist_type, client = self._match_valid_url(url).group(
-            'id', 'display_id', 'playlist', 'client')
+        video_id, display_id, playlist_type, client, season_number = self._match_valid_url(url).group(
+            'id', 'display_id', 'playlist', 'client', 'season')
         display_id, client = display_id or video_id, client or 'ard'
 
         if playlist_type:
+            # TODO: Extract only specified season
             return self._ARD_extract_playlist(url, video_id, display_id, client, playlist_type)
 
         player_page = self._download_json(

From fb6e3f4389b74d273fb34b737b2c5f75bf864d0e Mon Sep 17 00:00:00 2001
From: "Lesmiscore (Naoya Ozaki)"
Date: Tue, 8 Mar 2022 23:49:10 +0900
Subject: [PATCH 0769/2552] [mildom] Rework extractors (#2940)

Authored by: Lesmiscore
---
 yt_dlp/extractor/extractors.py |   1 +
 yt_dlp/extractor/mildom.py     | 295 +++++++++++++++------------------
 2 files changed, 137 insertions(+), 159 deletions(-)

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 5448acf01..e023a9802 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -848,6 +848,7 @@ from .microsoftvirtualacademy import (
 from .mildom import (
     MildomIE,
     MildomVodIE,
+    MildomClipIE,
     MildomUserVodIE,
 )
 from .minds import (
diff --git a/yt_dlp/extractor/mildom.py b/yt_dlp/extractor/mildom.py
index b5a2e17f2..ab718acb2 100644
--- a/yt_dlp/extractor/mildom.py
+++ b/yt_dlp/extractor/mildom.py
@@ -1,102 +1,43 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import base64
-from datetime import datetime
-import itertools
+import functools
 import json
 
 from .common import InfoExtractor
 from ..utils import (
-    update_url_query,
-    random_uuidv4,
-    try_get,
+    determine_ext,
+    dict_get,
+    ExtractorError,
     float_or_none,
-    dict_get
-)
-from ..compat import (
-    compat_str,
+    OnDemandPagedList,
+    random_uuidv4,
+    traverse_obj,
+    update_url_query,
 )
 
 
 class MildomBaseIE(InfoExtractor):
     _GUEST_ID = None
-    _DISPATCHER_CONFIG = None
-
-    def _call_api(self, url, video_id, query=None, note='Downloading JSON metadata', init=False):
-        query = query or {}
-        if query:
-            query['__platform'] = 'web'
-        url = update_url_query(url, self._common_queries(query, init=init))
-        content = self._download_json(url, video_id, note=note)
-        if content['code'] == 0:
-            return content['body']
-        else:
-            self.raise_no_formats(
-                f'Video not found or premium content. 
{content["code"]} - {content["message"]}', - expected=True) - def _common_queries(self, query={}, init=False): - dc = self._fetch_dispatcher_config() - r = { - 'timestamp': self.iso_timestamp(), - '__guest_id': '' if init else self.guest_id(), - '__location': dc['location'], - '__country': dc['country'], - '__cluster': dc['cluster'], - '__platform': 'web', - '__la': self.lang_code(), - '__pcv': 'v2.9.44', - 'sfr': 'pc', - 'accessToken': '', - } - r.update(query) - return r - - def _fetch_dispatcher_config(self): - if not self._DISPATCHER_CONFIG: - tmp = self._download_json( - 'https://disp.mildom.com/serverListV2', 'initialization', - note='Downloading dispatcher_config', data=json.dumps({ - 'protover': 0, - 'data': base64.b64encode(json.dumps({ - 'fr': 'web', - 'sfr': 'pc', - 'devi': 'Windows', - 'la': 'ja', - 'gid': None, - 'loc': '', - 'clu': '', - 'wh': '1919*810', - 'rtm': self.iso_timestamp(), - 'ua': self.get_param('http_headers')['User-Agent'], - }).encode('utf8')).decode('utf8').replace('\n', ''), - }).encode('utf8')) - self._DISPATCHER_CONFIG = self._parse_json(base64.b64decode(tmp['data']), 'initialization') - return self._DISPATCHER_CONFIG - - @staticmethod - def iso_timestamp(): - 'new Date().toISOString()' - return datetime.utcnow().isoformat()[0:-3] + 'Z' - - def guest_id(self): - 'getGuestId' - if self._GUEST_ID: - return self._GUEST_ID - self._GUEST_ID = try_get( - self, ( - lambda x: x._call_api( - 'https://cloudac.mildom.com/nonolive/gappserv/guest/h5init', 'initialization', - note='Downloading guest token', init=True)['guest_id'] or None, - lambda x: x._get_cookies('https://www.mildom.com').get('gid').value, - lambda x: x._get_cookies('https://m.mildom.com').get('gid').value, - ), compat_str) or '' - return self._GUEST_ID - - def lang_code(self): - 'getCurrentLangCode' - return 'ja' + def _call_api(self, url, video_id, query=None, note='Downloading JSON metadata', body=None): + if not self._GUEST_ID: + self._GUEST_ID = f'pc-gp-{random_uuidv4()}' + + content = self._download_json( + url, video_id, note=note, data=json.dumps(body).encode() if body else None, + headers={'Content-Type': 'application/json'} if body else {}, + query={ + '__guest_id': self._GUEST_ID, + '__platform': 'web', + **(query or {}), + }) + + if content['code'] != 0: + raise ExtractorError( + f'Mildom says: {content["message"]} (code {content["code"]})', + expected=True) + return content['body'] class MildomIE(MildomBaseIE): @@ -106,31 +47,13 @@ class MildomIE(MildomBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - url = 'https://www.mildom.com/%s' % video_id - - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage(f'https://www.mildom.com/{video_id}', video_id) enterstudio = self._call_api( 'https://cloudac.mildom.com/nonolive/gappserv/live/enterstudio', video_id, note='Downloading live metadata', query={'user_id': video_id}) result_video_id = enterstudio.get('log_id', video_id) - title = try_get( - enterstudio, ( - lambda x: self._html_search_meta('twitter:description', webpage), - lambda x: x['anchor_intro'], - ), compat_str) - description = try_get( - enterstudio, ( - lambda x: x['intro'], - lambda x: x['live_intro'], - ), compat_str) - uploader = try_get( - enterstudio, ( - lambda x: self._html_search_meta('twitter:title', webpage), - lambda x: x['loginname'], - ), compat_str) - servers = self._call_api( 'https://cloudac.mildom.com/nonolive/gappserv/live/liveserver', result_video_id, note='Downloading live server list', query={ @@ -138,17 +61,20 
@@ class MildomIE(MildomBaseIE):
                 'live_server_type': 'hls',
             })
 
-        stream_query = self._common_queries({
-            'streamReqId': random_uuidv4(),
-            'is_lhls': '0',
-        })
-        m3u8_url = update_url_query(servers['stream_server'] + '/%s_master.m3u8' % video_id, stream_query)
-        formats = self._extract_m3u8_formats(m3u8_url, result_video_id, 'mp4', headers={
-            'Referer': 'https://www.mildom.com/',
-            'Origin': 'https://www.mildom.com',
-        }, note='Downloading m3u8 information')
-
-        del stream_query['streamReqId'], stream_query['timestamp']
+        playback_token = self._call_api(
+            'https://cloudac.mildom.com/nonolive/gappserv/live/token', result_video_id,
+            note='Obtaining live playback token', body={'host_id': video_id, 'type': 'hls'})
+        playback_token = traverse_obj(playback_token, ('data', ..., 'token'), get_all=False)
+        if not playback_token:
+            raise ExtractorError('Failed to obtain live playback token')
+
+        formats = self._extract_m3u8_formats(
+            f'{servers["stream_server"]}/{video_id}_master.m3u8?{playback_token}',
+            result_video_id, 'mp4', headers={
+                'Referer': 'https://www.mildom.com/',
+                'Origin': 'https://www.mildom.com',
+            })
+
         for fmt in formats:
             fmt.setdefault('http_headers', {})['Referer'] = 'https://www.mildom.com/'
 
@@ -156,10 +82,10 @@ class MildomIE(MildomBaseIE):
 
         return {
             'id': result_video_id,
-            'title': title,
-            'description': description,
+            'title': self._html_search_meta('twitter:description', webpage, default=None) or traverse_obj(enterstudio, 'anchor_intro'),
+            'description': traverse_obj(enterstudio, 'intro', 'live_intro', expected_type=str),
             'timestamp': float_or_none(enterstudio.get('live_start_ms'), scale=1000),
-            'uploader': uploader,
+            'uploader': self._html_search_meta('twitter:title', webpage, default=None) or traverse_obj(enterstudio, 'loginname'),
             'uploader_id': video_id,
             'formats': formats,
             'is_live': True,
@@ -168,7 +94,7 @@ class MildomIE(MildomBaseIE):
 
 class MildomVodIE(MildomBaseIE):
     IE_NAME = 'mildom:vod'
-    IE_DESC = 'Download a VOD in Mildom'
+    IE_DESC = 'VOD in Mildom'
     _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/playback/(?P<user_id>\d+)/(?P<id>(?P=user_id)-[a-zA-Z0-9]+-?[0-9]*)'
     _TESTS = [{
         'url': 'https://www.mildom.com/playback/10882672/10882672-1597662269',
@@ -215,11 +141,8 @@ class MildomVodIE(MildomBaseIE):
     }]
 
     def _real_extract(self, url):
-        m = self._match_valid_url(url)
-        user_id, video_id = m.group('user_id'), m.group('id')
-        url = 'https://www.mildom.com/playback/%s/%s' % (user_id, video_id)
-
-        webpage = self._download_webpage(url, video_id)
+        user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
+        webpage = self._download_webpage(f'https://www.mildom.com/playback/{user_id}/{video_id}', video_id)
 
         autoplay = self._call_api(
             'https://cloudac.mildom.com/nonolive/videocontent/playback/getPlaybackDetail', video_id,
@@ -227,20 +150,6 @@ class MildomVodIE(MildomBaseIE):
             'v_id': video_id,
         })['playback']
 
-        title = try_get(
-            autoplay, (
-                lambda x: self._html_search_meta('og:description', webpage),
-                lambda x: x['title'],
-            ), compat_str)
-        description = try_get(
-            autoplay, (
-                lambda x: x['video_intro'],
-            ), compat_str)
-        uploader = try_get(
-            autoplay, (
-                lambda x: x['author_info']['login_name'],
-            ), compat_str)
-
         formats = [{
             'url': autoplay['audio_url'],
             'format_id': 'audio',
@@ -265,17 +174,81 @@ class MildomVodIE(MildomBaseIE):
 
         return {
             'id': video_id,
-            'title': title,
-            'description': description,
-            'timestamp': float_or_none(autoplay['publish_time'], scale=1000),
-            'duration': float_or_none(autoplay['video_length'], scale=1000),
+            'title': 
self._html_search_meta(('og:description', 'description'), webpage, default=None) or autoplay.get('title'),
+            'description': traverse_obj(autoplay, 'video_intro'),
+            'timestamp': float_or_none(autoplay.get('publish_time'), scale=1000),
+            'duration': float_or_none(autoplay.get('video_length'), scale=1000),
             'thumbnail': dict_get(autoplay, ('upload_pic', 'video_pic')),
-            'uploader': uploader,
+            'uploader': traverse_obj(autoplay, ('author_info', 'login_name')),
             'uploader_id': user_id,
             'formats': formats,
         }
 
 
+class MildomClipIE(MildomBaseIE):
+    IE_NAME = 'mildom:clip'
+    IE_DESC = 'Clip in Mildom'
+    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/clip/(?P<id>(?P<user_id>\d+)-[a-zA-Z0-9]+)'
+    _TESTS = [{
+        'url': 'https://www.mildom.com/clip/10042245-63921673e7b147ebb0806d42b5ba5ce9',
+        'info_dict': {
+            'id': '10042245-63921673e7b147ebb0806d42b5ba5ce9',
+            'title': '全然違ったよ',
+            'timestamp': 1619181890,
+            'duration': 59,
+            'thumbnail': r're:https?://.+',
+            'uploader': 'ざきんぽ',
+            'uploader_id': '10042245',
+        },
+    }, {
+        'url': 'https://www.mildom.com/clip/10111524-ebf4036e5aa8411c99fb3a1ae0902864',
+        'info_dict': {
+            'id': '10111524-ebf4036e5aa8411c99fb3a1ae0902864',
+            'title': 'かっこいい',
+            'timestamp': 1621094003,
+            'duration': 59,
+            'thumbnail': r're:https?://.+',
+            'uploader': '(ルーキー',
+            'uploader_id': '10111524',
+        },
+    }, {
+        'url': 'https://www.mildom.com/clip/10660174-2c539e6e277c4aaeb4b1fbe8d22cb902',
+        'info_dict': {
+            'id': '10660174-2c539e6e277c4aaeb4b1fbe8d22cb902',
+            'title': 'あ',
+            'timestamp': 1614769431,
+            'duration': 31,
+            'thumbnail': r're:https?://.+',
+            'uploader': 'ドルゴルスレンギーン=ダグワドルジ',
+            'uploader_id': '10660174',
+        },
+    }]
+
+    def _real_extract(self, url):
+        user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
+        webpage = self._download_webpage(f'https://www.mildom.com/clip/{video_id}', video_id)
+
+        clip_detail = self._call_api(
+            'https://cloudac-cf-jp.mildom.com/nonolive/videocontent/clip/detail', video_id,
+            note='Downloading playback metadata', query={
+                'clip_id': video_id,
+            })
+
+        return {
+            'id': video_id,
+            'title': self._html_search_meta(
+                ('og:description', 'description'), webpage, default=None) or clip_detail.get('title'),
+            'timestamp': float_or_none(clip_detail.get('create_time')),
+            'duration': float_or_none(clip_detail.get('length')),
+            'thumbnail': clip_detail.get('cover'),
+            'uploader': traverse_obj(clip_detail, ('user_info', 'loginname')),
+            'uploader_id': user_id,
+
+            'url': clip_detail['url'],
+            'ext': determine_ext(clip_detail.get('url'), 'mp4'),
+        }
+
+
 class MildomUserVodIE(MildomBaseIE):
     IE_NAME = 'mildom:user:vod'
     IE_DESC = 'Download all VODs from specific user in Mildom'
@@ -286,29 +259,32 @@ class MildomUserVodIE(MildomBaseIE):
             'id': '10093333',
             'title': 'Uploads from ねこばたけ',
         },
-        'playlist_mincount': 351,
+        'playlist_mincount': 732,
     }, {
        'url': 'https://www.mildom.com/profile/10882672',
        'info_dict': {
            'id': '10882672',
            'title': 'Uploads from kson組長(けいそん)',
        },
-        'playlist_mincount': 191,
+        'playlist_mincount': 201,
    }]
 
-    def _entries(self, user_id):
-        for page in itertools.count(1):
-            reply = self._call_api(
-                'https://cloudac.mildom.com/nonolive/videocontent/profile/playbackList',
-                user_id, note='Downloading page %d' % page, query={
-                    'user_id': user_id,
-                    'page': page,
-                    'limit': '30',
-                })
-            if not reply:
-                break
-            for x in reply:
-                yield self.url_result('https://www.mildom.com/playback/%s/%s' % (user_id, x['v_id']))
+    def _fetch_page(self, user_id, page):
+        page += 1
+        reply = self._call_api(
+            
'https://cloudac.mildom.com/nonolive/videocontent/profile/playbackList',
+            user_id, note=f'Downloading page {page}', query={
+                'user_id': user_id,
+                'page': page,
+                'limit': '30',
+            })
+        if not reply:
+            return
+        for x in reply:
+            v_id = x.get('v_id')
+            if not v_id:
+                continue
+            yield self.url_result(f'https://www.mildom.com/playback/{user_id}/{v_id}')
 
     def _real_extract(self, url):
         user_id = self._match_id(url)
@@ -319,4 +295,5 @@ class MildomUserVodIE(MildomBaseIE):
             query={'user_id': user_id}, note='Downloading user profile')['user_info']
 
         return self.playlist_result(
-            self._entries(user_id), user_id, 'Uploads from %s' % profile['loginname'])
+            OnDemandPagedList(functools.partial(self._fetch_page, user_id), 30),
+            user_id, f'Uploads from {profile["loginname"]}')

From b637c4e22ec57a178f82734fbb39f98bc1a679cb Mon Sep 17 00:00:00 2001
From: Lesmiscore
Date: Tue, 8 Mar 2022 23:56:30 +0900
Subject: [PATCH 0770/2552] [mildom] Fix linter

---
 yt_dlp/extractor/mildom.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/yt_dlp/extractor/mildom.py b/yt_dlp/extractor/mildom.py
index ab718acb2..5f2df29c6 100644
--- a/yt_dlp/extractor/mildom.py
+++ b/yt_dlp/extractor/mildom.py
@@ -13,7 +13,6 @@ from ..utils import (
     OnDemandPagedList,
     random_uuidv4,
     traverse_obj,
-    update_url_query,
 )

From 9e0e6adb2d5697e1ddedbfe3fbd23f498a8ff66c Mon Sep 17 00:00:00 2001
From: CplPwnies
Date: Tue, 8 Mar 2022 10:18:52 -0600
Subject: [PATCH 0771/2552] [adobepass] Add Suddenlink MSO (#2977)

Closes #2704

Authored by: CplPwnies
---
 yt_dlp/extractor/adobepass.py | 51 +++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py
index bebcafa6b..f0eba8844 100644
--- a/yt_dlp/extractor/adobepass.py
+++ b/yt_dlp/extractor/adobepass.py
@@ -1345,6 +1345,11 @@ MSO_INFO = {
         'username_field': 'username',
         'password_field': 'password',
     },
+    'Suddenlink': {
+        'name': 'Suddenlink',
+        'username_field': 'username',
+        'password_field': 'password',
+    },
 }
 
 
@@ -1635,6 +1640,52 @@ class AdobePassIE(InfoExtractor):
                     urlh.geturl(), video_id, 'Sending final bookend',
                     query=hidden_data)
 
                 post_form(mvpd_confirm_page_res, 'Confirming Login')
+            elif mso_id == 'Suddenlink':
+                # Suddenlink is similar to SlingTV in using a tab history count and a meta refresh,
+                # but they also do a dynamic redirect using javascript that has to be followed as well
+                first_bookend_page, urlh = post_form(
+                    provider_redirect_page_res, 'Pressing Continue...')
+
+                hidden_data = self._hidden_inputs(first_bookend_page)
+                hidden_data['history_val'] = 1
+
+                provider_login_redirect_page = self._download_webpage(
+                    urlh.geturl(), video_id, 'Sending First Bookend',
+                    query=hidden_data)
+
+                provider_tryauth_url = self._html_search_regex(
+                    r'url:\s*[\'"]([^\'"]+)', provider_login_redirect_page, 'ajaxurl')
+
+                provider_tryauth_page = self._download_webpage(
+                    provider_tryauth_url, video_id, 'Submitting TryAuth',
+                    query=hidden_data)
+
+                provider_login_page_res = self._download_webpage_handle(
+                    f'https://authorize.suddenlink.net/saml/module.php/authSynacor/login.php?AuthState={provider_tryauth_page}',
+                    video_id, 'Getting Login Page',
+                    query=hidden_data)
+
+                provider_association_redirect, urlh = post_form(
+                    provider_login_page_res, 'Logging in', {
+                        mso_info['username_field']: username,
+                        mso_info['password_field']: password
+                    })
+
+                provider_refresh_redirect_url = extract_redirect_url(
+                    provider_association_redirect, url=urlh.geturl())
+
+                last_bookend_page, urlh = self._download_webpage_handle(
+                    
provider_refresh_redirect_url, video_id, + 'Downloading Auth Association Redirect Page') + + hidden_data = self._hidden_inputs(last_bookend_page) + hidden_data['history_val'] = 3 + + mvpd_confirm_page_res = self._download_webpage_handle( + urlh.geturl(), video_id, 'Sending Final Bookend', + query=hidden_data) + post_form(mvpd_confirm_page_res, 'Confirming Login') else: # Some providers (e.g. DIRECTV NOW) have another meta refresh From 4390d5ec12349e5b5bba30af6b4e7f08678af41a Mon Sep 17 00:00:00 2001 From: coletdev Date: Wed, 9 Mar 2022 05:44:05 +1300 Subject: [PATCH 0772/2552] Add brotli content-encoding support (#2433) Authored by: coletdjnz --- README.md | 3 ++- pyinst.py | 2 +- requirements.txt | 2 ++ setup.py | 2 +- yt_dlp/YoutubeDL.py | 2 ++ yt_dlp/compat.py | 8 ++++++++ yt_dlp/utils.py | 21 ++++++++++++++++++++- 7 files changed, 36 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index ce5af129e..81b5d417d 100644 --- a/README.md +++ b/README.md @@ -268,6 +268,7 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly * [**websockets**](https://github.com/aaugustin/websockets) - For downloading over websocket. Licensed under [BSD3](https://github.com/aaugustin/websockets/blob/main/LICENSE) * [**secretstorage**](https://github.com/mitya57/secretstorage) - For accessing the Gnome keyring while decrypting cookies of Chromium-based browsers on Linux. Licensed under [BSD](https://github.com/mitya57/secretstorage/blob/master/LICENSE) * [**AtomicParsley**](https://github.com/wez/atomicparsley) - For embedding thumbnail in mp4/m4a if mutagen is not present. Licensed under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING) +* [**brotli**](https://github.com/google/brotli) or [**brotlicffi**](https://github.com/python-hyper/brotlicffi) - [Brotli](https://en.wikipedia.org/wiki/Brotli) content encoding support. Both licensed under MIT [1](https://github.com/google/brotli/blob/master/LICENSE) [2](https://github.com/python-hyper/brotlicffi/blob/master/LICENSE) * [**rtmpdump**](http://rtmpdump.mplayerhq.hu) - For downloading `rtmp` streams. ffmpeg will be used as a fallback. Licensed under [GPLv2+](http://rtmpdump.mplayerhq.hu) * [**mplayer**](http://mplayerhq.hu/design7/info.html) or [**mpv**](https://mpv.io) - For downloading `rstp` streams. ffmpeg will be used as a fallback. Licensed under [GPLv2+](https://github.com/mpv-player/mpv/blob/master/Copyright) * [**phantomjs**](https://github.com/ariya/phantomjs) - Used in extractors where javascript needs to be run. Licensed under [BSD3](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD) @@ -284,7 +285,7 @@ The Windows and MacOS standalone release binaries are already built with the pyt ## COMPILE **For Windows**: -To build the Windows executable, you must have pyinstaller (and optionally mutagen, pycryptodomex, websockets). Once you have all the necessary dependencies installed, (optionally) build lazy extractors using `devscripts/make_lazy_extractors.py`, and then just run `pyinst.py`. The executable will be built for the same architecture (32/64 bit) as the python used to build it. +To build the Windows executable, you must have pyinstaller (and any of yt-dlp's optional dependencies if needed). Once you have all the necessary dependencies installed, (optionally) build lazy extractors using `devscripts/make_lazy_extractors.py`, and then just run `pyinst.py`. The executable will be built for the same architecture (32/64 bit) as the python used to build it. 
py -m pip install -U pyinstaller -r requirements.txt py devscripts/make_lazy_extractors.py diff --git a/pyinst.py b/pyinst.py index f135ec90d..ca115fd78 100644 --- a/pyinst.py +++ b/pyinst.py @@ -74,7 +74,7 @@ def version_to_list(version): def dependency_options(): - dependencies = [pycryptodome_module(), 'mutagen'] + collect_submodules('websockets') + dependencies = [pycryptodome_module(), 'mutagen', 'brotli'] + collect_submodules('websockets') excluded_modules = ['test', 'ytdlp_plugins', 'youtube-dl', 'youtube-dlc'] yield from (f'--hidden-import={module}' for module in dependencies) diff --git a/requirements.txt b/requirements.txt index cecd08eae..cb0eece46 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,5 @@ mutagen pycryptodomex websockets +brotli; platform_python_implementation=='CPython' +brotlicffi; platform_python_implementation!='CPython' \ No newline at end of file diff --git a/setup.py b/setup.py index f08ae2309..3e599cd95 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ LONG_DESCRIPTION = '\n\n'.join(( '**PS**: Some links in this document will not work since this is a copy of the README.md from Github', open('README.md', 'r', encoding='utf-8').read())) -REQUIREMENTS = ['mutagen', 'pycryptodomex', 'websockets'] +REQUIREMENTS = open('requirements.txt').read().splitlines() if sys.argv[1:2] == ['py2exe']: diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 57201b6dc..51a89bd23 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -32,6 +32,7 @@ from string import ascii_letters from .compat import ( compat_basestring, + compat_brotli, compat_get_terminal_size, compat_kwargs, compat_numeric_types, @@ -3675,6 +3676,7 @@ class YoutubeDL(object): from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE lib_str = join_nonempty( + compat_brotli and compat_brotli.__name__, compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0], SECRETSTORAGE_AVAILABLE and 'secretstorage', has_mutagen and 'mutagen', diff --git a/yt_dlp/compat.py b/yt_dlp/compat.py index 2bc6a6b7f..0a0d3b351 100644 --- a/yt_dlp/compat.py +++ b/yt_dlp/compat.py @@ -170,6 +170,13 @@ except ImportError: except ImportError: compat_pycrypto_AES = None +try: + import brotlicffi as compat_brotli +except ImportError: + try: + import brotli as compat_brotli + except ImportError: + compat_brotli = None WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None @@ -258,6 +265,7 @@ __all__ = [ 'compat_asyncio_run', 'compat_b64decode', 'compat_basestring', + 'compat_brotli', 'compat_chr', 'compat_collections_abc', 'compat_cookiejar', diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 9406eb834..f6e41f837 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -47,6 +47,7 @@ from .compat import ( compat_HTMLParser, compat_HTTPError, compat_basestring, + compat_brotli, compat_chr, compat_cookiejar, compat_ctypes_WINFUNCTYPE, @@ -143,10 +144,16 @@ def random_user_agent(): return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS) +SUPPORTED_ENCODINGS = [ + 'gzip', 'deflate' +] +if compat_brotli: + SUPPORTED_ENCODINGS.append('br') + std_headers = { 'User-Agent': random_user_agent(), 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', - 'Accept-Encoding': 'gzip, deflate', + 'Accept-Encoding': ', '.join(SUPPORTED_ENCODINGS), 'Accept-Language': 'en-us,en;q=0.5', 'Sec-Fetch-Mode': 'navigate', } @@ -1357,6 +1364,12 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): except zlib.error: return zlib.decompress(data) + @staticmethod + def brotli(data): + if not 
data: + return data + return compat_brotli.decompress(data) + def http_request(self, req): # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not # always respected by websites, some tend to give out URLs with non percent-encoded @@ -1417,6 +1430,12 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code) resp.msg = old_resp.msg del resp.headers['Content-encoding'] + # brotli + if resp.headers.get('Content-encoding', '') == 'br': + resp = compat_urllib_request.addinfourl( + io.BytesIO(self.brotli(resp.read())), old_resp.headers, old_resp.url, old_resp.code) + resp.msg = old_resp.msg + del resp.headers['Content-encoding'] # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see # https://github.com/ytdl-org/youtube-dl/issues/6457). if 300 <= resp.code < 400: From e66662b1e031640521e12e1e7e57ea7b4a36b5fa Mon Sep 17 00:00:00 2001 From: nyuszika7h Date: Tue, 8 Mar 2022 17:45:23 +0100 Subject: [PATCH 0773/2552] [ccma] Fix timestamp parsing (#2989) Authored by: nyuszika7h --- yt_dlp/extractor/ccma.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/yt_dlp/extractor/ccma.py b/yt_dlp/extractor/ccma.py index ea98f8688..9dbaabfa0 100644 --- a/yt_dlp/extractor/ccma.py +++ b/yt_dlp/extractor/ccma.py @@ -1,17 +1,14 @@ # coding: utf-8 from __future__ import unicode_literals -import calendar -import datetime - from .common import InfoExtractor from ..utils import ( clean_html, - extract_timezone, int_or_none, parse_duration, parse_resolution, try_get, + unified_timestamp, url_or_none, ) @@ -95,14 +92,8 @@ class CCMAIE(InfoExtractor): duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text')) tematica = try_get(informacio, lambda x: x['tematica']['text']) - timestamp = None data_utc = try_get(informacio, lambda x: x['data_emissio']['utc']) - try: - timezone, data_utc = extract_timezone(data_utc) - timestamp = calendar.timegm((datetime.datetime.strptime( - data_utc, '%Y-%d-%mT%H:%M:%S') - timezone).timetuple()) - except TypeError: - pass + timestamp = unified_timestamp(data_utc) subtitles = {} subtitols = media.get('subtitols') or [] From a35155be17ddf099077026ba9ed662ff5f89ed70 Mon Sep 17 00:00:00 2001 From: Max Mehl <6170081+mxmehl@users.noreply.github.com> Date: Tue, 8 Mar 2022 17:48:35 +0100 Subject: [PATCH 0774/2552] [peertube] Add media.fsfe.org (#2986) Authored by: mxmehl --- yt_dlp/extractor/peertube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/peertube.py b/yt_dlp/extractor/peertube.py index e0b2ab982..9d6b82178 100644 --- a/yt_dlp/extractor/peertube.py +++ b/yt_dlp/extractor/peertube.py @@ -87,6 +87,7 @@ class PeerTubeIE(InfoExtractor): maindreieck-tv\.de| mani\.tube| manicphase\.me| + media\.fsfe\.org| media\.gzevd\.de| media\.inno3\.cricket| media\.kaitaia\.life| From 151f8f1c02219f997370011c111a846c003d5377 Mon Sep 17 00:00:00 2001 From: Ha Tien Loi Date: Tue, 8 Mar 2022 23:52:51 +0700 Subject: [PATCH 0775/2552] [fptplay] Add extractor (#2949) Closes #2857 Authored by: hatienl0i261299 --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/fptplay.py | 102 +++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+) create mode 100644 yt_dlp/extractor/fptplay.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index e023a9802..f028d3937 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ 
-520,6 +520,7 @@ from .foxnews import (
     FoxNewsArticleIE,
 )
 from .foxsports import FoxSportsIE
+from .fptplay import FptplayIE
 from .franceculture import FranceCultureIE
 from .franceinter import FranceInterIE
 from .francetv import (
diff --git a/yt_dlp/extractor/fptplay.py b/yt_dlp/extractor/fptplay.py
new file mode 100644
index 000000000..a34e90bb1
--- /dev/null
+++ b/yt_dlp/extractor/fptplay.py
@@ -0,0 +1,102 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hashlib
+import time
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import (
+    join_nonempty,
+)
+
+
+class FptplayIE(InfoExtractor):
+    _VALID_URL = r'https?://fptplay\.vn/(?P<type>xem-video)/[^/]+\-(?P<id>\w+)(?:/tap-(?P<episode>[^/]+)?/?(?:[?#]|$)|)'
+    _GEO_COUNTRIES = ['VN']
+    IE_NAME = 'fptplay'
+    IE_DESC = 'fptplay.vn'
+    _TESTS = [{
+        'url': 'https://fptplay.vn/xem-video/nhan-duyen-dai-nhan-xin-dung-buoc-621a123016f369ebbde55945',
+        'md5': 'ca0ee9bc63446c0c3e9a90186f7d6b33',
+        'info_dict': {
+            'id': '621a123016f369ebbde55945',
+            'ext': 'mp4',
+            'title': 'Nhân Duyên Đại Nhân Xin Dừng Bước - Ms. Cupid In Love',
+            'description': 'md5:23cf7d1ce0ade8e21e76ae482e6a8c6c',
+        },
+    }, {
+        'url': 'https://fptplay.vn/xem-video/ma-toi-la-dai-gia-61f3aa8a6b3b1d2e73c60eb5/tap-3',
+        'md5': 'b35be968c909b3e4e1e20ca45dd261b1',
+        'info_dict': {
+            'id': '61f3aa8a6b3b1d2e73c60eb5',
+            'ext': 'mp4',
+            'title': 'Má Tôi Là Đại Gia - 3',
+            'description': 'md5:ff8ba62fb6e98ef8875c42edff641d1c',
+        },
+    }, {
+        'url': 'https://fptplay.vn/xem-video/nha-co-chuyen-hi-alls-well-ends-well-1997-6218995f6af792ee370459f0',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        type_url, video_id, episode = self._match_valid_url(url).group('type', 'id', 'episode')
+        webpage = self._download_webpage(url, video_id=video_id, fatal=False)
+        info = self._download_json(self.get_api_with_st_token(video_id, episode or 0), video_id)
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(info['data']['url'], video_id, 'mp4')
+        self._sort_formats(formats)
+        return {
+            'id': video_id,
+            'title': join_nonempty(
+                self._html_search_meta(('og:title', 'twitter:title'), webpage), episode, delim=' - '),
+            'description': self._html_search_meta(['og:description', 'twitter:description'], webpage),
+            'formats': formats,
+            'subtitles': subtitles,
+        }
+
+    def get_api_with_st_token(self, video_id, episode):
+        path = f'/api/v6.2_w/stream/vod/{video_id}/{episode}/auto_vip'
+        timestamp = int(time.time()) + 10800
+
+        t = hashlib.md5(f'WEBv6Dkdsad90dasdjlALDDDS{timestamp}{path}'.encode()).hexdigest().upper()
+        r = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
+        n = [int(f'0x{t[2 * o: 2 * o + 2]}', 16) for o in range(len(t) // 2)]
+
+        def convert(e):
+            t = ''
+            n = 0
+            i = [0, 0, 0]
+            a = [0, 0, 0, 0]
+            s = len(e)
+            c = 0
+            for z in range(s, 0, -1):
+                if n <= 3:
+                    i[n] = e[c]
+                n += 1
+                c += 1
+                if 3 == n:
+                    a[0] = (252 & i[0]) >> 2
+                    a[1] = ((3 & i[0]) << 4) + ((240 & i[1]) >> 4)
+                    a[2] = ((15 & i[1]) << 2) + ((192 & i[2]) >> 6)
+                    a[3] = (63 & i[2])
+                    for v in range(4):
+                        t += r[a[v]]
+                    n = 0
+            if n:
+                for o in range(n, 3):
+                    i[o] = 0
+
+                for o in range(n + 1):
+                    a[0] = (252 & i[0]) >> 2
+                    a[1] = ((3 & i[0]) << 4) + ((240 & i[1]) >> 4)
+                    a[2] = ((15 & i[1]) << 2) + ((192 & i[2]) >> 6)
+                    a[3] = (63 & i[2])
+                    t += r[a[o]]
+                    n += 1
+                while n < 3:
+                    t += ''
+                    n += 1
+            return t
+
+        st_token = convert(n).replace('+', '-').replace('/', '_').replace('=', '')
+        return f'https://api.fptplay.net{path}?{urllib.parse.urlencode({"st": 
st_token, "e": timestamp})}'

From c89bec262c7a8efa078c61b2ec59afdd4051e4bf Mon Sep 17 00:00:00 2001
From: Ha Tien Loi
Date: Tue, 8 Mar 2022 23:55:40 +0700
Subject: [PATCH 0776/2552] [xinpianchang] Add extractor (#2963)

Authored by: hatienl0i261299
---
 yt_dlp/extractor/extractors.py   |  1 +
 yt_dlp/extractor/xinpianchang.py | 95 ++++++++++++++++++++++++++++++++
 2 files changed, 96 insertions(+)
 create mode 100644 yt_dlp/extractor/xinpianchang.py

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index f028d3937..f7a879ad9 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -2012,6 +2012,7 @@ from .ximalaya import (
     XimalayaIE,
     XimalayaAlbumIE
 )
+from .xinpianchang import XinpianchangIE
 from .xminus import XMinusIE
 from .xnxx import XNXXIE
 from .xstream import XstreamIE
diff --git a/yt_dlp/extractor/xinpianchang.py b/yt_dlp/extractor/xinpianchang.py
new file mode 100644
index 000000000..9832d2398
--- /dev/null
+++ b/yt_dlp/extractor/xinpianchang.py
@@ -0,0 +1,95 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    try_get,
+    update_url_query,
+    url_or_none,
+)
+
+
+class XinpianchangIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.xinpianchang\.com/(?P<id>[^/]+?)(?:\D|$)'
+    IE_NAME = 'xinpianchang'
+    IE_DESC = 'xinpianchang.com'
+    _TESTS = [{
+        'url': 'https://www.xinpianchang.com/a11766551',
+        'info_dict': {
+            'id': 'a11766551',
+            'ext': 'mp4',
+            'title': '北京2022冬奥会闭幕式再见短片-冰墩墩下班了',
+            'description': 'md5:4a730c10639a82190fabe921c0fa4b87',
+            'duration': 151,
+            'thumbnail': r're:^https?://oss-xpc0\.xpccdn\.com.+/assets/',
+            'uploader': '正时文创',
+            'uploader_id': 10357277,
+            'categories': ['宣传片', '国家城市', '广告', '其他'],
+            'keywords': ['北京冬奥会', '冰墩墩', '再见', '告别', '冰墩墩哭了', '感动', '闭幕式', '熄火']
+        },
+    }, {
+        'url': 'https://www.xinpianchang.com/a11762904',
+        'info_dict': {
+            'id': 'a11762904',
+            'ext': 'mp4',
+            'title': '冬奥会决胜时刻《法国派出三只鸡?》',
+            'description': 'md5:55cb139ef8f48f0c877932d1f196df8b',
+            'duration': 136,
+            'thumbnail': r're:^https?://oss-xpc0\.xpccdn\.com.+/assets/',
+            'uploader': '精品动画',
+            'uploader_id': 10858927,
+            'categories': ['动画', '三维CG'],
+            'keywords': ['France Télévisions', '法国3台', '蠢萌', '冬奥会']
+        },
+    }, {
+        'url': 'https://www.xinpianchang.com/a11779743?from=IndexPick&part=%E7%BC%96%E8%BE%91%E7%B2%BE%E9%80%89&index=2',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id=video_id)
+        domain = self.find_value_with_regex(var='requireNewDomain', webpage=webpage)
+        vid = self.find_value_with_regex(var='vid', webpage=webpage)
+        app_key = self.find_value_with_regex(var='modeServerAppKey', webpage=webpage)
+        api = update_url_query(f'{domain}/mod/api/v2/media/{vid}', {'appKey': app_key})
+        data = self._download_json(api, video_id=video_id)['data']
+        formats, subtitles = [], {}
+        for k, v in data.get('resource').items():
+            if k in ('dash', 'hls'):
+                v_url = v.get('url')
+                if not v_url:
+                    continue
+                if k == 'dash':
+                    fmts, subs = self._extract_mpd_formats_and_subtitles(v_url, video_id=video_id)
+                elif k == 'hls':
+                    fmts, subs = self._extract_m3u8_formats_and_subtitles(v_url, video_id=video_id)
+                formats.extend(fmts)
+                subtitles = self._merge_subtitles(subtitles, subs)
+            elif k == 'progressive':
+                formats.extend([{
+                    'url': url_or_none(prog.get('url')),
+                    'width': int_or_none(prog.get('width')),
+                    'height': int_or_none(prog.get('height')),
+                    'ext': 'mp4',
+                } for prog 
in v if prog.get('url') or []])
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': data.get('title'),
+            'description': data.get('description'),
+            'duration': int_or_none(data.get('duration')),
+            'categories': data.get('categories'),
+            'keywords': data.get('keywords'),
+            'thumbnail': data.get('cover'),
+            'uploader': try_get(data, lambda x: x['owner']['username']),
+            'uploader_id': try_get(data, lambda x: x['owner']['id']),
+            'formats': formats,
+            'subtitles': subtitles,
+        }
+
+    def find_value_with_regex(self, var, webpage):
+        return self._search_regex(rf'var\s{var}\s=\s\"(?P<value>[^\"]+)\"', webpage, name=var)

From 08d30158ec8e7e08c1d83dcfde6dba18c95b2640 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Fri, 4 Mar 2022 19:38:55 +0530
Subject: [PATCH 0777/2552] [cleanup, docs] Misc cleanup

Closes #2828, closes #2734, closes #2802, closes #2937
---
 .gitignore                             |  2 +
 CONTRIBUTING.md                        | 12 +++++-
 CONTRIBUTORS                           |  4 +-
 Makefile                               |  2 +-
 README.md                              | 53 ++++++++++++-----------
 devscripts/prepare_manpage.py          |  6 ++-
 setup.py                               |  2 +-
 yt_dlp/YoutubeDL.py                    |  2 +
 yt_dlp/downloader/youtube_live_chat.py |  3 ++
 yt_dlp/extractor/abematv.py            | 16 +++----
 yt_dlp/extractor/ant1newsgr.py         |  4 +-
 yt_dlp/extractor/common.py             | 59 +++++++++++++-------------
 yt_dlp/extractor/frontendmasters.py    |  4 +-
 yt_dlp/extractor/iqiyi.py              |  2 +-
 yt_dlp/extractor/periscope.py          |  2 +-
 yt_dlp/extractor/soundcloud.py         | 16 +++----
 yt_dlp/extractor/sovietscloset.py      |  2 +
 yt_dlp/extractor/youtube.py            |  2 +
 yt_dlp/extractor/zingmp3.py            |  4 +-
 yt_dlp/utils.py                        |  4 +-
 20 files changed, 114 insertions(+), 87 deletions(-)

diff --git a/.gitignore b/.gitignore
index 5dc82ccbe..fd51ad66e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,6 +24,7 @@ cookies
 
 *.3gp
 *.ape
+*.ass
 *.avi
 *.desktop
 *.flac
@@ -106,6 +107,7 @@ yt-dlp.zip
 *.iml
 .vscode
 *.sublime-*
+*.code-workspace
 
 # Lazy extractors
 */extractor/lazy_extractors.py
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index bfa8ae410..dbd6a84b2 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -11,6 +11,7 @@
     - [Is anyone going to need the feature?](#is-anyone-going-to-need-the-feature)
     - [Is your question about yt-dlp?](#is-your-question-about-yt-dlp)
     - [Are you willing to share account details if needed?](#are-you-willing-to-share-account-details-if-needed)
+    - [Is the website primarily used for piracy](#is-the-website-primarily-used-for-piracy)
 - [DEVELOPER INSTRUCTIONS](#developer-instructions)
     - [Adding new feature or making overarching changes](#adding-new-feature-or-making-overarching-changes)
     - [Adding support for a new site](#adding-support-for-a-new-site)
@@ -24,6 +25,7 @@
     - [Collapse fallbacks](#collapse-fallbacks)
     - [Trailing parentheses](#trailing-parentheses)
     - [Use convenience conversion and parsing functions](#use-convenience-conversion-and-parsing-functions)
+    - [My pull request is labeled pending-fixes](#my-pull-request-is-labeled-pending-fixes)
 - [EMBEDDING YT-DLP](README.md#embedding-yt-dlp)
 
 
@@ -123,6 +125,10 @@ While these steps won't necessarily ensure that no misuse of the account takes p
 - Change the password before sharing the account to something random (use [this](https://passwordsgenerator.net/) if you don't have a random password generator).
 - Change the password after receiving the account back.
 
+### Is the website primarily used for piracy?
+
+We follow [youtube-dl's policy](https://github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free) to not support services that are primarily used for infringing copyright. Additionally, it has been decided not to support porn sites that specialize in deep fake. We also cannot support any service that serves only [DRM protected content](https://en.wikipedia.org/wiki/Digital_rights_management).
+
 
@@ -210,7 +216,7 @@ After you have ensured this site is distributing its content legally, you can fo
 }
 ```
 1. Add an import in [`yt_dlp/extractor/extractors.py`](yt_dlp/extractor/extractors.py).
-1. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, the tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. You can also run all the tests in one go with `TestDownload.test_YourExtractor_all`
+1. Run `python test/test_download.py TestDownload.test_YourExtractor` (note that `YourExtractor` doesn't end with `IE`). This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, the tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. You can also run all the tests in one go with `TestDownload.test_YourExtractor_all`
 1. Make sure you have at least one test for your extractor. Even if all videos covered by the extractor are expected to be inaccessible for automated testing, tests should still be added with a `skip` parameter indicating why the particular test is disabled from running.
 1. Have a look at [`yt_dlp/extractor/common.py`](yt_dlp/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](yt_dlp/extractor/common.py#L91-L426). Add tests and code for as many as you want.
 1. Make sure your code follows [yt-dlp coding conventions](#yt-dlp-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
@@ -658,6 +664,10 @@
 duration = float_or_none(video.get('durationMs'), scale=1000)
 view_count = int_or_none(video.get('views'))
 ```
 
+# My pull request is labeled pending-fixes
+
+The `pending-fixes` label is added when there are changes requested to a PR. When the necessary changes are made, the label should be removed. However, despite our best efforts, it may sometimes happen that the maintainer did not see the changes or forgot to remove the label. If your PR is still marked as `pending-fixes` a few days after all requested changes have been made, feel free to ping the maintainer who labeled your issue and ask them to re-review and remove the label.
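As a concrete companion to the convenience-function example above, here is a small self-contained sketch of how these helpers behave (illustrative only; the input values are invented, but `int_or_none` and `float_or_none` are the real helpers from `yt_dlp.utils`):

```python
from yt_dlp.utils import float_or_none, int_or_none

# Both helpers return None instead of raising when the input is missing or malformed
assert float_or_none('180500', scale=1000) == 180.5  # e.g. a durationMs field -> seconds
assert int_or_none('1337') == 1337
assert int_or_none(None) is None
assert float_or_none('n/a') is None
```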
+ diff --git a/CONTRIBUTORS b/CONTRIBUTORS index fd93e7df3..972af8596 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -146,7 +146,7 @@ chio0hai cntrl-s Deer-Spangle DEvmIb -Grabien +Grabien/MaximVol j54vc1bk mpeter50 mrpapersonic @@ -160,7 +160,7 @@ PilzAdam zmousm iw0nderhow unit193 -TwoThousandHedgehogs +TwoThousandHedgehogs/KathrynElrod Jertzukka cypheron Hyeeji diff --git a/Makefile b/Makefile index b65ec9515..b6cb27bb0 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites com clean-test: rm -rf test/testdata/sigs/player-*.js tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \ *.frag.aria2 *.frag.urls *.info.json *.live_chat.json *.meta *.part* *.tmp *.temp *.unknown_video *.ytdl \ - *.3gp *.ape *.avi *.desktop *.flac *.flv *.jpeg *.jpg *.m4a *.m4v *.mhtml *.mkv *.mov *.mp3 \ + *.3gp *.ape *.ass *.avi *.desktop *.flac *.flv *.jpeg *.jpg *.m4a *.m4v *.mhtml *.mkv *.mov *.mp3 \ *.mp4 *.ogg *.opus *.png *.sbv *.srt *.swf *.swp *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp clean-dist: rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \ diff --git a/README.md b/README.md index 81b5d417d..f24693c7b 100644 --- a/README.md +++ b/README.md @@ -112,7 +112,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t * **Other new options**: Many new options have been added such as `--concat-playlist`, `--print`, `--wait-for-video`, `--sleep-requests`, `--convert-thumbnails`, `--write-link`, `--force-download-archive`, `--force-overwrites`, `--break-on-reject` etc -* **Improvements**: Regex and other operators in `--match-filter`, multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection), merge multi-video/audio, multiple `--config-locations`, `--exec` at different stages, etc +* **Improvements**: Regex and other operators in `--format`/`--match-filter`, multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection), merge multi-video/audio, multiple `--config-locations`, `--exec` at different stages, etc * **Plugins**: Extractors and PostProcessors can be loaded from an external file. See [plugins](#plugins) for details @@ -130,7 +130,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu * The default [format sorting](#sorting-formats) is different from youtube-dl and prefers higher resolution and better codecs rather than higher bitrates. You can use the `--format-sort` option to change this to any order you prefer, or use `--compat-options format-sort` to use youtube-dl's sorting order * The default format selector is `bv*+ba/b`. This means that if a combined video + audio format that is better than the best video-only format is found, the former will be preferred. Use `-f bv+ba/b` or `--compat-options format-spec` to revert this * Unlike youtube-dlc, yt-dlp does not allow merging multiple audio/video streams into one file by default (since this conflicts with the use of `-f bv*+ba`). If needed, this feature must be enabled using `--audio-multistreams` and `--video-multistreams`. You can also use `--compat-options multistreams` to enable both -* `--ignore-errors` is enabled by default. Use `--abort-on-error` or `--compat-options abort-on-error` to abort on errors instead +* `--no-abort-on-error` is enabled by default. 
Use `--abort-on-error` or `--compat-options abort-on-error` to abort on errors instead * When writing metadata files such as thumbnails, description or infojson, the same information (if available) is also written for playlists. Use `--no-write-playlist-metafiles` or `--compat-options no-playlist-metafiles` to not write these files * `--add-metadata` attaches the `infojson` to `mkv` files in addition to writing the metadata when used with `--write-info-json`. Use `--no-embed-info-json` or `--compat-options no-attach-info-json` to revert this * Some metadata are embedded into different fields when using `--add-metadata` as compared to youtube-dl. Most notably, `comment` field contains the `webpage_url` and `synopsis` contains the `description`. You can [use `--parse-metadata`](#modifying-metadata) to modify this to your liking or use `--compat-options embed-metadata` to revert this @@ -267,7 +267,7 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly * [**pycryptodomex**](https://github.com/Legrandin/pycryptodome) - For decrypting AES-128 HLS streams and various other data. Licensed under [BSD2](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst) * [**websockets**](https://github.com/aaugustin/websockets) - For downloading over websocket. Licensed under [BSD3](https://github.com/aaugustin/websockets/blob/main/LICENSE) * [**secretstorage**](https://github.com/mitya57/secretstorage) - For accessing the Gnome keyring while decrypting cookies of Chromium-based browsers on Linux. Licensed under [BSD](https://github.com/mitya57/secretstorage/blob/master/LICENSE) -* [**AtomicParsley**](https://github.com/wez/atomicparsley) - For embedding thumbnail in mp4/m4a if mutagen is not present. Licensed under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING) +* [**AtomicParsley**](https://github.com/wez/atomicparsley) - For embedding thumbnail in mp4/m4a if mutagen/ffmpeg cannot. Licensed under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING) * [**brotli**](https://github.com/google/brotli) or [**brotlicffi**](https://github.com/python-hyper/brotlicffi) - [Brotli](https://en.wikipedia.org/wiki/Brotli) content encoding support. Both licensed under MIT [1](https://github.com/google/brotli/blob/master/LICENSE) [2](https://github.com/python-hyper/brotlicffi/blob/master/LICENSE) * [**rtmpdump**](http://rtmpdump.mplayerhq.hu) - For downloading `rtmp` streams. ffmpeg will be used as a fallback. Licensed under [GPLv2+](http://rtmpdump.mplayerhq.hu) * [**mplayer**](http://mplayerhq.hu/design7/info.html) or [**mpv**](https://mpv.io) - For downloading `rstp` streams. ffmpeg will be used as a fallback. Licensed under [GPLv2+](https://github.com/mpv-player/mpv/blob/master/Copyright) @@ -279,6 +279,7 @@ To use or redistribute the dependencies, you must agree to their respective lice The Windows and MacOS standalone release binaries are already built with the python interpreter, mutagen, pycryptodomex and websockets included. + **Note**: There are some regressions in newer ffmpeg versions that causes various issues when used alongside yt-dlp. Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds#ffmpeg-static-auto-builds) with patches for these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). 
See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds
 
@@ -606,11 +607,11 @@ You can also fork the project on github and run your fork's [build workflow](.gi
                                      --write-description etc. (default)
    --no-write-playlist-metafiles    Do not write playlist metadata when using
                                      --write-info-json, --write-description etc.
-   --clean-infojson                 Remove some private fields such as
+   --clean-info-json                Remove some private fields such as
                                      filenames from the infojson. Note that it
                                      could still contain some personal
                                      information (default)
-   --no-clean-infojson              Write all fields to the infojson
+   --no-clean-info-json             Write all fields to the infojson
    --write-comments                 Retrieve video comments to be placed in the
                                      infojson. The comments are fetched even
                                      without this option if the extraction is
@@ -1599,25 +1600,28 @@ This option also has a few special uses:
 * You can download an additional URL based on the metadata of the currently downloaded video. To do this, set the field `additional_urls` to the URL that you want to download. Eg: `--parse-metadata "description:(?P<additional_urls>https?://www\.vimeo\.com/\d+)"` will download the first vimeo video found in the description
 * You can use this to change the metadata that is embedded in the media file. To do this, set the value of the corresponding field with a `meta_` prefix. For example, any value you set to `meta_description` field will be added to the `description` field in the file. For example, you can use this to set a different "description" and "synopsis". To modify the metadata of individual streams, use the `meta<n>_` prefix (Eg: `meta1_language`). Any value set to the `meta_` field will overwrite all default values.
 
+**Note**: Metadata modification happens before format selection, post-extraction and other post-processing operations. Some fields may be added or changed during these steps, overriding your changes.
+
 For reference, these are the fields yt-dlp adds by default to the file metadata:
 
-Metadata fields|From
-:---|:---
-`title`|`track` or `title`
-`date`|`upload_date`
-`description`, `synopsis`|`description`
-`purl`, `comment`|`webpage_url`
-`track`|`track_number`
-`artist`|`artist`, `creator`, `uploader` or `uploader_id`
-`genre`|`genre`
-`album`|`album`
-`album_artist`|`album_artist`
-`disc`|`disc_number`
-`show`|`series`
-`season_number`|`season_number`
-`episode_id`|`episode` or `episode_id`
-`episode_sort`|`episode_number`
-`language` of each stream|From the format's `language`
+Metadata fields            | From
+:--------------------------|:------------------------------------------------
+`title`                    | `track` or `title`
+`date`                     | `upload_date`
+`description`, `synopsis`  | `description`
+`purl`, `comment`          | `webpage_url`
+`track`                    | `track_number`
+`artist`                   | `artist`, `creator`, `uploader` or `uploader_id`
+`genre`                    | `genre`
+`album`                    | `album`
+`album_artist`             | `album_artist`
+`disc`                     | `disc_number`
+`show`                     | `series`
+`season_number`            | `season_number`
+`episode_id`               | `episode` or `episode_id`
+`episode_sort`             | `episode_number`
+`language` of each stream  | the format's `language`
 
 **Note**: The file format may not support some of these fields
 
@@ -1816,12 +1820,11 @@ ydl_opts = {
     }],
     'logger': MyLogger(),
     'progress_hooks': [my_hook],
+    # Add custom headers
+    'http_headers': {'Referer': 'https://www.google.com'}
 }
 
-# Add custom headers
-yt_dlp.utils.std_headers.update({'Referer': 'https://www.google.com'})
-
 # ℹ️ See the public functions in yt_dlp.YoutubeDL for other available functions.
 # Eg: "ydl.download", "ydl.download_with_info_file"
 with yt_dlp.YoutubeDL(ydl_opts) as ydl:
diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py
index 6612723f7..29c675f8a 100644
--- a/devscripts/prepare_manpage.py
+++ b/devscripts/prepare_manpage.py
@@ -75,7 +75,11 @@ def filter_options(readme):
     section = re.search(r'(?sm)^# USAGE AND OPTIONS\n.+?(?=^# )', readme).group(0)
     options = '# OPTIONS\n'
     for line in section.split('\n')[1:]:
-        mobj = re.fullmatch(r'\s{4}(?P<opt>-(?:,\s|[^\s])+)(?:\s(?P<meta>([^\s]|\s(?!\s))+))?(\s{2,}(?P<desc>.+))?', line)
+        mobj = re.fullmatch(r'''(?x)
+            \s{4}(?P<opt>-(?:,\s|[^\s])+)
+            (?:\s(?P<meta>(?:[^\s]|\s(?!\s))+))?
+            (\s{2,}(?P<desc>.+))?
+        ''', line)
         if not mobj:
             options += f'{line.lstrip()}\n'
             continue
diff --git a/setup.py b/setup.py
index f08ae2309..98781c5da 100644
--- a/setup.py
+++ b/setup.py
@@ -21,7 +21,7 @@ DESCRIPTION = 'A youtube-dl fork with additional features and patches'
 LONG_DESCRIPTION = '\n\n'.join((
     'Official repository: <https://github.com/yt-dlp/yt-dlp>',
     '**PS**: Some links in this document will not work since this is a copy of the README.md from Github',
-    open('README.md', 'r', encoding='utf-8').read()))
+    open('README.md').read()))
 
 REQUIREMENTS = open('requirements.txt').read().splitlines()
 
 
 if sys.argv[1:2] == ['py2exe']:
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 51a89bd23..3ee3ed7d2 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -235,6 +235,8 @@ class YoutubeDL(object):
                        See "Sorting Formats" for more details.
     format_sort_force:   Force the given format_sort. see "Sorting Formats"
                        for more details.
+    prefer_free_formats: Whether to prefer video formats with free containers
+                       over non-free ones of same quality.
     allow_multiple_video_streams:   Allow multiple video streams to be merged
                        into a single file
     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
diff --git a/yt_dlp/downloader/youtube_live_chat.py b/yt_dlp/downloader/youtube_live_chat.py
index ef4205edc..fbf1c3d5a 100644
--- a/yt_dlp/downloader/youtube_live_chat.py
+++ b/yt_dlp/downloader/youtube_live_chat.py
@@ -22,6 +22,9 @@ class YoutubeLiveChatFD(FragmentFD):
     def real_download(self, filename, info_dict):
         video_id = info_dict['video_id']
         self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
+        if not self.params.get('skip_download'):
+            self.report_warning('Live chat download runs until the livestream ends. 
' + 'If you wish to download the video simultaneously, run a separate yt-dlp instance') fragment_retries = self.params.get('fragment_retries', 0) test = self.params.get('test', False) diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py index 66b12c72f..360fa4699 100644 --- a/yt_dlp/extractor/abematv.py +++ b/yt_dlp/extractor/abematv.py @@ -8,10 +8,6 @@ import struct from base64 import urlsafe_b64encode from binascii import unhexlify -import typing -if typing.TYPE_CHECKING: - from ..YoutubeDL import YoutubeDL - from .common import InfoExtractor from ..aes import aes_ecb_decrypt from ..compat import ( @@ -36,15 +32,15 @@ from ..utils import ( # NOTE: network handler related code is temporary thing until network stack overhaul PRs are merged (#2861/#2862) -def add_opener(self: 'YoutubeDL', handler): +def add_opener(ydl, handler): ''' Add a handler for opening URLs, like _download_webpage ''' # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426 # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605 - assert isinstance(self._opener, compat_urllib_request.OpenerDirector) - self._opener.add_handler(handler) + assert isinstance(ydl._opener, compat_urllib_request.OpenerDirector) + ydl._opener.add_handler(handler) -def remove_opener(self: 'YoutubeDL', handler): +def remove_opener(ydl, handler): ''' Remove handler(s) for opening URLs @param handler Either handler object itself or handler type. @@ -52,8 +48,8 @@ def remove_opener(self: 'YoutubeDL', handler): ''' # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426 # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605 - opener = self._opener - assert isinstance(self._opener, compat_urllib_request.OpenerDirector) + opener = ydl._opener + assert isinstance(ydl._opener, compat_urllib_request.OpenerDirector) if isinstance(handler, (type, tuple)): find_cp = lambda x: isinstance(x, handler) else: diff --git a/yt_dlp/extractor/ant1newsgr.py b/yt_dlp/extractor/ant1newsgr.py index 7d70e0427..1075b461e 100644 --- a/yt_dlp/extractor/ant1newsgr.py +++ b/yt_dlp/extractor/ant1newsgr.py @@ -97,8 +97,8 @@ class Ant1NewsGrArticleIE(Ant1NewsGrBaseIE): embed_urls = list(Ant1NewsGrEmbedIE._extract_urls(webpage)) if not embed_urls: raise ExtractorError('no videos found for %s' % video_id, expected=True) - return self.url_result_or_playlist_from_matches( - embed_urls, video_id, info['title'], ie=Ant1NewsGrEmbedIE.ie_key(), + return self.playlist_from_matches( + embed_urls, video_id, info.get('title'), ie=Ant1NewsGrEmbedIE.ie_key(), video_kwargs={'url_transparent': True, 'timestamp': info.get('timestamp')}) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 5b7de1296..354814433 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -226,6 +226,7 @@ class InfoExtractor(object): The following fields are optional: + direct: True if a direct video file was given (must only be set by GenericIE) alt_title: A secondary title of the video. display_id An alternative identifier for the video, not necessarily unique, but available before title. Typically, id is @@ -274,7 +275,7 @@ class InfoExtractor(object): * "url": A URL pointing to the subtitles file It can optionally also have: * "name": Name or description of the subtitles - * http_headers: A dictionary of additional HTTP headers + * "http_headers": A dictionary of additional HTTP headers to add to the request. 
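                    For illustration, a hypothetical entry (actual values are
                    site-dependent) could look like:
                        {'en': [{'url': 'https://example.com/sub.vtt',
                                 'ext': 'vtt', 'name': 'English'}]}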
"ext" will be calculated from URL if missing automatic_captions: Like 'subtitles'; contains automatically generated @@ -425,8 +426,8 @@ class InfoExtractor(object): title, description etc. - Subclasses of this one should re-define the _real_initialize() and - _real_extract() methods and define a _VALID_URL regexp. + Subclasses of this should define a _VALID_URL regexp and, re-define the + _real_extract() and (optionally) _real_initialize() methods. Probably, they should also be added to the list of extractors. Subclasses may also override suitable() if necessary, but ensure the function @@ -661,7 +662,7 @@ class InfoExtractor(object): return False def set_downloader(self, downloader): - """Sets the downloader for this IE.""" + """Sets a YoutubeDL instance as the downloader for this IE.""" self._downloader = downloader def _real_initialize(self): @@ -670,7 +671,7 @@ class InfoExtractor(object): def _real_extract(self, url): """Real extraction process. Redefine in subclasses.""" - pass + raise NotImplementedError('This method must be implemented by subclasses') @classmethod def ie_key(cls): @@ -1661,31 +1662,31 @@ class InfoExtractor(object): 'format_id': {'type': 'alias', 'field': 'id'}, 'preference': {'type': 'alias', 'field': 'ie_pref'}, 'language_preference': {'type': 'alias', 'field': 'lang'}, - - # Deprecated - 'dimension': {'type': 'alias', 'field': 'res'}, - 'resolution': {'type': 'alias', 'field': 'res'}, - 'extension': {'type': 'alias', 'field': 'ext'}, - 'bitrate': {'type': 'alias', 'field': 'br'}, - 'total_bitrate': {'type': 'alias', 'field': 'tbr'}, - 'video_bitrate': {'type': 'alias', 'field': 'vbr'}, - 'audio_bitrate': {'type': 'alias', 'field': 'abr'}, - 'framerate': {'type': 'alias', 'field': 'fps'}, - 'protocol': {'type': 'alias', 'field': 'proto'}, 'source_preference': {'type': 'alias', 'field': 'source'}, + 'protocol': {'type': 'alias', 'field': 'proto'}, 'filesize_approx': {'type': 'alias', 'field': 'fs_approx'}, - 'filesize_estimate': {'type': 'alias', 'field': 'size'}, - 'samplerate': {'type': 'alias', 'field': 'asr'}, - 'video_ext': {'type': 'alias', 'field': 'vext'}, - 'audio_ext': {'type': 'alias', 'field': 'aext'}, - 'video_codec': {'type': 'alias', 'field': 'vcodec'}, - 'audio_codec': {'type': 'alias', 'field': 'acodec'}, - 'video': {'type': 'alias', 'field': 'hasvid'}, - 'has_video': {'type': 'alias', 'field': 'hasvid'}, - 'audio': {'type': 'alias', 'field': 'hasaud'}, - 'has_audio': {'type': 'alias', 'field': 'hasaud'}, - 'extractor': {'type': 'alias', 'field': 'ie_pref'}, - 'extractor_preference': {'type': 'alias', 'field': 'ie_pref'}, + + # Deprecated + 'dimension': {'type': 'alias', 'field': 'res', 'deprecated': True}, + 'resolution': {'type': 'alias', 'field': 'res', 'deprecated': True}, + 'extension': {'type': 'alias', 'field': 'ext', 'deprecated': True}, + 'bitrate': {'type': 'alias', 'field': 'br', 'deprecated': True}, + 'total_bitrate': {'type': 'alias', 'field': 'tbr', 'deprecated': True}, + 'video_bitrate': {'type': 'alias', 'field': 'vbr', 'deprecated': True}, + 'audio_bitrate': {'type': 'alias', 'field': 'abr', 'deprecated': True}, + 'framerate': {'type': 'alias', 'field': 'fps', 'deprecated': True}, + 'filesize_estimate': {'type': 'alias', 'field': 'size', 'deprecated': True}, + 'samplerate': {'type': 'alias', 'field': 'asr', 'deprecated': True}, + 'video_ext': {'type': 'alias', 'field': 'vext', 'deprecated': True}, + 'audio_ext': {'type': 'alias', 'field': 'aext', 'deprecated': True}, + 'video_codec': {'type': 'alias', 'field': 'vcodec', 
'deprecated': True}, + 'audio_codec': {'type': 'alias', 'field': 'acodec', 'deprecated': True}, + 'video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True}, + 'has_video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True}, + 'audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True}, + 'has_audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True}, + 'extractor': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True}, + 'extractor_preference': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True}, } def __init__(self, ie, field_preference): @@ -1785,7 +1786,7 @@ class InfoExtractor(object): continue if self._get_field_setting(field, 'type') == 'alias': alias, field = field, self._get_field_setting(field, 'field') - if alias not in ('format_id', 'preference', 'language_preference'): + if self._get_field_setting(alias, 'deprecated'): self.ydl.deprecation_warning( f'Format sorting alias {alias} is deprecated ' f'and may be removed in a future version. Please use {field} instead') diff --git a/yt_dlp/extractor/frontendmasters.py b/yt_dlp/extractor/frontendmasters.py index 40b8cb0b4..0d29da29b 100644 --- a/yt_dlp/extractor/frontendmasters.py +++ b/yt_dlp/extractor/frontendmasters.py @@ -252,9 +252,9 @@ class FrontendMastersCourseIE(FrontendMastersPageBaseIE): entries = [] for lesson in lessons: lesson_name = lesson.get('slug') - if not lesson_name: - continue lesson_id = lesson.get('hash') or lesson.get('statsId') + if not lesson_id or not lesson_name: + continue entries.append(self._extract_lesson(chapters, lesson_id, lesson)) title = course.get('title') diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py index 74e20a54a..fdcf14469 100644 --- a/yt_dlp/extractor/iqiyi.py +++ b/yt_dlp/extractor/iqiyi.py @@ -621,7 +621,7 @@ class IqIE(InfoExtractor): preview_time = traverse_obj( initial_format_data, ('boss_ts', (None, 'data'), ('previewTime', 'rtime')), expected_type=float_or_none, get_all=False) if traverse_obj(initial_format_data, ('boss_ts', 'data', 'prv'), expected_type=int_or_none): - self.report_warning('This preview video is limited%s' % format_field(preview_time, template='to %s seconds')) + self.report_warning('This preview video is limited%s' % format_field(preview_time, template=' to %s seconds')) # TODO: Extract audio-only formats for bid in set(traverse_obj(initial_format_data, ('program', 'video', ..., 'bid'), expected_type=str_or_none, default=[])): diff --git a/yt_dlp/extractor/periscope.py b/yt_dlp/extractor/periscope.py index b93a02b7d..1a292b8ac 100644 --- a/yt_dlp/extractor/periscope.py +++ b/yt_dlp/extractor/periscope.py @@ -33,7 +33,7 @@ class PeriscopeBaseIE(InfoExtractor): return { 'id': broadcast.get('id') or video_id, - 'title': self._live_title(title) if is_live else title, + 'title': title, 'timestamp': parse_iso8601(broadcast.get('created_at')), 'uploader': uploader, 'uploader_id': broadcast.get('user_id') or broadcast.get('username'), diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index 8146b3ef5..64b8a71b6 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -59,8 +59,16 @@ class SoundcloudEmbedIE(InfoExtractor): class SoundcloudBaseIE(InfoExtractor): + _NETRC_MACHINE = 'soundcloud' + _API_V2_BASE = 'https://api-v2.soundcloud.com/' _BASE_URL = 'https://soundcloud.com/' + _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36' + _API_AUTH_QUERY_TEMPLATE = '?client_id=%s' + 
_API_AUTH_URL_PW = 'https://api-auth.soundcloud.com/web-auth/sign-in/password%s' + _API_VERIFY_AUTH_TOKEN = 'https://api-auth.soundcloud.com/connect/session%s' + _access_token = None + _HEADERS = {} def _store_client_id(self, client_id): self._downloader.cache.store('soundcloud', 'client_id', client_id) @@ -103,14 +111,6 @@ class SoundcloudBaseIE(InfoExtractor): self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or 'a3e059563d7fd3372b49b37f00a00bcf' self._login() - _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36' - _API_AUTH_QUERY_TEMPLATE = '?client_id=%s' - _API_AUTH_URL_PW = 'https://api-auth.soundcloud.com/web-auth/sign-in/password%s' - _API_VERIFY_AUTH_TOKEN = 'https://api-auth.soundcloud.com/connect/session%s' - _access_token = None - _HEADERS = {} - _NETRC_MACHINE = 'soundcloud' - def _login(self): username, password = self._get_login_info() if username is None: diff --git a/yt_dlp/extractor/sovietscloset.py b/yt_dlp/extractor/sovietscloset.py index daf1c7450..4bc2263f0 100644 --- a/yt_dlp/extractor/sovietscloset.py +++ b/yt_dlp/extractor/sovietscloset.py @@ -67,6 +67,7 @@ class SovietsClosetIE(SovietsClosetBaseIE): 'series': 'The Witcher', 'season': 'Misc', 'episode_number': 13, + 'episode': 'Episode 13', }, }, { @@ -92,6 +93,7 @@ class SovietsClosetIE(SovietsClosetBaseIE): 'series': 'Arma 3', 'season': 'Zeus Games', 'episode_number': 3, + 'episode': 'Episode 3', }, }, ] diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 041815a19..6451c08c0 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3094,6 +3094,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Some formats may have much smaller duration than others (possibly damaged during encoding) # Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823 is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) < approx_duration - 10000) + if is_damaged: + self.report_warning(f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True) dct = { 'asr': int_or_none(fmt.get('audioSampleRate')), 'filesize': int_or_none(fmt.get('contentLength')), diff --git a/yt_dlp/extractor/zingmp3.py b/yt_dlp/extractor/zingmp3.py index f84ba5cff..22c62e22e 100644 --- a/yt_dlp/extractor/zingmp3.py +++ b/yt_dlp/extractor/zingmp3.py @@ -149,7 +149,7 @@ class ZingMp3IE(ZingMp3BaseIE): }, }, { 'url': 'https://zingmp3.vn/video-clip/Suong-Hoa-Dua-Loi-K-ICM-RYO/ZO8ZF7C7.html', - 'md5': 'e9c972b693aa88301ef981c8151c4343', + 'md5': 'c7f23d971ac1a4f675456ed13c9b9612', 'info_dict': { 'id': 'ZO8ZF7C7', 'title': 'Sương Hoa Đưa Lối', @@ -158,6 +158,8 @@ class ZingMp3IE(ZingMp3BaseIE): 'duration': 207, 'track': 'Sương Hoa Đưa Lối', 'artist': 'K-ICM, RYO', + 'album': 'Sương Hoa Đưa Lối (Single)', + 'album_artist': 'K-ICM, RYO', }, }, { 'url': 'https://zingmp3.vn/embed/song/ZWZEI76B?start=false', diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index f6e41f837..d2a9303c7 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -1030,7 +1030,7 @@ def make_HTTPS_handler(params, **kwargs): def bug_reports_message(before=';'): msg = ('please report this issue on https://github.com/yt-dlp/yt-dlp , ' 'filling out the "Broken site" issue template properly. 
' - 'Confirm you are on the latest version using -U') + 'Confirm you are on the latest version using yt-dlp -U') before = before.rstrip() if not before or before.endswith(('.', '!', '?')): @@ -5481,5 +5481,5 @@ has_websockets = bool(compat_websockets) def merge_headers(*dicts): - """Merge dicts of network headers case insensitively, prioritizing the latter ones""" + """Merge dicts of http headers case insensitively, prioritizing the latter ones""" return {k.capitalize(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))} From a405b38f204fa1988e2d6930943525e42dd3d8d3 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 8 Mar 2022 23:02:25 +0530 Subject: [PATCH 0778/2552] [youtube] Further de-prioritize 3gp format --- yt_dlp/extractor/youtube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 6451c08c0..da49df8cd 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3115,7 +3115,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'language': join_nonempty(audio_track.get('id', '').split('.')[0], 'desc' if language_preference < -1 else ''), 'language_preference': language_preference, - 'preference': -10 if is_damaged else None, + # Strictly de-prioritize damaged and 3gp formats + 'preference': -10 if is_damaged else -2 if itag == '17' else None, } mime_mobj = re.match( r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '') From 9461cb586a07f06cd51dba1fe9d743dd7f5cff84 Mon Sep 17 00:00:00 2001 From: P-reducible Date: Tue, 8 Mar 2022 20:42:00 +0100 Subject: [PATCH 0779/2552] [Rokfin] Fix `availability` (#1534) Authored by: P-reducible --- yt_dlp/extractor/rokfin.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/rokfin.py b/yt_dlp/extractor/rokfin.py index 79a5b2336..0fd65db4b 100644 --- a/yt_dlp/extractor/rokfin.py +++ b/yt_dlp/extractor/rokfin.py @@ -100,7 +100,7 @@ class RokfinIE(InfoExtractor): video_url, video_id, fatal=False, live=live_status == 'is_live') if not formats: - if metadata.get('premiumPlan'): + if traverse_obj(metadata, 'premiumPlan', 'premium'): self.raise_login_required('This video is only available to premium users', True, method='cookies') elif scheduled: self.raise_no_formats( @@ -129,7 +129,7 @@ class RokfinIE(InfoExtractor): 'tags': traverse_obj(metadata, ('tags', ..., 'title'), expected_type=str_or_none), 'live_status': live_status, 'availability': self._availability( - needs_premium=bool(metadata.get('premiumPlan')), + needs_premium=bool(traverse_obj(metadata, 'premiumPlan', 'premium')), is_private=False, needs_subscription=False, needs_auth=False, is_unlisted=False), # 'comment_count': metadata.get('numComments'), # Data provided by website is wrong '__post_extractor': self.extract_comments(video_id) if video_type == 'post' else None, From 535eb16a44cd6c31c7b96f5e1dedc45f9e5e23c4 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 9 Mar 2022 01:14:03 +0530 Subject: [PATCH 0780/2552] Release 2022.03.08 --- CONTRIBUTORS | 20 ++++++++ Changelog.md | 128 ++++++++++++++++++++++++++++++++++++++++++++++ supportedsites.md | 49 ++++++++++++++++-- 3 files changed, 192 insertions(+), 5 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 972af8596..8d62c04fb 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -194,3 +194,23 @@ KiberInfinity tejing1 Bricio lazypete365 +Aniruddh-J +blackgear +CplPwnies +cyberfox1691 +FestplattenSchnitzel +hatienl0i261299 +iphoting +jakeogh +lukasfink1 +lyz-code 
+marieell +mdpauley +Mipsters +mxmehl +ofkz +P-reducible +pycabbage +regarten +Ronnnny +schn0sch diff --git a/Changelog.md b/Changelog.md index 6f564caa0..66fdbe503 100644 --- a/Changelog.md +++ b/Changelog.md @@ -11,6 +11,134 @@ --> +### 2022.03.08 + +* Merge youtube-dl: Upto [commit/6508688](https://github.com/ytdl-org/youtube-dl/commit/6508688e88c83bb811653083db9351702cd39a6a) (except NDR) +* Add regex operator and quoting to format filters by [lukasfink1](https://github.com/lukasfink1) +* Add brotli content-encoding support by [coletdjnz](https://github.com/coletdjnz) +* Add pre-processor stage `after_filter` +* Better error message when no `--live-from-start` format +* Create necessary directories for `--print-to-file` +* Fill more fields for playlists by [Lesmiscore](https://github.com/Lesmiscore) +* Fix `-all` for `--sub-langs` +* Fix doubling of `video_id` in `ExtractorError` +* Fix for when stdout/stderr encoding is `None` +* Handle negative duration from extractor +* Implement `--add-header` without modifying `std_headers` +* Obey `--abort-on-error` for "ffmpeg not installed" +* Set `webpage_url_...` from `webpage_url` and not input URL +* Tolerate failure to `--write-link` due to unknown URL +* [aria2c] Add `--http-accept-gzip=true` +* [build] Update pyinstaller to 4.10 by [shirt-dev](https://github.com/shirt-dev) +* [cookies] Update MacOS12 `Cookies.binarycookies` location by [mdpauley](https://github.com/mdpauley) +* [devscripts] Improve `prepare_manpage` +* [downloader] Do not use aria2c for non-native `m3u8` +* [downloader] Obey `--file-access-retries` when deleting/renaming by [ehoogeveen-medweb](https://github.com/ehoogeveen-medweb) +* [extractor] Allow `http_headers` to be specified for `thumbnails` +* [extractor] Extract subtitles from manifests for vimeo, globo, kaltura, svt by [fstirlitz](https://github.com/fstirlitz) +* [extractor] Fix for manifests without period duration by [dirkf,](https://github.com/dirkf,) [pukkandan](https://github.com/pukkandan) +* [extractor] Support `--mark-watched` without `_NETRC_MACHINE` by [coletdjnz](https://github.com/coletdjnz) +* [FFmpegConcat] Abort on `--simulate` +* [FormatSort] Consider `acodec`=`ogg` as `vorbis` +* [fragment] Fix bugs around resuming with Range by [Lesmiscore](https://github.com/Lesmiscore) +* [fragment] Improve `--live-from-start` for YouTube livestreams by [Lesmiscore](https://github.com/Lesmiscore) +* [generic] Pass referer to extracted formats +* [generic] Set rss `guid` as video id by [Bricio](https://github.com/Bricio) +* [options] Better ambiguous option resolution +* [options] Rename `--clean-infojson` to `--clean-info-json` +* [SponsorBlock] Fixes for highlight and "full video labels" by [nihil-admirari](https://github.com/nihil-admirari) +* [Sponsorblock] minor fixes by [nihil-admirari](https://github.com/nihil-admirari) +* [utils] Better traceback for `ExtractorError` +* [utils] Fix file locking for AOSP by [jakeogh](https://github.com/jakeogh) +* [utils] Improve file locking +* [utils] OnDemandPagedList: Do not download pages after error +* [utils] render_table: Fix character calculation for removing extra gap by [Lesmiscore](https://github.com/Lesmiscore) +* [utils] Use `locked_file` for `sanitize_open` by [jakeogh](https://github.com/jakeogh) +* [utils] Validate `DateRange` input +* [utils] WebSockets wrapper for non-async functions by [Lesmiscore](https://github.com/Lesmiscore) +* [cleanup] Don't pass protocol to `_extract_m3u8_formats` for live videos +* [cleanup] Remove extractors for some dead 
websites by [marieell](https://github.com/marieell) +* [cleanup, docs] Misc cleanup +* [AbemaTV] Add extractors by [Lesmiscore](https://github.com/Lesmiscore) +* [adobepass] Add Suddenlink MSO by [CplPwnies](https://github.com/CplPwnies) +* [ant1newsgr] Add extractor by [zmousm](https://github.com/zmousm) +* [bigo] Add extractor by [Lesmiscore](https://github.com/Lesmiscore) +* [Caltrans] Add extractor by [Bricio](https://github.com/Bricio) +* [daystar] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299) +* [fc2:live] Add extractor by [Lesmiscore](https://github.com/Lesmiscore) +* [fptplay] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299) +* [murrtube] Add extractor by [cyberfox1691](https://github.com/cyberfox1691) +* [nfb] Add extractor by [ofkz](https://github.com/ofkz) +* [niconico] Add playlist extractors and refactor by [Lesmiscore](https://github.com/Lesmiscore) +* [peekvids] Add extractor by [schn0sch](https://github.com/schn0sch) +* [piapro] Add extractor by [pycabbage,](https://github.com/pycabbage,) [Lesmiscore](https://github.com/Lesmiscore) +* [rokfin] Add extractor by [P-reducible,](https://github.com/P-reducible,) [pukkandan](https://github.com/pukkandan) +* [rokfin] Add stack and channel extractors by [P-reducible,](https://github.com/P-reducible,) [pukkandan](https://github.com/pukkandan) +* [ruv.is] Add extractor by [iw0nderhow](https://github.com/iw0nderhow) +* [telegram] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299) +* [VideocampusSachsen] Add extractors by [FestplattenSchnitzel](https://github.com/FestplattenSchnitzel) +* [xinpianchang] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299) +* [abc] Support 1080p by [Ronnnny](https://github.com/Ronnnny) +* [afreecatv] Support password-protected livestreams by [wlritchi](https://github.com/wlritchi) +* [ard] Fix valid URL +* [ATVAt] Detect geo-restriction by [marieell](https://github.com/marieell) +* [bandcamp] Detect acodec +* [bandcamp] Fix user URLs by [lyz-code](https://github.com/lyz-code) +* [bbc] Fix extraction of news articles by [ajj8](https://github.com/ajj8) +* [beeg] Fix extractor by [Bricio](https://github.com/Bricio) +* [bigo] Fix extractor to not to use `form_params` +* [Bilibili] Pass referer for all formats by [blackgear](https://github.com/blackgear) +* [Biqle] Fix extractor by [Bricio](https://github.com/Bricio) +* [ccma] Fix timestamp parsing by [nyuszika7h](https://github.com/nyuszika7h) +* [crunchyroll] Better error reporting on login failure by [tejing1](https://github.com/tejing1) +* [cspan] Support of C-Span congress videos by [Grabien](https://github.com/Grabien) +* [dropbox] fix regex by [zenerdi0de](https://github.com/zenerdi0de) +* [fc2] Fix extraction by [Lesmiscore](https://github.com/Lesmiscore) +* [fujitv] Extract resolution for free sources by [YuenSzeHong](https://github.com/YuenSzeHong) +* [Gettr] Add `GettrStreamingIE` by [i6t](https://github.com/i6t) +* [Gettr] Fix formats order by [i6t](https://github.com/i6t) +* [Gettr] Improve extractor by [i6t](https://github.com/i6t) +* [globo] Expand valid URL by [Bricio](https://github.com/Bricio) +* [lbry] Fix `--ignore-no-formats-error` +* [manyvids] Extract `uploader` by [regarten](https://github.com/regarten) +* [mildom] Fix linter +* [mildom] Rework extractors by [Lesmiscore](https://github.com/Lesmiscore) +* [mirrativ] Cleanup extractor code by [Lesmiscore](https://github.com/Lesmiscore) +* [nhk] Add support for NHK for School by 
[Lesmiscore](https://github.com/Lesmiscore) +* [niconico:tag] Add support for searching tags +* [nrk] Add fallback API +* [peekvids] Use JSON-LD by [schn0sch](https://github.com/schn0sch) +* [peertube] Add media.fsfe.org by [mxmehl](https://github.com/mxmehl) +* [rtvs] Fix extractor by [Bricio](https://github.com/Bricio) +* [spiegel] Fix `_VALID_URL` +* [ThumbnailsConvertor] Support `webp` +* [tiktok] Fix `vm.tiktok`/`vt.tiktok` URLs +* [tubitv] Fix/improve TV series extraction by [bbepis](https://github.com/bbepis) +* [tumblr] Fix extractor by [foghawk](https://github.com/foghawk) +* [twitcasting] Add fallback for finding running live by [Lesmiscore](https://github.com/Lesmiscore) +* [TwitCasting] Check for password protection by [Lesmiscore](https://github.com/Lesmiscore) +* [twitcasting] Fix extraction by [Lesmiscore](https://github.com/Lesmiscore) +* [twitch] Fix field name of `view_count` +* [twitter] Fix for private videos by [iphoting](https://github.com/iphoting) +* [washingtonpost] Fix extractor by [Bricio](https://github.com/Bricio) +* [youtube:tab] Add `approximate_date` extractor-arg +* [youtube:tab] Follow redirect to regional channel by [coletdjnz](https://github.com/coletdjnz) +* [youtube:tab] Reject webpage data if redirected to home page +* [youtube] De-prioritize potentially damaged formats +* [youtube] Differentiate descriptive audio by language code +* [youtube] Ensure subtitle urls are absolute by [coletdjnz](https://github.com/coletdjnz) +* [youtube] Escape possible `$` in `_extract_n_function_name` regex by [Lesmiscore](https://github.com/Lesmiscore) +* [youtube] Fix automatic captions +* [youtube] Fix n-sig extraction for phone player JS by [MinePlayersPE](https://github.com/MinePlayersPE) +* [youtube] Further de-prioritize 3gp format +* [youtube] Label original auto-subs +* [youtube] Prefer UTC upload date for videos by [coletdjnz](https://github.com/coletdjnz) +* [zaq1] Remove dead extractor by [marieell](https://github.com/marieell) +* [zee5] Support web-series by [Aniruddh-J](https://github.com/Aniruddh-J) +* [zingmp3] Fix extractor by [hatienl0i261299](https://github.com/hatienl0i261299) +* [zoom] Add support for screen cast by [Mipsters](https://github.com/Mipsters) + + ### 2022.02.04 * [youtube:search] Fix extractor by [coletdjnz](https://github.com/coletdjnz) diff --git a/supportedsites.md b/supportedsites.md index 7166dc53a..46ad1328d 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -24,6 +24,8 @@ - **abcnews:video** - **abcotvs**: ABC Owned Television Stations - **abcotvs:clips** + - **AbemaTV** + - **AbemaTVTitle** - **AcademicEarth:Course** - **acast** - **acast:channel** @@ -45,6 +47,8 @@ - **AlJazeera** - **Allocine** - **AlphaPorno** + - **Alsace20TV** + - **Alsace20TVEmbed** - **Alura** - **AluraCourse** - **Amara** @@ -58,6 +62,9 @@ - **AnimeLab** - **AnimeLabShows** - **AnimeOnDemand** + - **ant1newsgr:article**: ant1news.gr articles + - **ant1newsgr:embed**: ant1news.gr embedded videos + - **ant1newsgr:watch**: ant1news.gr videos - **Anvato** - **aol.com**: Yahoo screen and movies - **APA** @@ -75,6 +82,7 @@ - **Arkena** - **arte.sky.it** - **ArteTV** + - **ArteTVCategory** - **ArteTVEmbed** - **ArteTVPlaylist** - **AsianCrush** @@ -99,8 +107,8 @@ - **bandaichannel** - **Bandcamp** - **Bandcamp:album** + - **Bandcamp:user** - **Bandcamp:weekly** - - **BandcampMusic** - **bangumi.bilibili.com**: BiliBili番剧 - **BannedVideo** - **bbc**: BBC @@ -122,6 +130,7 @@ - **bfmtv:live** - **BibelTV** - **Bigflix** + - **Bigo** - **Bild**: Bild.de - 
**BiliBili** - **Bilibili category extractor** @@ -163,6 +172,7 @@ - **BYUtv** - **CableAV** - **Callin** + - **Caltrans** - **CAM4** - **Camdemy** - **CamdemyFolder** @@ -231,6 +241,8 @@ - **Coub** - **CozyTV** - **cp24** + - **cpac** + - **cpac:playlist** - **Cracked** - **Crackle** - **CrooksAndLiars** @@ -241,6 +253,7 @@ - **crunchyroll:playlist** - **crunchyroll:playlist:beta** - **CSpan**: C-SPAN + - **CSpanCongress** - **CtsNews**: 華視新聞 - **CTV** - **CTVNews** @@ -262,6 +275,7 @@ - **daum.net:clip** - **daum.net:playlist** - **daum.net:user** + - **daystar:clip** - **DBTV** - **DctpTv** - **DeezerAlbum** @@ -353,6 +367,7 @@ - **faz.net** - **fc2** - **fc2:embed** + - **fc2:live** - **Fczenit** - **Filmmodu** - **filmon** @@ -372,6 +387,7 @@ - **foxnews**: Fox News and Fox Business Video - **foxnews:article** - **FoxSports** + - **fptplay**: fptplay.vn - **FranceCulture** - **FranceInter** - **FranceTV** @@ -410,6 +426,7 @@ - **gem.cbc.ca:playlist** - **generic**: Generic downloader that works on some sites - **Gettr** + - **GettrStreaming** - **Gfycat** - **GiantBomb** - **Giga** @@ -622,8 +639,9 @@ - **MiaoPai** - **microsoftstream**: Microsoft Stream - **mildom**: Record ongoing live by specific user in Mildom + - **mildom:clip**: Clip in Mildom - **mildom:user:vod**: Download all VODs from specific user in Mildom - - **mildom:vod**: Download a VOD in Mildom + - **mildom:vod**: VOD in Mildom - **minds** - **minds:channel** - **minds:group** @@ -666,6 +684,8 @@ - **mtvservices:embedded** - **MTVUutisetArticle** - **MuenchenTV**: münchen.tv + - **Murrtube** + - **MurrtubeUser**: Murrtube user profile - **MuseScore** - **MusicdexAlbum** - **MusicdexArtist** @@ -734,9 +754,13 @@ - **NextTV**: 壹電視 - **Nexx** - **NexxEmbed** + - **NFB** - **NFHSNetwork** - **nfl.com** (Currently broken) - **nfl.com:article** (Currently broken) + - **NhkForSchoolBangumi** + - **NhkForSchoolProgramList** + - **NhkForSchoolSubject**: Portal page for each school subjects, like Japanese (kokugo, 国語) or math (sansuu/suugaku or 算数・数学) - **NhkVod** - **NhkVodProgram** - **nhl.com** @@ -746,7 +770,10 @@ - **nickelodeonru** - **nicknight** - **niconico**: ニコニコ動画 - - **NiconicoPlaylist** + - **niconico:history**: NicoNico user history. Requires cookies. 
+ - **niconico:playlist** + - **niconico:series** + - **niconico:tag**: NicoNico video tag URLs - **NiconicoUser** - **nicovideo:search**: Nico video search; "nicosearch:" prefix - **nicovideo:search:date**: Nico video search, newest first; "nicosearchdate:" prefix @@ -845,6 +872,7 @@ - **PatreonUser** - **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC) - **PearVideo** + - **PeekVids** - **peer.tv** - 
**PeerTube** - **PeerTube:Playlist** @@ -857,6 +885,7 @@ - **PhilharmonieDeParis**: Philharmonie de Paris - **phoenix.de** - **Photobucket** + - **Piapro** - **Picarto** - **PicartoVod** - **Piksel** @@ -876,6 +905,7 @@ - **PlaysTV** - **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz - **Playvid** + - **PlayVids** - **Playwire** - **pluralsight** - **pluralsight:course** @@ -980,6 +1010,9 @@ - **RICE** - **RMCDecouverte** - **RockstarGames** + - **Rokfin** + - **rokfin:channel** + - **rokfin:stack** - **RoosterTeeth** - **RoosterTeethSeries** - **RottenTomatoes** @@ -1019,6 +1052,7 @@ - **RUTV**: RUTV.RU - **Ruutu** - **Ruv** + - **ruv.is:spila** - **safari**: safaribooksonline.com online video - **safari:api** - **safari:course**: safaribooksonline.com online courses @@ -1158,6 +1192,7 @@ - **TeleBruxelles** - **Telecinco**: telecinco.es, cuatro.com and mediaset.es - **Telegraaf** + - **telegram:embed** - **TeleMB** - **Telemundo** - **TeleQuebec** @@ -1319,6 +1354,8 @@ - **video.google:search**: Google Video search; "gvsearch:" prefix - **video.sky.it** - **video.sky.it:live** + - **VideocampusSachsen** + - **VideocampusSachsenEmbed** - **VideoDetective** - **videofy.me** - **videomore** @@ -1361,6 +1398,7 @@ - **vlive** - **vlive:channel** - **vlive:post** + - **vm.tiktok** - **Vodlocker** - **VODPl** - **VODPlatform** @@ -1395,7 +1433,7 @@ - **WatchBox** - **WatchIndianPorn**: Watch Indian Porn - **WDR** - - **wdr:mobile** + - **wdr:mobile** (Currently broken) - **WDRElefant** - **WDRPage** - **web.archive:youtube**: web.archive.org saved youtube videos, "ytarchive:" prefix @@ -1430,6 +1468,7 @@ - **xiami:song**: 虾米音乐 - **ximalaya**: 喜马拉雅FM - **ximalaya:album**: 喜马拉雅FM 专辑 + - **xinpianchang**: xinpianchang.com - **XMinus** - **XNXX** - **Xstream** @@ -1488,7 +1527,7 @@ - **ZenYandex** - **ZenYandexChannel** - **Zhihu** - - **zingmp3**: mp3.zing.vn + - **zingmp3**: zingmp3.vn - **zingmp3:album** - **zoom** - **Zype** From 1eae7f94c1609a6bdd37c21a7b60d4000d6e3852 Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 8 Mar 2022 19:57:15 +0000 Subject: [PATCH 0781/2552] [version] update Created by: pukkandan :ci skip all --- .github/ISSUE_TEMPLATE/1_broken_site.yml | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.yml | 6 +++--- .github/ISSUE_TEMPLATE/3_site_feature_request.yml | 6 +++--- .github/ISSUE_TEMPLATE/4_bug_report.yml | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.yml | 2 +- Changelog.md | 8 ++++---- yt_dlp/version.py | 4 ++-- 7 files changed, 19 insertions(+), 19 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml index 397f92824..a267b3bf8 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a broken site required: true - - label: I've verified that I'm running yt-dlp version **2022.02.04**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2022.03.08**. 
([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -51,12 +51,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2022.02.04 (exe) + [debug] yt-dlp version 2022.03.08 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2022.02.04) + yt-dlp is up to date (2022.03.08) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml index 9b02f6f72..81c15f6a6 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a new site support request required: true - - label: I've verified that I'm running yt-dlp version **2022.02.04**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2022.03.08**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -62,12 +62,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2022.02.04 (exe) + [debug] yt-dlp version 2022.03.08 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2022.02.04) + yt-dlp is up to date (2022.03.08) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml index 3ad71edc7..fe2c2331b 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a site feature request required: true - - label: I've verified that I'm running yt-dlp version **2022.02.04**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2022.03.08**. 
([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -60,12 +60,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2022.02.04 (exe) + [debug] yt-dlp version 2022.03.08 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2022.02.04) + yt-dlp is up to date (2022.03.08) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml index 791a7ee14..b8f7a9680 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a bug unrelated to a specific site required: true - - label: I've verified that I'm running yt-dlp version **2022.02.04**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2022.03.08**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -45,12 +45,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2022.02.04 (exe) + [debug] yt-dlp version 2022.03.08 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2022.02.04) + yt-dlp is up to date (2022.03.08) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml index a3a824f52..73eb25785 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/5_feature_request.yml @@ -13,7 +13,7 @@ body: required: true - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - - label: I've verified that I'm running yt-dlp version **2022.02.04**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2022.03.08**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. 
DO NOT post duplicates required: true diff --git a/Changelog.md b/Changelog.md index 66fdbe503..b9baa8aaa 100644 --- a/Changelog.md +++ b/Changelog.md @@ -36,7 +36,7 @@ * [downloader] Obey `--file-access-retries` when deleting/renaming by [ehoogeveen-medweb](https://github.com/ehoogeveen-medweb) * [extractor] Allow `http_headers` to be specified for `thumbnails` * [extractor] Extract subtitles from manifests for vimeo, globo, kaltura, svt by [fstirlitz](https://github.com/fstirlitz) -* [extractor] Fix for manifests without period duration by [dirkf,](https://github.com/dirkf,) [pukkandan](https://github.com/pukkandan) +* [extractor] Fix for manifests without period duration by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan) * [extractor] Support `--mark-watched` without `_NETRC_MACHINE` by [coletdjnz](https://github.com/coletdjnz) * [FFmpegConcat] Abort on `--simulate` * [FormatSort] Consider `acodec`=`ogg` as `vorbis` @@ -71,9 +71,9 @@ * [nfb] Add extractor by [ofkz](https://github.com/ofkz) * [niconico] Add playlist extractors and refactor by [Lesmiscore](https://github.com/Lesmiscore) * [peekvids] Add extractor by [schn0sch](https://github.com/schn0sch) -* [piapro] Add extractor by [pycabbage,](https://github.com/pycabbage,) [Lesmiscore](https://github.com/Lesmiscore) -* [rokfin] Add extractor by [P-reducible,](https://github.com/P-reducible,) [pukkandan](https://github.com/pukkandan) -* [rokfin] Add stack and channel extractors by [P-reducible,](https://github.com/P-reducible,) [pukkandan](https://github.com/pukkandan) +* [piapro] Add extractor by [pycabbage](https://github.com/pycabbage), [Lesmiscore](https://github.com/Lesmiscore) +* [rokfin] Add extractor by [P-reducible](https://github.com/P-reducible), [pukkandan](https://github.com/pukkandan) +* [rokfin] Add stack and channel extractors by [P-reducible](https://github.com/P-reducible), [pukkandan](https://github.com/pukkandan) * [ruv.is] Add extractor by [iw0nderhow](https://github.com/iw0nderhow) * [telegram] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299) * [VideocampusSachsen] Add extractors by [FestplattenSchnitzel](https://github.com/FestplattenSchnitzel) diff --git a/yt_dlp/version.py b/yt_dlp/version.py index 01e1b2345..b9ac6308a 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,5 +1,5 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2022.02.04' +__version__ = '2022.03.08' -RELEASE_GIT_HEAD = 'c1653e9ef' +RELEASE_GIT_HEAD = '535eb16a4' From d1b5f70bc9f9dcda1544b88b42ecc25f7f7aa1c7 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 9 Mar 2022 01:33:31 +0530 Subject: [PATCH 0782/2552] [cleanup] Refactor `__init__.py` (#2570) * Split `__init__` code into multiple functions * Clean up validation code by grouping similar types of options * Expose `parse_options` to third parties --- yt_dlp/__init__.py | 802 +++++++++++++++++---------------- yt_dlp/options.py | 2 +- yt_dlp/postprocessor/ffmpeg.py | 2 +- yt_dlp/utils.py | 3 + 4 files changed, 414 insertions(+), 395 deletions(-) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 926b5cad3..8221ec544 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -13,9 +13,7 @@ import random import re import sys -from .options import ( - parseOpts, -) +from .options import parseOpts from .compat import ( compat_getpass, compat_os_name, @@ -28,12 +26,12 @@ from .utils import ( decodeOption, DownloadCancelled, DownloadError, - error_to_compat_str, expand_path, - GeoUtils, float_or_none, + GeoUtils, 
int_or_none, match_filter_func, + NO_DEFAULT, parse_duration, preferredencoding, read_batch_urls, @@ -45,9 +43,7 @@ from .utils import ( write_string, ) from .update import run_update -from .downloader import ( - FileDownloader, -) +from .downloader import FileDownloader from .extractor import gen_extractors, list_extractors from .extractor.common import InfoExtractor from .extractor.adobepass import MSO_INFO @@ -63,59 +59,38 @@ from .postprocessor import ( from .YoutubeDL import YoutubeDL -def _real_main(argv=None): - # Compatibility fixes for Windows - if sys.platform == 'win32': - # https://github.com/ytdl-org/youtube-dl/issues/820 - codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None) - - workaround_optparse_bug9161() - - setproctitle('yt-dlp') - - parser, opts, args = parseOpts(argv) - warnings, deprecation_warnings = [], [] - - if opts.user_agent is not None: - opts.headers.setdefault('User-Agent', opts.user_agent) - if opts.referer is not None: - opts.headers.setdefault('Referer', opts.referer) - - # Dump user agent - if opts.dump_user_agent: - ua = traverse_obj(opts.headers, 'User-Agent', casesense=False, default=std_headers['User-Agent']) - write_string(f'{ua}\n', out=sys.stdout) - sys.exit(0) - +def get_urls(urls, batchfile, verbose): # Batch file verification batch_urls = [] - if opts.batchfile is not None: + if batchfile is not None: try: - if opts.batchfile == '-': + if batchfile == '-': write_string('Reading URLs from stdin - EOF (%s) to end:\n' % ( 'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D')) batchfd = sys.stdin else: batchfd = io.open( - expand_path(opts.batchfile), + expand_path(batchfile), 'r', encoding='utf-8', errors='ignore') batch_urls = read_batch_urls(batchfd) - if opts.verbose: + if verbose: write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n') except IOError: - sys.exit('ERROR: batch file %s could not be read' % opts.batchfile) - all_urls = batch_urls + [url.strip() for url in args] # batch_urls are already striped in read_batch_urls + sys.exit('ERROR: batch file %s could not be read' % batchfile) _enc = preferredencoding() - all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls] + return [ + url.strip().decode(_enc, 'ignore') if isinstance(url, bytes) else url.strip() + for url in batch_urls + urls] + +def print_extractor_information(opts, urls): if opts.list_extractors: for ie in list_extractors(opts.age_limit): write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie.working() else '') + '\n', out=sys.stdout) - matchedUrls = [url for url in all_urls if ie.suitable(url)] + matchedUrls = [url for url in urls if ie.suitable(url)] for mu in matchedUrls: write_string(' ' + mu + '\n', out=sys.stdout) - sys.exit(0) - if opts.list_extractor_descriptions: + elif opts.list_extractor_descriptions: for ie in list_extractors(opts.age_limit): if not ie.working(): continue @@ -127,184 +102,25 @@ def _real_main(argv=None): _COUNTS = ('', '5', '10', 'all') desc += f'; "{ie.SEARCH_KEY}:" prefix (Example: "{ie.SEARCH_KEY}{random.choice(_COUNTS)}:{random.choice(_SEARCHES)}")' write_string(desc + '\n', out=sys.stdout) - sys.exit(0) - if opts.ap_list_mso: + elif opts.ap_list_mso: table = [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()] write_string('Supported TV Providers:\n' + render_table(['mso', 'mso name'], table) + '\n', out=sys.stdout) - sys.exit(0) - - # Conflicting, missing and erroneous options - if opts.format == 'best': - warnings.append('.\n '.join(( - '"-f 
best" selects the best pre-merged format which is often not the best option', - 'To let yt-dlp download and merge the best available formats, simply do not pass any format selection', - 'If you know what you are doing and want only the best pre-merged format, use "-f b" instead to suppress this warning'))) - if opts.exec_cmd.get('before_dl') and opts.exec_before_dl_cmd: - parser.error('using "--exec-before-download" conflicts with "--exec before_dl:"') - if opts.usenetrc and (opts.username is not None or opts.password is not None): - parser.error('using .netrc conflicts with giving username/password') - if opts.password is not None and opts.username is None: - parser.error('account username missing\n') - if opts.ap_password is not None and opts.ap_username is None: - parser.error('TV Provider account username missing\n') - if opts.autonumber_size is not None: - if opts.autonumber_size <= 0: - parser.error('auto number size must be positive') - if opts.autonumber_start is not None: - if opts.autonumber_start < 0: - parser.error('auto number start must be positive or 0') - if opts.username is not None and opts.password is None: - opts.password = compat_getpass('Type account password and press [Return]: ') - if opts.ap_username is not None and opts.ap_password is None: - opts.ap_password = compat_getpass('Type TV provider account password and press [Return]: ') - if opts.ratelimit is not None: - numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) - if numeric_limit is None: - parser.error('invalid rate limit specified') - opts.ratelimit = numeric_limit - if opts.throttledratelimit is not None: - numeric_limit = FileDownloader.parse_bytes(opts.throttledratelimit) - if numeric_limit is None: - parser.error('invalid rate limit specified') - opts.throttledratelimit = numeric_limit - if opts.min_filesize is not None: - numeric_limit = FileDownloader.parse_bytes(opts.min_filesize) - if numeric_limit is None: - parser.error('invalid min_filesize specified') - opts.min_filesize = numeric_limit - if opts.max_filesize is not None: - numeric_limit = FileDownloader.parse_bytes(opts.max_filesize) - if numeric_limit is None: - parser.error('invalid max_filesize specified') - opts.max_filesize = numeric_limit - if opts.sleep_interval is not None: - if opts.sleep_interval < 0: - parser.error('sleep interval must be positive or 0') - if opts.max_sleep_interval is not None: - if opts.max_sleep_interval < 0: - parser.error('max sleep interval must be positive or 0') - if opts.sleep_interval is None: - parser.error('min sleep interval must be specified, use --min-sleep-interval') - if opts.max_sleep_interval < opts.sleep_interval: - parser.error('max sleep interval must be greater than or equal to min sleep interval') - else: - opts.max_sleep_interval = opts.sleep_interval - if opts.sleep_interval_subtitles is not None: - if opts.sleep_interval_subtitles < 0: - parser.error('subtitles sleep interval must be positive or 0') - if opts.sleep_interval_requests is not None: - if opts.sleep_interval_requests < 0: - parser.error('requests sleep interval must be positive or 0') - if opts.ap_mso and opts.ap_mso not in MSO_INFO: - parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers') - if opts.overwrites: # --yes-overwrites implies --no-continue - opts.continue_dl = False - if opts.concurrent_fragment_downloads <= 0: - parser.error('Concurrent fragments must be positive') - if opts.wait_for_video is not None: - min_wait, max_wait, *_ = map(parse_duration, 
opts.wait_for_video.split('-', 1) + [None]) - if min_wait is None or (max_wait is None and '-' in opts.wait_for_video): - parser.error('Invalid time range to wait') - elif max_wait is not None and max_wait < min_wait: - parser.error('Minimum time range to wait must not be longer than the maximum') - opts.wait_for_video = (min_wait, max_wait) - - def parse_retries(retries, name=''): - if retries in ('inf', 'infinite'): - parsed_retries = float('inf') - else: - try: - parsed_retries = int(retries) - except (TypeError, ValueError): - parser.error('invalid %sretry count specified' % name) - return parsed_retries - if opts.retries is not None: - opts.retries = parse_retries(opts.retries) - if opts.file_access_retries is not None: - opts.file_access_retries = parse_retries(opts.file_access_retries, 'file access ') - if opts.fragment_retries is not None: - opts.fragment_retries = parse_retries(opts.fragment_retries, 'fragment ') - if opts.extractor_retries is not None: - opts.extractor_retries = parse_retries(opts.extractor_retries, 'extractor ') - if opts.buffersize is not None: - numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize) - if numeric_buffersize is None: - parser.error('invalid buffer size specified') - opts.buffersize = numeric_buffersize - if opts.http_chunk_size is not None: - numeric_chunksize = FileDownloader.parse_bytes(opts.http_chunk_size) - if not numeric_chunksize: - parser.error('invalid http chunk size specified') - opts.http_chunk_size = numeric_chunksize - if opts.playliststart <= 0: - raise parser.error('Playlist start must be positive') - if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart: - raise parser.error('Playlist end must be greater than playlist start') - if opts.extractaudio: - opts.audioformat = opts.audioformat.lower() - if opts.audioformat not in ['best'] + list(FFmpegExtractAudioPP.SUPPORTED_EXTS): - parser.error('invalid audio format specified') - if opts.audioquality: - opts.audioquality = opts.audioquality.strip('k').strip('K') - audioquality = int_or_none(float_or_none(opts.audioquality)) # int_or_none prevents inf, nan - if audioquality is None or audioquality < 0: - parser.error('invalid audio quality specified') - if opts.recodevideo is not None: - opts.recodevideo = opts.recodevideo.replace(' ', '') - if not re.match(FFmpegVideoConvertorPP.FORMAT_RE, opts.recodevideo): - parser.error('invalid video remux format specified') - if opts.remuxvideo is not None: - opts.remuxvideo = opts.remuxvideo.replace(' ', '') - if not re.match(FFmpegVideoRemuxerPP.FORMAT_RE, opts.remuxvideo): - parser.error('invalid video remux format specified') - if opts.convertsubtitles is not None: - if opts.convertsubtitles not in FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS: - parser.error('invalid subtitle format specified') - if opts.convertthumbnails is not None: - if opts.convertthumbnails not in FFmpegThumbnailsConvertorPP.SUPPORTED_EXTS: - parser.error('invalid thumbnail format specified') - if opts.cookiesfrombrowser is not None: - mobj = re.match(r'(?P[^+:]+)(\s*\+\s*(?P[^:]+))?(\s*:(?P.+))?', opts.cookiesfrombrowser) - if mobj is None: - parser.error(f'invalid cookies from browser arguments: {opts.cookiesfrombrowser}') - browser_name, keyring, profile = mobj.group('name', 'keyring', 'profile') - browser_name = browser_name.lower() - if browser_name not in SUPPORTED_BROWSERS: - parser.error(f'unsupported browser specified for cookies: "{browser_name}". 
' - f'Supported browsers are: {", ".join(sorted(SUPPORTED_BROWSERS))}') - if keyring is not None: - keyring = keyring.upper() - if keyring not in SUPPORTED_KEYRINGS: - parser.error(f'unsupported keyring specified for cookies: "{keyring}". ' - f'Supported keyrings are: {", ".join(sorted(SUPPORTED_KEYRINGS))}') - opts.cookiesfrombrowser = (browser_name, profile, keyring) - geo_bypass_code = opts.geo_bypass_ip_block or opts.geo_bypass_country - if geo_bypass_code is not None: - try: - GeoUtils.random_ipv4(geo_bypass_code) - except Exception: - parser.error('unsupported geo-bypass country or ip-block') - - if opts.date is not None: - date = DateRange.day(opts.date) else: - date = DateRange(opts.dateafter, opts.datebefore) - - compat_opts = opts.compat_opts + return False + return True - def report_conflict(arg1, arg2): - warnings.append(f'{arg2} is ignored since {arg1} was given') +def set_compat_opts(opts): def _unused_compat_opt(name): - if name not in compat_opts: + if name not in opts.compat_opts: return False - compat_opts.discard(name) - compat_opts.update(['*%s' % name]) + opts.compat_opts.discard(name) + opts.compat_opts.update(['*%s' % name]) return True def set_default_compat(compat_name, opt_name, default=True, remove_compat=True): attr = getattr(opts, opt_name) - if compat_name in compat_opts: + if compat_name in opts.compat_opts: if attr is None: setattr(opts, opt_name, not default) return True @@ -319,36 +135,137 @@ def _real_main(argv=None): set_default_compat('abort-on-error', 'ignoreerrors', 'only_download') set_default_compat('no-playlist-metafiles', 'allow_playlist_files') set_default_compat('no-clean-infojson', 'clean_infojson') - if 'no-attach-info-json' in compat_opts: + if 'no-attach-info-json' in opts.compat_opts: if opts.embed_infojson: _unused_compat_opt('no-attach-info-json') else: opts.embed_infojson = False - if 'format-sort' in compat_opts: + if 'format-sort' in opts.compat_opts: opts.format_sort.extend(InfoExtractor.FormatSort.ytdl_default) _video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False) _audio_multistreams_set = set_default_compat('multistreams', 'allow_multiple_audio_streams', False, remove_compat=False) if _video_multistreams_set is False and _audio_multistreams_set is False: _unused_compat_opt('multistreams') - outtmpl_default = opts.outtmpl.get('default') - if outtmpl_default == '': - outtmpl_default, opts.skip_download = None, True - del opts.outtmpl['default'] - if opts.useid: - if outtmpl_default is None: - outtmpl_default = opts.outtmpl['default'] = '%(id)s.%(ext)s' - else: - report_conflict('--output', '--id') - if 'filename' in compat_opts: - if outtmpl_default is None: - outtmpl_default = opts.outtmpl['default'] = '%(title)s-%(id)s.%(ext)s' + if 'filename' in opts.compat_opts: + if opts.outtmpl.get('default') is None: + opts.outtmpl.update({'default': '%(title)s-%(id)s.%(ext)s'}) else: _unused_compat_opt('filename') + +def validate_options(opts): + def validate(cndn, name, value=None, msg=None): + if cndn: + return True + raise ValueError((msg or 'invalid {name} "{value}" given').format(name=name, value=value)) + + def validate_in(name, value, items, msg=None): + return validate(value is None or value in items, name, value, msg) + + def validate_regex(name, value, regex): + return validate(value is None or re.match(regex, value), name, value) + + def validate_positive(name, value, strict=False): + return validate(value is None or value > 0 or (not strict and value == 0), + name, 
value, '{name} "{value}" must be positive' + ('' if strict else ' or 0'))
+
+    def validate_minmax(min_val, max_val, min_name, max_name=None):
+        if max_val is None or min_val is None or max_val >= min_val:
+            return
+        if not max_name:
+            min_name, max_name = f'min {min_name}', f'max {min_name}'
+        raise ValueError(f'{max_name} "{max_val}" must be greater than or equal to {min_name} "{min_val}"')
+
+    # Usernames and passwords
+    validate(not opts.usenetrc or (opts.username is None and opts.password is None),
+             '.netrc', msg='using {name} conflicts with giving username/password')
+    validate(opts.password is None or opts.username is not None, 'account username', msg='{name} missing')
+    validate(opts.ap_password is None or opts.ap_username is not None,
+             'TV Provider account username', msg='{name} missing')
+    validate_in('TV Provider', opts.ap_mso, MSO_INFO,
+                'Unsupported {name} "{value}", use --ap-list-mso to get a list of supported TV Providers')
+
+    # Numbers
+    validate_positive('autonumber start', opts.autonumber_start)
+    validate_positive('autonumber size', opts.autonumber_size, True)
+    validate_positive('concurrent fragments', opts.concurrent_fragment_downloads, True)
+    validate_positive('playlist start', opts.playliststart, True)
+    if opts.playlistend != -1:
+        validate_minmax(opts.playliststart, opts.playlistend, 'playlist start', 'playlist end')
+
+    # Time ranges
+    validate_positive('subtitles sleep interval', opts.sleep_interval_subtitles)
+    validate_positive('requests sleep interval', opts.sleep_interval_requests)
+    validate_positive('sleep interval', opts.sleep_interval)
+    validate_positive('max sleep interval', opts.max_sleep_interval)
+    if opts.max_sleep_interval is not None:
+        validate(
+            opts.sleep_interval is not None, 'min sleep interval',
+            msg='{name} must be specified; use --min-sleep-interval')
+        validate_minmax(opts.sleep_interval, opts.max_sleep_interval, 'sleep interval')
+
+    if opts.wait_for_video is not None:
+        min_wait, max_wait, *_ = map(parse_duration, opts.wait_for_video.split('-', 1) + [None])
+        validate(min_wait is not None and not (max_wait is None and '-' in opts.wait_for_video),
+                 'time range to wait for video', opts.wait_for_video)
+        validate_minmax(min_wait, max_wait, 'time range to wait for video')
+        opts.wait_for_video = (min_wait, max_wait)
+
+    # Format sort
+    for f in opts.format_sort:
+        validate_regex('format sorting', f, InfoExtractor.FormatSort.regex)
+
+    # Postprocessor formats
+    validate_in('audio format', opts.audioformat, ['best'] + list(FFmpegExtractAudioPP.SUPPORTED_EXTS))
+    validate_in('subtitle format', opts.convertsubtitles, FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS)
+    validate_in('thumbnail format', opts.convertthumbnails, FFmpegThumbnailsConvertorPP.SUPPORTED_EXTS)
+    if opts.recodevideo is not None:
+        opts.recodevideo = opts.recodevideo.replace(' ', '')
+        validate_regex('video recode format', opts.recodevideo, FFmpegVideoConvertorPP.FORMAT_RE)
+    if opts.remuxvideo is not None:
+        opts.remuxvideo = opts.remuxvideo.replace(' ', '')
+        validate_regex('video remux format', opts.remuxvideo, FFmpegVideoRemuxerPP.FORMAT_RE)
+    if opts.audioquality:
+        opts.audioquality = opts.audioquality.strip('k').strip('K')
+        # int_or_none prevents inf, nan
+        validate_positive('audio quality', int_or_none(float_or_none(opts.audioquality), default=0))
+
+    # Retries
+    def parse_retries(name, value):
+        if value is None:
+            return None
+        elif value in ('inf', 'infinite'):
+            return float('inf')
+        try:
+            return int(value)
+        except (TypeError, ValueError):
+            validate(False, f'{name} retry count', value)
+
+    opts.retries = parse_retries('download', opts.retries)
+    opts.fragment_retries = parse_retries('fragment', opts.fragment_retries)
+    opts.extractor_retries = parse_retries('extractor', opts.extractor_retries)
+    opts.file_access_retries = parse_retries('file access', opts.file_access_retries)
+
+    # Bytes
+    def parse_bytes(name, value):
+        if value is None:
+            return None
+        numeric_limit = FileDownloader.parse_bytes(value)
+        validate(numeric_limit is not None, name, value)
+        return numeric_limit
+
+    opts.ratelimit = parse_bytes('rate limit', opts.ratelimit)
+    opts.throttledratelimit = parse_bytes('throttled rate limit', opts.throttledratelimit)
+    opts.min_filesize = parse_bytes('min filesize', opts.min_filesize)
+    opts.max_filesize = parse_bytes('max filesize', opts.max_filesize)
+    opts.buffersize = parse_bytes('buffer size', opts.buffersize)
+    opts.http_chunk_size = parse_bytes('http chunk size', opts.http_chunk_size)
+
+    # Output templates
     def validate_outtmpl(tmpl, msg):
         err = YoutubeDL.validate_outtmpl(tmpl)
         if err:
-            parser.error('invalid %s %r: %s' % (msg, tmpl, error_to_compat_str(err)))
+            raise ValueError(f'invalid {msg} "{tmpl}": {err}')
 
     for k, tmpl in opts.outtmpl.items():
         validate_outtmpl(tmpl, f'{k} output template')
@@ -357,32 +274,62 @@ def _real_main(argv=None):
         validate_outtmpl(tmpl, f'{type_} print template')
     for type_, tmpl_list in opts.print_to_file.items():
         for tmpl, file in tmpl_list:
-            validate_outtmpl(tmpl, f'{type_} print-to-file template')
-            validate_outtmpl(file, f'{type_} print-to-file filename')
+            validate_outtmpl(tmpl, f'{type_} print to file template')
+            validate_outtmpl(file, f'{type_} print to file filename')
     validate_outtmpl(opts.sponsorblock_chapter_title, 'SponsorBlock chapter title')
     for k, tmpl in opts.progress_template.items():
         k = f'{k[:-6]} console title' if '-title' in k else f'{k} progress'
         validate_outtmpl(tmpl, f'{k} template')
 
-    if opts.extractaudio and not opts.keepvideo and opts.format is None:
-        opts.format = 'bestaudio/best'
-
-    if outtmpl_default is not None and not os.path.splitext(outtmpl_default)[1] and opts.extractaudio:
-        parser.error('Cannot download a video and extract audio into the same'
-                     ' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
-                     ' template'.format(outtmpl_default))
+    outtmpl_default = opts.outtmpl.get('default')
+    if outtmpl_default == '':
+        opts.skip_download = None
+        del opts.outtmpl['default']
+    if outtmpl_default and not os.path.splitext(outtmpl_default)[1] and opts.extractaudio:
+        raise ValueError(
+            'Cannot download a video and extract audio into the same file! '
+            f'Use "{outtmpl_default}.%(ext)s" instead of "{outtmpl_default}" as the output template')
+
+    # Remove chapters
+    remove_chapters_patterns, opts.remove_ranges = [], []
+    for regex in opts.remove_chapters or []:
+        if regex.startswith('*'):
+            dur = list(map(parse_duration, regex[1:].split('-')))
+            if len(dur) == 2 and all(t is not None for t in dur):
+                opts.remove_ranges.append(tuple(dur))
+                continue
+            raise ValueError(f'invalid --remove-chapters time range "{regex}". Must be of the form *start-end')
+        try:
+            remove_chapters_patterns.append(re.compile(regex))
+        except re.error as err:
+            raise ValueError(f'invalid --remove-chapters regex "{regex}" - {err}')
+    opts.remove_chapters = remove_chapters_patterns
 
-    for f in opts.format_sort:
-        if re.match(InfoExtractor.FormatSort.regex, f) is None:
-            parser.error('invalid format sort string "%s" specified' % f)
+    # Cookies from browser
+    if opts.cookiesfrombrowser:
+        mobj = re.match(r'(?P<name>[^+:]+)(\s*\+\s*(?P<keyring>[^:]+))?(\s*:(?P<profile>.+))?', opts.cookiesfrombrowser)
+        if mobj is None:
+            raise ValueError(f'invalid cookies from browser arguments: {opts.cookiesfrombrowser}')
+        browser_name, keyring, profile = mobj.group('name', 'keyring', 'profile')
+        browser_name = browser_name.lower()
+        if browser_name not in SUPPORTED_BROWSERS:
+            raise ValueError(f'unsupported browser specified for cookies: "{browser_name}". '
+                             f'Supported browsers are: {", ".join(sorted(SUPPORTED_BROWSERS))}')
+        if keyring is not None:
+            keyring = keyring.upper()
+            if keyring not in SUPPORTED_KEYRINGS:
+                raise ValueError(f'unsupported keyring specified for cookies: "{keyring}". '
+                                 f'Supported keyrings are: {", ".join(sorted(SUPPORTED_KEYRINGS))}')
+        opts.cookiesfrombrowser = (browser_name, profile, keyring)
 
+    # MetadataParser
     def metadataparser_actions(f):
         if isinstance(f, str):
             cmd = '--parse-metadata %s' % compat_shlex_quote(f)
             try:
                 actions = [MetadataFromFieldPP.to_action(f)]
             except Exception as err:
-                parser.error(f'{cmd} is invalid; {err}')
+                raise ValueError(f'{cmd} is invalid; {err}')
         else:
             cmd = '--replace-in-metadata %s' % ' '.join(map(compat_shlex_quote, f))
             actions = ((MetadataParserPP.Actions.REPLACE, x, *f[1:]) for x in f[0].split(','))
@@ -391,162 +338,217 @@ def _real_main(argv=None):
         try:
             MetadataParserPP.validate_action(*action)
         except Exception as err:
-            parser.error(f'{cmd} is invalid; {err}')
+            raise ValueError(f'{cmd} is invalid; {err}')
         yield action
 
-    if opts.parse_metadata is None:
-        opts.parse_metadata = []
+    parse_metadata = opts.parse_metadata or []
     if opts.metafromtitle is not None:
-        opts.parse_metadata.append('title:%s' % opts.metafromtitle)
-    opts.parse_metadata = list(itertools.chain(*map(metadataparser_actions, opts.parse_metadata)))
+        parse_metadata.append('title:%s' % opts.metafromtitle)
+    opts.parse_metadata = list(itertools.chain(*map(metadataparser_actions, parse_metadata)))
 
-    any_getting = (any(opts.forceprint.values()) or opts.dumpjson or opts.dump_single_json
-                   or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail
-                   or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration)
+    # Other options
+    geo_bypass_code = opts.geo_bypass_ip_block or opts.geo_bypass_country
+    if geo_bypass_code is not None:
+        try:
+            GeoUtils.random_ipv4(geo_bypass_code)
+        except Exception:
+            raise ValueError('unsupported geo-bypass country or ip-block')
 
-    any_printing = opts.print_json
-    download_archive_fn = expand_path(opts.download_archive) if opts.download_archive is not None else opts.download_archive
+    opts.match_filter = match_filter_func(opts.match_filter)
+    opts.date = DateRange.day(opts.date) if opts.date else DateRange(opts.dateafter, opts.datebefore)
 
-    # If JSON is not printed anywhere, but comments are requested, save it to file
-    printing_json = opts.dumpjson or opts.print_json or opts.dump_single_json
-    if opts.getcomments and not printing_json:
-        opts.writeinfojson = True
+    if opts.download_archive is not None:
+        opts.download_archive = expand_path(opts.download_archive)
+
+    if
opts.user_agent is not None: + opts.headers.setdefault('User-Agent', opts.user_agent) + if opts.referer is not None: + opts.headers.setdefault('Referer', opts.referer) if opts.no_sponsorblock: - opts.sponsorblock_mark = set() - opts.sponsorblock_remove = set() - sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove + opts.sponsorblock_mark = opts.sponsorblock_remove = set() + + warnings, deprecation_warnings = [], [] - opts.remove_chapters = opts.remove_chapters or [] - - if (opts.remove_chapters or sponsorblock_query) and opts.sponskrub is not False: - if opts.sponskrub: - if opts.remove_chapters: - report_conflict('--remove-chapters', '--sponskrub') - if opts.sponsorblock_mark: - report_conflict('--sponsorblock-mark', '--sponskrub') - if opts.sponsorblock_remove: - report_conflict('--sponsorblock-remove', '--sponskrub') - opts.sponskrub = False - if opts.sponskrub_cut and opts.split_chapters and opts.sponskrub is not False: - report_conflict('--split-chapter', '--sponskrub-cut') - opts.sponskrub_cut = False - - if opts.remuxvideo and opts.recodevideo: - report_conflict('--recode-video', '--remux-video') - opts.remuxvideo = False - - if opts.allow_unplayable_formats: - def report_unplayable_conflict(opt_name, arg, default=False, allowed=None): - val = getattr(opts, opt_name) - if (not allowed and val) or (allowed and not allowed(val)): - report_conflict('--allow-unplayable-formats', arg) - setattr(opts, opt_name, default) - - report_unplayable_conflict('extractaudio', '--extract-audio') - report_unplayable_conflict('remuxvideo', '--remux-video') - report_unplayable_conflict('recodevideo', '--recode-video') - report_unplayable_conflict('addmetadata', '--embed-metadata') - report_unplayable_conflict('addchapters', '--embed-chapters') - report_unplayable_conflict('embed_infojson', '--embed-info-json') - opts.embed_infojson = False - report_unplayable_conflict('embedsubtitles', '--embed-subs') - report_unplayable_conflict('embedthumbnail', '--embed-thumbnail') - report_unplayable_conflict('xattrs', '--xattrs') - report_unplayable_conflict('fixup', '--fixup', default='never', allowed=lambda x: x in (None, 'never', 'ignore')) - opts.fixup = 'never' - report_unplayable_conflict('remove_chapters', '--remove-chapters', default=[]) - report_unplayable_conflict('sponsorblock_remove', '--sponsorblock-remove', default=set()) - report_unplayable_conflict('sponskrub', '--sponskrub', default=set()) - opts.sponskrub = False + # Common mistake: -f best + if opts.format == 'best': + warnings.append('.\n '.join(( + '"-f best" selects the best pre-merged format which is often not the best option', + 'To let yt-dlp download and merge the best available formats, simply do not pass any format selection', + 'If you know what you are doing and want only the best pre-merged format, use "-f b" instead to suppress this warning'))) + + # --(post-processor/downloader)-args without name + def report_args_compat(name, value, key1, key2=None): + if key1 in value and key2 not in value: + warnings.append(f'{name} arguments given without specifying name. 
The arguments will be given to all {name}s') + return True + return False + + report_args_compat('external downloader', opts.external_downloader_args, 'default') + if report_args_compat('post-processor', opts.postprocessor_args, 'default-compat', 'default'): + opts.postprocessor_args['default'] = opts.postprocessor_args.pop('default-compat') + opts.postprocessor_args.setdefault('sponskrub', []) + + def report_conflict(arg1, opt1, arg2='--allow-unplayable-formats', opt2='allow_unplayable_formats', + val1=NO_DEFAULT, val2=NO_DEFAULT, default=False): + if val2 is NO_DEFAULT: + val2 = getattr(opts, opt2) + if not val2: + return + + if val1 is NO_DEFAULT: + val1 = getattr(opts, opt1) + if val1: + warnings.append(f'{arg1} is ignored since {arg2} was given') + setattr(opts, opt1, default) + + # Conflicting options + report_conflict('--date-after', 'dateafter', '--date', 'date', default=None) + report_conflict('--date-before', 'datebefore', '--date', 'date', default=None) + report_conflict('--exec-before-download', 'exec_before_dl_cmd', '"--exec before_dl:"', 'exec_cmd', opts.exec_cmd.get('before_dl')) + report_conflict('--id', 'useid', '--output', 'outtmpl', val2=opts.outtmpl.get('default')) + report_conflict('--remux-video', 'remuxvideo', '--recode-video', 'recodevideo') + report_conflict('--sponskrub', 'sponskrub', '--remove-chapters', 'remove_chapters') + report_conflict('--sponskrub', 'sponskrub', '--sponsorblock-mark', 'sponsorblock_mark') + report_conflict('--sponskrub', 'sponskrub', '--sponsorblock-remove', 'sponsorblock_remove') + report_conflict('--sponskrub-cut', 'sponskrub_cut', '--split-chapter', 'split_chapters', val1=opts.sponskrub and opts.sponskrub_cut) + + # Conflicts with --allow-unplayable-formats + report_conflict('--add-metadata', 'addmetadata') + report_conflict('--embed-chapters', 'addchapters') + report_conflict('--embed-info-json', 'embed_infojson') + report_conflict('--embed-subs', 'embedsubtitles') + report_conflict('--embed-thumbnail', 'embedthumbnail') + report_conflict('--extract-audio', 'extractaudio') + report_conflict('--fixup', 'fixup', val1=(opts.fixup or '').lower() in ('', 'never', 'ignore'), default='never') + report_conflict('--recode-video', 'recodevideo') + report_conflict('--remove-chapters', 'remove_chapters', default=[]) + report_conflict('--remux-video', 'remuxvideo') + report_conflict('--sponskrub', 'sponskrub') + report_conflict('--sponsorblock-remove', 'sponsorblock_remove', default=set()) + report_conflict('--xattrs', 'xattrs') + + # Fully deprecated options + def report_deprecation(val, old, new=None): + if not val: + return + deprecation_warnings.append( + f'{old} is deprecated and may be removed in a future version. 
Use {new} instead' if new + else f'{old} is deprecated and may not work as expected') + + report_deprecation(opts.sponskrub, '--sponskrub', '--sponsorblock-mark or --sponsorblock-remove') + report_deprecation(not opts.prefer_ffmpeg, '--prefer-avconv', 'ffmpeg') + # report_deprecation(opts.include_ads, '--include-ads') # We may re-implement this in future + # report_deprecation(opts.call_home, '--call-home') # We may re-implement this in future + # report_deprecation(opts.writeannotations, '--write-annotations') # It's just that no website has it + + # Dependent options + if opts.exec_before_dl_cmd: + opts.exec_cmd['before_dl'] = opts.exec_before_dl_cmd + + if opts.useid: # --id is not deprecated in youtube-dl + opts.outtmpl['default'] = '%(id)s.%(ext)s' + + if opts.overwrites: # --force-overwrites implies --no-continue + opts.continue_dl = False if (opts.addmetadata or opts.sponsorblock_mark) and opts.addchapters is None: + # Add chapters when adding metadata or marking sponsors opts.addchapters = True - # PostProcessors - postprocessors = list(opts.add_postprocessors) + if opts.extractaudio and not opts.keepvideo and opts.format is None: + # Do not unnecessarily download audio + opts.format = 'bestaudio/best' + + if opts.getcomments and opts.writeinfojson is None: + # If JSON is not printed anywhere, but comments are requested, save it to file + if not opts.dumpjson or opts.print_json or opts.dump_single_json: + opts.writeinfojson = True + + if opts.allsubtitles and not (opts.embedsubtitles or opts.writeautomaticsub): + # --all-sub automatically sets --write-sub if --write-auto-sub is not given + opts.writesubtitles = True + + if opts.addmetadata and opts.embed_infojson is None: + # If embedding metadata and infojson is present, embed it + opts.embed_infojson = 'if_exists' + + # Ask for passwords + if opts.username is not None and opts.password is None: + opts.password = compat_getpass('Type account password and press [Return]: ') + if opts.ap_username is not None and opts.ap_password is None: + opts.ap_password = compat_getpass('Type TV provider account password and press [Return]: ') + + return warnings, deprecation_warnings + + +def get_postprocessors(opts): + yield from opts.add_postprocessors + + if opts.parse_metadata: + yield { + 'key': 'MetadataParser', + 'actions': opts.parse_metadata, + 'when': 'pre_process' + } + sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove if sponsorblock_query: - postprocessors.append({ + yield { 'key': 'SponsorBlock', 'categories': sponsorblock_query, 'api': opts.sponsorblock_api, - # Run this after filtering videos 'when': 'after_filter' - }) - if opts.parse_metadata: - postprocessors.append({ - 'key': 'MetadataParser', - 'actions': opts.parse_metadata, - # Run this immediately after extraction is complete - 'when': 'pre_process' - }) + } if opts.convertsubtitles: - postprocessors.append({ + yield { 'key': 'FFmpegSubtitlesConvertor', 'format': opts.convertsubtitles, - # Run this before the actual video download 'when': 'before_dl' - }) + } if opts.convertthumbnails: - postprocessors.append({ + yield { 'key': 'FFmpegThumbnailsConvertor', 'format': opts.convertthumbnails, - # Run this before the actual video download 'when': 'before_dl' - }) + } if opts.extractaudio: - postprocessors.append({ + yield { 'key': 'FFmpegExtractAudio', 'preferredcodec': opts.audioformat, 'preferredquality': opts.audioquality, 'nopostoverwrites': opts.nopostoverwrites, - }) + } if opts.remuxvideo: - postprocessors.append({ + yield { 'key': 
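+            # (note: 'preferedformat' below is the actual, historically misspelled
+            # argument name of the FFmpeg remuxer/convertor post-processors)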
'FFmpegVideoRemuxer', 'preferedformat': opts.remuxvideo, - }) + } if opts.recodevideo: - postprocessors.append({ + yield { 'key': 'FFmpegVideoConvertor', 'preferedformat': opts.recodevideo, - }) + } # If ModifyChapters is going to remove chapters, subtitles must already be in the container. if opts.embedsubtitles: - already_have_subtitle = opts.writesubtitles and 'no-keep-subs' not in compat_opts - postprocessors.append({ + keep_subs = 'no-keep-subs' not in opts.compat_opts + yield { 'key': 'FFmpegEmbedSubtitle', # already_have_subtitle = True prevents the file from being deleted after embedding - 'already_have_subtitle': already_have_subtitle - }) - if not opts.writeautomaticsub and 'no-keep-subs' not in compat_opts: + 'already_have_subtitle': opts.writesubtitles and keep_subs + } + if not opts.writeautomaticsub and keep_subs: opts.writesubtitles = True - # --all-sub automatically sets --write-sub if --write-auto-sub is not given - # this was the old behaviour if only --all-sub was given. - if opts.allsubtitles and not opts.writeautomaticsub: - opts.writesubtitles = True + # ModifyChapters must run before FFmpegMetadataPP - remove_chapters_patterns, remove_ranges = [], [] - for regex in opts.remove_chapters: - if regex.startswith('*'): - dur = list(map(parse_duration, regex[1:].split('-'))) - if len(dur) == 2 and all(t is not None for t in dur): - remove_ranges.append(tuple(dur)) - continue - parser.error(f'invalid --remove-chapters time range {regex!r}. Must be of the form *start-end') - try: - remove_chapters_patterns.append(re.compile(regex)) - except re.error as err: - parser.error(f'invalid --remove-chapters regex {regex!r} - {err}') if opts.remove_chapters or sponsorblock_query: - postprocessors.append({ + yield { 'key': 'ModifyChapters', - 'remove_chapters_patterns': remove_chapters_patterns, + 'remove_chapters_patterns': opts.remove_chapters, 'remove_sponsor_segments': opts.sponsorblock_remove, - 'remove_ranges': remove_ranges, + 'remove_ranges': opts.remove_ranges, 'sponsorblock_chapter_title': opts.sponsorblock_chapter_title, 'force_keyframes': opts.force_keyframes_at_cuts - }) + } # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and # FFmpegExtractAudioPP as containers before conversion may not support # metadata (3gp, webm, etc.) @@ -554,21 +556,19 @@ def _real_main(argv=None): # source and target containers. From this point the container won't change, # so metadata can be added here. 
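     # (illustrative note, assuming default options: for e.g. `--remux-video mkv
     # --embed-metadata`, the remuxer is yielded earlier and FFmpegMetadata here,
     # so the tags land in the final .mkv rather than the pre-remux container)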
if opts.addmetadata or opts.addchapters or opts.embed_infojson: - if opts.embed_infojson is None: - opts.embed_infojson = 'if_exists' - postprocessors.append({ + yield { 'key': 'FFmpegMetadata', 'add_chapters': opts.addchapters, 'add_metadata': opts.addmetadata, 'add_infojson': opts.embed_infojson, - }) + } # Deprecated # This should be above EmbedThumbnail since sponskrub removes the thumbnail attachment # but must be below EmbedSubtitle and FFmpegMetadata # See https://github.com/yt-dlp/yt-dlp/issues/204 , https://github.com/faissaloo/SponSkrub/issues/29 # If opts.sponskrub is None, sponskrub is used, but it silently fails if the executable can't be found if opts.sponskrub is not False: - postprocessors.append({ + yield { 'key': 'SponSkrub', 'path': opts.sponskrub_path, 'args': opts.sponskrub_args, @@ -576,64 +576,57 @@ def _real_main(argv=None): 'force': opts.sponskrub_force, 'ignoreerror': opts.sponskrub is None, '_from_cli': True, - }) + } if opts.embedthumbnail: - postprocessors.append({ + yield { 'key': 'EmbedThumbnail', # already_have_thumbnail = True prevents the file from being deleted after embedding 'already_have_thumbnail': opts.writethumbnail - }) + } if not opts.writethumbnail: opts.writethumbnail = True opts.outtmpl['pl_thumbnail'] = '' if opts.split_chapters: - postprocessors.append({ + yield { 'key': 'FFmpegSplitChapters', 'force_keyframes': opts.force_keyframes_at_cuts, - }) + } # XAttrMetadataPP should be run after post-processors that may change file contents if opts.xattrs: - postprocessors.append({'key': 'XAttrMetadata'}) + yield {'key': 'XAttrMetadata'} if opts.concat_playlist != 'never': - postprocessors.append({ + yield { 'key': 'FFmpegConcat', 'only_multi_video': opts.concat_playlist != 'always', 'when': 'playlist', - }) + } # Exec must be the last PP of each category - if opts.exec_before_dl_cmd: - opts.exec_cmd.setdefault('before_dl', opts.exec_before_dl_cmd) for when, exec_cmd in opts.exec_cmd.items(): - postprocessors.append({ + yield { 'key': 'Exec', 'exec_cmd': exec_cmd, - # Run this only after the files have been moved to their final locations 'when': when, - }) + } - def report_args_compat(arg, name): - warnings.append('%s given without specifying name. The arguments will be given to all %s' % (arg, name)) - if 'default' in opts.external_downloader_args: - report_args_compat('--downloader-args', 'external downloaders') +def parse_options(argv=None): + """ @returns (parser, opts, urls, ydl_opts) """ + parser, opts, urls = parseOpts(argv) + urls = get_urls(urls, opts.batchfile, opts.verbose) - if 'default-compat' in opts.postprocessor_args and 'default' not in opts.postprocessor_args: - report_args_compat('--post-processor-args', 'post-processors') - opts.postprocessor_args.setdefault('sponskrub', []) - opts.postprocessor_args['default'] = opts.postprocessor_args['default-compat'] + set_compat_opts(opts) + try: + warnings, deprecation_warnings = validate_options(opts) + except ValueError as err: + parser.error(f'{err}\n') - def report_deprecation(val, old, new=None): - if not val: - return - deprecation_warnings.append( - f'{old} is deprecated and may be removed in a future version. 
Use {new} instead' if new - else f'{old} is deprecated and may not work as expected') + postprocessors = list(get_postprocessors(opts)) - report_deprecation(opts.sponskrub, '--sponskrub', '--sponsorblock-mark or --sponsorblock-remove') - report_deprecation(not opts.prefer_ffmpeg, '--prefer-avconv', 'ffmpeg') - report_deprecation(opts.include_ads, '--include-ads') - # report_deprecation(opts.call_home, '--call-home') # We may re-implement this in future - # report_deprecation(opts.writeannotations, '--write-annotations') # It's just that no website has it + any_getting = (any(opts.forceprint.values()) or opts.dumpjson or opts.dump_single_json + or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail + or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration) + + any_printing = opts.print_json final_ext = ( opts.recodevideo if opts.recodevideo in FFmpegVideoConvertorPP.SUPPORTED_EXTS @@ -641,11 +634,7 @@ def _real_main(argv=None): else opts.audioformat if (opts.extractaudio and opts.audioformat != 'best') else None) - match_filter = ( - None if opts.match_filter is None - else match_filter_func(opts.match_filter)) - - ydl_opts = { + return parser, opts, urls, { 'usenetrc': opts.usenetrc, 'netrc_location': opts.netrc_location, 'username': opts.username, @@ -713,7 +702,7 @@ def _real_main(argv=None): 'playlistreverse': opts.playlist_reverse, 'playlistrandom': opts.playlist_random, 'noplaylist': opts.noplaylist, - 'logtostderr': outtmpl_default == '-', + 'logtostderr': opts.outtmpl.get('default') == '-', 'consoletitle': opts.consoletitle, 'nopart': opts.nopart, 'updatetime': opts.updatetime, @@ -749,11 +738,11 @@ def _real_main(argv=None): 'max_filesize': opts.max_filesize, 'min_views': opts.min_views, 'max_views': opts.max_views, - 'daterange': date, + 'daterange': opts.date, 'cachedir': opts.cachedir, 'youtube_print_sig_code': opts.youtube_print_sig_code, 'age_limit': opts.age_limit, - 'download_archive': download_archive_fn, + 'download_archive': opts.download_archive, 'break_on_existing': opts.break_on_existing, 'break_on_reject': opts.break_on_reject, 'break_per_url': opts.break_per_url, @@ -794,7 +783,7 @@ def _real_main(argv=None): 'list_thumbnails': opts.list_thumbnails, 'playlist_items': opts.playlist_items, 'xattr_set_filesize': opts.xattr_set_filesize, - 'match_filter': match_filter, + 'match_filter': opts.match_filter, 'no_color': opts.no_color, 'ffmpeg_location': opts.ffmpeg_location, 'hls_prefer_native': opts.hls_prefer_native, @@ -809,9 +798,30 @@ def _real_main(argv=None): 'geo_bypass_ip_block': opts.geo_bypass_ip_block, '_warnings': warnings, '_deprecation_warnings': deprecation_warnings, - 'compat_opts': compat_opts, + 'compat_opts': opts.compat_opts, } + +def _real_main(argv=None): + # Compatibility fixes for Windows + if sys.platform == 'win32': + # https://github.com/ytdl-org/youtube-dl/issues/820 + codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None) + + workaround_optparse_bug9161() + + setproctitle('yt-dlp') + + parser, opts, all_urls, ydl_opts = parse_options(argv) + + # Dump user agent + if opts.dump_user_agent: + ua = traverse_obj(opts.headers, 'User-Agent', casesense=False, default=std_headers['User-Agent']) + write_string(f'{ua}\n', out=sys.stdout) + + if print_extractor_information(opts, all_urls): + sys.exit(0) + with YoutubeDL(ydl_opts) as ydl: actual_use = all_urls or opts.load_info_filename @@ -865,4 +875,10 @@ def main(argv=None): sys.exit(f'\nERROR: {e}') -__all__ = ['main', 'YoutubeDL', 
'gen_extractors', 'list_extractors'] +__all__ = [ + 'main', + 'YoutubeDL', + 'parse_options', + 'gen_extractors', + 'list_extractors', +] diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 17d8d5da6..ce234d298 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1182,7 +1182,7 @@ def create_parser(): help='Do not write video description (default)') filesystem.add_option( '--write-info-json', - action='store_true', dest='writeinfojson', default=False, + action='store_true', dest='writeinfojson', default=None, help='Write video metadata to a .info.json file (this may contain personal information)') filesystem.add_option( '--no-write-info-json', diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 907627381..aee84cf5b 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -404,7 +404,7 @@ class FFmpegPostProcessor(PostProcessor): class FFmpegExtractAudioPP(FFmpegPostProcessor): COMMON_AUDIO_EXTS = ('wav', 'flac', 'm4a', 'aiff', 'mp3', 'ogg', 'mka', 'opus', 'wma') - SUPPORTED_EXTS = ('best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav', 'alac') + SUPPORTED_EXTS = ('aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav', 'alac') def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False): FFmpegPostProcessor.__init__(self, downloader) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index d2a9303c7..10a9a72ff 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -3603,6 +3603,9 @@ def match_str(filter_str, dct, incomplete=False): def match_filter_func(filter_str): + if filter_str is None: + return None + def _match_func(info_dict, *args, **kwargs): if match_str(filter_str, info_dict, *args, **kwargs): return None From aee6ce5867c59214df2381fc1a46b7096dc8c26d Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 9 Mar 2022 01:39:47 +0530 Subject: [PATCH 0783/2552] [build] Fix bug in 08d30158ec8e7e08c1d83dcfde6dba18c95b2640 --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 98781c5da..503599c76 100644 --- a/setup.py +++ b/setup.py @@ -21,9 +21,9 @@ DESCRIPTION = 'A youtube-dl fork with additional features and patches' LONG_DESCRIPTION = '\n\n'.join(( 'Official repository: ', '**PS**: Some links in this document will not work since this is a copy of the README.md from Github', - open('README.md').read())) + open('README.md', encoding='utf-8').read())) -REQUIREMENTS = open('requirements.txt').read().splitlines() +REQUIREMENTS = open('requirements.txt', encoding='utf-8').read().splitlines() if sys.argv[1:2] == ['py2exe']: From c0c2c57d352b90ab7cede1698a377d44c9886869 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 9 Mar 2022 01:52:16 +0530 Subject: [PATCH 0784/2552] Release 2022.03.08.1 --- Changelog.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Changelog.md b/Changelog.md index b9baa8aaa..fd6c26a7c 100644 --- a/Changelog.md +++ b/Changelog.md @@ -11,6 +11,11 @@ --> +### 2022.03.08.1 + +* [cleanup] Refactor `__init__.py` +* [build] Fix bug + ### 2022.03.08 * Merge youtube-dl: Upto [commit/6508688](https://github.com/ytdl-org/youtube-dl/commit/6508688e88c83bb811653083db9351702cd39a6a) (except NDR) From a3b7dff015bf24141763c1125c7c0f7e80c37fd3 Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 8 Mar 2022 20:23:28 +0000 Subject: [PATCH 0785/2552] [version] update Created by: pukkandan :ci skip all --- .github/ISSUE_TEMPLATE/1_broken_site.yml | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.yml | 6 
+++--- .github/ISSUE_TEMPLATE/3_site_feature_request.yml | 6 +++--- .github/ISSUE_TEMPLATE/4_bug_report.yml | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.yml | 2 +- yt_dlp/version.py | 4 ++-- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml index a267b3bf8..4d9187143 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a broken site required: true - - label: I've verified that I'm running yt-dlp version **2022.03.08**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2022.03.08.1**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -51,12 +51,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2022.03.08 (exe) + [debug] yt-dlp version 2022.03.08.1 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2022.03.08) + yt-dlp is up to date (2022.03.08.1) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml index 81c15f6a6..cff73b555 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a new site support request required: true - - label: I've verified that I'm running yt-dlp version **2022.03.08**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2022.03.08.1**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -62,12 +62,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2022.03.08 (exe) + [debug] yt-dlp version 2022.03.08.1 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2022.03.08) + yt-dlp is up to date (2022.03.08.1) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml index fe2c2331b..44012044a 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a site feature request required: true - - label: I've verified that I'm running yt-dlp version **2022.03.08**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2022.03.08.1**. 
([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -60,12 +60,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2022.03.08 (exe) + [debug] yt-dlp version 2022.03.08.1 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2022.03.08) + yt-dlp is up to date (2022.03.08.1) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml index b8f7a9680..d93380725 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a bug unrelated to a specific site required: true - - label: I've verified that I'm running yt-dlp version **2022.03.08**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2022.03.08.1**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -45,12 +45,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2022.03.08 (exe) + [debug] yt-dlp version 2022.03.08.1 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2022.03.08) + yt-dlp is up to date (2022.03.08.1) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml index 73eb25785..51987d533 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/5_feature_request.yml @@ -13,7 +13,7 @@ body: required: true - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - - label: I've verified that I'm running yt-dlp version **2022.03.08**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2022.03.08.1**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. 
DO NOT post duplicates required: true diff --git a/yt_dlp/version.py b/yt_dlp/version.py index b9ac6308a..d5df2af90 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,5 +1,5 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2022.03.08' +__version__ = '2022.03.08.1' -RELEASE_GIT_HEAD = '535eb16a4' +RELEASE_GIT_HEAD = 'c0c2c57d3' From ff91cf748343c41a74b09120896feccd390f91ce Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 9 Mar 2022 02:24:41 +0530 Subject: [PATCH 0786/2552] [utils] Add `get_first` --- yt_dlp/extractor/facebook.py | 9 ++++----- yt_dlp/extractor/tiktok.py | 4 ++-- yt_dlp/extractor/youtube.py | 5 +---- yt_dlp/utils.py | 4 ++++ 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index d39dcc058..ef57b221c 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -18,6 +18,7 @@ from ..utils import ( ExtractorError, float_or_none, get_element_by_id, + get_first, int_or_none, js_to_json, merge_dicts, @@ -405,11 +406,9 @@ class FacebookIE(InfoExtractor): ..., 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or [] media = [m for m in traverse_obj(post, (..., 'attachments', ..., 'media'), expected_type=dict) or [] if str(m.get('id')) == video_id and m.get('__typename') == 'Video'] - title = traverse_obj(media, (..., 'title', 'text'), get_all=False) - description = traverse_obj(media, ( - ..., 'creation_story', 'comet_sections', 'message', 'story', 'message', 'text'), get_all=False) - uploader_data = (traverse_obj(media, (..., 'owner'), get_all=False) - or traverse_obj(post, (..., 'node', 'actors', ...), get_all=False) or {}) + title = get_first(media, ('title', 'text')) + description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text')) + uploader_data = get_first(media, 'owner') or get_first(post, ('node', 'actors', ...)) or {} page_title = title or self._html_search_regex(( r']*class="uiHeaderTitle"[^>]*>(?P[^<]*)', diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 620973a9f..56cc2dcc6 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -15,6 +15,7 @@ from ..compat import ( from ..utils import ( ExtractorError, HEADRequest, + get_first, int_or_none, join_nonempty, LazyList, @@ -816,8 +817,7 @@ class DouyinIE(TikTokIE): render_data = self._parse_json( render_data_json, video_id, transform_source=compat_urllib_parse_unquote) - return self._parse_aweme_video_web( - traverse_obj(render_data, (..., 'aweme', 'detail'), get_all=False), url) + return self._parse_aweme_video_web(get_first(render_data, ('aweme', 'detail')), url) class TikTokVMIE(InfoExtractor): diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index da49df8cd..66bb8d9f0 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -39,6 +39,7 @@ from ..utils import ( ExtractorError, float_or_none, format_field, + get_first, int_or_none, is_html, join_nonempty, @@ -72,10 +73,6 @@ from ..utils import ( ) -def get_first(obj, keys, **kwargs): - return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False) - - # any clients starting with _ cannot be explicity requested by the user INNERTUBE_CLIENTS = { 'web': { diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 10a9a72ff..9b130e109 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -5218,6 +5218,10 @@ def traverse_dict(dictn, keys, casesense=True): return traverse_obj(dictn, keys, 
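     # (context note: is_user_input/traverse_string pin this legacy helper's
     # looser lookup semantics; the new get_first added below is simply
     # traverse_obj over all branches with get_all=False)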
casesense=casesense, is_user_input=True, traverse_string=True) +def get_first(obj, keys, **kwargs): + return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False) + + def variadic(x, allowed_types=(str, bytes, dict)): return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,) From e248be3319c4869536e2ddd87e9bcee6b5e439eb Mon Sep 17 00:00:00 2001 From: coletdev Date: Wed, 9 Mar 2022 10:00:57 +1300 Subject: [PATCH 0787/2552] [panopto] Add extractors (#2908) Based on https://github.com/ytdl-org/youtube-dl/pull/13449 Closes #1946 Authored by: coletdjnz, kmark --- README.md | 3 +- yt_dlp/extractor/extractors.py | 5 + yt_dlp/extractor/generic.py | 13 + yt_dlp/extractor/panopto.py | 445 +++++++++++++++++++++++++++++++++ yt_dlp/options.py | 2 +- 5 files changed, 465 insertions(+), 3 deletions(-) create mode 100644 yt_dlp/extractor/panopto.py diff --git a/README.md b/README.md index f24693c7b..08c4df38f 100644 --- a/README.md +++ b/README.md @@ -367,8 +367,7 @@ You can also fork the project on github and run your fork's [build workflow](.gi available. Pass the minimum number of seconds (or range) to wait between retries --no-wait-for-video Do not wait for scheduled streams (default) - --mark-watched Mark videos watched (even with --simulate). - Currently only supported for YouTube + --mark-watched Mark videos watched (even with --simulate) --no-mark-watched Do not mark videos watched (default) --no-colors Do not emit color codes in output --compat-options OPTS Options that can help keep compatibility diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index f7a879ad9..1b7bbf989 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1152,6 +1152,11 @@ from .palcomp3 import ( PalcoMP3VideoIE, ) from .pandoratv import PandoraTVIE +from .panopto import ( + PanoptoIE, + PanoptoListIE, + PanoptoPlaylistIE +) from .paramountplus import ( ParamountPlusIE, ParamountPlusSeriesIE, diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 0ddd050ff..6a8b8543b 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -146,6 +146,7 @@ from .tvp import TVPEmbedIE from .blogger import BloggerIE from .mainstreaming import MainStreamingIE from .gfycat import GfycatIE +from .panopto import PanoptoBaseIE class GenericIE(InfoExtractor): @@ -2498,6 +2499,15 @@ class GenericIE(InfoExtractor): 'id': '?vid=2295' }, 'playlist_count': 9 + }, + { + # Panopto embeds + 'url': 'https://www.monash.edu/learning-teaching/teachhq/learning-technologies/panopto/how-to/insert-a-quiz-into-a-panopto-video', + 'info_dict': { + 'title': 'Insert a quiz into a Panopto video', + 'id': 'insert-a-quiz-into-a-panopto-video' + }, + 'playlist_count': 1 } ] @@ -3723,6 +3733,9 @@ class GenericIE(InfoExtractor): if gfycat_urls: return self.playlist_from_matches(gfycat_urls, video_id, video_title, ie=GfycatIE.ie_key()) + panopto_urls = PanoptoBaseIE._extract_urls(webpage) + if panopto_urls: + return self.playlist_from_matches(panopto_urls, video_id, video_title) # Look for HTML5 media entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') if entries: diff --git a/yt_dlp/extractor/panopto.py b/yt_dlp/extractor/panopto.py new file mode 100644 index 000000000..d458dfe50 --- /dev/null +++ b/yt_dlp/extractor/panopto.py @@ -0,0 +1,445 @@ +import re +import calendar +import json +import functools +from datetime import datetime +from random import random + +from .common import 
InfoExtractor
+from ..compat import (
+    compat_urllib_parse_urlparse,
+    compat_urlparse
+)
+
+from ..utils import (
+    bug_reports_message,
+    ExtractorError,
+    get_first,
+    int_or_none,
+    OnDemandPagedList,
+    parse_qs,
+    traverse_obj,
+)
+
+
+class PanoptoBaseIE(InfoExtractor):
+    BASE_URL_RE = r'(?P<base_url>https?://[\w.]+\.panopto.(?:com|eu)/Panopto)'
+
+    def _call_api(self, base_url, path, video_id, data=None, fatal=True, **kwargs):
+        response = self._download_json(
+            base_url + path, video_id, data=json.dumps(data).encode('utf8') if data else None,
+            fatal=fatal, headers={'accept': 'application/json', 'content-type': 'application/json'}, **kwargs)
+        if not response:
+            return
+        error_code = response.get('ErrorCode')
+        if error_code == 2:
+            self.raise_login_required(method='cookies')
+        elif error_code is not None:
+            msg = f'Panopto said: {response.get("ErrorMessage")}'
+            if fatal:
+                raise ExtractorError(msg, video_id=video_id, expected=True)
+            else:
+                self.report_warning(msg, video_id=video_id)
+        return response
+
+    @staticmethod
+    def _parse_fragment(url):
+        return {k: json.loads(v[0]) for k, v in compat_urlparse.parse_qs(compat_urllib_parse_urlparse(url).fragment).items()}
+
+    @staticmethod
+    def _extract_urls(webpage):
+        return [m.group('url') for m in re.finditer(
+            r'<iframe[^>]+src=["\'](?P<url>%s/Pages/(Viewer|Embed|Sessions/List)\.aspx[^"\']+)' % PanoptoIE.BASE_URL_RE,
+            webpage)]
+
+
+class PanoptoIE(PanoptoBaseIE):
+    _VALID_URL = PanoptoBaseIE.BASE_URL_RE + r'/Pages/(Viewer|Embed)\.aspx.*(?:\?|&)id=(?P<id>[a-f0-9-]+)'
+    _TESTS = [
+        {
+            'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=26b3ae9e-4a48-4dcc-96ba-0befba08a0fb',
+            'info_dict': {
+                'id': '26b3ae9e-4a48-4dcc-96ba-0befba08a0fb',
+                'title': 'Panopto for Business - Use Cases',
+                'timestamp': 1459184200,
+                'thumbnail': r're:https://demo\.hosted\.panopto\.com/Panopto/Services/FrameGrabber\.svc/FrameRedirect\?objectId=26b3ae9e-4a48-4dcc-96ba-0befba08a0fb&mode=Delivery&random=[\d.]+',
+                'upload_date': '20160328',
+                'ext': 'mp4',
+                'cast': [],
+                'duration': 88.17099999999999,
+                'average_rating': int,
+                'uploader_id': '2db6b718-47a0-4b0b-9e17-ab0b00f42b1e',
+                'channel_id': 'e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a',
+                'channel': 'Showcase Videos'
+            },
+        },
+        {
+            'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=ed01b077-c9e5-4c7b-b8ff-15fa306d7a59',
+            'info_dict': {
+                'id': 'ed01b077-c9e5-4c7b-b8ff-15fa306d7a59',
+                'title': 'Overcoming Top 4 Challenges of Enterprise Video',
+                'uploader': 'Panopto Support',
+                'timestamp': 1449409251,
+                'thumbnail': r're:https://demo\.hosted\.panopto\.com/Panopto/Services/FrameGrabber\.svc/FrameRedirect\?objectId=ed01b077-c9e5-4c7b-b8ff-15fa306d7a59&mode=Delivery&random=[\d.]+',
+                'upload_date': '20151206',
+                'ext': 'mp4',
+                'chapters': 'count:21',
+                'cast': ['Panopto Support'],
+                'uploader_id': 'a96d1a31-b4de-489b-9eee-b4a5b414372c',
+                'average_rating': int,
+                'description': 'md5:4391837802b3fc856dadf630c4b375d1',
+                'duration': 1088.2659999999998,
+                'channel_id': '9f3c1921-43bb-4bda-8b3a-b8d2f05a8546',
+                'channel': 'Webcasts',
+            },
+        },
+        {
+            # Extra params in URL
+            'url': 'https://howtovideos.hosted.panopto.com/Panopto/Pages/Viewer.aspx?randomparam=thisisnotreal&id=5fa74e93-3d87-4694-b60e-aaa4012214ed&advance=true',
+            'info_dict': {
+                'id': '5fa74e93-3d87-4694-b60e-aaa4012214ed',
+                'ext': 'mp4',
+                'duration': 129.513,
+                'cast': ['Kathryn Kelly'],
+                'uploader_id': '316a0a58-7fa2-4cd9-be1c-64270d284a56',
+                'timestamp': 1569845768,
+                'tags': ['Viewer', 'Enterprise'],
+                'upload_date':
'20190930', + 'thumbnail': r're:https://howtovideos\.hosted\.panopto\.com/Panopto/Services/FrameGrabber.svc/FrameRedirect\?objectId=5fa74e93-3d87-4694-b60e-aaa4012214ed&mode=Delivery&random=[\d.]+', + 'description': 'md5:2d844aaa1b1a14ad0e2601a0993b431f', + 'title': 'Getting Started: View a Video', + 'average_rating': int, + 'uploader': 'Kathryn Kelly', + 'channel_id': 'fb93bc3c-6750-4b80-a05b-a921013735d3', + 'channel': 'Getting Started', + } + }, + { + # Does not allow normal Viewer.aspx. AUDIO livestream has no url, so should be skipped and only give one stream. + 'url': 'https://unisa.au.panopto.com/Panopto/Pages/Embed.aspx?id=9d9a0fa3-e99a-4ebd-a281-aac2017f4da4', + 'info_dict': { + 'id': '9d9a0fa3-e99a-4ebd-a281-aac2017f4da4', + 'ext': 'mp4', + 'cast': ['LTS CLI Script'], + 'duration': 2178.45, + 'description': 'md5:ee5cf653919f55b72bce2dbcf829c9fa', + 'channel_id': 'b23e673f-c287-4cb1-8344-aae9005a69f8', + 'average_rating': int, + 'uploader_id': '38377323-6a23-41e2-9ff6-a8e8004bf6f7', + 'uploader': 'LTS CLI Script', + 'timestamp': 1572458134, + 'title': 'WW2 Vets Interview 3 Ronald Stanley George', + 'thumbnail': r're:https://unisa\.au\.panopto\.com/Panopto/Services/FrameGrabber.svc/FrameRedirect\?objectId=9d9a0fa3-e99a-4ebd-a281-aac2017f4da4&mode=Delivery&random=[\d.]+', + 'channel': 'World War II Veteran Interviews', + 'upload_date': '20191030', + }, + }, + { + 'url': 'https://ucc.cloud.panopto.eu/Panopto/Pages/Viewer.aspx?id=0e8484a4-4ceb-4d98-a63f-ac0200b455cb', + 'only_matching': True + }, + { + 'url': 'https://brown.hosted.panopto.com/Panopto/Pages/Embed.aspx?id=0b3ff73b-36a0-46c5-8455-aadf010a3638', + 'only_matching': True + }, + ] + + @classmethod + def suitable(cls, url): + return False if PanoptoPlaylistIE.suitable(url) else super().suitable(url) + + def _mark_watched(self, base_url, video_id, delivery_info): + duration = traverse_obj(delivery_info, ('Delivery', 'Duration'), expected_type=float) + invocation_id = delivery_info.get('InvocationId') + stream_id = traverse_obj(delivery_info, ('Delivery', 'Streams', ..., 'PublicID'), get_all=False, expected_type=str) + if invocation_id and stream_id and duration: + timestamp_str = f'/Date({calendar.timegm(datetime.utcnow().timetuple())}000)/' + data = { + 'streamRequests': [ + { + 'ClientTimeStamp': timestamp_str, + 'ID': 0, + 'InvocationID': invocation_id, + 'PlaybackSpeed': 1, + 'SecondsListened': duration - 1, + 'SecondsRejected': 0, + 'StartPosition': 0, + 'StartReason': 2, + 'StopReason': None, + 'StreamID': stream_id, + 'TimeStamp': timestamp_str, + 'UpdatesRejected': 0 + }, + ]} + + self._download_webpage( + base_url + '/Services/Analytics.svc/AddStreamRequests', video_id, + fatal=False, data=json.dumps(data).encode('utf8'), headers={'content-type': 'application/json'}, + note='Marking watched', errnote='Unable to mark watched') + + @staticmethod + def _extract_chapters(delivery): + chapters = [] + for timestamp in delivery.get('Timestamps', []): + start, duration = int_or_none(timestamp.get('Time')), int_or_none(timestamp.get('Duration')) + if start is None or duration is None: + continue + chapters.append({ + 'start_time': start, + 'end_time': start + duration, + 'title': timestamp.get('Caption') + }) + return chapters + + def _extract_streams_formats_and_subtitles(self, video_id, streams, **fmt_kwargs): + formats = [] + subtitles = {} + for stream in streams or []: + stream_formats = [] + http_stream_url = stream.get('StreamHttpUrl') + stream_url = stream.get('StreamUrl') + + if http_stream_url: + 
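+            # StreamHttpUrl is a direct progressive URL with no manifest to
+            # expand, so it is registered as-is beside any HLS formats parsed below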
stream_formats.append({'url': http_stream_url}) + + if stream_url: + media_type = stream.get('ViewerMediaFileTypeName') + if media_type in ('hls', ): + m3u8_formats, stream_subtitles = self._extract_m3u8_formats_and_subtitles(stream_url, video_id) + stream_formats.extend(m3u8_formats) + subtitles = self._merge_subtitles(subtitles, stream_subtitles) + else: + stream_formats.append({ + 'url': stream_url + }) + for fmt in stream_formats: + fmt.update({ + 'format_note': stream.get('Tag'), + **fmt_kwargs + }) + formats.extend(stream_formats) + + return formats, subtitles + + def _real_extract(self, url): + base_url, video_id = self._match_valid_url(url).group('base_url', 'id') + delivery_info = self._call_api( + base_url, '/Pages/Viewer/DeliveryInfo.aspx', video_id, + query={ + 'deliveryId': video_id, + 'invocationId': '', + 'isLiveNotes': 'false', + 'refreshAuthCookie': 'true', + 'isActiveBroadcast': 'false', + 'isEditing': 'false', + 'isKollectiveAgentInstalled': 'false', + 'isEmbed': 'false', + 'responseType': 'json', + } + ) + + delivery = delivery_info['Delivery'] + session_start_time = int_or_none(delivery.get('SessionStartTime')) + + # Podcast stream is usually the combined streams. We will prefer that by default. + podcast_formats, podcast_subtitles = self._extract_streams_formats_and_subtitles( + video_id, delivery.get('PodcastStreams'), format_note='PODCAST') + + streams_formats, streams_subtitles = self._extract_streams_formats_and_subtitles( + video_id, delivery.get('Streams'), preference=-10) + + formats = podcast_formats + streams_formats + subtitles = self._merge_subtitles(podcast_subtitles, streams_subtitles) + self._sort_formats(formats) + + self.mark_watched(base_url, video_id, delivery_info) + + return { + 'id': video_id, + 'title': delivery.get('SessionName'), + 'cast': traverse_obj(delivery, ('Contributors', ..., 'DisplayName'), default=[], expected_type=lambda x: x or None), + 'timestamp': session_start_time - 11640000000 if session_start_time else None, + 'duration': delivery.get('Duration'), + 'thumbnail': base_url + f'/Services/FrameGrabber.svc/FrameRedirect?objectId={video_id}&mode=Delivery&random={random()}', + 'average_rating': delivery.get('AverageRating'), + 'chapters': self._extract_chapters(delivery) or None, + 'uploader': delivery.get('OwnerDisplayName') or None, + 'uploader_id': delivery.get('OwnerId'), + 'description': delivery.get('SessionAbstract'), + 'tags': traverse_obj(delivery, ('Tags', ..., 'Content')), + 'channel_id': delivery.get('SessionGroupPublicID'), + 'channel': traverse_obj(delivery, 'SessionGroupLongName', 'SessionGroupShortName', get_all=False), + 'formats': formats, + 'subtitles': subtitles + } + + +class PanoptoPlaylistIE(PanoptoBaseIE): + _VALID_URL = PanoptoBaseIE.BASE_URL_RE + r'/Pages/(Viewer|Embed)\.aspx.*(?:\?|&)pid=(?P[a-f0-9-]+)' + _TESTS = [ + { + 'url': 'https://howtovideos.hosted.panopto.com/Panopto/Pages/Viewer.aspx?pid=f3b39fcf-882f-4849-93d6-a9f401236d36&id=5fa74e93-3d87-4694-b60e-aaa4012214ed&advance=true', + 'info_dict': { + 'title': 'Featured Video Tutorials', + 'id': 'f3b39fcf-882f-4849-93d6-a9f401236d36', + 'description': '', + }, + 'playlist_mincount': 36 + }, + { + 'url': 'https://utsa.hosted.panopto.com/Panopto/Pages/Viewer.aspx?pid=e2900555-3ad4-4bdb-854d-ad2401686190', + 'info_dict': { + 'title': 'Library Website Introduction Playlist', + 'id': 'e2900555-3ad4-4bdb-854d-ad2401686190', + 'description': 'md5:f958bca50a1cbda15fdc1e20d32b3ecb', + }, + 'playlist_mincount': 4 + }, + + ] + + def _entries(self, base_url, 
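+                 # (note: each entry is yielded as a '_type': 'url' dict whose
+                 # ViewerUri is normally resolved again through PanoptoIE)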
playlist_id, session_list_id): + session_list_info = self._call_api( + base_url, f'/Api/SessionLists/{session_list_id}?collections[0].maxCount=500&collections[0].name=items', playlist_id) + + items = session_list_info['Items'] + for item in items: + if item.get('TypeName') != 'Session': + self.report_warning('Got an item in the playlist that is not a Session' + bug_reports_message(), only_once=True) + continue + yield { + '_type': 'url', + 'id': item.get('Id'), + 'url': item.get('ViewerUri'), + 'title': item.get('Name'), + 'description': item.get('Description'), + 'duration': item.get('Duration'), + 'channel': traverse_obj(item, ('Parent', 'Name')), + 'channel_id': traverse_obj(item, ('Parent', 'Id')) + } + + def _real_extract(self, url): + base_url, playlist_id = self._match_valid_url(url).group('base_url', 'id') + + video_id = get_first(parse_qs(url), 'id') + if video_id: + if self.get_param('noplaylist'): + self.to_screen('Downloading just video %s because of --no-playlist' % video_id) + return self.url_result(base_url + f'/Pages/Viewer.aspx?id={video_id}', ie_key=PanoptoIE.ie_key(), video_id=video_id) + else: + self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}') + + playlist_info = self._call_api(base_url, f'/Api/Playlists/{playlist_id}', playlist_id) + return self.playlist_result( + self._entries(base_url, playlist_id, playlist_info['SessionListId']), + playlist_id=playlist_id, playlist_title=playlist_info.get('Name'), + playlist_description=playlist_info.get('Description')) + + +class PanoptoListIE(PanoptoBaseIE): + _VALID_URL = PanoptoBaseIE.BASE_URL_RE + r'/Pages/Sessions/List\.aspx' + _PAGE_SIZE = 250 + _TESTS = [ + { + 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#folderID=%22e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a%22', + 'info_dict': { + 'id': 'e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a', + 'title': 'Showcase Videos' + }, + 'playlist_mincount': 140 + + }, + { + 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#view=2&maxResults=250', + 'info_dict': { + 'id': 'panopto_list', + 'title': 'panopto_list' + }, + 'playlist_mincount': 300 + }, + { + # Folder that contains 8 folders and a playlist + 'url': 'https://howtovideos.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx?noredirect=true#folderID=%224b9de7ae-0080-4158-8496-a9ba01692c2e%22', + 'info_dict': { + 'id': '4b9de7ae-0080-4158-8496-a9ba01692c2e', + 'title': 'Video Tutorials' + }, + 'playlist_mincount': 9 + } + + ] + + def _fetch_page(self, base_url, query_params, display_id, page): + + params = { + 'sortColumn': 1, + 'getFolderData': True, + 'includePlaylists': True, + **query_params, + 'page': page, + 'maxResults': self._PAGE_SIZE, + } + + response = self._call_api( + base_url, '/Services/Data.svc/GetSessions', f'{display_id} page {page+1}', + data={'queryParameters': params}, fatal=False) + + for result in get_first(response, 'Results', default=[]): + # This could be a video, playlist (or maybe something else) + item_id = result.get('DeliveryID') + yield { + '_type': 'url', + 'id': item_id, + 'title': result.get('SessionName'), + 'url': traverse_obj(result, 'ViewerUrl', 'EmbedUrl', get_all=False) or (base_url + f'/Pages/Viewer.aspx?id={item_id}'), + 'duration': result.get('Duration'), + 'channel': result.get('FolderName'), + 'channel_id': result.get('FolderID'), + } + + for folder in get_first(response, 'Subfolders', default=[]): + folder_id = folder.get('ID') + yield self.url_result( + base_url + 
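+                # subfolders are re-dispatched through PanoptoListIE itself, using
+                # the same fragment-style folderID URL form this extractor matches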
f'/Pages/Sessions/List.aspx#folderID="{folder_id}"',
+                ie_key=PanoptoListIE.ie_key(), video_id=folder_id, title=folder.get('Name'))
+
+    def _extract_folder_metadata(self, base_url, folder_id):
+        response = self._call_api(
+            base_url, '/Services/Data.svc/GetFolderInfo', folder_id,
+            data={'folderID': folder_id}, fatal=False)
+        return {
+            'title': get_first(response, 'Name', default=[])
+        }
+
+    def _real_extract(self, url):
+        mobj = self._match_valid_url(url)
+        base_url = mobj.group('base_url')
+
+        query_params = self._parse_fragment(url)
+        folder_id, display_id = query_params.get('folderID'), 'panopto_list'
+
+        if query_params.get('isSubscriptionsPage'):
+            display_id = 'subscriptions'
+            if not query_params.get('subscribableTypes'):
+                query_params['subscribableTypes'] = [0, 1, 2]
+        elif query_params.get('isSharedWithMe'):
+            display_id = 'sharedwithme'
+        elif folder_id:
+            display_id = folder_id
+
+        query = query_params.get('query')
+        if query:
+            display_id += f': query "{query}"'
+
+        info = {
+            '_type': 'playlist',
+            'id': display_id,
+            'title': display_id,
+        }
+        if folder_id:
+            info.update(self._extract_folder_metadata(base_url, folder_id))
+
+        info['entries'] = OnDemandPagedList(
+            functools.partial(self._fetch_page, base_url, query_params, display_id), self._PAGE_SIZE)
+
+        return info
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index ce234d298..646ccebcd 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -323,7 +323,7 @@ def create_parser():
     general.add_option(
         '--mark-watched',
         action='store_true', dest='mark_watched', default=False,
-        help='Mark videos watched (even with --simulate). Currently only supported for YouTube')
+        help='Mark videos watched (even with --simulate)')
     general.add_option(
         '--no-mark-watched',
        action='store_false', dest='mark_watched',

From 7a0ba75857e077bbadc43ea7d9ba63519fe3e82a Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Wed, 9 Mar 2022 06:19:06 +0530
Subject: [PATCH 0788/2552] [build] Add `requirements.txt` to pip distributions

Closes #2995
---
 MANIFEST.in | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MANIFEST.in b/MANIFEST.in
index 38d83a9a5..bc2f056c0 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -5,5 +5,6 @@ include README.md
 include completions/*/*
 include supportedsites.md
 include yt-dlp.1
+include requirements.txt
 recursive-include devscripts *
 recursive-include test *

From e491d06d344dde805bf5ea14d63310428cc7edf4 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Wed, 9 Mar 2022 06:42:14 +0530
Subject: [PATCH 0789/2552] [utils] ExtractorError: Fix for older python versions

The single-argument form of `traceback.format_exception` only exists on
Python 3.10+; older versions need the explicit `(etype, value, tb)` arguments.

Closes #2993
---
 yt_dlp/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 9b130e109..97bd33506 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -1085,7 +1085,7 @@ class ExtractorError(YoutubeDLError):
     def format_traceback(self):
         return join_nonempty(
             self.traceback and ''.join(traceback.format_tb(self.traceback)),
-            self.cause and ''.join(traceback.format_exception(self.cause)[1:]),
+            self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]),
             delim='\n') or None

From 6e6beffd0481bc8e8de950dfb617004ba4ab651e Mon Sep 17 00:00:00 2001
From: "Lesmiscore (Naoya Ozaki)"
Date: Wed, 9 Mar 2022 21:08:09 +0900
Subject: [PATCH 0790/2552] [openrec] Refactor extractors (#2941)

Authored by: Lesmiscore
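
The per-field lookups are collapsed into `get_first()` over all three page
stores. A minimal sketch of the fallback behaviour this relies on, assuming
`yt_dlp.utils.get_first` keeps its semantics of returning the first
non-`None` `traverse_obj` hit across the given objects:

    from yt_dlp.utils import get_first

    movie_stores = [
        {'title': None},                    # v8.state.movie
        {'title': 'stream title'},          # v8.movie
        {'introduction': 'a description'},  # movieStore
    ]
    print(get_first(movie_stores, 'title'))         # -> 'stream title'
    print(get_first(movie_stores, 'introduction'))  # -> 'a description'

So a field that is missing (or None) in one store is transparently filled
from the next one.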
---
 yt_dlp/extractor/openrec.py | 68 ++++++++++++++-----------------------
 1 file changed, 26 insertions(+), 42 deletions(-)

diff --git a/yt_dlp/extractor/openrec.py b/yt_dlp/extractor/openrec.py
index 0525b4830..b476c0986 100644
--- a/yt_dlp/extractor/openrec.py
+++ b/yt_dlp/extractor/openrec.py
@@ -4,10 +4,11 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
+    get_first,
     int_or_none,
     traverse_obj,
     unified_strdate,
-    unified_timestamp
+    unified_timestamp,
 )
 from ..compat import compat_str
@@ -19,42 +20,34 @@ class OpenRecBaseIE(InfoExtractor):
     def _extract_movie(self, webpage, video_id, name, is_live):
         window_stores = self._extract_pagestore(webpage, video_id)
-        movie_store = traverse_obj(
-            window_stores,
-            ('v8', 'state', 'movie'),
-            ('v8', 'movie'),
-            expected_type=dict)
-        if not movie_store:
+        movie_stores = [
+            # extract the three important data stores (most of the data is duplicated between them, but slightly different!)
+            traverse_obj(window_stores, ('v8', 'state', 'movie'), expected_type=dict),
+            traverse_obj(window_stores, ('v8', 'movie'), expected_type=dict),
+            traverse_obj(window_stores, 'movieStore', expected_type=dict),
+        ]
+        if not any(movie_stores):
             raise ExtractorError(f'Failed to extract {name} info')

-        title = movie_store.get('title')
-        description = movie_store.get('introduction')
-        thumbnail = movie_store.get('thumbnailUrl')
-
-        uploader = traverse_obj(movie_store, ('channel', 'user', 'name'), expected_type=compat_str)
-        uploader_id = traverse_obj(movie_store, ('channel', 'user', 'id'), expected_type=compat_str)
-
-        timestamp = int_or_none(traverse_obj(movie_store, ('publishedAt', 'time')), scale=1000)
-
-        m3u8_playlists = movie_store.get('media') or {}
+        m3u8_playlists = get_first(movie_stores, 'media') or {}
         formats = []
         for name, m3u8_url in m3u8_playlists.items():
             if not m3u8_url:
                 continue
             formats.extend(self._extract_m3u8_formats(
-                m3u8_url, video_id, ext='mp4', live=is_live, m3u8_id='hls-%s' % name))
+                m3u8_url, video_id, ext='mp4', live=is_live, m3u8_id=name))

         self._sort_formats(formats)

         return {
             'id': video_id,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
+            'title': get_first(movie_stores, 'title'),
+            'description': get_first(movie_stores, 'introduction'),
+            'thumbnail': get_first(movie_stores, 'thumbnailUrl'),
             'formats': formats,
-            'uploader': uploader,
-            'uploader_id': uploader_id,
-            'timestamp': timestamp,
+            'uploader': get_first(movie_stores, ('channel', 'user', 'name')),
+            'uploader_id': get_first(movie_stores, ('channel', 'user', 'id')),
+            'timestamp': int_or_none(get_first(movie_stores, ['publishedAt', 'time']), scale=1000) or unified_timestamp(get_first(movie_stores, 'publishedAt')),
             'is_live': is_live,
         }
@@ -72,7 +65,7 @@ class OpenRecIE(OpenRecBaseIE):

     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage('https://www.openrec.tv/live/%s' % video_id, video_id)
+        webpage = self._download_webpage(f'https://www.openrec.tv/live/{video_id}', video_id)

         return self._extract_movie(webpage, video_id, 'live', True)
@@ -96,7 +89,7 @@ class OpenRecCaptureIE(OpenRecBaseIE):

     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage('https://www.openrec.tv/capture/%s' % video_id, video_id)
+        webpage = self._download_webpage(f'https://www.openrec.tv/capture/{video_id}', video_id)

         window_stores = self._extract_pagestore(webpage, video_id)
         movie_store = window_stores.get('movie')
@@ -104,15 +97,6 @@ class OpenRecCaptureIE(OpenRecBaseIE):
         capture_data = window_stores.get('capture')
         if not capture_data:
             raise ExtractorError('Cannot extract title')
-        title =
capture_data.get('title')
-        thumbnail = capture_data.get('thumbnailUrl')
-        upload_date = unified_strdate(capture_data.get('createdAt'))
-
-        uploader = traverse_obj(movie_store, ('channel', 'name'), expected_type=compat_str)
-        uploader_id = traverse_obj(movie_store, ('channel', 'id'), expected_type=compat_str)
-
-        timestamp = traverse_obj(movie_store, 'createdAt', expected_type=compat_str)
-        timestamp = unified_timestamp(timestamp)

         formats = self._extract_m3u8_formats(
             capture_data.get('source'), video_id, ext='mp4')
@@ -120,13 +104,13 @@ class OpenRecCaptureIE(OpenRecBaseIE):

         return {
             'id': video_id,
-            'title': title,
-            'thumbnail': thumbnail,
+            'title': capture_data.get('title'),
+            'thumbnail': capture_data.get('thumbnailUrl'),
             'formats': formats,
-            'timestamp': timestamp,
-            'uploader': uploader,
-            'uploader_id': uploader_id,
-            'upload_date': upload_date,
+            'timestamp': unified_timestamp(traverse_obj(movie_store, 'createdAt', expected_type=compat_str)),
+            'uploader': traverse_obj(movie_store, ('channel', 'name'), expected_type=compat_str),
+            'uploader_id': traverse_obj(movie_store, ('channel', 'id'), expected_type=compat_str),
+            'upload_date': unified_strdate(capture_data.get('createdAt')),
         }
@@ -148,6 +132,6 @@ class OpenRecMovieIE(OpenRecBaseIE):

     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage('https://www.openrec.tv/movie/%s' % video_id, video_id)
+        webpage = self._download_webpage(f'https://www.openrec.tv/movie/{video_id}', video_id)

         return self._extract_movie(webpage, video_id, 'movie', False)

From 10331a2672bd1fdcbe72f7ca60d1b6202c3783a6 Mon Sep 17 00:00:00 2001
From: MMM
Date: Wed, 9 Mar 2022 22:12:23 +0100
Subject: [PATCH 0791/2552] Fix `--print` with `--ignore-no-formats` when url
 is `None` (#3000)

Authored by: flashdagger
---
 yt_dlp/YoutubeDL.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 3ee3ed7d2..355369c21 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -2777,7 +2777,7 @@ class YoutubeDL(object):
         if info_dict.get('requested_formats') is not None:
             # For RTMP URLs, also include the playpath
             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
-        elif 'url' in info_dict:
+        elif info_dict.get('url'):
             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')

         if (self.params.get('forcejson')

From 33b8c411bc240fb7860ff2220344e9cbd4989933 Mon Sep 17 00:00:00 2001
From: Ha Tien Loi
Date: Thu, 10 Mar 2022 04:54:26 +0700
Subject: [PATCH 0792/2552] [MangoTV] Improve extractor (#2971)

Authored by: hatienl0i261299
---
 yt_dlp/extractor/mgtv.py | 59 ++++++++++++++++++++++++++++++++++--
 1 file changed, 56 insertions(+), 3 deletions(-)

diff --git a/yt_dlp/extractor/mgtv.py b/yt_dlp/extractor/mgtv.py
index cab3aa045..4ac70ea57 100644
--- a/yt_dlp/extractor/mgtv.py
+++ b/yt_dlp/extractor/mgtv.py
@@ -13,12 +13,15 @@ from ..compat import (
 from ..utils import (
     ExtractorError,
     int_or_none,
+    try_get,
+    url_or_none,
 )


 class MGTVIE(InfoExtractor):
     _VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
     IE_DESC = '芒果TV'
+    IE_NAME = 'MangoTV'

     _TESTS = [{
         'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
@@ -30,6 +33,32 @@ class MGTVIE(InfoExtractor):
             'duration': 7461,
             'thumbnail': r're:^https?://.*\.jpg$',
         },
+    }, {
+        'url': 'https://w.mgtv.com/b/427837/15588271.html',
+        'info_dict': {
+            'id': '15588271',
+            'ext': 'mp4',
+            'title': '春日迟迟再出发 沉浸版',
+            'description':
'md5:a7a05a05b1aa87bd50cae619b19bbca6',
+            'thumbnail': r're:^https?://.+\.jpg',
+            'duration': 4026,
+        },
+    }, {
+        'url': 'https://w.mgtv.com/b/333652/7329822.html',
+        'info_dict': {
+            'id': '7329822',
+            'ext': 'mp4',
+            'title': '拜托,请你爱我',
+            'description': 'md5:cd81be6499bafe32e4d143abd822bf9c',
+            'thumbnail': r're:^https?://.+\.jpg',
+            'duration': 2656,
+        },
+    }, {
+        'url': 'https://w.mgtv.com/b/427837/15591647.html',
+        'only_matching': True,
+    }, {
+        'url': 'https://w.mgtv.com/b/388252/15634192.html?fpa=33318&fpos=4&lastp=ch_home',
+        'only_matching': True,
     }, {
         'url': 'http://www.mgtv.com/b/301817/3826653.html',
         'only_matching': True,
@@ -40,12 +69,14 @@ class MGTVIE(InfoExtractor):

     def _real_extract(self, url):
         video_id = self._match_id(url)
-        tk2 = base64.urlsafe_b64encode(b'did=%s|pno=1030|ver=0.3.0301|clit=%d' % (compat_str(uuid.uuid4()).encode(), time.time()))[::-1]
+        tk2 = base64.urlsafe_b64encode(
+            f'did={compat_str(uuid.uuid4())}|pno=1030|ver=0.3.0301|clit={int(time.time())}'.encode())[::-1]
         try:
             api_data = self._download_json(
                 'https://pcweb.api.mgtv.com/player/video', video_id, query={
                     'tk2': tk2,
                     'video_id': video_id,
+                    'type': 'pch5'
                 }, headers=self.geo_verification_headers())['data']
         except ExtractorError as e:
             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
@@ -61,6 +92,7 @@ class MGTVIE(InfoExtractor):
                 'pm2': api_data['atc']['pm2'],
                 'tk2': tk2,
                 'video_id': video_id,
+                'src': 'intelmgtv',
             }, headers=self.geo_verification_headers())['data']
         stream_domain = stream_data['stream_domain'][0]
@@ -71,7 +103,7 @@ class MGTVIE(InfoExtractor):
                 continue
             format_data = self._download_json(
                 stream_domain + stream_path, video_id,
-                note='Download video info for format #%d' % idx)
+                note=f'Download video info for format #{idx}')
             format_url = format_data.get('info')
             if not format_url:
                 continue
@@ -79,7 +111,7 @@ class MGTVIE(InfoExtractor):
                 r'_(\d+)_mp4/', format_url, 'tbr', default=None))
             formats.append({
                 'format_id': compat_str(tbr or idx),
-                'url': format_url,
+                'url': url_or_none(format_url),
                 'ext': 'mp4',
                 'tbr': tbr,
                 'protocol': 'm3u8_native',
@@ -97,4 +129,25 @@ class MGTVIE(InfoExtractor):
             'description': info.get('desc'),
             'duration': int_or_none(info.get('duration')),
             'thumbnail': info.get('thumb'),
+            'subtitles': self.extract_subtitles(video_id, stream_domain),
         }
+
+    def _get_subtitles(self, video_id, domain):
+        info = self._download_json(f'https://pcweb.api.mgtv.com/video/title?videoId={video_id}',
+                                   video_id, fatal=False) or {}
+        subtitles = {}
+        for sub in try_get(info, lambda x: x['data']['title']) or []:
+            url_sub = sub.get('url')
+            if not url_sub:
+                continue
+            locale = sub.get('captionCountrySimpleName')
+            sub = self._download_json(f'{domain}{url_sub}', video_id, fatal=False,
+                                      note=f'Download subtitle for locale {sub.get("name")} ({locale})') or {}
+            sub_url = url_or_none(sub.get('info'))
+            if not sub_url:
+                continue
+            subtitles.setdefault(locale or 'en', []).append({
+                'url': sub_url,
+                'ext': 'srt'
+            })
+        return subtitles

From 51c22ef4e2af966d6100d0d97d9e8019022df8ad Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Thu, 10 Mar 2022 03:25:38 +0530
Subject: [PATCH 0793/2552] Fix `--throttled-rate`

Typo in d1b5f70bc9f9dcda1544b88b42ecc25f7f7aa1c7
Closes #2996
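
The typo assigned the parsed throttled limit back to `opts.ratelimit`, so
`--throttled-rate` silently clobbered `--limit-rate` and
`opts.throttledratelimit` itself was never parsed. A rough annotated sketch
of the old flow (names as in `validate_options` below):

    # before this fix:
    opts.ratelimit = parse_bytes('rate limit', opts.ratelimit)
    opts.ratelimit = parse_bytes('throttled rate limit', opts.throttledratelimit)
    # ^ overwrites the --limit-rate value; opts.throttledratelimit stays unparsed
---
 yt_dlp/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index 8221ec544..0fc517b99 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -255,7 +255,7 @@ def validate_options(opts):
         return numeric_limit

     opts.ratelimit = 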
parse_bytes('rate limit', opts.ratelimit) - opts.ratelimit = parse_bytes('throttled rate limit', opts.throttledratelimit) + opts.throttledratelimit = parse_bytes('throttled rate limit', opts.throttledratelimit) opts.min_filesize = parse_bytes('min filesize', opts.min_filesize) opts.max_filesize = parse_bytes('max filesize', opts.max_filesize) opts.buffersize = parse_bytes('buffer size', opts.buffersize) From 07ff290dce6713a80e4fe92fcf24cb23d07c5ce6 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 10 Mar 2022 11:38:34 +0530 Subject: [PATCH 0794/2552] Fix `--sleep-interval` Bug in d1b5f70bc9f9dcda1544b88b42ecc25f7f7aa1c7 Closes #3012 --- yt_dlp/__init__.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 0fc517b99..2408d4129 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -198,10 +198,13 @@ def validate_options(opts): validate_positive('requests sleep interval', opts.sleep_interval_requests) validate_positive('sleep interval', opts.sleep_interval) validate_positive('max sleep interval', opts.max_sleep_interval) - if opts.max_sleep_interval is not None: + if opts.sleep_interval is None: validate( - opts.sleep_interval is not None, 'min sleep interval', + opts.max_sleep_interval is None, 'min sleep interval', msg='{name} must be specified; use --min-sleep-interval') + elif opts.max_sleep_interval is None: + opts.max_sleep_interval = opts.sleep_interval + else: validate_minmax(opts.sleep_interval, opts.max_sleep_interval, 'sleep interval') if opts.wait_for_video is not None: From 63b2f88bc77f3fa8388f2ec329f270672d4430b1 Mon Sep 17 00:00:00 2001 From: Ha Tien Loi Date: Thu, 10 Mar 2022 13:13:19 +0700 Subject: [PATCH 0795/2552] [Zingmp3] Fix signature (#3004) Authored by: hatienl0i261299 --- yt_dlp/extractor/zingmp3.py | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/zingmp3.py b/yt_dlp/extractor/zingmp3.py index 22c62e22e..419bf30d8 100644 --- a/yt_dlp/extractor/zingmp3.py +++ b/yt_dlp/extractor/zingmp3.py @@ -9,7 +9,6 @@ from .common import InfoExtractor from ..utils import ( int_or_none, traverse_obj, - HEADRequest, ) @@ -106,18 +105,17 @@ class ZingMp3BaseIE(InfoExtractor): def _real_initialize(self): if not self.get_param('cookiefile') and not self.get_param('cookiesfrombrowser'): - self._request_webpage(HEADRequest(self._DOMAIN), None, note='Updating cookies') + self._request_webpage(self.get_api_with_signature(name_api=self._SLUG_API['bai-hat'], param={'id': ''}), + None, note='Updating cookies') def _real_extract(self, url): song_id, type_url = self._match_valid_url(url).group('id', 'type') - api = self.get_api_with_signature(name_api=self._SLUG_API[type_url], param={'id': song_id}) - return self._process_data(self._download_json(api, song_id)['data'], song_id, type_url) def get_api_with_signature(self, name_api, param): - sha256 = hashlib.sha256(''.join(f'{k}={v}' for k, v in param.items()).encode('utf-8')).hexdigest() - + param.update({'ctime': '1'}) + sha256 = hashlib.sha256(''.join(f'{i}={param[i]}' for i in sorted(param)).encode('utf-8')).hexdigest() data = { 'apiKey': self._API_KEY, 'sig': hmac.new(self._SECRET_KEY, f'{name_api}{sha256}'.encode('utf-8'), hashlib.sha512).hexdigest(), @@ -161,6 +159,20 @@ class ZingMp3IE(ZingMp3BaseIE): 'album': 'Sương Hoa Đưa Lối (Single)', 'album_artist': 'K-ICM, RYO', }, + }, { + 'url': 'https://zingmp3.vn/bai-hat/Nguoi-Yeu-Toi-Lanh-Lung-Sat-Da-Mr-Siro/ZZ6IW7OU.html', + 'md5': 
'3e9f7a9bd0d965573dbff8d7c68b629d', + 'info_dict': { + 'id': 'ZZ6IW7OU', + 'title': 'Người Yêu Tôi Lạnh Lùng Sắt Đá', + 'ext': 'mp3', + 'thumbnail': r're:^https?://.+\.jpg', + 'duration': 303, + 'track': 'Người Yêu Tôi Lạnh Lùng Sắt Đá', + 'artist': 'Mr. Siro', + 'album': 'Người Yêu Tôi Lạnh Lùng Sắt Đá (Single)', + 'album_artist': 'Mr. Siro', + }, }, { 'url': 'https://zingmp3.vn/embed/song/ZWZEI76B?start=false', 'only_matching': True, @@ -185,6 +197,14 @@ class ZingMp3AlbumIE(ZingMp3BaseIE): 'title': 'Lâu Đài Tình Ái', }, 'playlist_count': 9, + }, { + 'url': 'https://zingmp3.vn/album/Nhung-Bai-Hat-Hay-Nhat-Cua-Mr-Siro-Mr-Siro/ZWZAEZZD.html', + 'info_dict': { + '_type': 'playlist', + 'id': 'ZWZAEZZD', + 'title': 'Những Bài Hát Hay Nhất Của Mr. Siro', + }, + 'playlist_count': 49, }, { 'url': 'http://mp3.zing.vn/playlist/Duong-Hong-Loan-apollobee/IWCAACCB.html', 'only_matching': True, From 24e3d87431855fd30ecf738ed7ddc6d89c6b5ef9 Mon Sep 17 00:00:00 2001 From: "Lesmiscore (Naoya Ozaki)" Date: Thu, 10 Mar 2022 22:24:50 +0900 Subject: [PATCH 0796/2552] [PokemonSoundLibrary] Add extractor (#3001) Authored by: Lesmiscore --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/pokemon.py | 40 ++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 1b7bbf989..09b795c56 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1225,6 +1225,7 @@ from .podomatic import PodomaticIE from .pokemon import ( PokemonIE, PokemonWatchIE, + PokemonSoundLibraryIE, ) from .pokergo import ( PokerGoIE, diff --git a/yt_dlp/extractor/pokemon.py b/yt_dlp/extractor/pokemon.py index 402b574a7..b411390e2 100644 --- a/yt_dlp/extractor/pokemon.py +++ b/yt_dlp/extractor/pokemon.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals +import re from .common import InfoExtractor from ..utils import ( @@ -138,3 +139,42 @@ class PokemonWatchIE(InfoExtractor): 'episode': video_data.get('title'), 'episode_number': int_or_none(video_data.get('episode')), }) + + +class PokemonSoundLibraryIE(InfoExtractor): + _VALID_URL = r'https?://soundlibrary\.pokemon\.co\.jp' + + _TESTS = [{ + 'url': 'https://soundlibrary.pokemon.co.jp/', + 'info_dict': { + 'title': 'Pokémon Diamond and Pearl Sound Tracks', + }, + 'playlist_mincount': 149, + }] + + def _real_extract(self, url): + musicbox_webpage = self._download_webpage( + 'https://soundlibrary.pokemon.co.jp/musicbox', None, + 'Downloading list of songs') + song_titles = [x.group(1) for x in re.finditer(r'([^>]+?)
をてもち曲に加えます。', musicbox_webpage)]
+        song_titles = song_titles[4::2]
+
+        # each song doesn't have a permalink; instead we return all songs at once
+        song_entries = [{
+            'id': f'pokemon-soundlibrary-{song_id}',
+            'url': f'https://soundlibrary.pokemon.co.jp/api/assets/signing/sounds/wav/{song_id}.wav',
+            # note: the server always serves MP3 files, despite the .wav extension in the URL above
+            'ext': 'mp3',
+            'acodec': 'mp3',
+            'vcodec': 'none',
+            'title': song_title,
+            'track': song_title,
+            'artist': 'Nintendo / Creatures Inc. / GAME FREAK inc.',
+            'uploader': 'Pokémon',
+            'release_year': 2006,
+            'release_date': '20060928',
+            'track_number': song_id,
+            'album': 'Pokémon Diamond and Pearl',
+        } for song_id, song_title in enumerate(song_titles, 1)]
+
+        return self.playlist_result(song_entries, playlist_title='Pokémon Diamond and Pearl Sound Tracks')

From 76aa99137475698941a86edd010d67ff4caa88a2 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Fri, 11 Mar 2022 14:24:45 +0530
Subject: [PATCH 0797/2552] Fix case of `http_headers`

Bug in 8b7539d27c0a47d8d08e0522bdb66c571483377b
(`'content-type'.capitalize()` yields `'Content-type'`, while
`'content-type'.title()` yields the conventional `'Content-Type'`)

Fixes https://github.com/yt-dlp/yt-dlp/issues/1346#issuecomment-1064527765
---
 yt_dlp/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 97bd33506..38aeb6918 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -5489,4 +5489,4 @@ has_websockets = bool(compat_websockets)

 def merge_headers(*dicts):
     """Merge dicts of http headers case insensitively, prioritizing the latter ones"""
-    return {k.capitalize(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}
+    return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}

From 2b38f7b2bcb327b376c896211cd727690dbe5aa1 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Fri, 11 Mar 2022 14:28:27 +0530
Subject: [PATCH 0798/2552] [MetadataParser] Validate outtmpl early
---
 yt_dlp/postprocessor/metadataparser.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/postprocessor/metadataparser.py b/yt_dlp/postprocessor/metadataparser.py
index 5452b92d8..5bc435da3 100644
--- a/yt_dlp/postprocessor/metadataparser.py
+++ b/yt_dlp/postprocessor/metadataparser.py
@@ -1,5 +1,4 @@
 import re
-
 from enum import Enum

 from .common import PostProcessor
@@ -26,12 +25,17 @@ class MetadataParserPP(PostProcessor):
         '''
         if not isinstance(action, cls.Actions):
             raise ValueError(f'{action!r} is not a valid action')
-        getattr(cls, action.value)(cls, *data)
+        getattr(cls, action.value)(cls, *data)  # So this can raise an error to validate

     @staticmethod
     def field_to_template(tmpl):
         if re.match(r'[a-zA-Z_]+$', tmpl):
             return f'%({tmpl})s'
+
+        from ..YoutubeDL import YoutubeDL
+        err = YoutubeDL.validate_outtmpl(tmpl)
+        if err:
+            raise err
         return tmpl

     @staticmethod

From da1d734fbe267711aeeb9a10493b5b58d9179a0f Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Fri, 11 Mar 2022 14:29:11 +0530
Subject: [PATCH 0799/2552] Remove incorrect warning for `--dateafter`

Closes #3030
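
`opts.date` was being materialized into a `DateRange` before the conflict
checks ran, so giving only `--dateafter` (or `--datebefore`) made both
attributes non-`None` and `report_conflict` always fired. In outline (names
as in `validate_options` below):

    # before this change, with only --dateafter given:
    opts.date = DateRange(opts.dateafter, opts.datebefore)  # opts.date is now truthy
    ...
    report_conflict('--dateafter', 'dateafter', '--date', 'date', default=None)
    # -> both values differ from the default, so a bogus "ignored" warning
    #    fired and opts.dateafter was reset via setattr(opts, opt1, default)

Constructing the `DateRange` only in the dependent-options section avoids this.
---
 yt_dlp/__init__.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index 2408d4129..9138ec464 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -358,7 +358,6 @@ def validate_options(opts):
         raise ValueError('unsupported geo-bypass country or ip-block')

     opts.match_filter = match_filter_func(opts.match_filter)
-    opts.date = DateRange.day(opts.date) if opts.date else DateRange(opts.dateafter, opts.datebefore)

     if opts.download_archive is not None: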
opts.download_archive = expand_path(opts.download_archive) @@ -406,8 +405,8 @@ def validate_options(opts): setattr(opts, opt1, default) # Conflicting options - report_conflict('--date-after', 'dateafter', '--date', 'date', default=None) - report_conflict('--date-before', 'datebefore', '--date', 'date', default=None) + report_conflict('--dateafter', 'dateafter', '--date', 'date', default=None) + report_conflict('--datebefore', 'datebefore', '--date', 'date', default=None) report_conflict('--exec-before-download', 'exec_before_dl_cmd', '"--exec before_dl:"', 'exec_cmd', opts.exec_cmd.get('before_dl')) report_conflict('--id', 'useid', '--output', 'outtmpl', val2=opts.outtmpl.get('default')) report_conflict('--remux-video', 'remuxvideo', '--recode-video', 'recodevideo') @@ -446,6 +445,8 @@ def validate_options(opts): # report_deprecation(opts.writeannotations, '--write-annotations') # It's just that no website has it # Dependent options + opts.date = DateRange.day(opts.date) if opts.date else DateRange(opts.dateafter, opts.datebefore) + if opts.exec_before_dl_cmd: opts.exec_cmd['before_dl'] = opts.exec_before_dl_cmd From cf4f42cb9776eaa3166d2d234c3ec7651f05d7a9 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 11 Mar 2022 19:28:21 +0530 Subject: [PATCH 0800/2552] Protect stdout from unexpected progress and console-title Closes #3023 --- yt_dlp/YoutubeDL.py | 73 ++++++++++++++++++++----------------- yt_dlp/downloader/common.py | 6 +-- 2 files changed, 43 insertions(+), 36 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 355369c21..014b9db0c 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -547,15 +547,20 @@ class YoutubeDL(object): self._download_retcode = 0 self._num_downloads = 0 self._num_videos = 0 - self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] - self._err_file = sys.stderr self.params = params self.cache = Cache(self) windows_enable_vt_mode() + self._out_files = { + 'error': sys.stderr, + 'print': sys.stderr if self.params.get('logtostderr') else sys.stdout, + 'console': None if compat_os_name == 'nt' else next( + filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None) + } + self._out_files['screen'] = sys.stderr if self.params.get('quiet') else self._out_files['print'] self._allow_colors = { - 'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file), - 'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file), + type_: not self.params.get('no_color') and supports_terminal_sequences(self._out_files[type_]) + for type_ in ('screen', 'error') } if sys.version_info < (3, 6): @@ -620,7 +625,7 @@ class YoutubeDL(object): sp_kwargs = dict( stdin=subprocess.PIPE, stdout=slave, - stderr=self._err_file) + stderr=self._out_files['error']) try: self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs) except OSError: @@ -788,14 +793,24 @@ class YoutubeDL(object): self._printed_messages.add(message) write_string(message, out=out, encoding=self.params.get('encoding')) - def to_stdout(self, message, skip_eol=False, quiet=False): + def to_stdout(self, message, skip_eol=False, quiet=None): """Print message to stdout""" + if quiet is not None: + self.deprecation_warning('"ydl.to_stdout" no longer accepts the argument quiet. 
Use "ydl.to_screen" instead') + self._write_string( + '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')), + self._out_files['print']) + + def to_screen(self, message, skip_eol=False, quiet=None): + """Print message to screen if not in quiet mode""" if self.params.get('logger'): self.params['logger'].debug(message) - elif not quiet or self.params.get('verbose'): - self._write_string( - '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')), - self._err_file if quiet else self._screen_file) + return + if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'): + return + self._write_string( + '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')), + self._out_files['screen']) def to_stderr(self, message, only_once=False): """Print message to stderr""" @@ -803,7 +818,12 @@ class YoutubeDL(object): if self.params.get('logger'): self.params['logger'].error(message) else: - self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once) + self._write_string('%s\n' % self._bidi_workaround(message), self._out_files['error'], only_once=only_once) + + def _send_console_code(self, code): + if compat_os_name == 'nt' or not self._out_files['console']: + return + self._write_string(code, self._out_files['console']) def to_console_title(self, message): if not self.params.get('consoletitle', False): @@ -814,26 +834,18 @@ class YoutubeDL(object): # c_wchar_p() might not be necessary if `message` is # already of type unicode() ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message)) - elif 'TERM' in os.environ: - self._write_string('\033]0;%s\007' % message, self._screen_file) + else: + self._send_console_code(f'\033]0;{message}\007') def save_console_title(self): - if not self.params.get('consoletitle', False): + if not self.params.get('consoletitle') or self.params.get('simulate'): return - if self.params.get('simulate'): - return - if compat_os_name != 'nt' and 'TERM' in os.environ: - # Save the title on stack - self._write_string('\033[22;0t', self._screen_file) + self._send_console_code('\033[22;0t') # Save the title on stack def restore_console_title(self): - if not self.params.get('consoletitle', False): + if not self.params.get('consoletitle') or self.params.get('simulate'): return - if self.params.get('simulate'): - return - if compat_os_name != 'nt' and 'TERM' in os.environ: - # Restore the title from stack - self._write_string('\033[23;0t', self._screen_file) + self._send_console_code('\033[23;0t') # Restore the title from stack def __enter__(self): self.save_console_title() @@ -879,11 +891,6 @@ class YoutubeDL(object): raise DownloadError(message, exc_info) self._download_retcode = 1 - def to_screen(self, message, skip_eol=False): - """Print message to stdout if not in quiet mode""" - self.to_stdout( - message, skip_eol, quiet=self.params.get('quiet', False)) - class Styles(Enum): HEADERS = 'yellow' EMPHASIS = 'light blue' @@ -907,11 +914,11 @@ class YoutubeDL(object): def _format_screen(self, *args, **kwargs): return self._format_text( - self._screen_file, self._allow_colors['screen'], *args, **kwargs) + self._out_files['screen'], self._allow_colors['screen'], *args, **kwargs) def _format_err(self, *args, **kwargs): return self._format_text( - self._err_file, self._allow_colors['err'], *args, **kwargs) + self._out_files['error'], self._allow_colors['error'], *args, **kwargs) def report_warning(self, message, only_once=False): ''' @@ -3604,7 +3611,7 @@ class 
YoutubeDL(object): encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % ( locale.getpreferredencoding(), sys.getfilesystemencoding(), - get_encoding(self._screen_file), get_encoding(self._err_file), + get_encoding(self._out_files['screen']), get_encoding(self._out_files['error']), self.get_encoding()) logger = self.params.get('logger') diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index 3a949d38a..afd2f2e38 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -159,7 +159,7 @@ class FileDownloader(object): return int(round(number * multiplier)) def to_screen(self, *args, **kargs): - self.ydl.to_stdout(*args, quiet=self.params.get('quiet'), **kargs) + self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs) def to_stderr(self, message): self.ydl.to_stderr(message) @@ -277,9 +277,9 @@ class FileDownloader(object): elif self.ydl.params.get('logger'): self._multiline = MultilineLogger(self.ydl.params['logger'], lines) elif self.params.get('progress_with_newline'): - self._multiline = BreaklineStatusPrinter(self.ydl._screen_file, lines) + self._multiline = BreaklineStatusPrinter(self.ydl._out_files['screen'], lines) else: - self._multiline = MultilinePrinter(self.ydl._screen_file, lines, not self.params.get('quiet')) + self._multiline = MultilinePrinter(self.ydl._out_files['screen'], lines, not self.params.get('quiet')) self._multiline.allow_colors = self._multiline._HAVE_FULLCAP and not self.params.get('no_color') def _finish_multiline_status(self): From 592b748582f67309ac79670ba6a4bb7a62c4506f Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 11 Mar 2022 19:35:26 +0530 Subject: [PATCH 0801/2552] [cleanup] Minor cleanup Closes #3006 --- README.md | 2 +- yt_dlp/YoutubeDL.py | 19 +++++-------------- yt_dlp/downloader/youtube_live_chat.py | 2 +- yt_dlp/utils.py | 2 +- 4 files changed, 8 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 08c4df38f..9d63a75bc 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t # NEW FEATURES -* Based on **youtube-dl 2021.12.17 [commit/5add3f4](https://github.com/ytdl-org/youtube-dl/commit/5add3f4373287e6346ca3551239edab549284db3)** and **youtube-dlc 2020.11.11-3 [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl) +* Based on **youtube-dl 2021.12.17 [commit/6508688](https://github.com/ytdl-org/youtube-dl/commit/6508688e88c83bb811653083db9351702cd39a6a)** and **youtube-dlc 2020.11.11-3 [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl) * **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in youtube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 014b9db0c..d6b284529 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -518,17 +518,6 @@ class YoutubeDL(object): 'storyboards': {'mhtml'}, } - params = None - _ies = {} - _pps = {k: [] for k in POSTPROCESS_WHEN} - _printed_messages = set() - 
_first_webpage_request = True - _download_retcode = None - _num_downloads = None - _playlist_level = 0 - _playlist_urls = set() - _screen_file = None - def __init__(self, params=None, auto_init=True): """Create a FileDownloader object with the given options. @param auto_init Whether to load the default extractors and print header (if verbose). @@ -536,6 +525,7 @@ class YoutubeDL(object): """ if params is None: params = {} + self.params = params self._ies = {} self._ies_instances = {} self._pps = {k: [] for k in POSTPROCESS_WHEN} @@ -547,7 +537,8 @@ class YoutubeDL(object): self._download_retcode = 0 self._num_downloads = 0 self._num_videos = 0 - self.params = params + self._playlist_level = 0 + self._playlist_urls = set() self.cache = Cache(self) windows_enable_vt_mode() @@ -840,12 +831,12 @@ class YoutubeDL(object): def save_console_title(self): if not self.params.get('consoletitle') or self.params.get('simulate'): return - self._send_console_code('\033[22;0t') # Save the title on stack + self._send_console_code('\033[22;0t') # Save the title on stack def restore_console_title(self): if not self.params.get('consoletitle') or self.params.get('simulate'): return - self._send_console_code('\033[23;0t') # Restore the title from stack + self._send_console_code('\033[23;0t') # Restore the title from stack def __enter__(self): self.save_console_title() diff --git a/yt_dlp/downloader/youtube_live_chat.py b/yt_dlp/downloader/youtube_live_chat.py index fbf1c3d5a..b28d1ec17 100644 --- a/yt_dlp/downloader/youtube_live_chat.py +++ b/yt_dlp/downloader/youtube_live_chat.py @@ -22,7 +22,7 @@ class YoutubeLiveChatFD(FragmentFD): def real_download(self, filename, info_dict): video_id = info_dict['video_id'] self.to_screen('[%s] Downloading live chat' % self.FD_NAME) - if not self.params.get('skip_download'): + if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat': self.report_warning('Live chat download runs until the livestream ends. ' 'If you wish to download the video simultaneously, run a separate yt-dlp instance') diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 38aeb6918..c9b57c2f0 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -1029,7 +1029,7 @@ def make_HTTPS_handler(params, **kwargs): def bug_reports_message(before=';'): msg = ('please report this issue on https://github.com/yt-dlp/yt-dlp , ' - 'filling out the "Broken site" issue template properly. ' + 'filling out the appropriate issue template. ' 'Confirm you are on the latest version using yt-dlp -U') before = before.rstrip() From a825ffbffa0bea322e3ccb44c6f8e01d8d9572fb Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Sat, 12 Mar 2022 11:20:58 +1300 Subject: [PATCH 0802/2552] [extractor] Support merging subtitles with data Authored-by: coletdjnz --- yt_dlp/extractor/common.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 354814433..345da9a72 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -3651,11 +3651,11 @@ class InfoExtractor(object): @staticmethod def _merge_subtitle_items(subtitle_list1, subtitle_list2): - """ Merge subtitle items for one language. Items with duplicated URLs + """ Merge subtitle items for one language. Items with duplicated URLs/data will be dropped. 
""" - list1_urls = set([item['url'] for item in subtitle_list1]) + list1_data = set([item.get('url') or item['data'] for item in subtitle_list1]) ret = list(subtitle_list1) - ret.extend([item for item in subtitle_list2 if item['url'] not in list1_urls]) + ret.extend([item for item in subtitle_list2 if (item.get('url') or item['data']) not in list1_data]) return ret @classmethod From e880c92c659ef69f4e174bc0244dd34ebda741f4 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 13 Mar 2022 14:38:06 +0530 Subject: [PATCH 0803/2552] Exit after `--dump-user-agent` Bug in d1b5f70bc9f9dcda1544b88b42ecc25f7f7aa1c7 Closes #3055 --- yt_dlp/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 9138ec464..a433c4513 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -822,6 +822,7 @@ def _real_main(argv=None): if opts.dump_user_agent: ua = traverse_obj(opts.headers, 'User-Agent', casesense=False, default=std_headers['User-Agent']) write_string(f'{ua}\n', out=sys.stdout) + sys.exit(0) if print_extractor_information(opts, all_urls): sys.exit(0) From 5ca764c506adecf674d52948bf3f721727faf077 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 13 Mar 2022 22:25:44 +0530 Subject: [PATCH 0804/2552] [FFmpegVideoConvertor] Add more formats to `--remux-video` --- yt_dlp/postprocessor/ffmpeg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index aee84cf5b..234ddeff0 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -537,7 +537,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): class FFmpegVideoConvertorPP(FFmpegPostProcessor): - SUPPORTED_EXTS = ('mp4', 'mkv', 'flv', 'webm', 'mov', 'avi', 'mp3', 'mka', 'm4a', 'ogg', 'opus') + SUPPORTED_EXTS = ('mp4', 'mkv', 'flv', 'webm', 'mov', 'avi', 'mka', 'ogg', *FFmpegExtractAudioPP.SUPPORTED_EXTS) FORMAT_RE = re.compile(r'{0}(?:/{0})*$'.format(r'(?:\w+>)?(?:%s)' % '|'.join(SUPPORTED_EXTS))) _ACTION = 'converting' From 17322130a954577bb03b833d5c435638e51e19f2 Mon Sep 17 00:00:00 2001 From: coletdev Date: Mon, 14 Mar 2022 11:02:44 +1300 Subject: [PATCH 0805/2552] [youtube] Improve video upload date handling (#3029) * Don't prefer UTC upload date for past live streams/premieres * Improve regex (fixes a regression) Authored-by: coletdjnz --- yt_dlp/extractor/youtube.py | 179 ++++++++++++++++++------------------ 1 file changed, 89 insertions(+), 90 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 66bb8d9f0..7e3f142c7 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -730,11 +730,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor): timestamp = ( unified_timestamp(text) or unified_timestamp( self._search_regex( - (r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*on)?\s*(.+\d)', r'\w+[\s,\.-]*\w+[\s,\.-]+20\d{2}'), + (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'), text.lower(), 'time text', default=None))) if text and timestamp is None: - self.report_warning('Cannot parse localized time text' + bug_reports_message(), only_once=True) + self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True) return timestamp, text def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None, @@ -1204,7 +1204,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'id': 'Tq92D6wQ1mg', 'title': '[MMD] Adios - EVERGLOW [+Motion DL]', 'ext': 'mp4', 
-        'upload_date': '20191227',
+        'upload_date': '20191228',
         'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
         'uploader': 'Projekt Melody',
         'description': 'md5:17eccca93a786d51bc67646756894066',
@@ -1297,6 +1297,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         },
         'expected_warnings': [
             'DASH manifest missing',
+            'Some formats are possibly damaged'
         ]
     },
     # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
@@ -1569,7 +1570,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'title': 'md5:e41008789470fc2533a3252216f1c1d1',
             'description': 'md5:a677553cf0840649b731a3024aeff4cc',
             'duration': 721,
-            'upload_date': '20150127',
+            'upload_date': '20150128',
             'uploader_id': 'BerkmanCenter',
             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
             'uploader': 'The Berkman Klein Center for Internet & Society',
@@ -1601,7 +1602,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
             'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
             'duration': 4060,
-            'upload_date': '20151119',
+            'upload_date': '20151120',
             'uploader': 'Bernie Sanders',
             'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
             'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
@@ -3565,86 +3566,84 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
-        contents = try_get(
-            initial_data,
-            lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
-            list) or []
-        for content in contents:
-            vpir = content.get('videoPrimaryInfoRenderer')
-            if vpir:
-                info['upload_date'] = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d')
-                stl = vpir.get('superTitleLink')
-                if stl:
-                    stl = self._get_text(stl)
-                    if try_get(
-                            vpir,
-                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
-                        info['location'] = stl
-                    else:
-                        mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
-                        if mobj:
-                            info.update({
-                                'series': mobj.group(1),
-                                'season_number': int(mobj.group(2)),
-                                'episode_number': int(mobj.group(3)),
-                            })
-                for tlb in (try_get(
-                        vpir,
-                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
-                        list) or []):
-                    tbr = tlb.get('toggleButtonRenderer') or {}
-                    for getter, regex in [(
-                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
-                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
-                                lambda x: x['accessibility'],
-                                lambda x: x['accessibilityData']['accessibilityData'],
-                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
-                        label = (try_get(tbr, getter, dict) or {}).get('label')
-                        if label:
-                            mobj = re.match(regex, label)
-                            if mobj:
-                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
-                                break
-                sbr_tooltip = try_get(
-                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
-                if sbr_tooltip:
-                    like_count, dislike_count = sbr_tooltip.split(' / ')
-                    info.update({
-                        'like_count': str_to_int(like_count),
-                        'dislike_count': str_to_int(dislike_count),
-                    })
-            vsir = content.get('videoSecondaryInfoRenderer')
-            if vsir:
-                vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
-                info.update({
-                    'channel': self._get_text(vor, 'title'),
-                    'channel_follower_count': self._get_count(vor, 'subscriberCountText')})
-
-                rows = try_get(
-                    vsir,
-                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
-                    list) or []
-                multiple_songs = False
-                for row in rows:
-                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
-                        multiple_songs = True
-                        break
-                for row in rows:
-                    mrr = row.get('metadataRowRenderer') or {}
-                    mrr_title = mrr.get('title')
-                    if not mrr_title:
-                        continue
-                    mrr_title = self._get_text(mrr, 'title')
-                    mrr_contents_text = self._get_text(mrr, ('contents', 0))
-                    if mrr_title == 'License':
-                        info['license'] = mrr_contents_text
-                    elif not multiple_songs:
-                        if mrr_title == 'Album':
-                            info['album'] = mrr_contents_text
-                        elif mrr_title == 'Artist':
-                            info['artist'] = mrr_contents_text
-                        elif mrr_title == 'Song':
-                            info['track'] = mrr_contents_text
+        contents = traverse_obj(
+            initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
+            expected_type=list, default=[])
+
+        vpir = get_first(contents, 'videoPrimaryInfoRenderer')
+        if vpir:
+            stl = vpir.get('superTitleLink')
+            if stl:
+                stl = self._get_text(stl)
+                if try_get(
+                        vpir,
+                        lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
+                    info['location'] = stl
+                else:
+                    mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
+                    if mobj:
+                        info.update({
+                            'series': mobj.group(1),
+                            'season_number': int(mobj.group(2)),
+                            'episode_number': int(mobj.group(3)),
+                        })
+            for tlb in (try_get(
+                    vpir,
+                    lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
+                    list) or []):
+                tbr = tlb.get('toggleButtonRenderer') or {}
+                for getter, regex in [(
+                        lambda x: x['defaultText']['accessibility']['accessibilityData'],
+                        r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
+                            lambda x: x['accessibility'],
+                            lambda x: x['accessibilityData']['accessibilityData'],
+                        ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
+                    label = (try_get(tbr, getter, dict) or {}).get('label')
+                    if label:
+                        mobj = re.match(regex, label)
+                        if mobj:
+                            info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
+                            break
+            sbr_tooltip = try_get(
+                vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
+            if sbr_tooltip:
+                like_count, dislike_count = sbr_tooltip.split(' / ')
+                info.update({
+                    'like_count': str_to_int(like_count),
+                    'dislike_count': str_to_int(dislike_count),
+                })
+        vsir = get_first(contents, 'videoSecondaryInfoRenderer')
+        if vsir:
+            vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
+            info.update({
+                'channel': self._get_text(vor, 'title'),
+                'channel_follower_count': self._get_count(vor, 'subscriberCountText')})
+
+            rows = try_get(
+                vsir,
+                lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
+                list) or []
+            multiple_songs = False
+            for row in rows:
+                if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
+                    multiple_songs = True
+                    break
+            for row in rows:
+                mrr = row.get('metadataRowRenderer') or {}
+                mrr_title = mrr.get('title')
+                if not mrr_title:
+                    continue
+                mrr_title = self._get_text(mrr, 'title')
+                mrr_contents_text = self._get_text(mrr, ('contents', 0))
+                if mrr_title == 'License':
+                    info['license'] = mrr_contents_text
+                elif not multiple_songs:
+                    if mrr_title == 'Album':
+                        info['album'] = mrr_contents_text
+                    elif mrr_title == 'Artist':
+                        info['artist'] = mrr_contents_text
+                    elif mrr_title == 'Song':
+                        info['track'] = mrr_contents_text

         fallbacks = {
             'channel': 'uploader',
             'channel_id': 'uploader_id',
             'channel_url': 'uploader_url',
         }

-        # The upload date for scheduled and current live streams / premieres in microformats
-        # is generally the true upload date. Although not in UTC, we will prefer that in this case.
-        # Note this changes to the published date when the stream/premiere has finished.
+ # The upload date for scheduled, live and past live streams / premieres in microformats + # may be different from the stream date. Although not in UTC, we will prefer it in this case. # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139 - if not info.get('upload_date') or info.get('is_live') or info.get('live_status') == 'is_upcoming': - info['upload_date'] = ( - unified_strdate(get_first(microformats, 'uploadDate')) - or unified_strdate(search_meta('uploadDate')) - or info.get('upload_date')) + upload_date = ( + unified_strdate(get_first(microformats, 'uploadDate')) + or unified_strdate(search_meta('uploadDate'))) + if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'): + upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') + info['upload_date'] = upload_date for to, frm in fallbacks.items(): if not info.get(to): From b3edc8068e00d558d5fecf79ac36a1c8c7365e3a Mon Sep 17 00:00:00 2001 From: coletdev Date: Mon, 14 Mar 2022 11:03:40 +1300 Subject: [PATCH 0806/2552] [downloader/mhtml] Fix fragments with absolute urls (#3044) Authored-by: coletdjnz --- yt_dlp/downloader/mhtml.py | 6 +++++- yt_dlp/extractor/francetv.py | 2 +- yt_dlp/extractor/youtube.py | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/yt_dlp/downloader/mhtml.py b/yt_dlp/downloader/mhtml.py index 1477f65a6..bc86fd1bf 100644 --- a/yt_dlp/downloader/mhtml.py +++ b/yt_dlp/downloader/mhtml.py @@ -166,7 +166,11 @@ body > figure > img { if (i + 1) <= ctx['fragment_index']: continue - fragment_url = urljoin(fragment_base_url, fragment['path']) + fragment_url = fragment.get('url') + if not fragment_url: + assert fragment_base_url + fragment_url = urljoin(fragment_base_url, fragment['path']) + success, frag_content = self._download_fragment(ctx, fragment_url, info_dict) if not success: continue diff --git a/yt_dlp/extractor/francetv.py b/yt_dlp/extractor/francetv.py index 877c5c055..347a766d8 100644 --- a/yt_dlp/extractor/francetv.py +++ b/yt_dlp/extractor/francetv.py @@ -187,7 +187,7 @@ class FranceTVIE(InfoExtractor): 'protocol': 'mhtml', 'url': 'about:invalid', 'fragments': [{ - 'path': sheet, + 'url': sheet, # XXX: not entirely accurate; each spritesheet seems to be # a 10×10 grid of thumbnails corresponding to approximately # 2 seconds of the video; the last spritesheet may be shorter diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 7e3f142c7..4fe9cec5b 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3208,7 +3208,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'width': width, 'height': height, 'fragments': [{ - 'path': url.replace('$M', str(j)), + 'url': url.replace('$M', str(j)), 'duration': min(fragment_duration, duration - (j * fragment_duration)), } for j in range(math.ceil(fragment_count))], } From aeb21b98f1a43b9a2e89d7ea99183d6267f0899d Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 14 Mar 2022 16:10:37 +0530 Subject: [PATCH 0807/2552] [phantomjs] Fix bug in 8b7539d27c0a47d8d08e0522bdb66c571483377b Closes #3066 --- yt_dlp/extractor/openload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/openload.py b/yt_dlp/extractor/openload.py index 36927009d..fe4740aae 100644 --- a/yt_dlp/extractor/openload.py +++ b/yt_dlp/extractor/openload.py @@ -207,7 +207,7 @@ class PhantomJSwrapper(object): replaces = self.options replaces['url'] = url - user_agent = headers.get('User-Agent') or 
self.get_param('http_headers')['User-Agent'] + user_agent = headers.get('User-Agent') or self.extractor.get_param('http_headers')['User-Agent'] replaces['ua'] = user_agent.replace('"', '\\"') replaces['jscode'] = jscode From 9f2a6352ea49a4d60a00c550930474d97899695b Mon Sep 17 00:00:00 2001 From: shirt <2660574+shirt-dev@users.noreply.github.com> Date: Mon, 14 Mar 2022 16:17:01 +0000 Subject: [PATCH 0808/2552] [docs] Remove readthedocs --- .readthedocs.yml | 22 ----- docs/.gitignore | 1 - docs/Changelog.md | 5 -- docs/Collaborators.md | 5 -- docs/Contributing.md | 5 -- docs/LICENSE.md | 6 -- docs/Makefile | 177 ----------------------------------------- docs/README.md | 2 - docs/conf.py | 68 ---------------- docs/requirements.txt | 1 - docs/supportedsites.md | 5 -- docs/ytdlp_plugins.md | 6 -- 12 files changed, 303 deletions(-) delete mode 100644 .readthedocs.yml delete mode 100644 docs/.gitignore delete mode 100644 docs/Changelog.md delete mode 100644 docs/Collaborators.md delete mode 100644 docs/Contributing.md delete mode 100644 docs/LICENSE.md delete mode 100644 docs/Makefile delete mode 100644 docs/README.md delete mode 100644 docs/conf.py delete mode 100644 docs/requirements.txt delete mode 100644 docs/supportedsites.md delete mode 100644 docs/ytdlp_plugins.md diff --git a/.readthedocs.yml b/.readthedocs.yml deleted file mode 100644 index 052f7bfca..000000000 --- a/.readthedocs.yml +++ /dev/null @@ -1,22 +0,0 @@ -# .readthedocs.yaml -# Read the Docs configuration file -# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details - -# Required -version: 2 - -# Build documentation in the docs/ directory with Sphinx -sphinx: - configuration: docs/conf.py - -# Optionally build your docs in additional formats such as PDF -formats: - - epub - - pdf - - htmlzip - -# Optionally set the version of Python and requirements required to build your docs -python: - version: 3 - install: - - requirements: docs/requirements.txt diff --git a/docs/.gitignore b/docs/.gitignore deleted file mode 100644 index 69fa449dd..000000000 --- a/docs/.gitignore +++ /dev/null @@ -1 +0,0 @@ -_build/ diff --git a/docs/Changelog.md b/docs/Changelog.md deleted file mode 100644 index 99de25fb1..000000000 --- a/docs/Changelog.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -orphan: true ---- -```{include} ../Changelog.md -``` diff --git a/docs/Collaborators.md b/docs/Collaborators.md deleted file mode 100644 index 5f493d814..000000000 --- a/docs/Collaborators.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -orphan: true ---- -```{include} ../Collaborators.md -``` diff --git a/docs/Contributing.md b/docs/Contributing.md deleted file mode 100644 index 60fe46909..000000000 --- a/docs/Contributing.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -orphan: true ---- -```{include} ../Contributing.md -``` diff --git a/docs/LICENSE.md b/docs/LICENSE.md deleted file mode 100644 index 8521669f8..000000000 --- a/docs/LICENSE.md +++ /dev/null @@ -1,6 +0,0 @@ ---- -orphan: true ---- -# LICENSE -```{include} ../LICENSE -``` diff --git a/docs/Makefile b/docs/Makefile deleted file mode 100644 index 1a8e3cb1c..000000000 --- a/docs/Makefile +++ /dev/null @@ -1,177 +0,0 @@ -# Makefile for Sphinx documentation -# - -# You can set these variables from the command line. -SPHINXOPTS = -SPHINXBUILD = sphinx-build -PAPER = -BUILDDIR = _build - -# User-friendly check for sphinx-build -ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) -$(error The '$(SPHINXBUILD)' command was not found. 
Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
-endif
-
-# Internal variables.
-PAPEROPT_a4 = -D latex_paper_size=a4
-PAPEROPT_letter = -D latex_paper_size=letter
-ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
-# the i18n builder cannot share the environment and doctrees with the others
-I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
-
-.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
-
-help:
-	@echo "Please use \`make <target>' where <target> is one of"
-	@echo "  html       to make standalone HTML files"
-	@echo "  dirhtml    to make HTML files named index.html in directories"
-	@echo "  singlehtml to make a single large HTML file"
-	@echo "  pickle     to make pickle files"
-	@echo "  json       to make JSON files"
-	@echo "  htmlhelp   to make HTML files and a HTML help project"
-	@echo "  qthelp     to make HTML files and a qthelp project"
-	@echo "  devhelp    to make HTML files and a Devhelp project"
-	@echo "  epub       to make an epub"
-	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
-	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
-	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
-	@echo "  text       to make text files"
-	@echo "  man        to make manual pages"
-	@echo "  texinfo    to make Texinfo files"
-	@echo "  info       to make Texinfo files and run them through makeinfo"
-	@echo "  gettext    to make PO message catalogs"
-	@echo "  changes    to make an overview of all changed/added/deprecated items"
-	@echo "  xml        to make Docutils-native XML files"
-	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
-	@echo "  linkcheck  to check all external links for integrity"
-	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
-
-clean:
-	rm -rf $(BUILDDIR)/*
-
-html:
-	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
-	@echo
-	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
-
-dirhtml:
-	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
-	@echo
-	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
-
-singlehtml:
-	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
-	@echo
-	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
-
-pickle:
-	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
-	@echo
-	@echo "Build finished; now you can process the pickle files."
-
-json:
-	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
-	@echo
-	@echo "Build finished; now you can process the JSON files."
-
-htmlhelp:
-	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
-	@echo
-	@echo "Build finished; now you can run HTML Help Workshop with the" \
-	      ".hhp project file in $(BUILDDIR)/htmlhelp."
-
-qthelp:
-	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
-	@echo
-	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
-	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
-	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/yt-dlp.qhcp"
-	@echo "To view the help file:"
-	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/yt-dlp.qhc"
-
-devhelp:
-	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
-	@echo
-	@echo "Build finished."
- @echo "To view the help file:" - @echo "# mkdir -p $$HOME/.local/share/devhelp/yt-dlp" - @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/yt-dlp" - @echo "# devhelp" - -epub: - $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub - @echo - @echo "Build finished. The epub file is in $(BUILDDIR)/epub." - -latex: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo - @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." - @echo "Run \`make' in that directory to run these through (pdf)latex" \ - "(use \`make latexpdf' here to do that automatically)." - -latexpdf: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo "Running LaTeX files through pdflatex..." - $(MAKE) -C $(BUILDDIR)/latex all-pdf - @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." - -latexpdfja: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo "Running LaTeX files through platex and dvipdfmx..." - $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja - @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." - -text: - $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text - @echo - @echo "Build finished. The text files are in $(BUILDDIR)/text." - -man: - $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man - @echo - @echo "Build finished. The manual pages are in $(BUILDDIR)/man." - -texinfo: - $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo - @echo - @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." - @echo "Run \`make' in that directory to run these through makeinfo" \ - "(use \`make info' here to do that automatically)." - -info: - $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo - @echo "Running Texinfo files through makeinfo..." - make -C $(BUILDDIR)/texinfo info - @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." - -gettext: - $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale - @echo - @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." - -changes: - $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes - @echo - @echo "The overview file is in $(BUILDDIR)/changes." - -linkcheck: - $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck - @echo - @echo "Link check complete; look for any errors in the above output " \ - "or in $(BUILDDIR)/linkcheck/output.txt." - -doctest: - $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest - @echo "Testing of doctests in the sources finished, look at the " \ - "results in $(BUILDDIR)/doctest/output.txt." - -xml: - $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml - @echo - @echo "Build finished. The XML files are in $(BUILDDIR)/xml." - -pseudoxml: - $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml - @echo - @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." diff --git a/docs/README.md b/docs/README.md deleted file mode 100644 index 451bedaec..000000000 --- a/docs/README.md +++ /dev/null @@ -1,2 +0,0 @@ -```{include} ../README.md -``` diff --git a/docs/conf.py b/docs/conf.py deleted file mode 100644 index c4010bbc7..000000000 --- a/docs/conf.py +++ /dev/null @@ -1,68 +0,0 @@ -# coding: utf-8 -# -# yt-dlp documentation build configuration file - -import sys -import os - -# Allows to import yt-dlp -sys.path.insert(0, os.path.abspath('..')) - -# -- General configuration ------------------------------------------------ - -# Add any Sphinx extension module names here, as strings. 
They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - 'myst_parser', -] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# The master toctree document. -master_doc = 'README' - -# General information about the project. -project = u'yt-dlp' -author = u'yt-dlp' -copyright = u'UNLICENSE' - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. -from yt_dlp.version import __version__ -version = __version__ -# The full version, including alpha/beta/rc tags. -release = version - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -exclude_patterns = ['_build'] - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - -# -- Options for HTML output ---------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -html_theme = 'default' - -# Disable highlights -highlight_language = 'none' - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -# html_static_path = ['_static'] - -# Enable heading anchors -myst_heading_anchors = 4 - -# Suppress heading warnings -suppress_warnings = [ - 'myst.header', -] diff --git a/docs/requirements.txt b/docs/requirements.txt deleted file mode 100644 index f0694bdc0..000000000 --- a/docs/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -myst-parser diff --git a/docs/supportedsites.md b/docs/supportedsites.md deleted file mode 100644 index 55c023415..000000000 --- a/docs/supportedsites.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -orphan: true ---- -```{include} ../supportedsites.md -``` diff --git a/docs/ytdlp_plugins.md b/docs/ytdlp_plugins.md deleted file mode 100644 index 483b9c46e..000000000 --- a/docs/ytdlp_plugins.md +++ /dev/null @@ -1,6 +0,0 @@ ---- -orphan: true ---- -# ytdlp_plugins - -See [https://github.com/yt-dlp/yt-dlp/tree/master/ytdlp_plugins](https://github.com/yt-dlp/yt-dlp/tree/master/ytdlp_plugins). 
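For reference, the docs tree deleted above was a stock Sphinx skeleton: conf.py set `master_doc = 'README'` and rendered the Markdown sources through `myst_parser`, the stub pages (Changelog.md, supportedsites.md, and so on) only `{include}`d the corresponding top-level files, and the Makefile wrapped plain `sphinx-build` calls. A minimal sketch of rebuilding the old HTML docs at this revision, assuming Sphinx (new enough to provide `sphinx.cmd.build`, i.e. >= 1.7) and `myst-parser` are installed and the old `docs/` directory is the working directory:

    # Programmatic equivalent of the removed Makefile's `make html` target:
    #   sphinx-build -b html -d _build/doctrees . _build/html
    from sphinx.cmd.build import build_main

    exit_code = build_main(['-b', 'html', '-d', '_build/doctrees', '.', '_build/html'])
    raise SystemExit(exit_code)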
From d69e55c1d84ddec79736a6704d1507a9a4879142 Mon Sep 17 00:00:00 2001 From: shirt <2660574+shirt-dev@users.noreply.github.com> Date: Mon, 14 Mar 2022 12:19:33 -0400 Subject: [PATCH 0809/2552] [cleanup] Remove readthedocs from README.md --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 9d63a75bc..db50790d4 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,6 @@ [![Release version](https://img.shields.io/github/v/release/yt-dlp/yt-dlp?color=blue&label=Download&style=for-the-badge)](#release-files "Release") [![License: Unlicense](https://img.shields.io/badge/-Unlicense-brightgreen.svg?style=for-the-badge)](LICENSE "License") [![Donate](https://img.shields.io/badge/_-Donate-red.svg?logo=githubsponsors&labelColor=555555&style=for-the-badge)](Collaborators.md#collaborators "Donate") -[![Docs](https://img.shields.io/badge/-Docs-blue.svg?color=blue&style=for-the-badge)](https://readthedocs.org/projects/yt-dlp/ "Docs") [![Supported Sites](https://img.shields.io/badge/-Supported_Sites-brightgreen.svg?style=for-the-badge)](supportedsites.md "Supported Sites") [![PyPi](https://img.shields.io/badge/-PyPi-blue.svg?logo=pypi&labelColor=555555&style=for-the-badge)](https://pypi.org/project/yt-dlp "PyPi") [![CI Status](https://img.shields.io/github/workflow/status/yt-dlp/yt-dlp/Core%20Tests/master?label=Tests&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/actions "CI Status") From d71fd412495af9ebccef807379859a0baa97ddee Mon Sep 17 00:00:00 2001 From: "Lesmiscore (Naoya Ozaki)" Date: Tue, 15 Mar 2022 12:27:41 +0900 Subject: [PATCH 0810/2552] [fragment] Read downloaded fragments only when needed (#3069) Authored by: Lesmiscore --- yt_dlp/downloader/fragment.py | 46 +++++++++++--------------- yt_dlp/downloader/ism.py | 4 ++- yt_dlp/downloader/mhtml.py | 3 +- yt_dlp/downloader/youtube_live_chat.py | 6 ++-- 4 files changed, 29 insertions(+), 30 deletions(-) diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index 95fb2f9e7..6b75dfc62 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -133,19 +133,19 @@ class FragmentFD(FileDownloader): } success = ctx['dl'].download(fragment_filename, fragment_info_dict) if not success: - return False, None + return False if fragment_info_dict.get('filetime'): ctx['fragment_filetime'] = fragment_info_dict.get('filetime') ctx['fragment_filename_sanitized'] = fragment_filename - try: - return True, self._read_fragment(ctx) - except FileNotFoundError: - if not info_dict.get('is_live'): - raise - return False, None + return True def _read_fragment(self, ctx): - down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb') + try: + down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb') + except FileNotFoundError: + if ctx.get('live'): + return None + raise ctx['fragment_filename_sanitized'] = frag_sanitized frag_content = down.read() down.close() @@ -457,7 +457,7 @@ class FragmentFD(FileDownloader): def download_fragment(fragment, ctx): if not interrupt_trigger[0]: - return False, fragment['frag_index'] + return frag_index = ctx['fragment_index'] = fragment['frag_index'] ctx['last_error'] = None @@ -467,14 +467,12 @@ class FragmentFD(FileDownloader): headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1) # Never skip the first fragment - fatal = is_fatal(fragment.get('index') or (frag_index - 1)) - count, frag_content = 0, None + fatal, count = is_fatal(fragment.get('index') or (frag_index - 1)), 0 while 
count <= fragment_retries: try: - success, frag_content = self._download_fragment(ctx, fragment['url'], info_dict, headers) - if not success: - return False, frag_index - break + if self._download_fragment(ctx, fragment['url'], info_dict, headers): + break + return except (compat_urllib_error.HTTPError, http.client.IncompleteRead) as err: # Unavailable (possibly temporary) fragments may be served. # First we try to retry then either skip or abort. @@ -491,13 +489,9 @@ class FragmentFD(FileDownloader): break raise - if count > fragment_retries: - if not fatal: - return False, frag_index + if count > fragment_retries and fatal: ctx['dest_stream'].close() self.report_error('Giving up after %s fragment retries' % fragment_retries) - return False, frag_index - return frag_content, frag_index def append_fragment(frag_content, frag_index, ctx): if not frag_content: @@ -520,23 +514,23 @@ class FragmentFD(FileDownloader): def _download_fragment(fragment): ctx_copy = ctx.copy() - frag_content, frag_index = download_fragment(fragment, ctx_copy) - return fragment, frag_content, frag_index, ctx_copy.get('fragment_filename_sanitized') + download_fragment(fragment, ctx_copy) + return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized') self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome') with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool: - for fragment, frag_content, frag_index, frag_filename in pool.map(_download_fragment, fragments): + for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments): ctx['fragment_filename_sanitized'] = frag_filename ctx['fragment_index'] = frag_index - result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx) + result = append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), frag_index, ctx) if not result: return False else: for fragment in fragments: if not interrupt_trigger[0]: break - frag_content, frag_index = download_fragment(fragment, ctx) - result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx) + download_fragment(fragment, ctx) + result = append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), fragment['frag_index'], ctx) if not result: return False diff --git a/yt_dlp/downloader/ism.py b/yt_dlp/downloader/ism.py index 09516abe5..4d5618c83 100644 --- a/yt_dlp/downloader/ism.py +++ b/yt_dlp/downloader/ism.py @@ -263,9 +263,11 @@ class IsmFD(FragmentFD): count = 0 while count <= fragment_retries: try: - success, frag_content = self._download_fragment(ctx, segment['url'], info_dict) + success = self._download_fragment(ctx, segment['url'], info_dict) if not success: return False + frag_content = self._read_fragment(ctx) + if not extra_state['ism_track_written']: tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd']) info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0] diff --git a/yt_dlp/downloader/mhtml.py b/yt_dlp/downloader/mhtml.py index bc86fd1bf..54e711792 100644 --- a/yt_dlp/downloader/mhtml.py +++ b/yt_dlp/downloader/mhtml.py @@ -171,9 +171,10 @@ body > figure > img { assert fragment_base_url fragment_url = urljoin(fragment_base_url, fragment['path']) - success, frag_content = self._download_fragment(ctx, fragment_url, info_dict) + success = self._download_fragment(ctx, fragment_url, info_dict) if not success: continue + frag_content = self._read_fragment(ctx) mime_type = b'image/jpeg' if 
frag_content.startswith(b'\x89PNG\r\n\x1a\n'): diff --git a/yt_dlp/downloader/youtube_live_chat.py b/yt_dlp/downloader/youtube_live_chat.py index b28d1ec17..cfca686ee 100644 --- a/yt_dlp/downloader/youtube_live_chat.py +++ b/yt_dlp/downloader/youtube_live_chat.py @@ -115,9 +115,10 @@ class YoutubeLiveChatFD(FragmentFD): count = 0 while count <= fragment_retries: try: - success, raw_fragment = dl_fragment(url, request_data, headers) + success = dl_fragment(url, request_data, headers) if not success: return False, None, None, None + raw_fragment = self._read_fragment(ctx) try: data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace')) except RegexNotFoundError: @@ -145,9 +146,10 @@ class YoutubeLiveChatFD(FragmentFD): self._prepare_and_start_frag_download(ctx, info_dict) - success, raw_fragment = dl_fragment(info_dict['url']) + success = dl_fragment(info_dict['url']) if not success: return False + raw_fragment = self._read_fragment(ctx) try: data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace')) except RegexNotFoundError: From 5dbc77df267b4b3b91af0559d2e72f616551f445 Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Wed, 16 Mar 2022 09:28:22 +1300 Subject: [PATCH 0811/2552] [youtube:api] Prefer minified JSON response Authored-by: coletdjnz --- yt_dlp/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 4fe9cec5b..199225985 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -458,7 +458,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep), video_id=video_id, fatal=fatal, note=note, errnote=errnote, data=json.dumps(data).encode('utf8'), headers=real_headers, - query={'key': api_key or self._extract_api_key()}) + query={'key': api_key or self._extract_api_key(), 'prettyPrint': 'false'}) def extract_yt_initial_data(self, item_id, webpage, fatal=True): data = self._search_regex( From a1b2d84360f625701eb1ae67452ea8a3c7e276e8 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 16 Mar 2022 19:46:19 +0530 Subject: [PATCH 0812/2552] [youtube] Avoid false positives when detecting damaged formats Closes #3083 --- yt_dlp/extractor/youtube.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 199225985..4143116ef 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -1297,7 +1297,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, 'expected_warnings': [ 'DASH manifest missing', - 'Some formats are possibly damaged' ] }, # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431) @@ -3013,7 +3012,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self.report_warning(last_error) return prs, player_url - def _extract_formats(self, streaming_data, video_id, player_url, is_live): + def _extract_formats(self, streaming_data, video_id, player_url, is_live, duration): itags, stream_ids = {}, [] itag_qualities, res_qualities = {}, {} q = qualities([ @@ -3024,7 +3023,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres' ]) streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[]) - approx_duration = max(traverse_obj(streaming_formats, (..., 'approxDurationMs'), expected_type=float_or_none) or [0]) 
or None for fmt in streaming_formats: if fmt.get('targetDurationSec') or fmt.get('drmFamilies'): @@ -3091,7 +3089,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): else -1) # Some formats may have much smaller duration than others (possibly damaged during encoding) # Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823 - is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) < approx_duration - 10000) + # Make sure to avoid false positives with small duration differences. + # Eg: __2ABJjxzNo, ySuUZEjARPY + is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500) if is_damaged: self.report_warning(f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True) dct = { @@ -3227,14 +3227,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return webpage, master_ytcfg, player_responses, player_url - def _list_formats(self, video_id, microformats, video_details, player_responses, player_url): + def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None): live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails')) is_live = get_first(video_details, 'isLive') if is_live is None: is_live = get_first(live_broadcast_details, 'isLiveNow') streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[]) - formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live)) + formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live, duration)) return live_broadcast_details, is_live, streaming_data, formats @@ -3315,7 +3315,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return self.playlist_result( entries, video_id, video_title, video_description) - live_broadcast_details, is_live, streaming_data, formats = self._list_formats(video_id, microformats, video_details, player_responses, player_url) + duration = int_or_none( + get_first(video_details, 'lengthSeconds') + or get_first(microformats, 'lengthSeconds') + or parse_duration(search_meta('duration'))) or None + + live_broadcast_details, is_live, streaming_data, formats = self._list_formats( + video_id, microformats, video_details, player_responses, player_url, duration) if not formats: if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')): @@ -3387,10 +3393,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): get_first(video_details, 'channelId') or get_first(microformats, 'externalChannelId') or search_meta('channelId')) - duration = int_or_none( - get_first(video_details, 'lengthSeconds') - or get_first(microformats, 'lengthSeconds') - or parse_duration(search_meta('duration'))) or None owner_profile_url = get_first(microformats, 'ownerProfileUrl') live_content = get_first(video_details, 'isLiveContent') From 4e34889f1c25b0a8da0dbe947741137e2b2b6534 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 17 Mar 2022 01:37:04 +0530 Subject: [PATCH 0813/2552] [rumble] unescape title --- yt_dlp/extractor/rumble.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/rumble.py b/yt_dlp/extractor/rumble.py index 49c1f4485..a0d5f88d9 100644 --- a/yt_dlp/extractor/rumble.py +++ b/yt_dlp/extractor/rumble.py @@ -11,6 +11,7 @@ from ..utils import ( int_or_none, parse_iso8601, try_get, + unescapeHTML, ExtractorError, ) @@ -27,6 +28,20 @@ class RumbleEmbedIE(InfoExtractor): 'timestamp': 1571611968, 'upload_date': '20191020', } + }, { + 'url': 
'https://rumble.com/embed/vslb7v', + 'md5': '7418035de1a30a178b8af34dc2b6a52b', + 'info_dict': { + 'id': 'vslb7v', + 'ext': 'mp4', + 'title': 'Defense Sec. says US Commitment to NATO Defense \'Ironclad\'', + 'timestamp': 1645142135, + 'upload_date': '20220217', + 'channel_url': 'https://rumble.com/c/CyberTechNews', + 'channel': 'CTNews', + 'thumbnail': 'https://sp.rmbl.ws/s8/6/7/i/9/h/7i9hd.OvCc.jpg', + 'duration': 901, + } }, { 'url': 'https://rumble.com/embed/ufe9n.v5pv5f', 'only_matching': True, @@ -45,7 +60,7 @@ class RumbleEmbedIE(InfoExtractor): video = self._download_json( 'https://rumble.com/embedJS/', video_id, query={'request': 'video', 'v': video_id}) - title = video['title'] + title = unescapeHTML(video['title']) formats = [] for height, ua in (video.get('ua') or {}).items(): From 25791435b74fa32663644a4327ccd3ffa7e734ed Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 17 Mar 2022 02:00:47 +0530 Subject: [PATCH 0814/2552] [arte] Add `format_note` to m3u8 formats Related: #3086 --- yt_dlp/extractor/arte.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index a7ffdc24c..c2f2c1bd3 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -138,6 +138,7 @@ class ArteTVIE(ArteTVBaseIE): break else: lang_pref = -1 + format_note = '%s, %s' % (f.get('versionCode'), f.get('versionLibelle')) media_type = f.get('mediaType') if media_type == 'hls': @@ -145,14 +146,17 @@ class ArteTVIE(ArteTVBaseIE): format_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id=format_id, fatal=False) for m3u8_format in m3u8_formats: - m3u8_format['language_preference'] = lang_pref + m3u8_format.update({ + 'language_preference': lang_pref, + 'format_note': format_note, + }) formats.extend(m3u8_formats) continue format = { 'format_id': format_id, 'language_preference': lang_pref, - 'format_note': '%s, %s' % (f.get('versionCode'), f.get('versionLibelle')), + 'format_note': format_note, 'width': int_or_none(f.get('width')), 'height': int_or_none(f.get('height')), 'tbr': int_or_none(f.get('bitrate')), From ec47c12f69857f09a79787a7346c957f5b3d4e16 Mon Sep 17 00:00:00 2001 From: Dorian Westacott Date: Wed, 16 Mar 2022 18:54:20 -0500 Subject: [PATCH 0815/2552] [ParamountPlusSeries] Support multiple pages (#3026) Authored by: dodrian --- yt_dlp/extractor/paramountplus.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/paramountplus.py b/yt_dlp/extractor/paramountplus.py index 17138985a..a1d7cd724 100644 --- a/yt_dlp/extractor/paramountplus.py +++ b/yt_dlp/extractor/paramountplus.py @@ -1,4 +1,5 @@ from __future__ import unicode_literals +import itertools from .common import InfoExtractor from .cbs import CBSBaseIE @@ -128,11 +129,13 @@ class ParamountPlusSeriesIE(InfoExtractor): 'id': 'spongebob-squarepants', } }] - _API_URL = 'https://www.paramountplus.com/shows/{}/xhr/episodes/page/0/size/100000/xs/0/season/0/' def _entries(self, show_name): - show_json = self._download_json(self._API_URL.format(show_name), video_id=show_name) - if show_json.get('success'): + for page in itertools.count(): + show_json = self._download_json( + f'https://www.paramountplus.com/shows/{show_name}/xhr/episodes/page/{page}/size/50/xs/0/season/0', show_name) + if not show_json.get('success'): + return for episode in show_json['result']['data']: yield self.url_result( 'https://www.paramountplus.com%s' % episode['url'], From 4b3c5d1b81b1fa1ed580eec8c920cd281da946fc Mon Sep 17 00:00:00 2001 From: 
BohwaZ Date: Thu, 17 Mar 2022 01:40:27 +0100 Subject: [PATCH 0816/2552] [FranceCulture] Support playlists (#1872) Authored by: bohwaz --- yt_dlp/extractor/franceculture.py | 101 +++++++++++++++++++++++------- 1 file changed, 78 insertions(+), 23 deletions(-) diff --git a/yt_dlp/extractor/franceculture.py b/yt_dlp/extractor/franceculture.py index 14f4cb489..9dc28d801 100644 --- a/yt_dlp/extractor/franceculture.py +++ b/yt_dlp/extractor/franceculture.py @@ -1,18 +1,45 @@ # coding: utf-8 from __future__ import unicode_literals +import re from .common import InfoExtractor from ..utils import ( determine_ext, extract_attributes, int_or_none, + traverse_obj, + unified_strdate, ) class FranceCultureIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P[^/?#&]+)' _TESTS = [{ - 'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks', + # playlist + 'url': 'https://www.franceculture.fr/emissions/serie/hasta-dente', + 'playlist_count': 12, + 'info_dict': { + 'id': 'hasta-dente', + 'title': 'Hasta Dente', + 'description': 'md5:57479af50648d14e9bb649e6b1f8f911', + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20201024', + }, + 'playlist': [{ + 'info_dict': { + 'id': '3c1c2e55-41a0-11e5-9fe0-005056a87c89', + 'ext': 'mp3', + 'title': 'Jeudi, vous avez dit bizarre ?', + 'description': 'md5:47cf1e00cc21c86b0210279996a812c6', + 'duration': 604, + 'upload_date': '20201024', + 'thumbnail': r're:^https?://.*\.jpg$', + 'timestamp': 1603576680 + }, + }, + ], + }, { + 'url': 'https://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks', 'info_dict': { 'id': 'rendez-vous-au-pays-des-geeks', 'display_id': 'rendez-vous-au-pays-des-geeks', @@ -20,9 +47,9 @@ class FranceCultureIE(InfoExtractor): 'title': 'Rendez-vous au pays des geeks', 'thumbnail': r're:^https?://.*\.jpg$', 'upload_date': '20140301', - 'timestamp': 1393700400, 'vcodec': 'none', - } + 'duration': 3569, + }, }, { # no thumbnail 'url': 'https://www.franceculture.fr/emissions/la-recherche-montre-en-main/la-recherche-montre-en-main-du-mercredi-10-octobre-2018', @@ -31,9 +58,54 @@ class FranceCultureIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) + info = { + 'id': display_id, + 'title': self._html_search_regex( + r'(?s)]*itemprop="[^"]*name[^"]*"[^>]*>(.+?)', + webpage, 'title', default=self._og_search_title(webpage)), + 'description': self._html_search_regex( + r'(?s)]+class="excerpt"[^>]*>(.*?)', webpage, 'description', default=None), + 'thumbnail': self._og_search_thumbnail(webpage), + 'uploader': self._html_search_regex( + r'(?s)(.*?)', webpage, 'uploader', default=None), + 'upload_date': unified_strdate(self._html_search_regex( + r'(?s)class="teaser-text-date".*?(\d{2}/\d{2}/\d{4})', webpage, 'date', default=None)), + } + + playlist_data = self._search_regex( + r'''(?sx) + ]+data-xiti-place="[^"]*?liste_episodes[^"?]*?"[^>]*> + (.*?) + + ''', + webpage, 'playlist data', fatal=False, default=None) + + if playlist_data: + entries = [] + for item, item_description in re.findall( + r'(?s)(]*>).*?]*class="[^"]*teaser-text-chapo[^>]*>(.*?)
</p>
', + playlist_data): + + item_attributes = extract_attributes(item) + entries.append({ + 'id': item_attributes.get('data-emission-uuid'), + 'url': item_attributes.get('data-url'), + 'title': item_attributes.get('data-diffusion-title'), + 'duration': int_or_none(traverse_obj(item_attributes, 'data-duration-seconds', 'data-duration-seconds')), + 'description': item_description, + 'timestamp': int_or_none(item_attributes.get('data-start-time')), + 'thumbnail': info['thumbnail'], + 'uploader': info['uploader'], + }) + + return { + '_type': 'playlist', + 'entries': entries, + **info + } + video_data = extract_attributes(self._search_regex( r'''(?sx) (?: @@ -43,31 +115,14 @@ class FranceCultureIE(InfoExtractor): (]+data-(?:url|asset-source)="[^"]+"[^>]+>) ''', webpage, 'video data')) - - video_url = video_data.get('data-url') or video_data['data-asset-source'] - title = video_data.get('data-asset-title') or video_data.get('data-diffusion-title') or self._og_search_title(webpage) - - description = self._html_search_regex( - r'(?s)]+class="intro"[^>]*>.*?
<h2>(.+?)</h2>
', - webpage, 'description', default=None) - thumbnail = self._search_regex( - r'(?s)]+itemtype="https://schema.org/ImageObject"[^>]*>.*?]+(?:data-dejavu-)?src="([^"]+)"', - webpage, 'thumbnail', default=None) - uploader = self._html_search_regex( - r'(?s)(.*?)', - webpage, 'uploader', default=None) + video_url = traverse_obj(video_data, 'data-url', 'data-asset-source') ext = determine_ext(video_url.lower()) return { - 'id': display_id, 'display_id': display_id, 'url': video_url, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, 'ext': ext, 'vcodec': 'none' if ext == 'mp3' else None, - 'uploader': uploader, - 'timestamp': int_or_none(video_data.get('data-start-time')) or int_or_none(video_data.get('data-asset-created-date')), 'duration': int_or_none(video_data.get('data-duration')), + **info } From 497a6c5f573b1d8b7cdc93af5ed2f76ed548d0b6 Mon Sep 17 00:00:00 2001 From: Soebb <87156166+Soebb@users.noreply.github.com> Date: Thu, 17 Mar 2022 04:14:21 +0330 Subject: [PATCH 0817/2552] [daftsex] Fix extractor (#2757) Closes #2637 Authored by: Soebb --- yt_dlp/extractor/daftsex.py | 97 +++++++++++++++++++++++++++++++------ 1 file changed, 82 insertions(+), 15 deletions(-) diff --git a/yt_dlp/extractor/daftsex.py b/yt_dlp/extractor/daftsex.py index 03672b35d..6037fd9ca 100644 --- a/yt_dlp/extractor/daftsex.py +++ b/yt_dlp/extractor/daftsex.py @@ -4,30 +4,50 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import compat_b64decode from ..utils import ( - get_elements_by_class, int_or_none, js_to_json, parse_count, parse_duration, + traverse_obj, try_get, + unified_timestamp, ) class DaftsexIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?daftsex\.com/watch/(?P-?\d+_\d+)' _TESTS = [{ + 'url': 'https://daftsex.com/watch/-35370899_456246186', + 'md5': 'd95135e6cea2d905bea20dbe82cda64a', + 'info_dict': { + 'id': '-35370899_456246186', + 'ext': 'mp4', + 'title': 'just relaxing', + 'description': 'just relaxing - Watch video Watch video in high quality', + 'upload_date': '20201113', + 'timestamp': 1605261911, + 'thumbnail': r're:https://[^/]+/impf/-43BuMDIawmBGr3GLcZ93CYwWf2PBv_tVWoS1A/dnu41DnARU4\.jpg\?size=800x450&quality=96&keep_aspect_ratio=1&background=000000&sign=6af2c26ff4a45e55334189301c867384&type=video_thumb', + }, + }, { 'url': 'https://daftsex.com/watch/-156601359_456242791', 'info_dict': { 'id': '-156601359_456242791', 'ext': 'mp4', 'title': 'Skye Blue - Dinner And A Show', + 'description': 'Skye Blue - Dinner And A Show - Watch video Watch video in high quality', + 'upload_date': '20200916', + 'timestamp': 1600250735, + 'thumbnail': 'https://psv153-1.crazycloud.ru/videos/-156601359/456242791/thumb.jpg?extra=i3D32KaBbBFf9TqDRMAVmQ', }, }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = get_elements_by_class('heading', webpage)[-1] + title = self._html_search_meta('name', webpage, 'title') + timestamp = unified_timestamp(self._html_search_meta('uploadDate', webpage, 'Upload Date', default=None)) + description = self._html_search_meta('description', webpage, 'Description', default=None) + duration = parse_duration(self._search_regex( r'Duration: ((?:[0-9]{2}:){0,2}[0-9]{2})', webpage, 'duration', fatal=False)) @@ -52,28 +72,75 @@ class DaftsexIE(InfoExtractor): video_id, transform_source=js_to_json) server_domain = 'https://%s' % compat_b64decode(video_params['server'][::-1]).decode('utf-8') + + cdn_files = traverse_obj(video_params, ('video', 
'cdn_files')) or {} + if cdn_files: + formats = [] + for format_id, format_data in cdn_files.items(): + ext, height = format_id.split('_') + formats.append({ + 'format_id': format_id, + 'url': f'{server_domain}/videos/{video_id.replace("_", "/")}/{height}.mp4?extra={format_data.split(".")[-1]}', + 'height': int_or_none(height), + 'ext': ext, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'description': description, + 'duration': duration, + 'thumbnail': try_get(video_params, lambda vi: 'https:' + compat_b64decode(vi['video']['thumb']).decode('utf-8')), + 'timestamp': timestamp, + 'view_count': views, + 'age_limit': 18, + } + + item = self._download_json( + f'{server_domain}/method/video.get/{video_id}', video_id, + headers={'Referer': url}, query={ + 'token': video_params['video']['access_token'], + 'videos': video_id, + 'ckey': video_params['c_key'], + 'credentials': video_params['video']['credentials'], + })['response']['items'][0] + formats = [] - for format_id, format_data in video_params['video']['cdn_files'].items(): - ext, height = format_id.split('_') - extra_quality_data = format_data.split('.')[-1] - url = f'{server_domain}/videos/{video_id.replace("_", "/")}/{height}.mp4?extra={extra_quality_data}' - formats.append({ - 'format_id': format_id, - 'url': url, - 'height': int_or_none(height), - 'ext': ext, - }) + for f_id, f_url in item.get('files', {}).items(): + if f_id == 'external': + return self.url_result(f_url) + ext, height = f_id.split('_') + height_extra_key = traverse_obj(video_params, ('video', 'partial', 'quality', height)) + if height_extra_key: + formats.append({ + 'format_id': f'{height}p', + 'url': f'{server_domain}/{f_url[8:]}&videos={video_id}&extra_key={height_extra_key}', + 'height': int_or_none(height), + 'ext': ext, + }) self._sort_formats(formats) - thumbnail = try_get(video_params, - lambda vi: 'https:' + compat_b64decode(vi['video']['thumb']).decode('utf-8')) + thumbnails = [] + for k, v in item.items(): + if k.startswith('photo_') and v: + width = k.replace('photo_', '') + thumbnails.append({ + 'id': width, + 'url': v, + 'width': int_or_none(width), + }) return { 'id': video_id, 'title': title, 'formats': formats, + 'comment_count': int_or_none(item.get('comments')), + 'description': description, 'duration': duration, - 'thumbnail': thumbnail, + 'thumbnails': thumbnails, + 'timestamp': timestamp, 'view_count': views, 'age_limit': 18, } From 7bdcb4a40eb7168ea9f921c38025641d41e38a60 Mon Sep 17 00:00:00 2001 From: "Lesmiscore (Naoya Ozaki)" Date: Thu, 17 Mar 2022 21:22:14 +0900 Subject: [PATCH 0818/2552] [niconico] Rewrite NiconicoIE (#3018) Closes https://github.com/yt-dlp/yt-dlp/issues/2636, partially fixes https://github.com/yt-dlp/yt-dlp/issues/367 Authored by: Lesmiscore --- README.md | 3 + yt_dlp/extractor/niconico.py | 539 +++++++++++++++-------------------- 2 files changed, 236 insertions(+), 306 deletions(-) diff --git a/README.md b/README.md index db50790d4..af9ff2c6d 100644 --- a/README.md +++ b/README.md @@ -1683,6 +1683,9 @@ The following extractors use this feature: #### vikichannel * `video_types`: Types of videos to download - one or more of `episodes`, `movies`, `clips`, `trailers` +#### niconico +* `segment_duration`: Segment duration in milliseconds for HLS-DMC formats. Use it at your own risk since this feature **may result in your account termination.** + #### youtubewebarchive * `check_all`: Try to check more at the cost of more requests. 
One or more of `thumbnails`, `captures` diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py index 8f56fc95b..77c07417b 100644 --- a/yt_dlp/extractor/niconico.py +++ b/yt_dlp/extractor/niconico.py @@ -2,36 +2,36 @@ from __future__ import unicode_literals import datetime -import itertools import functools +import itertools import json import re +import time from .common import InfoExtractor, SearchInfoExtractor -from ..postprocessor.ffmpeg import FFmpegPostProcessor from ..compat import ( - compat_str, compat_parse_qs, compat_urllib_parse_urlparse, compat_HTTPError, ) from ..utils import ( ExtractorError, - dict_get, + OnDemandPagedList, + bug_reports_message, + clean_html, float_or_none, int_or_none, - OnDemandPagedList, + join_nonempty, parse_duration, + parse_filesize, parse_iso8601, - PostProcessingError, remove_start, - str_or_none, traverse_obj, try_get, unescapeHTML, - unified_timestamp, + update_url_query, + url_or_none, urlencode_postdata, - xpath_text, ) @@ -41,7 +41,7 @@ class NiconicoIE(InfoExtractor): _TESTS = [{ 'url': 'http://www.nicovideo.jp/watch/sm22312215', - 'md5': 'a5bad06f1347452102953f323c69da34s', + 'md5': 'd1a75c0823e2f629128c43e1212760f9', 'info_dict': { 'id': 'sm22312215', 'ext': 'mp4', @@ -164,14 +164,23 @@ class NiconicoIE(InfoExtractor): }, { 'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg', 'only_matching': True, + }, { + 'note': 'a video that is only served as an ENCRYPTED HLS.', + 'url': 'https://www.nicovideo.jp/watch/so38016254', + 'only_matching': True, }] - _VALID_URL = r'https?://(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch/(?P(?:[a-z]{2})?[0-9]+)' + _VALID_URL = r'https?://(?:(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch|nico\.ms)/(?P(?:[a-z]{2})?[0-9]+)' _NETRC_MACHINE = 'niconico' - + _COMMENT_API_ENDPOINTS = ( + 'https://nvcomment.nicovideo.jp/legacy/api.json', + 'https://nmsg.nicovideo.jp/api.json',) _API_HEADERS = { 'X-Frontend-ID': '6', - 'X-Frontend-Version': '0' + 'X-Frontend-Version': '0', + 'X-Niconico-Language': 'en-us', + 'Referer': 'https://www.nicovideo.jp/', + 'Origin': 'https://www.nicovideo.jp', } def _real_initialize(self): @@ -189,10 +198,17 @@ class NiconicoIE(InfoExtractor): 'mail_tel': username, 'password': password, } + self._request_webpage( + 'https://account.nicovideo.jp/login', None, + note='Acquiring Login session') urlh = self._request_webpage( - 'https://account.nicovideo.jp/api/v1/login', None, + 'https://account.nicovideo.jp/login/redirector?show_button_twitter=1&site=niconico&show_button_facebook=1', None, note='Logging in', errnote='Unable to log in', - data=urlencode_postdata(login_form_strs)) + data=urlencode_postdata(login_form_strs), + headers={ + 'Referer': 'https://account.nicovideo.jp/login', + 'Content-Type': 'application/x-www-form-urlencoded', + }) if urlh is False: login_ok = False else: @@ -204,8 +220,8 @@ class NiconicoIE(InfoExtractor): return login_ok def _get_heartbeat_info(self, info_dict): - video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/') + dmc_protocol = info_dict['_expected_protocol'] api_data = ( info_dict.get('_api_data') @@ -220,49 +236,50 @@ class NiconicoIE(InfoExtractor): session_api_endpoint = try_get(session_api_data, lambda x: x['urls'][0]) def ping(): - status = try_get( - self._download_json( - 'https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', video_id, - query={'t': try_get(api_data, lambda x: x['media']['delivery']['trackingId'])}, - note='Acquiring permission for downloading video', - headers=self._API_HEADERS), - 
lambda x: x['meta']['status']) - if status != 200: - self.report_warning('Failed to acquire permission for playing video. The video may not download.') + tracking_id = traverse_obj(api_data, ('media', 'delivery', 'trackingId')) + if tracking_id: + tracking_url = update_url_query('https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', {'t': tracking_id}) + watch_request_response = self._download_json( + tracking_url, video_id, + note='Acquiring permission for downloading video', fatal=False, + headers=self._API_HEADERS) + if traverse_obj(watch_request_response, ('meta', 'status')) != 200: + self.report_warning('Failed to acquire permission for playing video. Video download may fail.') yesno = lambda x: 'yes' if x else 'no' - # m3u8 (encryption) - if try_get(api_data, lambda x: x['media']['delivery']['encryption']) is not None: + if dmc_protocol == 'http': + protocol = 'http' + protocol_parameters = { + 'http_output_download_parameters': { + 'use_ssl': yesno(session_api_data['urls'][0]['isSsl']), + 'use_well_known_port': yesno(session_api_data['urls'][0]['isWellKnownPort']), + } + } + elif dmc_protocol == 'hls': protocol = 'm3u8' - encryption = self._parse_json(session_api_data['token'], video_id)['hls_encryption'] - session_api_http_parameters = { - 'parameters': { - 'hls_parameters': { - 'encryption': { - encryption: { - 'encrypted_key': try_get(api_data, lambda x: x['media']['delivery']['encryption']['encryptedKey']), - 'key_uri': try_get(api_data, lambda x: x['media']['delivery']['encryption']['keyUri']) - } - }, - 'transfer_preset': '', - 'use_ssl': yesno(session_api_endpoint['isSsl']), - 'use_well_known_port': yesno(session_api_endpoint['isWellKnownPort']), - 'segment_duration': 6000, - } + segment_duration = try_get(self._configuration_arg('segment_duration'), lambda x: int(x[0])) or 6000 + parsed_token = self._parse_json(session_api_data['token'], video_id) + encryption = traverse_obj(api_data, ('media', 'delivery', 'encryption')) + protocol_parameters = { + 'hls_parameters': { + 'segment_duration': segment_duration, + 'transfer_preset': '', + 'use_ssl': yesno(session_api_data['urls'][0]['isSsl']), + 'use_well_known_port': yesno(session_api_data['urls'][0]['isWellKnownPort']), } } - # http - else: - protocol = 'http' - session_api_http_parameters = { - 'parameters': { - 'http_output_download_parameters': { - 'use_ssl': yesno(session_api_endpoint['isSsl']), - 'use_well_known_port': yesno(session_api_endpoint['isWellKnownPort']), + if 'hls_encryption' in parsed_token and encryption: + protocol_parameters['hls_parameters']['encryption'] = { + parsed_token['hls_encryption']: { + 'encrypted_key': encryption['encryptedKey'], + 'key_uri': encryption['keyUri'], } } - } + else: + protocol = 'm3u8_native' + else: + raise ExtractorError(f'Unsupported DMC protocol: {dmc_protocol}') session_response = self._download_json( session_api_endpoint['url'], video_id, @@ -296,11 +313,13 @@ class NiconicoIE(InfoExtractor): 'lifetime': session_api_data.get('heartbeatLifetime') } }, - 'priority': session_api_data.get('priority'), + 'priority': session_api_data['priority'], 'protocol': { 'name': 'http', 'parameters': { - 'http_parameters': session_api_http_parameters + 'http_parameters': { + 'parameters': protocol_parameters + } } }, 'recipe_id': session_api_data.get('recipeId'), @@ -328,36 +347,35 @@ class NiconicoIE(InfoExtractor): return info_dict, heartbeat_info_dict - def _extract_format_for_quality(self, api_data, video_id, audio_quality, video_quality): - def parse_format_id(id_code): - mobj = 
re.match(r'''(?x) - (?:archive_)? - (?:(?P<codec>[^_]+)_)? - (?:(?P<br>
[\d]+)kbps_)? - (?:(?P[\d+]+)p_)? - ''', '%s_' % id_code) - return mobj.groupdict() if mobj else {} - - protocol = 'niconico_dmc' - format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality])) - vdict = parse_format_id(video_quality['id']) - adict = parse_format_id(audio_quality['id']) - resolution = try_get(video_quality, lambda x: x['metadata']['resolution'], dict) or {'height': vdict.get('res')} - vbr = try_get(video_quality, lambda x: x['metadata']['bitrate'], float) + def _extract_format_for_quality(self, video_id, audio_quality, video_quality, dmc_protocol): + + if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'): + return None + + def extract_video_quality(video_quality): + return parse_filesize('%sB' % self._search_regex( + r'\| ([0-9]*\.?[0-9]*[MK])', video_quality, 'vbr', default='')) + + format_id = '-'.join( + [remove_start(s['id'], 'archive_') for s in (video_quality, audio_quality)] + [dmc_protocol]) + + vid_qual_label = traverse_obj(video_quality, ('metadata', 'label')) + vid_quality = traverse_obj(video_quality, ('metadata', 'bitrate')) return { - 'url': '%s:%s/%s/%s' % (protocol, video_id, video_quality['id'], audio_quality['id']), + 'url': 'niconico_dmc:%s/%s/%s' % (video_id, video_quality['id'], audio_quality['id']), 'format_id': format_id, - 'format_note': 'DMC %s' % try_get(video_quality, lambda x: x['metadata']['label'], compat_str), + 'format_note': join_nonempty('DMC', vid_qual_label, dmc_protocol.upper(), delim=' '), 'ext': 'mp4', # Session API are used in HTML5, which always serves mp4 - 'vcodec': vdict.get('codec'), - 'acodec': adict.get('codec'), - 'vbr': float_or_none(vbr, 1000) or float_or_none(vdict.get('br')), - 'abr': float_or_none(audio_quality.get('bitrate'), 1000) or float_or_none(adict.get('br')), - 'height': int_or_none(resolution.get('height', vdict.get('res'))), - 'width': int_or_none(resolution.get('width')), - 'quality': -2 if 'low' in format_id else -1, # Default quality value is -1 - 'protocol': protocol, + 'acodec': 'aac', + 'vcodec': 'h264', + 'abr': float_or_none(traverse_obj(audio_quality, ('metadata', 'bitrate')), 1000), + 'vbr': float_or_none(vid_quality if vid_quality > 0 else extract_video_quality(vid_qual_label), 1000), + 'height': traverse_obj(video_quality, ('metadata', 'resolution', 'height')), + 'width': traverse_obj(video_quality, ('metadata', 'resolution', 'width')), + 'quality': -2 if 'low' in video_quality['id'] else None, + 'protocol': 'niconico_dmc', + '_expected_protocol': dmc_protocol, 'http_headers': { 'Origin': 'https://www.nicovideo.jp', 'Referer': 'https://www.nicovideo.jp/watch/' + video_id, @@ -367,248 +385,157 @@ class NiconicoIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - # Get video webpage for API data. 
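# Sketch, not patch content: round-tripping the 'niconico_dmc:' pseudo-URL that
# the new _extract_format_for_quality builds above. _get_heartbeat_info earlier
# in this file unpacks it again via info_dict['url'].split(':')[1].split('/').
# The quality ids below are made-up examples, not real DMC quality ids.
sketch_url = 'niconico_dmc:sm22312215/archive_h264_1080p/archive_aac_192kbps'
video_id, video_src_id, audio_src_id = sketch_url.split(':')[1].split('/')
assert (video_id, video_src_id, audio_src_id) == (
    'sm22312215', 'archive_h264_1080p', 'archive_aac_192kbps')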
- webpage, handle = self._download_webpage_handle( - 'http://www.nicovideo.jp/watch/' + video_id, video_id) - if video_id.startswith('so'): - video_id = self._match_id(handle.geturl()) - - api_data = self._parse_json(self._html_search_regex( - 'data-api-data="([^"]+)"', webpage, - 'API data', default='{}'), video_id) - - def get_video_info_web(items): - return dict_get(api_data['video'], items) - - # Get video info - video_info_xml = self._download_xml( - 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, - video_id, note='Downloading video info page') - - def get_video_info_xml(items): - if not isinstance(items, list): - items = [items] - for item in items: - ret = xpath_text(video_info_xml, './/' + item) - if ret: - return ret - - if get_video_info_xml('error'): - error_code = get_video_info_xml('code') - - if error_code == 'DELETED': - raise ExtractorError('The video has been deleted.', - expected=True) - elif error_code == 'NOT_FOUND': - raise ExtractorError('The video is not found.', - expected=True) - elif error_code == 'COMMUNITY': - self.to_screen('%s: The video is community members only.' % video_id) - else: - raise ExtractorError('%s reports error: %s' % (self.IE_NAME, error_code)) + try: + webpage, handle = self._download_webpage_handle( + 'http://www.nicovideo.jp/watch/' + video_id, video_id) + if video_id.startswith('so'): + video_id = self._match_id(handle.geturl()) + + api_data = self._parse_json(self._html_search_regex( + 'data-api-data="([^"]+)"', webpage, + 'API data', default='{}'), video_id) + except ExtractorError as e: + try: + api_data = self._download_json( + 'https://www.nicovideo.jp/api/watch/v3/%s?_frontendId=6&_frontendVersion=0&actionTrackId=AAAAAAAAAA_%d' % (video_id, round(time.time() * 1000)), video_id, + note='Downloading API JSON', errnote='Unable to fetch data')['data'] + except ExtractorError: + if not isinstance(e.cause, compat_HTTPError): + raise + webpage = e.cause.read().decode('utf-8', 'replace') + error_msg = self._html_search_regex( + r'(?s)(.+?)', + webpage, 'error reason', default=None) + if not error_msg: + raise + raise ExtractorError(re.sub(r'\s+', ' ', error_msg), expected=True) - # Start extracting video formats formats = [] - # Get HTML5 videos info - quality_info = try_get(api_data, lambda x: x['media']['delivery']['movie']) - if not quality_info: - raise ExtractorError('The video can\'t be downloaded', expected=True) - - for audio_quality in quality_info.get('audios') or {}: - for video_quality in quality_info.get('videos') or {}: - if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'): - continue - formats.append(self._extract_format_for_quality( - api_data, video_id, audio_quality, video_quality)) + def get_video_info(*items, get_first=True, **kwargs): + return traverse_obj(api_data, ('video', *items), get_all=not get_first, **kwargs) - # Get flv/swf info - timestamp = None - video_real_url = try_get(api_data, lambda x: x['video']['smileInfo']['url']) - if video_real_url: - is_economy = video_real_url.endswith('low') - - if is_economy: - self.report_warning('Site is currently in economy mode! 
You will only have access to lower quality streams') - - # Invoking ffprobe to determine resolution - pp = FFmpegPostProcessor(self._downloader) - cookies = self._get_cookies('https://nicovideo.jp').output(header='', sep='; path=/; domain=nicovideo.jp;\n') - - self.to_screen('%s: %s' % (video_id, 'Checking smile format with ffprobe')) - - try: - metadata = pp.get_metadata_object(video_real_url, ['-cookies', cookies]) - except PostProcessingError as err: - raise ExtractorError(err.msg, expected=True) - - v_stream = a_stream = {} - - # Some complex swf files doesn't have video stream (e.g. nm4809023) - for stream in metadata['streams']: - if stream['codec_type'] == 'video': - v_stream = stream - elif stream['codec_type'] == 'audio': - a_stream = stream - - # Community restricted videos seem to have issues with the thumb API not returning anything at all - filesize = int( - (get_video_info_xml('size_high') if not is_economy else get_video_info_xml('size_low')) - or metadata['format']['size'] - ) - extension = ( - get_video_info_xml('movie_type') - or 'mp4' if 'mp4' in metadata['format']['format_name'] else metadata['format']['format_name'] - ) - - # 'creation_time' tag on video stream of re-encoded SMILEVIDEO mp4 files are '1970-01-01T00:00:00.000000Z'. - timestamp = ( - parse_iso8601(get_video_info_web('first_retrieve')) - or unified_timestamp(get_video_info_web('postedDateTime')) - ) - metadata_timestamp = ( - parse_iso8601(try_get(v_stream, lambda x: x['tags']['creation_time'])) - or timestamp if extension != 'mp4' else 0 - ) - - # According to compconf, smile videos from pre-2017 are always better quality than their DMC counterparts - smile_threshold_timestamp = parse_iso8601('2016-12-08T00:00:00+09:00') - - is_source = timestamp < smile_threshold_timestamp or metadata_timestamp > 0 - - # If movie file size is unstable, old server movie is not source movie. - if filesize > 1: - formats.append({ - 'url': video_real_url, - 'format_id': 'smile' if not is_economy else 'smile_low', - 'format_note': 'SMILEVIDEO source' if not is_economy else 'SMILEVIDEO low quality', - 'ext': extension, - 'container': extension, - 'vcodec': v_stream.get('codec_name'), - 'acodec': a_stream.get('codec_name'), - # Some complex swf files doesn't have total bit rate metadata (e.g. 
nm6049209) - 'tbr': int_or_none(metadata['format'].get('bit_rate'), scale=1000), - 'vbr': int_or_none(v_stream.get('bit_rate'), scale=1000), - 'abr': int_or_none(a_stream.get('bit_rate'), scale=1000), - 'height': int_or_none(v_stream.get('height')), - 'width': int_or_none(v_stream.get('width')), - 'source_preference': 5 if not is_economy else -2, - 'quality': 5 if is_source and not is_economy else None, - 'filesize': filesize - }) + quality_info = api_data['media']['delivery']['movie'] + session_api_data = quality_info['session'] + for (audio_quality, video_quality, protocol) in itertools.product(quality_info['audios'], quality_info['videos'], session_api_data['protocols']): + fmt = self._extract_format_for_quality(video_id, audio_quality, video_quality, protocol) + if fmt: + formats.append(fmt) self._sort_formats(formats) # Start extracting information - title = ( - get_video_info_xml('title') # prefer to get the untranslated original title - or get_video_info_web(['originalTitle', 'title']) - or self._og_search_title(webpage, default=None) - or self._html_search_regex( - r']+class="videoHeaderTitle"[^>]*>([^<]+)', - webpage, 'video title')) - - watch_api_data_string = self._html_search_regex( - r']+id="watchAPIDataContainer"[^>]+>([^<]+)', - webpage, 'watch api data', default=None) - watch_api_data = self._parse_json(watch_api_data_string, video_id) if watch_api_data_string else {} - video_detail = watch_api_data.get('videoDetail', {}) - - thumbnail = ( - self._html_search_regex(r'', webpage, 'thumbnail data', default=None) - or dict_get( # choose highest from 720p to 240p - get_video_info_web('thumbnail'), - ['ogp', 'player', 'largeUrl', 'middleUrl', 'url']) - or self._html_search_meta('image', webpage, 'thumbnail', default=None) - or video_detail.get('thumbnail')) - - description = get_video_info_web('description') - - if not timestamp: - match = self._html_search_meta('datePublished', webpage, 'date published', default=None) - if match: - timestamp = parse_iso8601(match.replace('+', ':00+')) - if not timestamp and video_detail.get('postedAt'): - timestamp = parse_iso8601( - video_detail['postedAt'].replace('/', '-'), - delimiter=' ', timezone=datetime.timedelta(hours=9)) - timestamp = timestamp or try_get(api_data, lambda x: parse_iso8601(x['video']['registeredAt'])) - - view_count = int_or_none(get_video_info_web(['view_counter', 'viewCount'])) - if not view_count: - match = self._html_search_regex( - r'>Views: ]*>([^<]+)', - webpage, 'view count', default=None) - if match: - view_count = int_or_none(match.replace(',', '')) - view_count = ( - view_count - or video_detail.get('viewCount') - or try_get(api_data, lambda x: x['video']['count']['view'])) - - comment_count = ( - int_or_none(get_video_info_web('comment_num')) - or video_detail.get('commentCount') - or try_get(api_data, lambda x: x['video']['count']['comment'])) - - if not comment_count: - match = self._html_search_regex( - r'>Comments: ]*>([^<]+)', - webpage, 'comment count', default=None) - if match: - comment_count = int_or_none(match.replace(',', '')) - - duration = (parse_duration( - get_video_info_web('length') - or self._html_search_meta( - 'video:duration', webpage, 'video duration', default=None)) - or video_detail.get('length') - or get_video_info_web('duration')) - - webpage_url = get_video_info_web('watch_url') or url - - # for channel movie and community movie - channel_id = try_get( - api_data, - (lambda x: x['channel']['globalId'], - lambda x: x['community']['globalId'])) - channel = try_get( - api_data, - 
(lambda x: x['channel']['name'], - lambda x: x['community']['name'])) - - # Note: cannot use api_data.get('owner', {}) because owner may be set to "null" - # in the JSON, which will cause None to be returned instead of {}. - owner = try_get(api_data, lambda x: x.get('owner'), dict) or {} - uploader_id = str_or_none( - get_video_info_web(['ch_id', 'user_id']) - or owner.get('id') - or channel_id - ) - uploader = ( - get_video_info_web(['ch_name', 'user_nickname']) - or owner.get('nickname') - or channel - ) + tags = None + if webpage: + # use og:video:tag (not logged in) + og_video_tags = re.finditer(r'', webpage) + tags = list(filter(None, (clean_html(x.group(1)) for x in og_video_tags))) + if not tags: + # use keywords and split with comma (not logged in) + kwds = self._html_search_meta('keywords', webpage, default=None) + if kwds: + tags = [x for x in kwds.split(',') if x] + if not tags: + # find in json (logged in) + tags = traverse_obj(api_data, ('tag', 'items', ..., 'name')) return { 'id': video_id, '_api_data': api_data, - 'title': title, + 'title': get_video_info(('originalTitle', 'title')) or self._og_search_title(webpage, default=None), 'formats': formats, - 'thumbnail': thumbnail, - 'description': description, - 'uploader': uploader, - 'timestamp': timestamp, - 'uploader_id': uploader_id, - 'channel': channel, - 'channel_id': channel_id, - 'view_count': view_count, - 'comment_count': comment_count, - 'duration': duration, - 'webpage_url': webpage_url, + 'thumbnail': get_video_info('thumbnail', 'url') or self._html_search_meta( + ('image', 'og:image'), webpage, 'thumbnail', default=None), + 'description': clean_html(get_video_info('description')), + 'uploader': traverse_obj(api_data, ('owner', 'nickname')), + 'timestamp': parse_iso8601(get_video_info('registeredAt')) or parse_iso8601( + self._html_search_meta('video:release_date', webpage, 'date published', default=None)), + 'uploader_id': traverse_obj(api_data, ('owner', 'id')), + 'channel': traverse_obj(api_data, ('channel', 'name'), ('community', 'name')), + 'channel_id': traverse_obj(api_data, ('channel', 'id'), ('community', 'id')), + 'view_count': int_or_none(get_video_info('count', 'view')), + 'tags': tags, + 'genre': traverse_obj(api_data, ('genre', 'label'), ('genre', 'key')), + 'comment_count': get_video_info('count', 'comment', expected_type=int), + 'duration': ( + parse_duration(self._html_search_meta('video:duration', webpage, 'video duration', default=None)) + or get_video_info('duration')), + 'webpage_url': url_or_none(url) or f'https://www.nicovideo.jp/watch/{video_id}', + 'subtitles': self.extract_subtitles(video_id, api_data, session_api_data), } + def _get_subtitles(self, video_id, api_data, session_api_data): + comment_user_key = traverse_obj(api_data, ('comment', 'keys', 'userKey')) + user_id_str = session_api_data.get('serviceUserId') + + thread_ids = [x for x in traverse_obj(api_data, ('comment', 'threads')) or [] if x['isActive']] + raw_danmaku = self._extract_all_comments(video_id, thread_ids, user_id_str, comment_user_key) + if not raw_danmaku: + self.report_warning(f'Failed to get comments. 
{bug_reports_message()}') + return + return { + 'comments': [{ + 'ext': 'json', + 'data': json.dumps(raw_danmaku), + }], + } + + def _extract_all_comments(self, video_id, threads, user_id, user_key): + auth_data = { + 'user_id': user_id, + 'userkey': user_key, + } if user_id and user_key else {'user_id': ''} + + # Request Start + post_data = [{'ping': {'content': 'rs:0'}}] + for i, thread in enumerate(threads): + thread_id = thread['id'] + thread_fork = thread['fork'] + # Post Start (2N) + post_data.append({'ping': {'content': f'ps:{i * 2}'}}) + post_data.append({'thread': { + 'fork': thread_fork, + 'language': 0, + 'nicoru': 3, + 'scores': 1, + 'thread': thread_id, + 'version': '20090904', + 'with_global': 1, + **auth_data, + }}) + # Post Final (2N) + post_data.append({'ping': {'content': f'pf:{i * 2}'}}) + + # Post Start (2N+1) + post_data.append({'ping': {'content': f'ps:{i * 2 + 1}'}}) + post_data.append({'thread_leaves': { + # format is '-:, Date: Thu, 17 Mar 2022 23:11:36 +0900 Subject: [PATCH 0819/2552] [RUTV] Fix format sorting (#3085) Closes #3084 Authored by: Lesmiscore --- yt_dlp/extractor/rutv.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/rutv.py b/yt_dlp/extractor/rutv.py index 66ac32deb..0ea8253fa 100644 --- a/yt_dlp/extractor/rutv.py +++ b/yt_dlp/extractor/rutv.py @@ -181,7 +181,6 @@ class RUTVIE(InfoExtractor): 'rtmp_live': True, 'ext': 'flv', 'vbr': str_to_int(quality), - 'quality': preference, } elif transport == 'm3u8': formats.extend(self._extract_m3u8_formats( @@ -192,9 +191,10 @@ class RUTVIE(InfoExtractor): 'url': url } fmt.update({ - 'width': width, - 'height': height, + 'width': int_or_none(quality, default=height, invscale=width, scale=height), + 'height': int_or_none(quality, default=height), 'format_id': '%s-%s' % (transport, quality), + 'source_preference': preference, }) formats.append(fmt) From 7e6a187096b869f8ffc34fc11caf08cbd6c776f9 Mon Sep 17 00:00:00 2001 From: Ha Tien Loi Date: Thu, 17 Mar 2022 21:24:15 +0700 Subject: [PATCH 0820/2552] [Huya] Add extractor (#3035) Closes #3033 Authored by: hatienl0i261299 --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/huya.py | 138 +++++++++++++++++++++++++++++++++ 2 files changed, 139 insertions(+) create mode 100644 yt_dlp/extractor/huya.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 09b795c56..4eda27cdc 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -622,6 +622,7 @@ from .hse import ( HSEProductIE, ) from .huajiao import HuajiaoIE +from .huya import HuyaLiveIE from .huffpost import HuffPostIE from .hungama import ( HungamaIE, diff --git a/yt_dlp/extractor/huya.py b/yt_dlp/extractor/huya.py new file mode 100644 index 000000000..b81439682 --- /dev/null +++ b/yt_dlp/extractor/huya.py @@ -0,0 +1,138 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import hashlib +import random + +from ..compat import compat_urlparse, compat_b64decode + +from ..utils import ( + ExtractorError, + int_or_none, + js_to_json, + str_or_none, + try_get, + unescapeHTML, + update_url_query, +) + +from .common import InfoExtractor + + +class HuyaLiveIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?P[^/#?&]+)(?:\D|$)' + IE_NAME = 'huya:live' + IE_DESC = 'huya.com' + TESTS = [{ + 'url': 'https://www.huya.com/572329', + 'info_dict': { + 'id': '572329', + 'title': str, + 'description': str, + 'is_live': True, + 'view_count': int, + }, + 'params': { + 'skip_download': True, + }, + }, { 
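The RUTV change above packs an aspect-ratio computation into a single `int_or_none` call: the helper returns `int(v) * invscale // scale` and falls back to the (unscaled) `default` when `v` is missing or malformed, so `int_or_none(quality, default=height, invscale=width, scale=height)` turns a vertical quality label into the matching width. A standalone sketch of the same arithmetic, with the function name and sample values invented for illustration:

def scaled_width(quality, width, height):
    # int_or_none(quality, default=height, invscale=width, scale=height)
    # rescales the advertised vertical quality by the stream's aspect
    # ratio, falling back to the raw default when quality is unusable
    try:
        return int(quality) * width // height
    except (TypeError, ValueError):
        return height

# a 16:9 source with a '480' quality label keeps its aspect ratio:
assert scaled_width('480', 1280, 720) == 853
assert scaled_width(None, 1280, 720) == 720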
+        'url': 'https://www.huya.com/xiaoyugame',
+        'only_matching': True
+    }]
+
+    _RESOLUTION = {
+        '蓝光4M': {
+            'width': 1920,
+            'height': 1080,
+        },
+        '超清': {
+            'width': 1280,
+            'height': 720,
+        },
+        '流畅': {
+            'width': 800,
+            'height': 480
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id=video_id)
+        json_stream = self._search_regex(r'"stream":\s+"([a-zA-Z0-9+=/]+)"', webpage, 'stream', default=None)
+        if not json_stream:
+            raise ExtractorError('Video is offline', expected=True)
+        stream_data = self._parse_json(compat_b64decode(json_stream).decode(), video_id=video_id,
+                                       transform_source=js_to_json)
+        room_info = try_get(stream_data, lambda x: x['data'][0]['gameLiveInfo'])
+        if not room_info:
+            raise ExtractorError('Can not extract the room info', expected=True)
+        title = room_info.get('roomName') or room_info.get('introduction') or self._html_search_regex(
+            r'<title>([^<]+)</title>', webpage, 'title')
+        screen_type = room_info.get('screenType')
+        live_source_type = room_info.get('liveSourceType')
+        stream_info_list = stream_data['data'][0]['gameStreamInfoList']
+        formats = []
+        for stream_info in stream_info_list:
+            stream_url = stream_info.get('sFlvUrl')
+            if not stream_url:
+                continue
+            stream_name = stream_info.get('sStreamName')
+            re_secret = not screen_type and live_source_type in (0, 8, 13)
+            params = dict(compat_urlparse.parse_qsl(unescapeHTML(stream_info['sFlvAntiCode'])))
+            fm, ss = '', ''
+            if re_secret:
+                fm, ss = self.encrypt(params, stream_info, stream_name)
+            for si in stream_data.get('vMultiStreamInfo'):
+                rate = si.get('iBitRate')
+                if rate:
+                    params['ratio'] = rate
+                else:
+                    params.pop('ratio', None)
+                if re_secret:
+                    params['wsSecret'] = hashlib.md5(
+                        '_'.join([fm, params['u'], stream_name, ss, params['wsTime']]).encode()).hexdigest()
+                formats.append({
+                    'ext': stream_info.get('sFlvUrlSuffix'),
+                    'format_id': str_or_none(stream_info.get('iLineIndex')),
+                    'tbr': rate,
+                    'url': update_url_query(f'{stream_url}/{stream_name}.{stream_info.get("sFlvUrlSuffix")}',
+                                            query=params),
+                    **self._RESOLUTION.get(si.get('sDisplayName'), {}),
+                })
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'view_count': room_info.get('totalCount'),
+            'thumbnail': room_info.get('screenshot'),
+            'description': room_info.get('contentIntro'),
+            'http_headers': {
+                'Origin': 'https://www.huya.com',
+                'Referer': 'https://www.huya.com/',
+            },
+        }
+
+    def encrypt(self, params, stream_info, stream_name):
+        ct = int_or_none(params.get('wsTime'), 16) + random.random()
+        presenter_uid = stream_info['lPresenterUid']
+        if not stream_name.startswith(str(presenter_uid)):
+            uid = presenter_uid
+        else:
+            uid = int_or_none(ct % 1e7 * 1e6 % 0xffffffff)
+        u1 = uid & 0xffffffff00000000
+        u2 = uid & 0xffffffff
+        u3 = uid & 0xffffff
+        u = u1 | u2 >> 24 | u3 << 8
+        params.update({
+            'u': str_or_none(u),
+            'seqid': str_or_none(int_or_none(ct * 1000) + uid),
+            'ver': '1',
+            'uuid': int_or_none(ct % 1e7 * 1e6 % 0xffffffff),
+            't': '100',
+        })
+        fm = compat_b64decode(params['fm']).decode().split('_', 1)[0]
+        ss = hashlib.md5('|'.join([params['seqid'], params['ctype'], params['t']]).encode()).hexdigest()
+        return fm, ss

From 5a373d97686ae3876ba42f102cf3163f3680a1e4 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Fri, 18 Mar 2022 03:01:00 +0530
Subject: [PATCH 0821/2552] [veo] Fix `_VALID_URL`

Closes #3095
---
 yt_dlp/extractor/veo.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/veo.py b/yt_dlp/extractor/veo.py
index
4e57a52d1..9bc41114a 100644 --- a/yt_dlp/extractor/veo.py +++ b/yt_dlp/extractor/veo.py @@ -12,7 +12,7 @@ from ..utils import ( class VeoIE(InfoExtractor): - _VALID_URL = r'https?://app\.veo\.co/matches/(?P[0-9A-Za-z-]+)' + _VALID_URL = r'https?://app\.veo\.co/matches/(?P[0-9A-Za-z-_]+)' _TESTS = [{ 'url': 'https://app.veo.co/matches/20201027-last-period/', @@ -25,6 +25,9 @@ class VeoIE(InfoExtractor): 'timestamp': 1603847208, 'duration': 1916, } + }, { + 'url': 'https://app.veo.co/matches/20220313-2022-03-13_u15m-plsjq-vs-csl/', + 'only_matching': True, }] def _real_extract(self, url): From 16c620bc55c762781ab579ed21e24df6937db63f Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 18 Mar 2022 03:25:20 +0530 Subject: [PATCH 0822/2552] Handle float in `--wait-for-video` Closes #3082 --- yt_dlp/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index d6b284529..38ff628b0 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1428,7 +1428,7 @@ class YoutubeDL(object): min_wait, max_wait = self.params.get('wait_for_video') diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time()) if diff is None and ie_result.get('live_status') == 'is_upcoming': - diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait) + diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0) self.report_warning('Release time of video is not known') elif (diff or 0) <= 0: self.report_warning('Video should already be available according to extracted info') From e4b98809cffd285c41cf938706e54596d9ba54b2 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 18 Mar 2022 05:23:51 +0530 Subject: [PATCH 0823/2552] [youtube] Fix pagination of `membership` tab --- yt_dlp/extractor/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 4143116ef..d6c74f455 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -4026,6 +4026,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): continue known_renderers = { + 'videoRenderer': (self._grid_entries, 'items'), # for membership tab 'gridPlaylistRenderer': (self._grid_entries, 'items'), 'gridVideoRenderer': (self._grid_entries, 'items'), 'gridChannelRenderer': (self._grid_entries, 'items'), From 43c38abd1f151ca16e63d7d32c833bfd9c8d726d Mon Sep 17 00:00:00 2001 From: Sipherdrakon <64430430+Sipherdrakon@users.noreply.github.com> Date: Fri, 18 Mar 2022 05:49:31 -0400 Subject: [PATCH 0824/2552] [ParamountPlus,CBS] Change VALID_URL (#3098) Closes #3096 Authored by: Sipherdrakon --- yt_dlp/extractor/cbs.py | 28 ++++++++++++++-------------- yt_dlp/extractor/paramountplus.py | 20 +++++++++++++------- 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/yt_dlp/extractor/cbs.py b/yt_dlp/extractor/cbs.py index ae9ce5862..2af36ea82 100644 --- a/yt_dlp/extractor/cbs.py +++ b/yt_dlp/extractor/cbs.py @@ -77,21 +77,21 @@ class CBSIE(CBSBaseIE): (?: cbs:| https?://(?:www\.)?(?: - cbs\.com/(?:shows/[^/]+/video|movies/[^/]+)/| + cbs\.com/(?:shows|movies)/(?:video|[^/]+/video|[^/]+)/| colbertlateshow\.com/(?:video|podcasts)/) )(?P[\w-]+)''' # All tests are blocked outside US _TESTS = [{ - 'url': 'https://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/', + 'url': 'https://www.cbs.com/shows/video/xrUyNLtl9wd8D_RWWAg9NU2F_V6QpB3R/', 'info_dict': { - 'id': 
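The `--wait-for-video` fix above is needed because `random.randrange` only accepts integers and raised when the option was given fractional bounds; `random.uniform` handles floats, and the surrounding `round(..., 0)` keeps the chosen delay a whole number of seconds. A minimal sketch of the selection logic (the function name is invented; the real code inlines this expression):

import random

def initial_wait(min_wait, max_wait):
    # pick a random point in [min_wait, max_wait] when both bounds are
    # set, otherwise fall back to whichever single bound was given
    if min_wait and max_wait:
        return round(random.uniform(min_wait, max_wait), 0)
    return round(max_wait or min_wait, 0)

print(initial_wait(30.5, 90))  # e.g. 57.0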
'_u7W953k6la293J7EPTd9oHkSPs6Xn6_', + 'id': 'xrUyNLtl9wd8D_RWWAg9NU2F_V6QpB3R', 'ext': 'mp4', - 'title': 'Connect Chat feat. Garth Brooks', - 'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!', - 'duration': 1495, - 'timestamp': 1385585425, - 'upload_date': '20131127', + 'title': 'Tough As Nails - Dreams Never Die', + 'description': 'md5:a3535a62531cdd52b0364248a2c1ae33', + 'duration': 2588, + 'timestamp': 1639015200, + 'upload_date': '20211209', 'uploader': 'CBSI-NEW', }, 'params': { @@ -99,14 +99,14 @@ class CBSIE(CBSBaseIE): 'skip_download': True, }, }, { - 'url': 'https://www.cbs.com/shows/the-late-show-with-stephen-colbert/video/60icOhMb9NcjbcWnF_gub9XXHdeBcNk2/the-late-show-6-23-21-christine-baranski-joy-oladokun-', + 'url': 'https://www.cbs.com/shows/video/sZH1MGgomIosZgxGJ1l263MFq16oMtW1/', 'info_dict': { - 'id': '60icOhMb9NcjbcWnF_gub9XXHdeBcNk2', - 'title': 'The Late Show - 6/23/21 (Christine Baranski, Joy Oladokun)', - 'timestamp': 1624507140, - 'description': 'md5:e01af24e95c74d55e8775aef86117b95', + 'id': 'sZH1MGgomIosZgxGJ1l263MFq16oMtW1', + 'title': 'The Late Show - 3/16/22 (Michael Buble, Rose Matafeo)', + 'timestamp': 1647488100, + 'description': 'md5:d0e6ec23c544b7fa8e39a8e6844d2439', 'uploader': 'CBSI-NEW', - 'upload_date': '20210624', + 'upload_date': '20220317', }, 'params': { 'ignore_no_formats_error': True, diff --git a/yt_dlp/extractor/paramountplus.py b/yt_dlp/extractor/paramountplus.py index a1d7cd724..94a9319ea 100644 --- a/yt_dlp/extractor/paramountplus.py +++ b/yt_dlp/extractor/paramountplus.py @@ -14,12 +14,12 @@ class ParamountPlusIE(CBSBaseIE): (?: paramountplus:| https?://(?:www\.)?(?: - paramountplus\.com/(?:shows/[^/]+/video|movies/[^/]+)/ + paramountplus\.com/(?:shows|movies)/(?:video|[^/]+/video|[^/]+)/ )(?P[\w-]+))''' # All tests are blocked outside US _TESTS = [{ - 'url': 'https://www.paramountplus.com/shows/catdog/video/Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k/catdog-climb-every-catdog-the-canine-mutiny/', + 'url': 'https://www.paramountplus.com/shows/video/Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k/', 'info_dict': { 'id': 'Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k', 'ext': 'mp4', @@ -34,7 +34,7 @@ class ParamountPlusIE(CBSBaseIE): 'skip_download': 'm3u8', }, }, { - 'url': 'https://www.paramountplus.com/shows/tooning-out-the-news/video/6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd/7-23-21-week-in-review-rep-jahana-hayes-howard-fineman-sen-michael-bennet-sheera-frenkel-cecilia-kang-/', + 'url': 'https://www.paramountplus.com/shows/video/6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd/', 'info_dict': { 'id': '6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd', 'ext': 'mp4', @@ -49,7 +49,7 @@ class ParamountPlusIE(CBSBaseIE): 'skip_download': 'm3u8', }, }, { - 'url': 'https://www.paramountplus.com/movies/daddys-home/vM2vm0kE6vsS2U41VhMRKTOVHyQAr6pC', + 'url': 'https://www.paramountplus.com/movies/video/vM2vm0kE6vsS2U41VhMRKTOVHyQAr6pC/', 'info_dict': { 'id': 'vM2vm0kE6vsS2U41VhMRKTOVHyQAr6pC', 'ext': 'mp4', @@ -64,7 +64,7 @@ class ParamountPlusIE(CBSBaseIE): }, 'expected_warnings': ['Ignoring subtitle tracks'], # TODO: Investigate this }, { - 'url': 'https://www.paramountplus.com/movies/sonic-the-hedgehog/5EKDXPOzdVf9voUqW6oRuocyAEeJGbEc', + 'url': 'https://www.paramountplus.com/movies/video/5EKDXPOzdVf9voUqW6oRuocyAEeJGbEc/', 'info_dict': { 'id': '5EKDXPOzdVf9voUqW6oRuocyAEeJGbEc', 'ext': 'mp4', @@ -79,10 +79,16 @@ class ParamountPlusIE(CBSBaseIE): }, 
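The loosened `_VALID_URL` patterns above accept both the old slug-based links and the new bare `/shows/video/<id>/` and `/movies/video/<id>/` forms. A quick self-contained check of the web-URL branch of the ParamountPlus pattern against both shapes, condensed from the diff above:

import re

_VALID_URL = r'https?://(?:www\.)?paramountplus\.com/(?:shows|movies)/(?:video|[^/]+/video|[^/]+)/(?P<id>[\w-]+)'

for url in (
    'https://www.paramountplus.com/shows/video/Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k/',
    'https://www.paramountplus.com/movies/paw-patrol-the-movie/W0VyStQqUnqKzJkrpSAIARuCc9YuYGNy/',
):
    print(re.match(_VALID_URL, url).group('id'))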
'expected_warnings': ['Ignoring subtitle tracks'], }, { - 'url': 'https://www.paramountplus.com/shows/all-rise/video/QmR1WhNkh1a_IrdHZrbcRklm176X_rVc/all-rise-space/', + 'url': 'https://www.paramountplus.com/shows/the-real-world/video/mOVeHeL9ub9yWdyzSZFYz8Uj4ZBkVzQg/the-real-world-reunion/', 'only_matching': True, }, { - 'url': 'https://www.paramountplus.com/movies/million-dollar-american-princesses-meghan-and-harry/C0LpgNwXYeB8txxycdWdR9TjxpJOsdCq', + 'url': 'https://www.paramountplus.com/shows/video/mOVeHeL9ub9yWdyzSZFYz8Uj4ZBkVzQg/', + 'only_matching': True, + }, { + 'url': 'https://www.paramountplus.com/movies/video/W0VyStQqUnqKzJkrpSAIARuCc9YuYGNy/', + 'only_matching': True, + }, { + 'url': 'https://www.paramountplus.com/movies/paw-patrol-the-movie/W0VyStQqUnqKzJkrpSAIARuCc9YuYGNy/', 'only_matching': True, }] From 028f6437f1cb45bb9b3b286cba173b0588337feb Mon Sep 17 00:00:00 2001 From: Luc Ritchie Date: Fri, 18 Mar 2022 05:53:07 -0400 Subject: [PATCH 0825/2552] [afreecatv] Match new vod url (#3097) Authored by: wlritchi --- yt_dlp/extractor/afreecatv.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py index f25fc47fa..e8118d931 100644 --- a/yt_dlp/extractor/afreecatv.py +++ b/yt_dlp/extractor/afreecatv.py @@ -32,7 +32,7 @@ class AfreecaTVIE(InfoExtractor): /app/(?:index|read_ucc_bbs)\.cgi| /player/[Pp]layer\.(?:swf|html) )\?.*?\bnTitleNo=| - vod\.afreecatv\.com/PLAYER/STATION/ + vod\.afreecatv\.com/(PLAYER/STATION|player)/ ) (?P\d+) ''' @@ -170,6 +170,9 @@ class AfreecaTVIE(InfoExtractor): }, { 'url': 'http://vod.afreecatv.com/PLAYER/STATION/15055030', 'only_matching': True, + }, { + 'url': 'http://vod.afreecatv.com/player/15055030', + 'only_matching': True, }] @staticmethod From 52efa4b31200119adaa8acf33e50b84fcb6948f0 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 19 Mar 2022 02:23:33 +0530 Subject: [PATCH 0826/2552] [extractor] Add `_perform_login` function (#2943) * Adds new functions `_initialize_pre_login` and `_perform_login` as part of the extractor API * Adds `ie.supports_login` to the public API --- test/test_iqiyi_sdk_interpreter.py | 9 ++---- test/test_netrc.py | 13 ++++---- yt_dlp/extractor/abematv.py | 10 +----- yt_dlp/extractor/adn.py | 5 +-- yt_dlp/extractor/afreecatv.py | 9 +----- yt_dlp/extractor/alura.py | 9 +----- yt_dlp/extractor/animelab.py | 35 ++++++++------------ yt_dlp/extractor/animeondemand.py | 9 +----- yt_dlp/extractor/atresplayer.py | 9 +----- yt_dlp/extractor/bbc.py | 9 +----- yt_dlp/extractor/bilibili.py | 9 +----- yt_dlp/extractor/canvas.py | 9 +----- yt_dlp/extractor/common.py | 42 ++++++++++++++++++++---- yt_dlp/extractor/crunchyroll.py | 8 +---- yt_dlp/extractor/curiositystream.py | 7 ++-- yt_dlp/extractor/digitalconcerthall.py | 8 ++--- yt_dlp/extractor/eroprofile.py | 9 +----- yt_dlp/extractor/facebook.py | 11 ++----- yt_dlp/extractor/fancode.py | 38 ++++++++++------------ yt_dlp/extractor/frontendmasters.py | 9 +----- yt_dlp/extractor/funimation.py | 13 ++------ yt_dlp/extractor/gaia.py | 30 ++++++++--------- yt_dlp/extractor/generic.py | 1 + yt_dlp/extractor/hidive.py | 8 ++--- yt_dlp/extractor/hrti.py | 15 ++++----- yt_dlp/extractor/imggaming.py | 13 ++++---- yt_dlp/extractor/instagram.py | 8 ++--- yt_dlp/extractor/iprima.py | 16 ++++----- yt_dlp/extractor/iqiyi.py | 10 +----- yt_dlp/extractor/lecturio.py | 9 +----- yt_dlp/extractor/linkedin.py | 7 ++-- yt_dlp/extractor/linuxacademy.py | 9 +----- yt_dlp/extractor/lynda.py | 11 ++----- 
yt_dlp/extractor/nebula.py | 6 ++-- yt_dlp/extractor/niconico.py | 11 +------ yt_dlp/extractor/njpwworld.py | 10 +----- yt_dlp/extractor/noco.py | 9 +----- yt_dlp/extractor/packtpub.py | 5 +-- yt_dlp/extractor/patreon.py | 8 +---- yt_dlp/extractor/piapro.py | 10 ++---- yt_dlp/extractor/platzi.py | 9 +----- yt_dlp/extractor/playplustv.py | 12 +++---- yt_dlp/extractor/pluralsight.py | 9 +----- yt_dlp/extractor/pokergo.py | 10 +++--- yt_dlp/extractor/roosterteeth.py | 8 +---- yt_dlp/extractor/safari.py | 9 +----- yt_dlp/extractor/scte.py | 9 +----- yt_dlp/extractor/shahid.py | 8 ++--- yt_dlp/extractor/sonyliv.py | 16 ++++----- yt_dlp/extractor/soundcloud.py | 34 ++++++++----------- yt_dlp/extractor/teachable.py | 3 +- yt_dlp/extractor/teamtreehouse.py | 7 ++-- yt_dlp/extractor/tennistv.py | 9 +++--- yt_dlp/extractor/toutv.py | 7 ++-- yt_dlp/extractor/tubitv.py | 8 +---- yt_dlp/extractor/tumblr.py | 12 ++----- yt_dlp/extractor/twitch.py | 9 +----- yt_dlp/extractor/udemy.py | 9 +----- yt_dlp/extractor/vidio.py | 9 ++---- yt_dlp/extractor/viewlift.py | 3 -- yt_dlp/extractor/viki.py | 9 +----- yt_dlp/extractor/vimeo.py | 20 +++--------- yt_dlp/extractor/vk.py | 9 +----- yt_dlp/extractor/vlive.py | 16 +++------ yt_dlp/extractor/vrv.py | 12 ++----- yt_dlp/extractor/youtube.py | 23 ++++--------- yt_dlp/extractor/zattoo.py | 14 +++----- yt_dlp/extractor/zee5.py | 45 ++++++++++++-------------- 68 files changed, 254 insertions(+), 570 deletions(-) diff --git a/test/test_iqiyi_sdk_interpreter.py b/test/test_iqiyi_sdk_interpreter.py index ee039f898..adbae4690 100644 --- a/test/test_iqiyi_sdk_interpreter.py +++ b/test/test_iqiyi_sdk_interpreter.py @@ -12,11 +12,6 @@ from test.helper import FakeYDL, is_download_test from yt_dlp.extractor import IqiyiIE -class IqiyiIEWithCredentials(IqiyiIE): - def _get_login_info(self): - return 'foo', 'bar' - - class WarningLogger(object): def __init__(self): self.messages = [] @@ -40,8 +35,8 @@ class TestIqiyiSDKInterpreter(unittest.TestCase): If `sign` is incorrect, /validate call throws an HTTP 556 error ''' logger = WarningLogger() - ie = IqiyiIEWithCredentials(FakeYDL({'logger': logger})) - ie._login() + ie = IqiyiIE(FakeYDL({'logger': logger})) + ie._perform_login('foo', 'bar') self.assertTrue('unable to log in:' in logger.messages[0]) diff --git a/test/test_netrc.py b/test/test_netrc.py index 36b943591..94a703406 100644 --- a/test/test_netrc.py +++ b/test/test_netrc.py @@ -7,18 +7,19 @@ import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from yt_dlp.extractor import ( - gen_extractors, -) +from yt_dlp.extractor import gen_extractor_classes +from yt_dlp.extractor.common import InfoExtractor + +NO_LOGIN = InfoExtractor._perform_login class TestNetRc(unittest.TestCase): def test_netrc_present(self): - for ie in gen_extractors(): - if not hasattr(ie, '_login'): + for ie in gen_extractor_classes(): + if ie._perform_login is NO_LOGIN: continue self.assertTrue( - hasattr(ie, '_NETRC_MACHINE'), + ie._NETRC_MACHINE, 'Extractor %s supports login, but is missing a _NETRC_MACHINE property' % ie.IE_NAME) diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py index 360fa4699..a839f0c1f 100644 --- a/yt_dlp/extractor/abematv.py +++ b/yt_dlp/extractor/abematv.py @@ -291,15 +291,7 @@ class AbemaTVIE(AbemaTVBaseIE): return self._MEDIATOKEN - def _real_initialize(self): - self._login() - - def _login(self): - username, password = self._get_login_info() - # No authentication to be performed - if not username: - return True - 
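The reworked test_netrc above no longer looks for a `_login` method; it detects login support by checking whether a class still inherits the base `_perform_login` function object. The identity check can be seen in isolation (class names invented):

class InfoExtractorStub:
    _NETRC_MACHINE = None

    def _perform_login(self, username, password):
        pass  # base class: no login support

class NoLoginIE(InfoExtractorStub):
    pass

class FakeLoginIE(InfoExtractorStub):
    _NETRC_MACHINE = 'fake'

    def _perform_login(self, username, password):
        print(f'logging in as {username}')

NO_LOGIN = InfoExtractorStub._perform_login
assert NoLoginIE._perform_login is NO_LOGIN         # skipped by the test
assert FakeLoginIE._perform_login is not NO_LOGIN   # must declare _NETRC_MACHINE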
+ def _perform_login(self, username, password): if '@' in username: # don't strictly check if it's email address or not ep, method = 'user/email', 'email' else: diff --git a/yt_dlp/extractor/adn.py b/yt_dlp/extractor/adn.py index 0863e0d85..fca6e605d 100644 --- a/yt_dlp/extractor/adn.py +++ b/yt_dlp/extractor/adn.py @@ -126,10 +126,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' }]) return subtitles - def _real_initialize(self): - username, password = self._get_login_info() - if not username: - return + def _perform_login(self, username, password): try: access_token = (self._download_json( self._API_BASE_URL + 'authentication/login', None, diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py index e8118d931..77f0e3c10 100644 --- a/yt_dlp/extractor/afreecatv.py +++ b/yt_dlp/extractor/afreecatv.py @@ -184,14 +184,7 @@ class AfreecaTVIE(InfoExtractor): video_key['part'] = int(m.group('part')) return video_key - def _real_initialize(self): - self._login() - - def _login(self): - username, password = self._get_login_info() - if username is None: - return - + def _perform_login(self, username, password): login_form = { 'szWork': 'login', 'szType': 'json', diff --git a/yt_dlp/extractor/alura.py b/yt_dlp/extractor/alura.py index f5325de2f..d2e2df270 100644 --- a/yt_dlp/extractor/alura.py +++ b/yt_dlp/extractor/alura.py @@ -74,14 +74,7 @@ class AluraIE(InfoExtractor): "formats": formats } - def _real_initialize(self): - self._login() - - def _login(self): - username, password = self._get_login_info() - if username is None: - return - pass + def _perform_login(self, username, password): login_page = self._download_webpage( self._LOGIN_URL, None, 'Downloading login popup') diff --git a/yt_dlp/extractor/animelab.py b/yt_dlp/extractor/animelab.py index 4fb7ee424..1c2cc47dd 100644 --- a/yt_dlp/extractor/animelab.py +++ b/yt_dlp/extractor/animelab.py @@ -15,25 +15,21 @@ from ..compat import compat_HTTPError class AnimeLabBaseIE(InfoExtractor): - _LOGIN_REQUIRED = True _LOGIN_URL = 'https://www.animelab.com/login' _NETRC_MACHINE = 'animelab' + _LOGGED_IN = False - def _login(self): - def is_logged_in(login_webpage): - return 'Sign In' not in login_webpage + def _is_logged_in(self, login_page=None): + if not self._LOGGED_IN: + if not login_page: + login_page = self._download_webpage(self._LOGIN_URL, None, 'Downloading login page') + AnimeLabBaseIE._LOGGED_IN = 'Sign In' not in login_page + return self._LOGGED_IN - login_page = self._download_webpage( - self._LOGIN_URL, None, 'Downloading login page') - - # Check if already logged in - if is_logged_in(login_page): + def _perform_login(self, username, password): + if self._is_logged_in(): return - (username, password) = self._get_login_info() - if username is None and self._LOGIN_REQUIRED: - self.raise_login_required('Login is required to access any AnimeLab content') - login_form = { 'email': username, 'password': password, @@ -47,17 +43,14 @@ class AnimeLabBaseIE(InfoExtractor): except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: raise ExtractorError('Unable to log in (wrong credentials?)', expected=True) - else: - raise + raise - # if login was successful - if is_logged_in(response): - return - - raise ExtractorError('Unable to login (cannot verify if logged in)') + if not self._is_logged_in(response): + raise ExtractorError('Unable to login (cannot verify if logged in)') def _real_initialize(self): - self._login() + if not self._is_logged_in(): + 
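The BiliIntl conversion just below keeps its RSA password encryption: the server hands out a public key plus a salt hash, and the password is encrypted with PKCS#1 v1.5 before being posted. A reduced sketch of that step, assuming pycryptodomex is installed and `key_data` has the shape of the API response (`key` and `hash` fields):

import base64

from Cryptodome.Cipher import PKCS1_v1_5
from Cryptodome.PublicKey import RSA

def encrypt_password(key_data, password):
    # salt the password with the per-session hash, then base64 the ciphertext
    cipher = PKCS1_v1_5.new(RSA.importKey(key_data['key']))
    return base64.b64encode(
        cipher.encrypt((key_data['hash'] + password).encode('utf-8')))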
self.raise_login_required('Login is required to access any AnimeLab content') class AnimeLabIE(AnimeLabBaseIE): diff --git a/yt_dlp/extractor/animeondemand.py b/yt_dlp/extractor/animeondemand.py index 5694f7240..2e674d58f 100644 --- a/yt_dlp/extractor/animeondemand.py +++ b/yt_dlp/extractor/animeondemand.py @@ -53,11 +53,7 @@ class AnimeOnDemandIE(InfoExtractor): 'only_matching': True, }] - def _login(self): - username, password = self._get_login_info() - if username is None: - return - + def _perform_login(self, username, password): login_page = self._download_webpage( self._LOGIN_URL, None, 'Downloading login page') @@ -93,9 +89,6 @@ class AnimeOnDemandIE(InfoExtractor): raise ExtractorError('Unable to login: %s' % error, expected=True) raise ExtractorError('Unable to log in') - def _real_initialize(self): - self._login() - def _real_extract(self, url): anime_id = self._match_id(url) diff --git a/yt_dlp/extractor/atresplayer.py b/yt_dlp/extractor/atresplayer.py index 6d843966a..465af4ed3 100644 --- a/yt_dlp/extractor/atresplayer.py +++ b/yt_dlp/extractor/atresplayer.py @@ -37,9 +37,6 @@ class AtresPlayerIE(InfoExtractor): ] _API_BASE = 'https://api.atresplayer.com/' - def _real_initialize(self): - self._login() - def _handle_error(self, e, code): if isinstance(e.cause, compat_HTTPError) and e.cause.code == code: error = self._parse_json(e.cause.read(), None) @@ -48,11 +45,7 @@ class AtresPlayerIE(InfoExtractor): raise ExtractorError(error['error_description'], expected=True) raise - def _login(self): - username, password = self._get_login_info() - if username is None: - return - + def _perform_login(self, username, password): self._request_webpage( self._API_BASE + 'login', None, 'Downloading login page') diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py index b664a7007..823155730 100644 --- a/yt_dlp/extractor/bbc.py +++ b/yt_dlp/extractor/bbc.py @@ -264,11 +264,7 @@ class BBCCoUkIE(InfoExtractor): 'only_matching': True, }] - def _login(self): - username, password = self._get_login_info() - if username is None: - return - + def _perform_login(self, username, password): login_page = self._download_webpage( self._LOGIN_URL, None, 'Downloading signin page') @@ -294,9 +290,6 @@ class BBCCoUkIE(InfoExtractor): 'Unable to login: %s' % error, expected=True) raise ExtractorError('Unable to log in') - def _real_initialize(self): - self._login() - class MediaSelectionError(Exception): def __init__(self, id): self.id = id diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 1bbf7ca1c..b4eb20642 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -821,11 +821,7 @@ class BiliIntlBaseIE(InfoExtractor): 'extractor_key': BiliIntlIE.ie_key(), } - def _login(self): - username, password = self._get_login_info() - if username is None: - return - + def _perform_login(self, username, password): try: from Cryptodome.PublicKey import RSA from Cryptodome.Cipher import PKCS1_v1_5 @@ -856,9 +852,6 @@ class BiliIntlBaseIE(InfoExtractor): else: raise ExtractorError('Unable to log in') - def _real_initialize(self): - self._login() - class BiliIntlIE(BiliIntlBaseIE): _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-z]{2}/)?play/(?P\d+)/(?P\d+)' diff --git a/yt_dlp/extractor/canvas.py b/yt_dlp/extractor/canvas.py index 82fded4e1..31e7d7de6 100644 --- a/yt_dlp/extractor/canvas.py +++ b/yt_dlp/extractor/canvas.py @@ -274,14 +274,7 @@ class VrtNUIE(GigyaBaseIE): _APIKEY = 
'3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
     _CONTEXT_ID = 'R3595707040'
 
-    def _real_initialize(self):
-        self._login()
-
-    def _login(self):
-        username, password = self._get_login_info()
-        if username is None:
-            return
-
+    def _perform_login(self, username, password):
         auth_info = self._gigya_login({
             'APIKey': self._APIKEY,
             'targetEnv': 'jssdk',
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 345da9a72..f3ae3fd4c 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -432,7 +432,15 @@ class InfoExtractor(object):
     Subclasses may also override suitable() if necessary, but ensure the function
     signature is preserved and that this function imports everything it needs
-    (except other extractors), so that lazy_extractors works correctly
+    (except other extractors), so that lazy_extractors works correctly.
+
+    To support username + password (or netrc) login, the extractor must define a
+    _NETRC_MACHINE and re-define _perform_login(username, password) and
+    (optionally) _initialize_pre_login() methods. The _perform_login method will
+    be called between _initialize_pre_login and _real_initialize if credentials
+    are passed by the user. In cases where it is necessary to have the login
+    process as part of the extraction rather than initialization, _perform_login
+    can be left undefined.
 
     _GEO_BYPASS attribute may be set to False in order to disable
     geo restriction bypass mechanisms for a particular extractor.
@@ -460,9 +468,10 @@ class InfoExtractor(object):
     _GEO_COUNTRIES = None
     _GEO_IP_BLOCKS = None
     _WORKING = True
+    _NETRC_MACHINE = None
 
     _LOGIN_HINTS = {
-        'any': 'Use --cookies, --username and --password, or --netrc to provide account credentials',
+        'any': 'Use --cookies, --cookies-from-browser, --username and --password, or --netrc to provide account credentials',
         'cookies': (
             'Use --cookies-from-browser or --cookies for the authentication. '
             'See https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl for how to manually pass cookies'),
@@ -512,6 +521,10 @@ class InfoExtractor(object):
         """Getter method for _WORKING."""
         return cls._WORKING
 
+    @classmethod
+    def supports_login(cls):
+        return bool(cls._NETRC_MACHINE)
+
     def initialize(self):
         """Initializes an instance (authentication, etc)."""
         self._printed_messages = set()
@@ -520,6 +533,13 @@ class InfoExtractor(object):
             'ip_blocks': self._GEO_IP_BLOCKS,
         })
         if not self._ready:
+            self._initialize_pre_login()
+            if self.supports_login():
+                username, password = self._get_login_info()
+                if username:
+                    self._perform_login(username, password)
+            elif self.get_param('username') and False not in (self.IE_DESC, self._NETRC_MACHINE):
+                self.report_warning(f'Login with password is not supported for this website. {self._LOGIN_HINTS["cookies"]}')
             self._real_initialize()
             self._ready = True
 
@@ -665,6 +685,14 @@ class InfoExtractor(object):
         """Sets a YoutubeDL instance as the downloader for this IE."""
         self._downloader = downloader
 
+    def _initialize_pre_login(self):
+        """ Initialization before login. Redefine in subclasses."""
+        pass
+
+    def _perform_login(self, username, password):
+        """ Login with username and password. Redefine in subclasses."""
+        pass
+
     def _real_initialize(self):
         """Real initialization process.
Redefine in subclasses.""" pass @@ -1098,12 +1126,15 @@ class InfoExtractor(object): def raise_login_required( self, msg='This video is only available for registered users', - metadata_available=False, method='any'): + metadata_available=False, method=NO_DEFAULT): if metadata_available and ( self.get_param('ignore_no_formats_error') or self.get_param('wait_for_video')): self.report_warning(msg) return + if method is NO_DEFAULT: + method = 'any' if self.supports_login() else 'cookies' if method is not None: + assert method in self._LOGIN_HINTS, 'Invalid login method' msg = '%s. %s' % (msg, self._LOGIN_HINTS[method]) raise ExtractorError(msg, expected=True) @@ -3680,9 +3711,8 @@ class InfoExtractor(object): def mark_watched(self, *args, **kwargs): if not self.get_param('mark_watched', False): return - if (hasattr(self, '_NETRC_MACHINE') and self._get_login_info()[0] is not None - or self.get_param('cookiefile') - or self.get_param('cookiesfrombrowser')): + if (self.supports_login() and self._get_login_info()[0] is not None + or self.get_param('cookiefile') or self.get_param('cookiesfrombrowser')): self._mark_watched(*args, **kwargs) def _mark_watched(self, *args, **kwargs): diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py index b6ba5ef56..bf1bf8c1c 100644 --- a/yt_dlp/extractor/crunchyroll.py +++ b/yt_dlp/extractor/crunchyroll.py @@ -57,10 +57,7 @@ class CrunchyrollBaseIE(InfoExtractor): 'Content-Type': 'application/x-www-form-urlencoded', }) - def _login(self): - username, password = self._get_login_info() - if username is None: - return + def _perform_login(self, username, password): if self._get_cookies(self._LOGIN_URL).get('etp_rt'): return @@ -89,9 +86,6 @@ class CrunchyrollBaseIE(InfoExtractor): if not self._get_cookies(self._LOGIN_URL).get('etp_rt'): raise ExtractorError('Login succeeded but did not set etp_rt cookie') - def _real_initialize(self): - self._login() - @staticmethod def _add_skip_wall(url): parsed_url = compat_urlparse.urlparse(url) diff --git a/yt_dlp/extractor/curiositystream.py b/yt_dlp/extractor/curiositystream.py index 485b6031f..b8abcf7a5 100644 --- a/yt_dlp/extractor/curiositystream.py +++ b/yt_dlp/extractor/curiositystream.py @@ -33,14 +33,11 @@ class CuriosityStreamBaseIE(InfoExtractor): self._handle_errors(result) return result['data'] - def _real_initialize(self): - email, password = self._get_login_info() - if email is None: - return + def _perform_login(self, username, password): result = self._download_json( 'https://api.curiositystream.com/v1/login', None, note='Logging in', data=urlencode_postdata({ - 'email': email, + 'email': username, 'password': password, })) self._handle_errors(result) diff --git a/yt_dlp/extractor/digitalconcerthall.py b/yt_dlp/extractor/digitalconcerthall.py index 9b302a9a0..8398ae30e 100644 --- a/yt_dlp/extractor/digitalconcerthall.py +++ b/yt_dlp/extractor/digitalconcerthall.py @@ -45,10 +45,7 @@ class DigitalConcertHallIE(InfoExtractor): 'playlist_count': 3, }] - def _login(self): - username, password = self._get_login_info() - if not username: - self.raise_login_required() + def _perform_login(self, username, password): token_response = self._download_json( self._OAUTH_URL, None, 'Obtaining token', errnote='Unable to obtain token', data=urlencode_postdata({ @@ -78,7 +75,8 @@ class DigitalConcertHallIE(InfoExtractor): self.raise_login_required(msg='Login info incorrect') def _real_initialize(self): - self._login() + if not self._ACCESS_TOKEN: + self.raise_login_required(method='password') def 
_entries(self, items, language, **kwargs): for item in items: diff --git a/yt_dlp/extractor/eroprofile.py b/yt_dlp/extractor/eroprofile.py index a8396f1d3..5d5e7f244 100644 --- a/yt_dlp/extractor/eroprofile.py +++ b/yt_dlp/extractor/eroprofile.py @@ -39,11 +39,7 @@ class EroProfileIE(InfoExtractor): 'skip': 'Requires login', }] - def _login(self): - (username, password) = self._get_login_info() - if username is None: - return - + def _perform_login(self, username, password): query = compat_urllib_parse_urlencode({ 'username': username, 'password': password, @@ -62,9 +58,6 @@ class EroProfileIE(InfoExtractor): r']+?src="([^"]+)"', login_page, 'login redirect url') self._download_webpage(redirect_url, None, False) - def _real_initialize(self): - self._login() - def _real_extract(self, url): display_id = self._match_id(url) diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index ef57b221c..2deed585f 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -329,11 +329,7 @@ class FacebookIE(InfoExtractor): urls.append(mobj.group('url')) return urls - def _login(self): - useremail, password = self._get_login_info() - if useremail is None: - return - + def _perform_login(self, username, password): login_page_req = sanitized_Request(self._LOGIN_URL) self._set_cookie('facebook.com', 'locale', 'en_US') login_page = self._download_webpage(login_page_req, None, @@ -345,7 +341,7 @@ class FacebookIE(InfoExtractor): lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, 'lgnrnd') login_form = { - 'email': useremail, + 'email': username, 'pass': password, 'lsd': lsd, 'lgnrnd': lgnrnd, @@ -392,9 +388,6 @@ class FacebookIE(InfoExtractor): self.report_warning('unable to log in: %s' % error_to_compat_str(err)) return - def _real_initialize(self): - self._login() - def _extract_from_url(self, url, video_id): webpage = self._download_webpage( url.replace('://m.facebook.com/', '://www.facebook.com/'), video_id) diff --git a/yt_dlp/extractor/fancode.py b/yt_dlp/extractor/fancode.py index 978df31ff..7ea16c61d 100644 --- a/yt_dlp/extractor/fancode.py +++ b/yt_dlp/extractor/fancode.py @@ -49,30 +49,26 @@ class FancodeVodIE(InfoExtractor): 'referer': 'https://fancode.com', } - def _login(self): + def _perform_login(self, username, password): # Access tokens are shortlived, so get them using the refresh token. - username, password = self._get_login_info() - if username == 'refresh' and password is not None: - self.report_login() - data = '''{ - "query":"mutation RefreshToken($refreshToken: String\\u0021) { refreshToken(refreshToken: $refreshToken) { accessToken }}", - "variables":{ - "refreshToken":"%s" - }, - "operationName":"RefreshToken" - }''' % password - - token_json = self.download_gql('refresh token', data, "Getting the Access token") - self._ACCESS_TOKEN = try_get(token_json, lambda x: x['data']['refreshToken']['accessToken']) - if self._ACCESS_TOKEN is None: - self.report_warning('Failed to get Access token') - else: - self.headers.update({'Authorization': 'Bearer %s' % self._ACCESS_TOKEN}) - elif username is not None: + if username != 'refresh': self.report_warning(f'Login using username and password is not currently supported. 
{self._LOGIN_HINT}') - def _real_initialize(self): - self._login() + self.report_login() + data = '''{ + "query":"mutation RefreshToken($refreshToken: String\\u0021) { refreshToken(refreshToken: $refreshToken) { accessToken }}", + "variables":{ + "refreshToken":"%s" + }, + "operationName":"RefreshToken" + }''' % password + + token_json = self.download_gql('refresh token', data, "Getting the Access token") + self._ACCESS_TOKEN = try_get(token_json, lambda x: x['data']['refreshToken']['accessToken']) + if self._ACCESS_TOKEN is None: + self.report_warning('Failed to get Access token') + else: + self.headers.update({'Authorization': 'Bearer %s' % self._ACCESS_TOKEN}) def _check_login_required(self, is_available, is_premium): msg = None diff --git a/yt_dlp/extractor/frontendmasters.py b/yt_dlp/extractor/frontendmasters.py index 0d29da29b..fc67a8437 100644 --- a/yt_dlp/extractor/frontendmasters.py +++ b/yt_dlp/extractor/frontendmasters.py @@ -28,14 +28,7 @@ class FrontendMastersBaseIE(InfoExtractor): 'high': {'width': 1920, 'height': 1080} } - def _real_initialize(self): - self._login() - - def _login(self): - (username, password) = self._get_login_info() - if username is None: - return - + def _perform_login(self, username, password): login_page = self._download_webpage( self._LOGIN_URL, None, 'Downloading login page') diff --git a/yt_dlp/extractor/funimation.py b/yt_dlp/extractor/funimation.py index 96dad2ca3..36a9c4772 100644 --- a/yt_dlp/extractor/funimation.py +++ b/yt_dlp/extractor/funimation.py @@ -36,9 +36,8 @@ class FunimationBaseIE(InfoExtractor): note='Checking geo-location', errnote='Unable to fetch geo-location information'), 'region') or 'US' - def _login(self): - username, password = self._get_login_info() - if username is None: + def _perform_login(self, username, password): + if self._TOKEN: return try: data = self._download_json( @@ -47,7 +46,7 @@ class FunimationBaseIE(InfoExtractor): 'username': username, 'password': password, })) - return data['token'] + FunimationBaseIE._TOKEN = data['token'] except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: error = self._parse_json(e.cause.read().decode(), None)['error'] @@ -90,8 +89,6 @@ class FunimationPageIE(FunimationBaseIE): def _real_initialize(self): if not self._REGION: FunimationBaseIE._REGION = self._get_region() - if not self._TOKEN: - FunimationBaseIE._TOKEN = self._login() def _real_extract(self, url): locale, show, episode = self._match_valid_url(url).group('lang', 'show', 'episode') @@ -154,10 +151,6 @@ class FunimationIE(FunimationBaseIE): }, }] - def _real_initialize(self): - if not self._TOKEN: - FunimationBaseIE._TOKEN = self._login() - @staticmethod def _get_experiences(episode): for lang, lang_data in episode.get('languages', {}).items(): diff --git a/yt_dlp/extractor/gaia.py b/yt_dlp/extractor/gaia.py index 7821fb783..5b0195c63 100644 --- a/yt_dlp/extractor/gaia.py +++ b/yt_dlp/extractor/gaia.py @@ -56,24 +56,22 @@ class GaiaIE(InfoExtractor): def _real_initialize(self): auth = self._get_cookies('https://www.gaia.com/').get('auth') if auth: - auth = self._parse_json( - compat_urllib_parse_unquote(auth.value), - None, fatal=False) - if not auth: - username, password = self._get_login_info() - if username is None: - return - auth = self._download_json( - 'https://auth.gaia.com/v1/login', - None, data=urlencode_postdata({ - 'username': username, - 'password': password - })) - if auth.get('success') is False: - raise ExtractorError(', '.join(auth['messages']), expected=True) - 
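Funimation above caches the token on the base class rather than on the instance, so one successful login is shared by every extractor object created afterwards (including subclasses such as the page extractor). The pattern in isolation, with the class name and token invented:

class AuthBaseIE:
    _TOKEN = None  # class attribute: shared across instances and subclasses

    def _perform_login(self, username, password):
        if self._TOKEN:  # some earlier instance already logged in
            return
        # ... exchange the credentials for a token at the auth endpoint ...
        AuthBaseIE._TOKEN = 'token-from-auth-endpoint'
        # assigned on the class itself on purpose: `self._TOKEN = ...` would
        # only set an instance attribute and defeat the sharing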
if auth: + auth = self._parse_json(compat_urllib_parse_unquote(auth.value), None, fatal=False) self._jwt = auth.get('jwt') + def _perform_login(self, username, password): + if self._jwt: + return + auth = self._download_json( + 'https://auth.gaia.com/v1/login', + None, data=urlencode_postdata({ + 'username': username, + 'password': password + })) + if auth.get('success') is False: + raise ExtractorError(', '.join(auth['messages']), expected=True) + self._jwt = auth.get('jwt') + def _real_extract(self, url): display_id, vtype = self._match_valid_url(url).groups() node_id = self._download_json( diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 6a8b8543b..97e34808f 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -153,6 +153,7 @@ class GenericIE(InfoExtractor): IE_DESC = 'Generic downloader that works on some sites' _VALID_URL = r'.*' IE_NAME = 'generic' + _NETRC_MACHINE = False # Supress username warning _TESTS = [ # Direct link to a video { diff --git a/yt_dlp/extractor/hidive.py b/yt_dlp/extractor/hidive.py index 15bd444f9..46d7d62ab 100644 --- a/yt_dlp/extractor/hidive.py +++ b/yt_dlp/extractor/hidive.py @@ -35,18 +35,14 @@ class HiDiveIE(InfoExtractor): 'skip': 'Requires Authentication', }] - def _real_initialize(self): - email, password = self._get_login_info() - if email is None: - return - + def _perform_login(self, username, password): webpage = self._download_webpage(self._LOGIN_URL, None) form = self._search_regex( r'(?s)]+action="/account/login"[^>]*>(.+?)', webpage, 'login form') data = self._hidden_inputs(form) data.update({ - 'Email': email, + 'Email': username, 'Password': password, }) self._download_webpage( diff --git a/yt_dlp/extractor/hrti.py b/yt_dlp/extractor/hrti.py index dc5b9670c..36d600773 100644 --- a/yt_dlp/extractor/hrti.py +++ b/yt_dlp/extractor/hrti.py @@ -27,8 +27,9 @@ class HRTiBaseIE(InfoExtractor): _APP_VERSION = '1.1' _APP_PUBLICATION_ID = 'all_in_one' _API_URL = 'http://clientapi.hrt.hr/client_api.php/config/identify/format/json' + _token = None - def _initialize_api(self): + def _initialize_pre_login(self): init_data = { 'application_publication_id': self._APP_PUBLICATION_ID } @@ -64,12 +65,7 @@ class HRTiBaseIE(InfoExtractor): self._logout_url = modules['user']['resources']['logout']['uri'] - def _login(self): - username, password = self._get_login_info() - # TODO: figure out authentication with cookies - if username is None or password is None: - self.raise_login_required() - + def _perform_login(self, username, password): auth_data = { 'username': username, 'password': password, @@ -94,8 +90,9 @@ class HRTiBaseIE(InfoExtractor): self._token = auth_info['secure_streaming_token'] def _real_initialize(self): - self._initialize_api() - self._login() + if not self._token: + # TODO: figure out authentication with cookies + self.raise_login_required(method='password') class HRTiIE(HRTiBaseIE): diff --git a/yt_dlp/extractor/imggaming.py b/yt_dlp/extractor/imggaming.py index 230dc86d3..ce7b21ab2 100644 --- a/yt_dlp/extractor/imggaming.py +++ b/yt_dlp/extractor/imggaming.py @@ -21,25 +21,26 @@ class ImgGamingBaseIE(InfoExtractor): _REALM = None _VALID_URL_TEMPL = r'https?://(?P%s)/(?Plive|playlist|video)/(?P\d+)(?:\?.*?\bplaylistId=(?P\d+))?' - def _real_initialize(self): + def _initialize_pre_login(self): self._HEADERS = { 'Realm': 'dce.' 
+ self._REALM, 'x-api-key': self._API_KEY, } - email, password = self._get_login_info() - if email is None: - self.raise_login_required() - + def _perform_login(self, username, password): p_headers = self._HEADERS.copy() p_headers['Content-Type'] = 'application/json' self._HEADERS['Authorization'] = 'Bearer ' + self._download_json( self._API_BASE + 'login', None, 'Logging in', data=json.dumps({ - 'id': email, + 'id': username, 'secret': password, }).encode(), headers=p_headers)['authorisationToken'] + def _real_initialize(self): + if not self._HEADERS.get('Authorization'): + self.raise_login_required(method='password') + def _call_api(self, path, media_id): return self._download_json( self._API_BASE + path + media_id, media_id, headers=self._HEADERS) diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py index 3bb786d6a..970f2c8ab 100644 --- a/yt_dlp/extractor/instagram.py +++ b/yt_dlp/extractor/instagram.py @@ -29,9 +29,8 @@ class InstagramBaseIE(InfoExtractor): _NETRC_MACHINE = 'instagram' _IS_LOGGED_IN = False - def _login(self): - username, password = self._get_login_info() - if username is None or self._IS_LOGGED_IN: + def _perform_login(self, username, password): + if self._IS_LOGGED_IN: return login_webpage = self._download_webpage( @@ -72,9 +71,6 @@ class InstagramBaseIE(InfoExtractor): raise ExtractorError('Unable to login') InstagramBaseIE._IS_LOGGED_IN = True - def _real_initialize(self): - self._login() - def _get_count(self, media, kind, *keys): return traverse_obj( media, (kind, 'count'), *((f'edge_media_{key}', 'count') for key in keys), diff --git a/yt_dlp/extractor/iprima.py b/yt_dlp/extractor/iprima.py index 347fec1d5..1a2038453 100644 --- a/yt_dlp/extractor/iprima.py +++ b/yt_dlp/extractor/iprima.py @@ -65,11 +65,9 @@ class IPrimaIE(InfoExtractor): 'only_matching': True, }] - def _login(self): - username, password = self._get_login_info() - - if username is None or password is None: - self.raise_login_required('Login is required to access any iPrima content', method='password') + def _perform_login(self, username, password): + if self.access_token: + return login_page = self._download_webpage( self._LOGIN_URL, None, note='Downloading login page', @@ -105,16 +103,16 @@ class IPrimaIE(InfoExtractor): if self.access_token is None: raise ExtractorError('Getting token failed', expected=True) + def _real_initialize(self): + if not self.access_token: + self.raise_login_required('Login is required to access any iPrima content', method='password') + def _raise_access_error(self, error_code): if error_code == 'PLAY_GEOIP_DENIED': self.raise_geo_restricted(countries=['CZ'], metadata_available=True) elif error_code is not None: self.raise_no_formats('Access to stream infos forbidden', expected=True) - def _real_initialize(self): - if not self.access_token: - self._login() - def _real_extract(self, url): video_id = self._match_id(url) diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py index fdcf14469..dc4667744 100644 --- a/yt_dlp/extractor/iqiyi.py +++ b/yt_dlp/extractor/iqiyi.py @@ -241,9 +241,6 @@ class IqiyiIE(InfoExtractor): '18': 7, # 1080p } - def _real_initialize(self): - self._login() - @staticmethod def _rsa_fun(data): # public key extracted from http://static.iqiyi.com/js/qiyiV2/20160129180840/jobs/i18n/i18nIndex.js @@ -252,12 +249,7 @@ class IqiyiIE(InfoExtractor): return ohdave_rsa_encrypt(data, e, N) - def _login(self): - username, password = self._get_login_info() - - # No authentication to be performed - if not username: - return 
True + def _perform_login(self, username, password): data = self._download_json( 'http://kylin.iqiyi.com/get_token', None, diff --git a/yt_dlp/extractor/lecturio.py b/yt_dlp/extractor/lecturio.py index 9d2228700..0ee1eeb4d 100644 --- a/yt_dlp/extractor/lecturio.py +++ b/yt_dlp/extractor/lecturio.py @@ -22,14 +22,7 @@ class LecturioBaseIE(InfoExtractor): _LOGIN_URL = 'https://app.lecturio.com/en/login' _NETRC_MACHINE = 'lecturio' - def _real_initialize(self): - self._login() - - def _login(self): - username, password = self._get_login_info() - if username is None: - return - + def _perform_login(self, username, password): # Sets some cookies _, urlh = self._download_webpage_handle( self._LOGIN_URL, None, 'Downloading login popup') diff --git a/yt_dlp/extractor/linkedin.py b/yt_dlp/extractor/linkedin.py index bd76ae166..bf549e164 100644 --- a/yt_dlp/extractor/linkedin.py +++ b/yt_dlp/extractor/linkedin.py @@ -25,12 +25,9 @@ class LinkedInBaseIE(InfoExtractor): _NETRC_MACHINE = 'linkedin' _logged_in = False - def _real_initialize(self): + def _perform_login(self, username, password): if self._logged_in: return - email, password = self._get_login_info() - if email is None: - return login_page = self._download_webpage( self._LOGIN_URL, None, 'Downloading login page') @@ -39,7 +36,7 @@ class LinkedInBaseIE(InfoExtractor): default='https://www.linkedin.com/uas/login-submit', group='url')) data = self._hidden_inputs(login_page) data.update({ - 'session_key': email, + 'session_key': username, 'session_password': password, }) login_submit_page = self._download_webpage( diff --git a/yt_dlp/extractor/linuxacademy.py b/yt_dlp/extractor/linuxacademy.py index 2053970d1..6aff88e13 100644 --- a/yt_dlp/extractor/linuxacademy.py +++ b/yt_dlp/extractor/linuxacademy.py @@ -75,14 +75,7 @@ class LinuxAcademyIE(InfoExtractor): _CLIENT_ID = 'KaWxNn1C2Gc7n83W9OFeXltd8Utb5vvx' _NETRC_MACHINE = 'linuxacademy' - def _real_initialize(self): - self._login() - - def _login(self): - username, password = self._get_login_info() - if username is None: - return - + def _perform_login(self, username, password): def random_string(): return ''.join([ random.choice('0123456789ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz-._~') diff --git a/yt_dlp/extractor/lynda.py b/yt_dlp/extractor/lynda.py index 58cf17239..ce304743f 100644 --- a/yt_dlp/extractor/lynda.py +++ b/yt_dlp/extractor/lynda.py @@ -21,9 +21,6 @@ class LyndaBaseIE(InfoExtractor): _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.' 
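Putting the pieces of this refactor together, a converted extractor splits its setup across three hooks that `initialize()` calls in a fixed order: `_initialize_pre_login`, then `_perform_login` (only when credentials were supplied), then `_real_initialize`. A minimal sketch under the new API; the site, endpoints and field names are invented:

from .common import InfoExtractor
from ..utils import urlencode_postdata

class ExampleLoginIE(InfoExtractor):
    _NETRC_MACHINE = 'example'  # enables --username/--password and netrc
    _token = None

    def _initialize_pre_login(self):
        # runs first: set up anything the login step itself depends on
        self._request_webpage('https://example.com/', None, 'Setting up session')

    def _perform_login(self, username, password):
        # runs only if the user passed credentials
        ExampleLoginIE._token = self._download_json(
            'https://example.com/api/login', None, 'Logging in',
            data=urlencode_postdata({'user': username, 'pass': password}))['token']

    def _real_initialize(self):
        # runs last, with or without credentials
        if not self._token:
            self.raise_login_required(method='password')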
_NETRC_MACHINE = 'lynda' - def _real_initialize(self): - self._login() - @staticmethod def _check_error(json_string, key_or_keys): keys = [key_or_keys] if isinstance(key_or_keys, compat_str) else key_or_keys @@ -32,7 +29,7 @@ class LyndaBaseIE(InfoExtractor): if error: raise ExtractorError('Unable to login: %s' % error, expected=True) - def _login_step(self, form_html, fallback_action_url, extra_form_data, note, referrer_url): + def _perform_login_step(self, form_html, fallback_action_url, extra_form_data, note, referrer_url): action_url = self._search_regex( r']+action=(["\'])(?P.+?)\1', form_html, 'post url', default=fallback_action_url, group='url') @@ -55,11 +52,7 @@ class LyndaBaseIE(InfoExtractor): return response, action_url - def _login(self): - username, password = self._get_login_info() - if username is None: - return - + def _perform_login(self, username, password): # Step 1: download signin page signin_page = self._download_webpage( self._SIGNIN_URL, None, 'Downloading signin page') diff --git a/yt_dlp/extractor/nebula.py b/yt_dlp/extractor/nebula.py index d235805c3..b77ef5f28 100644 --- a/yt_dlp/extractor/nebula.py +++ b/yt_dlp/extractor/nebula.py @@ -148,14 +148,12 @@ class NebulaBaseIE(InfoExtractor): 'creator': episode['channel_title'], } - def _login(self): + def _perform_login(self, username=None, password=None): + # FIXME: username should be passed from here to inner functions self._nebula_api_token = self._retrieve_nebula_api_token() self._nebula_bearer_token = self._fetch_nebula_bearer_token() self._zype_access_token = self._fetch_zype_access_token() - def _real_initialize(self): - self._login() - class NebulaIE(NebulaBaseIE): _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/videos/(?P[-\w]+)' diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py index 77c07417b..74828f833 100644 --- a/yt_dlp/extractor/niconico.py +++ b/yt_dlp/extractor/niconico.py @@ -183,16 +183,7 @@ class NiconicoIE(InfoExtractor): 'Origin': 'https://www.nicovideo.jp', } - def _real_initialize(self): - self._login() - - def _login(self): - username, password = self._get_login_info() - # No authentication to be performed - if not username: - return True - - # Log in + def _perform_login(self, username, password): login_ok = True login_form_strs = { 'mail_tel': username, diff --git a/yt_dlp/extractor/njpwworld.py b/yt_dlp/extractor/njpwworld.py index 89380d039..68c8c8e52 100644 --- a/yt_dlp/extractor/njpwworld.py +++ b/yt_dlp/extractor/njpwworld.py @@ -43,15 +43,7 @@ class NJPWWorldIE(InfoExtractor): _LOGIN_URL = 'https://front.njpwworld.com/auth/login' - def _real_initialize(self): - self._login() - - def _login(self): - username, password = self._get_login_info() - # No authentication to be performed - if not username: - return True - + def _perform_login(self, username, password): # Setup session (will set necessary cookies) self._request_webpage( 'https://njpwworld.com/', None, note='Setting up session') diff --git a/yt_dlp/extractor/noco.py b/yt_dlp/extractor/noco.py index 78c4952f4..28af909d5 100644 --- a/yt_dlp/extractor/noco.py +++ b/yt_dlp/extractor/noco.py @@ -61,14 +61,7 @@ class NocoIE(InfoExtractor): } ] - def _real_initialize(self): - self._login() - - def _login(self): - username, password = self._get_login_info() - if username is None: - return - + def _perform_login(self, username, password): login = self._download_json( self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata({ diff --git a/yt_dlp/extractor/packtpub.py 
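Most of the simpler conversions in this patch (Lynda's multi-step signin, Noco, Platzi further down) share one shape: fetch the login page, carry over its hidden form fields such as CSRF tokens via `_hidden_inputs`, add the credentials, and post the form back. A generic sketch with hypothetical URLs:

from .common import InfoExtractor
from ..utils import urlencode_postdata

class FormLoginIE(InfoExtractor):  # illustrative only
    _NETRC_MACHINE = 'example'

    def _perform_login(self, username, password):
        login_page = self._download_webpage(
            'https://example.com/login', None, 'Downloading login page')
        # _hidden_inputs() collects the page's <input type="hidden"> fields
        form = self._hidden_inputs(login_page)
        form.update({'username': username, 'password': password})
        self._download_webpage(
            'https://example.com/login', None, 'Logging in',
            data=urlencode_postdata(form))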
b/yt_dlp/extractor/packtpub.py index c06fca795..62c52cd6e 100644 --- a/yt_dlp/extractor/packtpub.py +++ b/yt_dlp/extractor/packtpub.py @@ -47,10 +47,7 @@ class PacktPubIE(PacktPubBaseIE): _NETRC_MACHINE = 'packtpub' _TOKEN = None - def _real_initialize(self): - username, password = self._get_login_info() - if username is None: - return + def _perform_login(self, username, password): try: self._TOKEN = self._download_json( 'https://services.packtpub.com/auth-v1/users/tokens', None, diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py index d3ee071e0..963a0d6fb 100644 --- a/yt_dlp/extractor/patreon.py +++ b/yt_dlp/extractor/patreon.py @@ -88,11 +88,7 @@ class PatreonIE(InfoExtractor): # Currently Patreon exposes download URL via hidden CSS, so login is not # needed. Keeping this commented for when this inevitably changes. ''' - def _login(self): - username, password = self._get_login_info() - if username is None: - return - + def _perform_login(self, username, password): login_form = { 'redirectUrl': 'http://www.patreon.com/', 'email': username, @@ -108,8 +104,6 @@ class PatreonIE(InfoExtractor): if re.search(r'onLoginFailed', login_page): raise ExtractorError('Unable to login, incorrect username and/or password', expected=True) - def _real_initialize(self): - self._login() ''' def _real_extract(self, url): diff --git a/yt_dlp/extractor/piapro.py b/yt_dlp/extractor/piapro.py index 497e1edbc..c4eb4913f 100644 --- a/yt_dlp/extractor/piapro.py +++ b/yt_dlp/extractor/piapro.py @@ -29,13 +29,9 @@ class PiaproIE(InfoExtractor): } }] - def _real_initialize(self): - self._login_status = self._login() + _login_status = False - def _login(self): - username, password = self._get_login_info() - if not username: - return False + def _perform_login(self, username, password): login_ok = True login_form_strs = { '_username': username, @@ -57,7 +53,7 @@ class PiaproIE(InfoExtractor): if not login_ok: self.report_warning( 'unable to log in: bad username or password') - return login_ok + self._login_status = login_ok def _real_extract(self, url): video_id = self._match_id(url) diff --git a/yt_dlp/extractor/platzi.py b/yt_dlp/extractor/platzi.py index 23c8256b5..17f52e7f4 100644 --- a/yt_dlp/extractor/platzi.py +++ b/yt_dlp/extractor/platzi.py @@ -22,14 +22,7 @@ class PlatziBaseIE(InfoExtractor): _LOGIN_URL = 'https://platzi.com/login/' _NETRC_MACHINE = 'platzi' - def _real_initialize(self): - self._login() - - def _login(self): - username, password = self._get_login_info() - if username is None: - return - + def _perform_login(self, username, password): login_page = self._download_webpage( self._LOGIN_URL, None, 'Downloading login page') diff --git a/yt_dlp/extractor/playplustv.py b/yt_dlp/extractor/playplustv.py index fd72a3717..cad2c3a0f 100644 --- a/yt_dlp/extractor/playplustv.py +++ b/yt_dlp/extractor/playplustv.py @@ -38,14 +38,10 @@ class PlayPlusTVIE(InfoExtractor): 'Authorization': 'Bearer ' + self._token, }, query=query) - def _real_initialize(self): - email, password = self._get_login_info() - if email is None: - self.raise_login_required() - + def _perform_login(self, username, password): req = PUTRequest( 'https://api.playplus.tv/api/web/login', json.dumps({ - 'email': email, + 'email': username, 'password': password, }).encode(), { 'Content-Type': 'application/json; charset=utf-8', @@ -61,6 +57,10 @@ class PlayPlusTVIE(InfoExtractor): self._profile = self._call_api('Profiles')['list'][0]['_id'] + def _real_initialize(self): + if not self._token: + 
self.raise_login_required(method='password') + def _real_extract(self, url): project_id, media_id = self._match_valid_url(url).groups() media = self._call_api( diff --git a/yt_dlp/extractor/pluralsight.py b/yt_dlp/extractor/pluralsight.py index 801057ee1..2a5e0e488 100644 --- a/yt_dlp/extractor/pluralsight.py +++ b/yt_dlp/extractor/pluralsight.py @@ -162,14 +162,7 @@ query viewClip { } }''' - def _real_initialize(self): - self._login() - - def _login(self): - username, password = self._get_login_info() - if username is None: - return - + def _perform_login(self, username, password): login_page = self._download_webpage( self._LOGIN_URL, None, 'Downloading login page') diff --git a/yt_dlp/extractor/pokergo.py b/yt_dlp/extractor/pokergo.py index d27031c91..c9e2fed12 100644 --- a/yt_dlp/extractor/pokergo.py +++ b/yt_dlp/extractor/pokergo.py @@ -15,11 +15,9 @@ class PokerGoBaseIE(InfoExtractor): _AUTH_TOKEN = None _PROPERTY_ID = '1dfb3940-7d53-4980-b0b0-f28b369a000d' - def _login(self): - username, password = self._get_login_info() - if not username: - self.raise_login_required(method='password') - + def _perform_login(self, username, password): + if self._AUTH_TOKEN: + return self.report_login() PokerGoBaseIE._AUTH_TOKEN = self._download_json( f'https://subscription.pokergo.com/properties/{self._PROPERTY_ID}/sign-in', None, @@ -30,7 +28,7 @@ class PokerGoBaseIE(InfoExtractor): def _real_initialize(self): if not self._AUTH_TOKEN: - self._login() + self.raise_login_required(method='password') class PokerGoIE(PokerGoBaseIE): diff --git a/yt_dlp/extractor/roosterteeth.py b/yt_dlp/extractor/roosterteeth.py index 652fdd116..a55dd4f8b 100644 --- a/yt_dlp/extractor/roosterteeth.py +++ b/yt_dlp/extractor/roosterteeth.py @@ -21,10 +21,7 @@ class RoosterTeethBaseIE(InfoExtractor): _API_BASE = 'https://svod-be.roosterteeth.com' _API_BASE_URL = f'{_API_BASE}/api/v1' - def _login(self): - username, password = self._get_login_info() - if username is None: - return + def _perform_login(self, username, password): if self._get_cookies(self._API_BASE_URL).get('rt_access_token'): return @@ -47,9 +44,6 @@ class RoosterTeethBaseIE(InfoExtractor): msg += ': ' + error self.report_warning(msg) - def _real_initialize(self): - self._login() - def _extract_video_info(self, data): thumbnails = [] for image in traverse_obj(data, ('included', 'images')): diff --git a/yt_dlp/extractor/safari.py b/yt_dlp/extractor/safari.py index cca4464ca..7b4571daa 100644 --- a/yt_dlp/extractor/safari.py +++ b/yt_dlp/extractor/safari.py @@ -25,14 +25,7 @@ class SafariBaseIE(InfoExtractor): LOGGED_IN = False - def _real_initialize(self): - self._login() - - def _login(self): - username, password = self._get_login_info() - if username is None: - return - + def _perform_login(self, username, password): _, urlh = self._download_webpage_handle( 'https://learning.oreilly.com/accounts/login-check/', None, 'Downloading login page') diff --git a/yt_dlp/extractor/scte.py b/yt_dlp/extractor/scte.py index ca1de63b6..7215cf5d1 100644 --- a/yt_dlp/extractor/scte.py +++ b/yt_dlp/extractor/scte.py @@ -14,14 +14,7 @@ class SCTEBaseIE(InfoExtractor): _LOGIN_URL = 'https://www.scte.org/SCTE/Sign_In.aspx' _NETRC_MACHINE = 'scte' - def _real_initialize(self): - self._login() - - def _login(self): - username, password = self._get_login_info() - if username is None: - return - + def _perform_login(self, username, password): login_popup = self._download_webpage( self._LOGIN_URL, None, 'Downloading login popup') diff --git a/yt_dlp/extractor/shahid.py 
b/yt_dlp/extractor/shahid.py index 42de41a11..ab45d9ce4 100644 --- a/yt_dlp/extractor/shahid.py +++ b/yt_dlp/extractor/shahid.py @@ -79,16 +79,12 @@ class ShahidIE(ShahidBaseIE): 'only_matching': True }] - def _real_initialize(self): - email, password = self._get_login_info() - if email is None: - return - + def _perform_login(self, username, password): try: user_data = self._download_json( 'https://shahid.mbc.net/wd/service/users/login', None, 'Logging in', data=json.dumps({ - 'email': email, + 'email': username, 'password': password, 'basic': 'false', }).encode('utf-8'), headers={ diff --git a/yt_dlp/extractor/sonyliv.py b/yt_dlp/extractor/sonyliv.py index a5026b2e0..5b6849fc9 100644 --- a/yt_dlp/extractor/sonyliv.py +++ b/yt_dlp/extractor/sonyliv.py @@ -75,9 +75,12 @@ class SonyLIVIE(InfoExtractor): t[i] = '{:x}'.format(3 & n | 8) return ''.join(t) + '-' + str(int(time.time() * 1000)) - def _login(self, username, password): + def _perform_login(self, username, password): + self._HEADERS['device_id'] = self._get_device_id() + self._HEADERS['content-type'] = 'application/json' + if username.lower() == 'token' and len(password) > 1198: - return password + self._HEADERS['authorization'] = password elif len(username) != 10 or not username.isdigit(): raise ExtractorError(f'Invalid username/password; {self._LOGIN_HINT}') @@ -99,7 +102,7 @@ class SonyLIVIE(InfoExtractor): None, note='Verifying OTP', data=data.encode(), headers=self._HEADERS) if otp_verify_json['resultCode'] == 'KO': raise ExtractorError(otp_request_json['message'], expected=True) - return otp_verify_json['resultObj']['accessToken'] + self._HEADERS['authorization'] = otp_verify_json['resultObj']['accessToken'] def _call_api(self, version, path, video_id): try: @@ -118,13 +121,8 @@ class SonyLIVIE(InfoExtractor): raise ExtractorError(message) raise - def _real_initialize(self): + def _initialize_pre_login(self): self._HEADERS['security_token'] = self._call_api('1.4', 'ALL/GETTOKEN', None) - username, password = self._get_login_info() - if username: - self._HEADERS['device_id'] = self._get_device_id() - self._HEADERS['content-type'] = 'application/json' - self._HEADERS['authorization'] = self._login(username, password) def _real_extract(self, url): video_id = self._match_id(url) diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index 64b8a71b6..bbc79c2be 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -107,30 +107,24 @@ class SoundcloudBaseIE(InfoExtractor): return False raise - def _real_initialize(self): + def _initialize_pre_login(self): self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or 'a3e059563d7fd3372b49b37f00a00bcf' - self._login() - - def _login(self): - username, password = self._get_login_info() - if username is None: - return - - if username == 'oauth' and password is not None: - self._access_token = password - query = self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID - payload = {'session': {'access_token': self._access_token}} - token_verification = sanitized_Request(self._API_VERIFY_AUTH_TOKEN % query, json.dumps(payload).encode('utf-8')) - response = self._download_json(token_verification, None, note='Verifying login token...', fatal=False) - if response is not False: - self._HEADERS = {'Authorization': 'OAuth ' + self._access_token} - self.report_login() - else: - self.report_warning('Provided authorization token seems to be invalid. 
Continue as guest') - elif username is not None: + + def _perform_login(self, username, password): + if username != 'oauth': self.report_warning( 'Login using username and password is not currently supported. ' 'Use "--username oauth --password " to login using an oauth token') + self._access_token = password + query = self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID + payload = {'session': {'access_token': self._access_token}} + token_verification = sanitized_Request(self._API_VERIFY_AUTH_TOKEN % query, json.dumps(payload).encode('utf-8')) + response = self._download_json(token_verification, None, note='Verifying login token...', fatal=False) + if response is not False: + self._HEADERS = {'Authorization': 'OAuth ' + self._access_token} + self.report_login() + else: + self.report_warning('Provided authorization token seems to be invalid. Continue as guest') r''' def genDevId(): diff --git a/yt_dlp/extractor/teachable.py b/yt_dlp/extractor/teachable.py index 37eae82bc..232eaa521 100644 --- a/yt_dlp/extractor/teachable.py +++ b/yt_dlp/extractor/teachable.py @@ -40,8 +40,7 @@ class TeachableBaseIE(InfoExtractor): if self._logged_in: return - username, password = self._get_login_info( - netrc_machine=self._SITES.get(site, site)) + username, password = self._get_login_info(netrc_machine=self._SITES.get(site, site)) if username is None: return diff --git a/yt_dlp/extractor/teamtreehouse.py b/yt_dlp/extractor/teamtreehouse.py index d347e97ef..64522ec4c 100644 --- a/yt_dlp/extractor/teamtreehouse.py +++ b/yt_dlp/extractor/teamtreehouse.py @@ -51,17 +51,14 @@ class TeamTreeHouseIE(InfoExtractor): }] _NETRC_MACHINE = 'teamtreehouse' - def _real_initialize(self): - email, password = self._get_login_info() - if email is None: - return + def _perform_login(self, username, password): signin_page = self._download_webpage( 'https://teamtreehouse.com/signin', None, 'Downloading signin page') data = self._form_hidden_inputs('new_user_session', signin_page) data.update({ - 'user_session[email]': email, + 'user_session[email]': username, 'user_session[password]': password, }) error_message = get_element_by_class('error-message', self._download_webpage( diff --git a/yt_dlp/extractor/tennistv.py b/yt_dlp/extractor/tennistv.py index a39a2fc60..58fdecebe 100644 --- a/yt_dlp/extractor/tennistv.py +++ b/yt_dlp/extractor/tennistv.py @@ -30,11 +30,9 @@ class TennisTVIE(InfoExtractor): 'skip': 'Requires email and password of a subscribed account', } _NETRC_MACHINE = 'tennistv' + _session_token = None - def _login(self): - username, password = self._get_login_info() - if not username or not password: - raise ExtractorError('No login info available, needed for using %s.' 
% self.IE_NAME, expected=True) + def _perform_login(self, username, password): login_form = { 'Email': username, @@ -63,7 +61,8 @@ class TennisTVIE(InfoExtractor): self._session_token = login_result['sessionToken'] def _real_initialize(self): - self._login() + if not self._session_token: + raise self.raise_login_required('Login info is needed for this website', method='password') def _real_extract(self, url): video_id = self._match_id(url) diff --git a/yt_dlp/extractor/toutv.py b/yt_dlp/extractor/toutv.py index 6c84c211c..1d5da1040 100644 --- a/yt_dlp/extractor/toutv.py +++ b/yt_dlp/extractor/toutv.py @@ -40,17 +40,14 @@ class TouTvIE(RadioCanadaIE): }] _CLIENT_KEY = '90505c8d-9c34-4f34-8da1-3a85bdc6d4f4' - def _real_initialize(self): - email, password = self._get_login_info() - if email is None: - return + def _perform_login(self, username, password): try: self._access_token = self._download_json( 'https://services.radio-canada.ca/toutv/profiling/accounts/login', None, 'Logging in', data=json.dumps({ 'ClientId': self._CLIENT_KEY, 'ClientSecret': '34026772-244b-49b6-8b06-317b30ac9a20', - 'Email': email, + 'Email': username, 'Password': password, 'Scope': 'id.write media-validation.read', }).encode(), headers={ diff --git a/yt_dlp/extractor/tubitv.py b/yt_dlp/extractor/tubitv.py index e9b66ec77..31feb9a70 100644 --- a/yt_dlp/extractor/tubitv.py +++ b/yt_dlp/extractor/tubitv.py @@ -54,10 +54,7 @@ class TubiTvIE(InfoExtractor): }, }] - def _login(self): - username, password = self._get_login_info() - if username is None: - return + def _perform_login(self, username, password): self.report_login() form_data = { 'username': username, @@ -72,9 +69,6 @@ class TubiTvIE(InfoExtractor): raise ExtractorError( 'Login failed (invalid username/password)', expected=True) - def _real_initialize(self): - self._login() - def _real_extract(self, url): video_id = self._match_id(url) video_data = self._download_json( diff --git a/yt_dlp/extractor/tumblr.py b/yt_dlp/extractor/tumblr.py index a3e0e15f2..8086f613d 100644 --- a/yt_dlp/extractor/tumblr.py +++ b/yt_dlp/extractor/tumblr.py @@ -247,11 +247,7 @@ class TumblrIE(InfoExtractor): _ACCESS_TOKEN = None - def _real_initialize(self): - self.get_access_token() - self._login() - - def get_access_token(self): + def _initialize_pre_login(self): login_page = self._download_webpage( self._LOGIN_URL, None, 'Downloading login page', fatal=False) if login_page: @@ -260,11 +256,7 @@ class TumblrIE(InfoExtractor): if not self._ACCESS_TOKEN: self.report_warning('Failed to get access token; metadata will be missing and some videos may not work') - def _login(self): - username, password = self._get_login_info() - if not username: - return - + def _perform_login(self, username, password): if not self._ACCESS_TOKEN: return diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py index bee26c3a3..10de74c8e 100644 --- a/yt_dlp/extractor/twitch.py +++ b/yt_dlp/extractor/twitch.py @@ -57,14 +57,7 @@ class TwitchBaseIE(InfoExtractor): 'VideoPlayer_ChapterSelectButtonVideo': '8d2793384aac3773beab5e59bd5d6f585aedb923d292800119e03d40cd0f9b41', } - def _real_initialize(self): - self._login() - - def _login(self): - username, password = self._get_login_info() - if username is None: - return - + def _perform_login(self, username, password): def fail(message): raise ExtractorError( 'Unable to login. 
Twitch said: %s' % message, expected=True) diff --git a/yt_dlp/extractor/udemy.py b/yt_dlp/extractor/udemy.py index 25b28e98e..235f89713 100644 --- a/yt_dlp/extractor/udemy.py +++ b/yt_dlp/extractor/udemy.py @@ -168,14 +168,7 @@ class UdemyIE(InfoExtractor): self._handle_error(response) return response - def _real_initialize(self): - self._login() - - def _login(self): - username, password = self._get_login_info() - if username is None: - return - + def _perform_login(self, username, password): login_popup = self._download_webpage( self._LOGIN_URL, None, 'Downloading login popup') diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py index e99dbdefa..6bfb8d442 100644 --- a/yt_dlp/extractor/vidio.py +++ b/yt_dlp/extractor/vidio.py @@ -23,11 +23,7 @@ class VidioBaseIE(InfoExtractor): _LOGIN_URL = 'https://www.vidio.com/users/login' _NETRC_MACHINE = 'vidio' - def _login(self): - username, password = self._get_login_info() - if username is None: - return - + def _perform_login(self, username, password): def is_logged_in(): res = self._download_json( 'https://www.vidio.com/interactions.json', None, 'Checking if logged in', fatal=False) or {} @@ -63,10 +59,9 @@ class VidioBaseIE(InfoExtractor): 'Unable to log in: %s. %s' % (reason, clean_html(subreason)), expected=True) raise ExtractorError('Unable to log in') - def _real_initialize(self): + def _initialize_pre_login(self): self._api_key = self._download_json( 'https://www.vidio.com/auth', None, data=b'')['api_key'] - self._login() def _call_api(self, url, video_id, note=None): return self._download_json(url, video_id, note=note, headers={ diff --git a/yt_dlp/extractor/viewlift.py b/yt_dlp/extractor/viewlift.py index 5b558d890..4627f66fd 100644 --- a/yt_dlp/extractor/viewlift.py +++ b/yt_dlp/extractor/viewlift.py @@ -36,9 +36,6 @@ class ViewLiftBaseIE(InfoExtractor): def _fetch_token(self, site, url): if self._TOKENS.get(site): return - email, password = self._get_login_info(netrc_machine=site) - if email: - self.report_warning('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies']) cookies = self._get_cookies(url) if cookies and cookies.get('token'): diff --git a/yt_dlp/extractor/viki.py b/yt_dlp/extractor/viki.py index 19b09121c..8234ba7df 100644 --- a/yt_dlp/extractor/viki.py +++ b/yt_dlp/extractor/viki.py @@ -99,14 +99,7 @@ class VikiBaseIE(InfoExtractor): self.raise_login_required(message) self._raise_error(message) - def _real_initialize(self): - self._login() - - def _login(self): - username, password = self._get_login_info() - if username is None: - return - + def _perform_login(self, username, password): self._token = self._call_api( 'sessions.json', None, 'Logging in', fatal=False, data={'username': username, 'password': password}).get('token') diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index 458a751fe..051cf1b17 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -44,12 +44,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): _LOGIN_REQUIRED = False _LOGIN_URL = 'https://vimeo.com/log_in' - def _login(self): - username, password = self._get_login_info() - if username is None: - if self._LOGIN_REQUIRED: - raise ExtractorError('No login info available, needed for using %s.' 
% self.IE_NAME, expected=True) - return + def _perform_login(self, username, password): webpage = self._download_webpage( self._LOGIN_URL, None, 'Downloading login page') token, vuid = self._extract_xsrft_and_vuid(webpage) @@ -75,6 +70,10 @@ class VimeoBaseInfoExtractor(InfoExtractor): expected=True) raise ExtractorError('Unable to log in') + def _real_initialize(self): + if self._LOGIN_REQUIRED and not self._get_cookies('https://vimeo.com').get('vuid'): + self._raise_login_required() + def _get_video_password(self): password = self.get_param('videopassword') if password is None: @@ -701,9 +700,6 @@ class VimeoIE(VimeoBaseInfoExtractor): raise ExtractorError('Wrong video password', expected=True) return checked - def _real_initialize(self): - self._login() - def _extract_from_api(self, video_id, unlisted_hash=None): token = self._download_json( 'https://vimeo.com/_rv/jwt', video_id, headers={ @@ -1231,9 +1227,6 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): 'skip': 'video gone', }] - def _real_initialize(self): - self._login() - def _real_extract(self, url): page_url, video_id = self._match_valid_url(url).groups() data = self._download_json( @@ -1275,9 +1268,6 @@ class VimeoWatchLaterIE(VimeoChannelIE): 'only_matching': True, }] - def _real_initialize(self): - self._login() - def _page_url(self, base_url, pagenum): url = '%s/page:%d/' % (base_url, pagenum) request = sanitized_Request(url) diff --git a/yt_dlp/extractor/vk.py b/yt_dlp/extractor/vk.py index 18eb33b57..cbc315961 100644 --- a/yt_dlp/extractor/vk.py +++ b/yt_dlp/extractor/vk.py @@ -29,11 +29,7 @@ from .youtube import YoutubeIE class VKBaseIE(InfoExtractor): _NETRC_MACHINE = 'vk' - def _login(self): - username, password = self._get_login_info() - if username is None: - return - + def _perform_login(self, username, password): login_page, url_handle = self._download_webpage_handle( 'https://vk.com', None, 'Downloading login page') @@ -57,9 +53,6 @@ class VKBaseIE(InfoExtractor): raise ExtractorError( 'Unable to login, incorrect username and/or password', expected=True) - def _real_initialize(self): - self._login() - def _download_payload(self, path, video_id, data, fatal=True): data['al'] = 1 code, payload = self._download_json( diff --git a/yt_dlp/extractor/vlive.py b/yt_dlp/extractor/vlive.py index 74dc349d5..ae35c976c 100644 --- a/yt_dlp/extractor/vlive.py +++ b/yt_dlp/extractor/vlive.py @@ -26,22 +26,16 @@ class VLiveBaseIE(NaverBaseIE): _NETRC_MACHINE = 'vlive' _logged_in = False - def _real_initialize(self): - if not self._logged_in: - VLiveBaseIE._logged_in = self._login() - - def _login(self): - email, password = self._get_login_info() - if email is None: - return False - + def _perform_login(self, username, password): + if self._logged_in: + return LOGIN_URL = 'https://www.vlive.tv/auth/email/login' self._request_webpage( LOGIN_URL, None, note='Downloading login cookies') self._download_webpage( LOGIN_URL, None, note='Logging in', - data=urlencode_postdata({'email': email, 'pwd': password}), + data=urlencode_postdata({'email': username, 'pwd': password}), headers={ 'Referer': LOGIN_URL, 'Content-Type': 'application/x-www-form-urlencoded' @@ -54,7 +48,7 @@ class VLiveBaseIE(NaverBaseIE): if not try_get(login_info, lambda x: x['message']['login'], bool): raise ExtractorError('Unable to log in', expected=True) - return True + VLiveBaseIE._logged_in = True def _call_api(self, path_template, video_id, fields=None, query_add={}, note=None): if note is None: diff --git a/yt_dlp/extractor/vrv.py b/yt_dlp/extractor/vrv.py index 
7bc55f333..10e6be7ed 100644 --- a/yt_dlp/extractor/vrv.py +++ b/yt_dlp/extractor/vrv.py @@ -85,7 +85,7 @@ class VRVBaseIE(InfoExtractor): 'resource_key': resource_key, })['__links__']['cms_resource']['href'] - def _real_initialize(self): + def _initialize_pre_login(self): webpage = self._download_webpage( 'https://vrv.co/', None, headers=self.geo_verification_headers()) self._API_PARAMS = self._parse_json(self._search_regex( @@ -124,16 +124,10 @@ class VRVIE(VRVBaseIE): }] _NETRC_MACHINE = 'vrv' - def _real_initialize(self): - super(VRVIE, self)._real_initialize() - - email, password = self._get_login_info() - if email is None: - return - + def _perform_login(self, username, password): token_credentials = self._call_api( 'authenticate/by:credentials', None, 'Token Credentials', data={ - 'email': email, + 'email': username, 'password': password, }) self._TOKEN = token_credentials['oauth_token'] diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index d6c74f455..d74d5b0e9 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -263,7 +263,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)' - _NETRC_MACHINE = 'youtube' + # _NETRC_MACHINE = 'youtube' # If True it will raise an error if no login info is provided _LOGIN_REQUIRED = False @@ -334,21 +334,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor): r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion', ) - def _login(self): - """ - Attempt to log in to YouTube. - If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised. - """ - - if (self._LOGIN_REQUIRED - and self.get_param('cookiefile') is None - and self.get_param('cookiesfrombrowser') is None): - self.raise_login_required( - 'Login details are needed to download this content', method='cookies') - username, password = self._get_login_info() - if username: - self.report_warning(f'Cannot login to YouTube using username and password. 
{self._LOGIN_HINTS["cookies"]}') - def _initialize_consent(self): cookies = self._get_cookies('https://www.youtube.com/') if cookies.get('__Secure-3PSID'): @@ -379,7 +364,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor): def _real_initialize(self): self._initialize_pref() self._initialize_consent() - self._login() + if (self._LOGIN_REQUIRED + and self.get_param('cookiefile') is None + and self.get_param('cookiesfrombrowser') is None): + self.raise_login_required('Login details are needed to download this content', method='cookies') _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;' _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;' @@ -3928,6 +3916,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): if entry: yield entry ''' + def _extract_entries(self, parent_renderer, continuation_list): # continuation_list is modified in-place with continuation_list = [continuation_token] continuation_list[:] = [None] diff --git a/yt_dlp/extractor/zattoo.py b/yt_dlp/extractor/zattoo.py index 9435920b2..c02b4ca14 100644 --- a/yt_dlp/extractor/zattoo.py +++ b/yt_dlp/extractor/zattoo.py @@ -25,13 +25,11 @@ class ZattooPlatformBaseIE(InfoExtractor): def _host_url(self): return 'https://%s' % (self._API_HOST if hasattr(self, '_API_HOST') else self._HOST) - def _login(self): - username, password = self._get_login_info() - if not username or not password: - self.raise_login_required( - 'A valid %s account is needed to access this media.' - % self._NETRC_MACHINE) + def _real_initialize(self): + if not self._power_guide_hash: + self.raise_login_required('An account is needed to access this media', method='password') + def _perform_login(self, username, password): try: data = self._download_json( '%s/zapi/v2/account/login' % self._host_url(), None, 'Logging in', @@ -52,7 +50,7 @@ class ZattooPlatformBaseIE(InfoExtractor): self._power_guide_hash = data['session']['power_guide_hash'] - def _real_initialize(self): + def _initialize_pre_login(self): webpage = self._download_webpage( self._host_url(), None, 'Downloading app token') app_token = self._html_search_regex( @@ -72,8 +70,6 @@ class ZattooPlatformBaseIE(InfoExtractor): 'format': 'json', })) - self._login() - def _extract_cid(self, video_id, channel_name): channel_groups = self._download_json( '%s/zapi/v2/cached/channels/%s' % (self._host_url(), diff --git a/yt_dlp/extractor/zee5.py b/yt_dlp/extractor/zee5.py index ebe393ec7..3e3f11b15 100644 --- a/yt_dlp/extractor/zee5.py +++ b/yt_dlp/extractor/zee5.py @@ -93,32 +93,27 @@ class Zee5IE(InfoExtractor): _NETRC_MACHINE = 'zee5' _GEO_COUNTRIES = ['IN'] - def _login(self): - username, password = self._get_login_info() - if username: - if len(username) == 10 and username.isdigit() and self._USER_TOKEN is None: - self.report_login() - otp_request_json = self._download_json('https://b2bapi.zee5.com/device/sendotp_v1.php?phoneno=91{}'.format(username), - None, note='Sending OTP') - if otp_request_json['code'] == 0: - self.to_screen(otp_request_json['message']) - else: - raise ExtractorError(otp_request_json['message'], expected=True) - otp_code = self._get_tfa_info('OTP') - otp_verify_json = self._download_json('https://b2bapi.zee5.com/device/verifyotp_v1.php?phoneno=91{}&otp={}&guest_token={}&platform=web'.format(username, otp_code, self._DEVICE_ID), - None, note='Verifying OTP', fatal=False) - if not otp_verify_json: - raise ExtractorError('Unable to verify OTP.', expected=True) - self._USER_TOKEN = 
otp_verify_json.get('token') - if not self._USER_TOKEN: - raise ExtractorError(otp_request_json['message'], expected=True) - elif username.lower() == 'token' and len(password) > 1198: - self._USER_TOKEN = password + def _perform_login(self, username, password): + if len(username) == 10 and username.isdigit() and self._USER_TOKEN is None: + self.report_login() + otp_request_json = self._download_json('https://b2bapi.zee5.com/device/sendotp_v1.php?phoneno=91{}'.format(username), + None, note='Sending OTP') + if otp_request_json['code'] == 0: + self.to_screen(otp_request_json['message']) else: - raise ExtractorError(self._LOGIN_HINT, expected=True) - - def _real_initialize(self): - self._login() + raise ExtractorError(otp_request_json['message'], expected=True) + otp_code = self._get_tfa_info('OTP') + otp_verify_json = self._download_json('https://b2bapi.zee5.com/device/verifyotp_v1.php?phoneno=91{}&otp={}&guest_token={}&platform=web'.format(username, otp_code, self._DEVICE_ID), + None, note='Verifying OTP', fatal=False) + if not otp_verify_json: + raise ExtractorError('Unable to verify OTP.', expected=True) + self._USER_TOKEN = otp_verify_json.get('token') + if not self._USER_TOKEN: + raise ExtractorError(otp_request_json['message'], expected=True) + elif username.lower() == 'token' and len(password) > 1198: + self._USER_TOKEN = password + else: + raise ExtractorError(self._LOGIN_HINT, expected=True) def _real_extract(self, url): video_id, display_id = self._match_valid_url(url).group('id', 'display_id') From 0c14d66ad9ce1c517fd3fab09a96a16724d3d2ab Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 19 Mar 2022 01:36:56 +0530 Subject: [PATCH 0827/2552] Fix `autonumber` Bug in 09b49e1f688831c3ad7181decf38c90f8451e6c4 --- yt_dlp/YoutubeDL.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 38ff628b0..33f33ddfe 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2859,14 +2859,13 @@ class YoutubeDL(object): # Does nothing under normal operation - for backward compatibility of process_info self.post_extract(info_dict) + self._num_downloads += 1 # info_dict['_filename'] needs to be set for backward compatibility info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True) temp_filename = self.prepare_filename(info_dict, 'temp') files_to_move = {} - self._num_downloads += 1 - # Forced printings self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict)) From eeb2a770f3d53a5484c18b06d40c0eb2616f8281 Mon Sep 17 00:00:00 2001 From: s0u1h <101902012+s0u1h@users.noreply.github.com> Date: Fri, 18 Mar 2022 17:03:09 -0400 Subject: [PATCH 0828/2552] [utils] `format_decimal_suffix`: Fix for very large numbers (#3109) Authored by: s0u1h --- test/test_utils.py | 1 + yt_dlp/utils.py | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 6be5bb642..a7f1b0e94 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1780,6 +1780,7 @@ Line 1 self.assertEqual(format_bytes(1024**6), '1.00EiB') self.assertEqual(format_bytes(1024**7), '1.00ZiB') self.assertEqual(format_bytes(1024**8), '1.00YiB') + self.assertEqual(format_bytes(1024**9), '1024.00YiB') def test_hide_login_info(self): self.assertEqual(Config.hide_login_info(['-u', 'foo', '-p', 'bar']), diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index c9b57c2f0..da6f27801 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2279,8 +2279,9 @@ def 
format_decimal_suffix(num, fmt='%d%s', *, factor=1000): num, factor = float_or_none(num), float(factor) if num is None or num < 0: return None - exponent = 0 if num == 0 else int(math.log(num, factor)) - suffix = ['', *'kMGTPEZY'][exponent] + POSSIBLE_SUFFIXES = 'kMGTPEZY' + exponent = 0 if num == 0 else min(int(math.log(num, factor)), len(POSSIBLE_SUFFIXES)) + suffix = ['', *POSSIBLE_SUFFIXES][exponent] if factor == 1024: suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i') converted = num / (factor ** exponent) From f4ad919298968e480d14467ba20c0dca34e1a27c Mon Sep 17 00:00:00 2001 From: i6t <62123048+i6t@users.noreply.github.com> Date: Sat, 19 Mar 2022 06:06:52 +0900 Subject: [PATCH 0829/2552] [Veo] Fix extractor (#3101) Authored by: i6t --- yt_dlp/extractor/veo.py | 42 ++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/yt_dlp/extractor/veo.py b/yt_dlp/extractor/veo.py index 9bc41114a..d87bb5b47 100644 --- a/yt_dlp/extractor/veo.py +++ b/yt_dlp/extractor/veo.py @@ -6,6 +6,7 @@ from .common import InfoExtractor from ..utils import ( int_or_none, mimetype2ext, + str_or_none, unified_timestamp, url_or_none, ) @@ -24,6 +25,7 @@ class VeoIE(InfoExtractor): 'upload_date': '20201028', 'timestamp': 1603847208, 'duration': 1916, + 'view_count': int, } }, { 'url': 'https://app.veo.co/matches/20220313-2022-03-13_u15m-plsjq-vs-csl/', @@ -39,39 +41,41 @@ class VeoIE(InfoExtractor): video_data = self._download_json( 'https://app.veo.co/api/app/matches/%s/videos' % video_id, video_id, 'Downloading video data') - title = metadata.get('title') - thumbnail = url_or_none(metadata.get('thumbnail')) - - timestamp = unified_timestamp(metadata.get('created')) - duration = int_or_none(metadata.get('duration')) - view_count = int_or_none(metadata.get('view_count')) - formats = [] for fmt in video_data: - mimetype = fmt.get('mime_type') + mimetype = str_or_none(fmt.get('mime_type')) + format_url = url_or_none(fmt.get('url')) # skip configuration file for panoramic video - if mimetype == 'video/mp2t': + if not format_url or mimetype == 'video/mp2t': continue + height = int_or_none(fmt.get('height')) - bitrate = int_or_none(fmt.get('bit_rate'), scale=1000) - render_type = fmt.get('render_type') + render_type = str_or_none(fmt.get('render_type')) + format_id = f'{render_type}-{height}p' if render_type and height else None + + # Veo returns panoramic video information even if panoramic video is not available. + # e.g. 
https://app.veo.co/matches/20201027-last-period/ + if render_type == 'panorama': + if not self._is_valid_url(format_url, video_id, format_id): + continue + formats.append({ - 'url': url_or_none(fmt.get('url')), - 'format_id': '%s-%sp' % (render_type, height), + 'url': format_url, + 'format_id': format_id, 'ext': mimetype2ext(mimetype), 'width': int_or_none(fmt.get('width')), 'height': height, - 'vbr': bitrate + 'vbr': int_or_none(fmt.get('bit_rate'), scale=1000), }) self._sort_formats(formats) return { 'id': video_id, - 'title': title, + 'title': str_or_none(metadata.get('title')), 'formats': formats, - 'thumbnail': thumbnail, - 'timestamp': timestamp, - 'view_count': view_count, - 'duration': duration + 'thumbnail': url_or_none(metadata.get('thumbnail')), + 'timestamp': unified_timestamp(metadata.get('created')), + 'view_count': int_or_none(metadata.get('view_count')), + 'duration': int_or_none(metadata.get('duration')), } From 510809f1aab331748185712646a867f087cfdb90 Mon Sep 17 00:00:00 2001 From: foghawk Date: Fri, 18 Mar 2022 16:08:38 -0500 Subject: [PATCH 0830/2552] [nitter] Minor fixes and update instance list (#3099) Authored by: foghawk --- yt_dlp/extractor/nitter.py | 221 ++++++++++++++++++++++--------------- 1 file changed, 135 insertions(+), 86 deletions(-) diff --git a/yt_dlp/extractor/nitter.py b/yt_dlp/extractor/nitter.py index a0546cda0..8bb709cd7 100644 --- a/yt_dlp/extractor/nitter.py +++ b/yt_dlp/extractor/nitter.py @@ -5,7 +5,6 @@ from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( parse_count, - unified_strdate, unified_timestamp, remove_end, determine_ext, @@ -25,6 +24,16 @@ class NitterIE(InfoExtractor): 'nitter.v6vgyqpa7yefkorazmg5d5fimstmvm2vtbirt6676mt7qmllrcnwycqd.onion', 'i23nv6w3juvzlw32xzoxcqzktegd4i4fu3nmnc2ewv4ggiu4ledwklad.onion', '26oq3gioiwcmfojub37nz5gzbkdiqp7fue5kvye7d4txv4ny6fb4wwid.onion', + 'vfaomgh4jxphpbdfizkm5gbtjahmei234giqj4facbwhrfjtcldauqad.onion', + 'iwgu3cv7ywf3gssed5iqtavmrlszgsxazkmwwnt4h2kdait75thdyrqd.onion', + 'erpnncl5nhyji3c32dcfmztujtl3xaddqb457jsbkulq24zqq7ifdgad.onion', + 'ckzuw5misyahmg7j5t5xwwuj3bwy62jfolxyux4brfflramzsvvd3syd.onion', + 'jebqj47jgxleaiosfcxfibx2xdahjettuydlxbg64azd4khsxv6kawid.onion', + 'nttr2iupbb6fazdpr2rgbooon2tzbbsvvkagkgkwohhodjzj43stxhad.onion', + 'nitraeju2mipeziu2wtcrqsxg7h62v5y4eqgwi75uprynkj74gevvuqd.onion', + 'nitter.lqs5fjmajyp7rvp4qvyubwofzi6d4imua7vs237rkc4m5qogitqwrgyd.onion', + 'ibsboeui2im5o7dxnik3s5yghufumgy5abevtij5nbizequfpu4qi4ad.onion', + 'ec5nvbycpfa5k6ro77blxgkyrzbkv7uy6r5cngcbkadtjj2733nm3uyd.onion', 'nitter.i2p', 'u6ikd6zndl3c4dsdq4mmujpntgeevdk5qzkfb57r4tnfeccrn2qa.b32.i2p', @@ -36,28 +45,55 @@ class NitterIE(InfoExtractor): 'nitter.42l.fr', 'nitter.pussthecat.org', 'nitter.nixnet.services', - 'nitter.mastodont.cat', - 'nitter.tedomum.net', 'nitter.fdn.fr', 'nitter.1d4.us', 'nitter.kavin.rocks', - 'tweet.lambda.dance', - 'nitter.cc', - 'nitter.vxempire.xyz', 'nitter.unixfox.eu', 'nitter.domain.glass', - 'nitter.himiko.cloud', 'nitter.eu', 'nitter.namazso.eu', - 'nitter.mailstation.de', 'nitter.actionsack.com', - 'nitter.cattube.org', - 'nitter.dark.fail', 'birdsite.xanny.family', - 'nitter.40two.app', - 'nitter.skrep.in', + 'nitter.hu', + 'twitr.gq', + 'nitter.moomoo.me', + 'nittereu.moomoo.me', + 'bird.from.tf', + 'nitter.it', + 'twitter.censors.us', + 'twitter.grimneko.de', + 'nitter.alefvanoon.xyz', + 'n.hyperborea.cloud', + 'nitter.ca', + 'twitter.076.ne.jp', + 'twitter.mstdn.social', + 'nitter.fly.dev', + 'notabird.site', + 
'nitter.weiler.rocks', + 'nitter.silkky.cloud', + 'nitter.sethforprivacy.com', + 'nttr.stream', + 'nitter.cutelab.space', + 'nitter.nl', + 'nitter.mint.lgbt', + 'nitter.bus-hit.me', + 'fuckthesacklers.network', + 'nitter.govt.land', + 'nitter.datatunnel.xyz', + 'nitter.esmailelbob.xyz', + 'tw.artemislena.eu', + 'de.nttr.stream', + 'nitter.winscloud.net', + 'nitter.tiekoetter.com', + 'nitter.spaceint.fr', + 'twtr.bch.bar', + 'nitter.exonip.de', + 'nitter.mastodon.pro', + 'nitter.notraxx.ch', + # not in the list anymore + 'nitter.skrep.in', 'nitter.snopyta.org', ) @@ -68,96 +104,121 @@ class NitterIE(InfoExtractor): # official, rate limited 'nitter.net', # offline + 'is-nitter.resolv.ee', + 'lu-nitter.resolv.ee', 'nitter.13ad.de', + 'nitter.40two.app', + 'nitter.cattube.org', + 'nitter.cc', + 'nitter.dark.fail', + 'nitter.himiko.cloud', + 'nitter.koyu.space', + 'nitter.mailstation.de', + 'nitter.mastodont.cat', + 'nitter.tedomum.net', + 'nitter.tokhmi.xyz', 'nitter.weaponizedhumiliation.com', + 'nitter.vxempire.xyz', + 'tweet.lambda.dance', ) INSTANCES = NON_HTTP_INSTANCES + HTTP_INSTANCES + DEAD_INSTANCES - _INSTANCES_RE = '(?:' + '|'.join([re.escape(instance) for instance in INSTANCES]) + ')' - _VALID_URL = r'https?://%(instance)s/(?P.+)/status/(?P[0-9]+)(#.)?' % {'instance': _INSTANCES_RE} + _INSTANCES_RE = f'(?:{"|".join(map(re.escape, INSTANCES))})' + _VALID_URL = fr'https?://{_INSTANCES_RE}/(?P.+)/status/(?P[0-9]+)(#.)?' current_instance = random.choice(HTTP_INSTANCES) _TESTS = [ { # GIF (wrapped in mp4) - 'url': 'https://%s/firefox/status/1314279897502629888#m' % current_instance, + 'url': f'https://{current_instance}/firefox/status/1314279897502629888#m', 'info_dict': { 'id': '1314279897502629888', 'ext': 'mp4', - 'title': 'Firefox 🔥 - You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. \n\nReport harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg\n\n#UnfckTheInternet', - 'description': 'You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. \n\nReport harmful YouTube recommendations so others can avoid watching them. 
➡️ https://mzl.la/3iFIiyg\n\n#UnfckTheInternet', + 'title': 'md5:7890a9277da4639ab624dd899424c5d8', + 'description': 'md5:5fea96a4d3716c350f8b95b21b3111fe', 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'Firefox 🔥', 'uploader_id': 'firefox', - 'uploader_url': 'https://%s/firefox' % current_instance, + 'uploader_url': f'https://{current_instance}/firefox', 'upload_date': '20201008', 'timestamp': 1602183720, + 'like_count': int, + 'repost_count': int, + 'comment_count': int, }, }, { # normal video - 'url': 'https://%s/Le___Doc/status/1299715685392756737#m' % current_instance, + 'url': f'https://{current_instance}/Le___Doc/status/1299715685392756737#m', 'info_dict': { 'id': '1299715685392756737', 'ext': 'mp4', - 'title': 'Le Doc - "Je ne prédis jamais rien"\nD Raoult, Août 2020...', + 'title': 're:^.* - "Je ne prédis jamais rien"\nD Raoult, Août 2020...', 'description': '"Je ne prédis jamais rien"\nD Raoult, Août 2020...', 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'Le Doc', + 'uploader': 're:^Le *Doc', 'uploader_id': 'Le___Doc', - 'uploader_url': 'https://%s/Le___Doc' % current_instance, + 'uploader_url': f'https://{current_instance}/Le___Doc', 'upload_date': '20200829', - 'timestamp': 1598711341, + 'timestamp': 1598711340, 'view_count': int, 'like_count': int, 'repost_count': int, 'comment_count': int, }, }, { # video embed in a "Streaming Political Ads" box - 'url': 'https://%s/mozilla/status/1321147074491092994#m' % current_instance, + 'url': f'https://{current_instance}/mozilla/status/1321147074491092994#m', 'info_dict': { 'id': '1321147074491092994', 'ext': 'mp4', - 'title': "Mozilla - Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows?\n\nThis isn't a real political ad, but if you're watching streaming TV in the U.S., chances are you've seen quite a few. \n\nLearn more ➡️ https://mzl.la/StreamingAds", - 'description': "Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows?\n\nThis isn't a real political ad, but if you're watching streaming TV in the U.S., chances are you've seen quite a few. 
\n\nLearn more ➡️ https://mzl.la/StreamingAds", + 'title': 'md5:8290664aabb43b9189145c008386bf12', + 'description': 'md5:9cf2762d49674bc416a191a689fb2aaa', 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'Mozilla', 'uploader_id': 'mozilla', - 'uploader_url': 'https://%s/mozilla' % current_instance, + 'uploader_url': f'https://{current_instance}/mozilla', 'upload_date': '20201027', - 'timestamp': 1603820982 + 'timestamp': 1603820940, + 'view_count': int, + 'like_count': int, + 'repost_count': int, + 'comment_count': int, }, + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'], }, { # not the first tweet but main-tweet - 'url': 'https://%s/TheNaturalNu/status/1379050895539724290#m' % current_instance, + 'url': f'https://{current_instance}/firefox/status/1354848277481414657#m', 'info_dict': { - 'id': '1379050895539724290', + 'id': '1354848277481414657', 'ext': 'mp4', - 'title': 'Dorothy Zbornak - This had me hollering!!', - 'description': 'This had me hollering!!', + 'title': 'md5:bef647f03bd1c6b15b687ea70dfc9700', + 'description': 'md5:5efba25e2f9dac85ebcd21160cb4341f', 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'Dorothy Zbornak', - 'uploader_id': 'TheNaturalNu', - 'uploader_url': 'https://%s/TheNaturalNu' % current_instance, - 'timestamp': 1617626329, - 'upload_date': '20210405' + 'uploader': 'Firefox 🔥', + 'uploader_id': 'firefox', + 'uploader_url': f'https://{current_instance}/firefox', + 'upload_date': '20210128', + 'timestamp': 1611855960, + 'view_count': int, + 'like_count': int, + 'repost_count': int, + 'comment_count': int, } } ] def _real_extract(self, url): - video_id = self._match_id(url) + video_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id') parsed_url = compat_urlparse.urlparse(url) - base_url = '%s://%s' % (parsed_url.scheme, parsed_url.netloc) + base_url = f'{parsed_url.scheme}://{parsed_url.netloc}' self._set_cookie(parsed_url.netloc, 'hlsPlayback', 'on') - full_webpage = self._download_webpage(url, video_id) + full_webpage = webpage = self._download_webpage(url, video_id) main_tweet_start = full_webpage.find('class="main-tweet"') if main_tweet_start > 0: webpage = full_webpage[main_tweet_start:] - if not webpage: - webpage = full_webpage - video_url = '%s%s' % (base_url, self._html_search_regex(r'(?:]+data-url|]+src)="([^"]+)"', webpage, 'video url')) + video_url = '%s%s' % (base_url, self._html_search_regex( + r'(?:]+data-url|]+src)="([^"]+)"', webpage, 'video url')) ext = determine_ext(video_url) if ext == 'unknown_video': @@ -168,61 +229,49 @@ class NitterIE(InfoExtractor): 'ext': ext }] - title = self._og_search_description(full_webpage) - if not title: - title = self._html_search_regex(r'
<div class="tweet-content[^>]+>([^<]+)</div>
', webpage, 'title', fatal=False) - mobj = self._match_valid_url(url) - uploader_id = ( - mobj.group('uploader_id') - or self._html_search_regex(r']+title="([^"]+)"', webpage, 'uploader name', fatal=False) - ) + uploader_id = self._html_search_regex( + r']+title="@([^"]+)"', webpage, 'uploader id', fatal=False) or uploader_id - if uploader_id: - uploader_url = '%s/%s' % (base_url, uploader_id) + uploader = self._html_search_regex( + r']+title="([^"]+)"', webpage, 'uploader name', fatal=False) + if uploader: + title = f'{uploader} - {title}' - uploader = self._html_search_regex(r']+title="([^"]+)"', webpage, 'uploader name', fatal=False) + counts = { + f'{x[0]}_count': self._html_search_regex( + fr']+class="icon-{x[1]}[^>]*>([^<]*)', + webpage, f'{x[0]} count', fatal=False) + for x in (('view', 'play'), ('like', 'heart'), ('repost', 'retweet'), ('comment', 'comment')) + } + counts = {field: 0 if count == '' else parse_count(count) for field, count in counts.items()} - if uploader: - title = '%s - %s' % (uploader, title) - - view_count = parse_count(self._html_search_regex(r']+class="icon-play[^>]*>\s([^<]+)', webpage, 'view count', fatal=False)) - like_count = parse_count(self._html_search_regex(r']+class="icon-heart[^>]*>\s([^<]+)', webpage, 'like count', fatal=False)) - repost_count = parse_count(self._html_search_regex(r']+class="icon-retweet[^>]*>\s([^<]+)', webpage, 'repost count', fatal=False)) - comment_count = parse_count(self._html_search_regex(r']+class="icon-comment[^>]*>\s([^<]+)', webpage, 'repost count', fatal=False)) - - thumbnail = self._html_search_meta('og:image', full_webpage, 'thumbnail url') - if not thumbnail: - thumbnail = '%s%s' % (base_url, self._html_search_regex(r']+poster="([^"]+)"', webpage, 'thumbnail url', fatal=False)) - thumbnail = remove_end(thumbnail, '%3Asmall') - - thumbnails = [] - thumbnail_ids = ('thumb', 'small', 'large', 'medium', 'orig') - for id in thumbnail_ids: - thumbnails.append({ - 'id': id, - 'url': thumbnail + '%3A' + id, - }) - - date = self._html_search_regex(r']+class="tweet-date"[^>]*>]+title="([^"]+)"', webpage, 'upload date', fatal=False) - upload_date = unified_strdate(date) - timestamp = unified_timestamp(date) + thumbnail = ( + self._html_search_meta('og:image', full_webpage, 'thumbnail url') + or remove_end('%s%s' % (base_url, self._html_search_regex( + r']+poster="([^"]+)"', webpage, 'thumbnail url', fatal=False)), '%3Asmall')) + + thumbnails = [ + {'id': id, 'url': f'{thumbnail}%3A{id}'} + for id in ('thumb', 'small', 'large', 'medium', 'orig') + ] + + date = self._html_search_regex( + r']+class="tweet-date"[^>]*>]+title="([^"]+)"', + webpage, 'upload date', default='').replace('·', '') return { 'id': video_id, 'title': title, 'description': description, 'uploader': uploader, - 'timestamp': timestamp, + 'timestamp': unified_timestamp(date), 'uploader_id': uploader_id, - 'uploader_url': uploader_url, - 'view_count': view_count, - 'like_count': like_count, - 'repost_count': repost_count, - 'comment_count': comment_count, + 'uploader_url': f'{base_url}/{uploader_id}', 'formats': formats, 'thumbnails': thumbnails, 'thumbnail': thumbnail, - 'upload_date': upload_date, + **counts, } From a2e77303e3385da640a0904cd6cb76235fa9691b Mon Sep 17 00:00:00 2001 From: coletdev Date: Sat, 19 Mar 2022 11:10:20 +1300 Subject: [PATCH 0831/2552] [downloader/http] Retry on more errors (#3065) Closes #3056, #2071 Related: #3034, #2969 Authored-by: coletdjnz --- yt_dlp/downloader/http.py | 40 ++++++++++++++------------------------- 1 file changed, 14 
insertions(+), 26 deletions(-) diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index 10ba61024..c2a57726e 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -1,8 +1,7 @@ from __future__ import unicode_literals -import errno import os -import socket +import ssl import time import random @@ -10,6 +9,7 @@ from .common import FileDownloader from ..compat import ( compat_str, compat_urllib_error, + compat_http_client ) from ..utils import ( ContentTooShortError, @@ -23,6 +23,8 @@ from ..utils import ( XAttrUnavailableError, ) +RESPONSE_READ_EXCEPTIONS = (TimeoutError, ConnectionError, ssl.SSLError, compat_http_client.HTTPException) + class HttpFD(FileDownloader): def real_download(self, filename, info_dict): @@ -125,14 +127,7 @@ class HttpFD(FileDownloader): set_range(request, range_start, range_end) # Establish connection try: - try: - ctx.data = self.ydl.urlopen(request) - except (compat_urllib_error.URLError, ) as err: - # reason may not be available, e.g. for urllib2.HTTPError on python 2.6 - reason = getattr(err, 'reason', None) - if isinstance(reason, socket.timeout): - raise RetryDownload(err) - raise err + ctx.data = self.ydl.urlopen(request) # When trying to resume, Content-Range HTTP header of response has to be checked # to match the value of requested Range HTTP header. This is due to a webservers # that don't support resuming and serve a whole file with no Content-Range @@ -202,13 +197,14 @@ class HttpFD(FileDownloader): # Unexpected HTTP error raise raise RetryDownload(err) - except socket.timeout as err: + except compat_urllib_error.URLError as err: + if isinstance(err.reason, ssl.CertificateError): + raise + raise RetryDownload(err) + # In urllib.request.AbstractHTTPHandler, the response is partially read on request. 
+ # Any errors that occur during this will not be wrapped by URLError + except RESPONSE_READ_EXCEPTIONS as err: raise RetryDownload(err) - except socket.error as err: - if err.errno in (errno.ECONNRESET, errno.ETIMEDOUT): - # Connection reset is no problem, just retry - raise RetryDownload(err) - raise def download(): nonlocal throttle_start @@ -254,16 +250,8 @@ class HttpFD(FileDownloader): try: # Download and write data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter)) - # socket.timeout is a subclass of socket.error but may not have - # errno set - except socket.timeout as e: - retry(e) - except socket.error as e: - # SSLError on python 2 (inherits socket.error) may have - # no errno set but this error message - if e.errno in (errno.ECONNRESET, errno.ETIMEDOUT) or getattr(e, 'message', None) == 'The read operation timed out': - retry(e) - raise + except RESPONSE_READ_EXCEPTIONS as err: + retry(err) byte_counter += len(data_block) From e6552207da2727b8a5c61214a5c96e4556a221c5 Mon Sep 17 00:00:00 2001 From: coletdev Date: Sat, 19 Mar 2022 11:19:36 +1300 Subject: [PATCH 0832/2552] [panopto] Improve subtitle extraction and support slides (#3009) Related: #1946, #2908 Authored-by: coletdjnz --- yt_dlp/extractor/panopto.py | 190 +++++++++++++++++++++++++++++++++--- 1 file changed, 176 insertions(+), 14 deletions(-) diff --git a/yt_dlp/extractor/panopto.py b/yt_dlp/extractor/panopto.py index d458dfe50..3388f7f39 100644 --- a/yt_dlp/extractor/panopto.py +++ b/yt_dlp/extractor/panopto.py @@ -18,12 +18,39 @@ from ..utils import ( int_or_none, OnDemandPagedList, parse_qs, + srt_subtitles_timecode, traverse_obj, ) class PanoptoBaseIE(InfoExtractor): - BASE_URL_RE = r'(?Phttps?://[\w.]+\.panopto.(?:com|eu)/Panopto)' + BASE_URL_RE = r'(?Phttps?://[\w.-]+\.panopto.(?:com|eu)/Panopto)' + + # see panopto core.js + _SUB_LANG_MAPPING = { + 0: 'en-US', + 1: 'en-GB', + 2: 'es-MX', + 3: 'es-ES', + 4: 'de-DE', + 5: 'fr-FR', + 6: 'nl-NL', + 7: 'th-TH', + 8: 'zh-CN', + 9: 'zh-TW', + 10: 'ko-KR', + 11: 'ja-JP', + 12: 'ru-RU', + 13: 'pt-PT', + 14: 'pl-PL', + 15: 'en-AU', + 16: 'da-DK', + 17: 'fi-FI', + 18: 'hu-HU', + 19: 'nb-NO', + 20: 'sv-SE', + 21: 'it-IT' + } def _call_api(self, base_url, path, video_id, data=None, fatal=True, **kwargs): response = self._download_json( @@ -31,7 +58,7 @@ class PanoptoBaseIE(InfoExtractor): fatal=fatal, headers={'accept': 'application/json', 'content-type': 'application/json'}, **kwargs) if not response: return - error_code = response.get('ErrorCode') + error_code = traverse_obj(response, 'ErrorCode') if error_code == 2: self.raise_login_required(method='cookies') elif error_code is not None: @@ -62,10 +89,11 @@ class PanoptoIE(PanoptoBaseIE): 'id': '26b3ae9e-4a48-4dcc-96ba-0befba08a0fb', 'title': 'Panopto for Business - Use Cases', 'timestamp': 1459184200, - 'thumbnail': r're:https://demo\.hosted\.panopto\.com/Panopto/Services/FrameGrabber\.svc/FrameRedirect\?objectId=26b3ae9e-4a48-4dcc-96ba-0befba08a0fb&mode=Delivery&random=[\d.]+', + 'thumbnail': r're:https://demo\.hosted\.panopto\.com/.+', 'upload_date': '20160328', 'ext': 'mp4', 'cast': [], + 'chapters': [], 'duration': 88.17099999999999, 'average_rating': int, 'uploader_id': '2db6b718-47a0-4b0b-9e17-ab0b00f42b1e', @@ -80,10 +108,10 @@ class PanoptoIE(PanoptoBaseIE): 'title': 'Overcoming Top 4 Challenges of Enterprise Video', 'uploader': 'Panopto Support', 'timestamp': 1449409251, - 'thumbnail': 
r're:https://demo\.hosted\.panopto\.com/Panopto/Services/FrameGrabber\.svc/FrameRedirect\?objectId=ed01b077-c9e5-4c7b-b8ff-15fa306d7a59&mode=Delivery&random=[\d.]+', + 'thumbnail': r're:https://demo\.hosted\.panopto\.com/.+', 'upload_date': '20151206', 'ext': 'mp4', - 'chapters': 'count:21', + 'chapters': 'count:12', 'cast': ['Panopto Support'], 'uploader_id': 'a96d1a31-b4de-489b-9eee-b4a5b414372c', 'average_rating': int, @@ -104,8 +132,9 @@ class PanoptoIE(PanoptoBaseIE): 'uploader_id': '316a0a58-7fa2-4cd9-be1c-64270d284a56', 'timestamp': 1569845768, 'tags': ['Viewer', 'Enterprise'], + 'chapters': [], 'upload_date': '20190930', - 'thumbnail': r're:https://howtovideos\.hosted\.panopto\.com/Panopto/Services/FrameGrabber.svc/FrameRedirect\?objectId=5fa74e93-3d87-4694-b60e-aaa4012214ed&mode=Delivery&random=[\d.]+', + 'thumbnail': r're:https://howtovideos\.hosted\.panopto\.com/.+', 'description': 'md5:2d844aaa1b1a14ad0e2601a0993b431f', 'title': 'Getting Started: View a Video', 'average_rating': int, @@ -121,6 +150,7 @@ class PanoptoIE(PanoptoBaseIE): 'id': '9d9a0fa3-e99a-4ebd-a281-aac2017f4da4', 'ext': 'mp4', 'cast': ['LTS CLI Script'], + 'chapters': [], 'duration': 2178.45, 'description': 'md5:ee5cf653919f55b72bce2dbcf829c9fa', 'channel_id': 'b23e673f-c287-4cb1-8344-aae9005a69f8', @@ -129,11 +159,77 @@ class PanoptoIE(PanoptoBaseIE): 'uploader': 'LTS CLI Script', 'timestamp': 1572458134, 'title': 'WW2 Vets Interview 3 Ronald Stanley George', - 'thumbnail': r're:https://unisa\.au\.panopto\.com/Panopto/Services/FrameGrabber.svc/FrameRedirect\?objectId=9d9a0fa3-e99a-4ebd-a281-aac2017f4da4&mode=Delivery&random=[\d.]+', + 'thumbnail': r're:https://unisa\.au\.panopto\.com/.+', 'channel': 'World War II Veteran Interviews', 'upload_date': '20191030', }, }, + { + # Slides/storyboard + 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=a7f12f1d-3872-4310-84b0-f8d8ab15326b', + 'info_dict': { + 'id': 'a7f12f1d-3872-4310-84b0-f8d8ab15326b', + 'ext': 'mhtml', + 'timestamp': 1448798857, + 'duration': 4712.681, + 'title': 'Cache Memory - CompSci 15-213, Lecture 12', + 'channel_id': 'e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a', + 'uploader_id': 'a96d1a31-b4de-489b-9eee-b4a5b414372c', + 'upload_date': '20151129', + 'average_rating': 0, + 'uploader': 'Panopto Support', + 'channel': 'Showcase Videos', + 'description': 'md5:55e51d54233ddb0e6c2ed388ca73822c', + 'cast': ['ISR Videographer', 'Panopto Support'], + 'chapters': 'count:28', + 'thumbnail': r're:https://demo\.hosted\.panopto\.com/.+', + }, + 'params': {'format': 'mhtml', 'skip_download': True} + }, + { + 'url': 'https://na-training-1.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=8285224a-9a2b-4957-84f2-acb0000c4ea9', + 'info_dict': { + 'id': '8285224a-9a2b-4957-84f2-acb0000c4ea9', + 'ext': 'mp4', + 'chapters': [], + 'title': 'Company Policy', + 'average_rating': 0, + 'timestamp': 1615058901, + 'channel': 'Human Resources', + 'tags': ['HumanResources'], + 'duration': 1604.243, + 'thumbnail': r're:https://na-training-1\.hosted\.panopto\.com/.+', + 'uploader_id': '8e8ba0a3-424f-40df-a4f1-ab3a01375103', + 'uploader': 'Cait M.', + 'upload_date': '20210306', + 'cast': ['Cait M.'], + 'subtitles': {'en-US': [{'ext': 'srt', 'data': 'md5:a3f4d25963fdeace838f327097c13265'}], + 'es-ES': [{'ext': 'srt', 'data': 'md5:57e9dad365fd0fbaf0468eac4949f189'}]}, + }, + 'params': {'writesubtitles': True, 'skip_download': True} + }, { + # On Panopto there are two subs: "Default" and en-US. en-US is blank and should be skipped. 
+ 'url': 'https://na-training-1.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=940cbd41-f616-4a45-b13e-aaf1000c915b', + 'info_dict': { + 'id': '940cbd41-f616-4a45-b13e-aaf1000c915b', + 'ext': 'mp4', + 'subtitles': 'count:1', + 'title': 'HR Benefits Review Meeting*', + 'cast': ['Panopto Support'], + 'chapters': [], + 'timestamp': 1575024251, + 'thumbnail': r're:https://na-training-1\.hosted\.panopto\.com/.+', + 'channel': 'Zoom', + 'description': 'md5:04f90a9c2c68b7828144abfb170f0106', + 'uploader': 'Panopto Support', + 'average_rating': 0, + 'duration': 409.34499999999997, + 'uploader_id': 'b6ac04ad-38b8-4724-a004-a851004ea3df', + 'upload_date': '20191129', + + }, + 'params': {'writesubtitles': True, 'skip_download': True} + }, { 'url': 'https://ucc.cloud.panopto.eu/Panopto/Pages/Viewer.aspx?id=0e8484a4-4ceb-4d98-a63f-ac0200b455cb', 'only_matching': True @@ -178,19 +274,82 @@ class PanoptoIE(PanoptoBaseIE): note='Marking watched', errnote='Unable to mark watched') @staticmethod - def _extract_chapters(delivery): + def _extract_chapters(timestamps): chapters = [] - for timestamp in delivery.get('Timestamps', []): + for timestamp in timestamps or []: + caption = timestamp.get('Caption') start, duration = int_or_none(timestamp.get('Time')), int_or_none(timestamp.get('Duration')) - if start is None or duration is None: + if not caption or start is None or duration is None: continue chapters.append({ 'start_time': start, 'end_time': start + duration, - 'title': timestamp.get('Caption') + 'title': caption }) return chapters + @staticmethod + def _extract_mhtml_formats(base_url, timestamps): + image_frags = {} + for timestamp in timestamps or []: + duration = timestamp.get('Duration') + obj_id, obj_sn = timestamp.get('ObjectIdentifier'), timestamp.get('ObjectSequenceNumber'), + if timestamp.get('EventTargetType') == 'PowerPoint' and obj_id is not None and obj_sn is not None: + image_frags.setdefault('slides', []).append({ + 'url': base_url + f'/Pages/Viewer/Image.aspx?id={obj_id}&number={obj_sn}', + 'duration': duration + }) + + obj_pid, session_id, abs_time = timestamp.get('ObjectPublicIdentifier'), timestamp.get('SessionID'), timestamp.get('AbsoluteTime') + if None not in (obj_pid, session_id, abs_time): + image_frags.setdefault('chapter', []).append({ + 'url': base_url + f'/Pages/Viewer/Thumb.aspx?eventTargetPID={obj_pid}&sessionPID={session_id}&number={obj_sn}&isPrimary=false&absoluteTime={abs_time}', + 'duration': duration, + }) + for name, fragments in image_frags.items(): + yield { + 'format_id': name, + 'ext': 'mhtml', + 'protocol': 'mhtml', + 'acodec': 'none', + 'vcodec': 'none', + 'url': 'about:invalid', + 'fragments': fragments + } + + @staticmethod + def _json2srt(data, delivery): + def _gen_lines(): + for i, line in enumerate(data): + start_time = line['Time'] + duration = line.get('Duration') + if duration: + end_time = start_time + duration + else: + end_time = traverse_obj(data, (i + 1, 'Time')) or delivery['Duration'] + yield f'{i + 1}\n{srt_subtitles_timecode(start_time)} --> {srt_subtitles_timecode(end_time)}\n{line["Caption"]}' + return '\n\n'.join(_gen_lines()) + + def _get_subtitles(self, base_url, video_id, delivery): + subtitles = {} + for lang in delivery.get('AvailableLanguages') or []: + response = self._call_api( + base_url, '/Pages/Viewer/DeliveryInfo.aspx', video_id, fatal=False, + note='Downloading captions JSON metadata', query={ + 'deliveryId': video_id, + 'getCaptions': True, + 'language': str(lang), + 'responseType': 'json' + } + ) + if not isinstance(response, 
list): + continue + subtitles.setdefault(self._SUB_LANG_MAPPING.get(lang) or 'default', []).append({ + 'ext': 'srt', + 'data': self._json2srt(response, delivery), + }) + return subtitles + def _extract_streams_formats_and_subtitles(self, video_id, streams, **fmt_kwargs): formats = [] subtitles = {} @@ -240,6 +399,7 @@ class PanoptoIE(PanoptoBaseIE): delivery = delivery_info['Delivery'] session_start_time = int_or_none(delivery.get('SessionStartTime')) + timestamps = delivery.get('Timestamps') # Podcast stream is usually the combined streams. We will prefer that by default. podcast_formats, podcast_subtitles = self._extract_streams_formats_and_subtitles( @@ -249,9 +409,11 @@ class PanoptoIE(PanoptoBaseIE): video_id, delivery.get('Streams'), preference=-10) formats = podcast_formats + streams_formats - subtitles = self._merge_subtitles(podcast_subtitles, streams_subtitles) - self._sort_formats(formats) + formats.extend(self._extract_mhtml_formats(base_url, timestamps)) + subtitles = self._merge_subtitles( + podcast_subtitles, streams_subtitles, self.extract_subtitles(base_url, video_id, delivery)) + self._sort_formats(formats) self.mark_watched(base_url, video_id, delivery_info) return { @@ -262,7 +424,7 @@ class PanoptoIE(PanoptoBaseIE): 'duration': delivery.get('Duration'), 'thumbnail': base_url + f'/Services/FrameGrabber.svc/FrameRedirect?objectId={video_id}&mode=Delivery&random={random()}', 'average_rating': delivery.get('AverageRating'), - 'chapters': self._extract_chapters(delivery) or None, + 'chapters': self._extract_chapters(timestamps), 'uploader': delivery.get('OwnerDisplayName') or None, 'uploader_id': delivery.get('OwnerId'), 'description': delivery.get('SessionAbstract'), From be4685ab7b6acc8ba0d383a6ae27453566046228 Mon Sep 17 00:00:00 2001 From: "Lesmiscore (Naoya Ozaki)" Date: Sat, 19 Mar 2022 10:15:01 +0900 Subject: [PATCH 0833/2552] [http] Reject broken range before request (#3079) * And fix filesize estimate for byterange downloads Closes #2001 Authored by: Lesmiscore, Jules-A, pukkandan --- yt_dlp/downloader/http.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index c2a57726e..8e096b76b 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -18,6 +18,7 @@ from ..utils import ( parse_http_range, sanitized_Request, ThrottledDownload, + try_get, write_xattr, XAttrMetadataError, XAttrUnavailableError, @@ -55,7 +56,6 @@ class HttpFD(FileDownloader): ctx.open_mode = 'wb' ctx.resume_len = 0 - ctx.data_len = None ctx.block_size = self.params.get('buffersize', 1024) ctx.start_time = time.time() ctx.chunk_size = None @@ -102,6 +102,8 @@ class HttpFD(FileDownloader): if ctx.is_resume: self.report_resuming_byte(ctx.resume_len) ctx.open_mode = 'ab' + elif req_start is not None: + range_start = req_start elif ctx.chunk_size > 0: range_start = 0 else: @@ -118,11 +120,16 @@ class HttpFD(FileDownloader): else: range_end = None - if range_end and ctx.data_len is not None and range_end >= ctx.data_len: - range_end = ctx.data_len - 1 - has_range = range_start is not None - ctx.has_range = has_range + if try_get(None, lambda _: range_start > range_end): + ctx.resume_len = 0 + ctx.open_mode = 'wb' + raise RetryDownload(Exception(f'Conflicting range. 
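# The try_get(None, lambda _: ...) guards here lean on try_get() swallowing
# TypeError: when either bound is still None, the comparison raises and the
# call returns None (falsy), so the guard is skipped. A minimal standalone
# sketch of the idiom:
#
#     try_get(None, lambda _: 5 > 3)      # -> True: both bounds known
#     try_get(None, lambda _: None > 3)   # -> None: TypeError swallowed,
#                                         #    guard not taken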
(start={range_start} > end={range_end})'))
+
+        if try_get(None, lambda _: range_end >= ctx.content_len):
+            range_end = ctx.content_len - 1
+
+        request = sanitized_Request(url, request_data, headers)
+        has_range = range_start is not None
         if has_range:
             set_range(request, range_start, range_end)
         # Establish connection
@@ -146,7 +153,8 @@ class HttpFD(FileDownloader):
                             or content_range_end == range_end
                             or content_len < range_end)
                         if accept_content_len:
-                            ctx.data_len = content_len
+                            ctx.content_len = content_len
+                            ctx.data_len = min(content_len, req_end or content_len) - (req_start or 0)
                             return
                     # Content-Range is either not present or invalid. Assuming remote webserver is
                     # trying to send the whole file, resume is not possible, so wiping the local file
@@ -154,8 +162,7 @@ class HttpFD(FileDownloader):
                     self.report_unable_to_resume()
                     ctx.resume_len = 0
                     ctx.open_mode = 'wb'
-                    ctx.data_len = int_or_none(ctx.data.info().get('Content-length', None))
-                return
+                    ctx.data_len = ctx.content_len = int_or_none(ctx.data.info().get('Content-length', None))
             except (compat_urllib_error.HTTPError, ) as err:
                 if err.code == 416:
                     # Unable to resume (requested range not satisfiable)
@@ -331,7 +338,7 @@ class HttpFD(FileDownloader):
                 elif speed:
                     throttle_start = None
 
-            if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len:
+            if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len:
                 ctx.resume_len = byte_counter
                 # ctx.block_size = block_size
                 raise NextFragment()

From 84842aee2ba8dc50601c86dc6fbb12d0fa438449 Mon Sep 17 00:00:00 2001
From: 1-Byte <1-Byte@users.noreply.github.com>
Date: Sun, 20 Mar 2022 18:49:00 +0100
Subject: [PATCH 0834/2552] [azmedien] Add TVO Online to supported hosts
 (#3125)

Authored by: 1-Byte
---
 yt_dlp/extractor/azmedien.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/azmedien.py b/yt_dlp/extractor/azmedien.py
index fee640e14..b3cabbf94 100644
--- a/yt_dlp/extractor/azmedien.py
+++ b/yt_dlp/extractor/azmedien.py
@@ -15,7 +15,8 @@ class AZMedienIE(InfoExtractor):
                         (?P<host>
                             telezueri\.ch|
                             telebaern\.tv|
-                            telem1\.ch
+                            telem1\.ch|
+                            tvo-online\.ch
                         )/
                         [^/]+/
                         (?P<id>

From 1f1df1251e869cb539ed8ae9d72590d2b2e275a6 Mon Sep 17 00:00:00 2001
From: CplPwnies 
Date: Tue, 22 Mar 2022 16:09:38 -0500
Subject: [PATCH 0835/2552] [adobepass] Fix Suddenlink MSO (#3148)

Authored by: CplPwnies
---
 yt_dlp/extractor/adobepass.py | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py
index f0eba8844..5d98301b8 100644
--- a/yt_dlp/extractor/adobepass.py
+++ b/yt_dlp/extractor/adobepass.py
@@ -1650,21 +1650,27 @@ class AdobePassIE(InfoExtractor):
             hidden_data = self._hidden_inputs(first_bookend_page)
             hidden_data['history_val'] = 1
 
-            provider_login_redirect_page = self._download_webpage(
+            provider_login_redirect_page_res = self._download_webpage_handle(
                 urlh.geturl(), video_id, 'Sending First Bookend',
                 query=hidden_data)
 
-            provider_tryauth_url = self._html_search_regex(
-                r'url:\s*[\'"]([^\'"]+)', provider_login_redirect_page, 'ajaxurl')
+            provider_login_redirect_page, urlh = provider_login_redirect_page_res
 
-            provider_tryauth_page = self._download_webpage(
-                provider_tryauth_url, video_id, 'Submitting TryAuth',
-                query=hidden_data)
+            # Some website partners seem to not have the extra ajaxurl redirect step, so we check if we already
+            # have the login prompt or not
+            if 'id="password" type="password" 
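# _download_webpage_handle returns a (webpage, urlh) tuple, which is why the
# branch below can pass provider_login_redirect_page_res straight through as
# provider_login_page_res once the password <input> is detected on the page,
# skipping the TryAuth/ajaxurl hop that other Suddenlink partners require.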
name="password"' in provider_login_redirect_page: + provider_login_page_res = provider_login_redirect_page_res + else: + provider_tryauth_url = self._html_search_regex( + r'url:\s*[\'"]([^\'"]+)', provider_login_redirect_page, 'ajaxurl') + provider_tryauth_page = self._download_webpage( + provider_tryauth_url, video_id, 'Submitting TryAuth', + query=hidden_data) - provider_login_page_res = self._download_webpage_handle( - f'https://authorize.suddenlink.net/saml/module.php/authSynacor/login.php?AuthState={provider_tryauth_page}', - video_id, 'Getting Login Page', - query=hidden_data) + provider_login_page_res = self._download_webpage_handle( + f'https://authorize.suddenlink.net/saml/module.php/authSynacor/login.php?AuthState={provider_tryauth_page}', + video_id, 'Getting Login Page', + query=hidden_data) provider_association_redirect, urlh = post_form( provider_login_page_res, 'Logging in', { From 8a23db95197282159efe970ca0ac69c97def60d3 Mon Sep 17 00:00:00 2001 From: coletdev Date: Wed, 23 Mar 2022 10:24:27 +1300 Subject: [PATCH 0836/2552] [wget] Fix proxy (#3152) Upstream PR: https://github.com/ytdl-org/youtube-dl/pull/29343 Authored-by: kikuyan, coletdjnz --- yt_dlp/downloader/external.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index be6202eef..cfa646ebc 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -230,7 +230,10 @@ class WgetFD(ExternalFD): retry[1] = '0' cmd += retry cmd += self._option('--bind-address', 'source_address') - cmd += self._option('--proxy', 'proxy') + proxy = self.params.get('proxy') + if proxy: + for var in ('http_proxy', 'https_proxy'): + cmd += ['--execute', '%s=%s' % (var, proxy)] cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate') cmd += self._configuration_args() cmd += ['--', info_dict['url']] From d5820461e85a9d3d0b2a019f988d4469bfbcc3ee Mon Sep 17 00:00:00 2001 From: coletdev Date: Wed, 23 Mar 2022 12:26:55 +1300 Subject: [PATCH 0837/2552] Use certificates from `certifi` if installed (#3115) Fixes #3102 and most `CERTIFICATE_VERIFY_FAILED` issues Authored by: coletdjnz --- README.md | 14 ++++++++------ pyinst.py | 2 +- requirements.txt | 3 ++- yt_dlp/YoutubeDL.py | 2 ++ yt_dlp/options.py | 2 +- yt_dlp/utils.py | 37 +++++++++++++++++++++++-------------- 6 files changed, 37 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index af9ff2c6d..373711499 100644 --- a/README.md +++ b/README.md @@ -143,6 +143,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu * Thumbnail embedding in `mp4` is done with mutagen if possible. Use `--compat-options embed-thumbnail-atomicparsley` to force the use of AtomicParsley instead * Some private fields such as filenames are removed by default from the infojson. Use `--no-clean-infojson` or `--compat-options no-clean-infojson` to revert this * When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the separate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this +* `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. 
self-signed), use `--compat-options no-certifi` For ease of use, a few more compat options are available: * `--compat-options all`: Use all compat options @@ -262,12 +263,13 @@ On windows, [Microsoft Visual C++ 2010 SP1 Redistributable Package (x86)](https: While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly recommended * [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging separate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. License [depends on the build](https://www.ffmpeg.org/legal.html) -* [**mutagen**](https://github.com/quodlibet/mutagen) - For embedding thumbnail in certain formats. Licensed under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING) -* [**pycryptodomex**](https://github.com/Legrandin/pycryptodome) - For decrypting AES-128 HLS streams and various other data. Licensed under [BSD2](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst) -* [**websockets**](https://github.com/aaugustin/websockets) - For downloading over websocket. Licensed under [BSD3](https://github.com/aaugustin/websockets/blob/main/LICENSE) -* [**secretstorage**](https://github.com/mitya57/secretstorage) - For accessing the Gnome keyring while decrypting cookies of Chromium-based browsers on Linux. Licensed under [BSD](https://github.com/mitya57/secretstorage/blob/master/LICENSE) +* [**mutagen**](https://github.com/quodlibet/mutagen)\* - For embedding thumbnail in certain formats. Licensed under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING) +* [**pycryptodomex**](https://github.com/Legrandin/pycryptodome)\* - For decrypting AES-128 HLS streams and various other data. Licensed under [BSD2](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst) +* [**websockets**](https://github.com/aaugustin/websockets)\* - For downloading over websocket. Licensed under [BSD3](https://github.com/aaugustin/websockets/blob/main/LICENSE) +* [**secretstorage**](https://github.com/mitya57/secretstorage)\* - For accessing the Gnome keyring while decrypting cookies of Chromium-based browsers on Linux. Licensed under [BSD](https://github.com/mitya57/secretstorage/blob/master/LICENSE) +* [**brotli**](https://github.com/google/brotli)\* or [**brotlicffi**](https://github.com/python-hyper/brotlicffi) - [Brotli](https://en.wikipedia.org/wiki/Brotli) content encoding support. Both licensed under MIT [1](https://github.com/google/brotli/blob/master/LICENSE) [2](https://github.com/python-hyper/brotlicffi/blob/master/LICENSE) +* [**certifi**](https://github.com/certifi/python-certifi)\* - Provides Mozilla's root certificate bundle. Licensed under [MPLv2](https://github.com/certifi/python-certifi/blob/master/LICENSE) * [**AtomicParsley**](https://github.com/wez/atomicparsley) - For embedding thumbnail in mp4/m4a if mutagen/ffmpeg cannot. Licensed under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING) -* [**brotli**](https://github.com/google/brotli) or [**brotlicffi**](https://github.com/python-hyper/brotlicffi) - [Brotli](https://en.wikipedia.org/wiki/Brotli) content encoding support. Both licensed under MIT [1](https://github.com/google/brotli/blob/master/LICENSE) [2](https://github.com/python-hyper/brotlicffi/blob/master/LICENSE) * [**rtmpdump**](http://rtmpdump.mplayerhq.hu) - For downloading `rtmp` streams. ffmpeg will be used as a fallback. 
Licensed under [GPLv2+](http://rtmpdump.mplayerhq.hu) * [**mplayer**](http://mplayerhq.hu/design7/info.html) or [**mpv**](https://mpv.io) - For downloading `rstp` streams. ffmpeg will be used as a fallback. Licensed under [GPLv2+](https://github.com/mpv-player/mpv/blob/master/Copyright) * [**phantomjs**](https://github.com/ariya/phantomjs) - Used in extractors where javascript needs to be run. Licensed under [BSD3](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD) @@ -276,7 +278,7 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly To use or redistribute the dependencies, you must agree to their respective licensing terms. -The Windows and MacOS standalone release binaries are already built with the python interpreter, mutagen, pycryptodomex and websockets included. +The Windows and MacOS standalone release binaries are already built with the python interpreter and all optional python packages (marked with \*) included. **Note**: There are some regressions in newer ffmpeg versions that causes various issues when used alongside yt-dlp. Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds#ffmpeg-static-auto-builds) with patches for these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds diff --git a/pyinst.py b/pyinst.py index ca115fd78..e5934e04f 100644 --- a/pyinst.py +++ b/pyinst.py @@ -74,7 +74,7 @@ def version_to_list(version): def dependency_options(): - dependencies = [pycryptodome_module(), 'mutagen', 'brotli'] + collect_submodules('websockets') + dependencies = [pycryptodome_module(), 'mutagen', 'brotli', 'certifi'] + collect_submodules('websockets') excluded_modules = ['test', 'ytdlp_plugins', 'youtube-dl', 'youtube-dlc'] yield from (f'--hidden-import={module}' for module in dependencies) diff --git a/requirements.txt b/requirements.txt index cb0eece46..a48b78d7a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,5 @@ mutagen pycryptodomex websockets brotli; platform_python_implementation=='CPython' -brotlicffi; platform_python_implementation!='CPython' \ No newline at end of file +brotlicffi; platform_python_implementation!='CPython' +certifi \ No newline at end of file diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 33f33ddfe..d075a82bc 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -72,6 +72,7 @@ from .utils import ( formatSeconds, GeoRestrictedError, get_domain, + has_certifi, HEADRequest, InAdvancePagedList, int_or_none, @@ -3676,6 +3677,7 @@ class YoutubeDL(object): lib_str = join_nonempty( compat_brotli and compat_brotli.__name__, + has_certifi and 'certifi', compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0], SECRETSTORAGE_AVAILABLE and 'secretstorage', has_mutagen and 'mutagen', diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 646ccebcd..3c81da6c3 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -341,7 +341,7 @@ def create_parser(): 'filename', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles', 'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge', 'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-attach-info-json', 'embed-metadata', - 'embed-thumbnail-atomicparsley', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', + 'embed-thumbnail-atomicparsley', 
'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi', }, 'aliases': { 'youtube-dl': ['-multistreams', 'all'], 'youtube-dlc': ['-no-youtube-channel-redirect', '-no-live-chat', 'all'], diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index da6f27801..a08dc3c11 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -85,6 +85,12 @@ from .socks import ( sockssocket, ) +try: + import certifi + has_certifi = True +except ImportError: + has_certifi = False + def register_socks_protocols(): # "Register" SOCKS protocols @@ -1010,20 +1016,23 @@ def make_HTTPS_handler(params, **kwargs): context.options |= 4 # SSL_OP_LEGACY_SERVER_CONNECT context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE if opts_check_certificate: - try: - context.load_default_certs() - # Work around the issue in load_default_certs when there are bad certificates. See: - # https://github.com/yt-dlp/yt-dlp/issues/1060, - # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312 - except ssl.SSLError: - # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151 - if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'): - # Create a new context to discard any certificates that were already loaded - context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) - context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED - for storename in ('CA', 'ROOT'): - _ssl_load_windows_store_certs(context, storename) - context.set_default_verify_paths() + if has_certifi and 'no-certifi' not in params.get('compat_opts', []): + context.load_verify_locations(cafile=certifi.where()) + else: + try: + context.load_default_certs() + # Work around the issue in load_default_certs when there are bad certificates. See: + # https://github.com/yt-dlp/yt-dlp/issues/1060, + # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312 + except ssl.SSLError: + # enum_certificates is not present in mingw python. 
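# When certifi is importable (and the 'no-certifi' compat-opt is unset), the
# branch above pins verification to Mozilla's bundled roots instead of the OS
# store. A standalone sketch of what that amounts to:
#
#     import ssl, certifi
#     ctx = ssl.create_default_context(cafile=certifi.where())
#
# Such a context trusts exactly the certifi bundle, so system-added or
# self-signed roots are rejected -- which is what --compat-options no-certifi
# exists to undo.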
See https://github.com/yt-dlp/yt-dlp/issues/1151 + if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'): + # Create a new context to discard any certificates that were already loaded + context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) + context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED + for storename in ('CA', 'ROOT'): + _ssl_load_windows_store_certs(context, storename) + context.set_default_verify_paths() return YoutubeDLHTTPSHandler(params, context=context, **kwargs) From b9c7b1e9b449e93e252d1001f4f8d892b9c2be4d Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 23 Mar 2022 03:53:39 +0530 Subject: [PATCH 0838/2552] [cleanup, vimeo] Fix tests --- yt_dlp/extractor/vimeo.py | 100 +++++++++++++++++++++++++++++++++----- 1 file changed, 89 insertions(+), 11 deletions(-) diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index 051cf1b17..972fb480b 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -327,7 +327,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'info_dict': { 'id': '56015672', 'ext': 'mp4', - 'title': "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", + 'title': "youtube-dl test video '' ä↭𝕐-BaW jenozKc", 'description': 'md5:2d3305bad981a06ff79f027f19865021', 'timestamp': 1355990239, 'upload_date': '20121220', @@ -340,6 +340,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'params': { 'format': 'best[protocol=https]', }, + 'skip': 'No longer available' }, { 'url': 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876', @@ -357,6 +358,10 @@ class VimeoIE(VimeoBaseInfoExtractor): 'upload_date': '20130610', 'timestamp': 1370893156, 'license': 'by', + 'thumbnail': 'https://i.vimeocdn.com/video/440260469-19b0d92fca3bd84066623b53f1eb8aaa3980c6c809e2d67b6b39ab7b4a77a344-d_960', + 'view_count': int, + 'comment_count': int, + 'like_count': int, }, 'params': { 'format': 'best[protocol=https]', @@ -364,7 +369,7 @@ class VimeoIE(VimeoBaseInfoExtractor): }, { 'url': 'http://player.vimeo.com/video/54469442', - 'md5': '619b811a4417aa4abe78dc653becf511', + 'md5': 'b3e7f4d2cbb53bd7dc3bb6ff4ed5cfbd', 'note': 'Videos that embed the url in the player page', 'info_dict': { 'id': '54469442', @@ -375,6 +380,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'uploader_id': 'businessofsoftware', 'duration': 3610, 'description': None, + 'thumbnail': 'https://i.vimeocdn.com/video/376682406-f34043e7b766af6bef2af81366eacd6724f3fc3173179a11a97a1e26587c9529-d_1280', }, 'params': { 'format': 'best[protocol=https]', @@ -395,6 +401,10 @@ class VimeoIE(VimeoBaseInfoExtractor): 'uploader': 'Jaime Marquínez Ferrándiz', 'duration': 10, 'description': 'md5:dca3ea23adb29ee387127bc4ddfce63f', + 'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_960', + 'view_count': int, + 'comment_count': int, + 'like_count': int, }, 'params': { 'format': 'best[protocol=https]', @@ -417,6 +427,10 @@ class VimeoIE(VimeoBaseInfoExtractor): 'timestamp': 1380339469, 'upload_date': '20130928', 'duration': 187, + 'thumbnail': 'https://i.vimeocdn.com/video/450239872-a05512d9b1e55d707a7c04365c10980f327b06d966351bc403a5d5d65c95e572-d_1280', + 'view_count': int, + 'comment_count': int, + 'like_count': int, }, 'params': {'format': 'http-1080p'}, }, @@ -425,7 +439,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'note': 'Video with subtitles', 'info_dict': { 'id': '76979871', - 'ext': 'mp4', + 'ext': 'mov', 'title': 'The New Vimeo Player (You Know, For Videos)', 'description': 
'md5:2ec900bf97c3f389378a96aee11260ea', 'timestamp': 1381846109, @@ -454,6 +468,8 @@ class VimeoIE(VimeoBaseInfoExtractor): 'uploader': 'Tulio Gonçalves', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user28849593', 'uploader_id': 'user28849593', + 'duration': 118, + 'thumbnail': 'https://i.vimeocdn.com/video/478636036-c18440305ef3df9decfb6bf207a61fe39d2d17fa462a96f6f2d93d30492b037d-d_1280', }, }, { @@ -470,6 +486,11 @@ class VimeoIE(VimeoBaseInfoExtractor): 'timestamp': 1324343742, 'upload_date': '20111220', 'description': 'md5:ae23671e82d05415868f7ad1aec21147', + 'duration': 60, + 'comment_count': int, + 'view_count': int, + 'thumbnail': 'https://i.vimeocdn.com/video/231174622-dd07f015e9221ff529d451e1cc31c982b5d87bfafa48c4189b1da72824ee289a-d_1280', + 'like_count': int, }, }, { @@ -485,6 +506,9 @@ class VimeoIE(VimeoBaseInfoExtractor): 'uploader': 'Framework Studio', 'description': 'md5:f2edc61af3ea7a5592681ddbb683db73', 'upload_date': '20200225', + 'duration': 176, + 'thumbnail': 'https://i.vimeocdn.com/video/859377297-836494a4ef775e9d4edbace83937d9ad34dc846c688c0c419c0e87f7ab06c4b3-d_1280', + 'uploader_url': 'https://vimeo.com/frameworkla', }, }, { @@ -503,6 +527,11 @@ class VimeoIE(VimeoBaseInfoExtractor): 'timestamp': 1250886430, 'upload_date': '20090821', 'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6', + 'duration': 321, + 'comment_count': int, + 'view_count': int, + 'thumbnail': 'https://i.vimeocdn.com/video/22728298-bfc22146f930de7cf497821c7b0b9f168099201ecca39b00b6bd31fcedfca7a6-d_1280', + 'like_count': int, }, 'params': { 'skip_download': True, @@ -535,10 +564,17 @@ class VimeoIE(VimeoBaseInfoExtractor): 'id': '68375962', 'ext': 'mp4', 'title': 'youtube-dl password protected test video', + 'timestamp': 1371200155, + 'upload_date': '20130614', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128', 'uploader_id': 'user18948128', 'uploader': 'Jaime Marquínez Ferrándiz', 'duration': 10, + 'description': 'md5:dca3ea23adb29ee387127bc4ddfce63f', + 'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_960', + 'view_count': int, + 'comment_count': int, + 'like_count': int, }, 'params': { 'format': 'best[protocol=https]', @@ -568,12 +604,18 @@ class VimeoIE(VimeoBaseInfoExtractor): 'info_dict': { 'id': '119195465', 'ext': 'mp4', - 'title': 'youtube-dl test video \'ä"BaW_jenozKc', + 'title': "youtube-dl test video '' ä↭𝕐-BaW jenozKc", 'uploader': 'Philipp Hagemeister', 'uploader_id': 'user20132939', 'description': 'md5:fa7b6c6d8db0bdc353893df2f111855b', 'upload_date': '20150209', 'timestamp': 1423518307, + 'thumbnail': 'https://i.vimeocdn.com/video/default_1280', + 'duration': 10, + 'like_count': int, + 'uploader_url': 'https://vimeo.com/user20132939', + 'view_count': int, + 'comment_count': int, }, 'params': { 'format': 'best[protocol=https]', @@ -596,6 +638,14 @@ class VimeoIE(VimeoBaseInfoExtractor): 'title': 'Harrisville New Hampshire', 'timestamp': 1459259666, 'upload_date': '20160329', + 'release_timestamp': 1459259666, + 'license': 'by-nc', + 'duration': 159, + 'comment_count': int, + 'thumbnail': 'https://i.vimeocdn.com/video/562802436-585eeb13b5020c6ac0f171a2234067938098f84737787df05ff0d767f6d54ee9-d_1280', + 'like_count': int, + 'uploader_url': 'https://vimeo.com/aliniamedia', + 'release_date': '20160329', }, 'params': {'skip_download': True}, }, @@ -627,6 +677,14 @@ class VimeoIE(VimeoBaseInfoExtractor): 'title': 'The Shoes - Submarine Feat. 
Blaine Harrison', 'uploader_id': 'karimhd', 'description': 'md5:8e2eea76de4504c2e8020a9bcfa1e843', + 'channel_id': 'staffpicks', + 'duration': 336, + 'comment_count': int, + 'view_count': int, + 'thumbnail': 'https://i.vimeocdn.com/video/541243181-b593db36a16db2f0096f655da3f5a4dc46b8766d77b0f440df937ecb0c418347-d_1280', + 'like_count': int, + 'uploader_url': 'https://vimeo.com/karimhd', + 'channel_url': 'https://vimeo.com/channels/staffpicks', }, 'params': {'skip_download': 'm3u8'}, }, @@ -641,13 +699,19 @@ class VimeoIE(VimeoBaseInfoExtractor): 'url': 'https://vimeo.com/581039021/9603038895', 'info_dict': { 'id': '581039021', - # these have to be provided but we don't care 'ext': 'mp4', 'timestamp': 1627621014, - 'title': 're:.+', - 'uploader_id': 're:.+', - 'uploader': 're:.+', - 'upload_date': r're:\d+', + 'release_timestamp': 1627621014, + 'duration': 976, + 'comment_count': int, + 'thumbnail': 'https://i.vimeocdn.com/video/1202249320-4ddb2c30398c0dc0ee059172d1bd5ea481ad12f0e0e3ad01d2266f56c744b015-d_1280', + 'like_count': int, + 'uploader_url': 'https://vimeo.com/txwestcapital', + 'release_date': '20210730', + 'uploader': 'Christopher Inks', + 'title': 'Thursday, July 29, 2021 BMA Evening Video Update', + 'uploader_id': 'txwestcapital', + 'upload_date': '20210730', }, 'params': { 'skip_download': True, @@ -961,9 +1025,15 @@ class VimeoOndemandIE(VimeoIE): 'uploader': 'גם סרטים', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/gumfilms', 'uploader_id': 'gumfilms', - 'description': 'md5:4c027c965e439de4baab621e48b60791', + 'description': 'md5:aeeba3dbd4d04b0fa98a4fdc9c639998', 'upload_date': '20140906', 'timestamp': 1410032453, + 'thumbnail': 'https://i.vimeocdn.com/video/488238335-d7bf151c364cff8d467f1b73784668fe60aae28a54573a35d53a1210ae283bd8-d_1280', + 'comment_count': int, + 'license': 'https://creativecommons.org/licenses/by-nc-nd/3.0/', + 'duration': 53, + 'view_count': int, + 'like_count': int, }, 'params': { 'format': 'best[protocol=https]', @@ -982,6 +1052,11 @@ class VimeoOndemandIE(VimeoIE): 'description': 'md5:c3c46a90529612c8279fb6af803fc0df', 'upload_date': '20150502', 'timestamp': 1430586422, + 'duration': 121, + 'comment_count': int, + 'view_count': int, + 'thumbnail': 'https://i.vimeocdn.com/video/517077723-7066ae1d9a79d3eb361334fb5d58ec13c8f04b52f8dd5eadfbd6fb0bcf11f613-d_1280', + 'like_count': int, }, 'params': { 'skip_download': True, @@ -1011,7 +1086,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): 'id': 'tributes', 'title': 'Vimeo Tributes', }, - 'playlist_mincount': 25, + 'playlist_mincount': 22, }] _BASE_URL_TEMPL = 'https://vimeo.com/channels/%s' @@ -1196,6 +1271,9 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): 'uploader': 'Richard Hardwick', 'uploader_id': 'user21297594', 'description': "Comedian Dick Hardwick's five minute demo filmed in front of a live theater audience.\nEdit by Doug Mattocks", + 'duration': 304, + 'thumbnail': 'https://i.vimeocdn.com/video/450115033-43303819d9ebe24c2630352e18b7056d25197d09b3ae901abdac4c4f1d68de71-d_1280', + 'uploader_url': 'https://vimeo.com/user21297594', }, }, { 'note': 'video player needs Referer', From c70c418d33cb6b840f27f03ec020577bc86203c0 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 23 Mar 2022 08:26:36 +0530 Subject: [PATCH 0839/2552] Fix `--abort-on-error` for subtitles Closes #3163 --- yt_dlp/YoutubeDL.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index d075a82bc..80c7944f3 100644 --- a/yt_dlp/YoutubeDL.py +++ 
b/yt_dlp/YoutubeDL.py @@ -3859,9 +3859,12 @@ class YoutubeDL(object): sub_info['filepath'] = sub_filename ret.append((sub_filename, sub_filename_final)) except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err: + msg = f'Unable to download video subtitles for {sub_lang!r}: {err}' if self.params.get('ignoreerrors') is not True: # False or 'only_download' - raise DownloadError(f'Unable to download video subtitles for {sub_lang!r}: {err}', err) - self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}') + if not self.params.get('ignoreerrors'): + self.report_error(msg) + raise DownloadError(msg) + self.report_warning(msg) return ret def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None): From c2d2ee40eb168ef9f433b645271a55d821c327e5 Mon Sep 17 00:00:00 2001 From: "Lesmiscore (Naoya Ozaki)" Date: Wed, 23 Mar 2022 15:28:53 +0900 Subject: [PATCH 0840/2552] [generic] Extract subtitles from video.js (#3156) Authored by: Lesmiscore --- yt_dlp/extractor/generic.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 97e34808f..4a2e30158 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -17,6 +17,7 @@ from ..compat import ( ) from ..utils import ( determine_ext, + dict_get, ExtractorError, float_or_none, HEADRequest, @@ -31,6 +32,7 @@ from ..utils import ( parse_resolution, sanitized_Request, smuggle_url, + str_or_none, unescapeHTML, unified_timestamp, unsmuggle_url, @@ -3778,11 +3780,12 @@ class GenericIE(InfoExtractor): # Video.js embed mobj = re.search( - r'(?s)\bvideojs\s*\(.+?\.src\s*\(\s*((?:\[.+?\]|{.+?}))\s*\)\s*;', + r'(?s)\bvideojs\s*\(.+?([a-zA-Z0-9_$]+)\.src\s*\(\s*((?:\[.+?\]|{.+?}))\s*\)\s*;', webpage) if mobj is not None: + varname = mobj.group(1) sources = self._parse_json( - mobj.group(1), video_id, transform_source=js_to_json, + mobj.group(2), video_id, transform_source=js_to_json, fatal=False) or [] if not isinstance(sources, list): sources = [sources] @@ -3819,6 +3822,21 @@ class GenericIE(InfoExtractor): 'Referer': full_response.geturl(), }, }) + # https://docs.videojs.com/player#addRemoteTextTrack + # https://html.spec.whatwg.org/multipage/media.html#htmltrackelement + for sub_match in re.finditer(rf'(?s){re.escape(varname)}' r'\.addRemoteTextTrack\(({.+?})\s*,\s*(?:true|false)\)', webpage): + sub = self._parse_json( + sub_match.group(1), video_id, transform_source=js_to_json, fatal=False) or {} + src = str_or_none(sub.get('src')) + if not src: + continue + subtitles.setdefault(dict_get(sub, ('language', 'srclang')) or 'und', []).append({ + 'url': compat_urlparse.urljoin(url, src), + 'name': sub.get('label'), + 'http_headers': { + 'Referer': full_response.geturl(), + }, + }) if formats or subtitles: self.report_detected('video.js embed') self._sort_formats(formats) From ea5ca8e7fcf348a40e3eda70738738739123bded Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 23 Mar 2022 12:36:16 +0530 Subject: [PATCH 0841/2552] [ellentube] Extract subtitles from manifest Fixes https://github.com/ytdl-org/youtube-dl/issues/30761 --- yt_dlp/extractor/ellentube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/ellentube.py b/yt_dlp/extractor/ellentube.py index 544473274..d451bc048 100644 --- a/yt_dlp/extractor/ellentube.py +++ b/yt_dlp/extractor/ellentube.py @@ -26,7 +26,7 @@ class EllenTubeBaseIE(InfoExtractor): duration = None for entry in 
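# The addRemoteTextTrack pattern in the generic.py hunk above is shaped to
# match player calls like this hypothetical snippet (per the video.js and
# HTML track-element docs it links):
#
#     player.addRemoteTextTrack({
#         src: '/subs/en.vtt', srclang: 'en', label: 'English'
#     }, false)
#
# js_to_json() turns the object literal into JSON, and
# dict_get(sub, ('language', 'srclang')) keys the subtitle by whichever of
# the two fields is present, defaulting to 'und'.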
data.get('media'): if entry.get('id') == 'm3u8': - formats = self._extract_m3u8_formats( + formats, subtitles = self._extract_m3u8_formats_and_subtitles( entry['url'], video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') duration = int_or_none(entry.get('duration')) @@ -48,6 +48,7 @@ class EllenTubeBaseIE(InfoExtractor): 'view_count': get_insight('view'), 'like_count': get_insight('like'), 'formats': formats, + 'subtitles': subtitles, } From af14914baac50c7969bfd4fd9741cb5f4250c0e7 Mon Sep 17 00:00:00 2001 From: coletdev Date: Wed, 23 Mar 2022 20:47:02 +1300 Subject: [PATCH 0842/2552] Remove `Accept-Encoding` header from `std_headers` (#3153) This should be set by each downloader to what it supports. Fixes https://github.com/yt-dlp/yt-dlp/issues/3142 Authored-by: coletdjnz --- yt_dlp/downloader/external.py | 4 ++-- yt_dlp/utils.py | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index cfa646ebc..c900b3842 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -169,7 +169,7 @@ class CurlFD(ExternalFD): AVAILABLE_OPT = '-V' def _make_cmd(self, tmpfilename, info_dict): - cmd = [self.exe, '--location', '-o', tmpfilename] + cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed'] if info_dict.get('http_headers') is not None: for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] @@ -219,7 +219,7 @@ class WgetFD(ExternalFD): AVAILABLE_OPT = '--version' def _make_cmd(self, tmpfilename, info_dict): - cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies'] + cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies', '--compression=auto'] if info_dict.get('http_headers') is not None: for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index a08dc3c11..c71a7b833 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -159,7 +159,6 @@ if compat_brotli: std_headers = { 'User-Agent': random_user_agent(), 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', - 'Accept-Encoding': ', '.join(SUPPORTED_ENCODINGS), 'Accept-Language': 'en-us,en;q=0.5', 'Sec-Fetch-Mode': 'navigate', } @@ -1401,6 +1400,9 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): if h.capitalize() not in req.headers: req.add_header(h, v) + if 'Accept-encoding' not in req.headers: + req.add_header('Accept-encoding', ', '.join(SUPPORTED_ENCODINGS)) + req.headers = handle_youtubedl_headers(req.headers) if sys.version_info < (2, 7) and '#' in req.get_full_url(): From b8f2f8f6b30e5d846a988e73bfe5a6f5c37da730 Mon Sep 17 00:00:00 2001 From: mehq <11481344+mehq@users.noreply.github.com> Date: Thu, 24 Mar 2022 00:35:42 +0600 Subject: [PATCH 0843/2552] [LastFM] Add extractors (#3141) Closes #2967 Authored by: mehq --- yt_dlp/extractor/extractors.py | 5 ++ yt_dlp/extractor/lastfm.py | 129 +++++++++++++++++++++++++++++++++ 2 files changed, 134 insertions(+) create mode 100644 yt_dlp/extractor/lastfm.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 4eda27cdc..867304e75 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -731,6 +731,11 @@ from .laola1tv import ( EHFTVIE, ITTFIE, ) +from .lastfm import ( + LastFMIE, + LastFMPlaylistIE, + LastFMUserIE, +) from .lbry import ( LBRYIE, LBRYChannelIE, diff --git a/yt_dlp/extractor/lastfm.py b/yt_dlp/extractor/lastfm.py new file mode 100644 index 
000000000..5215717e8
--- /dev/null
+++ b/yt_dlp/extractor/lastfm.py
@@ -0,0 +1,129 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none, format_field
+
+
+class LastFMPlaylistBaseIE(InfoExtractor):
+    def _entries(self, url, playlist_id):
+        webpage = self._download_webpage(url, playlist_id)
+        start_page_number = int_or_none(self._search_regex(
+            r'\bpage=(\d+)', url, 'page', default=None)) or 1
+        last_page_number = int_or_none(self._search_regex(
+            r'>(\d+)</a>[^<]*</li>[^<]*<li[^<]+class="pagination-next', webpage, 'last_page', default=None))
+
+        for page_number in range(start_page_number, (last_page_number or start_page_number) + 1):
+            webpage = self._download_webpage(
+                url, playlist_id,
+                note='Downloading page %d%s' % (page_number, format_field(last_page_number, template=' of %d')),
+                query={'page': page_number})
+            page_entries = [
+                self.url_result(player_url, 'Youtube')
+                for player_url in set(re.findall(r'data-youtube-url="([^"]+)"', webpage))
+            ]
+
+            for e in page_entries:
+                yield e
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        return self.playlist_result(self._entries(url, playlist_id), playlist_id)
+
+
+class LastFMPlaylistIE(LastFMPlaylistBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?last\.fm/(music|tag)/(?P<id>[^/]+)(?:/[^/]+)?/?(?:[?#]|$)'
+    _TESTS = [{
+        'url': 'https://www.last.fm/music/Oasis/(What%27s+the+Story)+Morning+Glory%3F',
+        'info_dict': {
+            'id': 'Oasis',
+        },
+        'playlist_count': 11,
+    }, {
+        'url': 'https://www.last.fm/music/Oasis',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.last.fm/music/Oasis/',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.last.fm/music/Oasis?top_tracks_date_preset=ALL#top-tracks',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.last.fm/music/Oasis/+tracks',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.last.fm/music/Oasis/+tracks?page=2',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.last.fm/music/Oasis/+tracks?date_preset=LAST_90_DAYS#top-tracks',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.last.fm/tag/rock',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.last.fm/tag/rock/tracks',
+        'only_matching': True,
+    }]
+
+
+class LastFMUserIE(LastFMPlaylistBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?last\.fm/user/[^/]+/playlists/(?P<id>[^/#?]+)'
+    _TESTS = [{
+        'url': 'https://www.last.fm/user/mehq/playlists/12319471',
+        'info_dict': {
+            'id': '12319471',
+        },
+        'playlist_count': 30,
+    }]
+
+
+class LastFMIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?last\.fm/music(?:/[^/]+){2}/(?P<id>[^/#?]+)'
+    _TESTS = [{
+        'url': 'https://www.last.fm/music/Oasis/_/Wonderwall',
+        'md5': '9c4a70c2e84c03d54fe24229b9e13b7b',
+        'info_dict': {
+            'id': '6hzrDeceEKc',
+            'ext': 'mp4',
+            'title': 'Oasis - Wonderwall (Official Video)',
+            'thumbnail': r're:^https?://i.ytimg.com/.*\.jpg$',
+            'description': 'md5:0848669853c10687cc28e88b5756738f',
+            'uploader': 'Oasis',
+            'uploader_id': 'oasisinetofficial',
+            'upload_date': '20080207',
+            'album': '(What\'s The Story) Morning Glory? 
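# Pagination sketch for _entries above: the last page number is scraped from
# the link text just before the "pagination-next" <li>, so given a
# hypothetical markup tail like
#
#     <li><a>58</a></li> <li class="pagination-next">...</li>
#
# pages start_page_number..58 are fetched in turn, each yielding the set of
# data-youtube-url="..." players found on it (a set, so duplicates on a page
# collapse; ordering within a page is not preserved).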
(Remastered)',
+            'track': 'Wonderwall (Remastered)',
+            'channel_id': 'UCUDVBtnOQi4c7E8jebpjc9Q',
+            'view_count': int,
+            'live_status': 'not_live',
+            'channel_url': 'https://www.youtube.com/channel/UCUDVBtnOQi4c7E8jebpjc9Q',
+            'tags': 'count:39',
+            'creator': 'Oasis',
+            'uploader_url': 're:^https?://www.youtube.com/user/oasisinetofficial',
+            'duration': 279,
+            'alt_title': 'Wonderwall (Remastered)',
+            'age_limit': 0,
+            'channel': 'Oasis',
+            'channel_follower_count': int,
+            'categories': ['Music'],
+            'availability': 'public',
+            'like_count': int,
+            'playable_in_embed': True,
+            'artist': 'Oasis',
+        },
+        'add_ie': ['Youtube'],
+    }, {
+        'url': 'https://www.last.fm/music/Oasis/_/Don%27t+Look+Back+In+Anger+-+Remastered/',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.last.fm/music/Guns+N%27+Roses/_/Sweet+Child+o%27+Mine',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        player_url = self._search_regex(r'(?s)class="header-new-playlink"\s+href="([^"]+)"', webpage, 'player_url')
+        return self.url_result(player_url, 'Youtube')

From 12a64f27772f699f61b50340370c5b44b71aa7ad Mon Sep 17 00:00:00 2001
From: vvto33 <54504675+vvto33@users.noreply.github.com>
Date: Thu, 24 Mar 2022 10:11:13 +0900
Subject: [PATCH 0844/2552] [TVer] Support landing page (#3075)

Authored by: vvto33
---
 yt_dlp/extractor/tver.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/yt_dlp/extractor/tver.py b/yt_dlp/extractor/tver.py
index b8ac41483..9ff3136e2 100644
--- a/yt_dlp/extractor/tver.py
+++ b/yt_dlp/extractor/tver.py
@@ -14,7 +14,7 @@ from ..utils import (
 
 
 class TVerIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?P<path>(?:corner|episode|feature)/(?P<id>f?\d+))'
+    _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?P<path>corner|episode|feature|lp|tokyo2020/video)/(?P<id>[fc]?\d+)'
     # videos are only available for 7 days
     _TESTS = [{
         'url': 'https://tver.jp/corner/f0062178',
@@ -29,6 +29,15 @@ class TVerIE(InfoExtractor):
         # subtitle = ' '
         'url': 'https://tver.jp/corner/f0068870',
         'only_matching': True,
+    }, {
+        'url': 'https://tver.jp/lp/f0009694',
+        'only_matching': True,
+    }, {
+        'url': 'https://tver.jp/lp/c0000239',
+        'only_matching': True,
+    }, {
+        'url': 'https://tver.jp/tokyo2020/video/6264525510001',
+        'only_matching': True,
     }]
     _TOKEN = None
     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
@@ -39,9 +48,11 @@ class TVerIE(InfoExtractor):
 
     def _real_extract(self, url):
         path, video_id = self._match_valid_url(url).groups()
-        api_response = self._download_json(
-            'https://api.tver.jp/v4/' + path, video_id,
-            query={'token': self._TOKEN})
+        if path == 'lp':
+            webpage = self._download_webpage(url, video_id)
+            redirect_path = self._search_regex(r'to_href="([^"]+)', webpage, 'redirect path')
+            path, video_id = self._match_valid_url(f'https://tver.jp{redirect_path}').groups()
+        api_response = self._download_json(f'https://api.tver.jp/v4/{path}/{video_id}', video_id, query={'token': self._TOKEN})
         p_id = traverse_obj(api_response, ('main', 'publisher_id'))
         if not p_id:
             error_msg, expected = traverse_obj(api_response, ('episode', 0, 'textbar', 0, ('text', 'longer')), get_all=False), True

From 700ccbe3f14e9b1f3708a562efc57361c14dee68 Mon Sep 17 00:00:00 2001
From: pukkandan 
Date: Thu, 24 Mar 2022 05:13:57 +0530
Subject: [PATCH 0845/2552] [extractor] Allow control characters inside json

Closes #3174
---
 yt_dlp/extractor/common.py | 2 +-
 1 file changed, 
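# A runnable sketch of the TVer 'lp' redirect scrape above (the markup is
# hypothetical; only the to_href attribute is assumed from the regex):
#
#     import re
#     webpage = '<div class="banner" to_href="/corner/f0062178">'
#     re.search(r'to_href="([^"]+)', webpage).group(1)  # -> '/corner/f0062178'
#
# The recovered path is re-matched against _VALID_URL to yield the real
# (path, video_id) pair before the API call.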
1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index f3ae3fd4c..e1a661033 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1033,7 +1033,7 @@ class InfoExtractor(object): if transform_source: json_string = transform_source(json_string) try: - return json.loads(json_string) + return json.loads(json_string, strict=False) except ValueError as ve: errmsg = '%s: Failed to parse JSON ' % video_id if fatal: From 231025c4632d7a48540b18997c715a8df8a6ff8e Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 24 Mar 2022 06:53:11 +0530 Subject: [PATCH 0846/2552] Fix bug in 52efa4b31200119adaa8acf33e50b84fcb6948f0 Closes #3173 --- devscripts/make_supportedsites.py | 5 ++--- yt_dlp/__init__.py | 4 ++-- yt_dlp/extractor/common.py | 1 + 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py index 4c11e25f2..729f60a0e 100644 --- a/devscripts/make_supportedsites.py +++ b/devscripts/make_supportedsites.py @@ -24,10 +24,9 @@ def main(): def gen_ies_md(ies): for ie in ies: ie_md = '**{0}**'.format(ie.IE_NAME) - ie_desc = getattr(ie, 'IE_DESC', None) - if ie_desc is False: + if ie.IE_DESC is False: continue - if ie_desc is not None: + if ie.IE_DESC is not None: ie_md += ': {0}'.format(ie.IE_DESC) search_key = getattr(ie, 'SEARCH_KEY', None) if search_key is not None: diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index a433c4513..a445d8621 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -94,9 +94,9 @@ def print_extractor_information(opts, urls): for ie in list_extractors(opts.age_limit): if not ie.working(): continue - desc = getattr(ie, 'IE_DESC', ie.IE_NAME) - if desc is False: + if ie.IE_DESC is False: continue + desc = ie.IE_DESC or ie.IE_NAME if getattr(ie, 'SEARCH_KEY', None) is not None: _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow') _COUNTS = ('', '5', '10', 'all') diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index e1a661033..a2e41db3b 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -469,6 +469,7 @@ class InfoExtractor(object): _GEO_IP_BLOCKS = None _WORKING = True _NETRC_MACHINE = None + IE_DESC = None _LOGIN_HINTS = { 'any': 'Use --cookies, --cookies-from-browser, --username and --password, or --netrc to provide account credentials', From ae6a1b95857c7ba1392e37be99b9ee2b66b14b1d Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 24 Mar 2022 07:00:22 +0530 Subject: [PATCH 0847/2552] [docs] Minor improvements Closes #3127, Closes #3081, Closes #3177 --- Collaborators.md | 1 + README.md | 30 +++++++++++++++--------------- yt_dlp/YoutubeDL.py | 2 +- yt_dlp/extractor/common.py | 6 +++--- yt_dlp/options.py | 2 +- 5 files changed, 21 insertions(+), 20 deletions(-) diff --git a/Collaborators.md b/Collaborators.md index 29d320dd6..52e3b9cae 100644 --- a/Collaborators.md +++ b/Collaborators.md @@ -29,6 +29,7 @@ You can also find lists of all [contributors of yt-dlp](CONTRIBUTORS) and [autho * YouTube improvements including: age-gate bypass, private playlists, multiple-clients (to avoid throttling) and a lot of under-the-hood improvements * Added support for downloading YoutubeWebArchive videos +* Added support for new websites MainStreaming, PRX, nzherald, etc diff --git a/README.md b/README.md index 373711499..a2e2b34c3 100644 --- a/README.md +++ b/README.md @@ -3,14 +3,14 @@ 
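For the `strict=False` change above: Python's `json` module rejects raw control characters inside strings by default, and `strict=False` lifts exactly that restriction, which is what lets `_parse_json` cope with literal newlines or tabs embedded in scraped JSON. A minimal sketch:

```python
import json

raw = '{"caption": "line one\nline two"}'  # contains a real newline control character

# json.loads(raw)  # would raise JSONDecodeError: Invalid control character
print(json.loads(raw, strict=False))  # {'caption': 'line one\nline two'}
```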
[![YT-DLP](https://raw.githubusercontent.com/yt-dlp/yt-dlp/master/.github/banner.svg)](#readme) -[![Release version](https://img.shields.io/github/v/release/yt-dlp/yt-dlp?color=blue&label=Download&style=for-the-badge)](#release-files "Release") -[![License: Unlicense](https://img.shields.io/badge/-Unlicense-brightgreen.svg?style=for-the-badge)](LICENSE "License") +[![Release version](https://img.shields.io/github/v/release/yt-dlp/yt-dlp?color=brightgreen&label=Download&style=for-the-badge)](#release-files "Release") +[![PyPi](https://img.shields.io/badge/-PyPi-blue.svg?logo=pypi&labelColor=555555&style=for-the-badge)](https://pypi.org/project/yt-dlp "PyPi") [![Donate](https://img.shields.io/badge/_-Donate-red.svg?logo=githubsponsors&labelColor=555555&style=for-the-badge)](Collaborators.md#collaborators "Donate") +[![Matrix](https://img.shields.io/matrix/yt-dlp:matrix.org?color=brightgreen&labelColor=555555&label=&logo=element&style=for-the-badge)](https://matrix.to/#/#yt-dlp:matrix.org "Matrix") +[![Discord](https://img.shields.io/discord/807245652072857610?color=blue&labelColor=555555&label=&logo=discord&style=for-the-badge)](https://discord.gg/H5MNcFW63r "Discord") [![Supported Sites](https://img.shields.io/badge/-Supported_Sites-brightgreen.svg?style=for-the-badge)](supportedsites.md "Supported Sites") -[![PyPi](https://img.shields.io/badge/-PyPi-blue.svg?logo=pypi&labelColor=555555&style=for-the-badge)](https://pypi.org/project/yt-dlp "PyPi") +[![License: Unlicense](https://img.shields.io/badge/-Unlicense-blue.svg?style=for-the-badge)](LICENSE "License") [![CI Status](https://img.shields.io/github/workflow/status/yt-dlp/yt-dlp/Core%20Tests/master?label=Tests&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/actions "CI Status") -[![Discord](https://img.shields.io/discord/807245652072857610?color=blue&labelColor=555555&label=&logo=discord&style=for-the-badge)](https://discord.gg/H5MNcFW63r "Discord") -[![Matrix](https://img.shields.io/matrix/yt-dlp:matrix.org?color=brightgreen&labelColor=555555&label=&logo=element&style=for-the-badge)](https://matrix.to/#/#yt-dlp:matrix.org "Matrix") [![Commits](https://img.shields.io/github/commit-activity/m/yt-dlp/yt-dlp?label=commits&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/commits "Commit History") [![Last Commit](https://img.shields.io/github/last-commit/yt-dlp/yt-dlp/master?label=&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/commits "Commit History") @@ -76,7 +76,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t * **[Format Sorting](#sorting-formats)**: The default format sorting options have been changed so that higher resolution and better codecs will be now preferred instead of simply using larger bitrate. Furthermore, you can now specify the sort order using `-S`. This allows for much easier format selection than what is possible by simply using `--format` ([examples](#format-selection-examples)) -* **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, playlist infojson etc. Note that the NicoNico improvements are not available. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details. 
+* **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, playlist infojson etc. Note that the NicoNico livestreams are not available. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details. * **Youtube improvements**: * All Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`) and private playlists supports downloading multiple pages of content @@ -202,7 +202,7 @@ python3 -m pip install --no-deps -U yt-dlp If you want to be on the cutting edge, you can also install the master branch with: ``` -python3 -m pip install --force-reinstall https://github.com/yt-dlp/yt-dlp/archive/master.zip +python3 -m pip install --force-reinstall https://github.com/yt-dlp/yt-dlp/archive/master.tar.gz ``` Note that on some systems, you may need to use `py` or `python` instead of `python3` @@ -230,14 +230,14 @@ If you [installed using Homebrew](#with-homebrew), run `brew upgrade yt-dlp/taps File|Description :---|:--- -[yt-dlp](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)|Platform-independant binary. Needs Python (recommended for **UNIX-like systems**) +[yt-dlp](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)|Platform-independant binary. Needs Python (recommended for **Linux/BSD**) [yt-dlp.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)|Windows (Win7 SP1+) standalone x64 binary (recommended for **Windows**) +[yt-dlp_macos](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)|MacOS (10.15+) standalone executable (recommended for **MacOS**) #### Alternatives File|Description :---|:--- -[yt-dlp_macos](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)|MacOS (10.15+) standalone executable [yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Vista SP2+) standalone x86 (32-bit) binary [yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_min.exe)|Windows (Win7 SP1+) standalone x64 binary built with `py2exe`.
Does not contain `pycryptodomex`, needs VC++14 [yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged Windows executable (no auto-update) @@ -1154,11 +1154,11 @@ The available fields are: - `license` (string): License name the video is licensed under - `creator` (string): The creator of the video - `timestamp` (numeric): UNIX timestamp of the moment the video became available - - `upload_date` (string): Video upload date (YYYYMMDD) + - `upload_date` (string): Video upload date in UTC (YYYYMMDD) - `release_timestamp` (numeric): UNIX timestamp of the moment the video was released - - `release_date` (string): The date (YYYYMMDD) when the video was released + - `release_date` (string): The date (YYYYMMDD) when the video was released in UTC - `modified_timestamp` (numeric): UNIX timestamp of the moment the video was last modified - - `modified_date` (string): The date (YYYYMMDD) when the video was last modified + - `modified_date` (string): The date (YYYYMMDD) when the video was last modified in UTC - `uploader_id` (string): Nickname or id of the video uploader - `channel` (string): Full name of the channel the video is uploaded on - `channel_id` (string): Id of the channel @@ -1363,7 +1363,7 @@ You can also use special names to select particular edge case formats: - `bv`, `bestvideo`: Select the best quality **video-only** format. Equivalent to `best*[acodec=none]` - `bv*`, `bestvideo*`: Select the best quality format that **contains video**. It may also contain audio. Equivalent to `best*[vcodec!=none]` - `ba`, `bestaudio`: Select the best quality **audio-only** format. Equivalent to `best*[vcodec=none]` - - `ba*`, `bestaudio*`: Select the best quality format that **contains audio**. It may also contain video. Equivalent to `best*[acodec!=none]` + - `ba*`, `bestaudio*`: Select the best quality format that **contains audio**. It may also contain video. Equivalent to `best*[acodec!=none]` ([Do not use!](https://github.com/yt-dlp/yt-dlp/issues/979#issuecomment-919629354)) - `w*`, `worst*`: Select the worst quality format that contains either a video or an audio - `w`, `worst`: Select the worst quality format that contains both video and audio. Equivalent to `worst*[vcodec!=none][acodec!=none]` - `wv`, `worstvideo`: Select the worst quality video-only format. Equivalent to `worst*[acodec=none]` @@ -1371,7 +1371,7 @@ You can also use special names to select particular edge case formats: - `wa`, `worstaudio`: Select the worst quality audio-only format. Equivalent to `worst*[vcodec=none]` - `wa*`, `worstaudio*`: Select the worst quality format that contains audio. It may also contain video. Equivalent to `worst*[acodec!=none]` -For example, to download the worst quality video-only format you can use `-f worstvideo`. It is however recommended not to use `worst` and related options. When your format selector is `worst`, the format which is worst in all respects is selected. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps` instead of `-f worst`. See [sorting formats](#sorting-formats) for more details. +For example, to download the worst quality video-only format you can use `-f worstvideo`. It is however recommended not to use `worst` and related options. When your format selector is `worst`, the format which is worst in all respects is selected. Most of the time, what you actually want is the video with the smallest filesize instead. 
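For instance, the same preference expressed through the embedding API (assuming `format_sort` is the Python-side spelling of `-S`; the URL is a placeholder):

```python
import yt_dlp

# prefer small files outright, rather than the format that is worst in every respect
opts = {'format_sort': ['+size', '+br', '+res', '+fps']}
with yt_dlp.YoutubeDL(opts) as ydl:
    ydl.download(['https://example.com/watch?v=...'])
```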
So it is generally better to use `-S +size` or more rigorously, `-S +size,+br,+res,+fps` instead of `-f worst`. See [sorting formats](#sorting-formats) for more details. You can select the n'th best format of a type by using `best.`. For example, `best.2` will select the 2nd best combined format. Similarly, `bv*.3` will select the 3rd best format that contains a video stream. @@ -1677,7 +1677,7 @@ The following extractors use this feature: * `language`: Languages to extract. Eg: `crunchyroll:language=jaJp` * `hardsub`: Which hard-sub versions to extract. Eg: `crunchyroll:hardsub=None,enUS` -#### crunchyroll:beta +#### crunchyrollbeta * `format`: Which stream type(s) to extract. Default is `adaptive_hls` Eg: `crunchyrollbeta:format=vo_adaptive_hls` * Potentially useful values include `adaptive_hls`, `adaptive_dash`, `vo_adaptive_hls`, `vo_adaptive_dash`, `download_hls`, `trailer_hls`, `trailer_dash` * `hardsub`: Preference order for which hardsub versions to extract. Default is `None` (no hardsubs). Eg: `crunchyrollbeta:hardsub=en-US,None` diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 80c7944f3..740f9a7bd 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -788,7 +788,7 @@ class YoutubeDL(object): def to_stdout(self, message, skip_eol=False, quiet=None): """Print message to stdout""" if quiet is not None: - self.deprecation_warning('"ydl.to_stdout" no longer accepts the argument quiet. Use "ydl.to_screen" instead') + self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead') self._write_string( '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')), self._out_files['print']) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index a2e41db3b..d3d13c40c 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -248,14 +248,14 @@ class InfoExtractor(object): license: License name the video is licensed under. creator: The creator of the video. timestamp: UNIX timestamp of the moment the video was uploaded - upload_date: Video upload date (YYYYMMDD). + upload_date: Video upload date in UTC (YYYYMMDD). If not explicitly set, calculated from timestamp release_timestamp: UNIX timestamp of the moment the video was released. If it is not clear whether to use timestamp or this, use the former - release_date: The date (YYYYMMDD) when the video was released. + release_date: The date (YYYYMMDD) when the video was released in UTC. If not explicitly set, calculated from release_timestamp modified_timestamp: UNIX timestamp of the moment the video was last modified. - modified_date: The date (YYYYMMDD) when the video was last modified. + modified_date: The date (YYYYMMDD) when the video was last modified in UTC. If not explicitly set, calculated from modified_timestamp uploader_id: Nickname or id of the video uploader. uploader_url: Full URL to a personal webpage of the video uploader. diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 3c81da6c3..123a07bfa 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1316,7 +1316,7 @@ def create_parser(): postproc.add_option( '--audio-quality', metavar='QUALITY', dest='audioquality', default='5', - help='Specify ffmpeg audio quality, insert a value between 0 (best) and 10 (worst) for VBR or a specific bitrate like 128K (default %default)') + help='Specify ffmpeg audio quality to use when converting the audio with -x. 
Insert a value between 0 (best) and 10 (worst) for VBR or a specific bitrate like 128K (default %default)') postproc.add_option( '--remux-video', metavar='FORMAT', dest='remuxvideo', default=None, From ae72962643aa43b1e1037293b226acc71fb34297 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 25 Mar 2022 05:00:41 +0530 Subject: [PATCH 0848/2552] [youtube] Try embedded client variants before agegate agegate variants appears to be broken, but don't remove them for the time-being --- yt_dlp/extractor/youtube.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index d74d5b0e9..8ee688798 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2940,13 +2940,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor): webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE, video_id, 'initial player response') - original_clients = clients + all_clients = set(clients) clients = clients[::-1] prs = [] - def append_client(client_name): - if client_name in INNERTUBE_CLIENTS and client_name not in original_clients: - clients.append(client_name) + def append_client(*client_names): + """ Append the first client name that exists """ + for client_name in client_names: + if client_name in INNERTUBE_CLIENTS: + if client_name not in all_clients: + clients.append(client_name) + all_clients.add(client_name) + return # Android player_response does not have microFormats which are needed for # extraction of some data. So we return the initial_pr with formats @@ -2992,7 +2997,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated: append_client(client.replace('_agegate', '_creator')) elif self._is_agegated(pr): - append_client(f'{client}_agegate') + append_client(f'{client}_embedded', f'{client.replace("_embedded", "")}_agegate') if last_error: if not len(prs): From a3f2445e295134e3eb312dc464069eccc01b4880 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 25 Mar 2022 08:31:45 +0530 Subject: [PATCH 0849/2552] [postprocessor,cleanup] Create `_download_json` --- yt_dlp/postprocessor/common.py | 31 ++++++++++++++++++++++++++-- yt_dlp/postprocessor/sponsorblock.py | 28 ++----------------------- 2 files changed, 31 insertions(+), 28 deletions(-) diff --git a/yt_dlp/postprocessor/common.py b/yt_dlp/postprocessor/common.py index d761c9303..8420ee864 100644 --- a/yt_dlp/postprocessor/common.py +++ b/yt_dlp/postprocessor/common.py @@ -1,13 +1,18 @@ from __future__ import unicode_literals import functools +import itertools +import json import os +import time +import urllib.error -from ..compat import compat_str from ..utils import ( _configuration_args, encodeFilename, + network_exceptions, PostProcessingError, + sanitized_Request, write_string, ) @@ -63,7 +68,7 @@ class PostProcessor(metaclass=PostProcessorMetaClass): @classmethod def pp_key(cls): name = cls.__name__[:-2] - return compat_str(name[6:]) if name[:6].lower() == 'ffmpeg' else name + return name[6:] if name[:6].lower() == 'ffmpeg' else name def to_screen(self, text, prefix=True, *args, **kwargs): tag = '[%s] ' % self.PP_NAME if prefix else '' @@ -180,6 +185,28 @@ class PostProcessor(metaclass=PostProcessorMetaClass): progress_template.get('postprocess-title') or 'yt-dlp %(progress._default_template)s', progress_dict)) + def _download_json(self, url, *, expected_http_errors=(404,)): + # While this is not an extractor, it behaves similar to one and + # so obey extractor_retries and 
sleep_interval_requests + max_retries = self.get_param('extractor_retries', 3) + sleep_interval = self.get_param('sleep_interval_requests') or 0 + + self.write_debug(f'{self.PP_NAME} query: {url}') + for retries in itertools.count(): + try: + rsp = self._downloader.urlopen(sanitized_Request(url)) + return json.loads(rsp.read().decode(rsp.info().get_param('charset') or 'utf-8')) + except network_exceptions as e: + if isinstance(e, urllib.error.HTTPError) and e.code in expected_http_errors: + return None + if retries < max_retries: + self.report_warning(f'{e}. Retrying...') + if sleep_interval > 0: + self.to_screen(f'Sleeping {sleep_interval} seconds ...') + time.sleep(sleep_interval) + continue + raise PostProcessingError(f'Unable to communicate with {self.PP_NAME} API: {e}') + class AudioConversionError(PostProcessingError): pass diff --git a/yt_dlp/postprocessor/sponsorblock.py b/yt_dlp/postprocessor/sponsorblock.py index e7e04e86e..7943014e2 100644 --- a/yt_dlp/postprocessor/sponsorblock.py +++ b/yt_dlp/postprocessor/sponsorblock.py @@ -1,12 +1,9 @@ from hashlib import sha256 -import itertools import json import re -import time from .ffmpeg import FFmpegPostProcessor -from ..compat import compat_urllib_parse_urlencode, compat_HTTPError -from ..utils import PostProcessingError, network_exceptions, sanitized_Request +from ..compat import compat_urllib_parse_urlencode class SponsorBlockPP(FFmpegPostProcessor): @@ -94,28 +91,7 @@ class SponsorBlockPP(FFmpegPostProcessor): 'categories': json.dumps(self._categories), 'actionTypes': json.dumps(['skip', 'poi']) }) - self.write_debug(f'SponsorBlock query: {url}') - for d in self._get_json(url): + for d in self._download_json(url) or []: if d['videoID'] == video_id: return d['segments'] return [] - - def _get_json(self, url): - # While this is not an extractor, it behaves similar to one and - # so obey extractor_retries and sleep_interval_requests - max_retries = self.get_param('extractor_retries', 3) - sleep_interval = self.get_param('sleep_interval_requests') or 0 - for retries in itertools.count(): - try: - rsp = self._downloader.urlopen(sanitized_Request(url)) - return json.loads(rsp.read().decode(rsp.info().get_param('charset') or 'utf-8')) - except network_exceptions as e: - if isinstance(e, compat_HTTPError) and e.code == 404: - return [] - if retries < max_retries: - self.report_warning(f'{e}. 
Retrying...') - if sleep_interval > 0: - self.to_screen(f'Sleeping {sleep_interval} seconds ...') - time.sleep(sleep_interval) - continue - raise PostProcessingError(f'Unable to communicate with SponsorBlock API: {e}') From 1fb707badb35d01d4ad7831a19f3469de2ef9141 Mon Sep 17 00:00:00 2001 From: zackmark29 <62680932+zackmark29@users.noreply.github.com> Date: Fri, 25 Mar 2022 11:23:54 +0800 Subject: [PATCH 0850/2552] [viu] Fixed extractor (#3136) Closes #3133 Authored by: zackmark29, pukkandan --- yt_dlp/extractor/viu.py | 216 +++++++++++++++++++--------------------- 1 file changed, 103 insertions(+), 113 deletions(-) diff --git a/yt_dlp/extractor/viu.py b/yt_dlp/extractor/viu.py index b633df95d..abd553f18 100644 --- a/yt_dlp/extractor/viu.py +++ b/yt_dlp/extractor/viu.py @@ -1,55 +1,32 @@ # coding: utf-8 from __future__ import unicode_literals -import json import re +import json +import uuid +import random +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_kwargs, - compat_str, - compat_urlparse, - compat_urllib_request, -) +from ..compat import compat_str from ..utils import ( ExtractorError, int_or_none, + strip_or_none, try_get, smuggle_url, unsmuggle_url, + url_or_none, ) class ViuBaseIE(InfoExtractor): - def _real_initialize(self): - viu_auth_res = self._request_webpage( - 'https://www.viu.com/api/apps/v2/authenticate', None, - 'Requesting Viu auth', query={ - 'acct': 'test', - 'appid': 'viu_desktop', - 'fmt': 'json', - 'iid': 'guest', - 'languageid': 'default', - 'platform': 'desktop', - 'userid': 'guest', - 'useridtype': 'guest', - 'ver': '1.0' - }, headers=self.geo_verification_headers()) - self._auth_token = viu_auth_res.info()['X-VIU-AUTH'] - - def _call_api(self, path, *args, **kwargs): - headers = self.geo_verification_headers() - headers.update({ - 'X-VIU-AUTH': self._auth_token - }) - headers.update(kwargs.get('headers', {})) - kwargs['headers'] = headers + def _call_api(self, path, *args, headers={}, **kwargs): response = self._download_json( - 'https://www.viu.com/api/' + path, *args, - **compat_kwargs(kwargs))['response'] + f'https://www.viu.com/api/{path}', *args, **kwargs, + headers={**self.geo_verification_headers(), **headers})['response'] if response.get('status') != 'success': - raise ExtractorError('%s said: %s' % ( - self.IE_NAME, response['message']), expected=True) + raise ExtractorError(f'{self.IE_NAME} said: {response["message"]}', expected=True) return response @@ -101,6 +78,7 @@ class ViuIE(ViuBaseIE): tdirforwhole = video_data.get('tdirforwhole') # #EXT-X-BYTERANGE is not supported by native hls downloader # and ffmpeg (#10955) + # FIXME: It is supported in yt-dlp # hls_file = video_data.get('hlsfile') hls_file = video_data.get('jwhlsfile') if url_path and tdirforwhole and hls_file: @@ -227,42 +205,63 @@ class ViuOTTIE(InfoExtractor): 'zh-cn': 2, 'en-us': 3, } - _user_info = None + + _user_token = None + _auth_codes = {} def _detect_error(self, response): - code = response.get('status', {}).get('code') - if code > 0: + code = try_get(response, lambda x: x['status']['code']) + if code and code > 0: message = try_get(response, lambda x: x['status']['message']) - raise ExtractorError('%s said: %s (%s)' % ( - self.IE_NAME, message, code), expected=True) - return response['data'] - - def _raise_login_required(self): - raise ExtractorError( - 'This video requires login. ' - 'Specify --username and --password or --netrc (machine: %s) ' - 'to provide account credentials.' 
% self._NETRC_MACHINE, - expected=True) + raise ExtractorError(f'{self.IE_NAME} said: {message} ({code})', expected=True) + return response.get('data') or {} def _login(self, country_code, video_id): - if not self._user_info: + if self._user_token is None: username, password = self._get_login_info() - if username is None or password is None: + if username is None: return + headers = { + 'Authorization': f'Bearer {self._auth_codes[country_code]}', + 'Content-Type': 'application/json' + } + data = self._download_json( + 'https://api-gateway-global.viu.com/api/account/validate', + video_id, 'Validating email address', headers=headers, + data=json.dumps({ + 'principal': username, + 'provider': 'email' + }).encode()) + if not data.get('exists'): + raise ExtractorError('Invalid email address') data = self._download_json( - compat_urllib_request.Request( - 'https://www.viu.com/ott/%s/index.php' % country_code, method='POST'), - video_id, 'Logging in', errnote=False, fatal=False, - query={'r': 'user/login'}, + 'https://api-gateway-global.viu.com/api/auth/login', + video_id, 'Logging in', headers=headers, data=json.dumps({ - 'username': username, + 'email': username, 'password': password, - 'platform_flag_label': 'web', + 'provider': 'email', }).encode()) - self._user_info = self._detect_error(data)['user'] - - return self._user_info + self._detect_error(data) + self._user_token = data.get('identity') + # need to update with valid user's token else will throw an error again + self._auth_codes[country_code] = data.get('token') + return self._user_token + + def _get_token(self, country_code, video_id): + rand = ''.join(random.choice('0123456789') for _ in range(10)) + return self._download_json( + f'https://api-gateway-global.viu.com/api/auth/token?v={rand}000', video_id, + headers={'Content-Type': 'application/json'}, note='Getting bearer token', + data=json.dumps({ + 'countryCode': country_code.upper(), + 'platform': 'browser', + 'platformFlagLabel': 'web', + 'language': 'en', + 'uuid': str(uuid.uuid4()), + 'carrierId': '0' + }).encode('utf-8'))['token'] def _real_extract(self, url): url, idata = unsmuggle_url(url, {}) @@ -279,16 +278,16 @@ class ViuOTTIE(InfoExtractor): query['area_id'] = area_id product_data = self._download_json( - 'http://www.viu.com/ott/%s/index.php' % country_code, video_id, + f'http://www.viu.com/ott/{country_code}/index.php', video_id, 'Downloading video info', query=query)['data'] video_data = product_data.get('current_product') if not video_data: - raise ExtractorError('This video is not available in your region.', expected=True) + self.raise_geo_restricted() series_id = video_data.get('series_id') if self._yes_playlist(series_id, video_id, idata): - series = product_data.get('series', {}) + series = product_data.get('series') or {} product = series.get('product') if product: entries = [] @@ -296,14 +295,10 @@ class ViuOTTIE(InfoExtractor): item_id = entry.get('product_id') if not item_id: continue - item_id = compat_str(item_id) entries.append(self.url_result( - smuggle_url( - 'http://www.viu.com/ott/%s/%s/vod/%s/' % (country_code, lang_code, item_id), - {'force_noplaylist': True}), # prevent infinite recursion - 'ViuOTT', - item_id, - entry.get('synopsis', '').strip())) + smuggle_url(f'http://www.viu.com/ott/{country_code}/{lang_code}/vod/{item_id}/', + {'force_noplaylist': True}), + ViuOTTIE, str(item_id), entry.get('synopsis', '').strip())) return self.playlist_result(entries, series_id, series.get('name'), series.get('description')) @@ -312,69 +307,65 @@ class 
ViuOTTIE(InfoExtractor): 'ccs_product_id': video_data['ccs_product_id'], 'language_flag_id': self._LANGUAGE_FLAG.get(lang_code.lower()) or '3', } - headers = { - 'Referer': url, - 'Origin': url, - } - try: + + def download_playback(): stream_data = self._download_json( - 'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code, - video_id, 'Downloading stream info', query=query, headers=headers) - stream_data = self._detect_error(stream_data)['stream'] - except (ExtractorError, KeyError): - stream_data = None - if video_data.get('user_level', 0) > 0: - user = self._login(country_code, video_id) - if user: - query['identity'] = user['identity'] - stream_data = self._download_json( - 'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code, - video_id, 'Downloading stream info', query=query, headers=headers) - stream_data = self._detect_error(stream_data).get('stream') - else: - # preview is limited to 3min for non-members - # try to bypass the duration limit - duration_limit = True - query['duration'] = '180' - stream_data = self._download_json( - 'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code, - video_id, 'Downloading stream info', query=query, headers=headers) - try: - stream_data = self._detect_error(stream_data)['stream'] - except (ExtractorError, KeyError): # if still not working, give up - self._raise_login_required() + 'https://api-gateway-global.viu.com/api/playback/distribute', + video_id=video_id, query=query, fatal=False, note='Downloading stream info', + headers={ + 'Authorization': f'Bearer {self._auth_codes[country_code]}', + 'Referer': url, + 'Origin': url + }) + return self._detect_error(stream_data).get('stream') + + if not self._auth_codes.get(country_code): + self._auth_codes[country_code] = self._get_token(country_code, video_id) + stream_data = None + try: + stream_data = download_playback() + except (ExtractorError, KeyError): + token = self._login(country_code, video_id) + if token is not None: + query['identity'] = token + else: + # preview is limited to 3min for non-members. 
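+                # (the server enforces the 3-minute cap through the 'duration'
+                # query parameter; the formats loop further down rewrites that
+                # parameter to the full runtime, or a very large fallback,
+                # once a stream URL has been obtained)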
But we can try to bypass it + duration_limit, query['duration'] = True, '180' + try: + stream_data = download_playback() + except (ExtractorError, KeyError): + if token is not None: + raise + self.raise_login_required(method='password') if not stream_data: raise ExtractorError('Cannot get stream info', expected=True) - stream_sizes = stream_data.get('size', {}) formats = [] - for vid_format, stream_url in stream_data.get('url', {}).items(): - height = int_or_none(self._search_regex( - r's(\d+)p', vid_format, 'height', default=None)) + for vid_format, stream_url in (stream_data.get('url') or {}).items(): + height = int(self._search_regex(r's(\d+)p', vid_format, 'height', default=None)) # bypass preview duration limit if duration_limit: - stream_url = compat_urlparse.urlparse(stream_url) - query = dict(compat_urlparse.parse_qsl(stream_url.query, keep_blank_values=True)) - time_duration = int_or_none(video_data.get('time_duration')) + stream_url = urllib.parse.urlparse(stream_url) query.update({ - 'duration': time_duration if time_duration > 0 else '9999999', + 'duration': video_data.get('time_duration') or '9999999', 'duration_start': '0', }) - stream_url = stream_url._replace(query=compat_urlparse.urlencode(query)).geturl() + stream_url = stream_url._replace(query=urllib.parse.urlencode(dict( + urllib.parse.parse_qsl(stream_url.query, keep_blank_values=True)))).geturl() formats.append({ 'format_id': vid_format, 'url': stream_url, 'height': height, 'ext': 'mp4', - 'filesize': int_or_none(stream_sizes.get(vid_format)) + 'filesize': try_get(stream_data, lambda x: x['size'][vid_format], int) }) self._sort_formats(formats) subtitles = {} - for sub in video_data.get('subtitle', []): + for sub in video_data.get('subtitle') or []: sub_url = sub.get('url') if not sub_url: continue @@ -383,17 +374,16 @@ class ViuOTTIE(InfoExtractor): 'ext': 'srt', }) - title = video_data['synopsis'].strip() - + title = strip_or_none(video_data.get('synopsis')) return { 'id': video_id, 'title': title, 'description': video_data.get('description'), - 'series': product_data.get('series', {}).get('name'), + 'series': try_get(product_data, lambda x: x['series']['name']), 'episode': title, 'episode_number': int_or_none(video_data.get('number')), 'duration': int_or_none(stream_data.get('duration')), - 'thumbnail': video_data.get('cover_image_url'), + 'thumbnail': url_or_none(video_data.get('cover_image_url')), 'formats': formats, 'subtitles': subtitles, } From 28787f16c6811cc4f2cc067d5739caf257b3ea75 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 25 Mar 2022 12:38:33 +0530 Subject: [PATCH 0851/2552] [downloader] Fix invocation of `HttpieFD` Closes #3154 --- yt_dlp/downloader/external.py | 36 ++++++++++++++++++++--------------- yt_dlp/utils.py | 8 ++++++++ 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index c900b3842..71af705ea 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -13,6 +13,7 @@ from ..compat import ( ) from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS from ..utils import ( + classproperty, cli_option, cli_valueless_option, cli_bool_option, @@ -73,17 +74,23 @@ class ExternalFD(FragmentFD): def get_basename(cls): return cls.__name__[:-2].lower() + @classproperty + def EXE_NAME(cls): + return cls.get_basename() + @property def exe(self): - return self.get_basename() + return self.EXE_NAME @classmethod def available(cls, path=None): - path = check_executable(path or 
cls.get_basename(), [cls.AVAILABLE_OPT]) - if path: - cls.exe = path - return path - return False + path = check_executable( + cls.EXE_NAME if path in (None, cls.get_basename()) else path, + [cls.AVAILABLE_OPT]) + if not path: + return False + cls.exe = path + return path @classmethod def supports(cls, info_dict): @@ -106,7 +113,7 @@ class ExternalFD(FragmentFD): def _configuration_args(self, keys=None, *args, **kwargs): return _configuration_args( - self.get_basename(), self.params.get('external_downloader_args'), self.get_basename(), + self.get_basename(), self.params.get('external_downloader_args'), self.EXE_NAME, keys, *args, **kwargs) def _call_downloader(self, tmpfilename, info_dict): @@ -306,10 +313,7 @@ class Aria2cFD(ExternalFD): class HttpieFD(ExternalFD): AVAILABLE_OPT = '--version' - - @classmethod - def available(cls, path=None): - return super().available(path or 'http') + EXE_NAME = 'http' def _make_cmd(self, tmpfilename, info_dict): cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']] @@ -510,11 +514,13 @@ class AVconvFD(FFmpegFD): pass -_BY_NAME = dict( - (klass.get_basename(), klass) +_BY_NAME = { + klass.get_basename(): klass for name, klass in globals().items() if name.endswith('FD') and name not in ('ExternalFD', 'FragmentFD') -) +} + +_BY_EXE = {klass.EXE_NAME: klass for klass in _BY_NAME.values()} def list_external_downloaders(): @@ -526,4 +532,4 @@ def get_external_downloader(external_downloader): downloader . """ # Drop .exe extension on Windows bn = os.path.splitext(os.path.basename(external_downloader))[0] - return _BY_NAME.get(bn) + return _BY_NAME.get(bn, _BY_EXE.get(bn)) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index c71a7b833..4961ba14d 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -5502,3 +5502,11 @@ has_websockets = bool(compat_websockets) def merge_headers(*dicts): """Merge dicts of http headers case insensitively, prioritizing the latter ones""" return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))} + + +class classproperty: + def __init__(self, f): + self.f = f + + def __get__(self, _, cls): + return self.f(cls) From b1a7cd056a4613b49f93aa249f6c7ecf5a828185 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 25 Mar 2022 13:32:54 +0530 Subject: [PATCH 0852/2552] Treat multiple `--match-filters` as OR Closes #3144 --- yt_dlp/options.py | 19 +++++++++---------- yt_dlp/utils.py | 12 +++++++----- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 123a07bfa..34a2e1103 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -465,19 +465,18 @@ def create_parser(): metavar='COUNT', dest='max_views', default=None, type=int, help=optparse.SUPPRESS_HELP) selection.add_option( - '--match-filter', - metavar='FILTER', dest='match_filter', default=None, + '--match-filters', + metavar='FILTER', dest='match_filter', action='append', help=( 'Generic video filter. Any field (see "OUTPUT TEMPLATE") can be compared with a ' 'number or a string using the operators defined in "Filtering formats". ' - 'You can also simply specify a field to match if the field is present ' - 'and "!field" to check if the field is not present. In addition, ' - 'Python style regular expression matching can be done using "~=", ' - 'and multiple filters can be checked with "&". ' - 'Use a "\\" to escape "&" or quotes if needed. 
Eg: --match-filter ' - '"!is_live & like_count>?100 & description~=\'(?i)\\bcats \\& dogs\\b\'" ' - 'matches only videos that are not live, has a like count more than 100 ' - '(or the like field is not available), and also has a description ' + 'You can also simply specify a field to match if the field is present, ' + 'use "!field" to check if the field is not present, and "&" to check multiple conditions. ' + 'Use a "\\" to escape "&" or quotes if needed. If used multiple times, ' + 'the filter matches if atleast one of the conditions are met. Eg: --match-filter ' + '!is_live --match-filter "like_count>?100 & description~=\'(?i)\\bcats \\& dogs\\b\'" ' + 'matches only videos that are not live OR those that have a like count more than 100 ' + '(or the like field is not available) and also has a description ' 'that contains the phrase "cats & dogs" (ignoring case)')) selection.add_option( '--no-match-filter', diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 4961ba14d..4de5f9626 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -3614,16 +3614,18 @@ def match_str(filter_str, dct, incomplete=False): for filter_part in re.split(r'(? Date: Fri, 25 Mar 2022 17:24:39 +0900 Subject: [PATCH 0853/2552] [utils] WebSocketsWrapper: Allow omitting `__enter__` invocation (#3187) Authored by: Lesmiscore --- yt_dlp/extractor/fc2.py | 1 - yt_dlp/utils.py | 7 +++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/fc2.py b/yt_dlp/extractor/fc2.py index 7fc6b0e3d..54a83aa16 100644 --- a/yt_dlp/extractor/fc2.py +++ b/yt_dlp/extractor/fc2.py @@ -212,7 +212,6 @@ class FC2LiveIE(InfoExtractor): 'Accept': '*/*', 'User-Agent': std_headers['User-Agent'], }) - ws.__enter__() self.write_debug('[debug] Sending HLS server request') diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 4de5f9626..e359c6bba 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -5436,15 +5436,18 @@ class Config: class WebSocketsWrapper(): """Wraps websockets module to use in non-async scopes""" - def __init__(self, url, headers=None): + def __init__(self, url, headers=None, connect=True): self.loop = asyncio.events.new_event_loop() self.conn = compat_websockets.connect( url, extra_headers=headers, ping_interval=None, close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf')) + if connect: + self.__enter__() atexit.register(self.__exit__, None, None, None) def __enter__(self): - self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop) + if not self.pool: + self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop) return self def send(self, *args): From 6db9c4d57d033fb22c94a2e6f1ecf0207e700b4c Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 25 Mar 2022 14:06:46 +0530 Subject: [PATCH 0854/2552] Ignore format-specific fields in initial pass of `--match-filter` Closes #3074 --- test/helper.py | 10 +--------- test/test_YoutubeDL.py | 2 +- yt_dlp/YoutubeDL.py | 12 +++++++++++- yt_dlp/utils.py | 16 ++++++++++++---- 4 files changed, 25 insertions(+), 15 deletions(-) diff --git a/test/helper.py b/test/helper.py index 1070e0668..28c21b2eb 100644 --- a/test/helper.py +++ b/test/helper.py @@ -196,15 +196,7 @@ def expect_dict(self, got_dict, expected_dict): def sanitize_got_info_dict(got_dict): IGNORED_FIELDS = ( - # Format keys - 'url', 'manifest_url', 'format', 'format_id', 'format_note', 'width', 'height', 'resolution', - 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'vbr', 'fps', 'vcodec', 'container', 'filesize', - 'filesize_approx', 'player_url', 'protocol', 
'fragment_base_url', 'fragments', 'preference',
-        'language', 'language_preference', 'quality', 'source_preference', 'http_headers',
-        'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
-
-        # RTMP formats
-        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time',
+        *YoutubeDL._format_fields,
 
         # Lists
         'formats', 'thumbnails', 'subtitles', 'automatic_captions', 'comments', 'entries',
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index 7637297be..f9b40501d 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -931,7 +931,7 @@ class TestYoutubeDL(unittest.TestCase):
         res = get_videos()
         self.assertEqual(res, ['1', '2'])
 
-        def f(v):
+        def f(v, incomplete):
             if v['id'] == '1':
                 return None
             else:
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 740f9a7bd..5771fbcf7 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -513,6 +513,16 @@ class YoutubeDL(object):
         'track_number', 'disc_number', 'release_year',
     ))
 
+    _format_fields = {
+        # NB: Keep in sync with the docstring of extractor/common.py
+        'url', 'manifest_url', 'ext', 'format', 'format_id', 'format_note',
+        'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr',
+        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
+        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
+        'preference', 'language', 'language_preference', 'quality', 'source_preference',
+        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
+        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
+    }
     _format_selection_exts = {
         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
         'video': {'mp4', 'flv', 'webm', '3gp'},
@@ -2541,7 +2551,7 @@ class YoutubeDL(object):
 
         info_dict, _ = self.pre_process(info_dict)
 
-        if self._match_entry(info_dict) is not None:
+        if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
             return info_dict
 
         self.post_extract(info_dict)
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index e359c6bba..6854dbb63 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -3545,6 +3545,11 @@ def _match_one(filter_part, dct, incomplete):
         '=': operator.eq,
     }
 
+    if isinstance(incomplete, bool):
+        is_incomplete = lambda _: incomplete
+    else:
+        is_incomplete = lambda k: k in incomplete
+
     operator_rex = re.compile(r'''(?x)\s*
         (?P<key>[a-z_]+)
         \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
@@ -3583,7 +3588,7 @@ def _match_one(filter_part, dct, incomplete):
         if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
             raise ValueError('Operator %s only supports string values!' % m['op'])
         if actual_value is None:
-            return incomplete or m['none_inclusive']
+            return is_incomplete(m['key']) or m['none_inclusive']
         return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
 
     UNARY_OPERATORS = {
@@ -3598,7 +3603,7 @@ def _match_one(filter_part, dct, incomplete):
     if m:
         op = UNARY_OPERATORS[m.group('op')]
         actual_value = dct.get(m.group('key'))
-        if incomplete and actual_value is None:
+        if is_incomplete(m.group('key')) and actual_value is None:
             return True
         return op(actual_value)
 
@@ -3606,8 +3611,11 @@ def _match_one(filter_part, dct, incomplete):
 
 
 def match_str(filter_str, dct, incomplete=False):
-    """ Filter a dictionary with a simple string syntax. 
Returns True (=passes filter) or false - When incomplete, all conditions passes on missing fields + """ Filter a dictionary with a simple string syntax. + @returns Whether the filter passes + @param incomplete Set of keys that is expected to be missing from dct. + Can be True/False to indicate all/none of the keys may be missing. + All conditions on incomplete keys pass if the key is missing """ return all( _match_one(filter_part.replace(r'\&', '&'), dct, incomplete) From 34baa9fdf050895c4a09107964d396079da5bb45 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 26 Mar 2022 07:39:36 +0530 Subject: [PATCH 0855/2552] [outtmpl] Fix replacement/default when used with alternate --- test/test_YoutubeDL.py | 2 ++ yt_dlp/YoutubeDL.py | 11 ++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index f9b40501d..c9108c5b6 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -818,6 +818,8 @@ class TestYoutubeDL(unittest.TestCase): test('%(id&foo)s.bar', 'foo.bar') test('%(title&foo)s.bar', 'NA.bar') test('%(title&foo|baz)s.bar', 'baz.bar') + test('%(x,id&foo|baz)s.bar', 'foo.bar') + test('%(x,title&foo|baz)s.bar', 'baz.bar') # Laziness def gen(): diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 5771fbcf7..478bdacca 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1099,10 +1099,11 @@ class YoutubeDL(object): (?P{field}) (?P(?:{math_op}{math_field})*) (?:>(?P.+?))? - (?P(?.*?))? - (?:\|(?P.*?))? - $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE)) + (?P + (?P(?.*?))? + (?:\|(?P.*?))? + )$'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE)) def _traverse_infodict(k): k = k.split('.') @@ -1173,7 +1174,7 @@ class YoutubeDL(object): value = get_value(mobj) replacement = mobj['replacement'] if value is None and mobj['alternate']: - mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:]) + mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:]) else: break From 5cf34021f502c56cb8436613400d7ef510f722cc Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 27 Mar 2022 04:52:11 +0530 Subject: [PATCH 0856/2552] [Concat] Ensure final directory exists Fixes https://github.com/yt-dlp/yt-dlp/issues/3181#issuecomment-1079622589 --- yt_dlp/postprocessor/ffmpeg.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 234ddeff0..ec618b040 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -1129,6 +1129,8 @@ class FFmpegConcatPP(FFmpegPostProcessor): super().__init__(downloader) def concat_files(self, in_files, out_file): + if not self._downloader._ensure_dir_exists(out_file): + return if len(in_files) == 1: if os.path.realpath(in_files[0]) != os.path.realpath(out_file): self.to_screen(f'Moving "{in_files[0]}" to "{out_file}"') From 4a3175fc4cff22343bd23c6cb7d40dbd7d0ccbf5 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 27 Mar 2022 09:27:38 +0530 Subject: [PATCH 0857/2552] [VideoConvertor] Ensure all streams are copied Closes #3200 --- yt_dlp/postprocessor/ffmpeg.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index ec618b040..a5ed6d184 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -553,9 +553,9 @@ class FFmpegVideoConvertorPP(FFmpegPostProcessor): @staticmethod def _options(target_ext): + yield from 
FFmpegPostProcessor.stream_copy_opts(False)
         if target_ext == 'avi':
-            return ['-c:v', 'libxvid', '-vtag', 'XVID']
-        return []
+            yield from ('-c:v', 'libxvid', '-vtag', 'XVID')
 
     @PostProcessor._restrict_to(images=False)
     def run(self, info):
 
From fd2ad7cb245423e49db1be9d9654c7dd3103619a Mon Sep 17 00:00:00 2001
From: coletdev 
Date: Sun, 27 Mar 2022 18:20:25 +1300
Subject: [PATCH 0858/2552] [youtube:tab] Return shorts url if video is a short (#3168)

Allows filtering out shorts from feeds with `--match-filter`
Closes #3165
Authored-by: coletdjnz
---
 yt_dlp/extractor/youtube.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 8ee688798..0726e27b4 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -818,12 +818,17 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
             renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'), get_all=False, expected_type=str)
         badges = self._extract_badges(renderer)
         thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
+        navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
+            renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'), expected_type=str))
+        url = f'https://www.youtube.com/watch?v={video_id}'
+        if overlay_style == 'SHORTS' or (navigation_url and '/shorts/' in navigation_url):
+            url = f'https://www.youtube.com/shorts/{video_id}'
 
         return {
             '_type': 'url',
             'ie_key': YoutubeIE.ie_key(),
             'id': video_id,
-            'url': f'https://www.youtube.com/watch?v={video_id}',
+            'url': url,
             'title': title,
             'description': description,
             'duration': duration,
From 5c3895fff150871fde273a10c55691403931b4dc Mon Sep 17 00:00:00 2001
From: pukkandan 
Date: Sun, 27 Mar 2022 10:04:04 +0530
Subject: [PATCH 0859/2552] [outtmpl] Limit changes during sanitization

Closes #2761
---
 README.md           |  1 +
 test/test_utils.py  |  6 ++++--
 yt_dlp/YoutubeDL.py |  7 +++++--
 yt_dlp/options.py   |  2 +-
 yt_dlp/utils.py     | 34 +++++++++++++++++++---------------
 5 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/README.md b/README.md
index a2e2b34c3..c184d23c4 100644
--- a/README.md
+++ b/README.md
@@ -144,6 +144,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu
 * Some private fields such as filenames are removed by default from the infojson. Use `--no-clean-infojson` or `--compat-options no-clean-infojson` to revert this
 * When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the separate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this
 * `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi`
+* youtube-dl tries to remove some superfluous punctuation from filenames. While this can sometimes be helpful, it is often undesirable. So yt-dlp tries to keep the fields in the filenames as close to their original values as possible. 
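A rough sketch of the difference, based on the expectations in the updated tests below (calling the internal `sanitize_filename` helper directly, which is not a stable public API):

    from yt_dlp.utils import sanitize_filename

    sanitize_filename('--gasdgf')               # '--gasdgf': leading dashes are now kept
    sanitize_filename('--gasdgf', is_id=False)  # '_-gasdgf': the old, youtube-dl-style result
    sanitize_filename('.gasdgf')                # '.gasdgf': a leading dot is now kept too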
You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior For ease of use, a few more compat options are available: * `--compat-options all`: Use all compat options diff --git a/test/test_utils.py b/test/test_utils.py index a7f1b0e94..660ce03bf 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -160,10 +160,12 @@ class TestUtil(unittest.TestCase): sanitize_filename('New World record at 0:12:34'), 'New World record at 0_12_34') - self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf') + self.assertEqual(sanitize_filename('--gasdgf'), '--gasdgf') self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf') - self.assertEqual(sanitize_filename('.gasdgf'), 'gasdgf') + self.assertEqual(sanitize_filename('--gasdgf', is_id=False), '_-gasdgf') + self.assertEqual(sanitize_filename('.gasdgf'), '.gasdgf') self.assertEqual(sanitize_filename('.gasdgf', is_id=True), '.gasdgf') + self.assertEqual(sanitize_filename('.gasdgf', is_id=False), 'gasdgf') forbidden = '"\0\\/' for fc in forbidden: diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 478bdacca..c2f4f3a95 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -87,6 +87,7 @@ from .utils import ( MaxDownloadsReached, merge_headers, network_exceptions, + NO_DEFAULT, number_of_digits, orderedSet, OUTTMPL_TYPES, @@ -1150,8 +1151,10 @@ class YoutubeDL(object): na = self.params.get('outtmpl_na_placeholder', 'NA') def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')): - return sanitize_filename(str(value), restricted=restricted, - is_id=re.search(r'(^|[_.])id(\.|$)', key)) + return sanitize_filename(str(value), restricted=restricted, is_id=( + bool(re.search(r'(^|[_.])id(\.|$)', key)) + if 'filename-sanitization' in self.params.get('compat_opts', []) + else NO_DEFAULT)) sanitizer = sanitize if callable(sanitize) else filename_sanitizer sanitize = bool(sanitize) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 34a2e1103..eb306898a 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -338,7 +338,7 @@ def create_parser(): action='callback', callback=_set_from_options_callback, callback_kwargs={ 'allowed_values': { - 'filename', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles', + 'filename', 'filename-sanitization', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles', 'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge', 'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-attach-info-json', 'embed-metadata', 'embed-thumbnail-atomicparsley', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi', diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index e9eaf7b4e..6854dbb63 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -705,36 +705,40 @@ def timeconvert(timestr): return timestamp -def sanitize_filename(s, restricted=False, is_id=False): +def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT): """Sanitizes a string so it could be used as part of a filename. - If restricted is set, use a stricter subset of allowed characters. - Set is_id if this is not an arbitrary string, but an ID that should be kept - if possible. + @param restricted Use a stricter subset of allowed characters + @param is_id Whether this is an ID that should be kept unchanged if possible. 
+ If unset, yt-dlp's new sanitization rules are in effect """ + if s == '': + return '' + def replace_insane(char): if restricted and char in ACCENT_CHARS: return ACCENT_CHARS[char] elif not restricted and char == '\n': - return ' ' + return '\0 ' elif char == '?' or ord(char) < 32 or ord(char) == 127: return '' elif char == '"': return '' if restricted else '\'' elif char == ':': - return '_-' if restricted else ' -' + return '\0_\0-' if restricted else '\0 \0-' elif char in '\\/|*<>': - return '_' - if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()): - return '_' - if restricted and ord(char) > 127: - return '_' + return '\0_' + if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127): + return '\0_' return char - if s == '': - return '' - # Handle timestamps - s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) + s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) # Handle timestamps result = ''.join(map(replace_insane, s)) + if is_id is NO_DEFAULT: + result = re.sub('(\0.)(?:(?=\\1)..)+', r'\1', result) # Remove repeated substitute chars + STRIP_RE = '(?:\0.|[ _-])*' + result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result) # Remove substitute chars from start/end + result = result.replace('\0', '') or '_' + if not is_id: while '__' in result: result = result.replace('__', '_') From 727029c50889d4c36dcfcffa2be6cdec309edf56 Mon Sep 17 00:00:00 2001 From: shirt <2660574+shirt-dev@users.noreply.github.com> Date: Sun, 27 Mar 2022 01:28:26 -0400 Subject: [PATCH 0860/2552] [youtube] Detect DRM better Authored by: shirt-dev --- yt_dlp/extractor/youtube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 0726e27b4..bd3a7d36b 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3023,7 +3023,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[]) for fmt in streaming_formats: - if fmt.get('targetDurationSec') or fmt.get('drmFamilies'): + if fmt.get('targetDurationSec'): continue itag = str_or_none(fmt.get('itag')) @@ -3105,6 +3105,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'fps': int_or_none(fmt.get('fps')) or None, 'height': height, 'quality': q(quality), + 'has_drm': bool(fmt.get('drmFamilies')), 'tbr': tbr, 'url': fmt_url, 'width': int_or_none(fmt.get('width')), From c0b6e5c74d59d5260441ceaeb5dac3626780b31b Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 27 Mar 2022 11:38:26 +0530 Subject: [PATCH 0861/2552] Show warning when all media formats have DRM Related: #1379 --- yt_dlp/YoutubeDL.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index c2f4f3a95..434bef65f 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2459,6 +2459,11 @@ class YoutubeDL(object): info_dict['__has_drm'] = any(f.get('has_drm') for f in formats) if not self.params.get('allow_unplayable_formats'): formats = [f for f in formats if not f.get('has_drm')] + if info_dict['__has_drm'] and all( + f.get('acodec') == f.get('vcodec') == 'none' for f in formats): + self.report_warning( + 'This video is DRM protected and only images are available for download. 
' + 'Use --list-formats to see them') get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start')) if not get_from_start: @@ -2631,8 +2636,9 @@ class YoutubeDL(object): if not formats_to_download: if not self.params.get('ignore_no_formats_error'): - raise ExtractorError('Requested format is not available', expected=True, - video_id=info_dict['id'], ie=info_dict['extractor']) + raise ExtractorError( + 'Requested format is not available. Use --list-formats for a list of available formats', + expected=True, video_id=info_dict['id'], ie=info_dict['extractor']) self.report_warning('Requested format is not available') # Process what we can, even without any available formats. formats_to_download = [{}] From 18e49408259fa26c4d6f89f6d7ffdce59888d151 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 27 Mar 2022 14:04:20 +0530 Subject: [PATCH 0862/2552] [youtube] Add extractor-arg to skip auto-translated subs --- README.md | 2 +- yt_dlp/extractor/youtube.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c184d23c4..955a98557 100644 --- a/README.md +++ b/README.md @@ -1656,7 +1656,7 @@ Some extractors accept additional arguments which can be passed using `--extract The following extractors use this feature: #### youtube -* `skip`: `hls` or `dash` (or both) to skip download of the respective manifests +* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and auto-translated subtitles respectively * `player_client`: Clients to extract video data from. The main clients are `web`, `android`, `ios`, `mweb`. These also have `_music`, `_embedded`, `_agegate`, and `_creator` variants (Eg: `web_embedded`) (`mweb` has only `_agegate`). By default, `android,web` is used, but the agegate and creator variants are added as required for age-gated videos. Similarly the music variants are added for `music.youtube.com` urls. You can also use `all` to use all the clients, and `default` for the default clients. * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details * `include_live_dash`: Include live dash formats even without `--live-from-start` (These formats don't download properly) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index bd3a7d36b..1655f99e3 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3493,6 +3493,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if not trans_code: continue if caption_track.get('kind') != 'asr': + if 'translated_subs' in self._configuration_arg('skip'): + continue trans_code += f'-{lang_code}' trans_name += format_field(lang_name, template=' from %s') # Add an "-orig" label to the original language so that it can be distinguished. 
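As a usage illustration for the `skip` extractor-arg extended above (a minimal sketch when embedding yt-dlp; the video URL is only a placeholder):

    import yt_dlp

    # Skip only the auto-translated subtitle tracks; 'hls' and 'dash' can be listed too
    opts = {'extractor_args': {'youtube': {'skip': ['translated_subs']}}}
    with yt_dlp.YoutubeDL(opts) as ydl:
        info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc', download=False)
    print(sorted(info.get('automatic_captions', {})))

The CLI equivalent is `--extractor-args "youtube:skip=translated_subs"`.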
From 1235d333ab07456d6bd66b867c88e92a1a11a526 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 27 Mar 2022 14:06:26 +0530 Subject: [PATCH 0863/2552] [youtube] Fix auto-translated automatic captions d49669acad71f640ffd8b78f0ea7911ae1f67720 only covered ASR Closes #2956 --- yt_dlp/extractor/youtube.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 1655f99e3..4d7e79fbf 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3479,6 +3479,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): subtitles, automatic_captions = {}, {} for lang_code, caption_track in captions.items(): base_url = caption_track.get('baseUrl') + orig_lang = parse_qs(base_url).get('lang', [None])[-1] if not base_url: continue lang_name = self._get_text(caption_track, 'name', max_runs=1) @@ -3492,6 +3493,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): for trans_code, trans_name in translation_languages.items(): if not trans_code: continue + orig_trans_code = trans_code if caption_track.get('kind') != 'asr': if 'translated_subs' in self._configuration_arg('skip'): continue @@ -3499,14 +3501,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): trans_name += format_field(lang_name, template=' from %s') # Add an "-orig" label to the original language so that it can be distinguished. # The subs are returned without "-orig" as well for compatibility - if lang_code == f'a-{trans_code}': + if lang_code == f'a-{orig_trans_code}': process_language( automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {}) # Setting tlang=lang returns damaged subtitles. - # Not using lang_code == f'a-{trans_code}' here for future-proofing - orig_lang = parse_qs(base_url).get('lang', [None])[-1] process_language(automatic_captions, base_url, trans_code, trans_name, - {} if orig_lang == trans_code else {'tlang': trans_code}) + {} if orig_lang == orig_trans_code else {'tlang': trans_code}) info['automatic_captions'] = automatic_captions info['subtitles'] = subtitles From 5b4bb715e65581b23214001379ebb13ab814edde Mon Sep 17 00:00:00 2001 From: mehq <11481344+mehq@users.noreply.github.com> Date: Sun, 27 Mar 2022 14:57:05 +0600 Subject: [PATCH 0864/2552] [BanBye] Add extractor (#3177) Closes #3175 Authored by: mehq --- yt_dlp/extractor/banbye.py | 153 +++++++++++++++++++++++++++++++++ yt_dlp/extractor/extractors.py | 4 + 2 files changed, 157 insertions(+) create mode 100644 yt_dlp/extractor/banbye.py diff --git a/yt_dlp/extractor/banbye.py b/yt_dlp/extractor/banbye.py new file mode 100644 index 000000000..3d4d36ec3 --- /dev/null +++ b/yt_dlp/extractor/banbye.py @@ -0,0 +1,153 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import math + +from .common import InfoExtractor +from ..compat import ( + compat_urllib_parse_urlparse, + compat_parse_qs, +) +from ..utils import ( + format_field, + InAdvancePagedList, + traverse_obj, + unified_timestamp, +) + + +class BanByeBaseIE(InfoExtractor): + _API_BASE = 'https://api.banbye.com' + _CDN_BASE = 'https://cdn.banbye.com' + _VIDEO_BASE = 'https://banbye.com/watch' + + @staticmethod + def _extract_playlist_id(url, param='playlist'): + return compat_parse_qs( + compat_urllib_parse_urlparse(url).query).get(param, [None])[0] + + def _extract_playlist(self, playlist_id): + data = self._download_json(f'{self._API_BASE}/playlists/{playlist_id}', playlist_id) + return self.playlist_result([ + self.url_result(f'{self._VIDEO_BASE}/{video_id}', BanByeIE) + for video_id in 
data['videoIds']], playlist_id, data.get('name')) + + +class BanByeIE(BanByeBaseIE): + _VALID_URL = r'https?://(?:www\.)?banbye.com/(?:en/)?watch/(?P\w+)' + _TESTS = [{ + 'url': 'https://banbye.com/watch/v_ytfmvkVYLE8T', + 'md5': '2f4ea15c5ca259a73d909b2cfd558eb5', + 'info_dict': { + 'id': 'v_ytfmvkVYLE8T', + 'ext': 'mp4', + 'title': 'md5:5ec098f88a0d796f987648de6322ba0f', + 'description': 'md5:4d94836e73396bc18ef1fa0f43e5a63a', + 'uploader': 'wRealu24', + 'channel_id': 'ch_wrealu24', + 'channel_url': 'https://banbye.com/channel/ch_wrealu24', + 'timestamp': 1647604800, + 'upload_date': '20220318', + 'duration': 1931, + 'thumbnail': r're:https?://.*\.webp', + 'tags': 'count:5', + 'like_count': int, + 'dislike_count': int, + 'view_count': int, + 'comment_count': int, + }, + }, { + 'url': 'https://banbye.com/watch/v_2JjQtqjKUE_F?playlistId=p_Ld82N6gBw_OJ', + 'info_dict': { + 'title': 'Krzysztof Karoń', + 'id': 'p_Ld82N6gBw_OJ', + }, + 'playlist_count': 9, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + playlist_id = self._extract_playlist_id(url, 'playlistId') + + if self._yes_playlist(playlist_id, video_id): + return self._extract_playlist(playlist_id) + + data = self._download_json(f'{self._API_BASE}/videos/{video_id}', video_id) + thumbnails = [{ + 'id': f'{quality}p', + 'url': f'{self._CDN_BASE}/video/{video_id}/{quality}.webp', + } for quality in [48, 96, 144, 240, 512, 1080]] + formats = [{ + 'format_id': f'http-{quality}p', + 'quality': quality, + 'url': f'{self._CDN_BASE}/video/{video_id}/{quality}.mp4', + } for quality in data['quality']] + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': data.get('title'), + 'description': data.get('desc'), + 'uploader': traverse_obj(data, ('channel', 'name')), + 'channel_id': data.get('channelId'), + 'channel_url': format_field(data, 'channelId', 'https://banbye.com/channel/%s'), + 'timestamp': unified_timestamp(data.get('publishedAt')), + 'duration': data.get('duration'), + 'tags': data.get('tags'), + 'formats': formats, + 'thumbnails': thumbnails, + 'like_count': data.get('likes'), + 'dislike_count': data.get('dislikes'), + 'view_count': data.get('views'), + 'comment_count': data.get('commentCount'), + } + + +class BanByeChannelIE(BanByeBaseIE): + _VALID_URL = r'https?://(?:www\.)?banbye.com/(?:en/)?channel/(?P\w+)' + _TESTS = [{ + 'url': 'https://banbye.com/channel/ch_wrealu24', + 'info_dict': { + 'title': 'wRealu24', + 'id': 'ch_wrealu24', + 'description': 'md5:da54e48416b74dfdde20a04867c0c2f6', + }, + 'playlist_mincount': 791, + }, { + 'url': 'https://banbye.com/channel/ch_wrealu24?playlist=p_Ld82N6gBw_OJ', + 'info_dict': { + 'title': 'Krzysztof Karoń', + 'id': 'p_Ld82N6gBw_OJ', + }, + 'playlist_count': 9, + }] + _PAGE_SIZE = 100 + + def _real_extract(self, url): + channel_id = self._match_id(url) + playlist_id = self._extract_playlist_id(url) + + if playlist_id: + return self._extract_playlist(playlist_id) + + def page_func(page_num): + data = self._download_json(f'{self._API_BASE}/videos', channel_id, query={ + 'channelId': channel_id, + 'sort': 'new', + 'limit': self._PAGE_SIZE, + 'offset': page_num * self._PAGE_SIZE, + }, note=f'Downloading page {page_num+1}') + return [ + self.url_result(f"{self._VIDEO_BASE}/{video['_id']}", BanByeIE) + for video in data['items'] + ] + + channel_data = self._download_json(f'{self._API_BASE}/channels/{channel_id}', channel_id) + entries = InAdvancePagedList( + page_func, + math.ceil(channel_data['videoCount'] / self._PAGE_SIZE), + self._PAGE_SIZE) + + return 
self.playlist_result( + entries, channel_id, channel_data.get('name'), channel_data.get('description')) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 867304e75..7a9fd9467 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -122,6 +122,10 @@ from .awaan import ( ) from .azmedien import AZMedienIE from .baidu import BaiduVideoIE +from .banbye import ( + BanByeIE, + BanByeChannelIE, +) from .bandaichannel import BandaiChannelIE from .bandcamp import ( BandcampIE, From 4628a3aa751ac0b2161b216662f0e959eb9bd206 Mon Sep 17 00:00:00 2001 From: Tim Schindler Date: Sun, 27 Mar 2022 11:00:38 +0200 Subject: [PATCH 0865/2552] [ITProTV] Add extractor (#3196) Authored by: aaearon --- yt_dlp/extractor/extractors.py | 6 ++ yt_dlp/extractor/itprotv.py | 141 +++++++++++++++++++++++++++++++++ 2 files changed, 147 insertions(+) create mode 100644 yt_dlp/extractor/itprotv.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 7a9fd9467..de88a0f4a 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -678,6 +678,12 @@ from .iqiyi import ( IqIE, IqAlbumIE ) + +from .itprotv import ( + ITProTVIE, + ITProTVCourseIE +) + from .itv import ( ITVIE, ITVBTCCIE, diff --git a/yt_dlp/extractor/itprotv.py b/yt_dlp/extractor/itprotv.py new file mode 100644 index 000000000..64cb4e69a --- /dev/null +++ b/yt_dlp/extractor/itprotv.py @@ -0,0 +1,141 @@ +# coding: utf-8 + +import re + +from .common import InfoExtractor + +from ..utils import ( + int_or_none, + str_or_none, + traverse_obj, + urljoin +) + + +class ITProTVBaseIE(InfoExtractor): + _ENDPOINTS = { + 'course': 'course?url={}&brand=00002560-0000-3fa9-0000-1d61000035f3', + 'episode': 'brand/00002560-0000-3fa9-0000-1d61000035f3/episode?url={}' + } + + def _call_api(self, ep, item_id, webpage): + return self._download_json( + f'https://api.itpro.tv/api/urza/v3/consumer-web/{self._ENDPOINTS[ep].format(item_id)}', + item_id, note=f'Fetching {ep} data API', + headers={'Authorization': f'Bearer {self._fetch_jwt(webpage)}'})[ep] + + def _fetch_jwt(self, webpage): + return self._search_regex(r'{"passedToken":"([\w-]+\.[\w-]+\.[\w-]+)",', webpage, 'jwt') + + def _check_if_logged_in(self, webpage): + if re.match(r'{\s*member\s*:\s*null', webpage): + self.raise_login_required() + + +class ITProTVIE(ITProTVBaseIE): + _VALID_URL = r'https://app.itpro.tv/course/(?P[\w-]+)/(?P[\w-]+)' + _TESTS = [{ + 'url': 'https://app.itpro.tv/course/guided-tour/introductionitprotv', + 'md5': 'bca4a28c2667fd1a63052e71a94bb88c', + 'info_dict': { + 'id': 'introductionitprotv', + 'ext': 'mp4', + 'title': 'An Introduction to ITProTV 101', + 'thumbnail': 'https://itprotv-image-bucket.s3.amazonaws.com/getting-started/itprotv-101-introduction-PGM.11_39_56_02.Still001.png', + 'description': 'md5:b175c2c3061ce35a4dd33865b2c1da4e', + 'duration': 269, + 'series': 'ITProTV 101', + 'series_id': 'guided-tour', + 'availability': 'needs_auth', + 'chapter': 'ITProTV 101', + 'chapter_number': 1, + 'chapter_id': '5dbb3de426b46c0010b5d1b6' + }, + }, + { + 'url': 'https://app.itpro.tv/course/beyond-tech/job-interview-tips', + 'md5': '101a299b98c47ccf4c67f9f0951defa8', + 'info_dict': { + 'id': 'job-interview-tips', + 'ext': 'mp4', + 'title': 'Job Interview Tips', + 'thumbnail': 'https://s3.amazonaws.com:443/production-itprotv-thumbnails/2f370bf5-294d-4bbe-ab80-c0b5781630ea.png', + 'description': 'md5:30d8ba483febdf89ec85623aad3c3cb6', + 'duration': 267, + 'series': 'Beyond Tech', + 'series_id': 
'beyond-tech', + 'availability': 'needs_auth', + 'chapter': 'Job Development', + 'chapter_number': 2, + 'chapter_id': '5f7c78d424330c000edf04d9' + }, + }] + + def _real_extract(self, url): + episode_id, course_name = self._match_valid_url(url).group('id', 'course') + webpage = self._download_webpage(url, episode_id) + self._check_if_logged_in(webpage) + course = self._call_api('course', course_name, webpage) + episode = self._call_api('episode', episode_id, webpage) + + chapter_number, chapter = next(( + (i, topic) for i, topic in enumerate(course.get('topics') or [], 1) + if traverse_obj(topic, 'id') == episode.get('topic')), {}) + + return { + 'id': episode_id, + 'title': episode.get('title'), + 'description': episode.get('description'), + 'thumbnail': episode.get('thumbnail'), + 'formats': [ + {'url': episode[f'jwVideo{h}Embed'], 'height': h} + for h in (320, 480, 720, 1080) if episode.get(f'jwVideo{h}Embed') + ], + 'duration': int_or_none(episode.get('length')), + 'series': course.get('name'), + 'series_id': course.get('url'), + 'chapter': str_or_none(chapter.get('title')), + 'chapter_number': chapter_number, + 'chapter_id': str_or_none(chapter.get('id')), + 'subtitles': { + 'en': [{'ext': 'vtt', 'data': episode['enCaptionData']}] + } if episode.get('enCaptionData') else None, + } + + +class ITProTVCourseIE(ITProTVBaseIE): + _VALID_URL = r'https?://app.itpro.tv/course/(?P[\w-]+)/?(?:$|[#?])' + _TESTS = [ + { + 'url': 'https://app.itpro.tv/course/guided-tour', + 'info_dict': { + 'id': 'guided-tour', + 'description': 'md5:b175c2c3061ce35a4dd33865b2c1da4e', + 'title': 'ITProTV 101', + }, + 'playlist_count': 6 + }, + { + 'url': 'https://app.itpro.tv/course/beyond-tech', + 'info_dict': { + 'id': 'beyond-tech', + 'description': 'md5:44cd99855e7f81a15ce1269bd0621fed', + 'title': 'Beyond Tech' + }, + 'playlist_count': 15 + }, + ] + + def _real_extract(self, url): + course_id = self._match_id(url) + webpage = self._download_webpage(url, course_id) + self._check_if_logged_in(webpage) + course = self._call_api('course', course_id, webpage) + + entries = [self.url_result( + urljoin(url, f'{course_id}/{episode["url"]}'), ITProTVIE, + episode['url'], episode.get('title'), url_transparent=True) + for episode in course['episodes']] + + return self.playlist_result( + entries, course_id, course.get('name'), course.get('description')) From 47b8bf207b1206466f1aeaaf0c8ffec91be2fed0 Mon Sep 17 00:00:00 2001 From: Felix S Date: Sun, 27 Mar 2022 09:35:14 +0000 Subject: [PATCH 0866/2552] [go,viu] Extract subtitles from the m3u8 manifest (#3219) Authored by: fstirlitz --- yt_dlp/extractor/go.py | 8 +++++--- yt_dlp/extractor/viu.py | 3 +-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/go.py b/yt_dlp/extractor/go.py index 2ccc6df21..f92e16600 100644 --- a/yt_dlp/extractor/go.py +++ b/yt_dlp/extractor/go.py @@ -217,6 +217,7 @@ class GoIE(AdobePassIE): title = video_data['title'] formats = [] + subtitles = {} for asset in video_data.get('assets', {}).get('asset', []): asset_url = asset.get('value') if not asset_url: @@ -256,8 +257,10 @@ class GoIE(AdobePassIE): error_message = ', '.join([error['message'] for error in errors]) raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True) asset_url += '?' 
+ entitlement['uplynkData']['sessionKey']
-                formats.extend(self._extract_m3u8_formats(
-                    asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False))
+                fmts, subs = self._extract_m3u8_formats_and_subtitles(
+                    asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False)
+                formats.extend(fmts)
+                self._merge_subtitles(subs, target=subtitles)
             else:
                 f = {
                     'format_id': format_id,
@@ -281,7 +284,6 @@ class GoIE(AdobePassIE):
                 formats.append(f)
         self._sort_formats(formats)
 
-        subtitles = {}
         for cc in video_data.get('closedcaption', {}).get('src', []):
             cc_url = cc.get('value')
             if not cc_url:
diff --git a/yt_dlp/extractor/viu.py b/yt_dlp/extractor/viu.py
index abd553f18..b0a1fca68 100644
--- a/yt_dlp/extractor/viu.py
+++ b/yt_dlp/extractor/viu.py
@@ -88,10 +88,9 @@ class ViuIE(ViuBaseIE):
         #    r'(/hlsc_)[a-z]+(\d+\.m3u8)',
         #    r'\1whe\2', video_data['href'])
         m3u8_url = video_data['href']
-        formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4')
         self._sort_formats(formats)
 
-        subtitles = {}
         for key, value in video_data.items():
             mobj = re.match(r'^subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key)
             if not mobj:
                 continue

From 1c1b2f96ae9696ef16b1b27d1a007bf89c683a0c Mon Sep 17 00:00:00 2001
From: coletdev
Date: Mon, 28 Mar 2022 13:49:42 +1300
Subject: [PATCH 0867/2552] [youtube:tab] Fix duration extraction for shorts (#3171)

Related: https://github.com/TeamNewPipe/NewPipe/issues/8034

Authored-by: coletdjnz
---
 test/test_utils.py          |  2 ++
 yt_dlp/extractor/youtube.py |  6 ++++++
 yt_dlp/utils.py             | 12 ++++++------
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/test/test_utils.py b/test/test_utils.py
index 660ce03bf..31f168998 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -627,6 +627,8 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(parse_duration('3h 11m 53s'), 11513)
         self.assertEqual(parse_duration('3 hours 11 minutes 53 seconds'), 11513)
         self.assertEqual(parse_duration('3 hours 11 mins 53 secs'), 11513)
+        self.assertEqual(parse_duration('3 hours, 11 minutes, 53 seconds'), 11513)
+        self.assertEqual(parse_duration('3 hours, 11 mins, 53 secs'), 11513)
         self.assertEqual(parse_duration('62m45s'), 3765)
         self.assertEqual(parse_duration('6m59s'), 419)
         self.assertEqual(parse_duration('49s'), 49)
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 4d7e79fbf..e5097c264 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -807,6 +807,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
         description = self._get_text(renderer, 'descriptionSnippet')
         duration = parse_duration(self._get_text(
             renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
+        if duration is None:
+            duration = parse_duration(self._search_regex(
+                r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
+                traverse_obj(renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str),
+                video_id, default=None, group='duration'))
+
         view_count = self._get_count(renderer, 'viewCountText')
         uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
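Reviewer note: Shorts renderers expose no `lengthText`, so the fallback above pulls a spoken duration ("1 minute, 21 seconds") out of the accessibility label, which is why `parse_duration` in the utils.py hunk below learns to tolerate commas. A rough end-to-end illustration; the label text is a hypothetical example of the expected shape, not captured YouTube output:

```python
import re
from yt_dlp.utils import parse_duration

# Hypothetical accessibility label of the kind the fallback parses
label = 'Funny cats by SomeChannel 3 days ago 1 minute, 21 seconds 2,371,043 views - play Short'

m = re.search(
    r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
    label)
print(parse_duration(m.group('duration')))  # 81, but only with the comma-tolerant regex below
```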
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 6854dbb63..72f11691f 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -2644,23 +2644,23 @@ def parse_duration(s):
     m = re.match(
         r'''(?ix)(?:P?
             (?:
-                [0-9]+\s*y(?:ears?)?\s*
+                [0-9]+\s*y(?:ears?)?,?\s*
             )?
             (?:
-                [0-9]+\s*m(?:onths?)?\s*
+                [0-9]+\s*m(?:onths?)?,?\s*
             )?
             (?:
-                [0-9]+\s*w(?:eeks?)?\s*
+                [0-9]+\s*w(?:eeks?)?,?\s*
             )?
             (?:
-                (?P<days>[0-9]+)\s*d(?:ays?)?\s*
+                (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
             )?
             T)?
             (?:
-                (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
+                (?P<hours>[0-9]+)\s*h(?:ours?)?,?\s*
             )?
             (?:
-                (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
+                (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
             )?
             (?:
                 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*

From 90137ca4bea0a22afec5bc6a0a2c8ff60ea76975 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Mon, 28 Mar 2022 08:21:45 +0530
Subject: [PATCH 0868/2552] [utils] Add `filter_dict`

---
 yt_dlp/YoutubeDL.py        |  9 +++------
 yt_dlp/extractor/common.py |  3 ++-
 yt_dlp/extractor/rai.py    |  5 +++--
 yt_dlp/utils.py            | 12 ++++++------
 4 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 434bef65f..6c2b94f3c 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -65,6 +65,7 @@ from .utils import (
     ExistingVideoReached,
     expand_path,
     ExtractorError,
+    filter_dict,
     float_or_none,
     format_bytes,
     format_field,
@@ -1574,13 +1575,9 @@ class YoutubeDL(object):
         if not info:
             return info
 
-        force_properties = dict(
-            (k, v) for k, v in ie_result.items() if v is not None)
-        for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
-            if f in force_properties:
-                del force_properties[f]
         new_result = info.copy()
-        new_result.update(force_properties)
+        new_result.update(filter_dict(ie_result, lambda k, v: (
+            v is not None and k not in {'_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'})))
 
         # Extracted info may not be a video result (i.e.
         # info.get('_type', 'video') != video) but rather an url or
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index d3d13c40c..d0e57da23 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -49,6 +49,7 @@ from ..utils import (
     error_to_compat_str,
     extract_attributes,
     ExtractorError,
+    filter_dict,
     fix_xml_ampersands,
     float_or_none,
     format_field,
@@ -1588,7 +1589,7 @@ class InfoExtractor(object):
                 break
         traverse_json_ld(json_ld)
 
-        return dict((k, v) for k, v in info.items() if v is not None)
+        return filter_dict(info)
 
     def _search_nextjs_data(self, webpage, video_id, *, transform_source=None, fatal=True, **kw):
         return self._parse_json(
diff --git a/yt_dlp/extractor/rai.py b/yt_dlp/extractor/rai.py
index 34f127285..9d243b2be 100644
--- a/yt_dlp/extractor/rai.py
+++ b/yt_dlp/extractor/rai.py
@@ -11,6 +11,7 @@ from ..compat import (
 from ..utils import (
     determine_ext,
     ExtractorError,
+    filter_dict,
     find_xpath_attr,
     fix_xml_ampersands,
     GeoRestrictedError,
@@ -110,11 +111,11 @@ class RaiBaseIE(InfoExtractor):
         if not audio_only:
             formats.extend(self._create_http_urls(relinker_url, formats))
 
-        return dict((k, v) for k, v in {
+        return filter_dict({
             'is_live': is_live,
             'duration': duration,
             'formats': formats,
-        }.items() if v is not None)
+        })
 
     def _create_http_urls(self, relinker_url, fmts):
         _RELINKER_REG = r'https?://(?P<host>[^/]+?)/(?:i/)?(?P<extra>[^/]+?)/(?P<path>.+?)/(?P<id>\d+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4|/playlist\.m3u8).+?'
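The helper being introduced is just a dict comprehension with a default predicate that drops `None` values, as the utils.py hunk that follows shows. For illustration, a standalone copy of its behaviour:

```python
# Behaviour of the new filter_dict (the real implementation is in the utils.py hunk below)
def filter_dict(dct, cndn=lambda _, v: v is not None):
    return {k: v for k, v in dct.items() if cndn(k, v)}

print(filter_dict({'is_live': None, 'duration': 42, 'formats': []}))
# {'duration': 42, 'formats': []} -> None is dropped, falsy-but-not-None values are kept
```

This is exactly the shape of the `dict((k, v) for k, v in ... if v is not None)` expressions being replaced at the call sites above.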
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 72f11691f..08e30d18f 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -3105,16 +3105,16 @@ def try_get(src, getter, expected_type=None):
             return v
 
 
+def filter_dict(dct, cndn=lambda _, v: v is not None):
+    return {k: v for k, v in dct.items() if cndn(k, v)}
+
+
 def merge_dicts(*dicts):
     merged = {}
     for a_dict in dicts:
         for k, v in a_dict.items():
-            if v is None:
-                continue
-            if (k not in merged
-                    or (isinstance(v, compat_str) and v
-                        and isinstance(merged[k], compat_str)
-                        and not merged[k])):
+            if (v is not None and k not in merged
+                    or isinstance(v, str) and merged[k] == ''):
                 merged[k] = v
     return merged

From cb96c5be7002a1b16c1abbb11c2cd0239d86825a Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Mon, 28 Mar 2022 08:44:55 +0530
Subject: [PATCH 0869/2552] Fix `--no-overwrite` for playlist infojson

Fixes: https://github.com/yt-dlp/yt-dlp/issues/1467#issuecomment-1079922971
---
 yt_dlp/YoutubeDL.py | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 6c2b94f3c..6a8e45b1a 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -1815,7 +1815,7 @@ class YoutubeDL(object):
         ie_result['entries'] = playlist_results
 
         # Write the updated info to json
-        if _infojson_written and self._write_info_json(
+        if _infojson_written is True and self._write_info_json(
                 'updated playlist', ie_result,
                 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
             return
@@ -3786,7 +3786,7 @@ class YoutubeDL(object):
         return encoding
 
     def _write_info_json(self, label, ie_result, infofn, overwrite=None):
-        ''' Write infojson and returns True = written, False = skip, None = error '''
+        ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''
        if overwrite is None:
             overwrite = self.params.get('overwrites', True)
         if not self.params.get('writeinfojson'):
@@ -3798,14 +3798,15 @@ class YoutubeDL(object):
             return None
         elif not overwrite and os.path.exists(infofn):
             self.to_screen(f'[info] {label.title()} metadata is already present')
-        else:
-            self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
-            try:
-                write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
-            except (OSError, IOError):
-                self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
-                return None
-        return True
+            return 'exists'
+
+        self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
+        try:
+            write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
+            return True
+        except (OSError, IOError):
+            self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
+            return None
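The root cause in the patch above: the new `'exists'` state (like the old blanket `True`) is truthy, so a bare `if _infojson_written` could not tell a fresh write from a skipped one, and `--no-overwrites` was ignored when re-writing the playlist infojson. A minimal sketch of the new contract; the variable and the commented-out call are hypothetical, the four states come from the docstring in the diff:

```python
# The helper now effectively returns one of four states:
#   True     -> file was just written
#   'exists' -> file already present and overwriting is disabled
#   False    -> infojson writing was not requested
#   None     -> an error occurred
result = 'exists'  # stand-in for: ydl._write_info_json('playlist', ie_result, infofn)

if result is True:        # only a fresh write permits the later forced re-write
    print('re-write updated playlist metadata')
elif result == 'exists':  # truthy! a bare `if result:` would wrongly re-write
    print('respecting --no-overwrites')
```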
From f5f15c9993cf8087753a7ba2b57fee55e366b80e Mon Sep 17 00:00:00 2001
From: Luc Ritchie
Date: Sun, 27 Mar 2022 23:21:42 -0400
Subject: [PATCH 0870/2552] [BiliIntl] Support user-generated videos (#3203)

Authored by: wlritchi
---
 yt_dlp/extractor/bilibili.py | 67 ++++++++++++++++++++++++------------
 1 file changed, 45 insertions(+), 22 deletions(-)

diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index b4eb20642..dd1ff512e 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -15,6 +15,7 @@ from ..compat import (
 )
 from ..utils import (
     ExtractorError,
+    filter_dict,
     int_or_none,
     float_or_none,
     mimetype2ext,
@@ -755,15 +756,21 @@ class BiliIntlBaseIE(InfoExtractor):
             for i, line in enumerate(json['body']) if line.get('content'))
         return data
 
-    def _get_subtitles(self, ep_id):
-        sub_json = self._call_api(f'/web/v2/subtitle?episode_id={ep_id}&platform=web', ep_id)
+    def _get_subtitles(self, *, ep_id=None, aid=None):
+        sub_json = self._call_api(
+            '/web/v2/subtitle', ep_id or aid, note='Downloading subtitles list',
+            errnote='Unable to download subtitles list', query=filter_dict({
+                'platform': 'web',
+                'episode_id': ep_id,
+                'aid': aid,
+            }))
         subtitles = {}
         for sub in sub_json.get('subtitles') or []:
             sub_url = sub.get('url')
             if not sub_url:
                 continue
             sub_data = self._download_json(
-                sub_url, ep_id, errnote='Unable to download subtitles', fatal=False,
+                sub_url, ep_id or aid, errnote='Unable to download subtitles', fatal=False,
                 note='Downloading subtitles%s' % f' for {sub["lang"]}' if sub.get('lang') else '')
             if not sub_data:
                 continue
@@ -773,9 +780,14 @@ class BiliIntlBaseIE(InfoExtractor):
             })
         return subtitles
 
-    def _get_formats(self, ep_id):
-        video_json = self._call_api(f'/web/playurl?ep_id={ep_id}&platform=web', ep_id,
-                                    note='Downloading video formats', errnote='Unable to download video formats')
+    def _get_formats(self, *, ep_id=None, aid=None):
+        video_json = self._call_api(
+            '/web/playurl', ep_id or aid, note='Downloading video formats',
+            errnote='Unable to download video formats', query=filter_dict({
+                'platform': 'web',
+                'ep_id': ep_id,
+                'aid': aid,
+            }))
         video_json = video_json['playurl']
         formats = []
         for vid in video_json.get('video') or []:
@@ -809,15 +821,15 @@ class BiliIntlBaseIE(InfoExtractor):
         self._sort_formats(formats)
         return formats
 
-    def _extract_ep_info(self, episode_data, ep_id):
+    def _extract_video_info(self, video_data, *, ep_id=None, aid=None):
         return {
-            'id': ep_id,
-            'title': episode_data.get('title_display') or episode_data['title'],
-            'thumbnail': episode_data.get('cover'),
+            'id': ep_id or aid,
+            'title': video_data.get('title_display') or video_data.get('title'),
+            'thumbnail': video_data.get('cover'),
             'episode_number': int_or_none(self._search_regex(
-                r'^E(\d+)(?:$| - )', episode_data.get('title_display'), 'episode number', default=None)),
+                r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)),
-            'formats': self._get_formats(ep_id),
-            'subtitles': self._get_subtitles(ep_id),
+            'formats': self._get_formats(ep_id=ep_id, aid=aid),
+            'subtitles': self._get_subtitles(ep_id=ep_id, aid=aid),
             'extractor_key': BiliIntlIE.ie_key(),
         }
 
@@ -854,7 +866,7 @@ class BiliIntlBaseIE(InfoExtractor):
 
 
 class BiliIntlIE(BiliIntlBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-z]{2}/)?play/(?P<season_id>\d+)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
     _TESTS = [{
         # Bstation page
        'url': 'https://www.bilibili.tv/en/play/34613/341736',
@@ -889,24 +901,35 @@ class BiliIntlIE(BiliIntlBaseIE):
     }, {
         'url': 'https://www.biliintl.com/en/play/34613/341736',
         'only_matching': True,
+    }, {
+        # User-generated content (as opposed to a series licensed from a studio)
+        'url': 'https://bilibili.tv/en/video/2019955076',
+        'only_matching': True,
+    }, {
+        # No language in URL
+        'url': 'https://www.bilibili.tv/video/2019955076',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        season_id, video_id = self._match_valid_url(url).groups()
+        season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
+
video_id = ep_id or aid webpage = self._download_webpage(url, video_id) # Bstation layout initial_data = self._parse_json(self._search_regex( - r'window\.__INITIAL_DATA__\s*=\s*({.+?});', webpage, + r'window\.__INITIAL_(?:DATA|STATE)__\s*=\s*({.+?});', webpage, 'preload state', default='{}'), video_id, fatal=False) or {} - episode_data = traverse_obj(initial_data, ('OgvVideo', 'epDetail'), expected_type=dict) + video_data = ( + traverse_obj(initial_data, ('OgvVideo', 'epDetail'), expected_type=dict) + or traverse_obj(initial_data, ('UgcVideo', 'videoData'), expected_type=dict) or {}) - if not episode_data: + if season_id and not video_data: # Non-Bstation layout, read through episode list season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id) - episode_data = next( + video_data = next( episode for episode in traverse_obj(season_json, ('sections', ..., 'episodes', ...), expected_type=dict) - if str(episode.get('episode_id')) == video_id) - return self._extract_ep_info(episode_data, video_id) + if str(episode.get('episode_id')) == ep_id) + return self._extract_video_info(video_data, ep_id=ep_id, aid=aid) class BiliIntlSeriesIE(BiliIntlBaseIE): @@ -934,7 +957,7 @@ class BiliIntlSeriesIE(BiliIntlBaseIE): series_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id) for episode in traverse_obj(series_json, ('sections', ..., 'episodes', ...), expected_type=dict, default=[]): episode_id = str(episode.get('episode_id')) - yield self._extract_ep_info(episode, episode_id) + yield self._extract_video_info(episode, ep_id=episode_id) def _real_extract(self, url): series_id = self._match_id(url) From bdd60588b05a1cabd853858125522b920065cad8 Mon Sep 17 00:00:00 2001 From: nyuszika7h Date: Mon, 28 Mar 2022 05:23:44 +0200 Subject: [PATCH 0871/2552] [viki] Don't attempt to modify URLs with signature (#3222) Closes #1379 Authored by: nyuszika7h --- yt_dlp/extractor/viki.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/viki.py b/yt_dlp/extractor/viki.py index 8234ba7df..8a930798d 100644 --- a/yt_dlp/extractor/viki.py +++ b/yt_dlp/extractor/viki.py @@ -261,7 +261,7 @@ class VikiIE(VikiBaseIE): mpd_content = self._download_webpage(mpd_url, video_id, note='Downloading initial MPD manifest') mpd_url = self._search_regex( r'(?mi)(http.+.mpd)', mpd_content, 'new manifest', default=mpd_url) - if 'mpdhd_high' not in mpd_url: + if 'mpdhd_high' not in mpd_url and 'sig=' not in mpd_url: # Modify the URL to get 1080p mpd_url = mpd_url.replace('mpdhd', 'mpdhd_high') formats = self._extract_mpd_formats(mpd_url, video_id) From 9139d2fae08b1d5ff8a0f5d9a280b44e4fa8bf40 Mon Sep 17 00:00:00 2001 From: Ha Tien Loi Date: Mon, 28 Mar 2022 10:27:41 +0700 Subject: [PATCH 0872/2552] [WasdTV] Add extractor (#3045) Closes #3041 Authored by: un-def, hatienl0i261299 --- yt_dlp/extractor/extractors.py | 5 + yt_dlp/extractor/wasdtv.py | 161 +++++++++++++++++++++++++++++++++ 2 files changed, 166 insertions(+) create mode 100644 yt_dlp/extractor/wasdtv.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index de88a0f4a..e5ae12a7d 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1977,6 +1977,11 @@ from .washingtonpost import ( WashingtonPostIE, WashingtonPostArticleIE, ) +from .wasdtv import ( + WASDTVStreamIE, + WASDTVRecordIE, + WASDTVClipIE, +) from .wat import WatIE from .watchbox import WatchBoxIE from .watchindianporn import WatchIndianPornIE diff 
--git a/yt_dlp/extractor/wasdtv.py b/yt_dlp/extractor/wasdtv.py
new file mode 100644
index 000000000..38c10dc62
--- /dev/null
+++ b/yt_dlp/extractor/wasdtv.py
@@ -0,0 +1,161 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    parse_iso8601,
+    traverse_obj,
+    try_get,
+)
+
+
+class WASDTVBaseIE(InfoExtractor):
+
+    def _fetch(self, path, video_id, description, query={}):
+        response = self._download_json(
+            f'https://wasd.tv/api/{path}', video_id, query=query,
+            note=f'Downloading {description} metadata',
+            errnote=f'Unable to download {description} metadata')
+        error = response.get('error')
+        if error:
+            raise ExtractorError(f'{self.IE_NAME} returned error: {error}', expected=True)
+        return response.get('result')
+
+    def _extract_thumbnails(self, thumbnails_dict):
+        return [{
+            'url': url,
+            'preference': index,
+        } for index, url in enumerate(
+            traverse_obj(thumbnails_dict, (('small', 'medium', 'large'),))) if url]
+
+    def _real_extract(self, url):
+        container = self._get_container(url)
+        stream = traverse_obj(container, ('media_container_streams', 0))
+        media = try_get(stream, lambda x: x['stream_media'][0])
+        if not media:
+            raise ExtractorError('Can not extract media data.', expected=True)
+        media_meta = media.get('media_meta')
+        media_url, is_live = self._get_media_url(media_meta)
+        video_id = media.get('media_id') or container.get('media_container_id')
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(media_url, video_id, 'mp4')
+        self._sort_formats(formats)
+        return {
+            'id': str(video_id),
+            'title': container.get('media_container_name') or self._og_search_title(self._download_webpage(url, video_id)),
+            'description': container.get('media_container_description'),
+            'thumbnails': self._extract_thumbnails(media_meta.get('media_preview_images')),
+            'timestamp': parse_iso8601(container.get('created_at')),
+            'view_count': int_or_none(stream.get('stream_current_viewers' if is_live else 'stream_total_viewers')),
+            'is_live': is_live,
+            'formats': formats,
+            'subtitles': subtitles,
+        }
+
+    def _get_container(self, url):
+        raise NotImplementedError('Subclass for get media container')
+
+    def _get_media_url(self, media_meta):
+        raise NotImplementedError('Subclass for get media url')
+
+
+class WASDTVStreamIE(WASDTVBaseIE):
+    IE_NAME = 'wasdtv:stream'
+    _VALID_URL = r'https?://wasd\.tv/(?P<id>[^/#?]+)$'
+    _TESTS = [{
+        'url': 'https://wasd.tv/24_7',
+        'info_dict': {
+            'id': '559738',
+            'ext': 'mp4',
+            'title': 'Live 24/7 Music',
+            'description': '24/7 Music',
+            'timestamp': int,
+            'upload_date': r're:^\d{8}$',
+            'is_live': True,
+            'view_count': int,
+        },
+    }]
+
+    def _get_container(self, url):
+        nickname = self._match_id(url)
+        channel = self._fetch(f'channels/nicknames/{nickname}', video_id=nickname, description='channel')
+        channel_id = channel.get('channel_id')
+        containers = self._fetch(
+            'v2/media-containers', channel_id, 'running media containers',
+            query={
+                'channel_id': channel_id,
+                'media_container_type': 'SINGLE',
+                'media_container_status': 'RUNNING',
+            })
+        if not containers:
+            raise ExtractorError(f'{nickname} is offline', expected=True)
+        return containers[0]
+
+    def _get_media_url(self, media_meta):
+        return media_meta['media_url'], True
+
+
+class WASDTVRecordIE(WASDTVBaseIE):
+    IE_NAME = 'wasdtv:record'
+    _VALID_URL = r'https?://wasd\.tv/[^/#?]+/videos\?record=(?P<id>\d+)$'
+    _TESTS = [{
+        'url': 'https://wasd.tv/spacemita/videos?record=907755',
+        'md5': 'c9899dd85be4cc997816ff9f9ca516ce',
+        'info_dict': {
+            'id': '906825',
+            'ext': 'mp4',
+            'title': 'Музыкальный',
+            'description': 'md5:f510388d929ff60ae61d4c3cab3137cc',
+            'timestamp': 1645812079,
+            'upload_date': '20220225',
+            'thumbnail': r're:^https?://.+\.jpg',
+            'is_live': False,
+            'view_count': int,
+        },
+    }]
+
+    def _get_container(self, url):
+        container_id = self._match_id(url)
+        return self._fetch(
+            f'v2/media-containers/{container_id}', container_id, 'media container')
+
+    def _get_media_url(self, media_meta):
+        media_archive_url = media_meta.get('media_archive_url')
+        if media_archive_url:
+            return media_archive_url, False
+        return media_meta['media_url'], True
+
+
+class WASDTVClipIE(WASDTVBaseIE):
+    IE_NAME = 'wasdtv:clip'
+    _VALID_URL = r'https?://wasd\.tv/[^/#?]+/clips\?clip=(?P<id>\d+)$'
+    _TESTS = [{
+        'url': 'https://wasd.tv/spacemita/clips?clip=26804',
+        'md5': '818885e720143d7a4e776ff66fcff148',
+        'info_dict': {
+            'id': '26804',
+            'ext': 'mp4',
+            'title': 'Пуш флексит на голове стримера',
+            'timestamp': 1646682908,
+            'upload_date': '20220307',
+            'thumbnail': r're:^https?://.+\.jpg',
+            'view_count': int,
+        },
+    }]
+
+    def _real_extract(self, url):
+        clip_id = self._match_id(url)
+        clip = self._fetch(f'v2/clips/{clip_id}', video_id=clip_id, description='clip')
+        clip_data = clip.get('clip_data')
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(clip_data.get('url'), video_id=clip_id, ext='mp4')
+        self._sort_formats(formats)
+        return {
+            'id': clip_id,
+            'title': clip.get('clip_title') or self._og_search_title(self._download_webpage(url, clip_id, fatal=False)),
+            'thumbnails': self._extract_thumbnails(clip_data.get('preview')),
+            'timestamp': parse_iso8601(clip.get('created_at')),
+            'view_count': int_or_none(clip.get('clip_views_count')),
+            'formats': formats,
+            'subtitles': subtitles,
+        }
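A note on the structure of the extractor added above: `WASDTVBaseIE._real_extract` drives the whole pipeline, and each concrete class only answers two questions, which container to fetch and which URL (live or archived) to play. A stripped-down sketch of that template-method split; the class names, URLs and data below are illustrative, not the real wasd.tv API:

```python
# Illustrative sketch of the pattern used by the WASD.TV extractors above
class Base:
    def extract(self, url):
        container = self._get_container(url)                 # subclass: *what* to fetch
        media_url, is_live = self._get_media_url(container)  # subclass: *how* to play it
        return {'url': media_url, 'is_live': is_live}


class Record(Base):
    def _get_container(self, url):
        # hypothetical container; the real one comes from the wasd.tv API
        return {'media_archive_url': 'https://example.invalid/archive.m3u8'}

    def _get_media_url(self, meta):
        # finished records expose an archive URL; otherwise fall back to the live stream
        archive = meta.get('media_archive_url')
        return (archive, False) if archive else (meta['media_url'], True)


print(Record().extract('https://example.invalid/videos?record=1'))
# {'url': 'https://example.invalid/archive.m3u8', 'is_live': False}
```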
From 8a7f68d0b12d0f4910a15b59a3ec090bbf83b6f2 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Tue, 29 Mar 2022 03:44:17 +0530
Subject: [PATCH 0873/2552] [ffmpeg] Cache version data

Related: https://github.com/dasl-/pifi/issues/9
---
 yt_dlp/postprocessor/ffmpeg.py | 40 +++++++++++++++-------------------
 yt_dlp/utils.py                |  4 +++-
 2 files changed, 20 insertions(+), 24 deletions(-)

diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py
index a5ed6d184..0b18e8774 100644
--- a/yt_dlp/postprocessor/ffmpeg.py
+++ b/yt_dlp/postprocessor/ffmpeg.py
@@ -86,13 +86,18 @@ class FFmpegPostProcessor(PostProcessor):
 
     @staticmethod
     def get_versions(downloader=None):
-        return FFmpegPostProcessor.get_version_and_features(downloader)[0]
+        return FFmpegPostProcessor.get_versions_and_features(downloader)[0]
+
+    _version_cache, _features_cache = {}, {}
 
     def _determine_executables(self):
        programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
 
        def get_ffmpeg_version(path, prog):
-            out = _get_exe_version_output(path, ['-bsfs'])
+            if path in self._version_cache:
+                self._versions[path], self._features = self._version_cache[path], self._features_cache.get(path, {})
+                return
+            out = _get_exe_version_output(path, ['-bsfs'], to_screen=self.write_debug)
             ver = detect_exe_version(out) if out else False
             if ver:
                 regexs = [
@@ -104,13 +109,13 @@ class FFmpegPostProcessor(PostProcessor):
                     mobj = re.match(regex, ver)
                     if mobj:
                         ver = mobj.group(1)
-            self._versions[prog] = ver
+            self._versions[prog] = self._version_cache[path] = ver
             if prog != 'ffmpeg' or not out:
                 return
 
             mobj = re.search(r'(?m)^\s+libavformat\s+(?:[0-9. ]+)\s+/\s+(?P<runtime>[0-9. 
]+)', out) lavf_runtime_version = mobj.group('runtime').replace(' ', '') if mobj else None - self._features = { + self._features = self._features_cache[path] = { 'fdk': '--enable-libfdk-aac' in out, 'setts': 'setts' in out.splitlines(), 'needs_adtstoasc': is_outdated_version(lavf_runtime_version, '57.56.100', False), @@ -148,26 +153,15 @@ class FFmpegPostProcessor(PostProcessor): self._paths[basename] = location self._versions = {} - for p in programs: - get_ffmpeg_version(self._paths[p], p) - + executables = {'basename': ('ffmpeg', 'avconv'), 'probe_basename': ('ffprobe', 'avprobe')} if prefer_ffmpeg is False: - prefs = ('avconv', 'ffmpeg') - else: - prefs = ('ffmpeg', 'avconv') - for p in prefs: - if self._versions[p]: - self.basename = p - break - - if prefer_ffmpeg is False: - prefs = ('avprobe', 'ffprobe') - else: - prefs = ('ffprobe', 'avprobe') - for p in prefs: - if self._versions[p]: - self.probe_basename = p - break + executables = {k: v[::-1] for k, v in executables.items()} + for var, prefs in executables.items(): + for p in prefs: + get_ffmpeg_version(self._paths[p], p) + if self._versions[p]: + setattr(self, var, p) + break if self.basename == 'avconv': self.deprecation_warning( diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 08e30d18f..62a1800d4 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2713,7 +2713,9 @@ def check_executable(exe, args=[]): return exe -def _get_exe_version_output(exe, args): +def _get_exe_version_output(exe, args, *, to_screen=None): + if to_screen: + to_screen(f'Checking exe version: {shell_quote([exe] + args)}') try: # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers # SIGTTOU if yt-dlp is run in the background. From e7870111e83033e0ac728d5a2d565d1eb146c335 Mon Sep 17 00:00:00 2001 From: David <59258980+zerodytrash@users.noreply.github.com> Date: Tue, 29 Mar 2022 03:05:31 -0700 Subject: [PATCH 0874/2552] [YouTube] Add new age-gate bypass (#3233) Closes #3182 Authored by: zerodytrash, pukkandan --- README.md | 2 +- yt_dlp/extractor/youtube.py | 60 ++++++++++++++++++++++++++----------- 2 files changed, 43 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 955a98557..ab729fa4c 100644 --- a/README.md +++ b/README.md @@ -1657,7 +1657,7 @@ The following extractors use this feature: #### youtube * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and auto-translated subtitles respectively -* `player_client`: Clients to extract video data from. The main clients are `web`, `android`, `ios`, `mweb`. These also have `_music`, `_embedded`, `_agegate`, and `_creator` variants (Eg: `web_embedded`) (`mweb` has only `_agegate`). By default, `android,web` is used, but the agegate and creator variants are added as required for age-gated videos. Similarly the music variants are added for `music.youtube.com` urls. You can also use `all` to use all the clients, and `default` for the default clients. +* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (Eg: `web_embedded`); and `mweb` and `tv_embedded` (agegate bypass) with no variants. By default, `android,web` is used, but tv_embedded and creator variants are added as required for age-gated videos. Similarly the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients. 
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details * `include_live_dash`: Include live dash formats even without `--live-from-start` (These formats don't download properly) * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index e5097c264..19b4985f6 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -217,15 +217,35 @@ INNERTUBE_CLIENTS = { } }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 2 - } + }, + # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option) + # See: https://github.com/zerodytrash/YouTube-Internal-Clients + 'tv_embedded': { + 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER', + 'clientVersion': '2.0', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 85 + }, } +def _split_innertube_client(client_name): + variant, *base = client_name.rsplit('.', 1) + if base: + return variant, base[0], variant + base, *variant = client_name.split('_', 1) + return client_name, base, variant[0] if variant else None + + def build_innertube_clients(): THIRD_PARTY = { - 'embedUrl': 'https://google.com', # Can be any valid URL + 'embedUrl': 'https://www.youtube.com/', # Can be any valid URL } - BASE_CLIENTS = ('android', 'web', 'ios', 'mweb') + BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb') priority = qualities(BASE_CLIENTS[::-1]) for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()): @@ -234,15 +254,15 @@ def build_innertube_clients(): ytcfg.setdefault('REQUIRE_JS_PLAYER', True) ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en') - base_client, *variant = client.split('_') + _, base_client, variant = _split_innertube_client(client) ytcfg['priority'] = 10 * priority(base_client) if not variant: - INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg) - agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED' - agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY - agegate_ytcfg['priority'] -= 1 - elif variant == ['embedded']: + INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg) + embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED' + embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY + embedscreen['priority'] -= 3 + elif variant == 'embedded': ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY ytcfg['priority'] -= 2 else: @@ -2956,13 +2976,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): prs = [] def append_client(*client_names): - """ Append the first client name that exists """ + """ Append the first client name that exists but not already used """ for client_name in client_names: - if client_name in INNERTUBE_CLIENTS: - if client_name not in all_clients: + actual_client = _split_innertube_client(client_name)[0] + if actual_client in INNERTUBE_CLIENTS: + if actual_client not in all_clients: clients.append(client_name) - all_clients.add(client_name) - return + all_clients.add(actual_client) + return # Android player_response does not have microFormats which are 
needed for
        # extraction of some data. So we return the initial_pr with formats
@@ -2977,7 +2998,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         tried_iframe_fallback = False
         player_url = None
         while clients:
-            client = clients.pop()
+            client, base_client, variant = _split_innertube_client(clients.pop())
             player_ytcfg = master_ytcfg if client == 'web' else {}
             if 'configs' not in self._configuration_arg('player_skip'):
                 player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg
@@ -3005,10 +3026,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 prs.append(pr)
 
             # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
-            if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
-                append_client(client.replace('_agegate', '_creator'))
+            if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
+                append_client(f'{base_client}_creator')
             elif self._is_agegated(pr):
-                append_client(f'{client}_embedded', f'{client.replace("_embedded", "")}_agegate')
+                if variant == 'tv_embedded':
+                    append_client(f'{base_client}_embedded')
+                elif not variant:
+                    append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')
 
         if last_error:
             if not len(prs):

From af4944d84b857f285bcf70b33edcbef5ad400a31 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Wed, 30 Mar 2022 12:22:36 +0530
Subject: [PATCH 0875/2552] Fix bug in 8a7f68d0b12d0f4910a15b59a3ec090bbf83b6f2

Closes #3241
---
 yt_dlp/postprocessor/ffmpeg.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py
index 0b18e8774..5216acbfb 100644
--- a/yt_dlp/postprocessor/ffmpeg.py
+++ b/yt_dlp/postprocessor/ffmpeg.py
@@ -95,7 +95,7 @@ class FFmpegPostProcessor(PostProcessor):
 
         def get_ffmpeg_version(path, prog):
             if path in self._version_cache:
-                self._versions[path], self._features = self._version_cache[path], self._features_cache.get(path, {})
+                self._versions[prog], self._features = self._version_cache[path], self._features_cache.get(path, {})
                 return
             out = _get_exe_version_output(path, ['-bsfs'], to_screen=self.write_debug)
             ver = detect_exe_version(out) if out else False

From 48e15bb6b11ce437d18687e068852a8bf2cf0b6c Mon Sep 17 00:00:00 2001
From: Ha Tien Loi
Date: Wed, 30 Mar 2022 17:04:00 +0700
Subject: [PATCH 0876/2552] [dailymotion] Support `geo.dailymotion.com` (#3230)

Closes #3229

Authored by: hatienl0i261299
---
 yt_dlp/extractor/dailymotion.py | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/dailymotion.py b/yt_dlp/extractor/dailymotion.py
index 95589d53a..9cb56185b 100644
--- a/yt_dlp/extractor/dailymotion.py
+++ b/yt_dlp/extractor/dailymotion.py
@@ -94,10 +94,10 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
     _VALID_URL = r'''(?ix)
                     https?://
                         (?:
-                            (?:(?:www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|\#)/)?video|swf)|
+                            (?:(?:www|touch|geo)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:(?:embed|swf|\#)/)|player\.html\?)?video|swf)|
                             (?:www\.)?lequipe\.fr/video
                         )
-                        /(?P<id>[^/?_]+)(?:.+?\bplaylist=(?P<playlist_id>x[0-9a-z]+))?
+                        [/=](?P<id>[^/?_&]+)(?:.+?\bplaylist=(?P<playlist_id>x[0-9a-z]+))?
''' IE_NAME = 'dailymotion' _TESTS = [{ @@ -115,6 +115,25 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'uploader_id': 'x1xm8ri', 'age_limit': 0, }, + }, { + 'url': 'https://geo.dailymotion.com/player.html?video=x89eyek&mute=true', + 'md5': 'e2f9717c6604773f963f069ca53a07f8', + 'info_dict': { + 'id': 'x89eyek', + 'ext': 'mp4', + 'title': "En quête d'esprit du 27/03/2022", + 'description': 'md5:66542b9f4df2eb23f314fc097488e553', + 'duration': 2756, + 'timestamp': 1648383669, + 'upload_date': '20220327', + 'uploader': 'CNEWS', + 'uploader_id': 'x24vth', + 'age_limit': 0, + 'view_count': int, + 'like_count': int, + 'tags': ['en_quete_d_esprit'], + 'thumbnail': 'https://s2.dmcdn.net/v/Tncwi1YGKdvFbDuDY/x1080', + } }, { 'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames', 'md5': '2137c41a8e78554bb09225b8eb322406', From ab2579bb45ccdb82d40dbb75f48721d97df88270 Mon Sep 17 00:00:00 2001 From: Daniel <61970262+rozari0@users.noreply.github.com> Date: Wed, 30 Mar 2022 16:54:35 +0600 Subject: [PATCH 0877/2552] [xnxx] Add `xnxx3.com` (#3188) Authored by: rozari0 --- yt_dlp/extractor/xnxx.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/xnxx.py b/yt_dlp/extractor/xnxx.py index dd4fb54d4..27f991627 100644 --- a/yt_dlp/extractor/xnxx.py +++ b/yt_dlp/extractor/xnxx.py @@ -13,7 +13,7 @@ from ..utils import ( class XNXXIE(InfoExtractor): - _VALID_URL = r'https?://(?:video|www)\.xnxx\.com/video-?(?P[0-9a-z]+)/' + _VALID_URL = r'https?://(?:video|www)\.xnxx3?\.com/video-?(?P[0-9a-z]+)/' _TESTS = [{ 'url': 'http://www.xnxx.com/video-55awb78/skyrim_test_video', 'md5': '7583e96c15c0f21e9da3453d9920fbba', @@ -32,6 +32,9 @@ class XNXXIE(InfoExtractor): }, { 'url': 'http://www.xnxx.com/video-55awb78/', 'only_matching': True, + }, { + 'url': 'http://www.xnxx3.com/video-55awb78/', + 'only_matching': True, }] def _real_extract(self, url): From 180c81509f6bec740df2957aa3d8aebc4e27b601 Mon Sep 17 00:00:00 2001 From: Felix S Date: Wed, 30 Mar 2022 11:31:25 +0000 Subject: [PATCH 0878/2552] [docs] Add an `.editorconfig` file (#3220) Authored by: fstirlitz --- .editorconfig | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 .editorconfig diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 000000000..40c19fa66 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,8 @@ +root = true + +[**.py] +charset = utf-8 +indent_size = 4 +indent_style = space +trim_trailing_whitespace = true +insert_final_newline = true From 5d0aeac0e9137e0cc038d44f00d19f1f9181c883 Mon Sep 17 00:00:00 2001 From: MrRawes Date: Wed, 30 Mar 2022 12:35:06 +0100 Subject: [PATCH 0879/2552] [docs] Clarify the exact `BSD` license of dependencies (#3197) Authored by: MrRawes --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index ab729fa4c..a75441e35 100644 --- a/README.md +++ b/README.md @@ -265,15 +265,15 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly * [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging separate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. License [depends on the build](https://www.ffmpeg.org/legal.html) * [**mutagen**](https://github.com/quodlibet/mutagen)\* - For embedding thumbnail in certain formats. 
Licensed under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING) -* [**pycryptodomex**](https://github.com/Legrandin/pycryptodome)\* - For decrypting AES-128 HLS streams and various other data. Licensed under [BSD2](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst) -* [**websockets**](https://github.com/aaugustin/websockets)\* - For downloading over websocket. Licensed under [BSD3](https://github.com/aaugustin/websockets/blob/main/LICENSE) -* [**secretstorage**](https://github.com/mitya57/secretstorage)\* - For accessing the Gnome keyring while decrypting cookies of Chromium-based browsers on Linux. Licensed under [BSD](https://github.com/mitya57/secretstorage/blob/master/LICENSE) +* [**pycryptodomex**](https://github.com/Legrandin/pycryptodome)\* - For decrypting AES-128 HLS streams and various other data. Licensed under [BSD-2-Clause](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst) +* [**websockets**](https://github.com/aaugustin/websockets)\* - For downloading over websocket. Licensed under [BSD-3-Clause](https://github.com/aaugustin/websockets/blob/main/LICENSE) +* [**secretstorage**](https://github.com/mitya57/secretstorage)\* - For accessing the Gnome keyring while decrypting cookies of Chromium-based browsers on Linux. Licensed under [BSD-3-Clause](https://github.com/mitya57/secretstorage/blob/master/LICENSE) * [**brotli**](https://github.com/google/brotli)\* or [**brotlicffi**](https://github.com/python-hyper/brotlicffi) - [Brotli](https://en.wikipedia.org/wiki/Brotli) content encoding support. Both licensed under MIT [1](https://github.com/google/brotli/blob/master/LICENSE) [2](https://github.com/python-hyper/brotlicffi/blob/master/LICENSE) * [**certifi**](https://github.com/certifi/python-certifi)\* - Provides Mozilla's root certificate bundle. Licensed under [MPLv2](https://github.com/certifi/python-certifi/blob/master/LICENSE) * [**AtomicParsley**](https://github.com/wez/atomicparsley) - For embedding thumbnail in mp4/m4a if mutagen/ffmpeg cannot. Licensed under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING) * [**rtmpdump**](http://rtmpdump.mplayerhq.hu) - For downloading `rtmp` streams. ffmpeg will be used as a fallback. Licensed under [GPLv2+](http://rtmpdump.mplayerhq.hu) * [**mplayer**](http://mplayerhq.hu/design7/info.html) or [**mpv**](https://mpv.io) - For downloading `rstp` streams. ffmpeg will be used as a fallback. Licensed under [GPLv2+](https://github.com/mpv-player/mpv/blob/master/Copyright) -* [**phantomjs**](https://github.com/ariya/phantomjs) - Used in extractors where javascript needs to be run. Licensed under [BSD3](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD) +* [**phantomjs**](https://github.com/ariya/phantomjs) - Used in extractors where javascript needs to be run. Licensed under [BSD-3-Clause](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD) * [**sponskrub**](https://github.com/faissaloo/SponSkrub) - For using the now **deprecated** [sponskrub options](#sponskrub-options). 
Licensed under [GPLv3+](https://github.com/faissaloo/SponSkrub/blob/master/LICENCE.md) * Any external downloader that you want to use with `--downloader` From 11078c6d571673a0f09e21933f4ad1e6fcc35456 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 30 Mar 2022 18:19:22 +0530 Subject: [PATCH 0880/2552] [crunhyroll] Fix inheritance https://github.com/yt-dlp/yt-dlp/pull/2955#issuecomment-1083060465 --- yt_dlp/extractor/crunchyroll.py | 4 +-- yt_dlp/extractor/vrv.py | 52 ++++++++++++++++++--------------- 2 files changed, 31 insertions(+), 25 deletions(-) diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py index bf1bf8c1c..bb4ae12f5 100644 --- a/yt_dlp/extractor/crunchyroll.py +++ b/yt_dlp/extractor/crunchyroll.py @@ -9,7 +9,7 @@ import zlib from hashlib import sha1 from math import pow, sqrt, floor from .common import InfoExtractor -from .vrv import VRVIE +from .vrv import VRVBaseIE from ..compat import ( compat_b64decode, compat_etree_Element, @@ -100,7 +100,7 @@ class CrunchyrollBaseIE(InfoExtractor): parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True))) -class CrunchyrollIE(CrunchyrollBaseIE, VRVIE): +class CrunchyrollIE(CrunchyrollBaseIE, VRVBaseIE): IE_NAME = 'crunchyroll' _VALID_URL = r'https?://(?:(?Pwww|m)\.)?(?Pcrunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P[0-9]+))(?:[/?&]|$)' _TESTS = [{ diff --git a/yt_dlp/extractor/vrv.py b/yt_dlp/extractor/vrv.py index 10e6be7ed..00e1006c4 100644 --- a/yt_dlp/extractor/vrv.py +++ b/yt_dlp/extractor/vrv.py @@ -85,7 +85,30 @@ class VRVBaseIE(InfoExtractor): 'resource_key': resource_key, })['__links__']['cms_resource']['href'] - def _initialize_pre_login(self): + def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang): + if not url or stream_format not in ('hls', 'dash', 'adaptive_hls'): + return [] + format_id = join_nonempty( + stream_format, + audio_lang and 'audio-%s' % audio_lang, + hardsub_lang and 'hardsub-%s' % hardsub_lang) + if 'hls' in stream_format: + adaptive_formats = self._extract_m3u8_formats( + url, video_id, 'mp4', m3u8_id=format_id, + note='Downloading %s information' % format_id, + fatal=False) + elif stream_format == 'dash': + adaptive_formats = self._extract_mpd_formats( + url, video_id, mpd_id=format_id, + note='Downloading %s information' % format_id, + fatal=False) + if audio_lang: + for f in adaptive_formats: + if f.get('acodec') != 'none': + f['language'] = audio_lang + return adaptive_formats + + def _set_api_params(self): webpage = self._download_webpage( 'https://vrv.co/', None, headers=self.geo_verification_headers()) self._API_PARAMS = self._parse_json(self._search_regex( @@ -133,28 +156,8 @@ class VRVIE(VRVBaseIE): self._TOKEN = token_credentials['oauth_token'] self._TOKEN_SECRET = token_credentials['oauth_token_secret'] - def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang): - if not url or stream_format not in ('hls', 'dash', 'adaptive_hls'): - return [] - format_id = join_nonempty( - stream_format, - audio_lang and 'audio-%s' % audio_lang, - hardsub_lang and 'hardsub-%s' % hardsub_lang) - if 'hls' in stream_format: - adaptive_formats = self._extract_m3u8_formats( - url, video_id, 'mp4', m3u8_id=format_id, - note='Downloading %s information' % format_id, - fatal=False) - elif stream_format == 'dash': - adaptive_formats = self._extract_mpd_formats( - url, video_id, mpd_id=format_id, - note='Downloading %s information' % format_id, - fatal=False) - if audio_lang: - for f in 
adaptive_formats: - if f.get('acodec') != 'none': - f['language'] = audio_lang - return adaptive_formats + def _initialize_pre_login(self): + return self._set_api_params() def _real_extract(self, url): video_id = self._match_id(url) @@ -249,6 +252,9 @@ class VRVSeriesIE(VRVBaseIE): 'playlist_mincount': 11, } + def _initialize_pre_login(self): + return self._set_api_params() + def _real_extract(self, url): series_id = self._match_id(url) From c418e6b5a6aa483b801c29cf5ada4263e33a9a3e Mon Sep 17 00:00:00 2001 From: zackmark29 <62680932+zackmark29@users.noreply.github.com> Date: Thu, 31 Mar 2022 10:47:58 +0800 Subject: [PATCH 0881/2552] [viu] Fix bypass for preview (#3247) Authored by: zackmark29 --- yt_dlp/extractor/viu.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/viu.py b/yt_dlp/extractor/viu.py index b0a1fca68..ba627ca5b 100644 --- a/yt_dlp/extractor/viu.py +++ b/yt_dlp/extractor/viu.py @@ -329,7 +329,8 @@ class ViuOTTIE(InfoExtractor): if token is not None: query['identity'] = token else: - # preview is limited to 3min for non-members. But we can try to bypass it + # The content is Preview or for VIP only. + # We can try to bypass the duration which is limited to 3mins only duration_limit, query['duration'] = True, '180' try: stream_data = download_playback() @@ -346,13 +347,13 @@ class ViuOTTIE(InfoExtractor): # bypass preview duration limit if duration_limit: - stream_url = urllib.parse.urlparse(stream_url) + old_stream_url = urllib.parse.urlparse(stream_url) + query = dict(urllib.parse.parse_qsl(old_stream_url.query, keep_blank_values=True)) query.update({ 'duration': video_data.get('time_duration') or '9999999', 'duration_start': '0', }) - stream_url = stream_url._replace(query=urllib.parse.urlencode(dict( - urllib.parse.parse_qsl(stream_url.query, keep_blank_values=True)))).geturl() + stream_url = old_stream_url._replace(query=urllib.parse.urlencode(query)).geturl() formats.append({ 'format_id': vid_format, From bb5a7cb8ad9274c7388a54ef6a6ceae24dd892cc Mon Sep 17 00:00:00 2001 From: Bricio <216170+Bricio@users.noreply.github.com> Date: Thu, 31 Mar 2022 00:04:55 -0300 Subject: [PATCH 0882/2552] [Craftsy] Add extractor (#3208) Authored by: Bricio --- yt_dlp/extractor/craftsy.py | 71 ++++++++++++++++++++++++++++++++++ yt_dlp/extractor/extractors.py | 1 + 2 files changed, 72 insertions(+) create mode 100644 yt_dlp/extractor/craftsy.py diff --git a/yt_dlp/extractor/craftsy.py b/yt_dlp/extractor/craftsy.py new file mode 100644 index 000000000..ed2f4420e --- /dev/null +++ b/yt_dlp/extractor/craftsy.py @@ -0,0 +1,71 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .brightcove import BrightcoveNewIE +from .common import InfoExtractor + +from ..utils import ( + dict_get, + get_element_by_id, + js_to_json, + traverse_obj, +) + + +class CraftsyIE(InfoExtractor): + _VALID_URL = r'https?://www.craftsy.com/class/(?P[a-z0-9_-]+)/' + _TESTS = [{ + 'url': 'https://www.craftsy.com/class/the-midnight-quilt-show-season-5/', + 'info_dict': { + 'id': 'the-midnight-quilt-show-season-5', + 'title': 'The Midnight Quilt Show Season 5', + 'description': 'md5:113eda818e985d1a566625fb2f833b7a', + }, + 'playlist_count': 10, + }, { + 'url': 'https://www.craftsy.com/class/sew-your-own-designer-handbag/', + 'info_dict': { + 'id': 'sew-your-own-designer-handbag', + 'title': 'Sew Your Own Designer Handbag', + 'description': 'md5:8270d0ef5427d3c895a27351aeaac276', + }, + 'playlist_mincount': 1, + }, { + 'url': 
'https://www.craftsy.com/class/all-access-estes-park-wool-market/', + 'info_dict': { + 'id': 'all-access-estes-park-wool-market', + 'title': 'All Access: Estes Park Wool Market', + 'description': 'md5:aded1bd8d38ae2fae4dae936c0ae01e7', + }, + 'playlist_count': 6, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + video_data = self._parse_json(self._search_regex( + r'class_video_player_vars\s*=\s*({.*})\s*;', + get_element_by_id('vidstore-classes_class-video-player-js-extra', webpage), + 'video data'), video_id, transform_source=js_to_json) + + account_id = traverse_obj(video_data, ('video_player', 'bc_account_id')) + + entries = [] + class_preview = traverse_obj(video_data, ('video_player', 'class_preview')) + if class_preview: + v_id = class_preview.get('video_id') + entries.append(self.url_result( + f'http://players.brightcove.net/{account_id}/default_default/index.html?videoId={v_id}', + BrightcoveNewIE, v_id, class_preview.get('title'))) + + if dict_get(video_data, ('is_free', 'user_has_access')): + entries += [ + self.url_result( + f'http://players.brightcove.net/{account_id}/default_default/index.html?videoId={lesson["video_id"]}', + BrightcoveNewIE, lesson['video_id'], lesson.get('title')) + for lesson in video_data['lessons']] + + return self.playlist_result( + entries, video_id, video_data.get('class_title'), + self._html_search_meta(('og:description', 'description'), webpage, default=None)) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index e5ae12a7d..52279b985 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -322,6 +322,7 @@ from .cpac import ( from .cozytv import CozyTVIE from .cracked import CrackedIE from .crackle import CrackleIE +from .craftsy import CraftsyIE from .crooksandliars import CrooksAndLiarsIE from .crowdbunker import ( CrowdBunkerIE, From 504f789ad55f0581681171abc428c3094057cae1 Mon Sep 17 00:00:00 2001 From: Alexander Seiler Date: Thu, 31 Mar 2022 05:23:32 +0200 Subject: [PATCH 0883/2552] [AZMedien] Support `tv.telezueri.ch` (#3251) Authored by: goggle --- yt_dlp/extractor/azmedien.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/azmedien.py b/yt_dlp/extractor/azmedien.py index b3cabbf94..0168340b9 100644 --- a/yt_dlp/extractor/azmedien.py +++ b/yt_dlp/extractor/azmedien.py @@ -11,7 +11,7 @@ class AZMedienIE(InfoExtractor): IE_DESC = 'AZ Medien videos' _VALID_URL = r'''(?x) https?:// - (?:www\.)? + (?:www\.|tv\.)? 
(?P telezueri\.ch| telebaern\.tv| @@ -31,7 +31,7 @@ class AZMedienIE(InfoExtractor): ''' _TESTS = [{ - 'url': 'https://www.telezueri.ch/sonntalk/bundesrats-vakanzen-eu-rahmenabkommen-133214569', + 'url': 'https://tv.telezueri.ch/sonntalk/bundesrats-vakanzen-eu-rahmenabkommen-133214569', 'info_dict': { 'id': '1_anruz3wy', 'ext': 'mp4', @@ -39,6 +39,9 @@ class AZMedienIE(InfoExtractor): 'uploader_id': 'TVOnline', 'upload_date': '20180930', 'timestamp': 1538328802, + 'view_count': int, + 'thumbnail': 'http://cfvod.kaltura.com/p/1719221/sp/171922100/thumbnail/entry_id/1_anruz3wy/version/100031', + 'duration': 1930 }, 'params': { 'skip_download': True, From f189faf1ce0e8c0d81bc7ec841718fe03b74ca34 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 31 Mar 2022 13:30:07 +0530 Subject: [PATCH 0884/2552] [BRMediathek] Fix VALID_URL Closes #2466 --- yt_dlp/extractor/br.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/br.py b/yt_dlp/extractor/br.py index 7169eceb6..0155827d8 100644 --- a/yt_dlp/extractor/br.py +++ b/yt_dlp/extractor/br.py @@ -175,7 +175,7 @@ class BRIE(InfoExtractor): class BRMediathekIE(InfoExtractor): IE_DESC = 'Bayerischer Rundfunk Mediathek' - _VALID_URL = r'https?://(?:www\.)?br\.de/mediathek/video/[^/?&#]*?-(?Pav:[0-9a-f]{24})' + _VALID_URL = r'https?://(?:www\.)?br\.de/mediathek//?video/(?:[^/?&#]+?-)?(?Pav:[0-9a-f]{24})' _TESTS = [{ 'url': 'https://www.br.de/mediathek/video/gesundheit-die-sendung-vom-28112017-av:5a1e6a6e8fce6d001871cc8e', @@ -188,6 +188,9 @@ class BRMediathekIE(InfoExtractor): 'timestamp': 1511942766, 'upload_date': '20171129', } + }, { + 'url': 'https://www.br.de/mediathek//video/av:61b0db581aed360007558c12', + 'only_matching': True, }] def _real_extract(self, url): From c4f60dd7cdbf5282a8d1a8fa8dd4f6fd60acc034 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 31 Mar 2022 13:19:16 +0530 Subject: [PATCH 0885/2552] [utils] Add `try_call` --- CONTRIBUTING.md | 2 +- yt_dlp/downloader/http.py | 6 +++--- yt_dlp/extractor/mediasite.py | 11 ++++++----- yt_dlp/extractor/whowatch.py | 9 +++++---- yt_dlp/utils.py | 16 ++++++++++------ 5 files changed, 25 insertions(+), 19 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index dbd6a84b2..1897f73e0 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -643,7 +643,7 @@ Wrap all extracted numeric data into safe functions from [`yt_dlp/utils.py`](yt_ Use `url_or_none` for safe URL processing. -Use `try_get`, `dict_get` and `traverse_obj` for safe metadata extraction from parsed JSON. +Use `traverse_obj` and `try_call` (superseeds `dict_get` and `try_get`) for safe metadata extraction from parsed JSON. Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction. 
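As the http.py and whowatch.py hunks below show, `try_call` takes zero-argument callables directly, instead of `try_get`'s getter-of-a-source style. A small usage sketch with hypothetical data, assuming this patch is applied:

```python
from yt_dlp.utils import try_call, try_get

data = {'share_info': {}}  # hypothetical, partially-filled API response

# Old style: a getter that receives the source object
title = try_get(data, lambda x: x['share_info']['live_title'][1:-1], str)

# New style: the first callable that doesn't raise (and matches expected_type) wins
title = try_call(
    lambda: data['share_info']['live_title'][1:-1],
    lambda: data['live']['title'],
    expected_type=str)  # -> None here, since both lookups raise KeyError
```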
diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index 8e096b76b..cabf401a7 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -18,7 +18,7 @@ from ..utils import ( parse_http_range, sanitized_Request, ThrottledDownload, - try_get, + try_call, write_xattr, XAttrMetadataError, XAttrUnavailableError, @@ -120,12 +120,12 @@ class HttpFD(FileDownloader): else: range_end = None - if try_get(None, lambda _: range_start > range_end): + if try_call(lambda: range_start > range_end): ctx.resume_len = 0 ctx.open_mode = 'wb' raise RetryDownload(Exception(f'Conflicting range. (start={range_start} > end={range_end})')) - if try_get(None, lambda _: range_end >= ctx.content_len): + if try_call(lambda: range_end >= ctx.content_len): range_end = ctx.content_len - 1 request = sanitized_Request(url, request_data, headers) diff --git a/yt_dlp/extractor/mediasite.py b/yt_dlp/extractor/mediasite.py index ace86c2fd..fbf9223b2 100644 --- a/yt_dlp/extractor/mediasite.py +++ b/yt_dlp/extractor/mediasite.py @@ -14,6 +14,7 @@ from ..utils import ( float_or_none, mimetype2ext, str_or_none, + try_call, try_get, unescapeHTML, unsmuggle_url, @@ -145,11 +146,11 @@ class MediasiteIE(InfoExtractor): 'duration': slide['Time'] / 1000, }) - next_time = try_get(None, [ - lambda _: Stream['Slides'][i + 1]['Time'], - lambda _: duration, - lambda _: slide['Time'], - ], expected_type=(int, float)) + next_time = try_call( + lambda: Stream['Slides'][i + 1]['Time'], + lambda: duration, + lambda: slide['Time'], + expected_type=(int, float)) fragments.append({ 'path': fname_template.format(slide.get('Number', i + 1)), diff --git a/yt_dlp/extractor/whowatch.py b/yt_dlp/extractor/whowatch.py index f8bc2e73a..e4b610d00 100644 --- a/yt_dlp/extractor/whowatch.py +++ b/yt_dlp/extractor/whowatch.py @@ -5,6 +5,7 @@ from .common import InfoExtractor from ..utils import ( int_or_none, qualities, + try_call, try_get, ExtractorError, ) @@ -26,10 +27,10 @@ class WhoWatchIE(InfoExtractor): metadata = self._download_json('https://api.whowatch.tv/lives/%s' % video_id, video_id) live_data = self._download_json('https://api.whowatch.tv/lives/%s/play' % video_id, video_id) - title = try_get(None, ( - lambda x: live_data['share_info']['live_title'][1:-1], - lambda x: metadata['live']['title'], - ), compat_str) + title = try_call( + lambda: live_data['share_info']['live_title'][1:-1], + lambda: metadata['live']['title'], + expected_type=str) hls_url = live_data.get('hls_url') if not hls_url: diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 62a1800d4..22062f85f 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -3096,15 +3096,19 @@ def dict_get(d, key_or_keys, default=None, skip_false_values=True): return d.get(key_or_keys, default) -def try_get(src, getter, expected_type=None): - for get in variadic(getter): +def try_call(*funcs, expected_type=None, args=[], kwargs={}): + for f in funcs: try: - v = get(src) - except (AttributeError, KeyError, TypeError, IndexError): + val = f(*args, **kwargs) + except (AttributeError, KeyError, TypeError, IndexError, ZeroDivisionError): pass else: - if expected_type is None or isinstance(v, expected_type): - return v + if expected_type is None or isinstance(val, expected_type): + return val + + +def try_get(src, getter, expected_type=None): + return try_call(*variadic(getter), args=(src,), expected_type=expected_type) def filter_dict(dct, cndn=lambda _, v: v is not None): From e6f868a63c15f576152733a1508f474b5e5bd1ef Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 31 Mar 2022 
From e6f868a63c15f576152733a1508f474b5e5bd1ef Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Thu, 31 Mar 2022 13:25:50 +0530
Subject: [PATCH 0886/2552] [utils] `traverse_obj`: Allow filtering by value

---
 yt_dlp/extractor/funimation.py | 2 +-
 yt_dlp/extractor/iqiyi.py      | 4 ++--
 yt_dlp/utils.py                | 6 +++---
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/yt_dlp/extractor/funimation.py b/yt_dlp/extractor/funimation.py
index 36a9c4772..6aa9bc9ce 100644
--- a/yt_dlp/extractor/funimation.py
+++ b/yt_dlp/extractor/funimation.py
@@ -333,7 +333,7 @@ class FunimationShowIE(FunimationBaseIE):
             'https://prod-api-funimationnow.dadcdigital.com/api/funimation/episodes/?limit=99999&title_id=%s'
             % show_info.get('id'), display_id)
 
-        vod_items = traverse_obj(items_info, ('items', ..., re.compile('(?i)mostRecent[AS]vod').match, 'item'))
+        vod_items = traverse_obj(items_info, ('items', ..., lambda k, _: re.match(r'(?i)mostRecent[AS]vod', k), 'item'))
 
         return {
             '_type': 'playlist',
diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py
index dc4667744..14877d405 100644
--- a/yt_dlp/extractor/iqiyi.py
+++ b/yt_dlp/extractor/iqiyi.py
@@ -626,8 +626,8 @@ class IqIE(InfoExtractor):
                 note=f'Downloading format data for {self._BID_TAGS[bid]}',
                 errnote='Unable to download format data', fatal=False), 'data', expected_type=dict)
-            video_format = next((video_format for video_format in traverse_obj(
-                format_data, ('program', 'video', ...), expected_type=dict, default=[]) if str(video_format['bid']) == bid), {})
+            video_format = traverse_obj(format_data, ('program', 'video', lambda _, v: str(v['bid']) == bid),
+                                        expected_type=dict, default=[], get_all=False) or {}
             extracted_formats = []
             if video_format.get('m3u8Url'):
                 extracted_formats.extend(self._extract_m3u8_formats(
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 22062f85f..a2fa29afe 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -5152,8 +5152,8 @@ def traverse_obj(
     @param path_list        A list of paths which are checked one by one.
                             Each path is a list of keys where each key is a string,
                             a function, a tuple of strings/None or "...".
-                            When a function is given, it takes the key as argument and
-                            returns whether the key matches or not. When a tuple is given,
+                            When a function is given, it takes the key and value as arguments
+                            and returns whether the key matches or not. When a tuple is given,
                             all the keys given in the tuple are traversed, and
                            "..." traverses all the keys in the object
                            "None" returns the object without traversal
@@ -5198,7 +5198,7 @@ def traverse_obj(
                     obj = str(obj)
                 _current_depth += 1
                 depth = max(depth, _current_depth)
-                return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if key(k)]
+                return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if try_call(key, args=(k, v))]
             elif isinstance(obj, dict) and not (is_user_input and key == ':'):
                 obj = (obj.get(key) if casesense or (key in obj)
                        else next((v for k, v in obj.items() if _lower(k) == key), None))

From 5d45484cc762861f8fe59fa42d499db5a284c2c7 Mon Sep 17 00:00:00 2001
From: "Lesmiscore (Naoya Ozaki)"
Date: Fri, 1 Apr 2022 19:31:58 +0900
Subject: [PATCH 0887/2552] [niconico] Fix extraction of thumbnails and
 uploader (#3266)

---
 yt_dlp/extractor/niconico.py | 18 ++++++++++++++----
 yt_dlp/utils.py              |  7 +++++--
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py
index 74828f833..a5a1a01e0 100644
--- a/yt_dlp/extractor/niconico.py
+++ b/yt_dlp/extractor/niconico.py
@@ -25,7 +25,10 @@ from ..utils import (
     parse_duration,
     parse_filesize,
     parse_iso8601,
+    parse_resolution,
+    qualities,
     remove_start,
+    str_or_none,
     traverse_obj,
     try_get,
     unescapeHTML,
@@ -430,18 +433,25 @@ class NiconicoIE(InfoExtractor):
             # find in json (logged in)
             tags = traverse_obj(api_data, ('tag', 'items', ..., 'name'))
 
+        thumb_prefs = qualities(['url', 'middleUrl', 'largeUrl', 'player', 'ogp'])
+
         return {
             'id': video_id,
             '_api_data': api_data,
             'title': get_video_info(('originalTitle', 'title')) or self._og_search_title(webpage, default=None),
             'formats': formats,
-            'thumbnail': get_video_info('thumbnail', 'url') or self._html_search_meta(
-                ('image', 'og:image'), webpage, 'thumbnail', default=None),
+            'thumbnails': [{
+                'id': key,
+                'url': url,
+                'ext': 'jpg',
+                'preference': thumb_prefs(key),
+                **parse_resolution(url, lenient=True),
+            } for key, url in (get_video_info('thumbnail') or {}).items() if url],
             'description': clean_html(get_video_info('description')),
-            'uploader': traverse_obj(api_data, ('owner', 'nickname')),
+            'uploader': traverse_obj(api_data, ('owner', 'nickname'), ('channel', 'name'), ('community', 'name')),
+            'uploader_id': str_or_none(traverse_obj(api_data, ('owner', 'id'), ('channel', 'id'), ('community', 'id'))),
             'timestamp': parse_iso8601(get_video_info('registeredAt')) or parse_iso8601(
                 self._html_search_meta('video:release_date', webpage, 'date published', default=None)),
-            'uploader_id': traverse_obj(api_data, ('owner', 'id')),
             'channel': traverse_obj(api_data, ('channel', 'name'), ('community', 'name')),
             'channel_id': traverse_obj(api_data, ('channel', 'id'), ('community', 'id')),
             'view_count': int_or_none(get_video_info('count', 'view')),
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index a2fa29afe..ce918750d 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -2418,11 +2418,14 @@ def parse_count(s):
         return str_to_int(mobj.group(1))
 
 
-def parse_resolution(s):
+def parse_resolution(s, *, lenient=False):
     if s is None:
         return {}
 
-    mobj = re.search(r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
+    if lenient:
+        mobj = re.search(r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)', s)
+    else:
+        mobj = re.search(r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
     if mobj:
         return {
            'width': int(mobj.group('w')),
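The new `lenient` flag matters for strings like the Niconico thumbnail URLs above, where the height is immediately followed by more alphanumerics and the strict trailing guard would reject the match. A trimmed sketch of just the `WxH` branch (the full helper checks further patterns):

```python
import re

def parse_resolution(s, *, lenient=False):
    # lenient mode drops the "not followed by alphanumerics" guard,
    # so resolutions embedded in filenames or URLs still match
    if s is None:
        return {}
    if lenient:
        mobj = re.search(r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)', s)
    else:
        mobj = re.search(r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
    if mobj:
        return {'width': int(mobj.group('w')), 'height': int(mobj.group('h'))}
    return {}

assert parse_resolution('1280x720p', lenient=True) == {'width': 1280, 'height': 720}
assert parse_resolution('1280x720p') == {}  # strict: '720' is followed by 'p'
```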
From 4c268f9cb75edd0ca7b2e3737cfa5abd21ee653d Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Sat, 2 Apr 2022 11:20:31 +0530
Subject: [PATCH 0888/2552] [Nebula] Fix bug in
 52efa4b31200119adaa8acf33e50b84fcb6948f0

---
 yt_dlp/extractor/nebula.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/nebula.py b/yt_dlp/extractor/nebula.py
index b77ef5f28..77f253519 100644
--- a/yt_dlp/extractor/nebula.py
+++ b/yt_dlp/extractor/nebula.py
@@ -86,7 +86,7 @@ class NebulaBaseIE(InfoExtractor):
             # if 401 or 403, attempt credential re-auth and retry
             if exc.cause and isinstance(exc.cause, urllib.error.HTTPError) and exc.cause.code in (401, 403):
                 self.to_screen(f'Reauthenticating to Nebula and retrying, because last {auth_type} call resulted in error {exc.cause.code}')
-                self._login()
+                self._perform_login()
                 return inner_call()
             else:
                 raise

From c085e4ec475eb17343d228d2749c8e2a1d998edf Mon Sep 17 00:00:00 2001
From: nixxo
Date: Sat, 2 Apr 2022 07:57:56 +0200
Subject: [PATCH 0889/2552] [rai] Fix extraction of http formats (#3272)

Closes #3270
Authored by: nixxo
---
 yt_dlp/extractor/rai.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/rai.py b/yt_dlp/extractor/rai.py
index 9d243b2be..6864129c6 100644
--- a/yt_dlp/extractor/rai.py
+++ b/yt_dlp/extractor/rai.py
@@ -118,7 +118,7 @@ class RaiBaseIE(InfoExtractor):
             })
 
     def _create_http_urls(self, relinker_url, fmts):
-        _RELINKER_REG = r'https?://(?P<host>[^/]+?)/(?:i/)?(?P<extra>[^/]+?)/(?P<path>.+?)/(?P<id>\d+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4|/playlist\.m3u8).+?'
+        _RELINKER_REG = r'https?://(?P<host>[^/]+?)/(?:i/)?(?P<extra>[^/]+?)/(?P<path>.+?)/(?P<id>\w+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4|/playlist\.m3u8).+?'
         _MP4_TMPL = '%s&overrideUserAgentRule=mp4-%s'
         _QUALITY = {
             # tbr: w, h

From c8e856a551730c289d9ef8f0674620753de6c5be Mon Sep 17 00:00:00 2001
From: coletdjnz
Date: Sat, 2 Apr 2022 19:07:13 +1300
Subject: [PATCH 0890/2552] [web.archive:youtube] Make CDX API requests
 non-fatal

Partial fix for https://github.com/yt-dlp/yt-dlp/issues/3278

Authored-by: coletdjnz
---
 yt_dlp/extractor/archiveorg.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py
index 2a25c0713..b06ac74ae 100644
--- a/yt_dlp/extractor/archiveorg.py
+++ b/yt_dlp/extractor/archiveorg.py
@@ -457,7 +457,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
     _OLDEST_CAPTURE_DATE = 20050214000000
     _NEWEST_CAPTURE_DATE = 20500101000000
 
-    def _call_cdx_api(self, item_id, url, filters: list = None, collapse: list = None, query: dict = None, note='Downloading CDX API JSON'):
+    def _call_cdx_api(self, item_id, url, filters: list = None, collapse: list = None, query: dict = None, note=None, fatal=False):
         # CDX docs: https://github.com/internetarchive/wayback/blob/master/wayback-cdx-server/README.md
         query = {
             'url': url,
@@ -468,7 +468,9 @@ class YoutubeWebArchiveIE(InfoExtractor):
             'collapse': collapse or [],
             **(query or {})
         }
-        res = self._download_json('https://web.archive.org/cdx/search/cdx', item_id, note, query=query)
+        res = self._download_json(
+            'https://web.archive.org/cdx/search/cdx', item_id,
+            note or 'Downloading CDX API JSON', query=query, fatal=fatal)
         if isinstance(res, list) and len(res) >= 2:
             # format response to make it easier to use
             return list(dict(zip(res[0], v)) for v in res[1:])
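The CDX endpoint returns a JSON table whose first row is the header, which the extractor zips into per-capture dicts; with `fatal=False` a failed request now yields `None` instead of aborting extraction. A rough standalone model of that flow, using `urllib` in place of `InfoExtractor._download_json` (an assumption for illustration only):

```python
import json
import urllib.parse
import urllib.request

def call_cdx_api(url, fatal=False):
    query = urllib.parse.urlencode({'url': url, 'output': 'json', 'limit': 5})
    try:
        with urllib.request.urlopen(f'https://web.archive.org/cdx/search/cdx?{query}') as resp:
            res = json.load(resp)
    except OSError:  # urllib.error.URLError is a subclass of OSError
        if fatal:
            raise
        return None  # non-fatal: the caller can fall back to other capture sources
    if isinstance(res, list) and len(res) >= 2:
        # first row is the header: zip it against every data row
        return [dict(zip(res[0], row)) for row in res[1:]]
    return None
```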
From ad210f4fd460574436dc65d3c3cee041c905c46f Mon Sep 17 00:00:00 2001
From: coletdev
Date: Sat, 2 Apr 2022 19:11:14 +1300
Subject: [PATCH 0891/2552] [youtube:search] Support hashtag entries (#3265)

Authored-by: coletdjnz
---
 yt_dlp/extractor/youtube.py | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 19b4985f6..4e6a80911 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -3903,6 +3903,13 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
         if video_id:
             return self._extract_video(video_renderer)
 
+    def _hashtag_tile_entry(self, hashtag_tile_renderer):
+        url = urljoin('https://youtube.com', traverse_obj(
+            hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url')))
+        if url:
+            return self.url_result(
+                url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
+
     def _post_thread_entries(self, post_thread_renderer):
         post_renderer = try_get(
             post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
@@ -3991,6 +3998,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
             'videoRenderer': lambda x: [self._video_entry(x)],
             'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
             'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
+            'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
         }
         for key, renderer in isr_content.items():
             if key not in known_renderers:
@@ -5520,7 +5528,17 @@ class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
             'id': 'python',
             'title': 'python',
         }
-
+    }, {
+        'url': 'https://www.youtube.com/results?search_query=%23cats',
+        'playlist_mincount': 1,
+        'info_dict': {
+            'id': '#cats',
+            'title': '#cats',
+            'entries': [{
+                'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
+                'title': '#cats',
+            }],
+        },
     }, {
         'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
         'only_matching': True,

From a17526e427fffcd38064a4657de4fa59cf5a9953 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Sun, 3 Apr 2022 19:01:03 +0530
Subject: [PATCH 0892/2552] [youtube:tab] Minor improvements (See desc)

* Support shorts on channel homepage
* Extract thumbnail of OLAK playlists

---
 yt_dlp/extractor/youtube.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 4e6a80911..485849ba9 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -3777,7 +3777,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
     def _extract_basic_item_renderer(item):
         # Modified from _extract_grid_item_renderer
         known_basic_renderers = (
-            'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
+            'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
         )
         for key, renderer in item.items():
             if not isinstance(renderer, dict):
@@ -3992,7 +3992,8 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
         known_renderers = {
             'playlistVideoListRenderer': self._playlist_entries,
             'gridRenderer': self._grid_entries,
-            'shelfRenderer': lambda x: self._shelf_entries(x),
+            'reelShelfRenderer': self._grid_entries,
+            'shelfRenderer': self._shelf_entries,
             'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
             'backstagePostThreadRenderer': self._post_thread_entries,
             'videoRenderer': lambda x: [self._video_entry(x)],
@@ -4170,7 +4171,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
             })
 
         primary_thumbnails = self._extract_thumbnails(
-            primary_sidebar_renderer, ('thumbnailRenderer', 'playlistVideoThumbnailRenderer', 'thumbnail'))
+            primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))
 
         if playlist_id is None:
             playlist_id = item_id
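A `hashtagTileRenderer` carries only a relative `/hashtag/...` link, so the entry above is built by digging that URL out of nested dicts and joining it against the site root. A toy model of that lookup, with a minimal dict-only stand-in for yt-dlp's `traverse_obj`:

```python
from urllib.parse import urljoin

def traverse_obj(obj, path):
    # Minimal stand-in: walk a chain of dict keys, returning None on any miss
    for key in path:
        if not isinstance(obj, dict):
            return None
        obj = obj.get(key)
    return obj

def hashtag_tile_entry(renderer):
    url = traverse_obj(renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    if url:
        return {'_type': 'url', 'url': urljoin('https://youtube.com', url)}

sample = {'onTapCommand': {'commandMetadata': {'webCommandMetadata': {'url': '/hashtag/cats'}}}}
assert hashtag_tile_entry(sample)['url'] == 'https://youtube.com/hashtag/cats'
```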
From dc57e74a7fb9418ec403ff461eab3a47a350d7a3 Mon Sep 17 00:00:00 2001
From: aarubui
Date: Sun, 3 Apr 2022 23:53:22 +1000
Subject: [PATCH 0893/2552] [tenplay] Improve extractor (#3280)

Authored by: aarubui
---
 yt_dlp/extractor/tenplay.py | 42 ++++++++++++++++++++++++++++++++-----
 1 file changed, 37 insertions(+), 5 deletions(-)

diff --git a/yt_dlp/extractor/tenplay.py b/yt_dlp/extractor/tenplay.py
index 5b3222ecf..5c7b54531 100644
--- a/yt_dlp/extractor/tenplay.py
+++ b/yt_dlp/extractor/tenplay.py
@@ -7,6 +7,7 @@ import base64
 from .common import InfoExtractor
 from ..utils import (
     HEADRequest,
+    int_or_none,
     urlencode_postdata,
 )
 
@@ -15,6 +16,28 @@ class TenPlayIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?:[^/]+/)+(?P<id>tpv\d{6}[a-z]{5})'
     _NETRC_MACHINE = '10play'
     _TESTS = [{
+        'url': 'https://10play.com.au/neighbours/web-extras/season-39/nathan-borg-is-the-first-aussie-actor-with-a-cochlear-implant-to-join-neighbours/tpv210128qupwd',
+        'info_dict': {
+            'id': '6226844312001',
+            'ext': 'mp4',
+            'title': 'Nathan Borg Is The First Aussie Actor With A Cochlear Implant To Join Neighbours',
+            'alt_title': 'Nathan Borg Is The First Aussie Actor With A Cochlear Implant To Join Neighbours',
+            'description': 'md5:a02d0199c901c2dd4c796f1e7dd0de43',
+            'duration': 186,
+            'season': 39,
+            'series': 'Neighbours',
+            'thumbnail': r're:https://.*\.jpg',
+            'uploader': 'Channel 10',
+            'age_limit': 15,
+            'timestamp': 1611810000,
+            'upload_date': '20210128',
+            'uploader_id': '2199827728001',
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'skip': 'Only available in Australia',
+    }, {
         'url': 'https://10play.com.au/todd-sampsons-body-hack/episodes/season-4/episode-7/tpv200921kvngh',
         'info_dict': {
             'id': '6192880312001',
@@ -62,12 +85,17 @@ class TenPlayIE(InfoExtractor):
 
     def _real_extract(self, url):
         content_id = self._match_id(url)
-        _token = self._get_bearer_token(content_id)
         data = self._download_json(
             'https://10play.com.au/api/v1/videos/' + content_id, content_id)
+        headers = {}
+
+        if data.get('memberGated') is True:
+            _token = self._get_bearer_token(content_id)
+            headers = {'Authorization': _token}
+
         _video_url = self._download_json(
             data.get('playbackApiEndpoint'), content_id, 'Downloading video JSON',
-            headers={'Authorization': _token}).get('source')
+            headers=headers).get('source')
         m3u8_url = self._request_webpage(HEADRequest(
             _video_url), content_id).geturl()
         if '10play-not-in-oz' in m3u8_url:
@@ -77,12 +105,16 @@ class TenPlayIE(InfoExtractor):
 
         return {
             'formats': formats,
+            'subtitles': {'en': [{'url': data.get('captionUrl')}]} if data.get('captionUrl') else None,
             'id': data.get('altId') or content_id,
-            'title': data.get('title'),
+            'duration': data.get('duration'),
+            'title': data.get('subtitle'),
+            'alt_title': data.get('title'),
             'description': data.get('description'),
             'age_limit': self._AUS_AGES.get(data.get('classification')),
-            'series': data.get('showName'),
-            'season': data.get('showContentSeason'),
+            'series': data.get('tvShow'),
+            'season': int_or_none(data.get('season')),
+            'episode_number': int_or_none(data.get('episode')),
             'timestamp': data.get('published'),
             'thumbnail': data.get('imageUrl'),
             'uploader': 'Channel 10',
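The key behavioural change above is that authentication became conditional: the metadata call is anonymous, and a bearer token is only requested when the video is flagged `memberGated`. A hedged sketch of that flow; `fetch_json` and `get_bearer_token` are placeholders for illustration, not real yt-dlp APIs:

```python
import json
import urllib.request

def fetch_json(url, headers=None):
    req = urllib.request.Request(url, headers=headers or {})
    with urllib.request.urlopen(req) as resp:
        return json.load(resp)

def playback_source(content_id, get_bearer_token):
    # Public metadata first; only log in when the item is member-gated
    data = fetch_json(f'https://10play.com.au/api/v1/videos/{content_id}')
    headers = {}
    if data.get('memberGated') is True:
        headers = {'Authorization': get_bearer_token(content_id)}
    return fetch_json(data['playbackApiEndpoint'], headers=headers).get('source')
```

This avoids pointless login round-trips (and credential prompts) for the majority of videos that need no account.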
From fbfde1c3e6b59c5ff94e2604f1502acdeb14f8f0 Mon Sep 17 00:00:00 2001
From: Fam0r
Date: Sun, 3 Apr 2022 18:11:50 +0300
Subject: [PATCH 0894/2552] [elonet] Rewrite extractor (#3277)

Closes #2911
Authored by: Fam0r, pukkandan
---
 yt_dlp/extractor/common.py |  4 +-
 yt_dlp/extractor/elonet.py | 85 ++++++++++++++------------------------
 2 files changed, 34 insertions(+), 55 deletions(-)

diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index d0e57da23..af964c527 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1297,8 +1297,8 @@ class InfoExtractor(object):
     @staticmethod
     def _og_regexes(prop):
         content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
-        property_re = (r'(?:name|property)=(?:\'og[:-]%(prop)s\'|"og[:-]%(prop)s"|\s*og[:-]%(prop)s\b)'
-                       % {'prop': re.escape(prop)})
+        property_re = (r'(?:name|property)=(?:\'og%(sep)s%(prop)s\'|"og%(sep)s%(prop)s"|\s*og%(sep)s%(prop)s\b)'
+                       % {'prop': re.escape(prop), 'sep': '(?::|[:-])'})
         template = r'<meta[^>]+?%s[^>]+?%s'
         return [
             template % (property_re, content_re),
diff --git a/yt_dlp/extractor/elonet.py b/yt_dlp/extractor/elonet.py
index eefba4e24..9c6aea28e 100644
--- a/yt_dlp/extractor/elonet.py
+++ b/yt_dlp/extractor/elonet.py
@@ -1,30 +1,22 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..utils import (
-    base_url,
-    ExtractorError,
-    try_get,
-)
-from ..compat import compat_str
+from ..utils import determine_ext
 
 
 class ElonetIE(InfoExtractor):
     _VALID_URL = r'https?://elonet\.finna\.fi/Record/kavi\.elonet_elokuva_(?P<id>[0-9]+)'
     _TESTS = [{
-        # m3u8 with subtitles
         'url': 'https://elonet.finna.fi/Record/kavi.elonet_elokuva_107867',
-        'md5': '8efc954b96c543711707f87de757caea',
         'info_dict': {
             'id': '107867',
             'ext': 'mp4',
             'title': 'Valkoinen peura',
-            'description': 'Valkoinen peura (1952) on Erik Blombergin ohjaama ja yhdessä Mirjami Kuosmasen kanssa käsikirjoittama tarunomainen kertomus valkoisen peuran hahmossa lii...',
-            'thumbnail': 'https://elonet.finna.fi/Cover/Show?id=kavi.elonet_elokuva_107867&index=0&size=large',
+            'thumbnail': r're:^https?://elonet\.finna\.fi/Cover/Show\?id=kavi\.elonet_elokuva_107867.+',
+            'description': 'md5:bded4201c9677fab10854884fe8f7312',
         },
+        'params': {'skip_download': 'dash'},
     }, {
         # DASH with subtitles
         'url': 'https://elonet.finna.fi/Record/kavi.elonet_elokuva_116539',
         'info_dict': {
             'id': '116539',
             'ext': 'mp4',
             'title': 'Minulla on tiikeri',
-            'description': 'Pienellä pojalla, joka asuu kerrostalossa, on kotieläimenä tiikeri. Se on kuitenkin salaisuus. Kerrostalon räpätäti on Kotilaisen täti, joka on aina vali...',
-            'thumbnail': 'https://elonet.finna.fi/Cover/Show?id=kavi.elonet_elokuva_116539&index=0&size=large&source=Solr',
-        }
+            'thumbnail': r're:^https?://elonet\.finna\.fi/Cover/Show\?id=kavi\.elonet_elokuva_116539.+',
+            'description': 'md5:5ab72b3fe76d3414e46cc8f277104419',
+        },
+        'params': {'skip_download': 'dash'},
+    }, {
+        # Page with multiple videos, download the main one
+        'url': 'https://elonet.finna.fi/Record/kavi.elonet_elokuva_117396',
+        'info_dict': {
+            'id': '117396',
+            'ext': 'mp4',
+            'title': 'Sampo',
+            'thumbnail': r're:^https?://elonet\.finna\.fi/Cover/Show\?id=kavi\.elonet_elokuva_117396.+',
+            'description': 'md5:ec69572a5b054d0ecafe8086b1fa96f7',
+        },
+        'params': {'skip_download': 'dash'},
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-        title = self._html_search_regex(
+        src = self._parse_json(self._html_search_regex(
+            r'<video[^>]+data-video-sources="([^"]+)"', webpage, 'json'), video_id)[0]['src']
+        ext = determine_ext(src)
 
-        json_s = self._html_search_regex(
-            r'data-video-sources="(.+?)"', webpage, 'json')
-        src = try_get(
-            self._parse_json(json_s, video_id),
-            lambda x: x[0]["src"], compat_str)
-        formats = []
-        subtitles = {}
-        if re.search(r'\.m3u8\??', src):
-            res = self._download_webpage_handle(
-                # elonet servers have certificate problems
-                src.replace('https:', 'http:'), video_id,
-                note='Downloading m3u8 information',
-                errnote='Failed to download m3u8 information')
-            if res:
-                doc, urlh = res
-                url = urlh.geturl()
-                formats, subtitles = self._parse_m3u8_formats_and_subtitles(doc, url)
-                for f in formats:
-                    f['ext'] = 'mp4'
-        elif re.search(r'\.mpd\??', src):
-            res = self._download_xml_handle(
-                src, video_id,
-                note='Downloading MPD manifest',
-                errnote='Failed to download MPD manifest')
-            if res:
-                doc, urlh = res
-                url = base_url(urlh.geturl())
-                formats, subtitles = self._parse_mpd_formats_and_subtitles(doc, mpd_base_url=url)
+        if ext == 'm3u8':
+            formats, subtitles = self._extract_m3u8_formats_and_subtitles(src, video_id, fatal=False)
+        elif ext == 'mpd':
+            formats, subtitles = self._extract_mpd_formats_and_subtitles(src, video_id, fatal=False)
         else:
-            raise ExtractorError("Unknown streaming format")
+            formats, subtitles = [], {}
+            self.raise_no_formats(f'Unknown streaming format {ext}')
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
             'formats': formats,
             'subtitles': subtitles,
         }
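The rewrite replaces ad-hoc `re.search` checks on the source URL with a single dispatch on the file extension, degrading to "no formats" rather than crashing on unknown containers. A reduced sketch of that pattern, with a simplified `determine_ext` (the yt-dlp helper handles many more edge cases) and caller-supplied parser callables:

```python
from urllib.parse import urlparse

def determine_ext(url):
    # Simplified: extension of the URL path, ignoring the query string
    path = urlparse(url).path
    return path.rpartition('.')[2] if '.' in path else None

def extract_formats(src, m3u8_parser, mpd_parser):
    ext = determine_ext(src)
    if ext == 'm3u8':
        return m3u8_parser(src)
    if ext == 'mpd':
        return mpd_parser(src)
    return [], {}  # unknown container: report no formats instead of raising

assert determine_ext('https://example.com/stream/playlist.m3u8?token=1') == 'm3u8'
```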
From 265e586d96bae2eb86a4f702ee2caef3b0cd78c3 Mon Sep 17 00:00:00 2001
From: "Lesmiscore (Naoya Ozaki)"
Date: Mon, 4 Apr 2022 00:41:14 +0900
Subject: [PATCH 0895/2552] [openrec] Download archived livestreams (#3267)

Authored by: Lesmiscore
---
 yt_dlp/extractor/openrec.py | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/yt_dlp/extractor/openrec.py b/yt_dlp/extractor/openrec.py
index b476c0986..5eb1cdbad 100644
--- a/yt_dlp/extractor/openrec.py
+++ b/yt_dlp/extractor/openrec.py
@@ -7,6 +7,7 @@ from ..utils import (
     get_first,
     int_or_none,
     traverse_obj,
+    try_get,
     unified_strdate,
     unified_timestamp,
 )
@@ -18,6 +19,13 @@ class OpenRecBaseIE(InfoExtractor):
         return self._parse_json(
             self._search_regex(r'(?m)window\.pageStore\s*=\s*(\{.+?\});$', webpage, 'window.pageStore'), video_id)
 
+    def _expand_media(self, video_id, media):
+        for name, m3u8_url in (media or {}).items():
+            if not m3u8_url:
+                continue
+            yield from self._extract_m3u8_formats(
+                m3u8_url, video_id, ext='mp4', m3u8_id=name)
+
     def _extract_movie(self, webpage, video_id, name, is_live):
         window_stores = self._extract_pagestore(webpage, video_id)
         movie_stores = [
@@ -29,13 +37,21 @@ class OpenRecBaseIE(InfoExtractor):
         if not any(movie_stores):
             raise ExtractorError(f'Failed to extract {name} info')
 
-        m3u8_playlists = get_first(movie_stores, 'media') or {}
-        formats = []
-        for name, m3u8_url in m3u8_playlists.items():
-            if not m3u8_url:
-                continue
-            formats.extend(self._extract_m3u8_formats(
-                m3u8_url, video_id, ext='mp4', live=is_live, m3u8_id=name))
+        formats = list(self._expand_media(video_id, get_first(movie_stores, 'media')))
+        if not formats and is_live:
+            # archived livestreams
+            cookies = self._get_cookies('https://www.openrec.tv/')
+            detail = self._download_json(
+                f'https://apiv5.openrec.tv/api/v5/movies/{video_id}/detail', video_id,
+                headers={
+                    'Origin': 'https://www.openrec.tv',
+                    'Referer': 'https://www.openrec.tv/',
+                    'access-token': try_get(cookies, lambda x: x.get('access_token').value),
+                    'uuid': try_get(cookies, lambda x: x.get('uuid').value),
+                })
+            new_media = traverse_obj(detail, ('data', 'items', ..., 'media'), get_all=False)
+            formats = list(self._expand_media(video_id, new_media))
+            is_live = False
 
         self._sort_formats(formats)

From 12e022d074c2e5b240788a61452e5536fa51c151 Mon Sep 17 00:00:00 2001
From: Tim Schindler
Date: Mon, 4 Apr 2022 09:20:14 +0200
Subject: [PATCH 0896/2552] [Cybrary] Add extractor (#3264)

Authored by: aaearon
---
 yt_dlp/extractor/cybrary.py    | 146 +++++++++++++++++++++++++++++++++
 yt_dlp/extractor/extractors.py |   4 +
 2 files changed, 150 insertions(+)
 create mode 100644 yt_dlp/extractor/cybrary.py

diff --git a/yt_dlp/extractor/cybrary.py b/yt_dlp/extractor/cybrary.py
new file mode 100644
index 000000000..c278f0fe0
--- /dev/null
+++ b/yt_dlp/extractor/cybrary.py
@@ -0,0 +1,146 @@
+# coding: utf-8
+from .common import InfoExtractor
+
+from ..utils import (
+    ExtractorError,
+    smuggle_url,
+    str_or_none,
+    traverse_obj,
+    urlencode_postdata
+)
+
+
+class CybraryBaseIE(InfoExtractor):
+    _API_KEY = 'AIzaSyCX9ru6j70PX2My1Eq6Q1zoMAhuTdXlzSw'
+    _ENDPOINTS = {
+        'course': 'https://app.cybrary.it/courses/api/catalog/browse/course/{}',
+        'course_enrollment': 'https://app.cybrary.it/courses/api/catalog/{}/enrollment',
+        'enrollment': 'https://app.cybrary.it/courses/api/enrollment/{}',
+        'launch': 'https://app.cybrary.it/courses/api/catalog/{}/launch',
+        'vimeo_oembed': 'https://vimeo.com/api/oembed.json?url=https://vimeo.com/{}',
+    }
+    _NETRC_MACHINE = 'cybrary'
+    _TOKEN = None
+
+    def _perform_login(self, username, password):
+        CybraryBaseIE._TOKEN = self._download_json(
+            f'https://identitytoolkit.googleapis.com/v1/accounts:signInWithPassword?key={self._API_KEY}',
+            None, data=urlencode_postdata({'email': username, 'password': password, 'returnSecureToken': True}),
+            note='Logging in')['idToken']
+
+    def _real_initialize(self):
+        if not self._TOKEN:
+            self.raise_login_required(method='password')
+
+    def _call_api(self, endpoint, item_id):
+        return self._download_json(
+            self._ENDPOINTS[endpoint].format(item_id), item_id,
+            note=f'Downloading {endpoint} JSON metadata',
+            headers={'Authorization': f'Bearer {self._TOKEN}'})
+
+    def _get_vimeo_id(self, activity_id):
+        launch_api = self._call_api('launch', activity_id)
+
+        if launch_api.get('url'):
+            return self._search_regex(r'https?://player\.vimeo\.com/video/(?P<vimeo_id>[0-9]+)', launch_api['url'],
'vimeo_id')
+        return traverse_obj(launch_api, ('vendor_data', 'content', ..., 'videoId'), get_all=False)
+
+
+class CybraryIE(CybraryBaseIE):
+    _VALID_URL = r'https?://app.cybrary.it/immersive/(?P<enrollment>[0-9]+)/activity/(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'https://app.cybrary.it/immersive/12487950/activity/63102',
+        'md5': '9ae12d37e555cb2ed554223a71a701d0',
+        'info_dict': {
+            'id': '646609770',
+            'ext': 'mp4',
+            'title': 'Getting Started',
+            'thumbnail': 'https://i.vimeocdn.com/video/1301817996-76a268f0c56cff18a5cecbbdc44131eb9dda0c80eb0b3a036_1280',
+            'series_id': '63111',
+            'uploader_url': 'https://vimeo.com/user30867300',
+            'duration': 88,
+            'uploader_id': 'user30867300',
+            'series': 'Cybrary Orientation',
+            'uploader': 'Cybrary',
+            'chapter': 'Cybrary Orientation Series',
+            'chapter_id': '63110'
+        },
+        'expected_warnings': ['No authenticators for vimeo']
+    }, {
+        'url': 'https://app.cybrary.it/immersive/12747143/activity/52686',
+        'md5': '62f26547dccc59c44363e2a13d4ad08d',
+        'info_dict': {
+            'id': '445638073',
+            'ext': 'mp4',
+            'title': 'Azure Virtual Network IP Addressing',
+            'thumbnail': 'https://i.vimeocdn.com/video/936667051-1647ace66c627d4a2382185e0dae8deb830309bfddd53f8b2367b2f91e92ed0e-d_1280',
+            'series_id': '52733',
+            'uploader_url': 'https://vimeo.com/user30867300',
+            'duration': 426,
+            'uploader_id': 'user30867300',
+            'series': 'AZ-500: Microsoft Azure Security Technologies',
+            'uploader': 'Cybrary',
+            'chapter': 'Implement Network Security',
+            'chapter_id': '52693'
+        },
+        'expected_warnings': ['No authenticators for vimeo']
+    }]
+
+    def _real_extract(self, url):
+        activity_id, enrollment_id = self._match_valid_url(url).group('id', 'enrollment')
+        course = self._call_api('enrollment', enrollment_id)['content']
+        activity = traverse_obj(course, ('learning_modules', ..., 'activities', lambda _, v: int(activity_id) == v['id']), get_all=False)
+
+        if activity.get('type') not in ['Video Activity', 'Lesson Activity']:
+            raise ExtractorError('The activity is not a video', expected=True)
+
+        module = next((m for m in course.get('learning_modules') or []
+                       if int(activity_id) in traverse_obj(m, ('activities', ..., 'id') or [])), None)
+
+        vimeo_id = self._get_vimeo_id(activity_id)
+
+        return {
+            '_type': 'url_transparent',
+            'series': traverse_obj(course, ('content_description', 'title')),
+            'series_id': str_or_none(traverse_obj(course, ('content_description', 'id'))),
+            'id': vimeo_id,
+            'chapter': module.get('title'),
+            'chapter_id': str_or_none(module.get('id')),
+            'title': activity.get('title'),
+            'url': smuggle_url(f'https://player.vimeo.com/video/{vimeo_id}', {'http_headers': {'Referer': 'https://api.cybrary.it'}})
+        }
+
+
+class CybraryCourseIE(CybraryBaseIE):
+    _VALID_URL = r'https://app.cybrary.it/browse/course/(?P<id>[\w-]+)/?(?:$|[#?])'
+    _TESTS = [{
+        'url': 'https://app.cybrary.it/browse/course/az-500-microsoft-azure-security-technologies',
+        'info_dict': {
+            'id': 898,
+            'title': 'AZ-500: Microsoft Azure Security Technologies',
+            'description': 'md5:69549d379c0fc1dec92926d4e8b6fbd4'
+        },
+        'playlist_count': 59
+    }, {
+        'url': 'https://app.cybrary.it/browse/course/cybrary-orientation',
+        'info_dict': {
+            'id': 1245,
+            'title': 'Cybrary Orientation',
+            'description': 'md5:9e69ff66b32fe78744e0ad4babe2e88e'
+        },
+        'playlist_count': 4
+    }]
+
+    def _real_extract(self, url):
+        course_id = self._match_id(url)
+        course = self._call_api('course', course_id)
+        enrollment_info = self._call_api('course_enrollment', course['id'])
+
+        entries = [self.url_result(
+            
f'https://app.cybrary.it/immersive/{enrollment_info["id"]}/activity/{activity["id"]}') + for activity in traverse_obj(course, ('content_item', 'learning_modules', ..., 'activities', ...))] + + return self.playlist_result( + entries, + traverse_obj(course, ('content_item', 'id'), expected_type=str_or_none), + course.get('title'), course.get('short_description')) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 52279b985..457f4c2aa 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -345,6 +345,10 @@ from .curiositystream import ( CuriosityStreamSeriesIE, ) from .cwtv import CWTVIE +from .cybrary import ( + CybraryIE, + CybraryCourseIE +) from .daftsex import DaftsexIE from .dailymail import DailyMailIE from .dailymotion import ( From 18eac302a22a31b324c848dce997c34213a5199a Mon Sep 17 00:00:00 2001 From: Ha Tien Loi Date: Mon, 4 Apr 2022 14:29:35 +0700 Subject: [PATCH 0897/2552] [Imdb] Improve extractor (#3291) Closes #3283 Authored by: hatienl0i261299 --- yt_dlp/extractor/imdb.py | 64 ++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/yt_dlp/extractor/imdb.py b/yt_dlp/extractor/imdb.py index 24f1fde64..7eb66d821 100644 --- a/yt_dlp/extractor/imdb.py +++ b/yt_dlp/extractor/imdb.py @@ -7,9 +7,10 @@ import re from .common import InfoExtractor from ..utils import ( determine_ext, + int_or_none, mimetype2ext, - parse_duration, qualities, + traverse_obj, try_get, url_or_none, ) @@ -28,6 +29,17 @@ class ImdbIE(InfoExtractor): 'title': 'No. 2', 'description': 'md5:87bd0bdc61e351f21f20d2d7441cb4e7', 'duration': 152, + 'thumbnail': r're:^https?://.+\.jpg', + } + }, { + 'url': 'https://www.imdb.com/video/vi3516832537', + 'info_dict': { + 'id': '3516832537', + 'ext': 'mp4', + 'title': 'Paul: U.S. 
Trailer #1',
+            'description': 'md5:17fcc4fe11ec29b4399be9d4c5ef126c',
+            'duration': 153,
+            'thumbnail': r're:^https?://.+\.jpg',
+        }
     }, {
         'url': 'http://www.imdb.com/video/_/vi2524815897',
@@ -51,8 +63,13 @@ class ImdbIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-
-        data = self._download_json(
+        webpage = self._download_webpage(f'https://www.imdb.com/video/vi{video_id}', video_id)
+        info = self._search_nextjs_data(webpage, video_id)
+        video_info = traverse_obj(info, ('props', 'pageProps', 'videoPlaybackData', 'video'), default={})
+        title = (traverse_obj(video_info, ('name', 'value'), ('primaryTitle', 'titleText', 'text'))
+                 or self._html_search_meta(('og:title', 'twitter:title'), webpage, default=None)
+                 or self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title'))
+        data = video_info.get('playbackURLs') or try_get(self._download_json(
             'https://www.imdb.com/ve/data/VIDEO_PLAYBACK_DATA', video_id,
             query={
                 'key': base64.b64encode(json.dumps({
@@ -60,11 +77,10 @@ class ImdbIE(InfoExtractor):
                     'subType': 'FORCE_LEGACY',
                     'id': 'vi%s' % video_id,
                 }).encode()).decode(),
-            })[0]
-
+            }), lambda x: x[0]['videoLegacyEncodings'])
         quality = qualities(('SD', '480p', '720p', '1080p'))
-        formats = []
-        for encoding in data['videoLegacyEncodings']:
+        formats, subtitles = [], {}
+        for encoding in data:
             if not encoding or not isinstance(encoding, dict):
                 continue
             video_url = url_or_none(encoding.get('url'))
@@ -73,11 +89,13 @@ class ImdbIE(InfoExtractor):
             ext = mimetype2ext(encoding.get(
                 'mimeType')) or determine_ext(video_url)
             if ext == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
+                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                     video_url, video_id, 'mp4', entry_protocol='m3u8_native',
-                    preference=1, m3u8_id='hls', fatal=False))
+                    preference=1, m3u8_id='hls', fatal=False)
+                subtitles = self._merge_subtitles(subtitles, subs)
+                formats.extend(fmts)
                 continue
-            format_id = encoding.get('definition')
+            format_id = traverse_obj(encoding, ('displayName', 'value'), 'definition')
             formats.append({
                 'format_id': format_id,
                 'url': video_url,
@@ -86,33 +104,15 @@ class ImdbIE(InfoExtractor):
 
         self._sort_formats(formats)
 
-        webpage = self._download_webpage(
-            'https://www.imdb.com/video/vi' + video_id, video_id)
-        video_metadata = self._parse_json(self._search_regex(
-            r'args\.push\(\s*({.+?})\s*\)\s*;', webpage,
-            'video metadata'), video_id)
-
-        video_info = video_metadata.get('VIDEO_INFO')
-        if video_info and isinstance(video_info, dict):
-            info = try_get(
-                video_info, lambda x: x[list(video_info.keys())[0]][0], dict)
-        else:
-            info = {}
-
-        title = self._html_search_meta(
-            ['og:title', 'twitter:title'], webpage) or self._html_search_regex(
-            r'<title>(.+?)</title>', webpage, 'title',
-            default=None) or info['videoTitle']
-
         return {
             'id': video_id,
             'title': title,
             'alt_title': info.get('videoSubTitle'),
             'formats': formats,
-            'description': info.get('videoDescription'),
-            'thumbnail': url_or_none(try_get(
-                info, lambda x: x['videoSlate']['source'])),
-            'duration': parse_duration(info.get('videoRuntime')),
+            'description': try_get(video_info, lambda x: x['description']['value']),
+            'thumbnail': url_or_none(try_get(video_info, lambda x: x['thumbnail']['url'])),
+            'duration': int_or_none(try_get(video_info, lambda x: x['runtime']['value'])),
+            'subtitles': subtitles,
         }

From 5127e92a943b620a2f5c348e339facef0134fd9f Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Sun, 3 Apr 2022 20:17:59 +0530
Subject: [PATCH 0898/2552] Fix filepath sanitization in `--print-to-file`

---
 yt_dlp/YoutubeDL.py | 23
 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 6a8e45b1a..4c43ac871 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -1240,18 +1240,21 @@ class YoutubeDL(object):
         outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
         return self.escape_outtmpl(outtmpl) % info_dict
 
-    def _prepare_filename(self, info_dict, tmpl_type='default'):
+    def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
+        assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
+        if outtmpl is None:
+            outtmpl = self.outtmpl_dict.get(tmpl_type or 'default', self.outtmpl_dict['default'])
         try:
-            outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
+            outtmpl = self._outtmpl_expandpath(outtmpl)
             filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
             if not filename:
                 return None
 
-            if tmpl_type in ('default', 'temp'):
+            if tmpl_type in ('', 'temp'):
                 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
                 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
                     filename = replace_extension(filename, ext, final_ext)
-            else:
+            elif tmpl_type:
                 force_ext = OUTTMPL_TYPES[tmpl_type]
                 if force_ext:
                     filename = replace_extension(filename, force_ext, info_dict.get('ext'))
@@ -1267,10 +1270,12 @@ class YoutubeDL(object):
             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
             return None
 
-    def prepare_filename(self, info_dict, dir_type='', warn=False):
-        """Generate the output filename."""
-
-        filename = self._prepare_filename(info_dict, dir_type or 'default')
+    def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
+        """Generate the output filename"""
+        if outtmpl:
+            assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
+            dir_type = None
+        filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
         if not filename and dir_type not in ('', 'temp'):
             return ''
 
@@ -2767,7 +2772,7 @@ class YoutubeDL(object):
             self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
 
         for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
-            filename = self.evaluate_outtmpl(file_tmpl, info_dict)
+            filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
             tmpl = format_tmpl(tmpl)
             self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
             if self._ensure_dir_exists(filename):
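The one-line change at the end routes `--print-to-file` targets through `prepare_filename`, so they receive the same expansion and sanitization as regular output templates. A toy model of why evaluating the template directly is not enough; here `sanitize` stands in for yt-dlp's real sanitization, which actually runs per-field during template expansion:

```python
def evaluate_outtmpl(tmpl, info):
    # Bare template expansion: no filesystem safety at all
    return tmpl % info

def sanitize(text):
    # Stand-in for filename sanitization: neutralize unsafe characters
    return ''.join('#' if c in '<>:"/\\|?*' else c for c in text)

def prepare_filename(info, outtmpl):
    # The fixed code path: expand the template, then sanitize the result
    return sanitize(evaluate_outtmpl(outtmpl, info))

info = {'title': 'AC/DC: Live'}
assert evaluate_outtmpl('%(title)s.txt', info) == 'AC/DC: Live.txt'   # old path
assert prepare_filename(info, '%(title)s.txt') == 'AC#DC# Live.txt'   # new path
```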
From 85e801a9dbc671f97af92aebea18170e6a384374 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Mon, 4 Apr 2022 14:56:02 +0530
Subject: [PATCH 0899/2552] Fallback to video-only format when selecting by
 extension

Closes #3296
---
 yt_dlp/YoutubeDL.py | 51 +++++++++++++++++----------------------------
 1 file changed, 19 insertions(+), 32 deletions(-)

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 4c43ac871..51d83bde0 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -2188,7 +2188,7 @@ class YoutubeDL(object):
                     yield merged_format
 
         else:
-            format_fallback, format_reverse, format_idx = False, True, 1
+            format_fallback, separate_fallback, format_reverse, format_idx = False, None, True, 1
             mobj = re.match(
                 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                 format_spec)
@@ -2215,6 +2215,7 @@ class YoutubeDL(object):
                 elif format_spec in self._format_selection_exts['audio']:
                     filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                 elif format_spec in self._format_selection_exts['video']:
                     filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
+                    separate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
                 elif format_spec in self._format_selection_exts['storyboards']:
                     filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                 else:
@@ -2223,11 +2224,15 @@ class YoutubeDL(object):
             def selector_function(ctx):
                 formats = list(ctx['formats'])
                 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
-                if format_fallback and ctx['incomplete_formats'] and not matches:
-                    # for extractors with incomplete formats (audio only (soundcloud)
-                    # or video only (imgur)) best/worst will fallback to
-                    # best/worst {video,audio}-only format
-                    matches = formats
+                if not matches:
+                    if format_fallback and ctx['incomplete_formats']:
+                        # for extractors with incomplete formats (audio only (soundcloud)
+                        # or video only (imgur)) best/worst will fallback to
+                        # best/worst {video,audio}-only format
+                        matches = formats
+                    elif separate_fallback and not ctx['has_merged_format']:
+                        # for compatibility with youtube-dl when there is no pre-merged format
+                        matches = list(filter(separate_fallback, formats))
                 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                 try:
                     yield matches[format_idx - 1]
@@ -2604,33 +2609,15 @@ class YoutubeDL(object):
                     self.report_error(err, tb=False, is_error=False)
                     continue
 
-            # While in format selection we may need to have an access to the original
-            # format set in order to calculate some metrics or do some processing.
-            # For now we need to be able to guess whether original formats provided
-            # by extractor are incomplete or not (i.e. whether extractor provides only
-            # video-only or audio-only formats) for proper formats selection for
-            # extractors with such incomplete formats (see
-            # https://github.com/ytdl-org/youtube-dl/pull/5556).
-            # Since formats may be filtered during format selection and may not match
-            # the original formats the results may be incorrect. Thus original formats
-            # or pre-calculated metrics should be passed to format selection routines
-            # as well.
-            # We will pass a context object containing all necessary additional data
-            # instead of just formats.
-            # This fixes incorrect format selection issue (see
-            # https://github.com/ytdl-org/youtube-dl/issues/10083).
-            incomplete_formats = (
-                # All formats are video-only or
-                all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
-                # all formats are audio-only
-                or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
-
-            ctx = {
+            formats_to_download = list(format_selector({
                 'formats': formats,
-                'incomplete_formats': incomplete_formats,
-            }
-
-            formats_to_download = list(format_selector(ctx))
+                'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
+                'incomplete_formats': (
+                    # All formats are video-only or
+                    all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
+                    # all formats are audio-only
+                    or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
+            }))
            if interactive_format_selection and not formats_to_download:
                self.report_error('Requested format is not available', tb=False, is_error=False)
                continue

From 04f3fd2c8948621612d852f8f68ef549a484bfb6 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Mon, 4 Apr 2022 13:57:35 +0530
Subject: [PATCH 0900/2552] [cleanup] Use `_html_extract_title`

---
 CONTRIBUTING.md                   |  4 ++--
 yt_dlp/extractor/adobeconnect.py  |  2 +-
 yt_dlp/extractor/allocine.py      |  6 ++----
 yt_dlp/extractor/archiveorg.py    |  3 +--
 yt_dlp/extractor/asiancrush.py    |  3 +--
 yt_dlp/extractor/bbc.py           |  5 ++---
 yt_dlp/extractor/breitbart.py     |  5 ++---
 yt_dlp/extractor/callin.py        |  2 +-
 yt_dlp/extractor/cbc.py           |  6 +++---
 yt_dlp/extractor/closertotruth.py |  3 +--
 yt_dlp/extractor/common.py        | 10 ++++------
 yt_dlp/extractor/cspan.py         |  2 +-
 yt_dlp/extractor/fivetv.py        |  3 +--
 yt_dlp/extractor/foxgay.py        |  3 +--
 yt_dlp/extractor/generic.py       |  6 ++----
 yt_dlp/extractor/glide.py         |  4 +---
 yt_dlp/extractor/hellporno.py     |  3 +--
 yt_dlp/extractor/huya.py          |  3 +--
 yt_dlp/extractor/imdb.py          |  2 +-
 yt_dlp/extractor/infoq.py         |  2 +-
 yt_dlp/extractor/iwara.py         |  3 +--
 yt_dlp/extractor/linkedin.py      |  2 +-
 yt_dlp/extractor/miaopai.py       |  3 +--
 yt_dlp/extractor/mojvideo.py      |  3 +--
 yt_dlp/extractor/newgrounds.py    |  6 ++----
 yt_dlp/extractor/nhk.py           |  4 +++-
 yt_dlp/extractor/playvid.py       |  3 +--
 yt_dlp/extractor/rule34video.py   |  2 +-
 yt_dlp/extractor/senategov.py     |  2 +-
 yt_dlp/extractor/sunporno.py      |  3 +--
 yt_dlp/extractor/thisav.py        |  4 +---
 yt_dlp/extractor/traileraddict.py |  3 +--
 yt_dlp/extractor/varzesh3.py      |  3 +--
 yt_dlp/extractor/vshare.py        |  3 +--
 yt_dlp/extractor/vupload.py       |  2 +-
 yt_dlp/extractor/weibo.py         |  3 +--
 yt_dlp/extractor/yahoo.py         |  2 +-
 yt_dlp/extractor/youjizz.py       |  3 +--
 38 files changed, 51 insertions(+), 80 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 1897f73e0..ea1893d15 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -534,13 +534,13 @@ Extracting variables is acceptable for reducing code duplication and improving r
 Correct:
 
 ```python
-title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
+title = self._html_search_regex(r'<h1>([^<]+)</h1>', webpage, 'title')
 ```
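With this change, a selector such as `-f mp4` no longer fails outright on sites that publish only split video and audio streams: if nothing matches the full audio+video filter and no pre-merged format exists, the video-only variant is used, matching youtube-dl's behaviour. A reduced model of that selection logic:

```python
def select_by_ext(formats, ext):
    # Prefer formats of the requested extension with both streams present
    full = [f for f in formats
            if f.get('ext') == ext and 'none' not in (f.get('acodec'), f.get('vcodec'))]
    if full:
        return full[-1]
    # Fall back to video-only only when the site has no pre-merged format at all
    has_merged = any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats)
    if not has_merged:
        video_only = [f for f in formats if f.get('ext') == ext and f.get('vcodec') != 'none']
        if video_only:
            return video_only[-1]
    return None

formats = [{'ext': 'mp4', 'vcodec': 'avc1', 'acodec': 'none'},
           {'ext': 'm4a', 'vcodec': 'none', 'acodec': 'mp4a'}]
assert select_by_ext(formats, 'mp4')['vcodec'] == 'avc1'
```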
 
 Incorrect:
 
 ```python
-TITLE_RE = r'<title>([^<]+)</title>'
+TITLE_RE = r'<h1>([^<]+)</h1>'
 # ...some lines of code...
 title = self._html_search_regex(TITLE_RE, webpage, 'title')
 ```
diff --git a/yt_dlp/extractor/adobeconnect.py b/yt_dlp/extractor/adobeconnect.py
index e688dddcb..e2e6f93f3 100644
--- a/yt_dlp/extractor/adobeconnect.py
+++ b/yt_dlp/extractor/adobeconnect.py
@@ -14,7 +14,7 @@ class AdobeConnectIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-        title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
+        title = self._html_extract_title(webpage)
         qs = compat_parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1])
         is_live = qs.get('isLive', ['false'])[0] == 'true'
         formats = []
diff --git a/yt_dlp/extractor/allocine.py b/yt_dlp/extractor/allocine.py
index cd533acfc..403a277e9 100644
--- a/yt_dlp/extractor/allocine.py
+++ b/yt_dlp/extractor/allocine.py
@@ -7,6 +7,7 @@ from ..utils import (
     int_or_none,
     qualities,
     remove_end,
+    strip_or_none,
     try_get,
     unified_timestamp,
     url_basename,
@@ -102,10 +103,7 @@ class AllocineIE(InfoExtractor):
             video_id = display_id
             media_data = self._download_json(
                 'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
-            title = remove_end(
-                self._html_search_regex(
-                    r'(?s)<title>(.+?)</title>', webpage, 'title').strip(),
-                ' - AlloCiné')
+            title = remove_end(strip_or_none(self._html_extract_title(webpage)), ' - AlloCiné')
             for key, value in media_data['video'].items():
                 if not key.endswith('Path'):
                     continue
diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py
index b06ac74ae..2ab3c1beb 100644
--- a/yt_dlp/extractor/archiveorg.py
+++ b/yt_dlp/extractor/archiveorg.py
@@ -483,8 +483,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
                 regex), webpage, name, default='{}'), video_id, fatal=False)
 
     def _extract_webpage_title(self, webpage):
-        page_title = self._html_search_regex(
-            r'<title>([^<]*)</title>', webpage, 'title', default='')
+        page_title = self._html_extract_title(webpage, default='')
         # YouTube video pages appear to always have either 'YouTube -' as prefix or '- YouTube' as suffix.
        return self._html_search_regex(
             r'(?:YouTube\s*-\s*(.*)$)|(?:(.*)\s*-\s*YouTube$)',
diff --git a/yt_dlp/extractor/asiancrush.py b/yt_dlp/extractor/asiancrush.py
index 75a632958..7f1940fca 100644
--- a/yt_dlp/extractor/asiancrush.py
+++ b/yt_dlp/extractor/asiancrush.py
@@ -181,8 +181,7 @@ class AsianCrushPlaylistIE(AsianCrushBaseIE):
             'title', default=None) or self._og_search_title(
             webpage, default=None) or self._html_search_meta(
             'twitter:title', webpage, 'title',
-            default=None) or self._search_regex(
-            r'<title>([^<]+)</title>', webpage, 'title', fatal=False)
+            default=None) or self._html_extract_title(webpage)
         if title:
             title = re.sub(r'\s*\|\s*.+?$', '', title)
 
diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py
index 823155730..29ad7ded7 100644
--- a/yt_dlp/extractor/bbc.py
+++ b/yt_dlp/extractor/bbc.py
@@ -906,9 +906,8 @@ class BBCIE(BBCCoUkIE):
 
         playlist_title = json_ld_info.get('title')
         if not playlist_title:
-            playlist_title = self._og_search_title(
-                webpage, default=None) or self._html_search_regex(
-                r'<title>(.+?)</title>', webpage, 'playlist title', default=None)
+            playlist_title = (self._og_search_title(webpage, default=None)
+                              or self._html_extract_title(webpage, 'playlist title', default=None))
             if playlist_title:
                 playlist_title = re.sub(r'(.+)\s*-\s*BBC.*?$', r'\1', playlist_title).strip()
 
diff --git a/yt_dlp/extractor/breitbart.py b/yt_dlp/extractor/breitbart.py
index f50f719dc..e029aa627 100644
--- a/yt_dlp/extractor/breitbart.py
+++ b/yt_dlp/extractor/breitbart.py
@@ -29,9 +29,8 @@ class BreitBartIE(InfoExtractor):
         self._sort_formats(formats)
         return {
             'id': video_id,
-            'title': self._og_search_title(
-                webpage, default=None) or self._html_search_regex(
-                r'(?s)<title>(.*?)</title>', webpage, 'video title'),
+            'title': (self._og_search_title(webpage, default=None)
+                      or self._html_extract_title(webpage, 'video title')),
             'description': self._og_search_description(webpage),
             'thumbnail': self._og_search_thumbnail(webpage),
             'age_limit': self._rta_search(webpage),
diff --git a/yt_dlp/extractor/callin.py b/yt_dlp/extractor/callin.py
index acf327ace..1f3b7cfff 100644
--- a/yt_dlp/extractor/callin.py
+++ b/yt_dlp/extractor/callin.py
@@ -54,7 +54,7 @@ class CallinIE(InfoExtractor):
         id = episode['id']
         title = (episode.get('title')
                  or self._og_search_title(webpage, fatal=False)
-                 or self._html_search_regex('<title>(.*?)</title>', webpage, 'title'))
+                 or self._html_extract_title(webpage))
         url = episode['m3u8']
         formats = self._extract_m3u8_formats(url, display_id, ext='ts')
         self._sort_formats(formats)
diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py
index ac1272f7b..fba8bf965 100644
--- a/yt_dlp/extractor/cbc.py
+++ b/yt_dlp/extractor/cbc.py
@@ -127,9 +127,9 @@ class CBCIE(InfoExtractor):
     def _real_extract(self, url):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
-        title = self._og_search_title(webpage, default=None) or self._html_search_meta(
-            'twitter:title', webpage, 'title', default=None) or self._html_search_regex(
-            r'<title>([^<]+)</title>', webpage, 'title', fatal=False)
+        title = (self._og_search_title(webpage, default=None)
+                 or self._html_search_meta('twitter:title', webpage, 'title', default=None)
+                 or self._html_extract_title(webpage))
         entries = [
             self._extract_player_init(player_init, display_id)
             for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]
diff --git a/yt_dlp/extractor/closertotruth.py b/yt_dlp/extractor/closertotruth.py
index 26243d52d..517e121e0 100644
--- a/yt_dlp/extractor/closertotruth.py
+++ b/yt_dlp/extractor/closertotruth.py
@@
 -54,8 +54,7 @@ class CloserToTruthIE(InfoExtractor):
             r'<script[^>]+src=["\'].*?\b(?:partner_id|p)/(\d+)',
             webpage, 'kaltura partner_id')
 
-        title = self._search_regex(
-            r'<title>(.+?)\s*\|\s*.+?</title>', webpage, 'video title')
+        title = self._html_extract_title(webpage, 'video title')
 
         select = self._search_regex(
             r'(?s)<select[^>]+id="select-version"[^>]*>(.+?)</select>',
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index af964c527..81688eb54 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1329,9 +1329,8 @@ class InfoExtractor(object):
     def _og_search_description(self, html, **kargs):
         return self._og_search_property('description', html, fatal=False, **kargs)
 
-    def _og_search_title(self, html, **kargs):
-        kargs.setdefault('fatal', False)
-        return self._og_search_property('title', html, **kargs)
+    def _og_search_title(self, html, *, fatal=False, **kargs):
+        return self._og_search_property('title', html, fatal=fatal, **kargs)
 
     def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
         regexes = self._og_regexes('video') + self._og_regexes('video:url')
@@ -1342,9 +1341,8 @@ class InfoExtractor(object):
     def _og_search_url(self, html, **kargs):
         return self._og_search_property('url', html, **kargs)
 
-    def _html_extract_title(self, html, name, **kwargs):
-        return self._html_search_regex(
-            r'(?s)<title>(.*?)</title>', html, name, **kwargs)
+    def _html_extract_title(self, html, name='title', *, fatal=False, **kwargs):
+        return self._html_search_regex(r'(?s)<title>([^<]+)</title>', html, name, fatal=fatal, **kwargs)
 
     def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
         name = variadic(name)
diff --git a/yt_dlp/extractor/cspan.py b/yt_dlp/extractor/cspan.py
index d29b58ba6..f51159bbe 100644
--- a/yt_dlp/extractor/cspan.py
+++ b/yt_dlp/extractor/cspan.py
@@ -278,7 +278,7 @@ class CSpanCongressIE(InfoExtractor):
             video_id, transform_source=js_to_json)
 
         title = (self._og_search_title(webpage, default=None)
-                 or self._html_search_regex(r'(?s)<title>(.*?)</title>', webpage, 'video title'))
+                 or self._html_extract_title(webpage, 'video title'))
         description = (self._og_search_description(webpage, default=None)
                        or self._html_search_meta('description', webpage, 'description', default=None))
 
diff --git a/yt_dlp/extractor/fivetv.py b/yt_dlp/extractor/fivetv.py
index be81fccb8..d6bebd19b 100644
--- a/yt_dlp/extractor/fivetv.py
+++ b/yt_dlp/extractor/fivetv.py
@@ -75,8 +75,7 @@ class FiveTVIE(InfoExtractor):
              r'<a[^>]+?href="([^"]+)"[^>]+?class="videoplayer"'],
            webpage, 'video url')
 
-        title = self._og_search_title(webpage, default=None) or self._search_regex(
-            r'<title>([^<]+)</title>', webpage, 'title')
+        title = self._og_search_title(webpage, default=None) or self._html_extract_title(webpage)
         duration = int_or_none(self._og_search_property(
             'video:duration', webpage, 'duration', default=None))
 
diff --git a/yt_dlp/extractor/foxgay.py b/yt_dlp/extractor/foxgay.py
index 512a10645..1c53e0642 100644
--- a/yt_dlp/extractor/foxgay.py
+++ b/yt_dlp/extractor/foxgay.py
@@ -29,8 +29,7 @@ class FoxgayIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        title = remove_end(self._html_search_regex(
-            r'<title>([^<]+)</title>', webpage, 'title'), ' - Foxgay.com')
+        title = remove_end(self._html_extract_title(webpage), ' - Foxgay.com')
         description = get_element_by_id('inf_tit', webpage)
 
         # The default user-agent with foxgay cookies leads to pages without videos
diff --git a/yt_dlp/extractor/generic.py
index 4a2e30158..65e803dd7 100644
--- a/yt_dlp/extractor/generic.py
+++
 b/yt_dlp/extractor/generic.py
@@ -2873,10 +2873,8 @@ class GenericIE(InfoExtractor):
         #   Site Name | Video Title
         #   Video Title - Tagline | Site Name
         # and so on and so forth; it's just not practical
-        video_title = self._og_search_title(
-            webpage, default=None) or self._html_search_regex(
-            r'(?s)<title>(.*?)</title>', webpage, 'video title',
-            default='video')
+        video_title = (self._og_search_title(webpage, default=None)
+                       or self._html_extract_title(webpage, 'video title', default='video'))
 
         # Try to detect age limit automatically
         age_limit = self._rta_search(webpage)
diff --git a/yt_dlp/extractor/glide.py b/yt_dlp/extractor/glide.py
index d94dfbf09..12af859be 100644
--- a/yt_dlp/extractor/glide.py
+++ b/yt_dlp/extractor/glide.py
@@ -23,9 +23,7 @@ class GlideIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
-        title = self._html_search_regex(
-            r'<title>(.+?)</title>', webpage,
-            'title', default=None) or self._og_search_title(webpage)
+        title = self._html_extract_title(webpage, default=None) or self._og_search_title(webpage)
         video_url = self._proto_relative_url(self._search_regex(
             r'<source[^>]+src=(["\'])(?P<url>.+?)\1', webpage,
             'video URL', default=None,
diff --git a/yt_dlp/extractor/hellporno.py b/yt_dlp/extractor/hellporno.py
index fae425103..92d32cdcc 100644
--- a/yt_dlp/extractor/hellporno.py
+++ b/yt_dlp/extractor/hellporno.py
@@ -38,8 +38,7 @@ class HellPornoIE(InfoExtractor):
 
         webpage = self._download_webpage(url, display_id)
 
-        title = remove_end(self._html_search_regex(
-            r'<title>([^<]+)</title>', webpage, 'title'), ' - Hell Porno')
+        title = remove_end(self._html_extract_title(webpage), ' - Hell Porno')
 
         info = self._parse_html5_media_entries(url, webpage, display_id)[0]
         self._sort_formats(info['formats'])
diff --git a/yt_dlp/extractor/huya.py b/yt_dlp/extractor/huya.py
index b81439682..4e96f22fa 100644
--- a/yt_dlp/extractor/huya.py
+++ b/yt_dlp/extractor/huya.py
@@ -66,8 +66,7 @@ class HuyaLiveIE(InfoExtractor):
         room_info = try_get(stream_data, lambda x: x['data'][0]['gameLiveInfo'])
         if not room_info:
             raise ExtractorError('Can not extract the room info', expected=True)
-        title = room_info.get('roomName') or room_info.get('introduction') or self._html_search_regex(
-            r'<title>([^<]+)</title>', webpage, 'title')
+        title = room_info.get('roomName') or room_info.get('introduction') or self._html_extract_title(webpage)
         screen_type = room_info.get('screenType')
         live_source_type = room_info.get('liveSourceType')
         stream_info_list = stream_data['data'][0]['gameStreamInfoList']
diff --git a/yt_dlp/extractor/imdb.py b/yt_dlp/extractor/imdb.py
index 7eb66d821..96cee2e2f 100644
--- a/yt_dlp/extractor/imdb.py
+++ b/yt_dlp/extractor/imdb.py
@@ -68,7 +68,7 @@ class ImdbIE(InfoExtractor):
         video_info = traverse_obj(info, ('props', 'pageProps', 'videoPlaybackData', 'video'), default={})
         title = (traverse_obj(video_info, ('name', 'value'), ('primaryTitle', 'titleText', 'text'))
                  or self._html_search_meta(('og:title', 'twitter:title'), webpage, default=None)
-                 or self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title'))
+                 or self._html_extract_title(webpage))
 
         data = video_info.get('playbackURLs') or try_get(self._download_json(
             'https://www.imdb.com/ve/data/VIDEO_PLAYBACK_DATA', video_id, query={
diff --git a/yt_dlp/extractor/infoq.py b/yt_dlp/extractor/infoq.py
index 0a70a1fb4..347cc5154 100644
--- a/yt_dlp/extractor/infoq.py
+++ b/yt_dlp/extractor/infoq.py
@@ -115,7 +115,7 @@ class InfoQIE(BokeCCBaseIE):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        video_title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
+
        video_title = self._html_extract_title(webpage)
         video_description = self._html_search_meta('description', webpage, 'description')
 
         if '/cn/' in url:
diff --git a/yt_dlp/extractor/iwara.py b/yt_dlp/extractor/iwara.py
index 254d98692..c0e01e352 100644
--- a/yt_dlp/extractor/iwara.py
+++ b/yt_dlp/extractor/iwara.py
@@ -76,8 +76,7 @@ class IwaraIE(InfoExtractor):
                 'age_limit': age_limit,
             }
 
-        title = remove_end(self._html_search_regex(
-            r'<title>([^<]+)</title>', webpage, 'title'), ' | Iwara')
+        title = remove_end(self._html_extract_title(webpage), ' | Iwara')
 
         thumbnail = self._html_search_regex(
             r'poster=[\'"]([^\'"]+)', webpage, 'thumbnail', default=None)
diff --git a/yt_dlp/extractor/linkedin.py b/yt_dlp/extractor/linkedin.py
index bf549e164..0f57bfa06 100644
--- a/yt_dlp/extractor/linkedin.py
+++ b/yt_dlp/extractor/linkedin.py
@@ -102,7 +102,7 @@ class LinkedInIE(LinkedInBaseIE):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
+        title = self._html_extract_title(webpage)
         description = clean_html(get_element_by_class('share-update-card__update-text', webpage))
         like_count = int_or_none(get_element_by_class('social-counts-reactions__social-counts-numRections', webpage))
         creator = strip_or_none(clean_html(get_element_by_class('comment__actor-name', webpage)))
diff --git a/yt_dlp/extractor/miaopai.py b/yt_dlp/extractor/miaopai.py
index f9e35ac7f..cf0610bdf 100644
--- a/yt_dlp/extractor/miaopai.py
+++ b/yt_dlp/extractor/miaopai.py
@@ -24,8 +24,7 @@ class MiaoPaiIE(InfoExtractor):
         webpage = self._download_webpage(
             url, video_id, headers={'User-Agent': self._USER_AGENT_IPAD})
 
-        title = self._html_search_regex(
-            r'<title>([^<]+)</title>', webpage, 'title')
+        title = self._html_extract_title(webpage)
         thumbnail = self._html_search_regex(
             r'<div[^>]+class=(?P<q1>[\'"]).*\bvideo_img\b.*(?P=q1)[^>]+data-url=(?P<q2>[\'"])(?P<url>[^\'"]+)(?P=q2)',
             webpage, 'thumbnail', fatal=False, group='url')
diff --git a/yt_dlp/extractor/mojvideo.py b/yt_dlp/extractor/mojvideo.py
index 0421f3f44..16d94052b 100644
--- a/yt_dlp/extractor/mojvideo.py
+++ b/yt_dlp/extractor/mojvideo.py
@@ -38,8 +38,7 @@ class MojvideoIE(InfoExtractor):
                 r'<errordesc>([^<]*)</errordesc>', playerapi, 'error description', fatal=False)
             raise ExtractorError('%s said: %s' % (self.IE_NAME, error_desc), expected=True)
 
-        title = self._html_search_regex(
-            r'<title>([^<]+)</title>', playerapi, 'title')
+        title = self._html_extract_title(playerapi)
         video_url = self._html_search_regex(
             r'<mediaurl>([^<]+)</mediaurl>', playerapi, 'video URL')
         thumbnail = self._html_search_regex(
diff --git a/yt_dlp/extractor/newgrounds.py b/yt_dlp/extractor/newgrounds.py
index 1e1274ef0..6525a6d8a 100644
--- a/yt_dlp/extractor/newgrounds.py
+++ b/yt_dlp/extractor/newgrounds.py
@@ -106,8 +106,7 @@ class NewgroundsIE(InfoExtractor):
         uploader = None
         webpage = self._download_webpage(url, media_id)
 
-        title = self._html_search_regex(
-            r'<title>(.+?)</title>', webpage, 'title')
+        title = self._html_extract_title(webpage)
 
         media_url_string = self._search_regex(
             r'"url"\s*:\s*("[^"]+"),', webpage, 'media url', default=None)
@@ -219,8 +218,7 @@ class NewgroundsPlaylistIE(InfoExtractor):
 
         webpage = self._download_webpage(url, playlist_id)
 
-        title = self._search_regex(
-            r'<title>([^>]+)</title>', webpage, 'title', default=None)
+        title = self._html_extract_title(webpage, default=None)
 
         # cut left menu
         webpage = self._search_regex(
diff --git a/yt_dlp/extractor/nhk.py b/yt_dlp/extractor/nhk.py
index 626c6379b..3b8efc3e6 100644
--- a/yt_dlp/extractor/nhk.py
+++ b/yt_dlp/extractor/nhk.py
@@ -309,7 +309,9 @@ class
NhkForSchoolProgramListIE(InfoExtractor):
 
         webpage = self._download_webpage(f'https://www.nhk.or.jp/school/{program_id}/', program_id)
 
-        title = self._og_search_title(webpage, fatal=False) or self._html_extract_title(webpage, fatal=False) or self._html_search_regex(r'<h3>([^<]+?)とは?\s*</h3>', webpage, 'title', fatal=False)
+        title = (self._og_search_title(webpage)
+                 or self._html_extract_title(webpage)
+                 or self._html_search_regex(r'<h3>([^<]+?)とは?\s*</h3>', webpage, 'title', fatal=False))
         title = re.sub(r'\s*\|\s*NHK\s+for\s+School\s*$', '', title) if title else None
         description = self._html_search_regex(
             r'(?s)<div\s+class="programDetail\s*">\s*<p>[^<]+</p>
', diff --git a/yt_dlp/extractor/playvid.py b/yt_dlp/extractor/playvid.py index 4aef186ea..e1c406b6c 100644 --- a/yt_dlp/extractor/playvid.py +++ b/yt_dlp/extractor/playvid.py @@ -85,8 +85,7 @@ class PlayvidIE(InfoExtractor): # Extract title - should be in the flashvars; if not, look elsewhere if video_title is None: - video_title = self._html_search_regex( - r'(.*?)</title', webpage, 'title') + video_title = self._html_extract_title(webpage) return { 'id': video_id, diff --git a/yt_dlp/extractor/rule34video.py b/yt_dlp/extractor/rule34video.py index 522d4ccd5..a602a9f33 100644 --- a/yt_dlp/extractor/rule34video.py +++ b/yt_dlp/extractor/rule34video.py @@ -49,7 +49,7 @@ class Rule34VideoIE(InfoExtractor): 'quality': quality, }) - title = self._html_search_regex(r'<title>([^<]+)', webpage, 'title') + title = self._html_extract_title(webpage) thumbnail = self._html_search_regex(r'preview_url:\s+\'([^\']+)\'', webpage, 'thumbnail', default=None) duration = self._html_search_regex(r'"icon-clock">\s+((?:\d+:?)+)', webpage, 'duration', default=None) diff --git a/yt_dlp/extractor/senategov.py b/yt_dlp/extractor/senategov.py index 6f4240422..b295184a1 100644 --- a/yt_dlp/extractor/senategov.py +++ b/yt_dlp/extractor/senategov.py @@ -112,7 +112,7 @@ class SenateISVPIE(InfoExtractor): if smuggled_data.get('force_title'): title = smuggled_data['force_title'] else: - title = self._html_search_regex(r'([^<]+)', webpage, video_id) + title = self._html_extract_title(webpage) poster = qs.get('poster') thumbnail = poster[0] if poster else None diff --git a/yt_dlp/extractor/sunporno.py b/yt_dlp/extractor/sunporno.py index 68051169b..59b77bf92 100644 --- a/yt_dlp/extractor/sunporno.py +++ b/yt_dlp/extractor/sunporno.py @@ -36,8 +36,7 @@ class SunPornoIE(InfoExtractor): webpage = self._download_webpage( 'http://www.sunporno.com/videos/%s' % video_id, video_id) - title = self._html_search_regex( - r'([^<]+)', webpage, 'title') + title = self._html_extract_title(webpage) description = self._html_search_meta( 'description', webpage, 'description') thumbnail = self._html_search_regex( diff --git a/yt_dlp/extractor/thisav.py b/yt_dlp/extractor/thisav.py index 4af286e6d..6bb00b3ab 100644 --- a/yt_dlp/extractor/thisav.py +++ b/yt_dlp/extractor/thisav.py @@ -37,9 +37,7 @@ class ThisAVIE(InfoExtractor): video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - title = remove_end(self._html_search_regex( - r'([^<]+)', webpage, 'title'), - ' - 視頻 - ThisAV.com-世界第一中文成人娛樂網站') + title = remove_end(self._html_extract_title(webpage), ' - 視頻 - ThisAV.com-世界第一中文成人娛樂網站') video_url = self._html_search_regex( r"addVariable\('file','([^']+)'\);", webpage, 'video url', default=None) if video_url: diff --git a/yt_dlp/extractor/traileraddict.py b/yt_dlp/extractor/traileraddict.py index 10100fbcf..514f4793e 100644 --- a/yt_dlp/extractor/traileraddict.py +++ b/yt_dlp/extractor/traileraddict.py @@ -24,8 +24,7 @@ class TrailerAddictIE(InfoExtractor): name = mobj.group('movie') + '/' + mobj.group('trailer_name') webpage = self._download_webpage(url, name) - title = self._search_regex(r'(.+?)', - webpage, 'video title').replace(' - Trailer Addict', '') + title = self._html_extract_title(webpage, 'video title').replace(' - Trailer Addict', '') view_count_str = self._search_regex( r'([0-9,.]+)', webpage, 'view count', fatal=False) diff --git a/yt_dlp/extractor/varzesh3.py b/yt_dlp/extractor/varzesh3.py index 81313dc9d..32655b96d 100644 --- a/yt_dlp/extractor/varzesh3.py +++ b/yt_dlp/extractor/varzesh3.py @@ -42,8 +42,7 @@ 
class Varzesh3IE(InfoExtractor):
 
         video_url = self._search_regex(
             r'<source[^>]+src="([^"]+)"', webpage, 'video url')
 
-        title = remove_start(self._html_search_regex(
-            r'<title>([^<]+)</title>', webpage, 'title'), 'ویدیو ورزش 3 | ')
+        title = remove_start(self._html_extract_title(webpage), 'ویدیو ورزش 3 | ')
         description = self._html_search_regex(
             r'(?s)<div class="matn">(.+?)</div>
', diff --git a/yt_dlp/extractor/vshare.py b/yt_dlp/extractor/vshare.py index c631ac1fa..b4874ac39 100644 --- a/yt_dlp/extractor/vshare.py +++ b/yt_dlp/extractor/vshare.py @@ -50,8 +50,7 @@ class VShareIE(InfoExtractor): 'https://vshare.io/v/%s/width-650/height-430/1' % video_id, video_id, headers={'Referer': url}) - title = self._html_search_regex( - r'([^<]+)', webpage, 'title') + title = self._html_extract_title(webpage) title = title.split(' - ')[0] error = self._html_search_regex( diff --git a/yt_dlp/extractor/vupload.py b/yt_dlp/extractor/vupload.py index 2229a6591..b561f63f7 100644 --- a/yt_dlp/extractor/vupload.py +++ b/yt_dlp/extractor/vupload.py @@ -28,7 +28,7 @@ class VuploadIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r'(.+?)', webpage, 'title') + title = self._html_extract_title(webpage) video_json = self._parse_json(self._html_search_regex(r'sources:\s*(.+?]),', webpage, 'video'), video_id, transform_source=js_to_json) formats = [] for source in video_json: diff --git a/yt_dlp/extractor/weibo.py b/yt_dlp/extractor/weibo.py index 621df5b54..dafa2af3b 100644 --- a/yt_dlp/extractor/weibo.py +++ b/yt_dlp/extractor/weibo.py @@ -73,8 +73,7 @@ class WeiboIE(InfoExtractor): webpage = self._download_webpage( url, video_id, note='Revisiting webpage') - title = self._html_search_regex( - r'(.+?)', webpage, 'title') + title = self._html_extract_title(webpage) video_formats = compat_parse_qs(self._search_regex( r'video-sources=\\\"(.+?)\"', webpage, 'video_sources')) diff --git a/yt_dlp/extractor/yahoo.py b/yt_dlp/extractor/yahoo.py index 6cf3b1de2..20504de2c 100644 --- a/yt_dlp/extractor/yahoo.py +++ b/yt_dlp/extractor/yahoo.py @@ -533,7 +533,7 @@ class YahooJapanNewsIE(InfoExtractor): title = self._html_search_meta( ['og:title', 'twitter:title'], webpage, 'title', default=None - ) or self._html_search_regex('([^<]+)', webpage, 'title') + ) or self._html_extract_title(webpage) if display_id == host: # Headline page (w/ multiple BC playlists) ('news.yahoo.co.jp', 'headlines.yahoo.co.jp/videonews/', ...) 
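Every hunk in this cleanup patch performs the same mechanical substitution: an open-coded `<title>` regex is replaced by the shared `InfoExtractor._html_extract_title` helper. The helper itself lives in `yt_dlp/extractor/common.py` and is not part of this diff; the snippet below is only a rough standalone sketch of the idea, assuming it wraps the usual `<title>` pattern with a non-fatal default (function name and signature here are illustrative, not the actual method):

```python
import re


def html_extract_title(html, name='title', *, default=None, fatal=False):
    # Grab the text of the first <title> element, tolerating attributes and
    # newlines -- the same pattern the hunks above delete per-extractor.
    match = re.search(r'(?s)<title\b[^>]*>([^<]+)</title>', html)
    if match:
        return match.group(1).strip()
    if fatal:
        raise ValueError(f'Unable to extract {name}')
    return default


# Example: html_extract_title('<title>Clip - Hell Porno</title>') -> 'Clip - Hell Porno'
```

Centralizing this hides dozens of slightly different variants (`(.+?)` vs `([^<]+)`, fatal vs non-fatal, trailing-whitespace handling) behind one consistent default, which is why so many extractors shrink by a line or two in this patch.
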
diff --git a/yt_dlp/extractor/youjizz.py b/yt_dlp/extractor/youjizz.py index 5f5fbf21c..111623ffe 100644 --- a/yt_dlp/extractor/youjizz.py +++ b/yt_dlp/extractor/youjizz.py @@ -36,8 +36,7 @@ class YouJizzIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - title = self._html_search_regex( - r'(.+?)', webpage, 'title') + title = self._html_extract_title(webpage) formats = [] From 5fa3c9a88f597625296981a4a26be723e65d4842 Mon Sep 17 00:00:00 2001 From: Ha Tien Loi Date: Mon, 4 Apr 2022 17:07:07 +0700 Subject: [PATCH 0901/2552] [TikTok] Fix URLs with user id (#3295) Closes #3243 Authored by: hatienl0i261299 --- yt_dlp/extractor/tiktok.py | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 56cc2dcc6..6f8c32882 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -263,7 +263,7 @@ class TikTokBaseIE(InfoExtractor): return { 'id': aweme_id, - 'title': aweme_detail['desc'], + 'title': aweme_detail.get('desc'), 'description': aweme_detail['desc'], 'view_count': int_or_none(stats_info.get('play_count')), 'like_count': int_or_none(stats_info.get('digg_count')), @@ -457,6 +457,30 @@ class TikTokIE(TikTokBaseIE): 'comment_count': int, }, 'expected_warnings': ['Video not available'] + }, { + # Video without title and description + 'url': 'https://www.tiktok.com/@pokemonlife22/video/7059698374567611694', + 'info_dict': { + 'id': '7059698374567611694', + 'ext': 'mp4', + 'title': 'N/A', + 'description': '', + 'uploader': 'pokemonlife22', + 'creator': 'Pokemon', + 'uploader_id': '6820838815978423302', + 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W', + 'track': 'original sound', + 'timestamp': 1643714123, + 'duration': 6, + 'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?', + 'upload_date': '20220201', + 'artist': 'Pokemon', + 'view_count': int, + 'like_count': int, + 'repost_count': int, + 'comment_count': int, + }, + 'expected_warnings': ['Video not available'] }, { # Auto-captions available 'url': 'https://www.tiktok.com/@hankgreen1/video/7047596209028074758', @@ -521,6 +545,15 @@ class TikTokUserIE(TikTokBaseIE): 'thumbnail': r're:https://.+_1080x1080\.webp' }, 'expected_warnings': ['Retrying'] + }, { + 'url': 'https://www.tiktok.com/@6820838815978423302', + 'playlist_mincount': 5, + 'info_dict': { + 'id': '6820838815978423302', + 'title': '6820838815978423302', + 'thumbnail': r're:https://.+_1080x1080\.webp' + }, + 'expected_warnings': ['Retrying'] }, { 'url': 'https://www.tiktok.com/@meme', 'playlist_mincount': 593, @@ -593,7 +626,7 @@ class TikTokUserIE(TikTokBaseIE): webpage = self._download_webpage(url, user_name, headers={ 'User-Agent': 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)' }) - user_id = self._html_search_regex(r'snssdk\d*://user/profile/(\d+)', webpage, 'user ID') + user_id = self._html_search_regex(r'snssdk\d*://user/profile/(\d+)', webpage, 'user ID', default=None) or user_name videos = LazyList(self._video_entries_api(webpage, user_id, user_name)) thumbnail = traverse_obj(videos, (0, 'author', 'avatar_larger', 'url_list', 0)) From f4d706a931bdf2534c23353b5843d3220efe6f89 Mon Sep 17 00:00:00 2001 From: Jeff Huffman Date: Tue, 5 Apr 2022 03:51:12 -0700 Subject: [PATCH 0902/2552] [crunchyroll:playlist] Implement beta API (#2955) Closes #3121, #2930 Authored by: tejing1 --- yt_dlp/extractor/crunchyroll.py | 200 
+++++++++++++++++++++++++------- 1 file changed, 155 insertions(+), 45 deletions(-) diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py index bb4ae12f5..7edb645f8 100644 --- a/yt_dlp/extractor/crunchyroll.py +++ b/yt_dlp/extractor/crunchyroll.py @@ -86,6 +86,22 @@ class CrunchyrollBaseIE(InfoExtractor): if not self._get_cookies(self._LOGIN_URL).get('etp_rt'): raise ExtractorError('Login succeeded but did not set etp_rt cookie') + # Beta-specific, but needed for redirects + def _get_beta_embedded_json(self, webpage, display_id): + initial_state = self._parse_json(self._search_regex( + r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'initial state'), display_id) + app_config = self._parse_json(self._search_regex( + r'__APP_CONFIG__\s*=\s*({.+?})\s*;', webpage, 'app config'), display_id) + return initial_state, app_config + + def _redirect_to_beta(self, webpage, iekey, video_id): + if not self._get_cookies(self._LOGIN_URL).get('etp_rt'): + raise ExtractorError('Received a beta page from non-beta url when not logged in.') + initial_state, app_config = self._get_beta_embedded_json(webpage, video_id) + url = app_config['baseSiteUrl'] + initial_state['router']['locations']['current']['pathname'] + self.to_screen(f'{video_id}: Redirected to beta site - {url}') + return self.url_result(f'{url}', iekey, video_id) + @staticmethod def _add_skip_wall(url): parsed_url = compat_urlparse.urlparse(url) @@ -406,6 +422,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text webpage = self._download_webpage( self._add_skip_wall(webpage_url), video_id, headers=self.geo_verification_headers()) + if re.search(r'
<div id="preload-data">', webpage):
+            return self._redirect_to_beta(webpage, CrunchyrollBetaIE.ie_key(), video_id)
         note_m = self._html_search_regex(
             r'<div class="showmedia-trailer-notice">(.+?)</div>
', webpage, 'trailer-notice', default='') @@ -670,6 +688,8 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE): # https:// gives a 403, but http:// does not self._add_skip_wall(url).replace('https://', 'http://'), show_id, headers=self.geo_verification_headers()) + if re.search(r'
<div id="preload-data">', webpage):
+            return self._redirect_to_beta(webpage, CrunchyrollBetaShowIE.ie_key(), show_id)
         title = self._html_search_meta('name', webpage, default=None)
         episode_re = r'
  • ]+>.*?(?:\w{1,2}/)?)watch/(?P\w+)/(?P[\w\-]+)/?(?:\?|$)' + _VALID_URL = r'https?://beta\.crunchyroll\.com/(?P(?:\w{1,2}/)?)watch/(?P\w+)/(?P[\w\-]*)/?(?:\?|$)' _TESTS = [{ 'url': 'https://beta.crunchyroll.com/watch/GY2P1Q98Y/to-the-future', 'info_dict': { @@ -705,51 +772,49 @@ class CrunchyrollBetaIE(CrunchyrollBaseIE): 'uploader': 'Toei Animation', 'title': 'World Trigger Episode 73 – To the Future', 'upload_date': '20160402', + 'episode_number': 73, + 'series': 'World Trigger', + 'average_rating': 4.9, + 'episode': 'To the Future', + 'season': 'World Trigger', + 'thumbnail': 'https://img1.ak.crunchyroll.com/i/spire3-tmb/c870dedca1a83137c2d3d144984155ed1459527119_main.jpg', + 'season_number': 1, }, 'params': {'skip_download': 'm3u8'}, 'expected_warnings': ['Unable to download XML'] + }, { + 'url': 'https://beta.crunchyroll.com/watch/GYK53DMPR/wicked-lord-shingan-reborn', + 'info_dict': { + 'id': '648781', + 'ext': 'mp4', + 'episode_number': 1, + 'timestamp': 1389173400, + 'series': 'Love, Chunibyo & Other Delusions - Heart Throb -', + 'description': 'md5:5579d1a0355cc618558ba23d27067a62', + 'uploader': 'TBS', + 'episode': 'Wicked Lord Shingan... Reborn', + 'average_rating': 4.9, + 'season': 'Love, Chunibyo & Other Delusions - Heart Throb -', + 'thumbnail': 'https://img1.ak.crunchyroll.com/i/spire3-tmb/2ba0384e225a5370d5f0ee9496d91ea51389046521_main.jpg', + 'title': 'Love, Chunibyo & Other Delusions - Heart Throb - Episode 1 – Wicked Lord Shingan... Reborn', + 'season_number': 2, + 'upload_date': '20140108', + }, + 'params': {'skip_download': 'm3u8'}, + 'expected_warnings': ['Unable to download XML'] + }, { + 'url': 'https://beta.crunchyroll.com/watch/GY2P1Q98Y/', + 'only_matching': True, }] def _real_extract(self, url): - lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'internal_id', 'id') - webpage = self._download_webpage(url, display_id) - initial_state = self._parse_json( - self._search_regex(r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'initial state'), - display_id) - episode_data = initial_state['content']['byId'][internal_id] + lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'id', 'display_id') + if not self._get_cookies(url).get('etp_rt'): - video_id = episode_data['external_id'].split('.')[1] - series_id = episode_data['episode_metadata']['series_slug_title'] - return self.url_result(f'https://www.crunchyroll.com/{lang}{series_id}/{display_id}-{video_id}', - CrunchyrollIE.ie_key(), video_id) - - app_config = self._parse_json( - self._search_regex(r'__APP_CONFIG__\s*=\s*({.+?})\s*;', webpage, 'app config'), - display_id) - client_id = app_config['cxApiParams']['accountAuthClientId'] - api_domain = app_config['cxApiParams']['apiDomain'] - basic_token = str(base64.b64encode(('%s:' % client_id).encode('ascii')), 'ascii') - auth_response = self._download_json( - f'{api_domain}/auth/v1/token', display_id, - note='Authenticating with cookie', - headers={ - 'Authorization': 'Basic ' + basic_token - }, data='grant_type=etp_rt_cookie'.encode('ascii')) - policy_response = self._download_json( - f'{api_domain}/index/v2', display_id, - note='Retrieving signed policy', - headers={ - 'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token'] - }) - bucket = policy_response['cms']['bucket'] - params = { - 'Policy': policy_response['cms']['policy'], - 'Signature': policy_response['cms']['signature'], - 'Key-Pair-Id': policy_response['cms']['key_pair_id'] - } - locale = traverse_obj(initial_state, ('localization', 
'locale')) - if locale: - params['locale'] = locale + return self._redirect_from_beta(url, lang, internal_id, display_id, True, CrunchyrollIE.ie_key()) + + api_domain, bucket, params = self._get_params(lang) + episode_response = self._download_json( f'{api_domain}/cms/v2{bucket}/episodes/{internal_id}', display_id, note='Retrieving episode metadata', @@ -827,9 +892,9 @@ class CrunchyrollBetaIE(CrunchyrollBaseIE): } -class CrunchyrollBetaShowIE(CrunchyrollBaseIE): +class CrunchyrollBetaShowIE(CrunchyrollBetaBaseIE): IE_NAME = 'crunchyroll:playlist:beta' - _VALID_URL = r'https?://beta\.crunchyroll\.com/(?P(?:\w{1,2}/)?)series/\w+/(?P[\w\-]+)/?(?:\?|$)' + _VALID_URL = r'https?://beta\.crunchyroll\.com/(?P(?:\w{1,2}/)?)series/(?P\w+)/(?P[\w\-]*)/?(?:\?|$)' _TESTS = [{ 'url': 'https://beta.crunchyroll.com/series/GY19NQ2QR/Girl-Friend-BETA', 'info_dict': { @@ -837,12 +902,57 @@ class CrunchyrollBetaShowIE(CrunchyrollBaseIE): 'title': 'Girl Friend BETA', }, 'playlist_mincount': 10, + }, { + 'url': 'https://beta.crunchyroll.com/series/GYJQV73V6/love-chunibyo--other-delusions---heart-throb--', + 'info_dict': { + 'id': 'love-chunibyo-other-delusions-heart-throb-', + 'title': 'Love, Chunibyo & Other Delusions - Heart Throb -', + }, + 'playlist_mincount': 10, }, { 'url': 'https://beta.crunchyroll.com/it/series/GY19NQ2QR/Girl-Friend-BETA', 'only_matching': True, }] def _real_extract(self, url): - lang, series_id = self._match_valid_url(url).group('lang', 'id') - return self.url_result(f'https://www.crunchyroll.com/{lang}{series_id.lower()}', - CrunchyrollShowPlaylistIE.ie_key(), series_id) + lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'id', 'display_id') + + if not self._get_cookies(url).get('etp_rt'): + return self._redirect_from_beta(url, lang, internal_id, display_id, False, CrunchyrollShowPlaylistIE.ie_key()) + + api_domain, bucket, params = self._get_params(lang) + + series_response = self._download_json( + f'{api_domain}/cms/v2{bucket}/series/{internal_id}', display_id, + note='Retrieving series metadata', query=params) + + seasons_response = self._download_json( + f'{api_domain}/cms/v2{bucket}/seasons?series_id={internal_id}', display_id, + note='Retrieving season list', query=params) + + def entries(): + for season in seasons_response['items']: + episodes_response = self._download_json( + f'{api_domain}/cms/v2{bucket}/episodes?season_id={season["id"]}', display_id, + note=f'Retrieving episode list for {season.get("slug_title")}', query=params) + for episode in episodes_response['items']: + episode_id = episode['id'] + episode_display_id = episode['slug_title'] + yield { + '_type': 'url', + 'url': f'https://beta.crunchyroll.com/{lang}watch/{episode_id}/{episode_display_id}', + 'ie_key': CrunchyrollBetaIE.ie_key(), + 'id': episode_id, + 'title': '%s Episode %s – %s' % (episode.get('season_title'), episode.get('episode'), episode.get('title')), + 'description': try_get(episode, lambda x: x['description'].replace(r'\r\n', '\n')), + 'duration': float_or_none(episode.get('duration_ms'), 1000), + 'series': episode.get('series_title'), + 'series_id': episode.get('series_id'), + 'season': episode.get('season_title'), + 'season_id': episode.get('season_id'), + 'season_number': episode.get('season_number'), + 'episode': episode.get('title'), + 'episode_number': episode.get('sequence_number') + } + + return self.playlist_result(entries(), internal_id, series_response.get('title')) From 0a8a7e68fabf6fc9387f270301e51225ac349b00 Mon Sep 17 00:00:00 2001 From: Teemu Ikonen Date: 
Tue, 5 Apr 2022 15:15:47 +0300 Subject: [PATCH 0903/2552] [ruutu] Detect embeds (#3294) Authored by: tpikonen --- yt_dlp/extractor/generic.py | 26 +++++++++++++++++++++++++- yt_dlp/extractor/ruutu.py | 15 +++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 65e803dd7..2c503e581 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -149,6 +149,7 @@ from .blogger import BloggerIE from .mainstreaming import MainStreamingIE from .gfycat import GfycatIE from .panopto import PanoptoBaseIE +from .ruutu import RuutuIE class GenericIE(InfoExtractor): @@ -2511,7 +2512,24 @@ class GenericIE(InfoExtractor): 'id': 'insert-a-quiz-into-a-panopto-video' }, 'playlist_count': 1 - } + }, + { + # Ruutu embed + 'url': 'https://www.nelonen.fi/ohjelmat/madventures-suomi/2160731-riku-ja-tunna-lahtevat-peurajahtiin-tv-sta-tutun-biologin-kanssa---metsastysreissu-huipentuu-kasvissyojan-painajaiseen', + 'md5': 'a2513a98d3496099e6eced40f7e6a14b', + 'info_dict': { + 'id': '4044426', + 'ext': 'mp4', + 'title': 'Riku ja Tunna lähtevät peurajahtiin tv:stä tutun biologin kanssa – metsästysreissu huipentuu kasvissyöjän painajaiseen!', + 'thumbnail': r're:^https?://.+\.jpg$', + 'duration': 108, + 'series' : 'Madventures Suomi', + 'description': 'md5:aa55b44bd06a1e337a6f1d0b46507381', + 'categories': ['Matkailu', 'Elämäntyyli'], + 'age_limit': 0, + 'upload_date': '20220308', + }, + }, ] def report_following_redirect(self, new_url): @@ -3737,6 +3755,12 @@ class GenericIE(InfoExtractor): panopto_urls = PanoptoBaseIE._extract_urls(webpage) if panopto_urls: return self.playlist_from_matches(panopto_urls, video_id, video_title) + + # Look for Ruutu embeds + ruutu_url = RuutuIE._extract_url(webpage) + if ruutu_url: + return self.url_result(ruutu_url, RuutuIE) + # Look for HTML5 media entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') if entries: diff --git a/yt_dlp/extractor/ruutu.py b/yt_dlp/extractor/ruutu.py index d9cf39d71..5a30e3360 100644 --- a/yt_dlp/extractor/ruutu.py +++ b/yt_dlp/extractor/ruutu.py @@ -1,6 +1,9 @@ # coding: utf-8 from __future__ import unicode_literals +import json +import re + from .common import InfoExtractor from ..compat import compat_urllib_parse_urlparse from ..utils import ( @@ -8,6 +11,8 @@ from ..utils import ( ExtractorError, find_xpath_attr, int_or_none, + traverse_obj, + try_call, unified_strdate, url_or_none, xpath_attr, @@ -123,6 +128,16 @@ class RuutuIE(InfoExtractor): ] _API_BASE = 'https://gatling.nelonenmedia.fi' + @classmethod + def _extract_url(cls, webpage): + settings = try_call( + lambda: json.loads(re.search( + r'jQuery\.extend\(Drupal\.settings, ({.+?})\);', webpage).group(1), strict=False)) + video_id = traverse_obj(settings, ( + 'mediaCrossbowSettings', 'file', 'field_crossbow_video_id', 'und', 0, 'value')) + if video_id: + return f'http://www.ruutu.fi/video/{video_id}' + def _real_extract(self, url): video_id = self._match_id(url) From a44ca5a470e09b5170fc9c3a46733f050fadbfae Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 27 Mar 2022 07:50:43 +0530 Subject: [PATCH 0904/2552] [cleanup] Misc fixes Closes https://github.com/yt-dlp/yt-dlp/pull/3213, Closes https://github.com/yt-dlp/yt-dlp/pull/3117 Related: https://github.com/yt-dlp/yt-dlp/issues/3146#issuecomment-1077323114, https://github.com/yt-dlp/yt-dlp/pull/3277#discussion_r841019671, 
https://github.com/yt-dlp/yt-dlp/commit/a825ffbffa0bea322e3ccb44c6f8e01d8d9572fb#commitcomment-68538986, https://github.com/yt-dlp/yt-dlp/issues/2360, https://github.com/yt-dlp/yt-dlp/commit/5fa3c9a88f597625296981a4a26be723e65d4842#r70393519, https://github.com/yt-dlp/yt-dlp/commit/5fa3c9a88f597625296981a4a26be723e65d4842#r70393254 --- .github/ISSUE_TEMPLATE/1_broken_site.yml | 2 +- .../ISSUE_TEMPLATE/2_site_support_request.yml | 2 +- .../ISSUE_TEMPLATE/3_site_feature_request.yml | 2 +- .github/ISSUE_TEMPLATE/4_bug_report.yml | 2 +- .github/ISSUE_TEMPLATE/5_feature_request.yml | 23 ++++++- .github/ISSUE_TEMPLATE/6_question.yml | 2 +- .github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml | 2 +- .../2_site_support_request.yml | 2 +- .../3_site_feature_request.yml | 2 +- .github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml | 2 +- .../ISSUE_TEMPLATE_tmpl/5_feature_request.yml | 23 ++++++- .github/ISSUE_TEMPLATE_tmpl/6_question.yml | 2 +- .gitignore | 3 + README.md | 61 ++++++++++--------- yt_dlp/YoutubeDL.py | 9 +-- yt_dlp/__init__.py | 2 +- yt_dlp/cookies.py | 11 ++-- yt_dlp/downloader/fragment.py | 24 ++++---- yt_dlp/downloader/http.py | 20 ++---- yt_dlp/extractor/bilibili.py | 6 +- yt_dlp/extractor/canvas.py | 8 --- yt_dlp/extractor/common.py | 8 ++- yt_dlp/extractor/dropout.py | 4 +- yt_dlp/extractor/facebook.py | 6 +- yt_dlp/extractor/generic.py | 6 +- yt_dlp/extractor/limelight.py | 2 +- yt_dlp/extractor/niconico.py | 2 +- yt_dlp/extractor/tiktok.py | 11 +++- yt_dlp/extractor/yandexvideo.py | 1 - yt_dlp/extractor/youtube.py | 15 +++-- yt_dlp/options.py | 14 +++-- yt_dlp/postprocessor/ffmpeg.py | 13 +++- yt_dlp/utils.py | 19 +++--- 33 files changed, 184 insertions(+), 127 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml index 4d9187143..c671a1910 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a broken site required: true - - label: I've verified that I'm running yt-dlp version **2022.03.08.1**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2022.03.08.1** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml index cff73b555..5ff022a04 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a new site support request required: true - - label: I've verified that I'm running yt-dlp version **2022.03.08.1**. 
([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2022.03.08.1** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml index 44012044a..acdfeb038 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a site feature request required: true - - label: I've verified that I'm running yt-dlp version **2022.03.08.1**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2022.03.08.1** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml index d93380725..a4a038fc8 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a bug unrelated to a specific site required: true - - label: I've verified that I'm running yt-dlp version **2022.03.08.1**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2022.03.08.1** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml index 51987d533..1bdafc441 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/5_feature_request.yml @@ -13,7 +13,7 @@ body: required: true - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - - label: I've verified that I'm running yt-dlp version **2022.03.08.1**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2022.03.08.1** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates required: true @@ -30,3 +30,24 @@ body: placeholder: WRITE DESCRIPTION HERE validations: required: true + - type: textarea + id: log + attributes: + label: Verbose log + description: | + If your feature request involves an existing yt-dlp command, provide the complete verbose output of that command. + Add the `-vU` flag to **your** command line you run yt-dlp with (`yt-dlp -vU `), copy the WHOLE output and insert it below. 
+ It should look similar to this: + placeholder: | + [debug] Command-line config: ['-vU', 'http://www.youtube.com/watch?v=BaW_jenozKc'] + [debug] Portable config file: yt-dlp.conf + [debug] Portable config: ['-i'] + [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 + [debug] yt-dlp version 2021.12.01 (exe) + [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 + [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 + [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets + [debug] Proxy map: {} + yt-dlp is up to date (2021.12.01) + + render: shell diff --git a/.github/ISSUE_TEMPLATE/6_question.yml b/.github/ISSUE_TEMPLATE/6_question.yml index 061158ed3..030d2cfe7 100644 --- a/.github/ISSUE_TEMPLATE/6_question.yml +++ b/.github/ISSUE_TEMPLATE/6_question.yml @@ -35,7 +35,7 @@ body: attributes: label: Verbose log description: | - If your question involes a yt-dlp command, provide the complete verbose output of that command. + If your question involves a yt-dlp command, provide the complete verbose output of that command. Add the `-vU` flag to **your** command line you run yt-dlp with (`yt-dlp -vU `), copy the WHOLE output and insert it below. It should look similar to this: placeholder: | diff --git a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml index fd6435ba6..422af9c72 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a broken site required: true - - label: I've verified that I'm running yt-dlp version **%(version)s**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **%(version)s** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true diff --git a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml index f380c1331..fec50559a 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a new site support request required: true - - label: I've verified that I'm running yt-dlp version **%(version)s**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **%(version)s** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true diff --git a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml index 88b1f1217..266408c19 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a site feature request required: true - - label: I've verified that I'm running yt-dlp version **%(version)s**. 
([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **%(version)s** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true diff --git a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml index 03a6ba551..8b49b6385 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a bug unrelated to a specific site required: true - - label: I've verified that I'm running yt-dlp version **%(version)s**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **%(version)s** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true diff --git a/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml index eb5d3d634..1f33f09dc 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml @@ -13,7 +13,7 @@ body: required: true - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - - label: I've verified that I'm running yt-dlp version **%(version)s**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **%(version)s** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates required: true @@ -30,3 +30,24 @@ body: placeholder: WRITE DESCRIPTION HERE validations: required: true + - type: textarea + id: log + attributes: + label: Verbose log + description: | + If your feature request involves an existing yt-dlp command, provide the complete verbose output of that command. + Add the `-vU` flag to **your** command line you run yt-dlp with (`yt-dlp -vU `), copy the WHOLE output and insert it below. + It should look similar to this: + placeholder: | + [debug] Command-line config: ['-vU', 'http://www.youtube.com/watch?v=BaW_jenozKc'] + [debug] Portable config file: yt-dlp.conf + [debug] Portable config: ['-i'] + [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 + [debug] yt-dlp version 2021.12.01 (exe) + [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 + [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 + [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets + [debug] Proxy map: {} + yt-dlp is up to date (2021.12.01) + + render: shell diff --git a/.github/ISSUE_TEMPLATE_tmpl/6_question.yml b/.github/ISSUE_TEMPLATE_tmpl/6_question.yml index 061158ed3..030d2cfe7 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/6_question.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/6_question.yml @@ -35,7 +35,7 @@ body: attributes: label: Verbose log description: | - If your question involes a yt-dlp command, provide the complete verbose output of that command. + If your question involves a yt-dlp command, provide the complete verbose output of that command. 
Add the `-vU` flag to **your** command line you run yt-dlp with (`yt-dlp -vU `), copy the WHOLE output and insert it below. It should look similar to this: placeholder: | diff --git a/.gitignore b/.gitignore index fd51ad66e..c815538e8 100644 --- a/.gitignore +++ b/.gitignore @@ -116,3 +116,6 @@ yt-dlp.zip ytdlp_plugins/extractor/* !ytdlp_plugins/extractor/__init__.py !ytdlp_plugins/extractor/sample.py +ytdlp_plugins/postprocessor/* +!ytdlp_plugins/postprocessor/__init__.py +!ytdlp_plugins/postprocessor/sample.py diff --git a/README.md b/README.md index a75441e35..6b4f39b9e 100644 --- a/README.md +++ b/README.md @@ -125,6 +125,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu * The options `--auto-number` (`-A`), `--title` (`-t`) and `--literal` (`-l`), no longer work. See [removed options](#Removed) for details * `avconv` is not supported as an alternative to `ffmpeg` +* yt-dlp stores config files in slightly different locations to youtube-dl. See [configuration](#configuration) for a list of correct locations * The default [output template](#output-template) is `%(title)s [%(id)s].%(ext)s`. There is no real reason for this change. This was changed before yt-dlp was ever made public and now there are no plans to change it back to `%(title)s-%(id)s.%(ext)s`. Instead, you may use `--compat-options filename` * The default [format sorting](#sorting-formats) is different from youtube-dl and prefers higher resolution and better codecs rather than higher bitrates. You can use the `--format-sort` option to change this to any order you prefer, or use `--compat-options format-sort` to use youtube-dl's sorting order * The default format selector is `bv*+ba/b`. This means that if a combined video + audio format that is better than the best video-only format is found, the former will be preferred. Use `-f bv+ba/b` or `--compat-options format-spec` to revert this @@ -431,24 +432,24 @@ You can also fork the project on github and run your fork's [build workflow](.gi --dateafter DATE Download only videos uploaded on or after this date. The date formats accepted is the same as --date - --match-filter FILTER Generic video filter. Any field (see + --match-filters FILTER Generic video filter. Any field (see "OUTPUT TEMPLATE") can be compared with a number or a string using the operators defined in "Filtering formats". You can also simply specify a field to match if the - field is present and "!field" to check if - the field is not present. In addition, - Python style regular expression matching - can be done using "~=", and multiple - filters can be checked with "&". Use a "\" - to escape "&" or quotes if needed. Eg: - --match-filter "!is_live & like_count>?100 - & description~='(?i)\bcats \& dogs\b'" - matches only videos that are not live, has - a like count more than 100 (or the like - field is not available), and also has a - description that contains the phrase "cats - & dogs" (ignoring case) + field is present, use "!field" to check if + the field is not present, and "&" to check + multiple conditions. Use a "\" to escape + "&" or quotes if needed. If used multiple + times, the filter matches if atleast one of + the conditions are met. 
Eg: --match-filter + !is_live --match-filter "like_count>?100 & + description~='(?i)\bcats \& dogs\b'" + matches only videos that are not live OR + those that have a like count more than 100 + (or the like field is not available) and + also has a description that contains the + phrase "cats & dogs" (ignoring case) --no-match-filter Do not use generic video filter (default) --no-playlist Download only the video, if the URL refers to a video and a playlist @@ -840,15 +841,17 @@ You can also fork the project on github and run your fork's [build workflow](.gi (requires ffmpeg and ffprobe) --audio-format FORMAT Specify audio format to convert the audio to when -x is used. Currently supported - formats are: best (default) or one of - best|aac|flac|mp3|m4a|opus|vorbis|wav|alac - --audio-quality QUALITY Specify ffmpeg audio quality, insert a + formats are: best (default) or one of aac, + flac, mp3, m4a, opus, vorbis, wav, alac + --audio-quality QUALITY Specify ffmpeg audio quality to use when + converting the audio with -x. Insert a value between 0 (best) and 10 (worst) for VBR or a specific bitrate like 128K (default 5) --remux-video FORMAT Remux the video into another container if - necessary (currently supported: mp4|mkv|flv - |webm|mov|avi|mp3|mka|m4a|ogg|opus). If + necessary (currently supported: mp4, mkv, + flv, webm, mov, avi, mka, ogg, aac, flac, + mp3, m4a, opus, vorbis, wav, alac). If target container does not support the video/audio codec, remuxing will fail. You can specify multiple rules; Eg. @@ -948,10 +951,10 @@ You can also fork the project on github and run your fork's [build workflow](.gi option can be used multiple times --no-exec Remove any previously defined --exec --convert-subs FORMAT Convert the subtitles to another format - (currently supported: srt|vtt|ass|lrc) + (currently supported: srt, vtt, ass, lrc) (Alias: --convert-subtitles) --convert-thumbnails FORMAT Convert the thumbnails to another format - (currently supported: jpg|png|webp) + (currently supported: jpg, png, webp) --split-chapters Split video into multiple files based on internal chapters. The "chapter:" prefix can be used with "--paths" and "--output" @@ -1638,7 +1641,11 @@ $ yt-dlp --parse-metadata "description:Artist - (?P.+)" # Set title as "Series name S01E05" $ yt-dlp --parse-metadata "%(series)s S%(season_number)02dE%(episode_number)02d:%(title)s" -# Set "comment" field in video metadata using description instead of webpage_url +# Prioritize uploader as the "artist" field in video metadata +$ yt-dlp --parse-metadata "%(uploader|)s:%(meta_artist)s" --add-metadata + +# Set "comment" field in video metadata using description instead of webpage_url, +# handling multiple lines correctly $ yt-dlp --parse-metadata "description:(?s)(?P.+)" --add-metadata # Remove "formats" field from the infojson by setting it to an empty string @@ -1651,7 +1658,7 @@ $ yt-dlp --replace-in-metadata "title,uploader" "[ _]" "-" # EXTRACTOR ARGUMENTS -Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. Eg: `--extractor-args "youtube:player-client=android_agegate,web;include_live_dash" --extractor-args "funimation:version=uncut"` +Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. 
Eg: `--extractor-args "youtube:player-client=android_embedded,web;include_live_dash" --extractor-args "funimation:version=uncut"` The following extractors use this feature: @@ -1661,10 +1668,8 @@ The following extractors use this feature: * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details * `include_live_dash`: Include live dash formats even without `--live-from-start` (These formats don't download properly) * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) -* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all`. - * E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total. -* `max_comment_depth` Maximum depth for nested comments. YouTube supports depths 1 or 2 (default) - * **Deprecated**: Set `max-replies` to `0` or `all` in `max_comments` instead (e.g. `max_comments=all,all,0` to get no replies) +* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all` + * E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total #### youtubetab (YouTube playlists, channels, feeds, etc.) * `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details) @@ -1743,7 +1748,7 @@ with YoutubeDL(ydl_opts) as ydl: ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc']) ``` -Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L191). +Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L195). 
Here's a more complete example demonstrating various functionality: diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 51d83bde0..d03229d86 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -517,7 +517,7 @@ class YoutubeDL(object): _format_fields = { # NB: Keep in sync with the docstring of extractor/common.py - 'url', 'manifest_url', 'ext', 'format', 'format_id', 'format_note', + 'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note', 'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', @@ -938,7 +938,7 @@ class YoutubeDL(object): def deprecation_warning(self, message): if self.params.get('logger') is not None: - self.params['logger'].warning('DeprecationWarning: {message}') + self.params['logger'].warning(f'DeprecationWarning: {message}') else: self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True) @@ -2478,8 +2478,9 @@ class YoutubeDL(object): if info_dict.get('is_live') and formats: formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start] if get_from_start and not formats: - self.raise_no_formats(info_dict, msg='--live-from-start is passed, but there are no formats that can be downloaded from the start. ' - 'If you want to download from the current time, pass --no-live-from-start') + self.raise_no_formats(info_dict, msg=( + '--live-from-start is passed, but there are no formats that can be downloaded from the start. ' + 'If you want to download from the current time, use --no-live-from-start')) if not formats: self.raise_no_formats(info_dict) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index a445d8621..ebf2d227a 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -379,7 +379,7 @@ def validate_options(opts): 'To let yt-dlp download and merge the best available formats, simply do not pass any format selection', 'If you know what you are doing and want only the best pre-merged format, use "-f b" instead to suppress this warning'))) - # --(post-processor/downloader)-args without name + # --(postprocessor/downloader)-args without name def report_args_compat(name, value, key1, key2=None): if key1 in value and key2 not in value: warnings.append(f'{name} arguments given without specifying name. 
The arguments will be given to all {name}s') diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index 7265cad81..1f08a3664 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -21,6 +21,7 @@ from .compat import ( compat_cookiejar_Cookie, ) from .utils import ( + error_to_str, expand_path, Popen, YoutubeDLCookieJar, @@ -721,7 +722,7 @@ def _get_kwallet_network_wallet(logger): network_wallet = stdout.decode('utf-8').strip() logger.debug('NetworkWallet = "{}"'.format(network_wallet)) return network_wallet - except BaseException as e: + except Exception as e: logger.warning('exception while obtaining NetworkWallet: {}'.format(e)) return default_wallet @@ -766,8 +767,8 @@ def _get_kwallet_password(browser_keyring_name, logger): if stdout[-1:] == b'\n': stdout = stdout[:-1] return stdout - except BaseException as e: - logger.warning(f'exception running kwallet-query: {type(e).__name__}({e})') + except Exception as e: + logger.warning(f'exception running kwallet-query: {error_to_str(e)}') return b'' @@ -823,8 +824,8 @@ def _get_mac_keyring_password(browser_keyring_name, logger): if stdout[-1:] == b'\n': stdout = stdout[:-1] return stdout - except BaseException as e: - logger.warning(f'exception running find-generic-password: {type(e).__name__}({e})') + except Exception as e: + logger.warning(f'exception running find-generic-password: {error_to_str(e)}') return None diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index 6b75dfc62..c45a8a476 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -403,7 +403,7 @@ class FragmentFD(FileDownloader): pass if compat_os_name == 'nt': - def bindoj_result(future): + def future_result(future): while True: try: return future.result(0.1) @@ -412,7 +412,7 @@ class FragmentFD(FileDownloader): except concurrent.futures.TimeoutError: continue else: - def bindoj_result(future): + def future_result(future): return future.result() def interrupt_trigger_iter(fg): @@ -430,7 +430,7 @@ class FragmentFD(FileDownloader): result = True for tpe, job in spins: try: - result = result and bindoj_result(job) + result = result and future_result(job) except KeyboardInterrupt: interrupt_trigger[0] = False finally: @@ -494,16 +494,14 @@ class FragmentFD(FileDownloader): self.report_error('Giving up after %s fragment retries' % fragment_retries) def append_fragment(frag_content, frag_index, ctx): - if not frag_content: - if not is_fatal(frag_index - 1): - self.report_skip_fragment(frag_index, 'fragment not found') - return True - else: - ctx['dest_stream'].close() - self.report_error( - 'fragment %s not found, unable to continue' % frag_index) - return False - self._append_fragment(ctx, pack_func(frag_content, frag_index)) + if frag_content: + self._append_fragment(ctx, pack_func(frag_content, frag_index)) + elif not is_fatal(frag_index - 1): + self.report_skip_fragment(frag_index, 'fragment not found') + else: + ctx['dest_stream'].close() + self.report_error(f'fragment {frag_index} not found, unable to continue') + return False return True decrypt_fragment = self.decrypter(info_dict) diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index cabf401a7..591a9b08d 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -7,7 +7,6 @@ import random from .common import FileDownloader from ..compat import ( - compat_str, compat_urllib_error, compat_http_client ) @@ -58,8 +57,6 @@ class HttpFD(FileDownloader): ctx.resume_len = 0 ctx.block_size = self.params.get('buffersize', 1024) ctx.start_time = 
time.time() - ctx.chunk_size = None - throttle_start = None # parse given Range req_start, req_end, _ = parse_http_range(headers.get('Range')) @@ -85,12 +82,6 @@ class HttpFD(FileDownloader): class NextFragment(Exception): pass - def set_range(req, start, end): - range_header = 'bytes=%d-' % start - if end: - range_header += compat_str(end) - req.add_header('Range', range_header) - def establish_connection(): ctx.chunk_size = (random.randint(int(chunk_size * 0.95), chunk_size) if not is_test and chunk_size else chunk_size) @@ -131,7 +122,7 @@ class HttpFD(FileDownloader): request = sanitized_Request(url, request_data, headers) has_range = range_start is not None if has_range: - set_range(request, range_start, range_end) + request.add_header('Range', f'bytes={int(range_start)}-{int_or_none(range_end) or ""}') # Establish connection try: ctx.data = self.ydl.urlopen(request) @@ -214,7 +205,6 @@ class HttpFD(FileDownloader): raise RetryDownload(err) def download(): - nonlocal throttle_start data_len = ctx.data.info().get('Content-length', None) # Range HTTP header may be ignored/unsupported by a webserver @@ -329,14 +319,14 @@ class HttpFD(FileDownloader): if speed and speed < (self.params.get('throttledratelimit') or 0): # The speed must stay below the limit for 3 seconds # This prevents raising error when the speed temporarily goes down - if throttle_start is None: - throttle_start = now - elif now - throttle_start > 3: + if ctx.throttle_start is None: + ctx.throttle_start = now + elif now - ctx.throttle_start > 3: if ctx.stream is not None and ctx.tmpfilename != '-': ctx.stream.close() raise ThrottledDownload() elif speed: - throttle_start = None + ctx.throttle_start = None if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len: ctx.resume_len = byte_counter diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index dd1ff512e..3212f3328 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -926,9 +926,9 @@ class BiliIntlIE(BiliIntlBaseIE): if season_id and not video_data: # Non-Bstation layout, read through episode list season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id) - video_data = next( - episode for episode in traverse_obj(season_json, ('sections', ..., 'episodes', ...), expected_type=dict) - if str(episode.get('episode_id')) == ep_id) + video_data = traverse_obj(season_json, + ('sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == ep_id), + expected_type=dict, get_all=False) return self._extract_video_info(video_data, ep_id=ep_id, aid=aid) diff --git a/yt_dlp/extractor/canvas.py b/yt_dlp/extractor/canvas.py index 31e7d7de6..8b9903774 100644 --- a/yt_dlp/extractor/canvas.py +++ b/yt_dlp/extractor/canvas.py @@ -245,10 +245,6 @@ class VrtNUIE(GigyaBaseIE): 'upload_date': '20200727', }, 'skip': 'This video is only available for registered users', - 'params': { - 'username': '', - 'password': '', - }, 'expected_warnings': ['is not a supported codec'], }, { # Only available via new API endpoint @@ -264,10 +260,6 @@ class VrtNUIE(GigyaBaseIE): 'episode_number': 5, }, 'skip': 'This video is only available for registered users', - 'params': { - 'username': '', - 'password': '', - }, 'expected_warnings': ['Unable to download asset JSON', 'is not a supported codec', 'Unknown MIME type'], }] _NETRC_MACHINE = 'vrtnu' diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 81688eb54..e2605c1f4 100644 --- 
a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -139,6 +139,8 @@ class InfoExtractor(object): for HDS - URL of the F4M manifest, for DASH - URL of the MPD manifest, for MSS - URL of the ISM manifest. + * manifest_stream_number (For internal use only) + The index of the stream in the manifest file * ext Will be calculated from URL if missing * format A human-readable description of the format ("mp4 container with h264/opus"). @@ -215,7 +217,7 @@ class InfoExtractor(object): (HTTP or RTMP) download. Boolean. * has_drm The format has DRM and cannot be downloaded. Boolean * downloader_options A dictionary of downloader options as - described in FileDownloader + described in FileDownloader (For internal use only) RTMP formats can also have the additional fields: page_url, app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn, rtmp_protocol, rtmp_real_time @@ -3684,9 +3686,9 @@ class InfoExtractor(object): def _merge_subtitle_items(subtitle_list1, subtitle_list2): """ Merge subtitle items for one language. Items with duplicated URLs/data will be dropped. """ - list1_data = set([item.get('url') or item['data'] for item in subtitle_list1]) + list1_data = set((item.get('url'), item.get('data')) for item in subtitle_list1) ret = list(subtitle_list1) - ret.extend([item for item in subtitle_list2 if (item.get('url') or item['data']) not in list1_data]) + ret.extend(item for item in subtitle_list2 if (item.get('url'), item.get('data')) not in list1_data) return ret @classmethod diff --git a/yt_dlp/extractor/dropout.py b/yt_dlp/extractor/dropout.py index a7442d8f0..2fa61950c 100644 --- a/yt_dlp/extractor/dropout.py +++ b/yt_dlp/extractor/dropout.py @@ -123,7 +123,7 @@ class DropoutIE(InfoExtractor): self._login(display_id) webpage = self._download_webpage(url, display_id, note='Downloading video webpage') finally: - self._download_webpage('https://www.dropout.tv/logout', display_id, note='Logging out') + self._download_webpage('https://www.dropout.tv/logout', display_id, note='Logging out', fatal=False) embed_url = self._search_regex(r'embed_url:\s*["\'](.+?)["\']', webpage, 'embed url') thumbnail = self._og_search_thumbnail(webpage) @@ -139,7 +139,7 @@ class DropoutIE(InfoExtractor): '_type': 'url_transparent', 'ie_key': VHXEmbedIE.ie_key(), 'url': embed_url, - 'id': self._search_regex(r'embed.vhx.tv/videos/(.+?)\?', embed_url, 'id'), + 'id': self._search_regex(r'embed\.vhx\.tv/videos/(.+?)\?', embed_url, 'id'), 'display_id': display_id, 'title': title, 'description': self._html_search_meta('description', webpage, fatal=False), diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index 2deed585f..5e0e2facf 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -397,8 +397,10 @@ class FacebookIE(InfoExtractor): r'handleWithCustomApplyEach\(\s*ScheduledApplyEach\s*,\s*(\{.+?\})\s*\);', webpage)] post = traverse_obj(post_data, ( ..., 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or [] - media = [m for m in traverse_obj(post, (..., 'attachments', ..., 'media'), expected_type=dict) or [] - if str(m.get('id')) == video_id and m.get('__typename') == 'Video'] + media = traverse_obj( + post, + (..., 'attachments', ..., 'media', lambda _, m: str(m['id']) == video_id and m['__typename'] == 'Video'), + expected_type=dict) title = get_first(media, ('title', 'text')) description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text')) uploader_data = get_first(media, 'owner') or 
get_first(post, ('node', 'actors', ...)) or {} diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 2c503e581..bd56ad289 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -2523,7 +2523,7 @@ class GenericIE(InfoExtractor): 'title': 'Riku ja Tunna lähtevät peurajahtiin tv:stä tutun biologin kanssa – metsästysreissu huipentuu kasvissyöjän painajaiseen!', 'thumbnail': r're:^https?://.+\.jpg$', 'duration': 108, - 'series' : 'Madventures Suomi', + 'series': 'Madventures Suomi', 'description': 'md5:aa55b44bd06a1e337a6f1d0b46507381', 'categories': ['Matkailu', 'Elämäntyyli'], 'age_limit': 0, @@ -3886,8 +3886,8 @@ class GenericIE(InfoExtractor): if RtmpIE.suitable(vurl): return True vpath = compat_urlparse.urlparse(vurl).path - vext = determine_ext(vpath) - return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js', 'xml') + vext = determine_ext(vpath, None) + return vext not in (None, 'swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js', 'xml') def filter_video(urls): return list(filter(check_video, urls)) diff --git a/yt_dlp/extractor/limelight.py b/yt_dlp/extractor/limelight.py index 369141d67..b20681ad1 100644 --- a/yt_dlp/extractor/limelight.py +++ b/yt_dlp/extractor/limelight.py @@ -194,7 +194,7 @@ class LimelightBaseIE(InfoExtractor): cc_url = cc.get('webvttFileUrl') if not cc_url: continue - lang = cc.get('languageCode') or self._search_regex(r'/[a-z]{2}\.vtt', cc_url, 'lang', default='en') + lang = cc.get('languageCode') or self._search_regex(r'/([a-z]{2})\.vtt', cc_url, 'lang', default='en') subtitles.setdefault(lang, []).append({ 'url': cc_url, }) diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py index a5a1a01e0..4eb6ed070 100644 --- a/yt_dlp/extractor/niconico.py +++ b/yt_dlp/extractor/niconico.py @@ -469,7 +469,7 @@ class NiconicoIE(InfoExtractor): comment_user_key = traverse_obj(api_data, ('comment', 'keys', 'userKey')) user_id_str = session_api_data.get('serviceUserId') - thread_ids = [x for x in traverse_obj(api_data, ('comment', 'threads')) or [] if x['isActive']] + thread_ids = traverse_obj(api_data, ('comment', 'threads', lambda _, v: v['isActive'])) raw_danmaku = self._extract_all_comments(video_id, thread_ids, user_id_str, comment_user_key) if not raw_danmaku: self.report_warning(f'Failed to get comments. 
{bug_reports_message()}') diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 6f8c32882..c1d6c5477 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -264,7 +264,7 @@ class TikTokBaseIE(InfoExtractor): return { 'id': aweme_id, 'title': aweme_detail.get('desc'), - 'description': aweme_detail['desc'], + 'description': aweme_detail.get('desc'), 'view_count': int_or_none(stats_info.get('play_count')), 'like_count': int_or_none(stats_info.get('digg_count')), 'repost_count': int_or_none(stats_info.get('share_count')), @@ -387,6 +387,9 @@ class TikTokIE(TikTokBaseIE): 'like_count': int, 'repost_count': int, 'comment_count': int, + 'artist': 'Ysrbeats', + 'album': 'Lehanga', + 'track': 'Lehanga', } }, { 'url': 'https://www.tiktok.com/@patroxofficial/video/6742501081818877190?langCountry=en', @@ -410,6 +413,8 @@ class TikTokIE(TikTokBaseIE): 'like_count': int, 'repost_count': int, 'comment_count': int, + 'artist': 'Evan Todd, Jessica Keenan Wynn, Alice Lee, Barrett Wilbert Weed & Jon Eidson', + 'track': 'Big Fun', } }, { # Banned audio, only available on the app @@ -463,7 +468,7 @@ class TikTokIE(TikTokBaseIE): 'info_dict': { 'id': '7059698374567611694', 'ext': 'mp4', - 'title': 'N/A', + 'title': 'tiktok video #7059698374567611694', 'description': '', 'uploader': 'pokemonlife22', 'creator': 'Pokemon', @@ -480,7 +485,7 @@ class TikTokIE(TikTokBaseIE): 'repost_count': int, 'comment_count': int, }, - 'expected_warnings': ['Video not available'] + 'expected_warnings': ['Video not available', 'Creating a generic title'] }, { # Auto-captions available 'url': 'https://www.tiktok.com/@hankgreen1/video/7047596209028074758', diff --git a/yt_dlp/extractor/yandexvideo.py b/yt_dlp/extractor/yandexvideo.py index a101af67e..7d3966bf1 100644 --- a/yt_dlp/extractor/yandexvideo.py +++ b/yt_dlp/extractor/yandexvideo.py @@ -163,7 +163,6 @@ class YandexVideoPreviewIE(InfoExtractor): 'thumbnail': 'https://i.mycdn.me/videoPreview?id=544866765315&type=37&idx=13&tkn=TY5qjLYZHxpmcnK8U2LgzYkgmaU&fn=external_8', 'uploader_id': '481054701571', 'title': 'LOFT - summer, summer, summer HD', - 'manifest_stream_number': 0, 'uploader': 'АРТЁМ КУДРОВ', }, }, { # youtube diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 485849ba9..017554c88 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -837,17 +837,20 @@ class YoutubeBaseInfoExtractor(InfoExtractor): uploader = self._get_text(renderer, 'ownerText', 'shortBylineText') channel_id = traverse_obj( - renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'), expected_type=str, get_all=False) + renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'), + expected_type=str, get_all=False) timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText') scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False)) overlay_style = traverse_obj( - renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'), get_all=False, expected_type=str) + renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'), + get_all=False, expected_type=str) badges = self._extract_badges(renderer) thumbnails = self._extract_thumbnails(renderer, 'thumbnail') navigation_url = urljoin('https://www.youtube.com/', traverse_obj( - renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'), expected_type=str)) + 
renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'), + expected_type=str)) or '' url = f'https://www.youtube.com/watch?v={video_id}' - if overlay_style == 'SHORTS' or (navigation_url and '/shorts/' in navigation_url): + if overlay_style == 'SHORTS' or '/shorts/' in navigation_url: url = f'https://www.youtube.com/shorts/{video_id}' return { @@ -862,7 +865,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'uploader': uploader, 'channel_id': channel_id, 'thumbnails': thumbnails, - 'upload_date': strftime_or_none(timestamp, '%Y%m%d') if self._configuration_arg('approximate_date', ie_key='youtubetab') else None, + 'upload_date': (strftime_or_none(timestamp, '%Y%m%d') + if self._configuration_arg('approximate_date', ie_key='youtubetab') + else None), 'live_status': ('is_upcoming' if scheduled_timestamp is not None else 'was_live' if 'streamed' in time_text.lower() else 'is_live' if overlay_style is not None and overlay_style == 'LIVE' or 'live now' in badges diff --git a/yt_dlp/options.py b/yt_dlp/options.py index eb306898a..06c613262 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -163,6 +163,8 @@ def create_parser(): values = [process(value)] if delim is None else list(map(process, value.split(delim)[::-1])) while values: actual_val = val = values.pop() + if not val: + raise optparse.OptionValueError(f'Invalid {option.metavar} for {opt_str}: {value}') if val == 'all': current.update(allowed_values) elif val == '-all': @@ -1311,7 +1313,7 @@ def create_parser(): '--audio-format', metavar='FORMAT', dest='audioformat', default='best', help=( 'Specify audio format to convert the audio to when -x is used. Currently supported formats are: ' - 'best (default) or one of %s' % '|'.join(FFmpegExtractAudioPP.SUPPORTED_EXTS))) + 'best (default) or one of %s' % ', '.join(FFmpegExtractAudioPP.SUPPORTED_EXTS))) postproc.add_option( '--audio-quality', metavar='QUALITY', dest='audioquality', default='5', @@ -1323,7 +1325,7 @@ def create_parser(): 'Remux the video into another container if necessary (currently supported: %s). ' 'If target container does not support the video/audio codec, remuxing will fail. ' 'You can specify multiple rules; Eg. "aac>m4a/mov>mp4/mkv" will remux aac to m4a, mov to mp4 ' - 'and anything else to mkv.' % '|'.join(FFmpegVideoRemuxerPP.SUPPORTED_EXTS))) + 'and anything else to mkv.' % ', '.join(FFmpegVideoRemuxerPP.SUPPORTED_EXTS))) postproc.add_option( '--recode-video', metavar='FORMAT', dest='recodevideo', default=None, @@ -1438,7 +1440,7 @@ def create_parser(): '"multi_video" (default; only when the videos form a single show). ' 'All the video files must have same codecs and number of streams to be concatable. ' 'The "pl_video:" prefix can be used with "--paths" and "--output" to ' - 'set the output filename for the split files. See "OUTPUT TEMPLATE" for details')) + 'set the output filename for the concatenated files. 
See "OUTPUT TEMPLATE" for details')) postproc.add_option( '--fixup', metavar='POLICY', dest='fixup', default=None, @@ -1486,20 +1488,20 @@ def create_parser(): help=optparse.SUPPRESS_HELP) postproc.add_option( '--no-exec-before-download', - action='store_const', dest='exec_before_dl_cmd', const=[], + action='store_const', dest='exec_before_dl_cmd', const=None, help=optparse.SUPPRESS_HELP) postproc.add_option( '--convert-subs', '--convert-sub', '--convert-subtitles', metavar='FORMAT', dest='convertsubtitles', default=None, help=( 'Convert the subtitles to another format (currently supported: %s) ' - '(Alias: --convert-subtitles)' % '|'.join(FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS))) + '(Alias: --convert-subtitles)' % ', '.join(FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS))) postproc.add_option( '--convert-thumbnails', metavar='FORMAT', dest='convertthumbnails', default=None, help=( 'Convert the thumbnails to another format ' - '(currently supported: %s) ' % '|'.join(FFmpegThumbnailsConvertorPP.SUPPORTED_EXTS))) + '(currently supported: %s) ' % ', '.join(FFmpegThumbnailsConvertorPP.SUPPORTED_EXTS))) postproc.add_option( '--split-chapters', '--split-tracks', dest='split_chapters', action='store_true', default=False, diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 5216acbfb..643290286 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -500,6 +500,9 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): temp_path = new_path = prefix + sep + extension if new_path == path: + if acodec == 'copy': + self.to_screen(f'File is already in target format {self._preferredcodec}, skipping') + return [], information orig_path = prepend_extension(path, 'orig') temp_path = prepend_extension(path, 'temp') if (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)) @@ -1122,6 +1125,11 @@ class FFmpegConcatPP(FFmpegPostProcessor): self._only_multi_video = only_multi_video super().__init__(downloader) + def _get_codecs(self, file): + codecs = traverse_obj(self.get_metadata_object(file), ('streams', ..., 'codec_name')) + self.write_debug(f'Codecs = {", ".join(codecs)}') + return tuple(codecs) + def concat_files(self, in_files, out_file): if not self._downloader._ensure_dir_exists(out_file): return @@ -1131,8 +1139,7 @@ class FFmpegConcatPP(FFmpegPostProcessor): os.replace(in_files[0], out_file) return [] - codecs = [traverse_obj(self.get_metadata_object(file), ('streams', ..., 'codec_name')) for file in in_files] - if len(set(map(tuple, codecs))) > 1: + if len(set(map(self._get_codecs, in_files))) > 1: raise PostProcessingError( 'The files have different streams/codecs and cannot be concatenated. 
' 'Either select different formats or --recode-video them to a common format') @@ -1146,7 +1153,7 @@ class FFmpegConcatPP(FFmpegPostProcessor): entries = info.get('entries') or [] if not any(entries) or (self._only_multi_video and info['_type'] != 'multi_video'): return [], info - elif any(len(entry) > 1 for entry in traverse_obj(entries, (..., 'requested_downloads')) or []): + elif traverse_obj(entries, (..., 'requested_downloads', lambda _, v: len(v) > 1)): raise PostProcessingError('Concatenation is not supported when downloading multiple separate formats') in_files = traverse_obj(entries, (..., 'requested_downloads', 0, 'filepath')) or [] diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index ce918750d..6663583fc 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -1040,7 +1040,7 @@ def make_HTTPS_handler(params, **kwargs): def bug_reports_message(before=';'): - msg = ('please report this issue on https://github.com/yt-dlp/yt-dlp , ' + msg = ('please report this issue on https://github.com/yt-dlp/yt-dlp/issues?q= , ' 'filling out the appropriate issue template. ' 'Confirm you are on the latest version using yt-dlp -U') @@ -2883,6 +2883,7 @@ class PagedList: class OnDemandPagedList(PagedList): + """Download pages until a page with less than maximum results""" def _getslice(self, start, end): for pagenum in itertools.count(start // self._pagesize): firstid = pagenum * self._pagesize @@ -2922,6 +2923,7 @@ class OnDemandPagedList(PagedList): class InAdvancePagedList(PagedList): + """PagedList with total number of pages known in advance""" def __init__(self, pagefunc, pagecount, pagesize): PagedList.__init__(self, pagefunc, pagesize, True) self._pagecount = pagecount @@ -3090,13 +3092,10 @@ def multipart_encode(data, boundary=None): def dict_get(d, key_or_keys, default=None, skip_false_values=True): - if isinstance(key_or_keys, (list, tuple)): - for key in key_or_keys: - if key not in d or d[key] is None or skip_false_values and not d[key]: - continue - return d[key] - return default - return d.get(key_or_keys, default) + for val in map(d.get, variadic(key_or_keys)): + if val is not None and (val or not skip_false_values): + return val + return default def try_call(*funcs, expected_type=None, args=[], kwargs={}): @@ -3324,6 +3323,10 @@ def error_to_compat_str(err): return err_str +def error_to_str(err): + return f'{type(err).__name__}: {err}' + + def mimetype2ext(mt): if mt is None: return None From ce0593ef61a3da2ac296a8e791bbb0c6e356c05a Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 5 Apr 2022 21:31:59 +0530 Subject: [PATCH 0905/2552] [http] Fix #3215 --- yt_dlp/downloader/http.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index 591a9b08d..a232168fa 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -145,7 +145,8 @@ class HttpFD(FileDownloader): or content_len < range_end) if accept_content_len: ctx.content_len = content_len - ctx.data_len = min(content_len, req_end or content_len) - (req_start or 0) + if content_len or req_end: + ctx.data_len = min(content_len or req_end, req_end or content_len) - (req_start or 0) return # Content-Range is either not present or invalid. 
Assuming remote webserver is # trying to send the whole file, resume is not possible, so wiping the local file From 0edb3e336c7ebb6d52bf86eeb35fc9b44c7aba51 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 5 Apr 2022 23:08:18 +0530 Subject: [PATCH 0906/2552] Do not prevent download if locking is unsupported Closes #3022 Failure to lock download-archive is still fatal. This is consistent with youtube-dl's behavior --- yt_dlp/downloader/common.py | 6 ++- yt_dlp/utils.py | 91 +++++++++++++++++++------------------ 2 files changed, 52 insertions(+), 45 deletions(-) diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index afd2f2e38..cbfea7a65 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -11,6 +11,7 @@ from ..utils import ( encodeFilename, error_to_compat_str, format_bytes, + LockingUnsupportedError, sanitize_open, shell_quote, timeconvert, @@ -234,7 +235,10 @@ class FileDownloader(object): @wrap_file_access('open', fatal=True) def sanitize_open(self, filename, open_mode): - return sanitize_open(filename, open_mode) + f, filename = sanitize_open(filename, open_mode) + if not getattr(f, 'locked', None): + self.write_debug(f'{LockingUnsupportedError.msg}. Proceeding without locking', only_once=True) + return f, filename @wrap_file_access('remove') def try_remove(self, filename): diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 6663583fc..2db22d676 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -674,26 +674,25 @@ def sanitize_open(filename, open_mode): It returns the tuple (stream, definitive_file_name). """ - try: - if filename == '-': - if sys.platform == 'win32': - import msvcrt - msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) - return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename) - stream = locked_file(filename, open_mode, block=False).open() - return (stream, filename) - except (IOError, OSError) as err: - if err.errno in (errno.EACCES,): - raise + if filename == '-': + if sys.platform == 'win32': + import msvcrt + msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) + return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename) - # In case of error, try to remove win32 forbidden chars - alt_filename = sanitize_path(filename) - if alt_filename == filename: - raise - else: - # An exception here should be caught in the caller - stream = locked_file(filename, open_mode, block=False).open() - return (stream, alt_filename) + for attempt in range(2): + try: + try: + stream = locked_file(filename, open_mode, block=False).__enter__() + except LockingUnsupportedError: + stream = open(filename, open_mode) + return (stream, filename) + except (IOError, OSError) as err: + if attempt or err.errno in (errno.EACCES,): + raise + old_filename, filename = filename, sanitize_path(filename) + if old_filename == filename: + raise def timeconvert(timestr): @@ -2120,6 +2119,13 @@ def intlist_to_bytes(xs): return compat_struct_pack('%dB' % len(xs), *xs) +class LockingUnsupportedError(IOError): + msg = 'File locking is not supported on this platform' + + def __init__(self): + super().__init__(self.msg) + + # Cross-platform file locking if sys.platform == 'win32': import ctypes.wintypes @@ -2200,21 +2206,20 @@ else: fcntl.lockf(f, fcntl.LOCK_UN) except ImportError: - UNSUPPORTED_MSG = 'file locking is not supported on this platform' def _lock_file(f, exclusive, block): - raise IOError(UNSUPPORTED_MSG) + raise LockingUnsupportedError() def _unlock_file(f): - raise IOError(UNSUPPORTED_MSG) + raise 
LockingUnsupportedError() class locked_file(object): - _closed = False + locked = False def __init__(self, filename, mode, block=True, encoding=None): - assert mode in ['r', 'rb', 'a', 'ab', 'w', 'wb'] - self.f = io.open(filename, mode, encoding=encoding) + assert mode in {'r', 'rb', 'a', 'ab', 'w', 'wb'} + self.f = open(filename, mode, encoding=encoding) self.mode = mode self.block = block @@ -2222,36 +2227,34 @@ class locked_file(object): exclusive = 'r' not in self.mode try: _lock_file(self.f, exclusive, self.block) + self.locked = True except IOError: self.f.close() raise return self - def __exit__(self, etype, value, traceback): + def unlock(self): + if not self.locked: + return try: - if not self._closed: - _unlock_file(self.f) + _unlock_file(self.f) finally: - self.f.close() - self._closed = True - - def __iter__(self): - return iter(self.f) - - def write(self, *args): - return self.f.write(*args) + self.locked = False - def read(self, *args): - return self.f.read(*args) + def __exit__(self, *_): + try: + self.unlock() + finally: + self.f.close() - def flush(self): - self.f.flush() + open = __enter__ + close = __exit__ - def open(self): - return self.__enter__() + def __getattr__(self, attr): + return getattr(self.f, attr) - def close(self, *args): - self.__exit__(self, *args, value=False, traceback=False) + def __iter__(self): + return iter(self.f) def get_filesystem_encoding(): From 897376719871279eef89426b1452abb89051f0dc Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 5 Apr 2022 23:15:17 +0530 Subject: [PATCH 0907/2552] Do not lock downloading file on Windows Closes #3124 --- yt_dlp/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 2db22d676..87dd04e23 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -683,6 +683,10 @@ def sanitize_open(filename, open_mode): for attempt in range(2): try: try: + if sys.platform == 'win32': + # FIXME: Windows only has mandatory locking which also locks the file from being read. + # So for now, don't lock the file on windows. Ref: https://github.com/yt-dlp/yt-dlp/issues/3124 + raise LockingUnsupportedError() stream = locked_file(filename, open_mode, block=False).__enter__() except LockingUnsupportedError: stream = open(filename, open_mode) From 42a4f21a034139acf293140383b752cdc7f7f36a Mon Sep 17 00:00:00 2001 From: Ha Tien Loi Date: Wed, 6 Apr 2022 15:52:08 +0700 Subject: [PATCH 0908/2552] [fptplay] Fix metadata extraction (#3218) Authored by: hatienl0i261299 --- yt_dlp/extractor/fptplay.py | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/yt_dlp/extractor/fptplay.py b/yt_dlp/extractor/fptplay.py index a34e90bb1..c23fe6c53 100644 --- a/yt_dlp/extractor/fptplay.py +++ b/yt_dlp/extractor/fptplay.py @@ -7,12 +7,14 @@ import urllib.parse from .common import InfoExtractor from ..utils import ( + clean_html, join_nonempty, + strip_or_none, ) class FptplayIE(InfoExtractor): - _VALID_URL = r'https?://fptplay\.vn/(?Pxem-video)/[^/]+\-(?P\w+)(?:/tap-(?P[^/]+)?/?(?:[?#]|$)|)' + _VALID_URL = r'https?://fptplay\.vn/xem-video/[^/]+\-(?P\w+)(?:/tap-(?P\d+)?/?(?:[?#]|$)|)' _GEO_COUNTRIES = ['VN'] IE_NAME = 'fptplay' IE_DESC = 'fptplay.vn' @@ -22,7 +24,7 @@ class FptplayIE(InfoExtractor): 'info_dict': { 'id': '621a123016f369ebbde55945', 'ext': 'mp4', - 'title': 'Nhân Duyên Đại Nhân Xin Dừng Bước - Ms. 
Cupid In Love', + 'title': 'Nhân Duyên Đại Nhân Xin Dừng Bước - Tập 1A', 'description': 'md5:23cf7d1ce0ade8e21e76ae482e6a8c6c', }, }, { @@ -31,25 +33,42 @@ class FptplayIE(InfoExtractor): 'info_dict': { 'id': '61f3aa8a6b3b1d2e73c60eb5', 'ext': 'mp4', - 'title': 'Má Tôi Là Đại Gia - 3', + 'title': 'Má Tôi Là Đại Gia - Tập 3', 'description': 'md5:ff8ba62fb6e98ef8875c42edff641d1c', }, + }, { + 'url': 'https://fptplay.vn/xem-video/lap-toi-do-giam-under-the-skin-6222d9684ec7230fa6e627a2/tap-4', + 'md5': 'bcb06c55ec14786d7d4eda07fa1ccbb9', + 'info_dict': { + 'id': '6222d9684ec7230fa6e627a2', + 'ext': 'mp4', + 'title': 'Lạp Tội Đồ Giám - Tập 2B', + 'description': 'md5:e5a47e9d35fbf7e9479ca8a77204908b', + }, }, { 'url': 'https://fptplay.vn/xem-video/nha-co-chuyen-hi-alls-well-ends-well-1997-6218995f6af792ee370459f0', 'only_matching': True, }] def _real_extract(self, url): - type_url, video_id, episode = self._match_valid_url(url).group('type', 'id', 'episode') - webpage = self._download_webpage(url, video_id=video_id, fatal=False) - info = self._download_json(self.get_api_with_st_token(video_id, episode or 0), video_id) + video_id, slug_episode = self._match_valid_url(url).group('id', 'episode') + webpage = self._download_webpage(url, video_id=video_id, fatal=False) or '' + title = self._search_regex( + r'(?s)]*>(.+)', webpage, 'title', fatal=False) + real_episode = slug_episode if not title else self._search_regex( + r'[^">]+)"\s+class="epi-title active"', webpage, 'episode', fatal=False) + title = strip_or_none(title) or self._html_search_meta(('og:title', 'twitter:title'), webpage) + + info = self._download_json( + self.get_api_with_st_token(video_id, int(slug_episode) - 1 if slug_episode else 0), video_id) formats, subtitles = self._extract_m3u8_formats_and_subtitles(info['data']['url'], video_id, 'mp4') self._sort_formats(formats) return { 'id': video_id, - 'title': join_nonempty( - self._html_search_meta(('og:title', 'twitter:title'), webpage), episode, delim=' - '), - 'description': self._html_search_meta(['og:description', 'twitter:description'], webpage), + 'title': join_nonempty(title, real_episode, delim=' - '), + 'description': ( + clean_html(self._search_regex(r']*>(.+)
    ', webpage, 'description')) + or self._html_search_meta(('og:description', 'twitter:description'), webpage)), 'formats': formats, 'subtitles': subtitles, } From da1ffde15de28bf0565d1bd0c02d3f17edcdfff7 Mon Sep 17 00:00:00 2001 From: panatexxa <91012623+panatexxa@users.noreply.github.com> Date: Thu, 7 Apr 2022 04:26:12 +0200 Subject: [PATCH 0909/2552] [Moviepilot] Add extractor (#3282) Authored by: panatexxa --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/moviepilot.py | 115 +++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+) create mode 100644 yt_dlp/extractor/moviepilot.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 457f4c2aa..bd27e14b2 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -915,6 +915,7 @@ from .motherless import ( ) from .motorsport import MotorsportIE from .movieclips import MovieClipsIE +from .moviepilot import MoviepilotIE from .moviezine import MoviezineIE from .movingimage import MovingImageIE from .msn import MSNIE diff --git a/yt_dlp/extractor/moviepilot.py b/yt_dlp/extractor/moviepilot.py new file mode 100644 index 000000000..4605d3481 --- /dev/null +++ b/yt_dlp/extractor/moviepilot.py @@ -0,0 +1,115 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .dailymotion import DailymotionIE +from .common import InfoExtractor +from ..utils import ( + parse_iso8601, + try_get, +) + +import re + + +class MoviepilotIE(InfoExtractor): + _IE_NAME = 'moviepilot' + _IE_DESC = 'Moviepilot trailer' + _VALID_URL = r'https?://(?:www\.)?moviepilot\.de/movies/(?P[^/]+)' + + _TESTS = [{ + 'url': 'https://www.moviepilot.de/movies/interstellar-2/', + 'info_dict': { + 'id': 'x7xdut5', + 'display_id': 'interstellar-2', + 'ext': 'mp4', + 'title': 'Interstellar', + 'thumbnail': r're:https://\w+\.dmcdn\.net/v/SaXev1VvzitVZMFsR/x720', + 'timestamp': 1400491705, + 'description': 'md5:7dfc5c1758e7322a7346934f1f0c489c', + 'uploader': 'Moviepilot', + 'like_count': int, + 'view_count': int, + 'uploader_id': 'x6nd9k', + 'upload_date': '20140519', + 'duration': 140, + 'age_limit': 0, + 'tags': ['Alle Trailer', 'Movie', 'Third Party'], + }, + }, { + 'url': 'https://www.moviepilot.de/movies/interstellar-2/trailer', + 'only_matching': True, + }, { + 'url': 'https://www.moviepilot.de/movies/interstellar-2/kinoprogramm/berlin', + 'only_matching': True, + }, { + 'url': 'https://www.moviepilot.de/movies/queen-slim/trailer', + 'info_dict': { + 'id': 'x7xj6o7', + 'display_id': 'queen-slim', + 'title': 'Queen & Slim', + 'ext': 'mp4', + 'thumbnail': r're:https://\w+\.dmcdn\.net/v/SbUM71WtomSjVmI_q/x720', + 'timestamp': 1571838685, + 'description': 'md5:73058bcd030aa12d991e4280d65fbebe', + 'uploader': 'Moviepilot', + 'like_count': int, + 'view_count': int, + 'uploader_id': 'x6nd9k', + 'upload_date': '20191023', + 'duration': 138, + 'age_limit': 0, + 'tags': ['Movie', 'Verleih', 'Neue Trailer'], + }, + }, { + 'url': 'https://www.moviepilot.de/movies/der-geiger-von-florenz/trailer', + 'info_dict': { + 'id': 'der-geiger-von-florenz', + 'title': 'Der Geiger von Florenz', + 'ext': 'mp4', + }, + 'skip': 'No trailer for this movie.', + }, { + 'url': 'https://www.moviepilot.de/movies/muellers-buero/', + 'info_dict': { + 'id': 'x7xcw1i', + 'display_id': 'muellers-buero', + 'title': 'Müllers Büro', + 'ext': 'mp4', + 'description': 'md5:57501251c05cdc61ca314b7633e0312e', + 'timestamp': 1287584475, + 'age_limit': 0, + 'duration': 82, + 'upload_date': '20101020', + 'thumbnail': 
r're:https://\w+\.dmcdn\.net/v/SaMes1WfAm1d6maq_/x720', + 'uploader': 'Moviepilot', + 'like_count': int, + 'view_count': int, + 'tags': ['Alle Trailer', 'Movie', 'Verleih'], + 'uploader_id': 'x6nd9k', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(f'https://www.moviepilot.de/movies/{video_id}/trailer', video_id) + + duration = try_get( + re.match(r'P(?P\d+)H(?P\d+)M(?P\d+)S', + self._html_search_meta('duration', webpage, fatal=False) or ''), + lambda mobj: sum(float(x) * y for x, y in zip(mobj.groups(), (3600, 60, 1)))) + # _html_search_meta is not used since we don't want name=description to match + description = self._html_search_regex( + ']+itemprop="description"[^>]+content="([^>"]+)"', webpage, 'description', fatal=False) + + return { + '_type': 'url_transparent', + 'ie_key': DailymotionIE.ie_key(), + 'display_id': video_id, + 'title': self._og_search_title(webpage), + 'url': self._html_search_meta('embedURL', webpage), + 'thumbnail': self._html_search_meta('thumbnailURL', webpage), + 'description': description, + 'duration': duration, + 'timestamp': parse_iso8601(self._html_search_meta('uploadDate', webpage), delimiter=' ') + } From 06b1628d3ed446d25ddbd4030fb92d8d90431c7e Mon Sep 17 00:00:00 2001 From: "Lesmiscore (Naoya Ozaki)" Date: Thu, 7 Apr 2022 13:42:01 +0900 Subject: [PATCH 0910/2552] [twitcasting] Don't return multi_video for archive with single hls manifest (#3319) Authored by: Lesmiscore --- yt_dlp/extractor/twitcasting.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/yt_dlp/extractor/twitcasting.py b/yt_dlp/extractor/twitcasting.py index af911de98..7f3fa0735 100644 --- a/yt_dlp/extractor/twitcasting.py +++ b/yt_dlp/extractor/twitcasting.py @@ -183,6 +183,14 @@ class TwitCastingIE(InfoExtractor): infodict = { 'formats': formats } + elif len(m3u8_urls) == 1: + formats = self._extract_m3u8_formats( + m3u8_urls[0], video_id, 'mp4', headers=self._M3U8_HEADERS) + self._sort_formats(formats) + infodict = { + # No problem here since there's only one manifest + 'formats': formats, + } else: infodict = { '_type': 'multi_video', From fcfa8853e41ca04714a7aa28a783e2804c184375 Mon Sep 17 00:00:00 2001 From: Justin Keogh Date: Thu, 7 Apr 2022 05:58:56 +0000 Subject: [PATCH 0911/2552] [utils] locked_file: Do not truncate files before locking (#2994) Authored by: jakeogh, pukkandan --- yt_dlp/utils.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 87dd04e23..66c3da4c8 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2222,10 +2222,23 @@ class locked_file(object): locked = False def __init__(self, filename, mode, block=True, encoding=None): - assert mode in {'r', 'rb', 'a', 'ab', 'w', 'wb'} - self.f = open(filename, mode, encoding=encoding) - self.mode = mode - self.block = block + if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}: + raise NotImplementedError(mode) + self.mode, self.block = mode, block + + writable = any(f in mode for f in 'wax+') + readable = any(f in mode for f in 'r+') + flags = functools.reduce(operator.ior, ( + getattr(os, 'O_CLOEXEC', 0), # UNIX only + getattr(os, 'O_BINARY', 0), # Windows only + getattr(os, 'O_NOINHERIT', 0), # Windows only + os.O_CREAT if writable else 0, # O_TRUNC only after locking + os.O_APPEND if 'a' in mode else 0, + os.O_EXCL if 'x' in mode else 0, + os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY, + )) + + self.f = os.fdopen(os.open(filename, flags), 
mode, encoding=encoding) def __enter__(self): exclusive = 'r' not in self.mode @@ -2235,6 +2248,8 @@ class locked_file(object): except IOError: self.f.close() raise + if 'w' in self.mode: + self.f.truncate() return self def unlock(self): From b63837bce0b104b1f72f2ebb6c0d05080cf2a607 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 7 Apr 2022 12:00:58 +0530 Subject: [PATCH 0912/2552] [utils] locked_file: Fix non-blocking non-exclusive lock --- yt_dlp/utils.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 66c3da4c8..02b5ae2ee 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2190,18 +2190,15 @@ else: import fcntl def _lock_file(f, exclusive, block): + flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH + if not block: + flags |= fcntl.LOCK_NB try: - fcntl.flock(f, - fcntl.LOCK_SH if not exclusive - else fcntl.LOCK_EX if block - else fcntl.LOCK_EX | fcntl.LOCK_NB) + fcntl.flock(f, flags) except BlockingIOError: raise except OSError: # AOSP does not have flock() - fcntl.lockf(f, - fcntl.LOCK_SH if not exclusive - else fcntl.LOCK_EX if block - else fcntl.LOCK_EX | fcntl.LOCK_NB) + fcntl.lockf(f, flags) def _unlock_file(f): try: From b506289fe205cc2f3488f72c826034465cef2d0c Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 7 Apr 2022 11:30:46 +0530 Subject: [PATCH 0913/2552] [test] Add `test_locked_file` --- test/test_utils.py | 31 +++++++++++++++++++++++++++++++ yt_dlp/utils.py | 5 +++-- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 31f168998..1f826c2f2 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -56,6 +56,7 @@ from yt_dlp.utils import ( is_html, js_to_json, limit_length, + locked_file, merge_dicts, mimetype2ext, month_by_name, @@ -1795,6 +1796,36 @@ Line 1 self.assertEqual(Config.hide_login_info(['--username=foo']), ['--username=PRIVATE']) + def test_locked_file(self): + TEXT = 'test_locked_file\n' + FILE = 'test_locked_file.ytdl' + MODES = 'war' # Order is important + + try: + for lock_mode in MODES: + with locked_file(FILE, lock_mode, False) as f: + if lock_mode == 'r': + self.assertEqual(f.read(), TEXT * 2, 'Wrong file content') + else: + f.write(TEXT) + for test_mode in MODES: + testing_write = test_mode != 'r' + try: + with locked_file(FILE, test_mode, False): + pass + except (BlockingIOError, PermissionError): + if not testing_write: # FIXME + print(f'Known issue: Exclusive lock ({lock_mode}) blocks read access ({test_mode})') + continue + self.assertTrue(testing_write, f'{test_mode} is blocked by {lock_mode}') + else: + self.assertFalse(testing_write, f'{test_mode} is not blocked by {lock_mode}') + finally: + try: + os.remove(FILE) + except Exception: + pass + if __name__ == '__main__': unittest.main() diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 02b5ae2ee..84b2603df 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -684,8 +684,9 @@ def sanitize_open(filename, open_mode): try: try: if sys.platform == 'win32': - # FIXME: Windows only has mandatory locking which also locks the file from being read. - # So for now, don't lock the file on windows. Ref: https://github.com/yt-dlp/yt-dlp/issues/3124 + # FIXME: An exclusive lock also locks the file from being read. + # Since windows locks are mandatory, don't lock the file on windows (for now). 
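A minimal usage sketch of the locked_file API that the locking patches above converge on (the archive path here is illustrative, and locks are advisory on POSIX):

    from yt_dlp.utils import locked_file

    # 'a' takes an exclusive lock; with these patches the file is opened
    # without O_TRUNC, a 'w' mode file is truncated only after the lock is
    # acquired, and block=False raises instead of waiting if another
    # process already holds the lock
    with locked_file('downloads.archive', 'a', block=False) as f:
        f.write('youtube abc123\n')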
+ # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124 raise LockingUnsupportedError() stream = locked_file(filename, open_mode, block=False).__enter__() except LockingUnsupportedError: From 870efdee28860d7f6473c52bf7bb1bafb71aaeec Mon Sep 17 00:00:00 2001 From: "Lesmiscore (Naoya Ozaki)" Date: Thu, 7 Apr 2022 16:19:36 +0900 Subject: [PATCH 0914/2552] [TVer] Fix extractor (#3268) Authored by: Lesmiscore --- yt_dlp/extractor/tver.py | 111 ++++++++++++++++++++++----------------- 1 file changed, 64 insertions(+), 47 deletions(-) diff --git a/yt_dlp/extractor/tver.py b/yt_dlp/extractor/tver.py index 9ff3136e2..f23af1f14 100644 --- a/yt_dlp/extractor/tver.py +++ b/yt_dlp/extractor/tver.py @@ -1,77 +1,94 @@ # coding: utf-8 from __future__ import unicode_literals - from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, - int_or_none, - remove_start, smuggle_url, + str_or_none, traverse_obj, ) class TVerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?Pcorner|episode|feature|lp|tokyo2020/video)/(?P[fc]?\d+)' - # videos are only available for 7 days + _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?Plp|corner|series|episodes?|feature|tokyo2020/video)/)+(?P[a-zA-Z0-9]+)' _TESTS = [{ - 'url': 'https://tver.jp/corner/f0062178', - 'only_matching': True, + 'skip': 'videos are only available for 7 days', + 'url': 'https://tver.jp/episodes/ephss8yveb', + 'info_dict': { + 'title': '#44 料理と値段と店主にびっくり オモてなしすぎウマい店 2時間SP', + 'description': 'md5:66985373a66fed8ad3cd595a3cfebb13', + }, + 'add_ie': ['BrightcoveNew'], }, { - 'url': 'https://tver.jp/feature/f0062413', - 'only_matching': True, - }, { - 'url': 'https://tver.jp/episode/79622438', - 'only_matching': True, - }, { - # subtitle = ' ' - 'url': 'https://tver.jp/corner/f0068870', - 'only_matching': True, - }, { - 'url': 'https://tver.jp/lp/f0009694', - 'only_matching': True, + 'skip': 'videos are only available for 7 days', + 'url': 'https://tver.jp/lp/episodes/ep6f16g26p', + 'info_dict': { + # sorry but this is "correct" + 'title': '4月11日(月)23時06分 ~ 放送予定', + 'description': 'md5:4029cc5f4b1e8090dfc5b7bd2bc5cd0b', + }, + 'add_ie': ['BrightcoveNew'], }, { - 'url': 'https://tver.jp/lp/c0000239', + 'url': 'https://tver.jp/corner/f0103888', 'only_matching': True, }, { - 'url': 'https://tver.jp/tokyo2020/video/6264525510001', + 'url': 'https://tver.jp/lp/f0033031', 'only_matching': True, }] - _TOKEN = None BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' + _PLATFORM_UID = None + _PLATFORM_TOKEN = None def _real_initialize(self): - self._TOKEN = self._download_json( - 'https://tver.jp/api/access_token.php', None)['token'] + create_response = self._download_json( + 'https://platform-api.tver.jp/v2/api/platform_users/browser/create', None, + note='Creating session', data=b'device_type=pc', headers={ + 'Origin': 'https://s.tver.jp', + 'Referer': 'https://s.tver.jp/', + 'Content-Type': 'application/x-www-form-urlencoded', + }) + self._PLATFORM_UID = traverse_obj(create_response, ('result', 'platform_uid')) + self._PLATFORM_TOKEN = traverse_obj(create_response, ('result', 'platform_token')) def _real_extract(self, url): - path, video_id = self._match_valid_url(url).groups() - if path == 'lp': - webpage = self._download_webpage(url, video_id) - redirect_path = self._search_regex(r'to_href="([^"]+)', webpage, 'redirect path') - path, video_id = self._match_valid_url(f'https://tver.jp{redirect_path}').groups() - api_response = 
self._download_json(f'https://api.tver.jp/v4/{path}/{video_id}', video_id, query={'token': self._TOKEN}) - p_id = traverse_obj(api_response, ('main', 'publisher_id')) - if not p_id: - error_msg, expected = traverse_obj(api_response, ('episode', 0, 'textbar', 0, ('text', 'longer')), get_all=False), True - if not error_msg: - error_msg, expected = 'Failed to extract publisher ID', False - raise ExtractorError(error_msg, expected=expected) - service = remove_start(traverse_obj(api_response, ('main', 'service')), 'ts_') + video_id, video_type = self._match_valid_url(url).group('id', 'type') + if video_type not in {'series', 'episodes'}: + webpage = self._download_webpage(url, video_id, note='Resolving to new URL') + video_id = self._match_id(self._search_regex( + (r'canonical"\s*href="(https?://tver\.jp/[^"]+)"', r'&link=(https?://tver\.jp/[^?&]+)[?&]'), + webpage, 'url regex')) + video_info = self._download_json( + f'https://statics.tver.jp/content/episode/{video_id}.json', video_id, + query={'v': '5'}, headers={ + 'Origin': 'https://tver.jp', + 'Referer': 'https://tver.jp/', + }) + p_id = video_info['video']['accountID'] + r_id = traverse_obj(video_info, ('video', ('videoRefID', 'videoID')), get_all=False) + if not r_id: + raise ExtractorError('Failed to extract reference ID for Brightcove') + if not r_id.isdigit(): + r_id = f'ref:{r_id}' - r_id = traverse_obj(api_response, ('main', 'reference_id')) - if service not in ('tx', 'russia2018', 'sebare2018live', 'gorin'): - r_id = 'ref:' + r_id - bc_url = smuggle_url( - self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id), - {'geo_countries': ['JP']}) + additional_info = self._download_json( + f'https://platform-api.tver.jp/service/api/v1/callEpisode/{video_id}?require_data=mylist,later[epefy106ur],good[epefy106ur],resume[epefy106ur]', + video_id, fatal=False, + query={ + 'platform_uid': self._PLATFORM_UID, + 'platform_token': self._PLATFORM_TOKEN, + }, headers={ + 'x-tver-platform-type': 'web' + }) return { '_type': 'url_transparent', - 'description': traverse_obj(api_response, ('main', 'note', 0, 'text'), expected_type=compat_str), - 'episode_number': int_or_none(traverse_obj(api_response, ('main', 'ext', 'episode_number'), expected_type=compat_str)), - 'url': bc_url, + 'title': str_or_none(video_info.get('title')), + 'description': str_or_none(video_info.get('description')), + 'url': smuggle_url( + self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id), {'geo_countries': ['JP']}), + 'series': traverse_obj( + additional_info, ('result', ('episode', 'series'), 'content', ('seriesTitle', 'title')), + get_all=False), 'ie_key': 'BrightcoveNew', } From 61d3665d9da4f80c2c5cc4b6bed6a6830b29fcc3 Mon Sep 17 00:00:00 2001 From: coletdev Date: Thu, 7 Apr 2022 20:11:16 +1200 Subject: [PATCH 0915/2552] [youtube] Fix uploader for collaborative playlists (#3332) Authored by: coletdjnz --- yt_dlp/extractor/common.py | 6 ++++-- yt_dlp/extractor/youtube.py | 27 +++++++++++++++++++++++---- 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index e2605c1f4..9914910d0 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -23,6 +23,7 @@ from ..compat import ( compat_getpass, compat_http_client, compat_os_name, + compat_Pattern, compat_str, compat_urllib_error, compat_urllib_parse_unquote, @@ -41,7 +42,6 @@ from ..utils import ( base_url, bug_reports_message, clean_html, - compiled_regex_type, determine_ext, determine_protocol, dict_get, @@ -1203,7 +1203,9 @@ class InfoExtractor(object): In case of 
failure return a default value or raise a WARNING or a RegexNotFoundError, depending on fatal, specifying the field name. """ - if isinstance(pattern, (str, compat_str, compiled_regex_type)): + if string is None: + mobj = None + elif isinstance(pattern, (str, compat_Pattern)): mobj = re.search(pattern, string, flags) else: for p in pattern: diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 017554c88..031aa35a1 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -4109,14 +4109,15 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): if fatal: raise ExtractorError('Unable to find selected tab') - @classmethod - def _extract_uploader(cls, data): + def _extract_uploader(self, data): uploader = {} - renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {} + renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {} owner = try_get( renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict) if owner: - uploader['uploader'] = owner.get('text') + owner_text = owner.get('text') + uploader['uploader'] = self._search_regex( + r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text) uploader['uploader_id'] = try_get( owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str) uploader['uploader_url'] = urljoin( @@ -5136,6 +5137,24 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'note': 'non-standard redirect to regional channel', 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ', 'only_matching': True + }, { + 'note': 'collaborative playlist (uploader name in the form "by and x other(s)")', + 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6', + 'info_dict': { + 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6', + 'modified_date': '20220407', + 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q', + 'tags': [], + 'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q', + 'uploader': 'pukkandan', + 'availability': 'unlisted', + 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q', + 'channel': 'pukkandan', + 'description': 'Test for collaborative playlist', + 'title': 'yt-dlp test - collaborative playlist', + 'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q', + }, + 'playlist_mincount': 2 }] @classmethod From 22fba53fbd903cd42b0f4ef24c539a4f818fd6e9 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 7 Apr 2022 15:46:53 +0530 Subject: [PATCH 0916/2552] [FfmpegMetadata] Write id3v1 tags --- yt_dlp/postprocessor/ffmpeg.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 643290286..27d06cbde 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -769,6 +769,9 @@ class FFmpegMetadataPP(FFmpegPostProcessor): if value is not None and mobj: metadata[mobj.group('i') or 'common'][mobj.group('key')] = value + # Write id3v1 metadata also since Windows Explorer can't handle id3v2 tags + yield ('-write_id3v1', '1') + for name, value in metadata['common'].items(): yield ('-metadata', f'{name}={value}') From bd4073c53575ef802720cd74c5415d6a6417c1dd Mon Sep 17 00:00:00 2001 From: Ha Tien Loi Date: Thu, 7 Apr 2022 18:03:13 +0700 Subject: [PATCH 0917/2552] [AfreecaTV] Add `AfreecaTVUserIE` (#3286) Closes #3257 Authored by: hatienl0i261299 --- yt_dlp/extractor/afreecatv.py | 58 +++++++++++++++++++++++++++++++++- yt_dlp/extractor/extractors.py | 1 + 2 
files changed, 58 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py index 77f0e3c10..28946e9dd 100644 --- a/yt_dlp/extractor/afreecatv.py +++ b/yt_dlp/extractor/afreecatv.py @@ -1,14 +1,16 @@ # coding: utf-8 from __future__ import unicode_literals +import functools import re from .common import InfoExtractor from ..compat import compat_xpath from ..utils import ( + ExtractorError, + OnDemandPagedList, date_from_str, determine_ext, - ExtractorError, int_or_none, qualities, traverse_obj, @@ -482,3 +484,57 @@ class AfreecaTVLiveIE(AfreecaTVIE): 'formats': formats, 'is_live': True, } + + +class AfreecaTVUserIE(InfoExtractor): + IE_NAME = 'afreecatv:user' + _VALID_URL = r'https?://bj\.afreeca(?:tv)?\.com/(?P[^/]+)/vods/?(?P[^/]+)?' + _TESTS = [{ + 'url': 'https://bj.afreecatv.com/ryuryu24/vods/review', + 'info_dict': { + '_type': 'playlist', + 'id': 'ryuryu24', + 'title': 'ryuryu24 - review', + }, + 'playlist_count': 218, + }, { + 'url': 'https://bj.afreecatv.com/parang1995/vods/highlight', + 'info_dict': { + '_type': 'playlist', + 'id': 'parang1995', + 'title': 'parang1995 - highlight', + }, + 'playlist_count': 997, + }, { + 'url': 'https://bj.afreecatv.com/ryuryu24/vods', + 'info_dict': { + '_type': 'playlist', + 'id': 'ryuryu24', + 'title': 'ryuryu24 - all', + }, + 'playlist_count': 221, + }, { + 'url': 'https://bj.afreecatv.com/ryuryu24/vods/balloonclip', + 'info_dict': { + '_type': 'playlist', + 'id': 'ryuryu24', + 'title': 'ryuryu24 - balloonclip', + }, + 'playlist_count': 0, + }] + _PER_PAGE = 60 + + def _fetch_page(self, user_id, user_type, page): + page += 1 + info = self._download_json(f'https://bjapi.afreecatv.com/api/{user_id}/vods/{user_type}', user_id, + query={'page': page, 'per_page': self._PER_PAGE, 'orderby': 'reg_date'}, + note=f'Downloading {user_type} video page {page}') + for item in info['data']: + yield self.url_result( + f'https://vod.afreecatv.com/player/{item["title_no"]}/', AfreecaTVIE, item['title_no']) + + def _real_extract(self, url): + user_id, user_type = self._match_valid_url(url).group('id', 'slug_type') + user_type = user_type or 'all' + entries = OnDemandPagedList(functools.partial(self._fetch_page, user_id, user_type), self._PER_PAGE) + return self.playlist_result(entries, user_id, f'{user_id} - {user_type}') diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index bd27e14b2..3e711c3bb 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -44,6 +44,7 @@ from .aenetworks import ( from .afreecatv import ( AfreecaTVIE, AfreecaTVLiveIE, + AfreecaTVUserIE, ) from .airmozilla import AirMozillaIE from .aljazeera import AlJazeeraIE From 316f2650f8b588507159cddcd13941dd67a4f70c Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 7 Apr 2022 16:41:51 +0530 Subject: [PATCH 0918/2552] Ignore `mhtml` formats from `-f mergeall` Closes #3324 --- yt_dlp/YoutubeDL.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index d03229d86..f5ea5a0b5 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2179,7 +2179,8 @@ class YoutubeDL(object): yield from _check_formats(ctx['formats'][::-1]) elif format_spec == 'mergeall': def selector_function(ctx): - formats = list(_check_formats(ctx['formats'])) + formats = list(_check_formats( + f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none')) if not formats: return merged_format = formats[-1] From 
b52e788eb2ba9914aa812238365dcb3348be2944 Mon Sep 17 00:00:00 2001 From: Lesmiscore Date: Thu, 7 Apr 2022 20:21:42 +0900 Subject: [PATCH 0919/2552] [Piapro] Extract description with break lines Authored by: Lesmiscore Closes #3334 --- yt_dlp/extractor/piapro.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/piapro.py b/yt_dlp/extractor/piapro.py index c4eb4913f..ae160623b 100644 --- a/yt_dlp/extractor/piapro.py +++ b/yt_dlp/extractor/piapro.py @@ -27,6 +27,18 @@ class PiaproIE(InfoExtractor): 'title': '裏表ラバーズ', 'thumbnail': r're:^https?://.*\.jpg$', } + }, { + 'note': 'There are break lines in description, mandating (?s) flag', + 'url': 'https://piapro.jp/t/9cSd', + 'md5': '952bb6d1e8de95050206408a87790676', + 'info_dict': { + 'id': '9cSd', + 'ext': 'mp3', + 'title': '青に溶けた風船 / 初音ミク', + 'description': 'md5:d395a9bd151447631a5a1460bc7f9132', + 'uploader': 'シアン・キノ', + 'uploader_id': 'cyankino', + } }] _login_status = False @@ -81,7 +93,7 @@ class PiaproIE(InfoExtractor): return { 'id': video_id, 'title': self._html_search_regex(r'(.+?)', webpage, 'title', fatal=False), - 'description': self._html_search_regex(r'(.+?)
    \s*(.+?)
    \s* Date: Thu, 7 Apr 2022 22:52:27 +0700 Subject: [PATCH 0920/2552] [NRK] Extract timestamp (#3231) Closes #3211 Authored by: hatienl0i261299 --- yt_dlp/extractor/nrk.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/nrk.py b/yt_dlp/extractor/nrk.py index 4d723e886..0cf26d598 100644 --- a/yt_dlp/extractor/nrk.py +++ b/yt_dlp/extractor/nrk.py @@ -13,6 +13,7 @@ from ..utils import ( ExtractorError, int_or_none, parse_duration, + parse_iso8601, str_or_none, try_get, urljoin, @@ -247,6 +248,7 @@ class NRKIE(NRKBaseIE): 'age_limit': age_limit, 'formats': formats, 'subtitles': subtitles, + 'timestamp': parse_iso8601(try_get(manifest, lambda x: x['availability']['onDemand']['from'], str)) } if is_series: @@ -797,7 +799,7 @@ class NRKPlaylistBaseIE(InfoExtractor): for video_id in re.findall(self._ITEM_RE, webpage) ] - playlist_title = self. _extract_title(webpage) + playlist_title = self._extract_title(webpage) playlist_description = self._extract_description(webpage) return self.playlist_result( From 9b8b7a7b5e529fdb9c8d6804b592f7f8eeb3046e Mon Sep 17 00:00:00 2001 From: Alexander Seiler Date: Fri, 8 Apr 2022 08:44:58 +0200 Subject: [PATCH 0921/2552] [Zattoo] Fix extractors (#2288) Closes: #1244 Authored by: goggle --- yt_dlp/extractor/extractors.py | 5 +- yt_dlp/extractor/zattoo.py | 283 +++++++++++++++++++-------------- 2 files changed, 165 insertions(+), 123 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 3e711c3bb..2aa1e0b45 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -2119,18 +2119,17 @@ from .zattoo import ( EWETVIE, GlattvisionTVIE, MNetTVIE, - MyVisionTVIE, NetPlusIE, OsnatelTVIE, QuantumTVIE, - QuicklineIE, - QuicklineLiveIE, SaltTVIE, SAKTVIE, VTXTVIE, WalyTVIE, ZattooIE, ZattooLiveIE, + ZattooMoviesIE, + ZattooRecordingsIE, ) from .zdf import ZDFIE, ZDFChannelIE from .zee5 import ( diff --git a/yt_dlp/extractor/zattoo.py b/yt_dlp/extractor/zattoo.py index c02b4ca14..8614ca23d 100644 --- a/yt_dlp/extractor/zattoo.py +++ b/yt_dlp/extractor/zattoo.py @@ -51,25 +51,30 @@ class ZattooPlatformBaseIE(InfoExtractor): self._power_guide_hash = data['session']['power_guide_hash'] def _initialize_pre_login(self): - webpage = self._download_webpage( - self._host_url(), None, 'Downloading app token') - app_token = self._html_search_regex( - r'appToken\s*=\s*(["\'])(?P(?:(?!\1).)+?)\1', - webpage, 'app token', group='token') - app_version = self._html_search_regex( - r' +### 2022.04.08 + +* Use certificates from `certifi` if installed by [coletdjnz](https://github.com/coletdjnz) +* Treat multiple `--match-filters` as OR +* File locking improvevemnts: + * Do not lock downloading file on Windows + * Do not prevent download if locking is unsupported + * Do not truncate files before locking by [jakeogh](https://github.com/jakeogh), [pukkandan](https://github.com/pukkandan) + * Fix non-blocking non-exclusive lock +* De-prioritize automatic-subtitles when no `--sub-lang` is given +* Exit after `--dump-user-agent` +* Fallback to video-only format when selecting by extension +* Fix `--abort-on-error` for subtitles +* Fix `--no-overwrite` for playlist infojson +* Fix `--print` with `--ignore-no-formats` when url is `None` by [flashdagger](https://github.com/flashdagger) +* Fix `--sleep-interval` +* Fix `--throttled-rate` +* Fix `autonumber` +* Fix case of `http_headers` +* Fix filepath sanitization in `--print-to-file` +* Handle float in `--wait-for-video` +* Ignore `mhtml` 
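The NRK change above feeds the manifest's availability field through parse_iso8601; a rough illustration of what that helper returns (the input value is made up):

    from yt_dlp.utils import parse_iso8601

    # Converts an ISO 8601 string into a Unix timestamp (seconds), handling
    # both 'Z' and numeric UTC offsets; unparsable input yields None
    parse_iso8601('2022-04-07T22:52:27+0700')  # -> 1649346747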
formats from `-f mergeall` +* Ignore format-specific fields in initial pass of `--match-filter` +* Protect stdout from unexpected progress and console-title +* Remove `Accept-Encoding` header from `std_headers` by [coletdjnz](https://github.com/coletdjnz) +* Remove incorrect warning for `--dateafter` +* Show warning when all media formats have DRM +* [downloader] Fix invocation of `HttpieFD` +* [http] Fix #3215 +* [http] Reject broken range before request by [Lesmiscore](https://github.com/Lesmiscore), [Jules-A](https://github.com/Jules-A), [pukkandan](https://github.com/pukkandan) +* [fragment] Read downloaded fragments only when needed by [Lesmiscore](https://github.com/Lesmiscore) +* [http] Retry on more errors by [coletdjnz](https://github.com/coletdjnz) +* [mhtml] Fix fragments with absolute urls by [coletdjnz](https://github.com/coletdjnz) +* [extractor] Add `_perform_login` function +* [extractor] Allow control characters inside json +* [extractor] Support merging subtitles with data by [coletdjnz](https://github.com/coletdjnz) +* [generic] Extract subtitles from video.js by [Lesmiscore](https://github.com/Lesmiscore) +* [ffmpeg] Cache version data +* [FFmpegConcat] Ensure final directory exists +* [FfmpegMetadata] Write id3v1 tags +* [FFmpegVideoConvertor] Add more formats to `--remux-video` +* [FFmpegVideoConvertor] Ensure all streams are copied +* [MetadataParser] Validate outtmpl early +* [outtmpl] Fix replacement/default when used with alternate +* [outtmpl] Limit changes during sanitization +* [phantomjs] Fix bug +* [test] Add `test_locked_file` +* [utils] `format_decimal_suffix`: Fix for very large numbers by [s0u1h](https://github.com/s0u1h) +* [utils] `traverse_obj`: Allow filtering by value +* [utils] Add `filter_dict`, `get_first`, `try_call` +* [utils] ExtractorError: Fix for older python versions +* [utils] WebSocketsWrapper: Allow omitting `__enter__` invocation by [Lesmiscore](https://github.com/Lesmiscore) +* [docs] Add an `.editorconfig` file by [fstirlitz](https://github.com/fstirlitz) +* [docs] Clarify the exact `BSD` license of dependencies by [MrRawes](https://github.com/MrRawes) +* [docs] Minor improvements by [pukkandan](https://github.com/pukkandan), [cffswb](https://github.com/cffswb), [danielyli](https://github.com/danielyli) +* [docs] Remove readthedocs +* [build] Add `requirements.txt` to pip distributions +* [cleanup, postprocessor] Create `_download_json` +* [cleanup, vimeo] Fix tests +* [cleanup] Misc fixes and minor cleanup +* [cleanup] Use `_html_extract_title` +* [AfreecaTV] Add `AfreecaTVUserIE` by [hatienl0i261299](https://github.com/hatienl0i261299) +* [arte] Add `format_note` to m3u8 formats +* [azmedien] Add TVO Online to supported hosts by [1-Byte](https://github.com/1-Byte) +* [BanBye] Add extractor by [mehq](https://github.com/mehq) +* [bilibili] Fix extraction of title with quotes by [dzek69](https://github.com/dzek69) +* [Craftsy] Add extractor by [Bricio](https://github.com/Bricio) +* [Cybrary] Add extractor by [aaearon](https://github.com/aaearon) +* [Huya] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299) +* [ITProTV] Add extractor by [aaearon](https://github.com/aaearon) +* [Jable] Add extractors by [mehq](https://github.com/mehq) +* [LastFM] Add extractors by [mehq](https://github.com/mehq) +* [Moviepilot] Add extractor by [panatexxa](https://github.com/panatexxa) +* [panopto] Add extractors by [coletdjnz](https://github.com/coletdjnz), [kmark](https://github.com/kmark) +* [PokemonSoundLibrary] Add extractor by 
[Lesmiscore](https://github.com/Lesmiscore) +* [WasdTV] Add extractor by [un-def](https://github.com/un-def), [hatienl0i261299](https://github.com/hatienl0i261299) +* [adobepass] Fix Suddenlink MSO by [CplPwnies](https://github.com/CplPwnies) +* [afreecatv] Match new vod url by [wlritchi](https://github.com/wlritchi) +* [AZMedien] Support `tv.telezueri.ch` by [goggle](https://github.com/goggle) +* [BiliIntl] Support user-generated videos by [wlritchi](https://github.com/wlritchi) +* [BRMediathek] Fix VALID_URL +* [crunchyroll:playlist] Implement beta API by [tejing1](https://github.com/tejing1) +* [crunchyroll] Fix inheritance +* [daftsex] Fix extractor by [Soebb](https://github.com/Soebb) +* [dailymotion] Support `geo.dailymotion.com` by [hatienl0i261299](https://github.com/hatienl0i261299) +* [ellentube] Extract subtitles from manifest +* [elonet] Rewrite extractor by [Fam0r](https://github.com/Fam0r), [pukkandan](https://github.com/pukkandan) +* [fptplay] Fix metadata extraction by [hatienl0i261299](https://github.com/hatienl0i261299) +* [FranceCulture] Support playlists by [bohwaz](https://github.com/bohwaz) +* [go, viu] Extract subtitles from the m3u8 manifest by [fstirlitz](https://github.com/fstirlitz) +* [Imdb] Improve extractor by [hatienl0i261299](https://github.com/hatienl0i261299) +* [MangoTV] Improve extractor by [hatienl0i261299](https://github.com/hatienl0i261299) +* [Nebula] Fix bug in 52efa4b31200119adaa8acf33e50b84fcb6948f0 +* [niconico] Fix extraction of thumbnails and uploader (#3266) +* [niconico] Rewrite NiconicoIE by [Lesmiscore](https://github.com/Lesmiscore) +* [nitter] Minor fixes and update instance list by [foghawk](https://github.com/foghawk) +* [NRK] Extract timestamp by [hatienl0i261299](https://github.com/hatienl0i261299) +* [openrec] Download archived livestreams by [Lesmiscore](https://github.com/Lesmiscore) +* [openrec] Refactor extractors by [Lesmiscore](https://github.com/Lesmiscore) +* [panopto] Improve subtitle extraction and support slides by [coletdjnz](https://github.com/coletdjnz) +* [ParamountPlus, CBS] Change VALID_URL by [Sipherdrakon](https://github.com/Sipherdrakon) +* [ParamountPlusSeries] Support multiple pages by [dodrian](https://github.com/dodrian) +* [Piapro] Extract description with break lines by [Lesmiscore](https://github.com/Lesmiscore) +* [rai] Fix extraction of http formas by [nixxo](https://github.com/nixxo) +* [rumble] unescape title +* [RUTV] Fix format sorting by [Lesmiscore](https://github.com/Lesmiscore) +* [ruutu] Detect embeds by [tpikonen](https://github.com/tpikonen) +* [tenplay] Improve extractor by [aarubui](https://github.com/aarubui) +* [TikTok] Fix URLs with user id by [hatienl0i261299](https://github.com/hatienl0i261299) +* [TikTokVM] Fix redirect to user URL +* [TVer] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore) +* [TVer] Support landing page by [vvto33](https://github.com/vvto33) +* [twitcasting] Don't return multi_video for archive with single hls manifest by [Lesmiscore](https://github.com/Lesmiscore) +* [veo] Fix `_VALID_URL` +* [Veo] Fix extractor by [i6t](https://github.com/i6t) +* [viki] Don't attempt to modify URLs with signature by [nyuszika7h](https://github.com/nyuszika7h) +* [viu] Fix bypass for preview by [zackmark29](https://github.com/zackmark29) +* [viu] Fixed extractor by [zackmark29](https://github.com/zackmark29), [pukkandan](https://github.com/pukkandan) +* [web.archive:youtube] Make CDX API requests non-fatal by [coletdjnz](https://github.com/coletdjnz) +* [wget] Fix proxy by 
[kikuyan](https://github.com/kikuyan), [coletdjnz](https://github.com/coletdjnz) +* [xnxx] Add `xnxx3.com` by [rozari0](https://github.com/rozari0) +* [youtube] **Add new age-gate bypass** by [zerodytrash](https://github.com/zerodytrash), [pukkandan](https://github.com/pukkandan) +* [youtube] Add extractor-arg to skip auto-translated subs +* [youtube] Avoid false positives when detecting damaged formats +* [youtube] Detect DRM better by [shirt](https://github.com/shirt-dev) +* [youtube] Fix auto-translated automatic captions +* [youtube] Fix pagination of `membership` tab +* [youtube] Fix uploader for collaborative playlists by [coletdjnz](https://github.com/coletdjnz) +* [youtube] Improve video upload date handling by [coletdjnz](https://github.com/coletdjnz) +* [youtube:api] Prefer minified JSON response by [coletdjnz](https://github.com/coletdjnz) +* [youtube:search] Support hashtag entries by [coletdjnz](https://github.com/coletdjnz) +* [youtube:tab] Fix duration extraction for shorts by [coletdjnz](https://github.com/coletdjnz) +* [youtube:tab] Minor improvements +* [youtube:tab] Return shorts url if video is a short by [coletdjnz](https://github.com/coletdjnz) +* [Zattoo] Fix extractors by [goggle](https://github.com/goggle) +* [Zingmp3] Fix signature by [hatienl0i261299](https://github.com/hatienl0i261299) + + ### 2022.03.08.1 * [cleanup] Refactor `__init__.py` @@ -34,7 +172,7 @@ * Set `webpage_url_...` from `webpage_url` and not input URL * Tolerate failure to `--write-link` due to unknown URL * [aria2c] Add `--http-accept-gzip=true` -* [build] Update pyinstaller to 4.10 by [shirt-dev](https://github.com/shirt-dev) +* [build] Update pyinstaller to 4.10 by [shirt](https://github.com/shirt-dev) * [cookies] Update MacOS12 `Cookies.binarycookies` location by [mdpauley](https://github.com/mdpauley) * [devscripts] Improve `prepare_manpage` * [downloader] Do not use aria2c for non-native `m3u8` diff --git a/supportedsites.md b/supportedsites.md index 46ad1328d..eac7842a3 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -42,6 +42,7 @@ - **aenetworks:show** - **afreecatv**: afreecatv.com - **afreecatv:live**: afreecatv.com + - **afreecatv:user** - **AirMozilla** - **AliExpressLive** - **AlJazeera** @@ -104,6 +105,8 @@ - **awaan:video** - **AZMedien**: AZ Medien videos - **BaiduVideo**: 百度视频 + - **BanBye** + - **BanByeChannel** - **bandaichannel** - **Bandcamp** - **Bandcamp:album** @@ -245,6 +248,7 @@ - **cpac:playlist** - **Cracked** - **Crackle** + - **Craftsy** - **CrooksAndLiars** - **CrowdBunker** - **CrowdBunkerChannel** @@ -263,6 +267,8 @@ - **curiositystream:collections** - **curiositystream:series** - **CWTV** + - **Cybrary** + - **CybraryCourse** - **Daftsex** - **DagelijkseKost**: dagelijksekost.een.be - **DailyMail** @@ -484,6 +490,7 @@ - **Hungama** - **HungamaAlbumPlaylist** - **HungamaSong** + - **huya:live**: huya.com - **Hypem** - **ign.com** - **IGNArticle** @@ -512,6 +519,8 @@ - **iq.com**: International version of iQiyi - **iq.com:album** - **iqiyi**: 爱奇艺 + - **ITProTV** + - **ITProTVCourse** - **ITTF** - **ITV** - **ITVBTCC** @@ -520,6 +529,8 @@ - **ivideon**: Ivideon TV - **Iwara** - **Izlesene** + - **Jable** + - **JablePlaylist** - **Jamendo** - **JamendoAlbum** - **JeuxVideo** @@ -555,6 +566,9 @@ - **la7.it:podcast** - **laola1tv** - **laola1tv:embed** + - **LastFM** + - **LastFMPlaylist** + - **LastFMUser** - **lbry** - **lbry:channel** - **LCI** @@ -603,6 +617,7 @@ - **MallTV** - **mangomolo:live** - **mangomolo:video** + - **MangoTV**: 芒果TV - 
**ManotoTV**: Manoto TV (Episode) - **ManotoTVLive**: Manoto TV (Live) - **ManotoTVShow**: Manoto TV (Show) @@ -635,7 +650,6 @@ - **Metacritic** - **mewatch** - **Mgoon** - - **MGTV**: 芒果TV - **MiaoPai** - **microsoftstream**: Microsoft Stream - **mildom**: Record ongoing live by specific user in Mildom @@ -671,6 +685,7 @@ - **Motorsport**: motorsport.com - **MovieClips** - **MovieFap** + - **Moviepilot** - **Moviezine** - **MovingImage** - **MSN** @@ -705,7 +720,6 @@ - **MyVideoGe** - **MyVidster** - **MyviEmbed** - - **MyVisionTV** - **n-tv.de** - **N1Info:article** - **N1InfoAsset** @@ -863,6 +877,9 @@ - **PalcoMP3:song** - **PalcoMP3:video** - **pandora.tv**: 판도라TV + - **Panopto** + - **PanoptoList** + - **PanoptoPlaylist** - **ParamountNetwork** - **ParamountPlus** - **ParamountPlusSeries** @@ -912,6 +929,7 @@ - **PlutoTV** - **podomatic** - **Pokemon** + - **PokemonSoundLibrary** - **PokemonWatch** - **PokerGo** - **PokerGoCollection** @@ -957,8 +975,6 @@ - **qqmusic:toplist**: QQ音乐 - 排行榜 - **QuantumTV** - **Qub** - - **Quickline** - - **QuicklineLive** - **R7** - **R7Article** - **Radiko** @@ -1427,6 +1443,9 @@ - **Wakanim** - **Walla** - **WalyTV** + - **wasdtv:clip** + - **wasdtv:record** + - **wasdtv:stream** - **washingtonpost** - **washingtonpost:article** - **wat.tv** @@ -1520,6 +1539,8 @@ - **Zapiks** - **Zattoo** - **ZattooLive** + - **ZattooMovies** + - **ZattooRecordings** - **ZDF** - **ZDFChannel** - **Zee5** From dee1d65dc362f69b28287b2e82d93be4d22d1968 Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 8 Apr 2022 09:57:06 +0000 Subject: [PATCH 0928/2552] [version] update Created by: pukkandan :ci skip all --- .github/ISSUE_TEMPLATE/1_broken_site.yml | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.yml | 6 +++--- .github/ISSUE_TEMPLATE/3_site_feature_request.yml | 6 +++--- .github/ISSUE_TEMPLATE/4_bug_report.yml | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.yml | 2 +- yt_dlp/version.py | 4 ++-- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml index c671a1910..39746047b 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a broken site required: true - - label: I've verified that I'm running yt-dlp version **2022.03.08.1** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2022.04.08** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -51,12 +51,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2022.03.08.1 (exe) + [debug] yt-dlp version 2022.04.08 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2022.03.08.1) + yt-dlp is up to date (2022.04.08) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml index 5ff022a04..4e072a436 100644 --- 
a/.github/ISSUE_TEMPLATE/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a new site support request required: true - - label: I've verified that I'm running yt-dlp version **2022.03.08.1** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2022.04.08** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -62,12 +62,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2022.03.08.1 (exe) + [debug] yt-dlp version 2022.04.08 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2022.03.08.1) + yt-dlp is up to date (2022.04.08) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml index acdfeb038..85b5d2cd5 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a site feature request required: true - - label: I've verified that I'm running yt-dlp version **2022.03.08.1** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2022.04.08** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -60,12 +60,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2022.03.08.1 (exe) + [debug] yt-dlp version 2022.04.08 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2022.03.08.1) + yt-dlp is up to date (2022.04.08) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml index a4a038fc8..cdff538a1 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a bug unrelated to a specific site required: true - - label: I've verified that I'm running yt-dlp version **2022.03.08.1** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2022.04.08** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -45,12 +45,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, 
stderr utf-8, pref cp1252
- [debug] yt-dlp version 2022.03.08.1 (exe)
+ [debug] yt-dlp version 2022.04.08 (exe)
 [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0
 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1
 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets
 [debug] Proxy map: {}
- yt-dlp is up to date (2022.03.08.1)
+ yt-dlp is up to date (2022.04.08)
 render: shell
 validations:
diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml
index 1bdafc441..59c8dd88e 100644
--- a/.github/ISSUE_TEMPLATE/5_feature_request.yml
+++ b/.github/ISSUE_TEMPLATE/5_feature_request.yml
@@ -13,7 +13,7 @@ body:
 required: true
 - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
 required: true
- - label: I've verified that I'm running yt-dlp version **2022.03.08.1** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+ - label: I've verified that I'm running yt-dlp version **2022.04.08** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
 required: true
 - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates
 required: true
diff --git a/yt_dlp/version.py b/yt_dlp/version.py
index d5df2af90..fb3ec8c6d 100644
--- a/yt_dlp/version.py
+++ b/yt_dlp/version.py
@@ -1,5 +1,5 @@
 # Autogenerated by devscripts/update-version.py
-__version__ = '2022.03.08.1'
+__version__ = '2022.04.08'

-RELEASE_GIT_HEAD = 'c0c2c57d3'
+RELEASE_GIT_HEAD = '7884ade65'

From 2d2b5493ee88ccde079a5cde3d58ac5469057d17 Mon Sep 17 00:00:00 2001
From: Ashish Gupta
Date: Fri, 8 Apr 2022 21:03:50 +0530
Subject: [PATCH 0929/2552] [ZEE5] Fix extractor. Authored by: Ashish0804

Closes: https://github.com/yt-dlp/yt-dlp/issues/3105
---
 yt_dlp/extractor/zee5.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/zee5.py b/yt_dlp/extractor/zee5.py
index 3e3f11b15..9e411d83f 100644
--- a/yt_dlp/extractor/zee5.py
+++ b/yt_dlp/extractor/zee5.py
@@ -86,8 +86,8 @@ class Zee5IE(InfoExtractor):
 'url': 'https://www.zee5.com/web-series/details/mithya/0-6-4z587408/maine-dekhi-hai-uski-mrityu/0-1-6z587412',
 'only_matching': True
 }]
- _DETAIL_API_URL = 'https://spapi.zee5.com/singlePlayback/getDetails?content_id={}&device_id={}&platform_name=desktop_web&country=IN&check_parental_control=false'
- _DEVICE_ID = 'iIxsxYf40cqO3koIkwzKHZhnJzHN13zb'
+ _DETAIL_API_URL = 'https://spapi.zee5.com/singlePlayback/getDetails/secure?content_id={}&device_id={}&platform_name=desktop_web&country=IN&check_parental_control=false'
+ _DEVICE_ID = 'TszZPYPuY9Pq2cJizV0U000000000000'
 _USER_TOKEN = None
 _LOGIN_HINT = 'Use "--username <mobile_number>" to login using otp or "--username token" and "--password <user_token>" to login using user token.'
_NETRC_MACHINE = 'zee5' From d46a3e7a127654b7537b0ab537f8c08ba16862ff Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 5 Apr 2022 16:25:40 +0530 Subject: [PATCH 0930/2552] [rai] Add `release_year` Closes #2319 --- yt_dlp/extractor/rai.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/rai.py b/yt_dlp/extractor/rai.py index 6864129c6..7c72d60c6 100644 --- a/yt_dlp/extractor/rai.py +++ b/yt_dlp/extractor/rai.py @@ -340,6 +340,7 @@ class RaiPlayIE(RaiBaseIE): 'episode': media.get('episode_title'), 'episode_number': int_or_none(media.get('episode')), 'subtitles': subtitles, + 'release_year': traverse_obj(media, ('track_info', 'edit_year')), } info.update(relinker_info) From 4abea8ca0af0773db9fb2372b272d497bd77b207 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 9 Apr 2022 10:11:25 +0530 Subject: [PATCH 0931/2552] [utils] `sanitize_path`: Fix when path is empty string --- yt_dlp/YoutubeDL.py | 2 +- yt_dlp/utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 7af7a9fb9..d4f8d8cab 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2237,7 +2237,7 @@ class YoutubeDL(object): matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1])) try: yield matches[format_idx - 1] - except IndexError: + except LazyList.IndexError: return filters = [self._build_format_filter(f) for f in selector.filters] diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 84b2603df..ba9566cab 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -778,7 +778,7 @@ def sanitize_path(s, force=False): for path_part in norm_path] if drive_or_unc: sanitized_path.insert(0, drive_or_unc + os.path.sep) - elif force and s[0] == os.path.sep: + elif force and s and s[0] == os.path.sep: sanitized_path.insert(0, os.path.sep) return os.path.join(*sanitized_path) From 98804d034d04d21cbeb8cd43d1e1d90f1cdae836 Mon Sep 17 00:00:00 2001 From: Lesmiscore Date: Sun, 10 Apr 2022 01:23:27 +0900 Subject: [PATCH 0932/2552] [utils] locked_file: Do not give executable bits for newly created files Authored by: Lesmiscore --- yt_dlp/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index ba9566cab..14dbbf59f 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2236,7 +2236,7 @@ class locked_file(object): os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY, )) - self.f = os.fdopen(os.open(filename, flags), mode, encoding=encoding) + self.f = os.fdopen(os.open(filename, flags, 0o666), mode, encoding=encoding) def __enter__(self): exclusive = 'r' not in self.mode From f894294636989788f02b917037f1f4a771595489 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 9 Apr 2022 21:19:00 +0530 Subject: [PATCH 0933/2552] [EmbedThumbnail] Do not remove id3v1 tags --- yt_dlp/postprocessor/embedthumbnail.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py index 84ab54f44..057007f2e 100644 --- a/yt_dlp/postprocessor/embedthumbnail.py +++ b/yt_dlp/postprocessor/embedthumbnail.py @@ -101,7 +101,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor): success = True if info['ext'] == 'mp3': options = [ - '-c', 'copy', '-map', '0:0', '-map', '1:0', '-id3v2_version', '3', + '-c', 'copy', '-map', '0:0', '-map', '1:0', '-write_id3v1', '1', '-id3v2_version', '3', '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment="Cover (front)"'] self._report_run('ffmpeg', filename) From 
a25bca9f89f77e6e5153c3400c4a27020d8cba9d Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 10 Apr 2022 01:00:21 +0530 Subject: [PATCH 0934/2552] [youtube, cleanup] Minor refactoring Authored by: coletdjnz, pukkandan --- yt_dlp/extractor/youtube.py | 71 ++++++++++++++++++++++++------------- 1 file changed, 47 insertions(+), 24 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 031aa35a1..4ee09ad9a 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -384,6 +384,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor): def _real_initialize(self): self._initialize_pref() self._initialize_consent() + self._check_login_required() + + def _check_login_required(self): if (self._LOGIN_REQUIRED and self.get_param('cookiefile') is None and self.get_param('cookiesfrombrowser') is None): @@ -563,6 +566,18 @@ class YoutubeBaseInfoExtractor(InfoExtractor): headers['X-Origin'] = origin return {h: v for h, v in headers.items() if v is not None} + def _download_ytcfg(self, client, video_id): + url = { + 'web': 'https://www.youtube.com', + 'web_music': 'https://music.youtube.com', + 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1' + }.get(client) + if not url: + return {} + webpage = self._download_webpage( + url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config') + return self.extract_ytcfg(video_id, webpage) or {} + @staticmethod def _build_api_continuation_query(continuation, ctp=None): query = { @@ -728,6 +743,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): return None def _extract_time_text(self, renderer, *path_list): + """@returns (timestamp, time_text)""" text = self._get_text(renderer, *path_list) or '' dt = self.extract_relative_time(text) timestamp = None @@ -2959,16 +2975,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return orderedSet(requested_clients) - def _extract_player_ytcfg(self, client, video_id): - url = { - 'web_music': 'https://music.youtube.com', - 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1' - }.get(client) - if not url: - return {} - webpage = self._download_webpage(url, video_id, fatal=False, note='Downloading %s config' % client.replace('_', ' ').strip()) - return self.extract_ytcfg(video_id, webpage) or {} - def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg): initial_pr = None if webpage: @@ -3005,8 +3011,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): while clients: client, base_client, variant = _split_innertube_client(clients.pop()) player_ytcfg = master_ytcfg if client == 'web' else {} - if 'configs' not in self._configuration_arg('player_skip'): - player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg + if 'configs' not in self._configuration_arg('player_skip') and client != 'web': + player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage) require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER') @@ -4347,6 +4353,10 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): check_get_keys='contents', fatal=False, ytcfg=ytcfg, note='Downloading API JSON with unavailable videos') + @property + def skip_webpage(self): + return 'webpage' in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) + def _extract_webpage(self, url, item_id, fatal=True): retries = self.get_param('extractor_retries', 3) count = -1 @@ -4393,9 +4403,21 @@ class 
YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): return webpage, data + def _report_playlist_authcheck(self, ytcfg, fatal=True): + """Use if failed to extract ytcfg (and data) from initial webpage""" + if not ytcfg and self.is_authenticated: + msg = 'Playlists that require authentication may not extract correctly without a successful webpage download' + if 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) and fatal: + raise ExtractorError( + f'{msg}. If you are not downloading private content, or ' + 'your cookies are only for the first account and channel,' + ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check', + expected=True) + self.report_warning(msg, only_once=True) + def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'): data = None - if 'webpage' not in self._configuration_arg('skip'): + if not self.skip_webpage: webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal) ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage) # Reject webpage data if redirected to home page without explicitly requesting @@ -4409,14 +4431,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): raise ExtractorError(msg, expected=True) self.report_warning(msg, only_once=True) if not data: - if not ytcfg and self.is_authenticated: - msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.' - if 'authcheck' not in self._configuration_arg('skip') and fatal: - raise ExtractorError( - msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,' - ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check', - expected=True) - self.report_warning(msg, only_once=True) + self._report_playlist_authcheck(ytcfg, fatal=fatal) data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client) return data, ytcfg @@ -4454,14 +4469,20 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'), ('continuationContents', ), ) + display_id = f'query "{query}"' check_get_keys = tuple(set(keys[0] for keys in content_keys)) + ytcfg = self._download_ytcfg(default_client, display_id) if not self.skip_webpage else {} + self._report_playlist_authcheck(ytcfg, fatal=False) continuation_list = [None] + search = None for page_num in itertools.count(1): data.update(continuation_list[0] or {}) + headers = self.generate_api_headers( + ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client) search = self._extract_response( - item_id='query "%s" page %s' % (query, page_num), ep='search', query=data, - default_client=default_client, check_get_keys=check_get_keys) + item_id=f'{display_id} page {page_num}', ep='search', query=data, + default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers) slr_contents = traverse_obj(search, *content_keys) yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list) if not continuation_list[0]: @@ -5634,7 +5655,9 @@ class YoutubeFeedsInfoExtractor(InfoExtractor): Subclasses must define the _FEED_NAME property. 
""" _LOGIN_REQUIRED = True - _TESTS = [] + + def _real_initialize(self): + YoutubeBaseInfoExtractor._check_login_required(self) @property def IE_NAME(self): From 97ec5bc550e0e34f3e79cdbfb5ad9d81b228ceb8 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 10 Apr 2022 01:01:48 +0530 Subject: [PATCH 0935/2552] [cookies] Report progress when importing cookies --- yt_dlp/YoutubeDL.py | 16 +++--- yt_dlp/cookies.py | 124 ++++++++++++++++++++++++++++++------------- yt_dlp/minicurses.py | 2 +- 3 files changed, 96 insertions(+), 46 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index d4f8d8cab..fef05d517 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -643,6 +643,11 @@ class YoutubeDL(object): else: raise + if auto_init: + if auto_init != 'no_verbose_header': + self.print_debug_header() + self.add_default_info_extractors() + if (sys.platform != 'win32' and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and not self.params.get('restrictfilenames', False)): @@ -664,13 +669,6 @@ class YoutubeDL(object): # Set http_headers defaults according to std_headers self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {})) - self._setup_opener() - - if auto_init: - if auto_init != 'no_verbose_header': - self.print_debug_header() - self.add_default_info_extractors() - hooks = { 'post_hooks': self.add_post_hook, 'progress_hooks': self.add_progress_hook, @@ -687,6 +685,7 @@ class YoutubeDL(object): get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)), when=when) + self._setup_opener() register_socks_protocols() def preload_download_archive(fn): @@ -3698,6 +3697,7 @@ class YoutubeDL(object): delim=', ') or 'none' write_debug('Optional libraries: %s' % lib_str) + self._setup_opener() proxy_map = {} for handler in self._opener.handlers: if hasattr(handler, 'proxies'): @@ -3717,6 +3717,8 @@ class YoutubeDL(object): latest_version) def _setup_opener(self): + if hasattr(self, '_opener'): + return timeout_val = self.params.get('socket_timeout') self._socket_timeout = 20 if timeout_val is None else float(timeout_val) diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index 1f08a3664..3476595d3 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -20,6 +20,7 @@ from .compat import ( compat_b64decode, compat_cookiejar_Cookie, ) +from .minicurses import MultilinePrinter, QuietMultilinePrinter from .utils import ( error_to_str, expand_path, @@ -73,6 +74,32 @@ class YDLLogger: if self._ydl: self._ydl.report_error(message) + def progress_bar(self): + """Return a context manager with a print method. 
(Optional)""" + # Do not print to files/pipes, loggers, or when --no-progress is used + if not self._ydl or self._ydl.params.get('noprogress') or self._ydl.params.get('logger'): + return + file = self._ydl._out_files['error'] + try: + if not file.isatty(): + return + except BaseException: + return + + printer = MultilinePrinter(file, preserve_output=False) + printer.print = lambda message: printer.print_at_line(f'[Cookies] {message}', 0) + return printer + + +def _create_progress_bar(logger): + if hasattr(logger, 'progress_bar'): + printer = logger.progress_bar() + if printer: + return printer + printer = QuietMultilinePrinter() + printer.print = lambda _: None + return printer + def load_cookies(cookie_file, browser_specification, ydl): cookie_jars = [] @@ -115,7 +142,7 @@ def _extract_firefox_cookies(profile, logger): else: search_root = os.path.join(_firefox_browser_dir(), profile) - cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite') + cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger) if cookie_database_path is None: raise FileNotFoundError('could not find firefox cookies database in {}'.format(search_root)) logger.debug('Extracting cookies from: "{}"'.format(cookie_database_path)) @@ -126,13 +153,17 @@ def _extract_firefox_cookies(profile, logger): cursor = _open_database_copy(cookie_database_path, tmpdir) cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies') jar = YoutubeDLCookieJar() - for host, name, value, path, expiry, is_secure in cursor.fetchall(): - cookie = compat_cookiejar_Cookie( - version=0, name=name, value=value, port=None, port_specified=False, - domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'), - path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False, - comment=None, comment_url=None, rest={}) - jar.set_cookie(cookie) + with _create_progress_bar(logger) as progress_bar: + table = cursor.fetchall() + total_cookie_count = len(table) + for i, (host, name, value, path, expiry, is_secure) in enumerate(table): + progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}') + cookie = compat_cookiejar_Cookie( + version=0, name=name, value=value, port=None, port_specified=False, + domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'), + path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False, + comment=None, comment_url=None, rest={}) + jar.set_cookie(cookie) logger.info('Extracted {} cookies from firefox'.format(len(jar))) return jar finally: @@ -232,7 +263,7 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger): logger.error('{} does not support profiles'.format(browser_name)) search_root = config['browser_dir'] - cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies') + cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger) if cookie_database_path is None: raise FileNotFoundError('could not find {} cookies database in "{}"'.format(browser_name, search_root)) logger.debug('Extracting cookies from: "{}"'.format(cookie_database_path)) @@ -251,26 +282,18 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger): jar = YoutubeDLCookieJar() failed_cookies = 0 unencrypted_cookies = 0 - for host_key, name, value, encrypted_value, path, expires_utc, is_secure in cursor.fetchall(): - host_key = host_key.decode('utf-8') - name = name.decode('utf-8') - value = 
value.decode('utf-8') - path = path.decode('utf-8') - - if not value and encrypted_value: - value = decryptor.decrypt(encrypted_value) - if value is None: + with _create_progress_bar(logger) as progress_bar: + table = cursor.fetchall() + total_cookie_count = len(table) + for i, line in enumerate(table): + progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}') + is_encrypted, cookie = _process_chrome_cookie(decryptor, *line) + if not cookie: failed_cookies += 1 continue - else: - unencrypted_cookies += 1 - - cookie = compat_cookiejar_Cookie( - version=0, name=name, value=value, port=None, port_specified=False, - domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'), - path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False, - comment=None, comment_url=None, rest={}) - jar.set_cookie(cookie) + elif not is_encrypted: + unencrypted_cookies += 1 + jar.set_cookie(cookie) if failed_cookies > 0: failed_message = ' ({} could not be decrypted)'.format(failed_cookies) else: @@ -285,6 +308,25 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger): cursor.connection.close() +def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure): + host_key = host_key.decode('utf-8') + name = name.decode('utf-8') + value = value.decode('utf-8') + path = path.decode('utf-8') + is_encrypted = not value and encrypted_value + + if is_encrypted: + value = decryptor.decrypt(encrypted_value) + if value is None: + return is_encrypted, None + + return is_encrypted, compat_cookiejar_Cookie( + version=0, name=name, value=value, port=None, port_specified=False, + domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'), + path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False, + comment=None, comment_url=None, rest={}) + + class ChromeCookieDecryptor: """ Overview: @@ -547,10 +589,12 @@ def _parse_safari_cookies_page(data, jar, logger): p.skip_to(record_offsets[0], 'unknown page header field') - for record_offset in record_offsets: - p.skip_to(record_offset, 'space between records') - record_length = _parse_safari_cookies_record(data[record_offset:], jar, logger) - p.read_bytes(record_length) + with _create_progress_bar(logger) as progress_bar: + for i, record_offset in enumerate(record_offsets): + progress_bar.print(f'Loading cookie {i: 6d}/{number_of_cookies: 6d}') + p.skip_to(record_offset, 'space between records') + record_length = _parse_safari_cookies_record(data[record_offset:], jar, logger) + p.read_bytes(record_length) p.skip_to_end('space in between pages') @@ -830,10 +874,11 @@ def _get_mac_keyring_password(browser_keyring_name, logger): def _get_windows_v10_key(browser_root, logger): - path = _find_most_recently_used_file(browser_root, 'Local State') + path = _find_most_recently_used_file(browser_root, 'Local State', logger) if path is None: logger.error('could not find local state file') return None + logger.debug(f'Found local state file at "{path}"') with open(path, 'r', encoding='utf8') as f: data = json.load(f) try: @@ -925,13 +970,16 @@ def _get_column_names(cursor, table_name): return [row[1].decode('utf-8') for row in table_info] -def _find_most_recently_used_file(root, filename): +def _find_most_recently_used_file(root, filename, logger): # if there are multiple browser profiles, take the most recently used one - paths = [] - for root, dirs, files in os.walk(root): - for file in 
files:
- if file == filename:
- paths.append(os.path.join(root, file))
+ i, paths = 0, []
+ with _create_progress_bar(logger) as progress_bar:
+ for curr_root, dirs, files in os.walk(root):
+ for file in files:
+ i += 1
+ progress_bar.print(f'Searching for "{filename}": {i: 6d} files searched')
+ if file == filename:
+ paths.append(os.path.join(curr_root, file))
 return None if not paths else max(paths, key=lambda path: os.lstat(path).st_mtime)
diff --git a/yt_dlp/minicurses.py b/yt_dlp/minicurses.py
index f9f99e390..d7a8ffddd 100644
--- a/yt_dlp/minicurses.py
+++ b/yt_dlp/minicurses.py
@@ -178,4 +178,4 @@ class MultilinePrinter(MultilinePrinterBase):
 *text, CONTROL_SEQUENCES['ERASE_LINE'],
 f'{CONTROL_SEQUENCES["UP"]}{CONTROL_SEQUENCES["ERASE_LINE"]}' * self.maximum)
 else:
- self.write(*text, ' ' * self._lastlength)
+ self.write('\r', ' ' * self._lastlength, '\r')

From ca5300c7edadad46ede0249ad9fa8feaa4ccddd4 Mon Sep 17 00:00:00 2001
From: krichbanana <77071421+krichbanana@users.noreply.github.com>
Date: Sat, 9 Apr 2022 15:55:24 -0400
Subject: [PATCH 0936/2552] [youtube] Add `:ytnotifications` extractor (#3347)

Authored by: krichbanana
---
 README.md | 2 +-
 yt_dlp/extractor/extractors.py | 1 +
 yt_dlp/extractor/youtube.py | 89 ++++++++++++++++++++++++++++++++++
 3 files changed, 91 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 1624a1fcb..f4b55f6d7 100644
--- a/README.md
+++ b/README.md
@@ -79,7 +79,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t
 * **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, playlist infojson etc. Note that the NicoNico livestreams are not available. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details.

 * **Youtube improvements**:
-    * All Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`) and private playlists supports downloading multiple pages of content
+    * All Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`, `:ytnotif`) and private playlists support downloading multiple pages of content
     * Search (`ytsearch:`, `ytsearchdate:`), search URLs and in-channel search works
     * Mixes supports downloading multiple pages of content
     * Some (but not all) age-gated content can be downloaded without cookies
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index bc06ab463..0cb686304 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -2100,6 +2100,7 @@ from .youtube import (
 YoutubeIE,
 YoutubeClipIE,
 YoutubeFavouritesIE,
+ YoutubeNotificationsIE,
 YoutubeHistoryIE,
 YoutubeTabIE,
 YoutubeLivestreamEmbedIE,
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 4ee09ad9a..f284487b8 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -5526,6 +5526,95 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
 ie=YoutubeTabIE.ie_key())


+class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
+ IE_NAME = 'youtube:notif'
+ IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
+ _VALID_URL = r':ytnotif(?:ication)?s?'
+ _LOGIN_REQUIRED = True + _TESTS = [{ + 'url': ':ytnotif', + 'only_matching': True, + }, { + 'url': ':ytnotifications', + 'only_matching': True, + }] + + def _extract_notification_menu(self, response, continuation_list): + notification_list = traverse_obj( + response, + ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'), + ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'), + expected_type=list) or [] + continuation_list[0] = None + for item in notification_list: + entry = self._extract_notification_renderer(item.get('notificationRenderer')) + if entry: + yield entry + continuation = item.get('continuationItemRenderer') + if continuation: + continuation_list[0] = continuation + + def _extract_notification_renderer(self, notification): + video_id = traverse_obj( + notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) + url = f'https://www.youtube.com/watch?v={video_id}' + channel_id = None + if not video_id: + browse_ep = traverse_obj( + notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict) + channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str) + post_id = self._search_regex( + r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str), + 'post id', default=None) + if not channel_id or not post_id: + return + # The direct /post url redirects to this in the browser + url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}' + + channel = traverse_obj( + notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'), + expected_type=str) + title = self._search_regex( + rf'{re.escape(channel)} [^:]+: (.+)', self._get_text(notification, 'shortMessage'), + 'video title', default=None) + if title: + title = title.replace('\xad', '') # remove soft hyphens + upload_date = (strftime_or_none(self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d') + if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key()) + else None) + return { + '_type': 'url', + 'url': url, + 'ie_key': (YoutubeIE if video_id else YoutubeTabIE).ie_key(), + 'video_id': video_id, + 'title': title, + 'channel_id': channel_id, + 'channel': channel, + 'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'), + 'upload_date': upload_date, + } + + def _notification_menu_entries(self, ytcfg): + continuation_list = [None] + response = None + for page in itertools.count(1): + ctoken = traverse_obj( + continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str) + response = self._extract_response( + item_id=f'page {page}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg, + ep='notification/get_notification_menu', check_get_keys='actions', + headers=self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))) + yield from self._extract_notification_menu(response, continuation_list) + if not continuation_list[0]: + break + + def _real_extract(self, url): + display_id = 'notifications' + ytcfg = self._download_ytcfg('web', display_id) if not self.skip_webpage else {} + self._report_playlist_authcheck(ytcfg) + return self.playlist_result(self._notification_menu_entries(ytcfg), display_id, display_id) + + class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor): IE_DESC = 'YouTube search' IE_NAME = 'youtube:search' From 
fcdb8d6e88d8b6192bd2d60016c053abe4d4e805 Mon Sep 17 00:00:00 2001 From: mehq <11481344+mehq@users.noreply.github.com> Date: Mon, 11 Apr 2022 10:29:19 +0600 Subject: [PATCH 0937/2552] [Gofile] Fix extraction (#3386) Closes #3380 Authored by: mehq --- yt_dlp/extractor/gofile.py | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/yt_dlp/extractor/gofile.py b/yt_dlp/extractor/gofile.py index 62d778cfe..858bac52c 100644 --- a/yt_dlp/extractor/gofile.py +++ b/yt_dlp/extractor/gofile.py @@ -19,22 +19,25 @@ class GofileIE(InfoExtractor): 'id': 'de571ac1-5edc-42e2-8ec2-bdac83ad4a31', 'filesize': 928116, 'ext': 'mp4', - 'title': 'nuuh' + 'title': 'nuuh', + 'release_timestamp': 1638338704, + 'release_date': '20211201', } }] - }, { # URL to test mixed file types - 'url': 'https://gofile.io/d/avt34h', + }, { + 'url': 'https://gofile.io/d/is8lKr', 'info_dict': { - 'id': 'avt34h', - }, - 'playlist_mincount': 1, - }, { # URL to test no video/audio error - 'url': 'https://gofile.io/d/aB03lZ', - 'info_dict': { - 'id': 'aB03lZ', + 'id': 'TMjXd9', + 'ext': 'mp4', }, 'playlist_count': 0, 'skip': 'No video/audio found at provided URL.', + }, { + 'url': 'https://gofile.io/d/TMjXd9', + 'info_dict': { + 'id': 'TMjXd9', + }, + 'playlist_count': 1, }] _TOKEN = None @@ -50,9 +53,11 @@ class GofileIE(InfoExtractor): self._set_cookie('gofile.io', 'accountToken', self._TOKEN) def _entries(self, file_id): - files = self._download_json( - f'https://api.gofile.io/getContent?contentId={file_id}&token={self._TOKEN}&websiteToken=websiteToken&cache=true', - 'Gofile', note='Getting filelist') + files = self._download_json('https://api.gofile.io/getContent', 'Gofile', note='Getting filelist', query={ + 'contentId': file_id, + 'token': self._TOKEN, + 'websiteToken': 12345, + }) status = files['status'] if status != 'ok': @@ -65,7 +70,7 @@ class GofileIE(InfoExtractor): continue found_files = True - file_url = file.get('directLink') + file_url = file.get('link') if file_url: yield { 'id': file['id'], From 5a727063c54a2353b0bb58644c74e7f74f553800 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 11 Apr 2022 14:03:13 +0530 Subject: [PATCH 0938/2552] [FFmpegMetadataPP] Remove `\0` from metadata --- yt_dlp/postprocessor/ffmpeg.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 27d06cbde..78c6f9107 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -737,6 +737,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor): str(info[key]) for key in [f'{meta_prefix}_'] + list(variadic(info_list or meta_list)) if info.get(key) is not None), None) if value not in ('', None): + value = value.replace('\0', '') # nul character cannot be passed in command line metadata['common'].update({meta_f: value for meta_f in variadic(meta_list)}) # See [1-4] for some info on media metadata/metadata supported From ab96d1ad1bcdb943aa6519980e5383ca91f7da2b Mon Sep 17 00:00:00 2001 From: felix Date: Sun, 19 Sep 2021 13:16:11 +0200 Subject: [PATCH 0939/2552] [cleanup] Remove unused scripts/tests (#2173) Authored by fstirlitz, pukkandan --- devscripts/buildserver.py | 435 --------- devscripts/create-github-release.py | 112 --- devscripts/gh-pages.unused/add-version.py | 43 - .../gh-pages.unused/generate-download.py | 22 - devscripts/gh-pages.unused/sign-versions.py | 34 - .../gh-pages.unused/update-copyright.py | 21 - devscripts/gh-pages.unused/update-feed.py | 76 -- devscripts/gh-pages.unused/update-sites.py | 37 - 
devscripts/posix-locale.sh | 6 - devscripts/release.sh | 143 --- devscripts/show-downloads-statistics.py | 49 - devscripts/wine-py2exe.sh | 58 -- test/swftests.unused/.gitignore | 1 - test/swftests.unused/ArrayAccess.as | 19 - test/swftests.unused/ClassCall.as | 17 - test/swftests.unused/ClassConstruction.as | 15 - test/swftests.unused/ConstArrayAccess.as | 18 - test/swftests.unused/ConstantInt.as | 12 - test/swftests.unused/DictCall.as | 10 - test/swftests.unused/EqualsOperator.as | 10 - test/swftests.unused/LocalVars.as | 13 - test/swftests.unused/MemberAssignment.as | 22 - test/swftests.unused/NeOperator.as | 24 - test/swftests.unused/PrivateCall.as | 21 - test/swftests.unused/PrivateVoidCall.as | 22 - test/swftests.unused/StaticAssignment.as | 13 - test/swftests.unused/StaticRetrieval.as | 16 - test/swftests.unused/StringBasics.as | 11 - test/swftests.unused/StringCharCodeAt.as | 11 - test/swftests.unused/StringConversion.as | 11 - test/test_swfinterp.py.disabled | 80 -- test/test_unicode_literals.py.disabled | 63 -- yt_dlp/swfinterp.py.disabled | 834 ------------------ 33 files changed, 2279 deletions(-) delete mode 100644 devscripts/buildserver.py delete mode 100644 devscripts/create-github-release.py delete mode 100644 devscripts/gh-pages.unused/add-version.py delete mode 100644 devscripts/gh-pages.unused/generate-download.py delete mode 100644 devscripts/gh-pages.unused/sign-versions.py delete mode 100644 devscripts/gh-pages.unused/update-copyright.py delete mode 100644 devscripts/gh-pages.unused/update-feed.py delete mode 100644 devscripts/gh-pages.unused/update-sites.py delete mode 100755 devscripts/posix-locale.sh delete mode 100755 devscripts/release.sh delete mode 100644 devscripts/show-downloads-statistics.py delete mode 100755 devscripts/wine-py2exe.sh delete mode 100644 test/swftests.unused/.gitignore delete mode 100644 test/swftests.unused/ArrayAccess.as delete mode 100644 test/swftests.unused/ClassCall.as delete mode 100644 test/swftests.unused/ClassConstruction.as delete mode 100644 test/swftests.unused/ConstArrayAccess.as delete mode 100644 test/swftests.unused/ConstantInt.as delete mode 100644 test/swftests.unused/DictCall.as delete mode 100644 test/swftests.unused/EqualsOperator.as delete mode 100644 test/swftests.unused/LocalVars.as delete mode 100644 test/swftests.unused/MemberAssignment.as delete mode 100644 test/swftests.unused/NeOperator.as delete mode 100644 test/swftests.unused/PrivateCall.as delete mode 100644 test/swftests.unused/PrivateVoidCall.as delete mode 100644 test/swftests.unused/StaticAssignment.as delete mode 100644 test/swftests.unused/StaticRetrieval.as delete mode 100644 test/swftests.unused/StringBasics.as delete mode 100644 test/swftests.unused/StringCharCodeAt.as delete mode 100644 test/swftests.unused/StringConversion.as delete mode 100644 test/test_swfinterp.py.disabled delete mode 100644 test/test_unicode_literals.py.disabled delete mode 100644 yt_dlp/swfinterp.py.disabled diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py deleted file mode 100644 index cd544b816..000000000 --- a/devscripts/buildserver.py +++ /dev/null @@ -1,435 +0,0 @@ -# UNUSED - -#!/usr/bin/python3 - -import argparse -import ctypes -import functools -import shutil -import subprocess -import sys -import tempfile -import threading -import traceback -import os.path - -sys.path.insert(0, os.path.dirname(os.path.dirname((os.path.abspath(__file__))))) -from yt_dlp.compat import ( - compat_input, - compat_http_server, - compat_str, - compat_urlparse, -) - -# 
These are not used outside of buildserver.py thus not in compat.py - -try: - import winreg as compat_winreg -except ImportError: # Python 2 - import _winreg as compat_winreg - -try: - import socketserver as compat_socketserver -except ImportError: # Python 2 - import SocketServer as compat_socketserver - - -class BuildHTTPServer(compat_socketserver.ThreadingMixIn, compat_http_server.HTTPServer): - allow_reuse_address = True - - -advapi32 = ctypes.windll.advapi32 - -SC_MANAGER_ALL_ACCESS = 0xf003f -SC_MANAGER_CREATE_SERVICE = 0x02 -SERVICE_WIN32_OWN_PROCESS = 0x10 -SERVICE_AUTO_START = 0x2 -SERVICE_ERROR_NORMAL = 0x1 -DELETE = 0x00010000 -SERVICE_STATUS_START_PENDING = 0x00000002 -SERVICE_STATUS_RUNNING = 0x00000004 -SERVICE_ACCEPT_STOP = 0x1 - -SVCNAME = 'youtubedl_builder' - -LPTSTR = ctypes.c_wchar_p -START_CALLBACK = ctypes.WINFUNCTYPE(None, ctypes.c_int, ctypes.POINTER(LPTSTR)) - - -class SERVICE_TABLE_ENTRY(ctypes.Structure): - _fields_ = [ - ('lpServiceName', LPTSTR), - ('lpServiceProc', START_CALLBACK) - ] - - -HandlerEx = ctypes.WINFUNCTYPE( - ctypes.c_int, # return - ctypes.c_int, # dwControl - ctypes.c_int, # dwEventType - ctypes.c_void_p, # lpEventData, - ctypes.c_void_p, # lpContext, -) - - -def _ctypes_array(c_type, py_array): - ar = (c_type * len(py_array))() - ar[:] = py_array - return ar - - -def win_OpenSCManager(): - res = advapi32.OpenSCManagerW(None, None, SC_MANAGER_ALL_ACCESS) - if not res: - raise Exception('Opening service manager failed - ' - 'are you running this as administrator?') - return res - - -def win_install_service(service_name, cmdline): - manager = win_OpenSCManager() - try: - h = advapi32.CreateServiceW( - manager, service_name, None, - SC_MANAGER_CREATE_SERVICE, SERVICE_WIN32_OWN_PROCESS, - SERVICE_AUTO_START, SERVICE_ERROR_NORMAL, - cmdline, None, None, None, None, None) - if not h: - raise OSError('Service creation failed: %s' % ctypes.FormatError()) - - advapi32.CloseServiceHandle(h) - finally: - advapi32.CloseServiceHandle(manager) - - -def win_uninstall_service(service_name): - manager = win_OpenSCManager() - try: - h = advapi32.OpenServiceW(manager, service_name, DELETE) - if not h: - raise OSError('Could not find service %s: %s' % ( - service_name, ctypes.FormatError())) - - try: - if not advapi32.DeleteService(h): - raise OSError('Deletion failed: %s' % ctypes.FormatError()) - finally: - advapi32.CloseServiceHandle(h) - finally: - advapi32.CloseServiceHandle(manager) - - -def win_service_report_event(service_name, msg, is_error=True): - with open('C:/sshkeys/log', 'a', encoding='utf-8') as f: - f.write(msg + '\n') - - event_log = advapi32.RegisterEventSourceW(None, service_name) - if not event_log: - raise OSError('Could not report event: %s' % ctypes.FormatError()) - - try: - type_id = 0x0001 if is_error else 0x0004 - event_id = 0xc0000000 if is_error else 0x40000000 - lines = _ctypes_array(LPTSTR, [msg]) - - if not advapi32.ReportEventW( - event_log, type_id, 0, event_id, None, len(lines), 0, - lines, None): - raise OSError('Event reporting failed: %s' % ctypes.FormatError()) - finally: - advapi32.DeregisterEventSource(event_log) - - -def win_service_handler(stop_event, *args): - try: - raise ValueError('Handler called with args ' + repr(args)) - TODO - except Exception as e: - tb = traceback.format_exc() - msg = str(e) + '\n' + tb - win_service_report_event(service_name, msg, is_error=True) - raise - - -def win_service_set_status(handle, status_code): - svcStatus = SERVICE_STATUS() - svcStatus.dwServiceType = SERVICE_WIN32_OWN_PROCESS - 
svcStatus.dwCurrentState = status_code - svcStatus.dwControlsAccepted = SERVICE_ACCEPT_STOP - - svcStatus.dwServiceSpecificExitCode = 0 - - if not advapi32.SetServiceStatus(handle, ctypes.byref(svcStatus)): - raise OSError('SetServiceStatus failed: %r' % ctypes.FormatError()) - - -def win_service_main(service_name, real_main, argc, argv_raw): - try: - # args = [argv_raw[i].value for i in range(argc)] - stop_event = threading.Event() - handler = HandlerEx(functools.partial(stop_event, win_service_handler)) - h = advapi32.RegisterServiceCtrlHandlerExW(service_name, handler, None) - if not h: - raise OSError('Handler registration failed: %s' % - ctypes.FormatError()) - - TODO - except Exception as e: - tb = traceback.format_exc() - msg = str(e) + '\n' + tb - win_service_report_event(service_name, msg, is_error=True) - raise - - -def win_service_start(service_name, real_main): - try: - cb = START_CALLBACK( - functools.partial(win_service_main, service_name, real_main)) - dispatch_table = _ctypes_array(SERVICE_TABLE_ENTRY, [ - SERVICE_TABLE_ENTRY( - service_name, - cb - ), - SERVICE_TABLE_ENTRY(None, ctypes.cast(None, START_CALLBACK)) - ]) - - if not advapi32.StartServiceCtrlDispatcherW(dispatch_table): - raise OSError('ctypes start failed: %s' % ctypes.FormatError()) - except Exception as e: - tb = traceback.format_exc() - msg = str(e) + '\n' + tb - win_service_report_event(service_name, msg, is_error=True) - raise - - -def main(args=None): - parser = argparse.ArgumentParser() - parser.add_argument('-i', '--install', - action='store_const', dest='action', const='install', - help='Launch at Windows startup') - parser.add_argument('-u', '--uninstall', - action='store_const', dest='action', const='uninstall', - help='Remove Windows service') - parser.add_argument('-s', '--service', - action='store_const', dest='action', const='service', - help='Run as a Windows service') - parser.add_argument('-b', '--bind', metavar='', - action='store', default='0.0.0.0:8142', - help='Bind to host:port (default %default)') - options = parser.parse_args(args=args) - - if options.action == 'install': - fn = os.path.abspath(__file__).replace('v:', '\\\\vboxsrv\\vbox') - cmdline = '%s %s -s -b %s' % (sys.executable, fn, options.bind) - win_install_service(SVCNAME, cmdline) - return - - if options.action == 'uninstall': - win_uninstall_service(SVCNAME) - return - - if options.action == 'service': - win_service_start(SVCNAME, main) - return - - host, port_str = options.bind.split(':') - port = int(port_str) - - print('Listening on %s:%d' % (host, port)) - srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler) - thr = threading.Thread(target=srv.serve_forever) - thr.start() - compat_input('Press ENTER to shut down') - srv.shutdown() - thr.join() - - -def rmtree(path): - for name in os.listdir(path): - fname = os.path.join(path, name) - if os.path.isdir(fname): - rmtree(fname) - else: - os.chmod(fname, 0o666) - os.remove(fname) - os.rmdir(path) - - -class BuildError(Exception): - def __init__(self, output, code=500): - self.output = output - self.code = code - - def __str__(self): - return self.output - - -class HTTPError(BuildError): - pass - - -class PythonBuilder(object): - def __init__(self, **kwargs): - python_version = kwargs.pop('python', '3.4') - python_path = None - for node in ('Wow6432Node\\', ''): - try: - key = compat_winreg.OpenKey( - compat_winreg.HKEY_LOCAL_MACHINE, - r'SOFTWARE\%sPython\PythonCore\%s\InstallPath' % (node, python_version)) - try: - python_path, _ = compat_winreg.QueryValueEx(key, 
'') - finally: - compat_winreg.CloseKey(key) - break - except Exception: - pass - - if not python_path: - raise BuildError('No such Python version: %s' % python_version) - - self.pythonPath = python_path - - super(PythonBuilder, self).__init__(**kwargs) - - -class GITInfoBuilder(object): - def __init__(self, **kwargs): - try: - self.user, self.repoName = kwargs['path'][:2] - self.rev = kwargs.pop('rev') - except ValueError: - raise BuildError('Invalid path') - except KeyError as e: - raise BuildError('Missing mandatory parameter "%s"' % e.args[0]) - - path = os.path.join(os.environ['APPDATA'], 'Build archive', self.repoName, self.user) - if not os.path.exists(path): - os.makedirs(path) - self.basePath = tempfile.mkdtemp(dir=path) - self.buildPath = os.path.join(self.basePath, 'build') - - super(GITInfoBuilder, self).__init__(**kwargs) - - -class GITBuilder(GITInfoBuilder): - def build(self): - try: - subprocess.check_output(['git', 'clone', 'git://github.com/%s/%s.git' % (self.user, self.repoName), self.buildPath]) - subprocess.check_output(['git', 'checkout', self.rev], cwd=self.buildPath) - except subprocess.CalledProcessError as e: - raise BuildError(e.output) - - super(GITBuilder, self).build() - - -class YoutubeDLBuilder(object): - authorizedUsers = ['fraca7', 'phihag', 'rg3', 'FiloSottile', 'ytdl-org'] - - def __init__(self, **kwargs): - if self.repoName != 'yt-dlp': - raise BuildError('Invalid repository "%s"' % self.repoName) - if self.user not in self.authorizedUsers: - raise HTTPError('Unauthorized user "%s"' % self.user, 401) - - super(YoutubeDLBuilder, self).__init__(**kwargs) - - def build(self): - try: - proc = subprocess.Popen([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'], stdin=subprocess.PIPE, cwd=self.buildPath) - proc.wait() - #subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'], - # cwd=self.buildPath) - except subprocess.CalledProcessError as e: - raise BuildError(e.output) - - super(YoutubeDLBuilder, self).build() - - -class DownloadBuilder(object): - def __init__(self, **kwargs): - self.handler = kwargs.pop('handler') - self.srcPath = os.path.join(self.buildPath, *tuple(kwargs['path'][2:])) - self.srcPath = os.path.abspath(os.path.normpath(self.srcPath)) - if not self.srcPath.startswith(self.buildPath): - raise HTTPError(self.srcPath, 401) - - super(DownloadBuilder, self).__init__(**kwargs) - - def build(self): - if not os.path.exists(self.srcPath): - raise HTTPError('No such file', 404) - if os.path.isdir(self.srcPath): - raise HTTPError('Is a directory: %s' % self.srcPath, 401) - - self.handler.send_response(200) - self.handler.send_header('Content-Type', 'application/octet-stream') - self.handler.send_header('Content-Disposition', 'attachment; filename=%s' % os.path.split(self.srcPath)[-1]) - self.handler.send_header('Content-Length', str(os.stat(self.srcPath).st_size)) - self.handler.end_headers() - - with open(self.srcPath, 'rb') as src: - shutil.copyfileobj(src, self.handler.wfile) - - super(DownloadBuilder, self).build() - - -class CleanupTempDir(object): - def build(self): - try: - rmtree(self.basePath) - except Exception as e: - print('WARNING deleting "%s": %s' % (self.basePath, e)) - - super(CleanupTempDir, self).build() - - -class Null(object): - def __init__(self, **kwargs): - pass - - def start(self): - pass - - def close(self): - pass - - def build(self): - pass - - -class Builder(PythonBuilder, GITBuilder, YoutubeDLBuilder, DownloadBuilder, CleanupTempDir, Null): - pass - - -class 
BuildHTTPRequestHandler(compat_http_server.BaseHTTPRequestHandler): - actionDict = {'build': Builder, 'download': Builder} # They're the same, no more caching. - - def do_GET(self): - path = compat_urlparse.urlparse(self.path) - paramDict = dict([(key, value[0]) for key, value in compat_urlparse.parse_qs(path.query).items()]) - action, _, path = path.path.strip('/').partition('/') - if path: - path = path.split('/') - if action in self.actionDict: - try: - builder = self.actionDict[action](path=path, handler=self, **paramDict) - builder.start() - try: - builder.build() - finally: - builder.close() - except BuildError as e: - self.send_response(e.code) - msg = compat_str(e).encode('UTF-8') - self.send_header('Content-Type', 'text/plain; charset=UTF-8') - self.send_header('Content-Length', len(msg)) - self.end_headers() - self.wfile.write(msg) - else: - self.send_response(500, 'Unknown build method "%s"' % action) - else: - self.send_response(500, 'Malformed URL') - -if __name__ == '__main__': - main() diff --git a/devscripts/create-github-release.py b/devscripts/create-github-release.py deleted file mode 100644 index 53b3e0f48..000000000 --- a/devscripts/create-github-release.py +++ /dev/null @@ -1,112 +0,0 @@ -# Unused - -#!/usr/bin/env python3 -from __future__ import unicode_literals - -import io -import json -import mimetypes -import netrc -import optparse -import os -import re -import sys - -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -from yt_dlp.compat import ( - compat_basestring, - compat_getpass, - compat_print, - compat_urllib_request, -) -from yt_dlp.utils import ( - make_HTTPS_handler, - sanitized_Request, -) - - -class GitHubReleaser(object): - _API_URL = 'https://api.github.com/repos/ytdl-org/youtube-dl/releases' - _UPLOADS_URL = 'https://uploads.github.com/repos/ytdl-org/youtube-dl/releases/%s/assets?name=%s' - _NETRC_MACHINE = 'github.com' - - def __init__(self, debuglevel=0): - self._init_github_account() - https_handler = make_HTTPS_handler({}, debuglevel=debuglevel) - self._opener = compat_urllib_request.build_opener(https_handler) - - def _init_github_account(self): - try: - info = netrc.netrc().authenticators(self._NETRC_MACHINE) - if info is not None: - self._token = info[2] - compat_print('Using GitHub credentials found in .netrc...') - return - else: - compat_print('No GitHub credentials found in .netrc') - except (IOError, netrc.NetrcParseError): - compat_print('Unable to parse .netrc') - self._token = compat_getpass( - 'Type your GitHub PAT (personal access token) and press [Return]: ') - - def _call(self, req): - if isinstance(req, compat_basestring): - req = sanitized_Request(req) - req.add_header('Authorization', 'token %s' % self._token) - response = self._opener.open(req).read().decode('utf-8') - return json.loads(response) - - def list_releases(self): - return self._call(self._API_URL) - - def create_release(self, tag_name, name=None, body='', draft=False, prerelease=False): - data = { - 'tag_name': tag_name, - 'target_commitish': 'master', - 'name': name, - 'body': body, - 'draft': draft, - 'prerelease': prerelease, - } - req = sanitized_Request(self._API_URL, json.dumps(data).encode('utf-8')) - return self._call(req) - - def create_asset(self, release_id, asset): - asset_name = os.path.basename(asset) - url = self._UPLOADS_URL % (release_id, asset_name) - # Our files are small enough to be loaded directly into memory. 
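# (Read-then-upload keeps the whole asset in memory, which is acceptable
# because, as the comment above notes, release assets are small.)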
- data = open(asset, 'rb').read() - req = sanitized_Request(url, data) - mime_type, _ = mimetypes.guess_type(asset_name) - req.add_header('Content-Type', mime_type or 'application/octet-stream') - return self._call(req) - - -def main(): - parser = optparse.OptionParser(usage='%prog CHANGELOG VERSION BUILDPATH') - options, args = parser.parse_args() - if len(args) != 3: - parser.error('Expected a version and a build directory') - - changelog_file, version, build_path = args - - with io.open(changelog_file, encoding='utf-8') as inf: - changelog = inf.read() - - mobj = re.search(r'(?s)version %s\n{2}(.+?)\n{3}' % version, changelog) - body = mobj.group(1) if mobj else '' - - releaser = GitHubReleaser() - - new_release = releaser.create_release( - version, name='yt-dlp %s' % version, body=body) - release_id = new_release['id'] - - for asset in os.listdir(build_path): - compat_print('Uploading %s...' % asset) - releaser.create_asset(release_id, os.path.join(build_path, asset)) - - -if __name__ == '__main__': - main() diff --git a/devscripts/gh-pages.unused/add-version.py b/devscripts/gh-pages.unused/add-version.py deleted file mode 100644 index 9ea01374d..000000000 --- a/devscripts/gh-pages.unused/add-version.py +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env python3 -from __future__ import unicode_literals - -import json -import sys -import hashlib -import os.path - - -if len(sys.argv) <= 1: - print('Specify the version number as parameter') - sys.exit() -version = sys.argv[1] - -with open('update/LATEST_VERSION', 'w') as f: - f.write(version) - -versions_info = json.load(open('update/versions.json')) -if 'signature' in versions_info: - del versions_info['signature'] - -new_version = {} - -filenames = { - 'bin': 'yt-dlp', - 'exe': 'yt-dlp.exe', - 'tar': 'yt-dlp-%s.tar.gz' % version} -build_dir = os.path.join('..', '..', 'build', version) -for key, filename in filenames.items(): - url = 'https://yt-dl.org/downloads/%s/%s' % (version, filename) - fn = os.path.join(build_dir, filename) - with open(fn, 'rb') as f: - data = f.read() - if not data: - raise ValueError('File %s is empty!' 
% fn)
-    sha256sum = hashlib.sha256(data).hexdigest()
-    new_version[key] = (url, sha256sum)
-
-versions_info['versions'][version] = new_version
-versions_info['latest'] = version
-
-with open('update/versions.json', 'w') as jsonf:
-    json.dump(versions_info, jsonf, indent=4, sort_keys=True)
diff --git a/devscripts/gh-pages.unused/generate-download.py b/devscripts/gh-pages.unused/generate-download.py
deleted file mode 100644
index a873d32ee..000000000
--- a/devscripts/gh-pages.unused/generate-download.py
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/env python3
-from __future__ import unicode_literals
-
-import json
-
-versions_info = json.load(open('update/versions.json'))
-version = versions_info['latest']
-version_dict = versions_info['versions'][version]
-
-# Read template page
-with open('download.html.in', 'r', encoding='utf-8') as tmplf:
-    template = tmplf.read()
-
-template = template.replace('@PROGRAM_VERSION@', version)
-template = template.replace('@PROGRAM_URL@', version_dict['bin'][0])
-template = template.replace('@PROGRAM_SHA256SUM@', version_dict['bin'][1])
-template = template.replace('@EXE_URL@', version_dict['exe'][0])
-template = template.replace('@EXE_SHA256SUM@', version_dict['exe'][1])
-template = template.replace('@TAR_URL@', version_dict['tar'][0])
-template = template.replace('@TAR_SHA256SUM@', version_dict['tar'][1])
-with open('download.html', 'w', encoding='utf-8') as dlf:
-    dlf.write(template)
diff --git a/devscripts/gh-pages.unused/sign-versions.py b/devscripts/gh-pages.unused/sign-versions.py
deleted file mode 100644
index fa389c358..000000000
--- a/devscripts/gh-pages.unused/sign-versions.py
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/usr/bin/env python3
-from __future__ import unicode_literals, with_statement
-
-import rsa
-import json
-from binascii import hexlify
-
-try:
-    input = raw_input
-except NameError:
-    pass
-
-versions_info = json.load(open('update/versions.json'))
-if 'signature' in versions_info:
-    del versions_info['signature']
-
-print('Enter the PKCS1 private key, followed by a blank line:')
-privkey = b''
-while True:
-    try:
-        line = input()
-    except EOFError:
-        break
-    if line == '':
-        break
-    privkey += line.encode('ascii') + b'\n'
-privkey = rsa.PrivateKey.load_pkcs1(privkey)
-
-signature = hexlify(rsa.pkcs1.sign(json.dumps(versions_info, sort_keys=True).encode('utf-8'), privkey, 'SHA-256')).decode()
-print('signature: ' + signature)
-
-versions_info['signature'] = signature
-with open('update/versions.json', 'w') as versionsf:
-    json.dump(versions_info, versionsf, indent=4, sort_keys=True)
diff --git a/devscripts/gh-pages.unused/update-copyright.py b/devscripts/gh-pages.unused/update-copyright.py
deleted file mode 100644
index e122d0283..000000000
--- a/devscripts/gh-pages.unused/update-copyright.py
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/usr/bin/env python3
-# coding: utf-8
-
-from __future__ import with_statement, unicode_literals
-
-import datetime
-import glob
-import io  # For Python 2 compatibility
-import os
-import re
-
-year = str(datetime.datetime.now().year)
-for fn in glob.glob('*.html*'):
-    with io.open(fn, encoding='utf-8') as f:
-        content = f.read()
-    newc = re.sub(r'(?P<copyright>Copyright © 2011-)(?P<year>[0-9]{4})', 'Copyright © 2011-' + year, content)
-    if content != newc:
-        tmpFn = fn + '.part'
-        with io.open(tmpFn, 'wt', encoding='utf-8') as outf:
-            outf.write(newc)
-        os.rename(tmpFn, fn)
diff --git a/devscripts/gh-pages.unused/update-feed.py b/devscripts/gh-pages.unused/update-feed.py
deleted file mode 100644
index c9f2fdb07..000000000
--- a/devscripts/gh-pages.unused/update-feed.py
+++ /dev/null
@@ -1,76 +0,0 @@
-#!/usr/bin/env python3
-from __future__ import unicode_literals
-
-import datetime
-import io
-import json
-import textwrap
-
-
-atom_template = textwrap.dedent("""\
-    <?xml version="1.0" encoding="utf-8"?>
-    <feed xmlns="http://www.w3.org/2005/Atom">
-        <title>yt-dlp releases</title>
-        <id>https://yt-dl.org/feed/yt-dlp-updates-feed</id>
-        <updated>@TIMESTAMP@</updated>
-        @ENTRIES@
-    </feed>""")
-
-entry_template = textwrap.dedent("""
-    <entry>
-        <id>https://yt-dl.org/feed/yt-dlp-updates-feed/yt-dlp-@VERSION@</id>
-        <title>New version @VERSION@</title>
-        <author>
-            <name>The yt-dlp maintainers</name>
-        </author>
-        <updated>@TIMESTAMP@</updated>
-    </entry>
-    """)
-
-now = datetime.datetime.now()
-now_iso = now.isoformat() + 'Z'
-
-atom_template = atom_template.replace('@TIMESTAMP@', now_iso)
-
-versions_info = json.load(open('update/versions.json'))
-versions = list(versions_info['versions'].keys())
-versions.sort()
-
-entries = []
-for v in versions:
-    fields = v.split('.')
-    year, month, day = map(int, fields[:3])
-    faked = 0
-    patchlevel = 0
-    while True:
-        try:
-            datetime.date(year, month, day)
-        except ValueError:
-            day -= 1
-            faked += 1
-            assert day > 0
-            continue
-        break
-    if len(fields) >= 4:
-        try:
-            patchlevel = int(fields[3])
-        except ValueError:
-            patchlevel = 1
-    timestamp = '%04d-%02d-%02dT00:%02d:%02dZ' % (year, month, day, faked, patchlevel)
-
-    entry = entry_template.replace('@TIMESTAMP@', timestamp)
-    entry = entry.replace('@VERSION@', v)
-    entries.append(entry)
-
-entries_str = textwrap.indent(''.join(entries), '\t')
-atom_template = atom_template.replace('@ENTRIES@', entries_str)
-
-with io.open('update/releases.atom', 'w', encoding='utf-8') as atom_file:
-    atom_file.write(atom_template)
diff --git a/devscripts/gh-pages.unused/update-sites.py b/devscripts/gh-pages.unused/update-sites.py
deleted file mode 100644
index b53685fcc..000000000
--- a/devscripts/gh-pages.unused/update-sites.py
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/usr/bin/env python3
-from __future__ import unicode_literals
-
-import sys
-import os
-import textwrap
-
-# We must be able to import yt_dlp
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
-
-import yt_dlp
-
-
-def main():
-    with open('supportedsites.html.in', 'r', encoding='utf-8') as tmplf:
-        template = tmplf.read()
-
-    ie_htmls = []
-    for ie in yt_dlp.list_extractors(age_limit=None):
-        ie_html = '<b>{}</b>'.format(ie.IE_NAME)
-        ie_desc = getattr(ie, 'IE_DESC', None)
-        if ie_desc is False:
-            continue
-        elif ie_desc is not None:
-            ie_html += ': {}'.format(ie.IE_DESC)
-        if not ie.working():
-            ie_html += ' (Currently broken)'
-        ie_htmls.append('<li>{}</li>'.format(ie_html))
-
-    template = template.replace('@SITES@', textwrap.indent('\n'.join(ie_htmls), '\t'))
-
-    with open('supportedsites.html', 'w', encoding='utf-8') as sitesf:
-        sitesf.write(template)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/devscripts/posix-locale.sh b/devscripts/posix-locale.sh
deleted file mode 100755
index 0aa7a592d..000000000
--- a/devscripts/posix-locale.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-
-# source this file in your shell to get a POSIX locale (which will break many programs, but that's kind of the point)
-
-export LC_ALL=POSIX
-export LANG=POSIX
-export LANGUAGE=POSIX
diff --git a/devscripts/release.sh b/devscripts/release.sh
deleted file mode 100755
index 188b166e6..000000000
--- a/devscripts/release.sh
+++ /dev/null
@@ -1,143 +0,0 @@
-# Unused
-
-#!/bin/bash
-
-# IMPORTANT: the following assumptions are made
-# * the GH repo is on the origin remote
-# * the gh-pages branch is named so locally
-# * the git config user.signingkey is properly set
-
-# You will need
-# pip install coverage nose rsa wheel
-
-# TODO
-# release notes
-# make hash on local files
-
-set -e
-
-skip_tests=true
-gpg_sign_commits=""
-buildserver='localhost:8142'
-
-while true
-do
-case "$1" in
-    --run-tests)
-        skip_tests=false
-        shift
-    ;;
-    --gpg-sign-commits|-S)
-        gpg_sign_commits="-S"
-        shift
-    ;;
-    --buildserver)
-        buildserver="$2"
-        shift 2
-    ;;
-    --*)
-        echo "ERROR: unknown option $1"
-        exit 1
-    ;;
-    *)
-        break
-    ;;
-esac
-done
-
-if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi
-version="$1"
-major_version=$(echo "$version" | sed -n 's#^\([0-9]*\.[0-9]*\.[0-9]*\).*#\1#p')
-if test "$major_version" '!=' "$(date '+%Y.%m.%d')"; then
-    echo "$version does not start with today's date!"
-    exit 1
-fi
-
-if [ ! -z "`git tag | grep "$version"`" ]; then echo 'ERROR: version already present'; exit 1; fi
-if [ ! -z "`git status --porcelain | grep -v CHANGELOG`" ]; then echo 'ERROR: the working directory is not clean; commit or stash changes'; exit 1; fi
-useless_files=$(find yt_dlp -type f -not -name '*.py')
-if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in yt_dlp: $useless_files"; exit 1; fi
-if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi
-if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; exit 1; fi
-if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi
-if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi
-
-read -p "Is Changelog up to date? (y/n) " -n 1
-if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
-
-/bin/echo -e "\n### First of all, testing..."
-make clean
-if $skip_tests ; then
-    echo 'SKIPPING TESTS'
-else
-    nosetests --verbose --with-coverage --cover-package=yt_dlp --cover-html test --stop || exit 1
-fi
-
-/bin/echo -e "\n### Changing version in version.py..."
-sed -i "s/__version__ = '.*'/__version__ = '$version'/" yt_dlp/version.py
-
-/bin/echo -e "\n### Changing version in Changelog..."
-sed -i "s/<unreleased>/$version/" Changelog.md
-
-/bin/echo -e "\n### Committing documentation, templates and yt_dlp/version.py..."
-make README.md CONTRIBUTING.md issuetemplates supportedsites -git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE/1_broken_site.md .github/ISSUE_TEMPLATE/2_site_support_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md .github/ISSUE_TEMPLATE/4_bug_report.md .github/ISSUE_TEMPLATE/5_feature_request.md .github/ISSUE_TEMPLATE/6_question.md docs/supportedsites.md yt_dlp/version.py Changelog.md -git commit $gpg_sign_commits -m "release $version" - -/bin/echo -e "\n### Now tagging, signing and pushing..." -git tag -s -m "Release $version" "$version" -git show "$version" -read -p "Is it good, can I push? (y/n) " -n 1 -if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi -echo -MASTER=$(git rev-parse --abbrev-ref HEAD) -git push origin $MASTER:master -git push origin "$version" - -/bin/echo -e "\n### OK, now it is time to build the binaries..." -REV=$(git rev-parse HEAD) -make yt-dlp yt-dlp.tar.gz -read -p "VM running? (y/n) " -n 1 -wget "http://$buildserver/build/ytdl-org/youtube-dl/yt-dlp.exe?rev=$REV" -O yt-dlp.exe -mkdir -p "build/$version" -mv yt-dlp yt-dlp.exe "build/$version" -mv yt-dlp.tar.gz "build/$version/yt-dlp-$version.tar.gz" -RELEASE_FILES="yt-dlp yt-dlp.exe yt-dlp-$version.tar.gz" -(cd build/$version/ && md5sum $RELEASE_FILES > MD5SUMS) -(cd build/$version/ && sha1sum $RELEASE_FILES > SHA1SUMS) -(cd build/$version/ && sha256sum $RELEASE_FILES > SHA2-256SUMS) -(cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS) - -/bin/echo -e "\n### Signing and uploading the new binaries to GitHub..." -for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done - -ROOT=$(pwd) -python devscripts/create-github-release.py Changelog.md $version "$ROOT/build/$version" - -ssh ytdl@yt-dl.org "sh html/update_latest.sh $version" - -/bin/echo -e "\n### Now switching to gh-pages..." -git clone --branch gh-pages --single-branch . build/gh-pages -( - set -e - ORIGIN_URL=$(git config --get remote.origin.url) - cd build/gh-pages - "$ROOT/devscripts/gh-pages/add-version.py" $version - "$ROOT/devscripts/gh-pages/update-feed.py" - "$ROOT/devscripts/gh-pages/sign-versions.py" < "$ROOT/updates_key.pem" - "$ROOT/devscripts/gh-pages/generate-download.py" - "$ROOT/devscripts/gh-pages/update-copyright.py" - "$ROOT/devscripts/gh-pages/update-sites.py" - git add *.html *.html.in update - git commit $gpg_sign_commits -m "release $version" - git push "$ROOT" gh-pages - git push "$ORIGIN_URL" gh-pages -) -rm -rf build - -make pypi-files -echo "Uploading to PyPi ..." -python setup.py sdist bdist_wheel upload -make clean - -/bin/echo -e "\n### DONE!" 
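The guard near the top of release.sh enforces yt-dlp's date-based versioning: a release tag must begin with today's date in YYYY.MM.DD form, optionally followed by a numeric patch component. A minimal Python sketch of the same validation (the `is_release_version` helper is a hypothetical name for illustration, not part of the codebase):

    import datetime
    import re


    def is_release_version(version):
        # Date-based scheme: YYYY.MM.DD with an optional numeric patch part,
        # e.g. '2021.10.04' or '2021.10.04.1'
        m = re.match(r'(\d{4})\.(\d{2})\.(\d{2})(?:\.(\d+))?$', version)
        if not m:
            return False
        # Mirrors the `test "$major_version" '!=' "$(date '+%Y.%m.%d')"` check
        # in release.sh: the date part must be today's date
        return '%s.%s.%s' % m.group(1, 2, 3) == datetime.date.today().strftime('%Y.%m.%d')


    assert is_release_version(datetime.date.today().strftime('%Y.%m.%d'))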
diff --git a/devscripts/show-downloads-statistics.py b/devscripts/show-downloads-statistics.py deleted file mode 100644 index 4855aa7c8..000000000 --- a/devscripts/show-downloads-statistics.py +++ /dev/null @@ -1,49 +0,0 @@ -# Unused - -#!/usr/bin/env python3 -from __future__ import unicode_literals - -import itertools -import json -import os -import re -import sys - -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -from yt_dlp.compat import ( - compat_print, - compat_urllib_request, -) -from yt_dlp.utils import format_bytes - - -def format_size(bytes): - return '%s (%d bytes)' % (format_bytes(bytes), bytes) - - -total_bytes = 0 - -for page in itertools.count(1): - releases = json.loads(compat_urllib_request.urlopen( - 'https://api.github.com/repos/ytdl-org/youtube-dl/releases?page=%s' % page - ).read().decode('utf-8')) - - if not releases: - break - - for release in releases: - compat_print(release['name']) - for asset in release['assets']: - asset_name = asset['name'] - total_bytes += asset['download_count'] * asset['size'] - if all(not re.match(p, asset_name) for p in ( - r'^yt-dlp$', - r'^yt-dlp-\d{4}\.\d{2}\.\d{2}(?:\.\d+)?\.tar\.gz$', - r'^yt-dlp\.exe$')): - continue - compat_print( - ' %s size: %s downloads: %d' - % (asset_name, format_size(asset['size']), asset['download_count'])) - -compat_print('total downloads traffic: %s' % format_size(total_bytes)) diff --git a/devscripts/wine-py2exe.sh b/devscripts/wine-py2exe.sh deleted file mode 100755 index 8bc8ce55b..000000000 --- a/devscripts/wine-py2exe.sh +++ /dev/null @@ -1,58 +0,0 @@ -# UNUSED - -#!/bin/bash - -# Run with as parameter a setup.py that works in the current directory -# e.g. no os.chdir() -# It will run twice, the first time will crash - -set -e - -SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )" - -if [ ! 
-d wine-py2exe ]; then - - sudo apt-get install wine1.3 axel bsdiff - - mkdir wine-py2exe - cd wine-py2exe - export WINEPREFIX=`pwd` - - axel -a "http://www.python.org/ftp/python/2.7/python-2.7.msi" - axel -a "http://downloads.sourceforge.net/project/py2exe/py2exe/0.6.9/py2exe-0.6.9.win32-py2.7.exe" - #axel -a "http://winetricks.org/winetricks" - - # http://appdb.winehq.org/objectManager.php?sClass=version&iId=21957 - echo "Follow python setup on screen" - wine msiexec /i python-2.7.msi - - echo "Follow py2exe setup on screen" - wine py2exe-0.6.9.win32-py2.7.exe - - #echo "Follow Microsoft Visual C++ 2008 Redistributable Package setup on screen" - #bash winetricks vcrun2008 - - rm py2exe-0.6.9.win32-py2.7.exe - rm python-2.7.msi - #rm winetricks - - # http://bugs.winehq.org/show_bug.cgi?id=3591 - - mv drive_c/Python27/Lib/site-packages/py2exe/run.exe drive_c/Python27/Lib/site-packages/py2exe/run.exe.backup - bspatch drive_c/Python27/Lib/site-packages/py2exe/run.exe.backup drive_c/Python27/Lib/site-packages/py2exe/run.exe "$SCRIPT_DIR/SizeOfImage.patch" - mv drive_c/Python27/Lib/site-packages/py2exe/run_w.exe drive_c/Python27/Lib/site-packages/py2exe/run_w.exe.backup - bspatch drive_c/Python27/Lib/site-packages/py2exe/run_w.exe.backup drive_c/Python27/Lib/site-packages/py2exe/run_w.exe "$SCRIPT_DIR/SizeOfImage_w.patch" - - cd - - -else - - export WINEPREFIX="$( cd wine-py2exe && pwd )" - -fi - -wine "C:\\Python27\\python.exe" "$1" py2exe > "py2exe.log" 2>&1 || true -echo '# Copying python27.dll' >> "py2exe.log" -cp "$WINEPREFIX/drive_c/windows/system32/python27.dll" build/bdist.win32/winexe/bundle-2.7/ -wine "C:\\Python27\\python.exe" "$1" py2exe >> "py2exe.log" 2>&1 - diff --git a/test/swftests.unused/.gitignore b/test/swftests.unused/.gitignore deleted file mode 100644 index da97ff7ca..000000000 --- a/test/swftests.unused/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.swf diff --git a/test/swftests.unused/ArrayAccess.as b/test/swftests.unused/ArrayAccess.as deleted file mode 100644 index e22caa386..000000000 --- a/test/swftests.unused/ArrayAccess.as +++ /dev/null @@ -1,19 +0,0 @@ -// input: [["a", "b", "c", "d"]] -// output: ["c", "b", "a", "d"] - -package { -public class ArrayAccess { - public static function main(ar:Array):Array { - var aa:ArrayAccess = new ArrayAccess(); - return aa.f(ar, 2); - } - - private function f(ar:Array, num:Number):Array{ - var x:String = ar[0]; - var y:String = ar[num % ar.length]; - ar[0] = y; - ar[num] = x; - return ar; - } -} -} diff --git a/test/swftests.unused/ClassCall.as b/test/swftests.unused/ClassCall.as deleted file mode 100644 index aef58daf3..000000000 --- a/test/swftests.unused/ClassCall.as +++ /dev/null @@ -1,17 +0,0 @@ -// input: [] -// output: 121 - -package { -public class ClassCall { - public static function main():int{ - var f:OtherClass = new OtherClass(); - return f.func(100,20); - } -} -} - -class OtherClass { - public function func(x: int, y: int):int { - return x+y+1; - } -} diff --git a/test/swftests.unused/ClassConstruction.as b/test/swftests.unused/ClassConstruction.as deleted file mode 100644 index 436479f8f..000000000 --- a/test/swftests.unused/ClassConstruction.as +++ /dev/null @@ -1,15 +0,0 @@ -// input: [] -// output: 0 - -package { -public class ClassConstruction { - public static function main():int{ - var f:Foo = new Foo(); - return 0; - } -} -} - -class Foo { - -} diff --git a/test/swftests.unused/ConstArrayAccess.as b/test/swftests.unused/ConstArrayAccess.as deleted file mode 100644 index 07dc3f460..000000000 --- 
a/test/swftests.unused/ConstArrayAccess.as +++ /dev/null @@ -1,18 +0,0 @@ -// input: [] -// output: 4 - -package { -public class ConstArrayAccess { - private static const x:int = 2; - private static const ar:Array = ["42", "3411"]; - - public static function main():int{ - var c:ConstArrayAccess = new ConstArrayAccess(); - return c.f(); - } - - public function f(): int { - return ar[1].length; - } -} -} diff --git a/test/swftests.unused/ConstantInt.as b/test/swftests.unused/ConstantInt.as deleted file mode 100644 index e0bbb6166..000000000 --- a/test/swftests.unused/ConstantInt.as +++ /dev/null @@ -1,12 +0,0 @@ -// input: [] -// output: 2 - -package { -public class ConstantInt { - private static const x:int = 2; - - public static function main():int{ - return x; - } -} -} diff --git a/test/swftests.unused/DictCall.as b/test/swftests.unused/DictCall.as deleted file mode 100644 index c2d174cc2..000000000 --- a/test/swftests.unused/DictCall.as +++ /dev/null @@ -1,10 +0,0 @@ -// input: [{"x": 1, "y": 2}] -// output: 3 - -package { -public class DictCall { - public static function main(d:Object):int{ - return d.x + d.y; - } -} -} diff --git a/test/swftests.unused/EqualsOperator.as b/test/swftests.unused/EqualsOperator.as deleted file mode 100644 index 837a69a46..000000000 --- a/test/swftests.unused/EqualsOperator.as +++ /dev/null @@ -1,10 +0,0 @@ -// input: [] -// output: false - -package { -public class EqualsOperator { - public static function main():Boolean{ - return 1 == 2; - } -} -} diff --git a/test/swftests.unused/LocalVars.as b/test/swftests.unused/LocalVars.as deleted file mode 100644 index b2911a9f3..000000000 --- a/test/swftests.unused/LocalVars.as +++ /dev/null @@ -1,13 +0,0 @@ -// input: [1, 2] -// output: 3 - -package { -public class LocalVars { - public static function main(a:int, b:int):int{ - var c:int = a + b + b; - var d:int = c - b; - var e:int = d; - return e; - } -} -} diff --git a/test/swftests.unused/MemberAssignment.as b/test/swftests.unused/MemberAssignment.as deleted file mode 100644 index dcba5e3ff..000000000 --- a/test/swftests.unused/MemberAssignment.as +++ /dev/null @@ -1,22 +0,0 @@ -// input: [1] -// output: 2 - -package { -public class MemberAssignment { - public var v:int; - - public function g():int { - return this.v; - } - - public function f(a:int):int{ - this.v = a; - return this.v + this.g(); - } - - public static function main(a:int): int { - var v:MemberAssignment = new MemberAssignment(); - return v.f(a); - } -} -} diff --git a/test/swftests.unused/NeOperator.as b/test/swftests.unused/NeOperator.as deleted file mode 100644 index 61dcbc4e9..000000000 --- a/test/swftests.unused/NeOperator.as +++ /dev/null @@ -1,24 +0,0 @@ -// input: [] -// output: 123 - -package { -public class NeOperator { - public static function main(): int { - var res:int = 0; - if (1 != 2) { - res += 3; - } else { - res += 4; - } - if (2 != 2) { - res += 10; - } else { - res += 20; - } - if (9 == 9) { - res += 100; - } - return res; - } -} -} diff --git a/test/swftests.unused/PrivateCall.as b/test/swftests.unused/PrivateCall.as deleted file mode 100644 index f1c110a37..000000000 --- a/test/swftests.unused/PrivateCall.as +++ /dev/null @@ -1,21 +0,0 @@ -// input: [] -// output: 9 - -package { -public class PrivateCall { - public static function main():int{ - var f:OtherClass = new OtherClass(); - return f.func(); - } -} -} - -class OtherClass { - private function pf():int { - return 9; - } - - public function func():int { - return this.pf(); - } -} diff --git 
a/test/swftests.unused/PrivateVoidCall.as b/test/swftests.unused/PrivateVoidCall.as deleted file mode 100644 index 2cc016797..000000000 --- a/test/swftests.unused/PrivateVoidCall.as +++ /dev/null @@ -1,22 +0,0 @@ -// input: [] -// output: 9 - -package { -public class PrivateVoidCall { - public static function main():int{ - var f:OtherClass = new OtherClass(); - f.func(); - return 9; - } -} -} - -class OtherClass { - private function pf():void { - ; - } - - public function func():void { - this.pf(); - } -} diff --git a/test/swftests.unused/StaticAssignment.as b/test/swftests.unused/StaticAssignment.as deleted file mode 100644 index b061c219d..000000000 --- a/test/swftests.unused/StaticAssignment.as +++ /dev/null @@ -1,13 +0,0 @@ -// input: [1] -// output: 1 - -package { -public class StaticAssignment { - public static var v:int; - - public static function main(a:int):int{ - v = a; - return v; - } -} -} diff --git a/test/swftests.unused/StaticRetrieval.as b/test/swftests.unused/StaticRetrieval.as deleted file mode 100644 index c8352d819..000000000 --- a/test/swftests.unused/StaticRetrieval.as +++ /dev/null @@ -1,16 +0,0 @@ -// input: [] -// output: 1 - -package { -public class StaticRetrieval { - public static var v:int; - - public static function main():int{ - if (v) { - return 0; - } else { - return 1; - } - } -} -} diff --git a/test/swftests.unused/StringBasics.as b/test/swftests.unused/StringBasics.as deleted file mode 100644 index d27430b13..000000000 --- a/test/swftests.unused/StringBasics.as +++ /dev/null @@ -1,11 +0,0 @@ -// input: [] -// output: 3 - -package { -public class StringBasics { - public static function main():int{ - var s:String = "abc"; - return s.length; - } -} -} diff --git a/test/swftests.unused/StringCharCodeAt.as b/test/swftests.unused/StringCharCodeAt.as deleted file mode 100644 index c20d74d65..000000000 --- a/test/swftests.unused/StringCharCodeAt.as +++ /dev/null @@ -1,11 +0,0 @@ -// input: [] -// output: 9897 - -package { -public class StringCharCodeAt { - public static function main():int{ - var s:String = "abc"; - return s.charCodeAt(1) * 100 + s.charCodeAt(); - } -} -} diff --git a/test/swftests.unused/StringConversion.as b/test/swftests.unused/StringConversion.as deleted file mode 100644 index c976f5042..000000000 --- a/test/swftests.unused/StringConversion.as +++ /dev/null @@ -1,11 +0,0 @@ -// input: [] -// output: 2 - -package { -public class StringConversion { - public static function main():int{ - var s:String = String(99); - return s.length; - } -} -} diff --git a/test/test_swfinterp.py.disabled b/test/test_swfinterp.py.disabled deleted file mode 100644 index 5d5b21e6d..000000000 --- a/test/test_swfinterp.py.disabled +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env python3 -from __future__ import unicode_literals - -# Allow direct execution -import os -import sys -import unittest -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - - -import errno -import io -import json -import re -import subprocess - -from yt_dlp.swfinterp import SWFInterpreter - - -TEST_DIR = os.path.join( - os.path.dirname(os.path.abspath(__file__)), 'swftests') - - -class TestSWFInterpreter(unittest.TestCase): - pass - - -def _make_testfunc(testfile): - m = re.match(r'^(.*)\.(as)$', testfile) - if not m: - return - test_id = m.group(1) - - def test_func(self): - as_file = os.path.join(TEST_DIR, testfile) - swf_file = os.path.join(TEST_DIR, test_id + '.swf') - if ((not os.path.exists(swf_file)) - or os.path.getmtime(swf_file) < os.path.getmtime(as_file)): 
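# (make-style staleness check: the compiled .swf is rebuilt only when it is
# missing or older than its ActionScript source)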
- # Recompile - try: - subprocess.check_call([ - 'mxmlc', '-output', swf_file, - '-static-link-runtime-shared-libraries', as_file]) - except OSError as ose: - if ose.errno == errno.ENOENT: - print('mxmlc not found! Skipping test.') - return - raise - - with open(swf_file, 'rb') as swf_f: - swf_content = swf_f.read() - swfi = SWFInterpreter(swf_content) - - with io.open(as_file, 'r', encoding='utf-8') as as_f: - as_content = as_f.read() - - def _find_spec(key): - m = re.search( - r'(?m)^//\s*%s:\s*(.*?)\n' % re.escape(key), as_content) - if not m: - raise ValueError('Cannot find %s in %s' % (key, testfile)) - return json.loads(m.group(1)) - - input_args = _find_spec('input') - output = _find_spec('output') - - swf_class = swfi.extract_class(test_id) - func = swfi.extract_function(swf_class, 'main') - res = func(input_args) - self.assertEqual(res, output) - - test_func.__name__ = str('test_swf_' + test_id) - setattr(TestSWFInterpreter, test_func.__name__, test_func) - - -for testfile in os.listdir(TEST_DIR): - _make_testfunc(testfile) - -if __name__ == '__main__': - unittest.main() diff --git a/test/test_unicode_literals.py.disabled b/test/test_unicode_literals.py.disabled deleted file mode 100644 index 6c1b7ec91..000000000 --- a/test/test_unicode_literals.py.disabled +++ /dev/null @@ -1,63 +0,0 @@ -from __future__ import unicode_literals - -# Allow direct execution -import os -import sys -import unittest -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -import io -import re - -rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - -IGNORED_FILES = [ - 'setup.py', # http://bugs.python.org/issue13943 - 'conf.py', - 'buildserver.py', -] - -IGNORED_DIRS = [ - '.git', - '.tox', -] - -from test.helper import assertRegexpMatches - - -class TestUnicodeLiterals(unittest.TestCase): - def test_all_files(self): - for dirpath, dirnames, filenames in os.walk(rootDir): - for ignore_dir in IGNORED_DIRS: - if ignore_dir in dirnames: - # If we remove the directory from dirnames os.walk won't - # recurse into it - dirnames.remove(ignore_dir) - for basename in filenames: - if not basename.endswith('.py'): - continue - if basename in IGNORED_FILES: - continue - - fn = os.path.join(dirpath, basename) - with io.open(fn, encoding='utf-8') as inf: - code = inf.read() - - if "'" not in code and '"' not in code: - continue - assertRegexpMatches( - self, - code, - r'(?:(?:#.*?|\s*)\n)*from __future__ import (?:[a-z_]+,\s*)*unicode_literals', - 'unicode_literals import missing in %s' % fn) - - m = re.search(r'(?<=\s)u[\'"](?!\)|,|$)', code) - if m is not None: - self.assertTrue( - m is None, - 'u present in %s, around %s' % ( - fn, code[m.start() - 10:m.end() + 10])) - - -if __name__ == '__main__': - unittest.main() diff --git a/yt_dlp/swfinterp.py.disabled b/yt_dlp/swfinterp.py.disabled deleted file mode 100644 index 0c7158575..000000000 --- a/yt_dlp/swfinterp.py.disabled +++ /dev/null @@ -1,834 +0,0 @@ -from __future__ import unicode_literals - -import collections -import io -import zlib - -from .compat import ( - compat_str, - compat_struct_unpack, -) -from .utils import ( - ExtractorError, -) - - -def _extract_tags(file_contents): - if file_contents[1:3] != b'WS': - raise ExtractorError( - 'Not an SWF file; header is %r' % file_contents[:3]) - if file_contents[:1] == b'C': - content = zlib.decompress(file_contents[8:]) - else: - raise NotImplementedError( - 'Unsupported compression format %r' % - file_contents[:1]) - - # Determine number of bits in framesize 
rectangle - framesize_nbits = compat_struct_unpack('!B', content[:1])[0] >> 3 - framesize_len = (5 + 4 * framesize_nbits + 7) // 8 - - pos = framesize_len + 2 + 2 - while pos < len(content): - header16 = compat_struct_unpack('> 6 - tag_len = header16 & 0x3f - if tag_len == 0x3f: - tag_len = compat_struct_unpack('= 0x80) else b'\x00' - return compat_struct_unpack('= 0 - resb = reader.read(count) - assert len(resb) == count - return resb - - -def _read_byte(reader): - resb = _read_bytes(1, reader=reader) - res = compat_struct_unpack('> 4 - methods = {} - constants = None - if kind == 0x00: # Slot - u30() # Slot id - u30() # type_name_idx - vindex = u30() - if vindex != 0: - read_byte() # vkind - elif kind == 0x06: # Const - u30() # Slot id - u30() # type_name_idx - vindex = u30() - vkind = 'any' - if vindex != 0: - vkind = read_byte() - if vkind == 0x03: # Constant_Int - value = self.constant_ints[vindex] - elif vkind == 0x04: # Constant_UInt - value = self.constant_uints[vindex] - else: - return {}, None # Ignore silently for now - constants = {self.multinames[trait_name_idx]: value} - elif kind in (0x01, 0x02, 0x03): # Method / Getter / Setter - u30() # disp_id - method_idx = u30() - methods[self.multinames[trait_name_idx]] = method_idx - elif kind == 0x04: # Class - u30() # slot_id - u30() # classi - elif kind == 0x05: # Function - u30() # slot_id - function_idx = u30() - methods[function_idx] = self.multinames[trait_name_idx] - else: - raise ExtractorError('Unsupported trait kind %d' % kind) - - if attrs & 0x4 != 0: # Metadata present - metadata_count = u30() - for _c3 in range(metadata_count): - u30() # metadata index - - return methods, constants - - # Classes - class_count = u30() - classes = [] - for class_id in range(class_count): - name_idx = u30() - - cname = self.multinames[name_idx] - avm_class = _AVMClass(name_idx, cname) - classes.append(avm_class) - - u30() # super_name idx - flags = read_byte() - if flags & 0x08 != 0: # Protected namespace is present - u30() # protected_ns_idx - intrf_count = u30() - for _c2 in range(intrf_count): - u30() - u30() # iinit - trait_count = u30() - for _c2 in range(trait_count): - trait_methods, trait_constants = parse_traits_info() - avm_class.register_methods(trait_methods) - if trait_constants: - avm_class.constants.update(trait_constants) - - assert len(classes) == class_count - self._classes_by_name = dict((c.name, c) for c in classes) - - for avm_class in classes: - avm_class.cinit_idx = u30() - trait_count = u30() - for _c2 in range(trait_count): - trait_methods, trait_constants = parse_traits_info() - avm_class.register_methods(trait_methods) - if trait_constants: - avm_class.constants.update(trait_constants) - - # Scripts - script_count = u30() - for _c in range(script_count): - u30() # init - trait_count = u30() - for _c2 in range(trait_count): - parse_traits_info() - - # Method bodies - method_body_count = u30() - Method = collections.namedtuple('Method', ['code', 'local_count']) - self._all_methods = [] - for _c in range(method_body_count): - method_idx = u30() - u30() # max_stack - local_count = u30() - u30() # init_scope_depth - u30() # max_scope_depth - code_length = u30() - code = read_bytes(code_length) - m = Method(code, local_count) - self._all_methods.append(m) - for avm_class in classes: - if method_idx in avm_class.method_idxs: - avm_class.methods[avm_class.method_idxs[method_idx]] = m - exception_count = u30() - for _c2 in range(exception_count): - u30() # from - u30() # to - u30() # target - u30() # exc_type - u30() # 
var_name - trait_count = u30() - for _c2 in range(trait_count): - parse_traits_info() - - assert p + code_reader.tell() == len(code_tag) - - def patch_function(self, avm_class, func_name, f): - self._patched_functions[(avm_class, func_name)] = f - - def extract_class(self, class_name, call_cinit=True): - try: - res = self._classes_by_name[class_name] - except KeyError: - raise ExtractorError('Class %r not found' % class_name) - - if call_cinit and hasattr(res, 'cinit_idx'): - res.register_methods({'$cinit': res.cinit_idx}) - res.methods['$cinit'] = self._all_methods[res.cinit_idx] - cinit = self.extract_function(res, '$cinit') - cinit([]) - - return res - - def extract_function(self, avm_class, func_name): - p = self._patched_functions.get((avm_class, func_name)) - if p: - return p - if func_name in avm_class.method_pyfunctions: - return avm_class.method_pyfunctions[func_name] - if func_name in self._classes_by_name: - return self._classes_by_name[func_name].make_object() - if func_name not in avm_class.methods: - raise ExtractorError('Cannot find function %s.%s' % ( - avm_class.name, func_name)) - m = avm_class.methods[func_name] - - def resfunc(args): - # Helper functions - coder = io.BytesIO(m.code) - s24 = lambda: _s24(coder) - u30 = lambda: _u30(coder) - - registers = [avm_class.variables] + list(args) + [None] * m.local_count - stack = [] - scopes = collections.deque([ - self._classes_by_name, avm_class.constants, avm_class.variables]) - while True: - opcode = _read_byte(coder) - if opcode == 9: # label - pass # Spec says: "Do nothing." - elif opcode == 16: # jump - offset = s24() - coder.seek(coder.tell() + offset) - elif opcode == 17: # iftrue - offset = s24() - value = stack.pop() - if value: - coder.seek(coder.tell() + offset) - elif opcode == 18: # iffalse - offset = s24() - value = stack.pop() - if not value: - coder.seek(coder.tell() + offset) - elif opcode == 19: # ifeq - offset = s24() - value2 = stack.pop() - value1 = stack.pop() - if value2 == value1: - coder.seek(coder.tell() + offset) - elif opcode == 20: # ifne - offset = s24() - value2 = stack.pop() - value1 = stack.pop() - if value2 != value1: - coder.seek(coder.tell() + offset) - elif opcode == 21: # iflt - offset = s24() - value2 = stack.pop() - value1 = stack.pop() - if value1 < value2: - coder.seek(coder.tell() + offset) - elif opcode == 32: # pushnull - stack.append(None) - elif opcode == 33: # pushundefined - stack.append(undefined) - elif opcode == 36: # pushbyte - v = _read_byte(coder) - stack.append(v) - elif opcode == 37: # pushshort - v = u30() - stack.append(v) - elif opcode == 38: # pushtrue - stack.append(True) - elif opcode == 39: # pushfalse - stack.append(False) - elif opcode == 40: # pushnan - stack.append(float('NaN')) - elif opcode == 42: # dup - value = stack[-1] - stack.append(value) - elif opcode == 44: # pushstring - idx = u30() - stack.append(self.constant_strings[idx]) - elif opcode == 48: # pushscope - new_scope = stack.pop() - scopes.append(new_scope) - elif opcode == 66: # construct - arg_count = u30() - args = list(reversed( - [stack.pop() for _ in range(arg_count)])) - obj = stack.pop() - res = obj.avm_class.make_object() - stack.append(res) - elif opcode == 70: # callproperty - index = u30() - mname = self.multinames[index] - arg_count = u30() - args = list(reversed( - [stack.pop() for _ in range(arg_count)])) - obj = stack.pop() - - if obj == StringClass: - if mname == 'String': - assert len(args) == 1 - assert isinstance(args[0], ( - int, compat_str, _Undefined)) - if args[0] == 
undefined: - res = 'undefined' - else: - res = compat_str(args[0]) - stack.append(res) - continue - else: - raise NotImplementedError( - 'Function String.%s is not yet implemented' - % mname) - elif isinstance(obj, _AVMClass_Object): - func = self.extract_function(obj.avm_class, mname) - res = func(args) - stack.append(res) - continue - elif isinstance(obj, _AVMClass): - func = self.extract_function(obj, mname) - res = func(args) - stack.append(res) - continue - elif isinstance(obj, _ScopeDict): - if mname in obj.avm_class.method_names: - func = self.extract_function(obj.avm_class, mname) - res = func(args) - else: - res = obj[mname] - stack.append(res) - continue - elif isinstance(obj, compat_str): - if mname == 'split': - assert len(args) == 1 - assert isinstance(args[0], compat_str) - if args[0] == '': - res = list(obj) - else: - res = obj.split(args[0]) - stack.append(res) - continue - elif mname == 'charCodeAt': - assert len(args) <= 1 - idx = 0 if len(args) == 0 else args[0] - assert isinstance(idx, int) - res = ord(obj[idx]) - stack.append(res) - continue - elif isinstance(obj, list): - if mname == 'slice': - assert len(args) == 1 - assert isinstance(args[0], int) - res = obj[args[0]:] - stack.append(res) - continue - elif mname == 'join': - assert len(args) == 1 - assert isinstance(args[0], compat_str) - res = args[0].join(obj) - stack.append(res) - continue - raise NotImplementedError( - 'Unsupported property %r on %r' - % (mname, obj)) - elif opcode == 71: # returnvoid - res = undefined - return res - elif opcode == 72: # returnvalue - res = stack.pop() - return res - elif opcode == 73: # constructsuper - # Not yet implemented, just hope it works without it - arg_count = u30() - args = list(reversed( - [stack.pop() for _ in range(arg_count)])) - obj = stack.pop() - elif opcode == 74: # constructproperty - index = u30() - arg_count = u30() - args = list(reversed( - [stack.pop() for _ in range(arg_count)])) - obj = stack.pop() - - mname = self.multinames[index] - assert isinstance(obj, _AVMClass) - - # We do not actually call the constructor for now; - # we just pretend it does nothing - stack.append(obj.make_object()) - elif opcode == 79: # callpropvoid - index = u30() - mname = self.multinames[index] - arg_count = u30() - args = list(reversed( - [stack.pop() for _ in range(arg_count)])) - obj = stack.pop() - if isinstance(obj, _AVMClass_Object): - func = self.extract_function(obj.avm_class, mname) - res = func(args) - assert res is undefined - continue - if isinstance(obj, _ScopeDict): - assert mname in obj.avm_class.method_names - func = self.extract_function(obj.avm_class, mname) - res = func(args) - assert res is undefined - continue - if mname == 'reverse': - assert isinstance(obj, list) - obj.reverse() - else: - raise NotImplementedError( - 'Unsupported (void) property %r on %r' - % (mname, obj)) - elif opcode == 86: # newarray - arg_count = u30() - arr = [] - for i in range(arg_count): - arr.append(stack.pop()) - arr = arr[::-1] - stack.append(arr) - elif opcode == 93: # findpropstrict - index = u30() - mname = self.multinames[index] - for s in reversed(scopes): - if mname in s: - res = s - break - else: - res = scopes[0] - if mname not in res and mname in _builtin_classes: - stack.append(_builtin_classes[mname]) - else: - stack.append(res[mname]) - elif opcode == 94: # findproperty - index = u30() - mname = self.multinames[index] - for s in reversed(scopes): - if mname in s: - res = s - break - else: - res = avm_class.variables - stack.append(res) - elif opcode == 96: # 
getlex - index = u30() - mname = self.multinames[index] - for s in reversed(scopes): - if mname in s: - scope = s - break - else: - scope = avm_class.variables - - if mname in scope: - res = scope[mname] - elif mname in _builtin_classes: - res = _builtin_classes[mname] - else: - # Assume uninitialized - # TODO warn here - res = undefined - stack.append(res) - elif opcode == 97: # setproperty - index = u30() - value = stack.pop() - idx = self.multinames[index] - if isinstance(idx, _Multiname): - idx = stack.pop() - obj = stack.pop() - obj[idx] = value - elif opcode == 98: # getlocal - index = u30() - stack.append(registers[index]) - elif opcode == 99: # setlocal - index = u30() - value = stack.pop() - registers[index] = value - elif opcode == 102: # getproperty - index = u30() - pname = self.multinames[index] - if pname == 'length': - obj = stack.pop() - assert isinstance(obj, (compat_str, list)) - stack.append(len(obj)) - elif isinstance(pname, compat_str): # Member access - obj = stack.pop() - if isinstance(obj, _AVMClass): - res = obj.static_properties[pname] - stack.append(res) - continue - - assert isinstance(obj, (dict, _ScopeDict)),\ - 'Accessing member %r on %r' % (pname, obj) - res = obj.get(pname, undefined) - stack.append(res) - else: # Assume attribute access - idx = stack.pop() - assert isinstance(idx, int) - obj = stack.pop() - assert isinstance(obj, list) - stack.append(obj[idx]) - elif opcode == 104: # initproperty - index = u30() - value = stack.pop() - idx = self.multinames[index] - if isinstance(idx, _Multiname): - idx = stack.pop() - obj = stack.pop() - obj[idx] = value - elif opcode == 115: # convert_ - value = stack.pop() - intvalue = int(value) - stack.append(intvalue) - elif opcode == 128: # coerce - u30() - elif opcode == 130: # coerce_a - value = stack.pop() - # um, yes, it's any value - stack.append(value) - elif opcode == 133: # coerce_s - assert isinstance(stack[-1], (type(None), compat_str)) - elif opcode == 147: # decrement - value = stack.pop() - assert isinstance(value, int) - stack.append(value - 1) - elif opcode == 149: # typeof - value = stack.pop() - return { - _Undefined: 'undefined', - compat_str: 'String', - int: 'Number', - float: 'Number', - }[type(value)] - elif opcode == 160: # add - value2 = stack.pop() - value1 = stack.pop() - res = value1 + value2 - stack.append(res) - elif opcode == 161: # subtract - value2 = stack.pop() - value1 = stack.pop() - res = value1 - value2 - stack.append(res) - elif opcode == 162: # multiply - value2 = stack.pop() - value1 = stack.pop() - res = value1 * value2 - stack.append(res) - elif opcode == 164: # modulo - value2 = stack.pop() - value1 = stack.pop() - res = value1 % value2 - stack.append(res) - elif opcode == 168: # bitand - value2 = stack.pop() - value1 = stack.pop() - assert isinstance(value1, int) - assert isinstance(value2, int) - res = value1 & value2 - stack.append(res) - elif opcode == 171: # equals - value2 = stack.pop() - value1 = stack.pop() - result = value1 == value2 - stack.append(result) - elif opcode == 175: # greaterequals - value2 = stack.pop() - value1 = stack.pop() - result = value1 >= value2 - stack.append(result) - elif opcode == 192: # increment_i - value = stack.pop() - assert isinstance(value, int) - stack.append(value + 1) - elif opcode == 208: # getlocal_0 - stack.append(registers[0]) - elif opcode == 209: # getlocal_1 - stack.append(registers[1]) - elif opcode == 210: # getlocal_2 - stack.append(registers[2]) - elif opcode == 211: # getlocal_3 - stack.append(registers[3]) - elif 
opcode == 212: # setlocal_0 - registers[0] = stack.pop() - elif opcode == 213: # setlocal_1 - registers[1] = stack.pop() - elif opcode == 214: # setlocal_2 - registers[2] = stack.pop() - elif opcode == 215: # setlocal_3 - registers[3] = stack.pop() - else: - raise NotImplementedError( - 'Unsupported opcode %d' % opcode) - - avm_class.method_pyfunctions[func_name] = resfunc - return resfunc From cfb0511d822b39748c5a64dfe86b61ff8d5af176 Mon Sep 17 00:00:00 2001 From: felix Date: Thu, 30 Dec 2021 13:23:36 +0100 Subject: [PATCH 0940/2552] [cleanup] Remove unused code paths (#2173) Notes: * `_windows_write_string`: Fixed in 3.6 * https://bugs.python.org/issue1602 * PEP: https://www.python.org/dev/peps/pep-0528 * Windows UTF-8 fix: Fixed in 3.3 * https://bugs.python.org/issue13216 * `__loader__`: is always present in 3.3+ * https://bugs.python.org/issue14646 * `workaround_optparse_bug9161`: Fixed in 2.7 * https://bugs.python.org/issue9161 Authored by: fstirlitz --- test/helper.py | 2 +- test/test_execution.py | 3 +- test/test_http.py | 29 +--- test/test_utils.py | 3 - yt_dlp/YoutubeDL.py | 6 - yt_dlp/__init__.py | 9 -- yt_dlp/__main__.py | 3 +- yt_dlp/compat.py | 24 +-- yt_dlp/extractor/bpb.py | 1 - yt_dlp/extractor/common.py | 4 +- yt_dlp/extractor/commonmistakes.py | 6 +- yt_dlp/extractor/generic.py | 4 - yt_dlp/update.py | 36 +---- yt_dlp/utils.py | 247 +++-------------------------- 14 files changed, 32 insertions(+), 345 deletions(-) diff --git a/test/helper.py b/test/helper.py index 28c21b2eb..804e954a3 100644 --- a/test/helper.py +++ b/test/helper.py @@ -64,7 +64,7 @@ def report_warning(message): else: _msg_header = 'WARNING:' output = '%s %s\n' % (_msg_header, message) - if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3: + if 'b' in getattr(sys.stderr, 'mode', ''): output = output.encode(preferredencoding()) sys.stderr.write(output) diff --git a/test/test_execution.py b/test/test_execution.py index cf6b6b913..4981786e1 100644 --- a/test/test_execution.py +++ b/test/test_execution.py @@ -26,8 +26,7 @@ class TestExecution(unittest.TestCase): subprocess.check_call([sys.executable, '-c', 'import yt_dlp'], cwd=rootDir) def test_module_exec(self): - if sys.version_info >= (2, 7): # Python 2.6 doesn't support package execution - subprocess.check_call([sys.executable, '-m', 'yt_dlp', '--version'], cwd=rootDir, stdout=_DEV_NULL) + subprocess.check_call([sys.executable, '-m', 'yt_dlp', '--version'], cwd=rootDir, stdout=_DEV_NULL) def test_main_exec(self): subprocess.check_call([sys.executable, 'yt_dlp/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL) diff --git a/test/test_http.py b/test/test_http.py index 40df167e0..eec8684b1 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -32,17 +32,6 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): self.send_header('Content-Type', 'video/mp4') self.end_headers() self.wfile.write(b'\x00\x00\x00\x00\x20\x66\x74[video]') - elif self.path == '/302': - if sys.version_info[0] == 3: - # XXX: Python 3 http server does not allow non-ASCII header values - self.send_response(404) - self.end_headers() - return - - new_url = 'http://127.0.0.1:%d/中文.html' % http_server_port(self.server) - self.send_response(302) - self.send_header(b'Location', new_url.encode('utf-8')) - self.end_headers() elif self.path == '/%E4%B8%AD%E6%96%87.html': self.send_response(200) self.send_header('Content-Type', 'text/html; charset=utf-8') @@ -72,15 +61,6 @@ class TestHTTP(unittest.TestCase): self.server_thread.daemon = True 
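# (daemon=True lets the test process exit without joining the HTTP helper thread)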
self.server_thread.start() - def test_unicode_path_redirection(self): - # XXX: Python 3 http server does not allow non-ASCII header values - if sys.version_info[0] == 3: - return - - ydl = YoutubeDL({'logger': FakeLogger()}) - r = ydl.extract_info('http://127.0.0.1:%d/302' % self.port) - self.assertEqual(r['entries'][0]['url'], 'http://127.0.0.1:%d/vid.mp4' % self.port) - class TestHTTPS(unittest.TestCase): def setUp(self): @@ -95,11 +75,10 @@ class TestHTTPS(unittest.TestCase): self.server_thread.start() def test_nocheckcertificate(self): - if sys.version_info >= (2, 7, 9): # No certificate checking anyways - ydl = YoutubeDL({'logger': FakeLogger()}) - self.assertRaises( - Exception, - ydl.extract_info, 'https://127.0.0.1:%d/video.html' % self.port) + ydl = YoutubeDL({'logger': FakeLogger()}) + self.assertRaises( + Exception, + ydl.extract_info, 'https://127.0.0.1:%d/video.html' % self.port) ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True}) r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port) diff --git a/test/test_utils.py b/test/test_utils.py index 1f826c2f2..c1228c74a 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -538,9 +538,6 @@ class TestUtil(unittest.TestCase): self.assertEqual(str_to_int('123,456'), 123456) self.assertEqual(str_to_int('123.456'), 123456) self.assertEqual(str_to_int(523), 523) - # Python 3 has no long - if sys.version_info < (3, 0): - eval('self.assertEqual(str_to_int(123456L), 123456)') self.assertEqual(str_to_int('noninteger'), None) self.assertEqual(str_to_int([]), None) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index fef05d517..c626ea3fd 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1008,12 +1008,6 @@ class YoutubeDL(object): expand_path(paths.get('home', '').strip()), expand_path(paths.get(dir_type, '').strip()) if dir_type else '', filename or '') - - # Temporary fix for #4787 - # 'Treat' all problem characters by passing filename through preferredencoding - # to workaround encoding issues with subprocess on python2 @ Windows - if sys.version_info < (3, 0) and sys.platform == 'win32': - path = encodeFilename(path, True).decode(preferredencoding()) return sanitize_path(path, force=self.params.get('windowsfilenames')) @staticmethod diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index ebf2d227a..10dc221b4 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -5,7 +5,6 @@ f'You are using an unsupported version of Python. 
Only Python versions 3.6 and a __license__ = 'Public Domain' -import codecs import io import itertools import os @@ -18,7 +17,6 @@ from .compat import ( compat_getpass, compat_os_name, compat_shlex_quote, - workaround_optparse_bug9161, ) from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS from .utils import ( @@ -807,13 +805,6 @@ def parse_options(argv=None): def _real_main(argv=None): - # Compatibility fixes for Windows - if sys.platform == 'win32': - # https://github.com/ytdl-org/youtube-dl/issues/820 - codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None) - - workaround_optparse_bug9161() - setproctitle('yt-dlp') parser, opts, all_urls, ydl_opts = parse_options(argv) diff --git a/yt_dlp/__main__.py b/yt_dlp/__main__.py index c9f41473d..fb2726bd3 100644 --- a/yt_dlp/__main__.py +++ b/yt_dlp/__main__.py @@ -2,8 +2,7 @@ from __future__ import unicode_literals # Execute with -# $ python yt_dlp/__main__.py (2.6+) -# $ python -m yt_dlp (2.7+) +# $ python -m yt_dlp import sys diff --git a/yt_dlp/compat.py b/yt_dlp/compat.py index 0a0d3b351..6128ff524 100644 --- a/yt_dlp/compat.py +++ b/yt_dlp/compat.py @@ -13,7 +13,6 @@ import http.cookiejar import http.cookies import http.server import itertools -import optparse import os import re import shlex @@ -86,28 +85,6 @@ def compat_print(s): assert isinstance(s, compat_str) print(s) - -# Fix https://github.com/ytdl-org/youtube-dl/issues/4223 -# See http://bugs.python.org/issue9161 for what is broken -def workaround_optparse_bug9161(): - op = optparse.OptionParser() - og = optparse.OptionGroup(op, 'foo') - try: - og.add_option('-t') - except TypeError: - real_add_option = optparse.OptionGroup.add_option - - def _compat_add_option(self, *args, **kwargs): - enc = lambda v: ( - v.encode('ascii', 'replace') if isinstance(v, compat_str) - else v) - bargs = [enc(a) for a in args] - bkwargs = dict( - (k, enc(v)) for k, v in kwargs.items()) - return real_add_option(self, *bargs, **bkwargs) - optparse.OptionGroup.add_option = _compat_add_option - - try: compat_Pattern = re.Pattern except AttributeError: @@ -207,6 +184,7 @@ compat_numeric_types = (int, float, complex) compat_str = str compat_xpath = lambda xpath: xpath compat_zip = zip +workaround_optparse_bug9161 = lambda: None compat_collections_abc = collections.abc compat_HTMLParser = html.parser.HTMLParser diff --git a/yt_dlp/extractor/bpb.py b/yt_dlp/extractor/bpb.py index 8f6ef3cf0..98491975c 100644 --- a/yt_dlp/extractor/bpb.py +++ b/yt_dlp/extractor/bpb.py @@ -16,7 +16,6 @@ class BpbIE(InfoExtractor): _TEST = { 'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr', - # md5 fails in Python 2.6 due to buggy server response and wrong handling of urllib2 'md5': 'c4f84c8a8044ca9ff68bb8441d300b3f', 'info_dict': { 'id': '297', diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 9914910d0..48f302f86 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -3606,9 +3606,7 @@ class InfoExtractor(object): for header, cookies in url_handle.headers.items(): if header.lower() != 'set-cookie': continue - if sys.version_info[0] >= 3: - cookies = cookies.encode('iso-8859-1') - cookies = cookies.decode('utf-8') + cookies = cookies.encode('iso-8859-1').decode('utf-8') cookie_value = re.search( r'%s=(.+?);.*?\b[Dd]omain=(.+?)(?:[,;]|$)' % cookie, cookies) if cookie_value: diff --git a/yt_dlp/extractor/commonmistakes.py b/yt_dlp/extractor/commonmistakes.py index 051269652..e0a9f5956 100644 --- 
a/yt_dlp/extractor/commonmistakes.py +++ b/yt_dlp/extractor/commonmistakes.py @@ -1,7 +1,5 @@ from __future__ import unicode_literals -import sys - from .common import InfoExtractor from ..utils import ExtractorError @@ -35,9 +33,7 @@ class UnicodeBOMIE(InfoExtractor): IE_DESC = False _VALID_URL = r'(?P\ufeff)(?P.*)$' - # Disable test for python 3.2 since BOM is broken in re in this version - # (see https://github.com/ytdl-org/youtube-dl/issues/9751) - _TESTS = [] if (3, 0) < sys.version_info <= (3, 3) else [{ + _TESTS = [{ 'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc', 'only_matching': True, }] diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index bd56ad289..f11fc844d 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -4,7 +4,6 @@ from __future__ import unicode_literals import os import re -import sys from .common import InfoExtractor from .youtube import YoutubeIE @@ -4011,9 +4010,6 @@ class GenericIE(InfoExtractor): # Look also in Refresh HTTP header refresh_header = head_response.headers.get('Refresh') if refresh_header: - # In python 2 response HTTP headers are bytestrings - if sys.version_info < (3, 0) and isinstance(refresh_header, str): - refresh_header = refresh_header.decode('iso-8859-1') found = re.search(REDIRECT_REGEX, refresh_header) if found: new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1))) diff --git a/yt_dlp/update.py b/yt_dlp/update.py index a208e163c..f6ac207a1 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -15,22 +15,6 @@ from .utils import encode_compat_str, Popen, write_string from .version import __version__ -''' # Not signed -def rsa_verify(message, signature, key): - from hashlib import sha256 - assert isinstance(message, bytes) - byte_size = (len(bin(key[0])) - 2 + 8 - 1) // 8 - signature = ('%x' % pow(int(signature, 16), key[1], key[0])).encode() - signature = (byte_size * 2 - len(signature)) * b'0' + signature - asn1 = b'3031300d060960864801650304020105000420' - asn1 += sha256(message).hexdigest().encode() - if byte_size < len(asn1) // 2 + 11: - return False - expected = b'0001' + (byte_size - len(asn1) // 2 - 3) * b'ff' + b'00' + asn1 - return expected == signature -''' - - def detect_variant(): if hasattr(sys, 'frozen'): prefix = 'mac' if sys.platform == 'darwin' else 'win' @@ -39,7 +23,7 @@ def detect_variant(): return f'{prefix}_dir' return f'{prefix}_exe' return 'py2exe' - elif isinstance(globals().get('__loader__'), zipimporter): + elif isinstance(__loader__, zipimporter): return 'zip' elif os.path.basename(sys.argv[0]) == '__main__.py': return 'source' @@ -232,24 +216,6 @@ def run_update(ydl): assert False, f'Unhandled variant: {variant}' -''' # UNUSED -def get_notes(versions, fromVersion): - notes = [] - for v, vdata in sorted(versions.items()): - if v > fromVersion: - notes.extend(vdata.get('notes', [])) - return notes - - -def print_notes(to_screen, versions, fromVersion=__version__): - notes = get_notes(versions, fromVersion) - if notes: - to_screen('PLEASE NOTE:') - for note in notes: - to_screen(note) -''' - - # Deprecated def update_self(to_screen, verbose, opener): diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 14dbbf59f..324b54e78 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -50,7 +50,6 @@ from .compat import ( compat_brotli, compat_chr, compat_cookiejar, - compat_ctypes_WINFUNCTYPE, compat_etree_fromstring, compat_expanduser, compat_html_entities, @@ -288,37 +287,9 @@ def preferredencoding(): def write_json_file(obj, fn): """ Encode obj as 
JSON and write it to fn, atomically if possible """ - fn = encodeFilename(fn) - if sys.version_info < (3, 0) and sys.platform != 'win32': - encoding = get_filesystem_encoding() - # os.path.basename returns a bytes object, but NamedTemporaryFile - # will fail if the filename contains non ascii characters unless we - # use a unicode object - path_basename = lambda f: os.path.basename(fn).decode(encoding) - # the same for os.path.dirname - path_dirname = lambda f: os.path.dirname(fn).decode(encoding) - else: - path_basename = os.path.basename - path_dirname = os.path.dirname - - args = { - 'suffix': '.tmp', - 'prefix': path_basename(fn) + '.', - 'dir': path_dirname(fn), - 'delete': False, - } - - # In Python 2.x, json.dump expects a bytestream. - # In Python 3.x, it writes to a character stream - if sys.version_info < (3, 0): - args['mode'] = 'wb' - else: - args.update({ - 'mode': 'w', - 'encoding': 'utf-8', - }) - - tf = tempfile.NamedTemporaryFile(**compat_kwargs(args)) + tf = tempfile.NamedTemporaryFile( + prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn), + suffix='.tmp', delete=False, mode='w', encoding='utf-8') try: with tf: @@ -345,20 +316,11 @@ def write_json_file(obj, fn): raise -if sys.version_info >= (2, 7): - def find_xpath_attr(node, xpath, key, val=None): - """ Find the xpath xpath[@key=val] """ - assert re.match(r'^[a-zA-Z_-]+$', key) - expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val)) - return node.find(expr) -else: - def find_xpath_attr(node, xpath, key, val=None): - for f in node.findall(compat_xpath(xpath)): - if key not in f.attrib: - continue - if val is None or f.attrib.get(key) == val: - return f - return None +def find_xpath_attr(node, xpath, key, val=None): + """ Find the xpath xpath[@key=val] """ + assert re.match(r'^[a-zA-Z_-]+$', key) + expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val)) + return node.find(expr) # On python2.6 the xml.etree.ElementTree.Element methods don't support # the namespace parameter @@ -626,8 +588,6 @@ def extract_attributes(html_element): 'empty': '', 'noval': None, 'entity': '&', 'sq': '"', 'dq': '\'' }. - NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions, - but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5. """ parser = HTMLAttributeParser() try: @@ -763,8 +723,6 @@ def sanitize_path(s, force=False): if sys.platform == 'win32': force = False drive_or_unc, _ = os.path.splitdrive(s) - if sys.version_info < (2, 7) and not drive_or_unc: - drive_or_unc, _ = os.path.splitunc(s) elif force: drive_or_unc = '' else: @@ -922,51 +880,23 @@ def get_subprocess_encoding(): def encodeFilename(s, for_subprocess=False): - """ - @param s The name of the file - """ - - assert type(s) == compat_str - - # Python 3 has a Unicode API - if sys.version_info >= (3, 0): - return s - - # Pass '' directly to use Unicode APIs on Windows 2000 and up - # (Detecting Windows NT 4 is tricky because 'major >= 4' would - # match Windows 9x series as well. Besides, NT 4 is obsolete.) 
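The rewritten write_json_file above keeps the atomic-write idea while dropping all of the Python 2 branching: the temporary file is created next to the target so the final rename cannot cross filesystems. A minimal sketch of the same pattern, using a hypothetical standalone helper (the real function also carries extra handling around the rename, as the surrounding hunk shows):

import json
import os
import tempfile

def atomic_write_json(obj, fn):
    # Create the temp file in the destination directory so os.replace()
    # stays on one filesystem and the swap is atomic
    tf = tempfile.NamedTemporaryFile(
        prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn) or '.',
        suffix='.tmp', delete=False, mode='w', encoding='utf-8')
    try:
        with tf:
            json.dump(obj, tf)
        os.replace(tf.name, fn)
    except Exception:
        os.remove(tf.name)
        raise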
- if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5: - return s - - # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible - if sys.platform.startswith('java'): - return s - - return s.encode(get_subprocess_encoding(), 'ignore') + assert type(s) == str + return s def decodeFilename(b, for_subprocess=False): - - if sys.version_info >= (3, 0): - return b - - if not isinstance(b, bytes): - return b - - return b.decode(get_subprocess_encoding(), 'ignore') + return b def encodeArgument(s): - if not isinstance(s, compat_str): - # Legacy code that uses byte strings - # Uncomment the following line after fixing all post processors - # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s)) - s = s.decode('ascii') - return encodeFilename(s, True) + # Legacy code that uses byte strings + # Uncomment the following line after fixing all post processors + # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s)) + return s if isinstance(s, str) else s.decode('ascii') def decodeArgument(b): - return decodeFilename(b, True) + return b def decodeOption(optval): @@ -1263,11 +1193,6 @@ class XAttrUnavailableError(YoutubeDLError): def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs): - # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting - # expected HTTP responses to meet HTTP/1.0 or later (see also - # https://github.com/ytdl-org/youtube-dl/issues/6727) - if sys.version_info < (3, 0): - kwargs['strict'] = True hc = http_class(*args, **compat_kwargs(kwargs)) source_address = ydl_handler._params.get('source_address') @@ -1309,20 +1234,7 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs): raise socket.error('getaddrinfo returns an empty list') if hasattr(hc, '_create_connection'): hc._create_connection = _create_connection - sa = (source_address, 0) - if hasattr(hc, 'source_address'): # Python 2.7+ - hc.source_address = sa - else: # Python 2.6 - def _hc_connect(self, *args, **kwargs): - sock = _create_connection( - (self.host, self.port), self.timeout, sa) - if is_https: - self.sock = ssl.wrap_socket( - sock, self.key_file, self.cert_file, - ssl_version=ssl.PROTOCOL_TLSv1) - else: - self.sock = sock - hc.connect = functools.partial(_hc_connect, hc) + hc.source_address = (source_address, 0) return hc @@ -1413,11 +1325,6 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): req.headers = handle_youtubedl_headers(req.headers) - if sys.version_info < (2, 7) and '#' in req.get_full_url(): - # Python 2.6 is brain-dead when it comes to fragments - req._Request__original = req._Request__original.partition('#')[0] - req._Request__r_type = req._Request__r_type.partition('#')[0] - return req def http_response(self, req, resp): @@ -1461,15 +1368,10 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): location = resp.headers.get('Location') if location: # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3 - if sys.version_info >= (3, 0): - location = location.encode('iso-8859-1').decode('utf-8') - else: - location = location.decode('utf-8') + location = location.encode('iso-8859-1').decode('utf-8') location_escaped = escape_url(location) if location != location_escaped: del resp.headers['Location'] - if sys.version_info < (3, 0): - location_escaped = location_escaped.encode('utf-8') resp.headers['Location'] = location_escaped return resp @@ 
-1668,19 +1570,6 @@ class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor): compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar) def http_response(self, request, response): - # Python 2 will choke on next HTTP request in row if there are non-ASCII - # characters in Set-Cookie HTTP header of last response (see - # https://github.com/ytdl-org/youtube-dl/issues/6769). - # In order to at least prevent crashing we will percent encode Set-Cookie - # header before HTTPCookieProcessor starts processing it. - # if sys.version_info < (3, 0) and response.headers: - # for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'): - # set_cookie = response.headers.get(set_cookie_header) - # if set_cookie: - # set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ") - # if set_cookie != set_cookie_escaped: - # del response.headers[set_cookie_header] - # response.headers[set_cookie_header] = set_cookie_escaped return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response) https_request = compat_urllib_request.HTTPCookieProcessor.http_request @@ -1724,12 +1613,6 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler): # essentially all clients do redirect in this case, so we do # the same. - # On python 2 urlh.geturl() may sometimes return redirect URL - # as byte string instead of unicode. This workaround allows - # to force it always return unicode. - if sys.version_info[0] < 3: - newurl = compat_str(newurl) - # Be conciliant with URIs containing a space. This is mainly # redundant with the more complete encoding done in http_error_302(), # but it is kept for compatibility with other callers. @@ -2013,91 +1896,12 @@ def get_windows_version(): return None -def _windows_write_string(s, out): - """ Returns True if the string was written using special methods, - False if it has yet to be written out.""" - # Adapted from http://stackoverflow.com/a/3259271/35070 - - import ctypes.wintypes - - WIN_OUTPUT_IDS = { - 1: -11, - 2: -12, - } - - try: - fileno = out.fileno() - except AttributeError: - # If the output stream doesn't have a fileno, it's virtual - return False - except io.UnsupportedOperation: - # Some strange Windows pseudo files? 
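Both the Set-Cookie handling in common.py and the Location handling above rely on the same round-trip: Python 3's http machinery decodes raw header bytes as iso-8859-1, so a UTF-8 header value arrives as mojibake until it is re-encoded and decoded properly. A standalone illustration of that round-trip (not yt-dlp code):

raw = 'Tübingen'.encode('utf-8')        # bytes the server actually sent
as_received = raw.decode('iso-8859-1')  # what urllib hands back: 'TÃ¼bingen'
assert as_received.encode('iso-8859-1').decode('utf-8') == 'Tübingen'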
- return False - if fileno not in WIN_OUTPUT_IDS: - return False - - GetStdHandle = compat_ctypes_WINFUNCTYPE( - ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)( - ('GetStdHandle', ctypes.windll.kernel32)) - h = GetStdHandle(WIN_OUTPUT_IDS[fileno]) - - WriteConsoleW = compat_ctypes_WINFUNCTYPE( - ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR, - ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD), - ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32)) - written = ctypes.wintypes.DWORD(0) - - GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32)) - FILE_TYPE_CHAR = 0x0002 - FILE_TYPE_REMOTE = 0x8000 - GetConsoleMode = compat_ctypes_WINFUNCTYPE( - ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, - ctypes.POINTER(ctypes.wintypes.DWORD))( - ('GetConsoleMode', ctypes.windll.kernel32)) - INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value - - def not_a_console(handle): - if handle == INVALID_HANDLE_VALUE or handle is None: - return True - return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR - or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0) - - if not_a_console(h): - return False - - def next_nonbmp_pos(s): - try: - return next(i for i, c in enumerate(s) if ord(c) > 0xffff) - except StopIteration: - return len(s) - - while s: - count = min(next_nonbmp_pos(s), 1024) - - ret = WriteConsoleW( - h, s, count if count else 2, ctypes.byref(written), None) - if ret == 0: - raise OSError('Failed to write string') - if not count: # We just wrote a non-BMP character - assert written.value == 2 - s = s[1:] - else: - assert written.value > 0 - s = s[written.value:] - return True - - def write_string(s, out=None, encoding=None): if out is None: out = sys.stderr assert type(s) == compat_str - if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'): - if _windows_write_string(s, out): - return - - if ('b' in getattr(out, 'mode', '') - or sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr + if 'b' in getattr(out, 'mode', ''): byt = s.encode(encoding or preferredencoding(), 'ignore') out.write(byt) elif hasattr(out, 'buffer'): @@ -2985,8 +2789,6 @@ def lowercase_escape(s): def escape_rfc3986(s): """Escape non-ASCII characters as suggested by RFC 3986""" - if sys.version_info < (3, 0) and isinstance(s, compat_str): - s = s.encode('utf-8') return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]") @@ -3335,12 +3137,7 @@ def args_to_str(args): def error_to_compat_str(err): - err_str = str(err) - # On python 2 error byte string must be decoded with proper - # encoding rather than ascii - if sys.version_info[0] < 3: - err_str = err_str.decode(preferredencoding()) - return err_str + return str(err) def error_to_str(err): @@ -5144,7 +4941,7 @@ def get_executable_path(): from zipimport import zipimporter if hasattr(sys, 'frozen'): # Running from PyInstaller path = os.path.dirname(sys.executable) - elif isinstance(globals().get('__loader__'), zipimporter): # Running from ZIP + elif isinstance(__loader__, zipimporter): # Running from ZIP path = os.path.join(os.path.dirname(__file__), '../..') else: path = os.path.join(os.path.dirname(__file__), '..') @@ -5436,8 +5233,6 @@ class Config: try: # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56 contents = optionf.read() - if sys.version_info < (3,): - contents = contents.decode(preferredencoding()) res = compat_shlex_split(contents, 
comments=True) finally: optionf.close() From f9934b96145af8ac5dfdcbf684827aeaea9912a7 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 12 Apr 2022 01:39:26 +0530 Subject: [PATCH 0941/2552] [cleanup] Mark some compat variables for removal (#2173) Authored by fstirlitz, pukkandan --- test/test_compat.py | 36 ----------- test/test_youtube_signature.py | 5 +- yt_dlp/YoutubeDL.py | 16 ++--- yt_dlp/compat.py | 71 ++++++++++----------- yt_dlp/downloader/ism.py | 22 +++---- yt_dlp/extractor/abematv.py | 8 +-- yt_dlp/extractor/adobepass.py | 3 +- yt_dlp/extractor/afreecatv.py | 5 +- yt_dlp/extractor/bbc.py | 4 +- yt_dlp/extractor/brightcove.py | 4 +- yt_dlp/extractor/common.py | 11 ++-- yt_dlp/extractor/crunchyroll.py | 10 +-- yt_dlp/extractor/generic.py | 6 +- yt_dlp/extractor/microsoftvirtualacademy.py | 9 +-- yt_dlp/extractor/mildom.py | 4 +- yt_dlp/extractor/mixcloud.py | 3 +- yt_dlp/extractor/mtv.py | 5 +- yt_dlp/extractor/noz.py | 3 +- yt_dlp/extractor/openload.py | 3 +- yt_dlp/extractor/soundcloud.py | 3 +- yt_dlp/extractor/udemy.py | 3 +- yt_dlp/extractor/vimeo.py | 3 +- yt_dlp/options.py | 21 +++--- yt_dlp/postprocessor/sponskrub.py | 4 +- yt_dlp/utils.py | 44 ++++++------- yt_dlp/webvtt.py | 1 - 26 files changed, 121 insertions(+), 186 deletions(-) diff --git a/test/test_compat.py b/test/test_compat.py index c9bc4d7fb..6cbffd6fe 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -13,14 +13,10 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from yt_dlp.compat import ( compat_getenv, compat_setenv, - compat_etree_Element, compat_etree_fromstring, compat_expanduser, - compat_shlex_split, compat_str, compat_struct_unpack, - compat_urllib_parse_quote, - compat_urllib_parse_quote_plus, compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus, compat_urllib_parse_urlencode, @@ -55,27 +51,6 @@ class TestCompat(unittest.TestCase): dir(yt_dlp.compat))) - set(['unicode_literals']) self.assertEqual(all_names, sorted(present_names)) - def test_compat_urllib_parse_quote(self): - self.assertEqual(compat_urllib_parse_quote('abc def'), 'abc%20def') - self.assertEqual(compat_urllib_parse_quote('/user/abc+def'), '/user/abc%2Bdef') - self.assertEqual(compat_urllib_parse_quote('/user/abc+def', safe='+'), '%2Fuser%2Fabc+def') - self.assertEqual(compat_urllib_parse_quote(''), '') - self.assertEqual(compat_urllib_parse_quote('%'), '%25') - self.assertEqual(compat_urllib_parse_quote('%', safe='%'), '%') - self.assertEqual(compat_urllib_parse_quote('津波'), '%E6%B4%A5%E6%B3%A2') - self.assertEqual( - compat_urllib_parse_quote(''' -%%a''', safe='<>=":%/ \r\n'), - ''' -%%a''') - self.assertEqual( - compat_urllib_parse_quote('''(^◣_◢^)っ︻デ═一 ⇀ ⇀ ⇀ ⇀ ⇀ ↶%I%Break%25Things%''', safe='% '), - '''%28%5E%E2%97%A3_%E2%97%A2%5E%29%E3%81%A3%EF%B8%BB%E3%83%87%E2%95%90%E4%B8%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%86%B6%I%Break%25Things%''') - - def test_compat_urllib_parse_quote_plus(self): - self.assertEqual(compat_urllib_parse_quote_plus('abc def'), 'abc+def') - self.assertEqual(compat_urllib_parse_quote_plus('/abc def'), '%2Fabc+def') - def test_compat_urllib_parse_unquote(self): self.assertEqual(compat_urllib_parse_unquote('abc%20def'), 'abc def') self.assertEqual(compat_urllib_parse_unquote('%7e/abc+def'), '~/abc+def') @@ -109,17 +84,6 @@ class TestCompat(unittest.TestCase): self.assertEqual(compat_urllib_parse_urlencode([(b'abc', 'def')]), 'abc=def') self.assertEqual(compat_urllib_parse_urlencode([(b'abc', b'def')]), 'abc=def') - def 
test_compat_shlex_split(self): - self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two']) - self.assertEqual(compat_shlex_split('-option "one\ntwo" \n -flag'), ['-option', 'one\ntwo', '-flag']) - self.assertEqual(compat_shlex_split('-val 中文'), ['-val', '中文']) - - def test_compat_etree_Element(self): - try: - compat_etree_Element.items - except AttributeError: - self.fail('compat_etree_Element is not a type') - def test_compat_etree_fromstring(self): xml = ''' diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index bbbba073f..6412acce0 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -11,11 +11,12 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import io import re import string +import urllib.request from test.helper import FakeYDL, is_download_test from yt_dlp.extractor import YoutubeIE from yt_dlp.jsinterp import JSInterpreter -from yt_dlp.compat import compat_str, compat_urlretrieve +from yt_dlp.compat import compat_str _SIG_TESTS = [ ( @@ -147,7 +148,7 @@ def t_factory(name, sig_func, url_pattern): fn = os.path.join(self.TESTDATA_DIR, basename) if not os.path.exists(fn): - compat_urlretrieve(url, fn) + urllib.request.urlretrieve(url, fn) with io.open(fn, encoding='utf-8') as testf: jscode = testf.read() self.assertEqual(sig_func(jscode, sig_input), expected_sig) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index c626ea3fd..4bf5a8942 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -26,24 +26,20 @@ import tokenize import traceback import random import unicodedata +import urllib.request from enum import Enum from string import ascii_letters from .compat import ( - compat_basestring, compat_brotli, compat_get_terminal_size, - compat_kwargs, - compat_numeric_types, compat_os_name, compat_pycrypto_AES, compat_shlex_quote, compat_str, - compat_tokenize_tokenize, compat_urllib_error, compat_urllib_request, - compat_urllib_request_DataHandler, windows_enable_vt_mode, ) from .cookies import load_cookies @@ -682,7 +678,7 @@ class YoutubeDL(object): pp_def = dict(pp_def_raw) when = pp_def.pop('when', 'post_process') self.add_post_processor( - get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)), + get_postprocessor(pp_def.pop('key'))(self, **pp_def), when=when) self._setup_opener() @@ -2244,7 +2240,7 @@ class YoutubeDL(object): stream = io.BytesIO(format_spec.encode('utf-8')) try: - tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline))) + tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline))) except tokenize.TokenError: raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec))) @@ -2406,7 +2402,7 @@ class YoutubeDL(object): def sanitize_numeric_fields(info): for numeric_field in self._NUMERIC_FIELDS: field = info.get(numeric_field) - if field is None or isinstance(field, compat_numeric_types): + if field is None or isinstance(field, (int, float)): continue report_force_conversion(numeric_field, 'numeric', 'int') info[numeric_field] = int_or_none(field) @@ -3589,7 +3585,7 @@ class YoutubeDL(object): def urlopen(self, req): """ Start an HTTP download """ - if isinstance(req, compat_basestring): + if isinstance(req, str): req = sanitized_Request(req) return self._opener.open(req, timeout=self._socket_timeout) @@ -3739,7 +3735,7 @@ class YoutubeDL(object): https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel) ydlh = YoutubeDLHandler(self.params, 
debuglevel=debuglevel) redirect_handler = YoutubeDLRedirectHandler() - data_handler = compat_urllib_request_DataHandler() + data_handler = urllib.request.DataHandler() # When passing our own FileHandler instance, build_opener won't add the # default FileHandler and allows us to disable the file protocol, which diff --git a/yt_dlp/compat.py b/yt_dlp/compat.py index 6128ff524..5bac87c10 100644 --- a/yt_dlp/compat.py +++ b/yt_dlp/compat.py @@ -81,10 +81,6 @@ else: compat_realpath = os.path.realpath -def compat_print(s): - assert isinstance(s, compat_str) - print(s) - try: compat_Pattern = re.Pattern except AttributeError: @@ -173,61 +169,64 @@ def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.pytho # Deprecated -compat_basestring = str -compat_chr = chr -compat_filter = filter -compat_input = input -compat_integer_types = (int, ) -compat_kwargs = lambda kwargs: kwargs -compat_map = map -compat_numeric_types = (int, float, complex) -compat_str = str -compat_xpath = lambda xpath: xpath -compat_zip = zip -workaround_optparse_bug9161 = lambda: None - -compat_collections_abc = collections.abc -compat_HTMLParser = html.parser.HTMLParser -compat_HTTPError = urllib.error.HTTPError -compat_Struct = struct.Struct compat_b64decode = base64.b64decode +compat_chr = chr compat_cookiejar = http.cookiejar -compat_cookiejar_Cookie = compat_cookiejar.Cookie -compat_cookies = http.cookies -compat_cookies_SimpleCookie = compat_cookies.SimpleCookie -compat_etree_Element = etree.Element -compat_etree_register_namespace = etree.register_namespace +compat_cookiejar_Cookie = http.cookiejar.Cookie +compat_cookies_SimpleCookie = http.cookies.SimpleCookie compat_get_terminal_size = shutil.get_terminal_size compat_getenv = os.getenv compat_getpass = getpass.getpass compat_html_entities = html.entities -compat_html_entities_html5 = compat_html_entities.html5 +compat_html_entities_html5 = html.entities.html5 +compat_HTMLParser = html.parser.HTMLParser compat_http_client = http.client compat_http_server = http.server +compat_HTTPError = urllib.error.HTTPError compat_itertools_count = itertools.count compat_parse_qs = urllib.parse.parse_qs -compat_shlex_split = shlex.split -compat_socket_create_connection = socket.create_connection +compat_str = str compat_struct_pack = struct.pack compat_struct_unpack = struct.unpack -compat_subprocess_get_DEVNULL = lambda: DEVNULL compat_tokenize_tokenize = tokenize.tokenize compat_urllib_error = urllib.error -compat_urllib_parse = urllib.parse -compat_urllib_parse_quote = urllib.parse.quote -compat_urllib_parse_quote_plus = urllib.parse.quote_plus compat_urllib_parse_unquote = urllib.parse.unquote compat_urllib_parse_unquote_plus = urllib.parse.unquote_plus -compat_urllib_parse_unquote_to_bytes = urllib.parse.unquote_to_bytes compat_urllib_parse_urlencode = urllib.parse.urlencode compat_urllib_parse_urlparse = urllib.parse.urlparse -compat_urllib_parse_urlunparse = urllib.parse.urlunparse compat_urllib_request = urllib.request +compat_urlparse = compat_urllib_parse = urllib.parse + + +# To be removed + +compat_basestring = str +compat_collections_abc = collections.abc +compat_cookies = http.cookies +compat_etree_Element = etree.Element +compat_etree_register_namespace = etree.register_namespace +compat_filter = filter +compat_input = input +compat_integer_types = (int, ) +compat_kwargs = lambda kwargs: kwargs +compat_map = map +compat_numeric_types = (int, float, complex) +compat_print = print +compat_shlex_split = shlex.split +compat_socket_create_connection = 
socket.create_connection +compat_Struct = struct.Struct +compat_subprocess_get_DEVNULL = lambda: DEVNULL +compat_urllib_parse_quote = urllib.parse.quote +compat_urllib_parse_quote_plus = urllib.parse.quote_plus +compat_urllib_parse_unquote_to_bytes = urllib.parse.unquote_to_bytes +compat_urllib_parse_urlunparse = urllib.parse.urlunparse compat_urllib_request_DataHandler = urllib.request.DataHandler compat_urllib_response = urllib.response -compat_urlparse = urllib.parse compat_urlretrieve = urllib.request.urlretrieve compat_xml_parse_error = etree.ParseError +compat_xpath = lambda xpath: xpath +compat_zip = zip +workaround_optparse_bug9161 = lambda: None # Set public objects diff --git a/yt_dlp/downloader/ism.py b/yt_dlp/downloader/ism.py index 4d5618c83..2ba36085e 100644 --- a/yt_dlp/downloader/ism.py +++ b/yt_dlp/downloader/ism.py @@ -3,25 +3,25 @@ from __future__ import unicode_literals import time import binascii import io +import struct from .fragment import FragmentFD from ..compat import ( - compat_Struct, compat_urllib_error, ) -u8 = compat_Struct('>B') -u88 = compat_Struct('>Bx') -u16 = compat_Struct('>H') -u1616 = compat_Struct('>Hxx') -u32 = compat_Struct('>I') -u64 = compat_Struct('>Q') +u8 = struct.Struct('>B') +u88 = struct.Struct('>Bx') +u16 = struct.Struct('>H') +u1616 = struct.Struct('>Hxx') +u32 = struct.Struct('>I') +u64 = struct.Struct('>Q') -s88 = compat_Struct('>bx') -s16 = compat_Struct('>h') -s1616 = compat_Struct('>hxx') -s32 = compat_Struct('>i') +s88 = struct.Struct('>bx') +s16 = struct.Struct('>h') +s1616 = struct.Struct('>hxx') +s32 = struct.Struct('>i') unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000) diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py index a839f0c1f..c7db05475 100644 --- a/yt_dlp/extractor/abematv.py +++ b/yt_dlp/extractor/abematv.py @@ -5,13 +5,14 @@ import hashlib import hmac import re import struct +import urllib.response +import uuid from base64 import urlsafe_b64encode from binascii import unhexlify from .common import InfoExtractor from ..aes import aes_ecb_decrypt from ..compat import ( - compat_urllib_response, compat_urllib_parse_urlparse, compat_urllib_request, ) @@ -19,7 +20,6 @@ from ..utils import ( ExtractorError, decode_base, int_or_none, - random_uuidv4, request_to_url, time_seconds, update_url_query, @@ -141,7 +141,7 @@ class AbemaLicenseHandler(compat_urllib_request.BaseHandler): url = request_to_url(url) ticket = compat_urllib_parse_urlparse(url).netloc response_data = self._get_videokey_from_ticket(ticket) - return compat_urllib_response.addinfourl(io.BytesIO(response_data), headers={ + return urllib.response.addinfourl(io.BytesIO(response_data), headers={ 'Content-Length': len(response_data), }, url=url, code=200) @@ -253,7 +253,7 @@ class AbemaTVIE(AbemaTVBaseIE): if self._USERTOKEN: return self._USERTOKEN - self._DEVICE_ID = random_uuidv4() + self._DEVICE_ID = str(uuid.uuid4()) aks = self._generate_aks(self._DEVICE_ID) user_data = self._download_json( 'https://api.abema.io/v1/users', None, note='Authorizing', diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py index 5d98301b8..1292484c6 100644 --- a/yt_dlp/extractor/adobepass.py +++ b/yt_dlp/extractor/adobepass.py @@ -8,7 +8,6 @@ import xml.etree.ElementTree as etree from .common import InfoExtractor from ..compat import ( - compat_kwargs, compat_urlparse, compat_getpass ) @@ -1365,7 +1364,7 @@ class AdobePassIE(InfoExtractor): headers.update(kwargs.get('headers', {})) kwargs['headers'] = headers 
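The adobepass change just above works because compat_kwargs has been an identity function for the whole Python 3 era, as the compat.py listing earlier in this commit shows. A two-line demonstration (standalone, with a made-up options dict):

compat_kwargs = lambda kwargs: kwargs     # the definition being retired

opts = {'note': 'Downloading webpage', 'fatal': False}
assert compat_kwargs(opts) is opts        # so **compat_kwargs(opts) == **opts at every call site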
return super(AdobePassIE, self)._download_webpage_handle( - *args, **compat_kwargs(kwargs)) + *args, **kwargs) @staticmethod def _get_mvpd_resource(provider_id, title, guid, rating): diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py index 28946e9dd..44bfb8bc2 100644 --- a/yt_dlp/extractor/afreecatv.py +++ b/yt_dlp/extractor/afreecatv.py @@ -5,7 +5,6 @@ import functools import re from .common import InfoExtractor -from ..compat import compat_xpath from ..utils import ( ExtractorError, OnDemandPagedList, @@ -282,7 +281,7 @@ class AfreecaTVIE(InfoExtractor): else: raise ExtractorError('Unable to download video info') - video_element = video_xml.findall(compat_xpath('./track/video'))[-1] + video_element = video_xml.findall('./track/video')[-1] if video_element is None or video_element.text is None: raise ExtractorError( 'Video %s does not exist' % video_id, expected=True) @@ -312,7 +311,7 @@ class AfreecaTVIE(InfoExtractor): if not video_url: entries = [] - file_elements = video_element.findall(compat_xpath('./file')) + file_elements = video_element.findall('./file') one = len(file_elements) == 1 for file_num, file_element in enumerate(file_elements, start=1): file_url = url_or_none(file_element.text) diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py index 29ad7ded7..5bc8d3110 100644 --- a/yt_dlp/extractor/bbc.py +++ b/yt_dlp/extractor/bbc.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals +import xml.etree.ElementTree import functools import itertools import json @@ -8,7 +9,6 @@ import re from .common import InfoExtractor from ..compat import ( - compat_etree_Element, compat_HTTPError, compat_str, compat_urllib_error, @@ -318,7 +318,7 @@ class BBCCoUkIE(InfoExtractor): continue captions = self._download_xml( cc_url, programme_id, 'Downloading captions', fatal=False) - if not isinstance(captions, compat_etree_Element): + if not isinstance(captions, xml.etree.ElementTree.Element): continue subtitles['en'] = [ { diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py index dcd332b43..60c853898 100644 --- a/yt_dlp/extractor/brightcove.py +++ b/yt_dlp/extractor/brightcove.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import base64 import re import struct +import xml.etree.ElementTree from .adobepass import AdobePassIE from .common import InfoExtractor @@ -12,7 +13,6 @@ from ..compat import ( compat_HTTPError, compat_parse_qs, compat_urlparse, - compat_xml_parse_error, ) from ..utils import ( clean_html, @@ -166,7 +166,7 @@ class BrightcoveLegacyIE(InfoExtractor): try: object_doc = compat_etree_fromstring(object_str.encode('utf-8')) - except compat_xml_parse_error: + except xml.etree.ElementTree.ParseError: return fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars') diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 48f302f86..8da21a3dc 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import base64 import collections +import xml.etree.ElementTree import hashlib import itertools import json @@ -17,7 +18,6 @@ import math from ..compat import ( compat_cookiejar_Cookie, compat_cookies_SimpleCookie, - compat_etree_Element, compat_etree_fromstring, compat_expanduser, compat_getpass, @@ -30,7 +30,6 @@ from ..compat import ( compat_urllib_parse_urlencode, compat_urllib_request, compat_urlparse, - compat_xml_parse_error, ) from ..downloader import FileDownloader from ..downloader.f4m import ( @@ 
-951,7 +950,7 @@ class InfoExtractor(object): fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None): """ - Return a tuple (xml as an compat_etree_Element, URL handle). + Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle). See _download_webpage docstring for arguments specification. """ @@ -972,7 +971,7 @@ class InfoExtractor(object): transform_source=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None): """ - Return the xml as an compat_etree_Element. + Return the xml as an xml.etree.ElementTree.Element. See _download_webpage docstring for arguments specification. """ @@ -988,7 +987,7 @@ class InfoExtractor(object): xml_string = transform_source(xml_string) try: return compat_etree_fromstring(xml_string.encode('utf-8')) - except compat_xml_parse_error as ve: + except xml.etree.ElementTree.ParseError as ve: errmsg = '%s: Failed to parse XML ' % video_id if fatal: raise ExtractorError(errmsg, cause=ve) @@ -2008,7 +2007,7 @@ class InfoExtractor(object): def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, quality=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), fatal=True, m3u8_id=None): - if not isinstance(manifest, compat_etree_Element) and not fatal: + if not isinstance(manifest, xml.etree.ElementTree.Element) and not fatal: return [] # currently yt-dlp cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py index 7edb645f8..d7696bbd9 100644 --- a/yt_dlp/extractor/crunchyroll.py +++ b/yt_dlp/extractor/crunchyroll.py @@ -6,13 +6,13 @@ import re import json import zlib +import xml.etree.ElementTree from hashlib import sha1 from math import pow, sqrt, floor from .common import InfoExtractor from .vrv import VRVBaseIE from ..compat import ( compat_b64decode, - compat_etree_Element, compat_etree_fromstring, compat_str, compat_urllib_parse_urlencode, @@ -395,7 +395,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'Downloading subtitles for ' + sub_name, data={ 'subtitle_script_id': sub_id, }) - if not isinstance(sub_doc, compat_etree_Element): + if not isinstance(sub_doc, xml.etree.ElementTree.Element): continue sid = sub_doc.get('id') iv = xpath_text(sub_doc, 'iv', 'subtitle iv') @@ -525,7 +525,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'video_quality': stream_quality, 'current_page': url, }) - if isinstance(streamdata, compat_etree_Element): + if isinstance(streamdata, xml.etree.ElementTree.Element): stream_info = streamdata.find('./{default}preload/stream_info') if stream_info is not None: stream_infos.append(stream_info) @@ -536,7 +536,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'video_format': stream_format, 'video_encode_quality': stream_quality, }) - if isinstance(stream_info, compat_etree_Element): + if isinstance(stream_info, xml.etree.ElementTree.Element): stream_infos.append(stream_info) for stream_info in stream_infos: video_encode_id = xpath_text(stream_info, './video_encode_id') @@ -611,7 +611,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text season = episode = episode_number = duration = None - if isinstance(metadata, compat_etree_Element): + if isinstance(metadata, xml.etree.ElementTree.Element): season = xpath_text(metadata, 'series_title') episode = xpath_text(metadata, 'episode_title') 
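The crunchyroll hunk above shows the recurring pattern in this commit: isinstance checks against xml.etree.ElementTree.Element and except clauses on xml.etree.ElementTree.ParseError, with no compat indirection. A standalone sketch of the idiom, using a hypothetical parse_or_none helper:

import xml.etree.ElementTree as etree

def parse_or_none(xml_string):
    try:
        return etree.fromstring(xml_string)
    except etree.ParseError:      # formerly compat_xml_parse_error
        return None

doc = parse_or_none('<metadata><episode_title>Pilot</episode_title></metadata>')
assert isinstance(doc, etree.Element)     # formerly compat_etree_Element
assert parse_or_none('<broken') is None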
episode_number = int_or_none(xpath_text(metadata, 'episode_number')) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index f11fc844d..fd620217e 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import os import re +import xml.etree.ElementTree from .common import InfoExtractor from .youtube import YoutubeIE @@ -12,7 +13,6 @@ from ..compat import ( compat_str, compat_urllib_parse_unquote, compat_urlparse, - compat_xml_parse_error, ) from ..utils import ( determine_ext, @@ -2827,7 +2827,7 @@ class GenericIE(InfoExtractor): try: try: doc = compat_etree_fromstring(webpage) - except compat_xml_parse_error: + except xml.etree.ElementTree.ParseError: doc = compat_etree_fromstring(webpage.encode('utf-8')) if doc.tag == 'rss': self.report_detected('RSS feed') @@ -2862,7 +2862,7 @@ class GenericIE(InfoExtractor): self.report_detected('F4M manifest') self._sort_formats(info_dict['formats']) return info_dict - except compat_xml_parse_error: + except xml.etree.ElementTree.ParseError: pass # Is it a Camtasia project? diff --git a/yt_dlp/extractor/microsoftvirtualacademy.py b/yt_dlp/extractor/microsoftvirtualacademy.py index 46abd2a6d..9255a7964 100644 --- a/yt_dlp/extractor/microsoftvirtualacademy.py +++ b/yt_dlp/extractor/microsoftvirtualacademy.py @@ -3,9 +3,6 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_xpath, -) from ..utils import ( int_or_none, parse_duration, @@ -70,9 +67,9 @@ class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE): formats = [] - for sources in settings.findall(compat_xpath('.//MediaSources')): + for sources in settings.findall('.//MediaSources'): sources_type = sources.get('videoType') - for source in sources.findall(compat_xpath('./MediaSource')): + for source in sources.findall('./MediaSource'): video_url = source.text if not video_url or not video_url.startswith('http'): continue @@ -101,7 +98,7 @@ class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE): self._sort_formats(formats) subtitles = {} - for source in settings.findall(compat_xpath('.//MarkerResourceSource')): + for source in settings.findall('.//MarkerResourceSource'): subtitle_url = source.text if not subtitle_url: continue diff --git a/yt_dlp/extractor/mildom.py b/yt_dlp/extractor/mildom.py index 5f2df29c6..4de8e9ef4 100644 --- a/yt_dlp/extractor/mildom.py +++ b/yt_dlp/extractor/mildom.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import functools import json +import uuid from .common import InfoExtractor from ..utils import ( @@ -11,7 +12,6 @@ from ..utils import ( ExtractorError, float_or_none, OnDemandPagedList, - random_uuidv4, traverse_obj, ) @@ -21,7 +21,7 @@ class MildomBaseIE(InfoExtractor): def _call_api(self, url, video_id, query=None, note='Downloading JSON metadata', body=None): if not self._GUEST_ID: - self._GUEST_ID = f'pc-gp-{random_uuidv4()}' + self._GUEST_ID = f'pc-gp-{str(uuid.uuid4())}' content = self._download_json( url, video_id, note=note, data=json.dumps(body).encode() if body else None, diff --git a/yt_dlp/extractor/mixcloud.py b/yt_dlp/extractor/mixcloud.py index c2dd078ac..b19e59b1a 100644 --- a/yt_dlp/extractor/mixcloud.py +++ b/yt_dlp/extractor/mixcloud.py @@ -9,7 +9,6 @@ from ..compat import ( compat_ord, compat_str, compat_urllib_parse_unquote, - compat_zip ) from ..utils import ( ExtractorError, @@ -76,7 +75,7 @@ class MixcloudIE(MixcloudBaseIE): """Encrypt/Decrypt XOR cipher. 
Both ways are possible because it's XOR.""" return ''.join([ compat_chr(compat_ord(ch) ^ compat_ord(k)) - for ch, k in compat_zip(ciphertext, itertools.cycle(key))]) + for ch, k in zip(ciphertext, itertools.cycle(key))]) def _real_extract(self, url): username, slug = self._match_valid_url(url).groups() diff --git a/yt_dlp/extractor/mtv.py b/yt_dlp/extractor/mtv.py index be5de0a70..cff314e27 100644 --- a/yt_dlp/extractor/mtv.py +++ b/yt_dlp/extractor/mtv.py @@ -6,7 +6,6 @@ import re from .common import InfoExtractor from ..compat import ( compat_str, - compat_xpath, ) from ..utils import ( ExtractorError, @@ -167,9 +166,9 @@ class MTVServicesInfoExtractor(InfoExtractor): itemdoc, './/{http://search.yahoo.com/mrss/}category', 'scheme', 'urn:mtvn:video_title') if title_el is None: - title_el = itemdoc.find(compat_xpath('.//{http://search.yahoo.com/mrss/}title')) + title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title') if title_el is None: - title_el = itemdoc.find(compat_xpath('.//title')) + title_el = itemdoc.find('.//title') if title_el.text is None: title_el = None diff --git a/yt_dlp/extractor/noz.py b/yt_dlp/extractor/noz.py index ccafd7723..bdc2efcd7 100644 --- a/yt_dlp/extractor/noz.py +++ b/yt_dlp/extractor/noz.py @@ -4,7 +4,6 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import ( compat_urllib_parse_unquote, - compat_xpath, ) from ..utils import ( int_or_none, @@ -50,7 +49,7 @@ class NozIE(InfoExtractor): duration = int_or_none(xpath_text( doc, './/article/movie/file/duration')) formats = [] - for qnode in doc.findall(compat_xpath('.//article/movie/file/qualities/qual')): + for qnode in doc.findall('.//article/movie/file/qualities/qual'): http_url_ele = find_xpath_attr( qnode, './html_urls/video_url', 'format', 'video/mp4') http_url = http_url_ele.text if http_url_ele is not None else None diff --git a/yt_dlp/extractor/openload.py b/yt_dlp/extractor/openload.py index fe4740aae..c19d04900 100644 --- a/yt_dlp/extractor/openload.py +++ b/yt_dlp/extractor/openload.py @@ -8,7 +8,6 @@ import tempfile from ..compat import ( compat_urlparse, - compat_kwargs, ) from ..utils import ( check_executable, @@ -158,7 +157,7 @@ class PhantomJSwrapper(object): cookie['rest'] = {'httpOnly': None} if 'expiry' in cookie: cookie['expire_time'] = cookie['expiry'] - self.extractor._set_cookie(**compat_kwargs(cookie)) + self.extractor._set_cookie(**cookie) def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'): """ diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index bbc79c2be..749e6dda3 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -12,7 +12,6 @@ from .common import ( ) from ..compat import ( compat_HTTPError, - compat_kwargs, compat_str, ) from ..utils import ( @@ -96,7 +95,7 @@ class SoundcloudBaseIE(InfoExtractor): query['client_id'] = self._CLIENT_ID kwargs['query'] = query try: - return super()._download_json(*args, **compat_kwargs(kwargs)) + return super()._download_json(*args, **kwargs) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403): self._store_client_id(None) diff --git a/yt_dlp/extractor/udemy.py b/yt_dlp/extractor/udemy.py index 235f89713..77485247f 100644 --- a/yt_dlp/extractor/udemy.py +++ b/yt_dlp/extractor/udemy.py @@ -5,7 +5,6 @@ import re from .common import InfoExtractor from ..compat import ( compat_HTTPError, - compat_kwargs, compat_str, 
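The Mixcloud cipher above is a plain repeating-key XOR, which is why the same function both encrypts and decrypts; the only change here is using the builtin zip instead of compat_zip. A self-contained version with a made-up key:

import itertools

def xor_cipher(data, key):
    # XOR each character against the endlessly repeating key;
    # applying the same key a second time restores the input
    return ''.join(chr(ord(c) ^ ord(k)) for c, k in zip(data, itertools.cycle(key)))

scrambled = xor_cipher('stream-url', 'key')
assert xor_cipher(scrambled, 'key') == 'stream-url'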
compat_urllib_request, compat_urlparse, @@ -132,7 +131,7 @@ class UdemyIE(InfoExtractor): headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36' kwargs['headers'] = headers ret = super(UdemyIE, self)._download_webpage_handle( - *args, **compat_kwargs(kwargs)) + *args, **kwargs) if not ret: return ret webpage, _ = ret diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index 972fb480b..a00b387f3 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -8,7 +8,6 @@ import itertools from .common import InfoExtractor from ..compat import ( - compat_kwargs, compat_HTTPError, compat_str, compat_urlparse, @@ -109,7 +108,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs): vimeo_config = self._search_regex( r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));', - webpage, 'vimeo config', *args, **compat_kwargs(kwargs)) + webpage, 'vimeo config', *args, **kwargs) if vimeo_config: return self._parse_json(vimeo_config, video_id) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 67db6d067..8839b44d4 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -3,14 +3,13 @@ from __future__ import unicode_literals import os.path import optparse import re +import shlex import sys from .compat import ( compat_expanduser, compat_get_terminal_size, compat_getenv, - compat_kwargs, - compat_shlex_split, ) from .utils import ( Config, @@ -223,14 +222,12 @@ def create_parser(): fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position) fmt.format_option_strings = _format_option_string - kw = { - 'version': __version__, - 'formatter': fmt, - 'usage': '%prog [OPTIONS] URL [URL...]', - 'conflict_handler': 'resolve', - } - - parser = _YoutubeDLOptionParser(**compat_kwargs(kw)) + parser = _YoutubeDLOptionParser( + version=__version__, + formatter=fmt, + usage='%prog [OPTIONS] URL [URL...]', + conflict_handler='resolve' + ) general = optparse.OptionGroup(parser, 'General Options') general.add_option( @@ -833,7 +830,7 @@ def create_parser(): callback_kwargs={ 'allowed_keys': r'ffmpeg_[io]\d*|%s' % '|'.join(map(re.escape, list_external_downloaders())), 'default_key': 'default', - 'process': compat_shlex_split + 'process': shlex.split }, help=( 'Give these arguments to the external downloader. ' 'Specify the downloader name and the arguments separated by a colon ":". ' @@ -1339,7 +1336,7 @@ def create_parser(): callback_kwargs={ 'allowed_keys': r'\w+(?:\+\w+)?', 'default_key': 'default-compat', - 'process': compat_shlex_split, + 'process': shlex.split, 'multiple_keys': False }, help=( 'Give these arguments to the postprocessors. 
' diff --git a/yt_dlp/postprocessor/sponskrub.py b/yt_dlp/postprocessor/sponskrub.py index 86149aeef..59cf0e0c3 100644 --- a/yt_dlp/postprocessor/sponskrub.py +++ b/yt_dlp/postprocessor/sponskrub.py @@ -1,9 +1,9 @@ from __future__ import unicode_literals import os +import shlex import subprocess from .common import PostProcessor -from ..compat import compat_shlex_split from ..utils import ( check_executable, cli_option, @@ -79,7 +79,7 @@ class SponSkrubPP(PostProcessor): if not self.cutout: cmd += ['-chapter'] cmd += cli_option(self._downloader.params, '-proxy', 'proxy') - cmd += compat_shlex_split(self.args) # For backward compatibility + cmd += shlex.split(self.args) # For backward compatibility cmd += self._configuration_args(self._exe_name, use_compat=False) cmd += ['--', information['id'], filename, temp_filename] cmd = [encodeArgument(i) for i in cmd] diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 324b54e78..3f70b1f60 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -41,12 +41,13 @@ import traceback import xml.etree.ElementTree import zlib import mimetypes +import urllib.parse +import shlex from .compat import ( compat_HTMLParseError, compat_HTMLParser, compat_HTTPError, - compat_basestring, compat_brotli, compat_chr, compat_cookiejar, @@ -55,28 +56,19 @@ from .compat import ( compat_html_entities, compat_html_entities_html5, compat_http_client, - compat_integer_types, - compat_numeric_types, - compat_kwargs, compat_os_name, compat_parse_qs, - compat_shlex_split, compat_shlex_quote, compat_str, compat_struct_pack, compat_struct_unpack, compat_urllib_error, - compat_urllib_parse, compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, - compat_urllib_parse_urlunparse, - compat_urllib_parse_quote, - compat_urllib_parse_quote_plus, compat_urllib_parse_unquote_plus, compat_urllib_request, compat_urlparse, compat_websockets, - compat_xpath, ) from .socks import ( @@ -340,7 +332,7 @@ def xpath_with_ns(path, ns_map): def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT): def _find_xpath(xpath): - return node.find(compat_xpath(xpath)) + return node.find(xpath) if isinstance(xpath, (str, compat_str)): n = _find_xpath(xpath) @@ -1193,7 +1185,7 @@ class XAttrUnavailableError(YoutubeDLError): def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs): - hc = http_class(*args, **compat_kwargs(kwargs)) + hc = http_class(*args, **kwargs) source_address = ydl_handler._params.get('source_address') if source_address is not None: @@ -2401,7 +2393,7 @@ def str_or_none(v, default=None): def str_to_int(int_str): """ A more relaxed version of int_or_none """ - if isinstance(int_str, compat_integer_types): + if isinstance(int_str, int): return int_str elif isinstance(int_str, compat_str): int_str = re.sub(r'[,\.\+]', '', int_str) @@ -2442,7 +2434,7 @@ def request_to_url(req): def strftime_or_none(timestamp, date_format, default=None): datetime_object = None try: - if isinstance(timestamp, compat_numeric_types): # unix timestamp + if isinstance(timestamp, (int, float)): # unix timestamp datetime_object = datetime.datetime.utcfromtimestamp(timestamp) elif isinstance(timestamp, compat_str): # assume YYYYMMDD datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d') @@ -2452,7 +2444,7 @@ def strftime_or_none(timestamp, date_format, default=None): def parse_duration(s): - if not isinstance(s, compat_basestring): + if not isinstance(s, str): return None s = s.strip() if not s: @@ -2789,7 +2781,7 @@ def lowercase_escape(s): def escape_rfc3986(s): 
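options.py and sponskrub.py above now call shlex.split directly; behaviour is unchanged, since compat_shlex_split was a bare alias on Python 3. The deleted test_compat cases earlier in this commit double as usage examples, and the comments=True form matches the Config reader further below:

import shlex

assert shlex.split('-option "one two"') == ['-option', 'one two']
assert shlex.split('-val 中文') == ['-val', '中文']                 # non-ASCII needs no workaround on Python 3
assert shlex.split('a b  # comment', comments=True) == ['a', 'b']  # the form used when reading config files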
"""Escape non-ASCII characters as suggested by RFC 3986""" - return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]") + return urllib.parse.quote(s, b"%/;:@&=+$,!~*'()?#[]") def escape_url(url): @@ -2975,7 +2967,7 @@ TV_PARENTAL_GUIDELINES = { def parse_age_limit(s): if type(s) == int: return s if 0 <= s <= 21 else None - if not isinstance(s, compat_basestring): + if not isinstance(s, str): return None m = re.match(r'^(?P\d{1,2})\+?$', s) if m: @@ -3405,7 +3397,7 @@ def _match_one(filter_part, dct, incomplete): comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote']) actual_value = dct.get(m['key']) numeric_comparison = None - if isinstance(actual_value, compat_numeric_types): + if isinstance(actual_value, (int, float)): # If the original field is a string and matching comparisonvalue is # a number we should respect the origin of the original field # and process comparison value as a string (see @@ -4859,9 +4851,9 @@ def iri_to_uri(iri): net_location = '' if iri_parts.username: - net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~") + net_location += urllib.parse.quote(iri_parts.username, safe=r"!$%&'()*+,~") if iri_parts.password is not None: - net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~") + net_location += ':' + urllib.parse.quote(iri_parts.password, safe=r"!$%&'()*+,~") net_location += '@' net_location += iri_parts.hostname.encode('idna').decode('utf-8') # Punycode for Unicode hostnames. @@ -4869,19 +4861,19 @@ def iri_to_uri(iri): if iri_parts.port is not None and iri_parts.port != 80: net_location += ':' + str(iri_parts.port) - return compat_urllib_parse_urlunparse( + return urllib.parse.urlunparse( (iri_parts.scheme, net_location, - compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"), + urllib.parse.quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"), # Unsure about the `safe` argument, since this is a legacy way of handling parameters. - compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"), + urllib.parse.quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"), # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component. - compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"), + urllib.parse.quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"), - compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~"))) + urllib.parse.quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~"))) # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes. @@ -5233,7 +5225,7 @@ class Config: try: # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56 contents = optionf.read() - res = compat_shlex_split(contents, comments=True) + res = shlex.split(contents, comments=True) finally: optionf.close() return res diff --git a/yt_dlp/webvtt.py b/yt_dlp/webvtt.py index 962aa57ad..c78078f17 100644 --- a/yt_dlp/webvtt.py +++ b/yt_dlp/webvtt.py @@ -15,7 +15,6 @@ import re import io from .utils import int_or_none, timetuple_from_msec from .compat import ( - compat_str as str, compat_Pattern, compat_Match, ) From 86e5f3ed2e6e71eb81ea4c9e26288f16119ffd0c Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 11 Apr 2022 20:40:28 +0530 Subject: [PATCH 0942/2552] [cleanup] Upgrade syntax Using https://github.com/asottile/pyupgrade 1. `__future__` imports and `coding: utf-8` were removed 2. 
Files were rewritten with `pyupgrade --py36-plus --keep-percent-format` 3. f-strings were cherry-picked from `pyupgrade --py36-plus` Extractors are left untouched (except removing header) to avoid unnecessary merge conflicts --- CONTRIBUTING.md | 1 - devscripts/bash-completion.py | 4 +- devscripts/check-porn.py | 10 +- devscripts/fish-completion.py | 4 +- devscripts/generate_aes_testdata.py | 2 - devscripts/lazy_load_template.py | 1 - devscripts/make_contributing.py | 7 +- devscripts/make_issue_template.py | 7 +- devscripts/make_lazy_extractors.py | 13 +-- devscripts/make_readme.py | 8 +- devscripts/make_supportedsites.py | 9 +- devscripts/prepare_manpage.py | 7 +- devscripts/update-formulae.py | 4 +- devscripts/update-version.py | 2 +- devscripts/zsh-completion.py | 4 +- pyinst.py | 1 - setup.py | 1 - test/helper.py | 48 ++++---- test/test_InfoExtractor.py | 19 +--- test/test_YoutubeDL.py | 30 +++-- test/test_YoutubeDLCookieJar.py | 4 - test/test_aes.py | 2 - test/test_age_restriction.py | 2 - test/test_all_urls.py | 9 +- test/test_cache.py | 4 - test/test_compat.py | 6 +- test/test_download.py | 14 +-- test/test_downloader_http.py | 5 +- test/test_execution.py | 6 +- test/test_http.py | 15 +-- test/test_iqiyi_sdk_interpreter.py | 5 +- test/test_jsinterp.py | 3 - test/test_netrc.py | 3 - test/test_overwrites.py | 2 - test/test_post_hooks.py | 5 +- test/test_postprocessors.py | 3 - test/test_socks.py | 3 - test/test_subtitles.py | 38 +++---- test/test_update.py.disabled | 3 - test/test_utils.py | 21 ++-- test/test_verbose_output.py | 4 - test/test_write_annotations.py.disabled | 7 +- test/test_youtube_lists.py | 2 - test/test_youtube_misc.py | 2 - test/test_youtube_signature.py | 6 +- yt_dlp/YoutubeDL.py | 80 +++++++------- yt_dlp/__init__.py | 10 +- yt_dlp/__main__.py | 2 - yt_dlp/aes.py | 2 - yt_dlp/cache.py | 19 ++-- yt_dlp/compat.py | 2 - yt_dlp/cookies.py | 58 +++++----- yt_dlp/downloader/__init__.py | 2 - yt_dlp/downloader/common.py | 8 +- yt_dlp/downloader/dash.py | 3 +- yt_dlp/downloader/external.py | 24 ++-- yt_dlp/downloader/f4m.py | 2 - yt_dlp/downloader/fc2.py | 2 - yt_dlp/downloader/fragment.py | 8 +- yt_dlp/downloader/hls.py | 5 +- yt_dlp/downloader/http.py | 12 +- yt_dlp/downloader/ism.py | 2 - yt_dlp/downloader/mhtml.py | 3 - yt_dlp/downloader/niconico.py | 3 - yt_dlp/downloader/rtmp.py | 2 - yt_dlp/downloader/rtsp.py | 4 +- yt_dlp/downloader/youtube_live_chat.py | 2 - yt_dlp/extractor/abc.py | 2 - yt_dlp/extractor/abcnews.py | 4 - yt_dlp/extractor/abcotvs.py | 4 - yt_dlp/extractor/academicearth.py | 2 - yt_dlp/extractor/acast.py | 4 - yt_dlp/extractor/adn.py | 3 - yt_dlp/extractor/adobeconnect.py | 3 - yt_dlp/extractor/adobepass.py | 3 - yt_dlp/extractor/adobetv.py | 2 - yt_dlp/extractor/adultswim.py | 3 - yt_dlp/extractor/aenetworks.py | 4 - yt_dlp/extractor/afreecatv.py | 3 - yt_dlp/extractor/airmozilla.py | 3 - yt_dlp/extractor/aliexpress.py | 3 - yt_dlp/extractor/aljazeera.py | 3 - yt_dlp/extractor/allocine.py | 3 - yt_dlp/extractor/alphaporno.py | 2 - yt_dlp/extractor/alsace20tv.py | 3 - yt_dlp/extractor/alura.py | 3 - yt_dlp/extractor/amara.py | 3 - yt_dlp/extractor/amazon.py | 1 - yt_dlp/extractor/amcnetworks.py | 3 - yt_dlp/extractor/americastestkitchen.py | 3 - yt_dlp/extractor/amp.py | 3 - yt_dlp/extractor/animelab.py | 3 - yt_dlp/extractor/animeondemand.py | 2 - yt_dlp/extractor/ant1newsgr.py | 3 - yt_dlp/extractor/anvato.py | 3 - .../anvato_token_generator/__init__.py | 2 - .../anvato_token_generator/common.py | 3 - .../extractor/anvato_token_generator/nfl.py | 
2 - yt_dlp/extractor/aol.py | 3 - yt_dlp/extractor/apa.py | 3 - yt_dlp/extractor/aparat.py | 3 - yt_dlp/extractor/appleconnect.py | 3 - yt_dlp/extractor/applepodcasts.py | 3 - yt_dlp/extractor/appletrailers.py | 2 - yt_dlp/extractor/archiveorg.py | 7 +- yt_dlp/extractor/arcpublishing.py | 3 - yt_dlp/extractor/ard.py | 3 - yt_dlp/extractor/arkena.py | 3 - yt_dlp/extractor/arnes.py | 3 - yt_dlp/extractor/arte.py | 3 - yt_dlp/extractor/asiancrush.py | 3 - yt_dlp/extractor/atresplayer.py | 4 - yt_dlp/extractor/atttechchannel.py | 2 - yt_dlp/extractor/atvat.py | 3 - yt_dlp/extractor/audimedia.py | 3 - yt_dlp/extractor/audioboom.py | 3 - yt_dlp/extractor/audiomack.py | 3 - yt_dlp/extractor/audius.py | 3 - yt_dlp/extractor/awaan.py | 3 - yt_dlp/extractor/aws.py | 3 - yt_dlp/extractor/azmedien.py | 3 - yt_dlp/extractor/baidu.py | 4 - yt_dlp/extractor/banbye.py | 3 - yt_dlp/extractor/bandaichannel.py | 3 - yt_dlp/extractor/bandcamp.py | 3 - yt_dlp/extractor/bannedvideo.py | 2 - yt_dlp/extractor/bbc.py | 3 - yt_dlp/extractor/beatport.py | 3 - yt_dlp/extractor/beeg.py | 2 - yt_dlp/extractor/behindkink.py | 4 - yt_dlp/extractor/bellmedia.py | 4 - yt_dlp/extractor/bet.py | 2 - yt_dlp/extractor/bfi.py | 3 - yt_dlp/extractor/bfmtv.py | 3 - yt_dlp/extractor/bibeltv.py | 3 - yt_dlp/extractor/bigflix.py | 3 - yt_dlp/extractor/bigo.py | 3 - yt_dlp/extractor/bild.py | 3 - yt_dlp/extractor/bilibili.py | 2 - yt_dlp/extractor/biobiochiletv.py | 3 - yt_dlp/extractor/biqle.py | 3 - yt_dlp/extractor/bitchute.py | 3 - yt_dlp/extractor/bitwave.py | 2 - yt_dlp/extractor/blackboardcollaborate.py | 4 - yt_dlp/extractor/bleacherreport.py | 3 - yt_dlp/extractor/blinkx.py | 2 - yt_dlp/extractor/blogger.py | 3 - yt_dlp/extractor/bloomberg.py | 3 - yt_dlp/extractor/bokecc.py | 4 - yt_dlp/extractor/bongacams.py | 3 - yt_dlp/extractor/bostonglobe.py | 3 - yt_dlp/extractor/box.py | 3 - yt_dlp/extractor/bpb.py | 3 - yt_dlp/extractor/br.py | 3 - yt_dlp/extractor/bravotv.py | 3 - yt_dlp/extractor/breakcom.py | 3 - yt_dlp/extractor/breitbart.py | 2 - yt_dlp/extractor/brightcove.py | 3 - yt_dlp/extractor/businessinsider.py | 3 - yt_dlp/extractor/buzzfeed.py | 3 - yt_dlp/extractor/byutv.py | 3 - yt_dlp/extractor/c56.py | 4 - yt_dlp/extractor/cableav.py | 1 - yt_dlp/extractor/callin.py | 1 - yt_dlp/extractor/caltrans.py | 3 - yt_dlp/extractor/cam4.py | 3 - yt_dlp/extractor/camdemy.py | 3 - yt_dlp/extractor/cammodels.py | 3 - yt_dlp/extractor/camwithher.py | 2 - yt_dlp/extractor/canalalpha.py | 3 - yt_dlp/extractor/canalc2.py | 3 - yt_dlp/extractor/canalplus.py | 4 - yt_dlp/extractor/canvas.py | 1 - yt_dlp/extractor/carambatv.py | 3 - yt_dlp/extractor/cartoonnetwork.py | 3 - yt_dlp/extractor/cbc.py | 3 - yt_dlp/extractor/cbs.py | 2 - yt_dlp/extractor/cbsinteractive.py | 4 - yt_dlp/extractor/cbslocal.py | 3 - yt_dlp/extractor/cbsnews.py | 3 - yt_dlp/extractor/cbssports.py | 3 - yt_dlp/extractor/ccc.py | 3 - yt_dlp/extractor/ccma.py | 3 - yt_dlp/extractor/cctv.py | 3 - yt_dlp/extractor/cda.py | 3 - yt_dlp/extractor/ceskatelevize.py | 3 - yt_dlp/extractor/cgtn.py | 3 - yt_dlp/extractor/channel9.py | 2 - yt_dlp/extractor/charlierose.py | 2 - yt_dlp/extractor/chaturbate.py | 2 - yt_dlp/extractor/chilloutzone.py | 2 - yt_dlp/extractor/chingari.py | 3 - yt_dlp/extractor/chirbit.py | 3 - yt_dlp/extractor/cinchcast.py | 3 - yt_dlp/extractor/cinemax.py | 4 - yt_dlp/extractor/ciscolive.py | 3 - yt_dlp/extractor/ciscowebex.py | 3 - yt_dlp/extractor/cjsw.py | 4 - yt_dlp/extractor/cliphunter.py | 2 - yt_dlp/extractor/clippit.py | 4 - 
yt_dlp/extractor/cliprs.py | 3 - yt_dlp/extractor/clipsyndicate.py | 2 - yt_dlp/extractor/closertotruth.py | 3 - yt_dlp/extractor/cloudflarestream.py | 3 - yt_dlp/extractor/cloudy.py | 3 - yt_dlp/extractor/clubic.py | 3 - yt_dlp/extractor/clyp.py | 2 - yt_dlp/extractor/cmt.py | 2 - yt_dlp/extractor/cnbc.py | 4 - yt_dlp/extractor/cnn.py | 3 - yt_dlp/extractor/comedycentral.py | 2 - yt_dlp/extractor/common.py | 37 +++---- yt_dlp/extractor/commonmistakes.py | 2 - yt_dlp/extractor/commonprotocols.py | 3 - yt_dlp/extractor/condenast.py | 3 - yt_dlp/extractor/contv.py | 3 - yt_dlp/extractor/corus.py | 4 - yt_dlp/extractor/coub.py | 3 - yt_dlp/extractor/cozytv.py | 3 - yt_dlp/extractor/cpac.py | 3 - yt_dlp/extractor/cracked.py | 2 - yt_dlp/extractor/crackle.py | 3 - yt_dlp/extractor/craftsy.py | 3 - yt_dlp/extractor/crooksandliars.py | 2 - yt_dlp/extractor/crowdbunker.py | 3 - yt_dlp/extractor/crunchyroll.py | 3 - yt_dlp/extractor/cspan.py | 2 - yt_dlp/extractor/ctsnews.py | 3 - yt_dlp/extractor/ctv.py | 3 - yt_dlp/extractor/ctvnews.py | 3 - yt_dlp/extractor/cultureunplugged.py | 2 - yt_dlp/extractor/curiositystream.py | 3 - yt_dlp/extractor/cwtv.py | 3 - yt_dlp/extractor/cybrary.py | 3 +- yt_dlp/extractor/daftsex.py | 3 - yt_dlp/extractor/dailymail.py | 3 - yt_dlp/extractor/dailymotion.py | 3 - yt_dlp/extractor/damtomo.py | 3 - yt_dlp/extractor/daum.py | 4 - yt_dlp/extractor/dbtv.py | 3 - yt_dlp/extractor/dctp.py | 3 - yt_dlp/extractor/deezer.py | 2 - yt_dlp/extractor/defense.py | 2 - yt_dlp/extractor/democracynow.py | 3 - yt_dlp/extractor/dfb.py | 3 - yt_dlp/extractor/dhm.py | 2 - yt_dlp/extractor/digg.py | 2 - yt_dlp/extractor/digitalconcerthall.py | 3 - yt_dlp/extractor/digiteka.py | 3 - yt_dlp/extractor/discovery.py | 2 - yt_dlp/extractor/discoverygo.py | 2 - yt_dlp/extractor/discoveryvr.py | 3 - yt_dlp/extractor/disney.py | 3 - yt_dlp/extractor/dispeak.py | 2 - yt_dlp/extractor/dlive.py | 2 - yt_dlp/extractor/doodstream.py | 3 - yt_dlp/extractor/dotsub.py | 2 - yt_dlp/extractor/douyutv.py | 3 - yt_dlp/extractor/dplay.py | 3 - yt_dlp/extractor/drbonanza.py | 3 - yt_dlp/extractor/dreisat.py | 2 - yt_dlp/extractor/drooble.py | 3 - yt_dlp/extractor/dropbox.py | 3 - yt_dlp/extractor/dropout.py | 1 - yt_dlp/extractor/drtuber.py | 2 - yt_dlp/extractor/drtv.py | 3 - yt_dlp/extractor/dtube.py | 3 - yt_dlp/extractor/duboku.py | 3 - yt_dlp/extractor/dumpert.py | 3 - yt_dlp/extractor/dvtv.py | 3 - yt_dlp/extractor/dw.py | 3 - yt_dlp/extractor/eagleplatform.py | 3 - yt_dlp/extractor/ebaumsworld.py | 2 - yt_dlp/extractor/echomsk.py | 3 - yt_dlp/extractor/egghead.py | 3 - yt_dlp/extractor/ehow.py | 2 - yt_dlp/extractor/eighttracks.py | 3 - yt_dlp/extractor/einthusan.py | 3 - yt_dlp/extractor/eitb.py | 3 - yt_dlp/extractor/ellentube.py | 3 - yt_dlp/extractor/elonet.py | 3 - yt_dlp/extractor/elpais.py | 3 - yt_dlp/extractor/embedly.py | 3 - yt_dlp/extractor/engadget.py | 2 - yt_dlp/extractor/epicon.py | 3 - yt_dlp/extractor/eporner.py | 4 - yt_dlp/extractor/eroprofile.py | 2 - yt_dlp/extractor/ertgr.py | 3 - yt_dlp/extractor/escapist.py | 2 - yt_dlp/extractor/espn.py | 2 - yt_dlp/extractor/esri.py | 3 - yt_dlp/extractor/europa.py | 3 - yt_dlp/extractor/europeantour.py | 3 - yt_dlp/extractor/euscreen.py | 3 - yt_dlp/extractor/everyonesmixtape.py | 3 - yt_dlp/extractor/expotv.py | 2 - yt_dlp/extractor/expressen.py | 3 - yt_dlp/extractor/extractors.py | 2 - yt_dlp/extractor/extremetube.py | 2 - yt_dlp/extractor/eyedotv.py | 3 - yt_dlp/extractor/facebook.py | 3 - yt_dlp/extractor/fancode.py | 3 - 
yt_dlp/extractor/faz.py | 3 - yt_dlp/extractor/fc2.py | 3 - yt_dlp/extractor/fczenit.py | 3 - yt_dlp/extractor/filmmodu.py | 3 - yt_dlp/extractor/filmon.py | 3 - yt_dlp/extractor/filmweb.py | 3 - yt_dlp/extractor/firsttv.py | 3 - yt_dlp/extractor/fivetv.py | 4 - yt_dlp/extractor/flickr.py | 2 - yt_dlp/extractor/folketinget.py | 3 - yt_dlp/extractor/footyroom.py | 3 - yt_dlp/extractor/formula1.py | 3 - yt_dlp/extractor/fourtube.py | 2 - yt_dlp/extractor/fox.py | 3 - yt_dlp/extractor/fox9.py | 3 - yt_dlp/extractor/foxgay.py | 2 - yt_dlp/extractor/foxnews.py | 2 - yt_dlp/extractor/foxsports.py | 2 - yt_dlp/extractor/fptplay.py | 3 - yt_dlp/extractor/franceculture.py | 3 - yt_dlp/extractor/franceinter.py | 3 - yt_dlp/extractor/francetv.py | 5 - yt_dlp/extractor/freesound.py | 2 - yt_dlp/extractor/freespeech.py | 2 - yt_dlp/extractor/frontendmasters.py | 3 - yt_dlp/extractor/fujitv.py | 2 - yt_dlp/extractor/funimation.py | 3 - yt_dlp/extractor/funk.py | 4 - yt_dlp/extractor/fusion.py | 2 - yt_dlp/extractor/fxnetworks.py | 3 - yt_dlp/extractor/gab.py | 3 - yt_dlp/extractor/gaia.py | 4 - yt_dlp/extractor/gameinformer.py | 3 - yt_dlp/extractor/gamejolt.py | 1 - yt_dlp/extractor/gamespot.py | 2 - yt_dlp/extractor/gamestar.py | 4 - yt_dlp/extractor/gaskrank.py | 3 - yt_dlp/extractor/gazeta.py | 4 - yt_dlp/extractor/gdcvault.py | 2 - yt_dlp/extractor/gedidigital.py | 3 - yt_dlp/extractor/generic.py | 10 +- yt_dlp/extractor/gettr.py | 3 - yt_dlp/extractor/gfycat.py | 3 - yt_dlp/extractor/giantbomb.py | 2 - yt_dlp/extractor/giga.py | 3 - yt_dlp/extractor/gigya.py | 2 - yt_dlp/extractor/glide.py | 3 - yt_dlp/extractor/globo.py | 3 - yt_dlp/extractor/glomex.py | 3 - yt_dlp/extractor/go.py | 3 - yt_dlp/extractor/godtube.py | 3 - yt_dlp/extractor/gofile.py | 1 - yt_dlp/extractor/golem.py | 3 - yt_dlp/extractor/googledrive.py | 2 - yt_dlp/extractor/googlepodcasts.py | 3 - yt_dlp/extractor/googlesearch.py | 2 - yt_dlp/extractor/gopro.py | 3 - yt_dlp/extractor/goshgay.py | 3 - yt_dlp/extractor/gotostage.py | 3 - yt_dlp/extractor/gputechconf.py | 3 - yt_dlp/extractor/gronkh.py | 3 - yt_dlp/extractor/groupon.py | 2 - yt_dlp/extractor/hbo.py | 3 - yt_dlp/extractor/hearthisat.py | 4 - yt_dlp/extractor/heise.py | 3 - yt_dlp/extractor/hellporno.py | 2 - yt_dlp/extractor/helsinki.py | 4 - yt_dlp/extractor/hentaistigma.py | 2 - yt_dlp/extractor/hgtv.py | 3 - yt_dlp/extractor/hidive.py | 1 - yt_dlp/extractor/historicfilms.py | 2 - yt_dlp/extractor/hitbox.py | 3 - yt_dlp/extractor/hitrecord.py | 2 - yt_dlp/extractor/hketv.py | 3 - yt_dlp/extractor/hotnewhiphop.py | 2 - yt_dlp/extractor/hotstar.py | 3 - yt_dlp/extractor/howcast.py | 2 - yt_dlp/extractor/howstuffworks.py | 2 - yt_dlp/extractor/hrfensehen.py | 3 - yt_dlp/extractor/hrti.py | 3 - yt_dlp/extractor/hse.py | 1 - yt_dlp/extractor/huajiao.py | 3 - yt_dlp/extractor/huffpost.py | 2 - yt_dlp/extractor/hungama.py | 3 - yt_dlp/extractor/huya.py | 3 - yt_dlp/extractor/hypem.py | 2 - yt_dlp/extractor/ichinanalive.py | 3 - yt_dlp/extractor/ign.py | 2 - yt_dlp/extractor/iheart.py | 3 - yt_dlp/extractor/imdb.py | 2 - yt_dlp/extractor/imggaming.py | 3 - yt_dlp/extractor/imgur.py | 2 - yt_dlp/extractor/ina.py | 3 - yt_dlp/extractor/inc.py | 2 - yt_dlp/extractor/indavideo.py | 3 - yt_dlp/extractor/infoq.py | 4 - yt_dlp/extractor/instagram.py | 2 - yt_dlp/extractor/internazionale.py | 3 - yt_dlp/extractor/internetvideoarchive.py | 2 - yt_dlp/extractor/iprima.py | 3 - yt_dlp/extractor/iqiyi.py | 3 - yt_dlp/extractor/itprotv.py | 2 - yt_dlp/extractor/itv.py | 3 - 
yt_dlp/extractor/ivi.py | 3 - yt_dlp/extractor/ivideon.py | 4 - yt_dlp/extractor/iwara.py | 2 - yt_dlp/extractor/izlesene.py | 3 - yt_dlp/extractor/jable.py | 3 - yt_dlp/extractor/jamendo.py | 3 - yt_dlp/extractor/jeuxvideo.py | 5 - yt_dlp/extractor/joj.py | 3 - yt_dlp/extractor/jove.py | 3 - yt_dlp/extractor/jwplatform.py | 3 - yt_dlp/extractor/kakao.py | 4 - yt_dlp/extractor/kaltura.py | 3 - yt_dlp/extractor/kanalplay.py | 4 - yt_dlp/extractor/karaoketv.py | 3 - yt_dlp/extractor/karrierevideos.py | 3 - yt_dlp/extractor/keezmovies.py | 2 - yt_dlp/extractor/kelbyone.py | 3 - yt_dlp/extractor/ketnet.py | 2 - yt_dlp/extractor/khanacademy.py | 2 - yt_dlp/extractor/kickstarter.py | 3 - yt_dlp/extractor/kinja.py | 3 - yt_dlp/extractor/kinopoisk.py | 3 - yt_dlp/extractor/konserthusetplay.py | 3 - yt_dlp/extractor/koo.py | 2 - yt_dlp/extractor/krasview.py | 3 - yt_dlp/extractor/ku6.py | 2 - yt_dlp/extractor/kusi.py | 3 - yt_dlp/extractor/kuwo.py | 3 - yt_dlp/extractor/la7.py | 3 - yt_dlp/extractor/laola1tv.py | 3 - yt_dlp/extractor/lastfm.py | 3 - yt_dlp/extractor/lbry.py | 3 - yt_dlp/extractor/lci.py | 3 - yt_dlp/extractor/lcp.py | 3 - yt_dlp/extractor/lecture2go.py | 3 - yt_dlp/extractor/lecturio.py | 3 - yt_dlp/extractor/leeco.py | 3 - yt_dlp/extractor/lego.py | 3 - yt_dlp/extractor/lemonde.py | 2 - yt_dlp/extractor/lenta.py | 3 - yt_dlp/extractor/libraryofcongress.py | 3 - yt_dlp/extractor/libsyn.py | 4 - yt_dlp/extractor/lifenews.py | 3 - yt_dlp/extractor/limelight.py | 3 - yt_dlp/extractor/line.py | 4 - yt_dlp/extractor/linkedin.py | 3 - yt_dlp/extractor/linuxacademy.py | 2 - yt_dlp/extractor/litv.py | 3 - yt_dlp/extractor/livejournal.py | 3 - yt_dlp/extractor/livestream.py | 2 - yt_dlp/extractor/lnkgo.py | 4 - yt_dlp/extractor/localnews8.py | 4 - yt_dlp/extractor/lovehomeporn.py | 3 - yt_dlp/extractor/lrt.py | 4 - yt_dlp/extractor/lynda.py | 2 - yt_dlp/extractor/m6.py | 3 - yt_dlp/extractor/magentamusik360.py | 3 - yt_dlp/extractor/mailru.py | 3 - yt_dlp/extractor/mainstreaming.py | 1 - yt_dlp/extractor/malltv.py | 3 - yt_dlp/extractor/mangomolo.py | 3 - yt_dlp/extractor/manoto.py | 3 - yt_dlp/extractor/manyvids.py | 3 - yt_dlp/extractor/maoritv.py | 3 - yt_dlp/extractor/markiza.py | 3 - yt_dlp/extractor/massengeschmacktv.py | 2 - yt_dlp/extractor/matchtv.py | 3 - yt_dlp/extractor/mdr.py | 3 - yt_dlp/extractor/medaltv.py | 3 - yt_dlp/extractor/mediaite.py | 3 - yt_dlp/extractor/mediaklikk.py | 3 - yt_dlp/extractor/medialaan.py | 2 - yt_dlp/extractor/mediaset.py | 3 - yt_dlp/extractor/mediasite.py | 3 - yt_dlp/extractor/medici.py | 3 - yt_dlp/extractor/megaphone.py | 3 - yt_dlp/extractor/megatvcom.py | 3 - yt_dlp/extractor/meipai.py | 3 - yt_dlp/extractor/melonvod.py | 3 - yt_dlp/extractor/meta.py | 3 - yt_dlp/extractor/metacafe.py | 2 - yt_dlp/extractor/metacritic.py | 2 - yt_dlp/extractor/mgoon.py | 4 - yt_dlp/extractor/mgtv.py | 3 - yt_dlp/extractor/miaopai.py | 3 - yt_dlp/extractor/microsoftstream.py | 3 - yt_dlp/extractor/microsoftvirtualacademy.py | 2 - yt_dlp/extractor/mildom.py | 3 - yt_dlp/extractor/minds.py | 3 - yt_dlp/extractor/ministrygrid.py | 2 - yt_dlp/extractor/minoto.py | 4 - yt_dlp/extractor/miomio.py | 3 - yt_dlp/extractor/mirrativ.py | 2 - yt_dlp/extractor/mit.py | 2 - yt_dlp/extractor/mitele.py | 3 - yt_dlp/extractor/mixch.py | 2 - yt_dlp/extractor/mixcloud.py | 2 - yt_dlp/extractor/mlb.py | 2 - yt_dlp/extractor/mlssoccer.py | 3 - yt_dlp/extractor/mnet.py | 3 - yt_dlp/extractor/moevideo.py | 4 - yt_dlp/extractor/mofosex.py | 2 - yt_dlp/extractor/mojvideo.py | 4 - 
yt_dlp/extractor/morningstar.py | 4 - yt_dlp/extractor/motherless.py | 2 - yt_dlp/extractor/motorsport.py | 3 - yt_dlp/extractor/movieclips.py | 3 - yt_dlp/extractor/moviepilot.py | 3 - yt_dlp/extractor/moviezine.py | 4 - yt_dlp/extractor/movingimage.py | 2 - yt_dlp/extractor/msn.py | 3 - yt_dlp/extractor/mtv.py | 3 - yt_dlp/extractor/muenchentv.py | 3 - yt_dlp/extractor/murrtube.py | 3 - yt_dlp/extractor/musescore.py | 3 - yt_dlp/extractor/musicdex.py | 3 - yt_dlp/extractor/mwave.py | 2 - yt_dlp/extractor/mxplayer.py | 3 - yt_dlp/extractor/mychannels.py | 4 - yt_dlp/extractor/myspace.py | 3 - yt_dlp/extractor/myspass.py | 3 - yt_dlp/extractor/myvi.py | 3 - yt_dlp/extractor/myvideoge.py | 3 - yt_dlp/extractor/myvidster.py | 2 - yt_dlp/extractor/n1.py | 3 - yt_dlp/extractor/nate.py | 3 - yt_dlp/extractor/nationalgeographic.py | 2 - yt_dlp/extractor/naver.py | 3 - yt_dlp/extractor/nba.py | 2 - yt_dlp/extractor/nbc.py | 2 - yt_dlp/extractor/ndr.py | 3 - yt_dlp/extractor/ndtv.py | 3 - yt_dlp/extractor/nebula.py | 3 - yt_dlp/extractor/nerdcubed.py | 3 - yt_dlp/extractor/neteasemusic.py | 3 - yt_dlp/extractor/netzkino.py | 4 - yt_dlp/extractor/newgrounds.py | 3 - yt_dlp/extractor/newstube.py | 3 - yt_dlp/extractor/newsy.py | 3 - yt_dlp/extractor/nextmedia.py | 3 - yt_dlp/extractor/nexx.py | 3 - yt_dlp/extractor/nfb.py | 3 - yt_dlp/extractor/nfhsnetwork.py | 3 - yt_dlp/extractor/nfl.py | 3 - yt_dlp/extractor/nhk.py | 2 - yt_dlp/extractor/nhl.py | 3 - yt_dlp/extractor/nick.py | 4 - yt_dlp/extractor/niconico.py | 3 - yt_dlp/extractor/ninecninemedia.py | 3 - yt_dlp/extractor/ninegag.py | 2 - yt_dlp/extractor/ninenow.py | 3 - yt_dlp/extractor/nintendo.py | 3 - yt_dlp/extractor/nitter.py | 3 - yt_dlp/extractor/njpwworld.py | 3 - yt_dlp/extractor/nobelprize.py | 3 - yt_dlp/extractor/noco.py | 3 - yt_dlp/extractor/nonktube.py | 2 - yt_dlp/extractor/noodlemagazine.py | 3 - yt_dlp/extractor/noovo.py | 3 - yt_dlp/extractor/normalboots.py | 3 - yt_dlp/extractor/nosvideo.py | 3 - yt_dlp/extractor/nova.py | 3 - yt_dlp/extractor/novaplay.py | 1 - yt_dlp/extractor/nowness.py | 3 - yt_dlp/extractor/noz.py | 3 - yt_dlp/extractor/npo.py | 2 - yt_dlp/extractor/npr.py | 2 - yt_dlp/extractor/nrk.py | 3 - yt_dlp/extractor/nrl.py | 3 - yt_dlp/extractor/ntvcojp.py | 3 - yt_dlp/extractor/ntvde.py | 3 - yt_dlp/extractor/ntvru.py | 3 - yt_dlp/extractor/nuevo.py | 3 - yt_dlp/extractor/nuvid.py | 2 - yt_dlp/extractor/nytimes.py | 3 - yt_dlp/extractor/nzherald.py | 3 - yt_dlp/extractor/nzz.py | 3 - yt_dlp/extractor/odatv.py | 3 - yt_dlp/extractor/odnoklassniki.py | 3 - yt_dlp/extractor/oktoberfesttv.py | 3 - yt_dlp/extractor/olympics.py | 3 - yt_dlp/extractor/on24.py | 3 - yt_dlp/extractor/once.py | 3 - yt_dlp/extractor/ondemandkorea.py | 3 - yt_dlp/extractor/onefootball.py | 3 - yt_dlp/extractor/onet.py | 3 - yt_dlp/extractor/onionstudios.py | 3 - yt_dlp/extractor/ooyala.py | 2 - yt_dlp/extractor/opencast.py | 3 - yt_dlp/extractor/openload.py | 11 +- yt_dlp/extractor/openrec.py | 3 - yt_dlp/extractor/ora.py | 3 - yt_dlp/extractor/orf.py | 3 - yt_dlp/extractor/outsidetv.py | 3 - yt_dlp/extractor/packtpub.py | 2 - yt_dlp/extractor/palcomp3.py | 4 - yt_dlp/extractor/pandoratv.py | 4 - yt_dlp/extractor/paramountplus.py | 1 - yt_dlp/extractor/parliamentliveuk.py | 3 - yt_dlp/extractor/parlview.py | 3 - yt_dlp/extractor/patreon.py | 3 - yt_dlp/extractor/pbs.py | 3 - yt_dlp/extractor/pearvideo.py | 3 - yt_dlp/extractor/peekvids.py | 3 - yt_dlp/extractor/peertube.py | 3 - yt_dlp/extractor/peertv.py | 3 - yt_dlp/extractor/peloton.py 
| 3 - yt_dlp/extractor/people.py | 3 - yt_dlp/extractor/performgroup.py | 4 - yt_dlp/extractor/periscope.py | 3 - yt_dlp/extractor/philharmoniedeparis.py | 3 - yt_dlp/extractor/phoenix.py | 3 - yt_dlp/extractor/photobucket.py | 2 - yt_dlp/extractor/piapro.py | 3 - yt_dlp/extractor/picarto.py | 3 - yt_dlp/extractor/piksel.py | 3 - yt_dlp/extractor/pinkbike.py | 3 - yt_dlp/extractor/pinterest.py | 3 - yt_dlp/extractor/pixivsketch.py | 3 - yt_dlp/extractor/pladform.py | 3 - yt_dlp/extractor/planetmarathi.py | 3 - yt_dlp/extractor/platzi.py | 3 - yt_dlp/extractor/playfm.py | 4 - yt_dlp/extractor/playplustv.py | 3 - yt_dlp/extractor/plays.py | 3 - yt_dlp/extractor/playstuff.py | 2 - yt_dlp/extractor/playtvak.py | 3 - yt_dlp/extractor/playvid.py | 2 - yt_dlp/extractor/playwire.py | 3 - yt_dlp/extractor/pluralsight.py | 2 - yt_dlp/extractor/plutotv.py | 3 - yt_dlp/extractor/podomatic.py | 2 - yt_dlp/extractor/pokemon.py | 3 - yt_dlp/extractor/pokergo.py | 3 - yt_dlp/extractor/polsatgo.py | 3 - yt_dlp/extractor/polskieradio.py | 3 - yt_dlp/extractor/popcorntimes.py | 4 - yt_dlp/extractor/popcorntv.py | 3 - yt_dlp/extractor/porn91.py | 3 - yt_dlp/extractor/porncom.py | 2 - yt_dlp/extractor/pornez.py | 2 - yt_dlp/extractor/pornflip.py | 3 - yt_dlp/extractor/pornhd.py | 3 - yt_dlp/extractor/pornhub.py | 3 - yt_dlp/extractor/pornotube.py | 2 - yt_dlp/extractor/pornovoisines.py | 4 - yt_dlp/extractor/pornoxo.py | 3 - yt_dlp/extractor/presstv.py | 4 - yt_dlp/extractor/projectveritas.py | 3 - yt_dlp/extractor/prosiebensat1.py | 3 - yt_dlp/extractor/prx.py | 3 - yt_dlp/extractor/puhutv.py | 3 - yt_dlp/extractor/puls4.py | 3 - yt_dlp/extractor/pyvideo.py | 2 - yt_dlp/extractor/qqmusic.py | 3 - yt_dlp/extractor/r7.py | 3 - yt_dlp/extractor/radiko.py | 3 - yt_dlp/extractor/radiobremen.py | 4 - yt_dlp/extractor/radiocanada.py | 4 - yt_dlp/extractor/radiode.py | 2 - yt_dlp/extractor/radiofrance.py | 3 - yt_dlp/extractor/radiojavan.py | 2 - yt_dlp/extractor/radiokapital.py | 2 - yt_dlp/extractor/radiozet.py | 1 - yt_dlp/extractor/rai.py | 3 - yt_dlp/extractor/raywenderlich.py | 2 - yt_dlp/extractor/rbmaradio.py | 3 - yt_dlp/extractor/rcs.py | 3 - yt_dlp/extractor/rcti.py | 3 - yt_dlp/extractor/rds.py | 3 - yt_dlp/extractor/redbulltv.py | 4 - yt_dlp/extractor/redgifs.py | 1 - yt_dlp/extractor/redtube.py | 2 - yt_dlp/extractor/regiotv.py | 3 - yt_dlp/extractor/rentv.py | 3 - yt_dlp/extractor/restudy.py | 3 - yt_dlp/extractor/reuters.py | 3 - yt_dlp/extractor/reverbnation.py | 2 - yt_dlp/extractor/rice.py | 3 - yt_dlp/extractor/rmcdecouverte.py | 4 - yt_dlp/extractor/rockstargames.py | 3 - yt_dlp/extractor/rokfin.py | 1 - yt_dlp/extractor/roosterteeth.py | 1 - yt_dlp/extractor/rottentomatoes.py | 2 - yt_dlp/extractor/rozhlas.py | 3 - yt_dlp/extractor/rtbf.py | 3 - yt_dlp/extractor/rte.py | 3 - yt_dlp/extractor/rtl2.py | 3 - yt_dlp/extractor/rtlnl.py | 3 - yt_dlp/extractor/rtnews.py | 3 - yt_dlp/extractor/rtp.py | 3 - yt_dlp/extractor/rtrfm.py | 2 - yt_dlp/extractor/rts.py | 3 - yt_dlp/extractor/rtve.py | 3 - yt_dlp/extractor/rtvnh.py | 3 - yt_dlp/extractor/rtvs.py | 3 - yt_dlp/extractor/ruhd.py | 3 - yt_dlp/extractor/rule34video.py | 2 - yt_dlp/extractor/rumble.py | 3 - yt_dlp/extractor/rutube.py | 3 - yt_dlp/extractor/rutv.py | 3 - yt_dlp/extractor/ruutu.py | 3 - yt_dlp/extractor/ruv.py | 3 - yt_dlp/extractor/safari.py | 3 - yt_dlp/extractor/saitosan.py | 4 - yt_dlp/extractor/samplefocus.py | 3 - yt_dlp/extractor/sapo.py | 3 - yt_dlp/extractor/savefrom.py | 3 - yt_dlp/extractor/sbs.py | 3 - 
yt_dlp/extractor/screencast.py | 3 - yt_dlp/extractor/screencastomatic.py | 3 - yt_dlp/extractor/scrippsnetworks.py | 3 - yt_dlp/extractor/scte.py | 2 - yt_dlp/extractor/seeker.py | 3 - yt_dlp/extractor/senategov.py | 3 - yt_dlp/extractor/sendtonews.py | 3 - yt_dlp/extractor/servus.py | 3 - yt_dlp/extractor/sevenplus.py | 3 - yt_dlp/extractor/sexu.py | 2 - yt_dlp/extractor/seznamzpravy.py | 3 - yt_dlp/extractor/shahid.py | 3 - yt_dlp/extractor/shared.py | 2 - yt_dlp/extractor/shemaroome.py | 3 - yt_dlp/extractor/showroomlive.py | 3 - yt_dlp/extractor/simplecast.py | 3 - yt_dlp/extractor/sina.py | 4 - yt_dlp/extractor/sixplay.py | 4 - yt_dlp/extractor/skeb.py | 3 - yt_dlp/extractor/sky.py | 3 - yt_dlp/extractor/skyit.py | 3 - yt_dlp/extractor/skylinewebcams.py | 3 - yt_dlp/extractor/skynewsarabia.py | 3 - yt_dlp/extractor/skynewsau.py | 3 - yt_dlp/extractor/slideshare.py | 2 - yt_dlp/extractor/slideslive.py | 3 - yt_dlp/extractor/slutload.py | 2 - yt_dlp/extractor/snotr.py | 4 - yt_dlp/extractor/sohu.py | 3 - yt_dlp/extractor/sonyliv.py | 3 - yt_dlp/extractor/soundcloud.py | 3 - yt_dlp/extractor/soundgasm.py | 3 - yt_dlp/extractor/southpark.py | 3 - yt_dlp/extractor/sovietscloset.py | 3 - yt_dlp/extractor/spankbang.py | 2 - yt_dlp/extractor/spankwire.py | 2 - yt_dlp/extractor/spiegel.py | 3 - yt_dlp/extractor/spiegeltv.py | 2 - yt_dlp/extractor/spike.py | 2 - yt_dlp/extractor/sport5.py | 4 - yt_dlp/extractor/sportbox.py | 3 - yt_dlp/extractor/sportdeutschland.py | 3 - yt_dlp/extractor/spotify.py | 3 - yt_dlp/extractor/spreaker.py | 3 - yt_dlp/extractor/springboardplatform.py | 3 - yt_dlp/extractor/sprout.py | 3 - yt_dlp/extractor/srgssr.py | 4 - yt_dlp/extractor/srmediathek.py | 3 - yt_dlp/extractor/stanfordoc.py | 2 - yt_dlp/extractor/startv.py | 3 - yt_dlp/extractor/steam.py | 2 - yt_dlp/extractor/stitcher.py | 2 - yt_dlp/extractor/storyfire.py | 3 - yt_dlp/extractor/streamable.py | 3 - yt_dlp/extractor/streamanity.py | 3 - yt_dlp/extractor/streamcloud.py | 3 - yt_dlp/extractor/streamcz.py | 1 - yt_dlp/extractor/streamff.py | 1 - yt_dlp/extractor/streetvoice.py | 3 - yt_dlp/extractor/stretchinternet.py | 2 - yt_dlp/extractor/stripchat.py | 3 - yt_dlp/extractor/stv.py | 4 - yt_dlp/extractor/sunporno.py | 2 - yt_dlp/extractor/sverigesradio.py | 3 - yt_dlp/extractor/svt.py | 3 - yt_dlp/extractor/swrmediathek.py | 3 - yt_dlp/extractor/syfy.py | 2 - yt_dlp/extractor/sztvhu.py | 3 - yt_dlp/extractor/tagesschau.py | 3 - yt_dlp/extractor/tass.py | 3 - yt_dlp/extractor/tastytrade.py | 2 - yt_dlp/extractor/tbs.py | 3 - yt_dlp/extractor/tdslifeway.py | 2 - yt_dlp/extractor/teachable.py | 2 - yt_dlp/extractor/teachertube.py | 3 - yt_dlp/extractor/teachingchannel.py | 2 - yt_dlp/extractor/teamcoco.py | 3 - yt_dlp/extractor/teamtreehouse.py | 3 - yt_dlp/extractor/techtalks.py | 2 - yt_dlp/extractor/tele13.py | 3 - yt_dlp/extractor/tele5.py | 3 - yt_dlp/extractor/telebruxelles.py | 3 - yt_dlp/extractor/telecinco.py | 3 - yt_dlp/extractor/telegraaf.py | 3 - yt_dlp/extractor/telemb.py | 3 - yt_dlp/extractor/telemundo.py | 3 - yt_dlp/extractor/telequebec.py | 3 - yt_dlp/extractor/teletask.py | 2 - yt_dlp/extractor/telewebion.py | 3 - yt_dlp/extractor/tennistv.py | 3 - yt_dlp/extractor/tenplay.py | 3 - yt_dlp/extractor/testurl.py | 2 - yt_dlp/extractor/tf1.py | 3 - yt_dlp/extractor/tfo.py | 3 - yt_dlp/extractor/theintercept.py | 3 - yt_dlp/extractor/theplatform.py | 3 - yt_dlp/extractor/thestar.py | 3 - yt_dlp/extractor/thesun.py | 2 - yt_dlp/extractor/theta.py | 3 - yt_dlp/extractor/theweatherchannel.py | 
3 - yt_dlp/extractor/thisamericanlife.py | 2 - yt_dlp/extractor/thisav.py | 4 - yt_dlp/extractor/thisoldhouse.py | 3 - yt_dlp/extractor/threeqsdn.py | 2 - yt_dlp/extractor/threespeak.py | 3 - yt_dlp/extractor/tiktok.py | 3 - yt_dlp/extractor/tinypic.py | 2 - yt_dlp/extractor/tmz.py | 3 - yt_dlp/extractor/tnaflix.py | 2 - yt_dlp/extractor/toggle.py | 3 - yt_dlp/extractor/tokentube.py | 3 - yt_dlp/extractor/tonline.py | 3 - yt_dlp/extractor/toongoggles.py | 4 - yt_dlp/extractor/toutv.py | 3 - yt_dlp/extractor/toypics.py | 3 - yt_dlp/extractor/traileraddict.py | 2 - yt_dlp/extractor/trilulilu.py | 3 - yt_dlp/extractor/trovo.py | 3 - yt_dlp/extractor/trueid.py | 3 - yt_dlp/extractor/trunews.py | 2 - yt_dlp/extractor/trutv.py | 4 - yt_dlp/extractor/tube8.py | 2 - yt_dlp/extractor/tubitv.py | 3 - yt_dlp/extractor/tudou.py | 4 - yt_dlp/extractor/tumblr.py | 4 - yt_dlp/extractor/tunein.py | 3 - yt_dlp/extractor/tunepk.py | 2 - yt_dlp/extractor/turbo.py | 3 - yt_dlp/extractor/turner.py | 3 - yt_dlp/extractor/tv2.py | 3 - yt_dlp/extractor/tv2dk.py | 3 - yt_dlp/extractor/tv2hu.py | 2 - yt_dlp/extractor/tv4.py | 3 - yt_dlp/extractor/tv5mondeplus.py | 3 - yt_dlp/extractor/tv5unis.py | 4 - yt_dlp/extractor/tva.py | 3 - yt_dlp/extractor/tvanouvelles.py | 3 - yt_dlp/extractor/tvc.py | 3 - yt_dlp/extractor/tver.py | 3 - yt_dlp/extractor/tvigle.py | 4 - yt_dlp/extractor/tvland.py | 3 - yt_dlp/extractor/tvn24.py | 3 - yt_dlp/extractor/tvnet.py | 3 - yt_dlp/extractor/tvnoe.py | 3 - yt_dlp/extractor/tvnow.py | 3 - yt_dlp/extractor/tvopengr.py | 3 - yt_dlp/extractor/tvp.py | 3 - yt_dlp/extractor/tvplay.py | 3 - yt_dlp/extractor/tvplayer.py | 3 - yt_dlp/extractor/tweakers.py | 2 - yt_dlp/extractor/twentyfourvideo.py | 4 - yt_dlp/extractor/twentymin.py | 3 - yt_dlp/extractor/twentythreevideo.py | 3 - yt_dlp/extractor/twitcasting.py | 3 - yt_dlp/extractor/twitch.py | 3 - yt_dlp/extractor/twitter.py | 3 - yt_dlp/extractor/udemy.py | 2 - yt_dlp/extractor/udn.py | 3 - yt_dlp/extractor/ufctv.py | 3 - yt_dlp/extractor/ukcolumn.py | 2 - yt_dlp/extractor/uktvplay.py | 3 - yt_dlp/extractor/umg.py | 3 - yt_dlp/extractor/unistra.py | 2 - yt_dlp/extractor/unity.py | 2 - yt_dlp/extractor/uol.py | 3 - yt_dlp/extractor/uplynk.py | 3 - yt_dlp/extractor/urort.py | 3 - yt_dlp/extractor/urplay.py | 3 - yt_dlp/extractor/usanetwork.py | 3 - yt_dlp/extractor/usatoday.py | 3 - yt_dlp/extractor/ustream.py | 2 - yt_dlp/extractor/ustudio.py | 3 - yt_dlp/extractor/utreon.py | 3 - yt_dlp/extractor/varzesh3.py | 3 - yt_dlp/extractor/vbox7.py | 3 - yt_dlp/extractor/veehd.py | 2 - yt_dlp/extractor/veo.py | 3 - yt_dlp/extractor/veoh.py | 2 - yt_dlp/extractor/vesti.py | 3 - yt_dlp/extractor/vevo.py | 2 - yt_dlp/extractor/vgtv.py | 3 - yt_dlp/extractor/vh1.py | 3 - yt_dlp/extractor/vice.py | 3 - yt_dlp/extractor/vidbit.py | 2 - yt_dlp/extractor/viddler.py | 3 - yt_dlp/extractor/videa.py | 3 - yt_dlp/extractor/videocampus_sachsen.py | 1 - yt_dlp/extractor/videodetective.py | 2 - yt_dlp/extractor/videofyme.py | 2 - yt_dlp/extractor/videomore.py | 3 - yt_dlp/extractor/videopress.py | 3 - yt_dlp/extractor/vidio.py | 4 - yt_dlp/extractor/vidlii.py | 3 - yt_dlp/extractor/vidzi.py | 3 - yt_dlp/extractor/vier.py | 3 - yt_dlp/extractor/viewlift.py | 2 - yt_dlp/extractor/viidea.py | 2 - yt_dlp/extractor/viki.py | 2 - yt_dlp/extractor/vimeo.py | 3 - yt_dlp/extractor/vimm.py | 1 - yt_dlp/extractor/vimple.py | 2 - yt_dlp/extractor/vine.py | 4 - yt_dlp/extractor/viqeo.py | 3 - yt_dlp/extractor/viu.py | 3 - yt_dlp/extractor/vk.py | 3 - yt_dlp/extractor/vlive.py 
| 3 - yt_dlp/extractor/vodlocker.py | 3 - yt_dlp/extractor/vodpl.py | 3 - yt_dlp/extractor/vodplatform.py | 3 - yt_dlp/extractor/voicerepublic.py | 2 - yt_dlp/extractor/voicy.py | 3 - yt_dlp/extractor/voot.py | 3 - yt_dlp/extractor/voxmedia.py | 3 - yt_dlp/extractor/vrak.py | 3 - yt_dlp/extractor/vrt.py | 4 - yt_dlp/extractor/vrv.py | 3 - yt_dlp/extractor/vshare.py | 3 - yt_dlp/extractor/vtm.py | 3 - yt_dlp/extractor/vuclip.py | 2 - yt_dlp/extractor/vupload.py | 3 - yt_dlp/extractor/vvvvid.py | 3 - yt_dlp/extractor/vyborymos.py | 3 - yt_dlp/extractor/vzaar.py | 3 - yt_dlp/extractor/wakanim.py | 3 - yt_dlp/extractor/walla.py | 3 - yt_dlp/extractor/wasdtv.py | 3 - yt_dlp/extractor/washingtonpost.py | 3 - yt_dlp/extractor/wat.py | 3 - yt_dlp/extractor/watchbox.py | 4 - yt_dlp/extractor/watchindianporn.py | 3 - yt_dlp/extractor/wdr.py | 3 - yt_dlp/extractor/webcaster.py | 3 - yt_dlp/extractor/webofstories.py | 3 - yt_dlp/extractor/weibo.py | 3 - yt_dlp/extractor/weiqitv.py | 3 - yt_dlp/extractor/whowatch.py | 3 - yt_dlp/extractor/willow.py | 1 - yt_dlp/extractor/wimtv.py | 3 - yt_dlp/extractor/wistia.py | 2 - yt_dlp/extractor/worldstarhiphop.py | 2 - yt_dlp/extractor/wppilot.py | 2 - yt_dlp/extractor/wsj.py | 3 - yt_dlp/extractor/wwe.py | 2 - yt_dlp/extractor/xbef.py | 2 - yt_dlp/extractor/xboxclips.py | 3 - yt_dlp/extractor/xfileshare.py | 3 - yt_dlp/extractor/xhamster.py | 2 - yt_dlp/extractor/xiami.py | 3 - yt_dlp/extractor/ximalaya.py | 4 - yt_dlp/extractor/xinpianchang.py | 3 - yt_dlp/extractor/xminus.py | 3 - yt_dlp/extractor/xnxx.py | 3 - yt_dlp/extractor/xstream.py | 3 - yt_dlp/extractor/xtube.py | 2 - yt_dlp/extractor/xuite.py | 3 - yt_dlp/extractor/xvideos.py | 2 - yt_dlp/extractor/xxxymovies.py | 3 - yt_dlp/extractor/yahoo.py | 3 - yt_dlp/extractor/yandexdisk.py | 3 - yt_dlp/extractor/yandexmusic.py | 3 - yt_dlp/extractor/yandexvideo.py | 3 - yt_dlp/extractor/yapfiles.py | 3 - yt_dlp/extractor/yesjapan.py | 3 - yt_dlp/extractor/yinyuetai.py | 3 - yt_dlp/extractor/ynet.py | 3 - yt_dlp/extractor/youjizz.py | 3 - yt_dlp/extractor/youku.py | 3 - yt_dlp/extractor/younow.py | 3 - yt_dlp/extractor/youporn.py | 2 - yt_dlp/extractor/yourporn.py | 2 - yt_dlp/extractor/yourupload.py | 3 - yt_dlp/extractor/youtube.py | 62 +++++------ yt_dlp/extractor/zapiks.py | 3 - yt_dlp/extractor/zattoo.py | 3 - yt_dlp/extractor/zdf.py | 3 - yt_dlp/extractor/zee5.py | 11 +- yt_dlp/extractor/zhihu.py | 3 - yt_dlp/extractor/zingmp3.py | 3 - yt_dlp/extractor/zoom.py | 4 - yt_dlp/extractor/zype.py | 3 - yt_dlp/jsinterp.py | 16 +-- yt_dlp/options.py | 10 +- yt_dlp/postprocessor/common.py | 4 +- yt_dlp/postprocessor/embedthumbnail.py | 5 +- yt_dlp/postprocessor/exec.py | 2 - yt_dlp/postprocessor/ffmpeg.py | 37 +++---- .../postprocessor/movefilesafterdownload.py | 3 +- yt_dlp/postprocessor/sponskrub.py | 1 - yt_dlp/postprocessor/xattrpp.py | 2 - yt_dlp/socks.py | 29 +++-- yt_dlp/update.py | 20 ++-- yt_dlp/utils.py | 103 +++++++++--------- yt_dlp/webvtt.py | 11 +- ytdlp_plugins/extractor/sample.py | 2 - ytdlp_plugins/postprocessor/sample.py | 2 - 1009 files changed, 375 insertions(+), 3224 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ea1893d15..eff6becac 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -178,7 +178,6 @@ After you have ensured this site is distributing its content legally, you can fo 1. 
Start with this simple template and save it to `yt_dlp/extractor/yourextractor.py`: ```python - # coding: utf-8 from .common import InfoExtractor diff --git a/devscripts/bash-completion.py b/devscripts/bash-completion.py index 46b4b2ff5..23a9a5781 100755 --- a/devscripts/bash-completion.py +++ b/devscripts/bash-completion.py @@ -1,11 +1,9 @@ #!/usr/bin/env python3 -from __future__ import unicode_literals - import os from os.path import dirname as dirn import sys -sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) +sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) import yt_dlp BASH_COMPLETION_FILE = "completions/bash/yt-dlp" diff --git a/devscripts/check-porn.py b/devscripts/check-porn.py index 50f6bebc6..6188f68ec 100644 --- a/devscripts/check-porn.py +++ b/devscripts/check-porn.py @@ -1,6 +1,4 @@ #!/usr/bin/env python3 -from __future__ import unicode_literals - """ This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check if we are not 'age_limit' tagging some porn site @@ -29,7 +27,7 @@ for test in gettestcases(): try: webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read() except Exception: - print('\nFail: {0}'.format(test['name'])) + print('\nFail: {}'.format(test['name'])) continue webpage = webpage.decode('utf8', 'replace') @@ -39,7 +37,7 @@ for test in gettestcases(): elif METHOD == 'LIST': domain = compat_urllib_parse_urlparse(test['url']).netloc if not domain: - print('\nFail: {0}'.format(test['name'])) + print('\nFail: {}'.format(test['name'])) continue domain = '.'.join(domain.split('.')[-2:]) @@ -47,11 +45,11 @@ for test in gettestcases(): if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] or test['info_dict']['age_limit'] != 18): - print('\nPotential missing age_limit check: {0}'.format(test['name'])) + print('\nPotential missing age_limit check: {}'.format(test['name'])) elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] and test['info_dict']['age_limit'] == 18): - print('\nPotential false negative: {0}'.format(test['name'])) + print('\nPotential false negative: {}'.format(test['name'])) else: sys.stdout.write('.') diff --git a/devscripts/fish-completion.py b/devscripts/fish-completion.py index fb45e0280..d958a5d6b 100755 --- a/devscripts/fish-completion.py +++ b/devscripts/fish-completion.py @@ -1,12 +1,10 @@ #!/usr/bin/env python3 -from __future__ import unicode_literals - import optparse import os from os.path import dirname as dirn import sys -sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) +sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) import yt_dlp from yt_dlp.utils import shell_quote diff --git a/devscripts/generate_aes_testdata.py b/devscripts/generate_aes_testdata.py index 0979eee5b..308c74a20 100644 --- a/devscripts/generate_aes_testdata.py +++ b/devscripts/generate_aes_testdata.py @@ -1,6 +1,4 @@ #!/usr/bin/env python3 -from __future__ import unicode_literals - import codecs import subprocess diff --git a/devscripts/lazy_load_template.py b/devscripts/lazy_load_template.py index da89e070d..0058915ae 100644 --- a/devscripts/lazy_load_template.py +++ b/devscripts/lazy_load_template.py @@ -1,4 +1,3 @@ -# coding: utf-8 import re from ..utils import bug_reports_message, write_string diff --git a/devscripts/make_contributing.py b/devscripts/make_contributing.py index 6b1b8219c..2562c4fd7 100755 --- a/devscripts/make_contributing.py +++ b/devscripts/make_contributing.py @@ -1,7 +1,4 @@ #!/usr/bin/env python3 -from __future__ import 
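The hunks around here delete `from __future__ import unicode_literals` throughout the devscripts. On Python 3 that import is a no-op, since string literals are already text; a small demonstration (illustrative, not from the patch):

```python
from __future__ import unicode_literals  # legal but pointless on py3; this is what the hunks delete

# String literals are already str on Python 3, with or without the import:
s = 'naïve'
assert isinstance(s, str)
assert u'naïve' == 'naïve'   # the u'' prefix is likewise redundant
print(type(s).__name__)      # -> str
```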
unicode_literals - -import io import optparse import re @@ -16,7 +13,7 @@ def main(): infile, outfile = args - with io.open(infile, encoding='utf-8') as inf: + with open(infile, encoding='utf-8') as inf: readme = inf.read() bug_text = re.search( @@ -26,7 +23,7 @@ def main(): out = bug_text + dev_text - with io.open(outfile, 'w', encoding='utf-8') as outf: + with open(outfile, 'w', encoding='utf-8') as outf: outf.write(out) diff --git a/devscripts/make_issue_template.py b/devscripts/make_issue_template.py index 902059231..878b94166 100644 --- a/devscripts/make_issue_template.py +++ b/devscripts/make_issue_template.py @@ -1,6 +1,4 @@ #!/usr/bin/env python3 -from __future__ import unicode_literals - import io import optparse @@ -13,7 +11,7 @@ def main(): infile, outfile = args - with io.open(infile, encoding='utf-8') as inf: + with open(infile, encoding='utf-8') as inf: issue_template_tmpl = inf.read() # Get the version from yt_dlp/version.py without importing the package @@ -22,8 +20,9 @@ def main(): out = issue_template_tmpl % {'version': locals()['__version__']} - with io.open(outfile, 'w', encoding='utf-8') as outf: + with open(outfile, 'w', encoding='utf-8') as outf: outf.write(out) + if __name__ == '__main__': main() diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py index b58fb85e3..24e8cfa5b 100644 --- a/devscripts/make_lazy_extractors.py +++ b/devscripts/make_lazy_extractors.py @@ -1,13 +1,10 @@ #!/usr/bin/env python3 -from __future__ import unicode_literals, print_function - from inspect import getsource -import io import os from os.path import dirname as dirn import sys -sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) +sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) lazy_extractors_filename = sys.argv[1] if len(sys.argv) > 1 else 'yt_dlp/extractor/lazy_extractors.py' if os.path.exists(lazy_extractors_filename): @@ -25,7 +22,7 @@ from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor if os.path.exists(plugins_blocked_dirname): os.rename(plugins_blocked_dirname, plugins_dirname) -with open('devscripts/lazy_load_template.py', 'rt') as f: +with open('devscripts/lazy_load_template.py') as f: module_template = f.read() CLASS_PROPERTIES = ['ie_key', 'working', '_match_valid_url', 'suitable', '_match_id', 'get_temp_id'] @@ -72,7 +69,7 @@ classes = _ALL_CLASSES[:-1] ordered_cls = [] while classes: for c in classes[:]: - bases = set(c.__bases__) - set((object, InfoExtractor, SearchInfoExtractor)) + bases = set(c.__bases__) - {object, InfoExtractor, SearchInfoExtractor} stop = False for b in bases: if b not in classes and b not in ordered_cls: @@ -97,9 +94,9 @@ for ie in ordered_cls: names.append(name) module_contents.append( - '\n_ALL_CLASSES = [{0}]'.format(', '.join(names))) + '\n_ALL_CLASSES = [{}]'.format(', '.join(names))) module_src = '\n'.join(module_contents) + '\n' -with io.open(lazy_extractors_filename, 'wt', encoding='utf-8') as f: +with open(lazy_extractors_filename, 'wt', encoding='utf-8') as f: f.write(module_src) diff --git a/devscripts/make_readme.py b/devscripts/make_readme.py index 3f56af744..5d85bcc63 100755 --- a/devscripts/make_readme.py +++ b/devscripts/make_readme.py @@ -2,10 +2,6 @@ # yt-dlp --help | make_readme.py # This must be run in a console of correct width - -from __future__ import unicode_literals - -import io import sys import re @@ -15,7 +11,7 @@ helptext = sys.stdin.read() if isinstance(helptext, bytes): helptext = helptext.decode('utf-8') -with io.open(README_FILE, encoding='utf-8') as 
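Two rewrites visible in the `make_lazy_extractors.py` hunks above: explicit positional indices dropped from `str.format` (auto-numbering fills them in) and `set((...))` collapsed to a set literal. A quick equivalence check with stand-in values:

```python
# Illustrative only -- stand-in values, not from the patch.
names = ['FooIE', 'BarIE']

old_style = '\n_ALL_CLASSES = [{0}]'.format(', '.join(names))
new_style = '\n_ALL_CLASSES = [{}]'.format(', '.join(names))   # auto-numbered
assert old_style == new_style

bases = set((object, int))      # py2-era spelling
assert bases == {object, int}   # equivalent set literal, as in the hunk
```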
f: +with open(README_FILE, encoding='utf-8') as f: oldreadme = f.read() header = oldreadme[:oldreadme.index('## General Options:')] @@ -25,7 +21,7 @@ options = helptext[helptext.index(' General Options:'):] options = re.sub(r'(?m)^ (\w.+)$', r'## \1', options) options = options + '\n' -with io.open(README_FILE, 'w', encoding='utf-8') as f: +with open(README_FILE, 'w', encoding='utf-8') as f: f.write(header) f.write(options) f.write(footer) diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py index 729f60a0e..26d25704e 100644 --- a/devscripts/make_supportedsites.py +++ b/devscripts/make_supportedsites.py @@ -1,7 +1,4 @@ #!/usr/bin/env python3 -from __future__ import unicode_literals - -import io import optparse import os import sys @@ -23,11 +20,11 @@ def main(): def gen_ies_md(ies): for ie in ies: - ie_md = '**{0}**'.format(ie.IE_NAME) + ie_md = f'**{ie.IE_NAME}**' if ie.IE_DESC is False: continue if ie.IE_DESC is not None: - ie_md += ': {0}'.format(ie.IE_DESC) + ie_md += f': {ie.IE_DESC}' search_key = getattr(ie, 'SEARCH_KEY', None) if search_key is not None: ie_md += f'; "{ie.SEARCH_KEY}:" prefix' @@ -40,7 +37,7 @@ def main(): ' - ' + md + '\n' for md in gen_ies_md(ies)) - with io.open(outfile, 'w', encoding='utf-8') as outf: + with open(outfile, 'w', encoding='utf-8') as outf: outf.write(out) diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py index 29c675f8a..91e9ebced 100644 --- a/devscripts/prepare_manpage.py +++ b/devscripts/prepare_manpage.py @@ -1,7 +1,4 @@ #!/usr/bin/env python3 -from __future__ import unicode_literals - -import io import optparse import os.path import re @@ -32,14 +29,14 @@ def main(): outfile, = args - with io.open(README_FILE, encoding='utf-8') as f: + with open(README_FILE, encoding='utf-8') as f: readme = f.read() readme = filter_excluded_sections(readme) readme = move_sections(readme) readme = filter_options(readme) - with io.open(outfile, 'w', encoding='utf-8') as outf: + with open(outfile, 'w', encoding='utf-8') as outf: outf.write(PREFIX + readme) diff --git a/devscripts/update-formulae.py b/devscripts/update-formulae.py index 41bc1ac7a..3a0bef52e 100644 --- a/devscripts/update-formulae.py +++ b/devscripts/update-formulae.py @@ -1,6 +1,4 @@ #!/usr/bin/env python3 -from __future__ import unicode_literals - import json import os import re @@ -27,7 +25,7 @@ tarball_file = next(x for x in pypi_release['urls'] if x['filename'].endswith('. 
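The repeated `io.open` → `open` change in these devscripts hunks is safe because, on Python 3, `io.open` is the builtin `open` under another name. A sketch, using a hypothetical `demo.txt` file:

```python
# Illustrative only: one object, two names, so dropping the io. prefix
# (and the import) changes nothing.
import io

assert io.open is open

with open('demo.txt', 'w', encoding='utf-8') as f:   # 'demo.txt' is hypothetical
    f.write('hello\n')
with io.open('demo.txt', encoding='utf-8') as f:     # identical call via the alias
    assert f.read() == 'hello\n'
```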
sha256sum = tarball_file['digests']['sha256'] url = tarball_file['url'] -with open(filename, 'r') as r: +with open(filename) as r: formulae_text = r.read() formulae_text = re.sub(r'sha256 "[0-9a-f]*?"', 'sha256 "%s"' % sha256sum, formulae_text) diff --git a/devscripts/update-version.py b/devscripts/update-version.py index 0ee7bf291..233cdaa76 100644 --- a/devscripts/update-version.py +++ b/devscripts/update-version.py @@ -4,7 +4,7 @@ import sys import subprocess -with open('yt_dlp/version.py', 'rt') as f: +with open('yt_dlp/version.py') as f: exec(compile(f.read(), 'yt_dlp/version.py', 'exec')) old_version = locals()['__version__'] diff --git a/devscripts/zsh-completion.py b/devscripts/zsh-completion.py index 780df0de6..677fe7373 100755 --- a/devscripts/zsh-completion.py +++ b/devscripts/zsh-completion.py @@ -1,11 +1,9 @@ #!/usr/bin/env python3 -from __future__ import unicode_literals - import os from os.path import dirname as dirn import sys -sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) +sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) import yt_dlp ZSH_COMPLETION_FILE = "completions/zsh/_yt-dlp" diff --git a/pyinst.py b/pyinst.py index e5934e04f..1f72bd4be 100644 --- a/pyinst.py +++ b/pyinst.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# coding: utf-8 import os import platform import sys diff --git a/setup.py b/setup.py index 503599c76..9eab7f1d7 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# coding: utf-8 import os.path import warnings import sys diff --git a/test/helper.py b/test/helper.py index 804e954a3..d940e327c 100644 --- a/test/helper.py +++ b/test/helper.py @@ -1,7 +1,4 @@ -from __future__ import unicode_literals - import errno -import io import hashlib import json import os.path @@ -35,10 +32,10 @@ def get_params(override=None): 'parameters.json') LOCAL_PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'local_parameters.json') - with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: + with open(PARAMETERS_FILE, encoding='utf-8') as pf: parameters = json.load(pf) if os.path.exists(LOCAL_PARAMETERS_FILE): - with io.open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf: + with open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf: parameters.update(json.load(pf)) if override: parameters.update(override) @@ -63,7 +60,7 @@ def report_warning(message): _msg_header = '\033[0;33mWARNING:\033[0m' else: _msg_header = 'WARNING:' - output = '%s %s\n' % (_msg_header, message) + output = f'{_msg_header} {message}\n' if 'b' in getattr(sys.stderr, 'mode', ''): output = output.encode(preferredencoding()) sys.stderr.write(output) @@ -74,7 +71,7 @@ class FakeYDL(YoutubeDL): # Different instances of the downloader can't share the same dictionary # some test set the "sublang" parameter, which would break the md5 checks. 
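The `super(FakeYDL, self).__init__(...)` → `super().__init__(...)` change in this hunk relies on Python 3's zero-argument `super()`, which resolves the class and instance implicitly inside a method. A self-contained sketch with stand-in classes:

```python
# Illustrative only -- Base/Old/New are stand-ins, not yt-dlp classes.
class Base:
    def __init__(self, params):
        self.params = params

class Old(Base):
    def __init__(self, params):
        super(Old, self).__init__(params)   # py2-compatible spelling

class New(Base):
    def __init__(self, params):
        super().__init__(params)            # equivalent zero-argument form

assert Old({'a': 1}).params == New({'a': 1}).params
```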
params = get_params(override=override) - super(FakeYDL, self).__init__(params, auto_init=False) + super().__init__(params, auto_init=False) self.result = [] def to_screen(self, s, skip_eol=None): @@ -99,8 +96,7 @@ class FakeYDL(YoutubeDL): def gettestcases(include_onlymatching=False): for ie in yt_dlp.extractor.gen_extractors(): - for tc in ie.get_testcases(include_onlymatching): - yield tc + yield from ie.get_testcases(include_onlymatching) md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() @@ -113,33 +109,30 @@ def expect_value(self, got, expected, field): self.assertTrue( isinstance(got, compat_str), - 'Expected a %s object, but got %s for field %s' % ( - compat_str.__name__, type(got).__name__, field)) + f'Expected a {compat_str.__name__} object, but got {type(got).__name__} for field {field}') self.assertTrue( match_rex.match(got), - 'field %s (value: %r) should match %r' % (field, got, match_str)) + f'field {field} (value: {got!r}) should match {match_str!r}') elif isinstance(expected, compat_str) and expected.startswith('startswith:'): start_str = expected[len('startswith:'):] self.assertTrue( isinstance(got, compat_str), - 'Expected a %s object, but got %s for field %s' % ( - compat_str.__name__, type(got).__name__, field)) + f'Expected a {compat_str.__name__} object, but got {type(got).__name__} for field {field}') self.assertTrue( got.startswith(start_str), - 'field %s (value: %r) should start with %r' % (field, got, start_str)) + f'field {field} (value: {got!r}) should start with {start_str!r}') elif isinstance(expected, compat_str) and expected.startswith('contains:'): contains_str = expected[len('contains:'):] self.assertTrue( isinstance(got, compat_str), - 'Expected a %s object, but got %s for field %s' % ( - compat_str.__name__, type(got).__name__, field)) + f'Expected a {compat_str.__name__} object, but got {type(got).__name__} for field {field}') self.assertTrue( contains_str in got, - 'field %s (value: %r) should contain %r' % (field, got, contains_str)) + f'field {field} (value: {got!r}) should contain {contains_str!r}') elif isinstance(expected, type): self.assertTrue( isinstance(got, expected), - 'Expected type %r for field %s, but got value %r of type %r' % (expected, field, got, type(got))) + f'Expected type {expected!r} for field {field}, but got value {got!r} of type {type(got)!r}') elif isinstance(expected, dict) and isinstance(got, dict): expect_dict(self, got, expected) elif isinstance(expected, list) and isinstance(got, list): @@ -159,13 +152,12 @@ def expect_value(self, got, expected, field): if isinstance(expected, compat_str) and expected.startswith('md5:'): self.assertTrue( isinstance(got, compat_str), - 'Expected field %s to be a unicode object, but got value %r of type %r' % (field, got, type(got))) + f'Expected field {field} to be a unicode object, but got value {got!r} of type {type(got)!r}') got = 'md5:' + md5(got) elif isinstance(expected, compat_str) and re.match(r'^(?:min|max)?count:\d+', expected): self.assertTrue( isinstance(got, (list, dict)), - 'Expected field %s to be a list or a dict, but it is of type %s' % ( - field, type(got).__name__)) + f'Expected field {field} to be a list or a dict, but it is of type {type(got).__name__}') op, _, expected_num = expected.partition(':') expected_num = int(expected_num) if op == 'mincount': @@ -185,7 +177,7 @@ def expect_value(self, got, expected, field): return self.assertEqual( expected, got, - 'Invalid value for field %s, expected %r, got %r' % (field, expected, got)) + f'Invalid value for field 
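Two `test/helper.py` rewrites recur in the hunks above: assertion messages move from `%`-formatting to f-strings (with `!r` where `%r` was used), and a passthrough loop becomes a `yield from` delegation. Both pairs below are equivalent (illustrative values only):

```python
# Illustrative only -- stand-in values, not from the patch.
field, got, expected = 'duration', 10, 20

old = 'Invalid value for field %s, expected %r, got %r' % (field, expected, got)
new = f'Invalid value for field {field}, expected {expected!r}, got {got!r}'
assert old == new            # identical output, clearer at the call site

def cases_old(groups):
    for group in groups:
        for case in group:   # was: for tc in ...: yield tc
            yield case

def cases_new(groups):
    for group in groups:
        yield from group     # delegation, as in gettestcases()

data = [[1, 2], [3]]
assert list(cases_old(data)) == list(cases_new(data))
```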
{field}, expected {expected!r}, got {got!r}') def expect_dict(self, got_dict, expected_dict): @@ -260,13 +252,13 @@ def expect_info_dict(self, got_dict, expected_dict): info_dict_str = '' if len(missing_keys) != len(expected_dict): info_dict_str += ''.join( - ' %s: %s,\n' % (_repr(k), _repr(v)) + f' {_repr(k)}: {_repr(v)},\n' for k, v in test_info_dict.items() if k not in missing_keys) if info_dict_str: info_dict_str += '\n' info_dict_str += ''.join( - ' %s: %s,\n' % (_repr(k), _repr(test_info_dict[k])) + f' {_repr(k)}: {_repr(test_info_dict[k])},\n' for k in missing_keys) write_string( '\n\'info_dict\': {\n' + info_dict_str + '},\n', out=sys.stderr) @@ -295,21 +287,21 @@ def assertRegexpMatches(self, text, regexp, msg=None): def assertGreaterEqual(self, got, expected, msg=None): if not (got >= expected): if msg is None: - msg = '%r not greater than or equal to %r' % (got, expected) + msg = f'{got!r} not greater than or equal to {expected!r}' self.assertTrue(got >= expected, msg) def assertLessEqual(self, got, expected, msg=None): if not (got <= expected): if msg is None: - msg = '%r not less than or equal to %r' % (got, expected) + msg = f'{got!r} not less than or equal to {expected!r}' self.assertTrue(got <= expected, msg) def assertEqual(self, got, expected, msg=None): if not (got == expected): if msg is None: - msg = '%r not equal to %r' % (got, expected) + msg = f'{got!r} not equal to {expected!r}' self.assertTrue(got == expected, msg) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 866ded243..4fd21bed4 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -1,9 +1,5 @@ #!/usr/bin/env python3 - -from __future__ import unicode_literals - # Allow direct execution -import io import os import sys import unittest @@ -1011,8 +1007,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ ] for m3u8_file, m3u8_url, expected_formats, expected_subs in _TEST_CASES: - with io.open('./test/testdata/m3u8/%s.m3u8' % m3u8_file, - mode='r', encoding='utf-8') as f: + with open('./test/testdata/m3u8/%s.m3u8' % m3u8_file, encoding='utf-8') as f: formats, subs = self.ie._parse_m3u8_formats_and_subtitles( f.read(), m3u8_url, ext='mp4') self.ie._sort_formats(formats) @@ -1357,8 +1352,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ ] for mpd_file, mpd_url, mpd_base_url, expected_formats, expected_subtitles in _TEST_CASES: - with io.open('./test/testdata/mpd/%s.mpd' % mpd_file, - mode='r', encoding='utf-8') as f: + with open('./test/testdata/mpd/%s.mpd' % mpd_file, encoding='utf-8') as f: formats, subtitles = self.ie._parse_mpd_formats_and_subtitles( compat_etree_fromstring(f.read().encode('utf-8')), mpd_base_url=mpd_base_url, mpd_url=mpd_url) @@ -1549,8 +1543,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ ] for ism_file, ism_url, expected_formats, expected_subtitles in _TEST_CASES: - with io.open('./test/testdata/ism/%s.Manifest' % ism_file, - mode='r', encoding='utf-8') as f: + with open('./test/testdata/ism/%s.Manifest' % ism_file, encoding='utf-8') as f: formats, subtitles = self.ie._parse_ism_formats_and_subtitles( compat_etree_fromstring(f.read().encode('utf-8')), ism_url=ism_url) self.ie._sort_formats(formats) @@ -1576,8 +1569,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ ] for f4m_file, f4m_url, expected_formats in _TEST_CASES: - with io.open('./test/testdata/f4m/%s.f4m' % f4m_file, - mode='r', 
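The `mode='r'` arguments being dropped from `io.open` calls in the `test_InfoExtractor.py` hunks are redundant: `'r'`, `'rt'`, and no mode at all open the same text stream. A sketch against a hypothetical test file:

```python
# Illustrative only -- 'sample.m3u8' is a hypothetical file for the demo.
with open('sample.m3u8', 'w', encoding='utf-8') as f:
    f.write('#EXTM3U\n')

for mode in ('r', 'rt'):
    with open('sample.m3u8', mode, encoding='utf-8') as f:
        assert f.read() == '#EXTM3U\n'
with open('sample.m3u8', encoding='utf-8') as f:        # same thing, shorter
    assert f.read() == '#EXTM3U\n'
```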
encoding='utf-8') as f: + with open('./test/testdata/f4m/%s.f4m' % f4m_file, encoding='utf-8') as f: formats = self.ie._parse_f4m_formats( compat_etree_fromstring(f.read().encode('utf-8')), f4m_url, None) @@ -1624,8 +1616,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ ] for xspf_file, xspf_url, expected_entries in _TEST_CASES: - with io.open('./test/testdata/xspf/%s.xspf' % xspf_file, - mode='r', encoding='utf-8') as f: + with open('./test/testdata/xspf/%s.xspf' % xspf_file, encoding='utf-8') as f: entries = self.ie._parse_xspf( compat_etree_fromstring(f.read().encode('utf-8')), xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index c9108c5b6..480c7539c 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -1,8 +1,4 @@ #!/usr/bin/env python3 -# coding: utf-8 - -from __future__ import unicode_literals - # Allow direct execution import os import sys @@ -25,7 +21,7 @@ TEST_URL = 'http://localhost/sample.mp4' class YDL(FakeYDL): def __init__(self, *args, **kwargs): - super(YDL, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self.downloaded_info_dicts = [] self.msgs = [] @@ -551,11 +547,11 @@ class TestYoutubeDL(unittest.TestCase): def s_formats(lang, autocaption=False): return [{ 'ext': ext, - 'url': 'http://localhost/video.%s.%s' % (lang, ext), + 'url': f'http://localhost/video.{lang}.{ext}', '_auto': autocaption, } for ext in ['vtt', 'srt', 'ass']] - subtitles = dict((l, s_formats(l)) for l in ['en', 'fr', 'es']) - auto_captions = dict((l, s_formats(l, True)) for l in ['it', 'pt', 'es']) + subtitles = {l: s_formats(l) for l in ['en', 'fr', 'es']} + auto_captions = {l: s_formats(l, True) for l in ['it', 'pt', 'es']} info_dict = { 'id': 'test', 'title': 'Test', @@ -580,7 +576,7 @@ class TestYoutubeDL(unittest.TestCase): result = get_info({'writesubtitles': True}) subs = result['requested_subtitles'] self.assertTrue(subs) - self.assertEqual(set(subs.keys()), set(['en'])) + self.assertEqual(set(subs.keys()), {'en'}) self.assertTrue(subs['en'].get('data') is None) self.assertEqual(subs['en']['ext'], 'ass') @@ -591,39 +587,39 @@ class TestYoutubeDL(unittest.TestCase): result = get_info({'writesubtitles': True, 'subtitleslangs': ['es', 'fr', 'it']}) subs = result['requested_subtitles'] self.assertTrue(subs) - self.assertEqual(set(subs.keys()), set(['es', 'fr'])) + self.assertEqual(set(subs.keys()), {'es', 'fr'}) result = get_info({'writesubtitles': True, 'subtitleslangs': ['all', '-en']}) subs = result['requested_subtitles'] self.assertTrue(subs) - self.assertEqual(set(subs.keys()), set(['es', 'fr'])) + self.assertEqual(set(subs.keys()), {'es', 'fr'}) result = get_info({'writesubtitles': True, 'subtitleslangs': ['en', 'fr', '-en']}) subs = result['requested_subtitles'] self.assertTrue(subs) - self.assertEqual(set(subs.keys()), set(['fr'])) + self.assertEqual(set(subs.keys()), {'fr'}) result = get_info({'writesubtitles': True, 'subtitleslangs': ['-en', 'en']}) subs = result['requested_subtitles'] self.assertTrue(subs) - self.assertEqual(set(subs.keys()), set(['en'])) + self.assertEqual(set(subs.keys()), {'en'}) result = get_info({'writesubtitles': True, 'subtitleslangs': ['e.+']}) subs = result['requested_subtitles'] self.assertTrue(subs) - self.assertEqual(set(subs.keys()), set(['es', 'en'])) + self.assertEqual(set(subs.keys()), {'es', 'en'}) result = get_info({'writesubtitles': True, 'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']}) subs = 
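The `test_YoutubeDL.py` hunks replace `dict((k, v) ...)` generator calls with dict comprehensions and `set([...])` calls with set literals; each pair is equivalent, as this sketch with a simplified stand-in helper shows:

```python
# Illustrative only -- s_formats is a simplified stand-in for the test helper.
def s_formats(lang):
    return [f'http://localhost/video.{lang}.{ext}' for ext in ('vtt', 'srt')]

old_subs = dict((lang, s_formats(lang)) for lang in ['en', 'fr'])
new_subs = {lang: s_formats(lang) for lang in ['en', 'fr']}
assert old_subs == new_subs

assert set(['en']) == {'en'}    # set literal, as in the assertEqual hunks
```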
result['requested_subtitles'] self.assertTrue(subs) - self.assertEqual(set(subs.keys()), set(['es', 'pt'])) + self.assertEqual(set(subs.keys()), {'es', 'pt'}) self.assertFalse(subs['es']['_auto']) self.assertTrue(subs['pt']['_auto']) result = get_info({'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']}) subs = result['requested_subtitles'] self.assertTrue(subs) - self.assertEqual(set(subs.keys()), set(['es', 'pt'])) + self.assertEqual(set(subs.keys()), {'es', 'pt'}) self.assertTrue(subs['es']['_auto']) self.assertTrue(subs['pt']['_auto']) @@ -1082,7 +1078,7 @@ class TestYoutubeDL(unittest.TestCase): class _YDL(YDL): def __init__(self, *args, **kwargs): - super(_YDL, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) def trouble(self, s, tb=None): pass diff --git a/test/test_YoutubeDLCookieJar.py b/test/test_YoutubeDLCookieJar.py index c514413a4..1e5bedcae 100644 --- a/test/test_YoutubeDLCookieJar.py +++ b/test/test_YoutubeDLCookieJar.py @@ -1,8 +1,4 @@ #!/usr/bin/env python3 -# coding: utf-8 - -from __future__ import unicode_literals - import os import re import sys diff --git a/test/test_aes.py b/test/test_aes.py index 5c9273f8a..34584a04f 100644 --- a/test/test_aes.py +++ b/test/test_aes.py @@ -1,6 +1,4 @@ #!/usr/bin/env python3 -from __future__ import unicode_literals - # Allow direct execution import os import sys diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py index 70f9f4845..50d16a729 100644 --- a/test/test_age_restriction.py +++ b/test/test_age_restriction.py @@ -1,6 +1,4 @@ #!/usr/bin/env python3 -from __future__ import unicode_literals - # Allow direct execution import os import sys diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 2d89366d4..d70da8cae 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -1,7 +1,4 @@ #!/usr/bin/env python3 - -from __future__ import unicode_literals - # Allow direct execution import os import sys @@ -81,11 +78,11 @@ class TestAllURLsMatching(unittest.TestCase): url = tc['url'] for ie in ies: if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'): - self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url)) + self.assertTrue(ie.suitable(url), f'{type(ie).__name__} should match URL {url!r}') else: self.assertFalse( ie.suitable(url), - '%s should not match URL %r . That URL belongs to %s.' % (type(ie).__name__, url, tc['name'])) + f'{type(ie).__name__} should not match URL {url!r} . 

     def test_keywords(self):
         self.assertMatch(':ytsubs', ['youtube:subscriptions'])
@@ -120,7 +117,7 @@ class TestAllURLsMatching(unittest.TestCase):
         for (ie_name, ie_list) in name_accu.items():
             self.assertEqual(
                 len(ie_list), 1,
-                'Multiple extractors with the same IE_NAME "%s" (%s)' % (ie_name, ', '.join(ie_list)))
+                f'Multiple extractors with the same IE_NAME "{ie_name}" ({", ".join(ie_list)})')


 if __name__ == '__main__':
diff --git a/test/test_cache.py b/test/test_cache.py
index 8c4f85387..4e4641eba 100644
--- a/test/test_cache.py
+++ b/test/test_cache.py
@@ -1,8 +1,4 @@
 #!/usr/bin/env python3
-# coding: utf-8
-
-from __future__ import unicode_literals
-
 import shutil

 # Allow direct execution
diff --git a/test/test_compat.py b/test/test_compat.py
index 6cbffd6fe..31524c5ab 100644
--- a/test/test_compat.py
+++ b/test/test_compat.py
@@ -1,8 +1,4 @@
 #!/usr/bin/env python3
-# coding: utf-8
-
-from __future__ import unicode_literals
-
 # Allow direct execution
 import os
 import sys
@@ -48,7 +44,7 @@ class TestCompat(unittest.TestCase):
         all_names = yt_dlp.compat.__all__
         present_names = set(filter(
             lambda c: '_' in c and not c.startswith('_'),
-            dir(yt_dlp.compat))) - set(['unicode_literals'])
+            dir(yt_dlp.compat))) - {'unicode_literals'}
         self.assertEqual(all_names, sorted(present_names))

     def test_compat_urllib_parse_unquote(self):
diff --git a/test/test_download.py b/test/test_download.py
index 818a670fb..3c6b55d98 100755
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -1,7 +1,4 @@
 #!/usr/bin/env python3
-
-from __future__ import unicode_literals
-
 # Allow direct execution
 import os
 import sys
@@ -21,7 +18,6 @@ from test.helper import (


 import hashlib
-import io
 import json
 import socket

@@ -46,7 +42,7 @@ class YoutubeDL(yt_dlp.YoutubeDL):
     def __init__(self, *args, **kwargs):
         self.to_stderr = self.to_screen
         self.processed_info_dicts = []
-        super(YoutubeDL, self).__init__(*args, **kwargs)
+        super().__init__(*args, **kwargs)

     def report_warning(self, message):
         # Don't accept warnings during tests
@@ -54,7 +50,7 @@ class YoutubeDL(yt_dlp.YoutubeDL):

     def process_info(self, info_dict):
         self.processed_info_dicts.append(info_dict.copy())
-        return super(YoutubeDL, self).process_info(info_dict)
+        return super().process_info(info_dict)


 def _file_md5(fn):
@@ -80,7 +76,7 @@ class TestDownload(unittest.TestCase):

         def strclass(cls):
             """From 2.7's unittest; 2.6 had _strclass so we can't import it."""
-            return '%s.%s' % (cls.__module__, cls.__name__)
+            return f'{cls.__module__}.{cls.__name__}'

         add_ie = getattr(self, self._testMethodName).add_ie
         return '%s (%s)%s:' % (self._testMethodName,
@@ -179,7 +175,7 @@ def generator(test_case, tname):
                         report_warning('%s failed due to network errors, skipping...' % tname)
                         return

-                    print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num))
+                    print(f'Retrying: {try_num} failed tries\n\n##########\n\n')

                     try_num += 1
         else:
@@ -245,7 +241,7 @@ def generator(test_case, tname):
                     self.assertTrue(
                         os.path.exists(info_json_fn),
                         'Missing info file %s' % info_json_fn)
-                    with io.open(info_json_fn, encoding='utf-8') as infof:
+                    with open(info_json_fn, encoding='utf-8') as infof:
                         info_dict = json.load(infof)
                     expect_info_dict(self, info_dict, tc.get('info_dict', {}))
                 finally:
diff --git a/test/test_downloader_http.py b/test/test_downloader_http.py
index 03ae8c62a..c511909c7 100644
--- a/test/test_downloader_http.py
+++ b/test/test_downloader_http.py
@@ -1,7 +1,4 @@
 #!/usr/bin/env python3
-# coding: utf-8
-from __future__ import unicode_literals
-
 # Allow direct execution
 import os
 import re
@@ -66,7 +63,7 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
             assert False


-class FakeLogger(object):
+class FakeLogger:
     def debug(self, msg):
         pass

diff --git a/test/test_execution.py b/test/test_execution.py
index 4981786e1..623f08165 100644
--- a/test/test_execution.py
+++ b/test/test_execution.py
@@ -1,8 +1,4 @@
 #!/usr/bin/env python3
-# coding: utf-8
-
-from __future__ import unicode_literals
-
 import unittest

 import sys
@@ -45,7 +41,7 @@ class TestExecution(unittest.TestCase):
         finally:
             try:
                 os.remove('yt_dlp/extractor/lazy_extractors.py')
-            except (IOError, OSError):
+            except OSError:
                 pass

diff --git a/test/test_http.py b/test/test_http.py
index eec8684b1..2106220eb 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -1,7 +1,4 @@
 #!/usr/bin/env python3
-# coding: utf-8
-from __future__ import unicode_literals
-
 # Allow direct execution
 import os
 import sys
@@ -41,7 +38,7 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
             assert False


-class FakeLogger(object):
+class FakeLogger:
     def debug(self, msg):
         pass

@@ -117,23 +114,23 @@ class TestProxy(unittest.TestCase):
         self.geo_proxy_thread.start()

     def test_proxy(self):
-        geo_proxy = '127.0.0.1:{0}'.format(self.geo_port)
+        geo_proxy = f'127.0.0.1:{self.geo_port}'
         ydl = YoutubeDL({
-            'proxy': '127.0.0.1:{0}'.format(self.port),
+            'proxy': f'127.0.0.1:{self.port}',
             'geo_verification_proxy': geo_proxy,
         })
         url = 'http://foo.com/bar'
         response = ydl.urlopen(url).read().decode('utf-8')
-        self.assertEqual(response, 'normal: {0}'.format(url))
+        self.assertEqual(response, f'normal: {url}')

         req = compat_urllib_request.Request(url)
         req.add_header('Ytdl-request-proxy', geo_proxy)
         response = ydl.urlopen(req).read().decode('utf-8')
-        self.assertEqual(response, 'geo: {0}'.format(url))
+        self.assertEqual(response, f'geo: {url}')

     def test_proxy_with_idn(self):
         ydl = YoutubeDL({
-            'proxy': '127.0.0.1:{0}'.format(self.port),
+            'proxy': f'127.0.0.1:{self.port}',
         })
         url = 'http://中文.tw/'
         response = ydl.urlopen(url).read().decode('utf-8')
diff --git a/test/test_iqiyi_sdk_interpreter.py b/test/test_iqiyi_sdk_interpreter.py
index adbae4690..57a7ed3a8 100644
--- a/test/test_iqiyi_sdk_interpreter.py
+++ b/test/test_iqiyi_sdk_interpreter.py
@@ -1,7 +1,4 @@
 #!/usr/bin/env python3
-
-from __future__ import unicode_literals
-
 # Allow direct execution
 import os
 import sys
@@ -12,7 +9,7 @@ from test.helper import FakeYDL, is_download_test
 from yt_dlp.extractor import IqiyiIE


-class WarningLogger(object):
+class WarningLogger:
     def __init__(self):
         self.messages = []

diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index e230b045f..10a465cf9 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -1,7 +1,4 @@
 #!/usr/bin/env python3
-
-from __future__ import unicode_literals
-
 # Allow direct execution
 import os
 import sys
diff --git a/test/test_netrc.py b/test/test_netrc.py
index 94a703406..adc3a0ed1 100644
--- a/test/test_netrc.py
+++ b/test/test_netrc.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import os
 import sys
 import unittest
diff --git a/test/test_overwrites.py b/test/test_overwrites.py
index f5d10a409..8e0548db5 100644
--- a/test/test_overwrites.py
+++ b/test/test_overwrites.py
@@ -1,6 +1,4 @@
 #!/usr/bin/env python3
-from __future__ import unicode_literals
-
 import os
 from os.path import join
 import subprocess
diff --git a/test/test_post_hooks.py b/test/test_post_hooks.py
index 1555a23e0..020203f2f 100644
--- a/test/test_post_hooks.py
+++ b/test/test_post_hooks.py
@@ -1,7 +1,4 @@
 #!/usr/bin/env python3
-
-from __future__ import unicode_literals
-
 import os
 import sys
 import unittest
@@ -14,7 +11,7 @@ from yt_dlp.utils import DownloadError

 class YoutubeDL(yt_dlp.YoutubeDL):
     def __init__(self, *args, **kwargs):
-        super(YoutubeDL, self).__init__(*args, **kwargs)
+        super().__init__(*args, **kwargs)
         self.to_stderr = self.to_screen

diff --git a/test/test_postprocessors.py b/test/test_postprocessors.py
index bbe998993..e5893f7d2 100644
--- a/test/test_postprocessors.py
+++ b/test/test_postprocessors.py
@@ -1,7 +1,4 @@
 #!/usr/bin/env python3
-
-from __future__ import unicode_literals
-
 # Allow direct execution
 import os
 import sys
diff --git a/test/test_socks.py b/test/test_socks.py
index cf1f613ab..02723b469 100644
--- a/test/test_socks.py
+++ b/test/test_socks.py
@@ -1,7 +1,4 @@
 #!/usr/bin/env python3
-# coding: utf-8
-from __future__ import unicode_literals
-
 # Allow direct execution
 import os
 import sys
diff --git a/test/test_subtitles.py b/test/test_subtitles.py
index 95e33e54a..0be1842da 100644
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -1,6 +1,4 @@
 #!/usr/bin/env python3
-from __future__ import unicode_literals
-
 # Allow direct execution
 import os
 import sys
@@ -54,7 +52,7 @@ class BaseTestSubtitles(unittest.TestCase):
             if sub_info.get('data') is None:
                 uf = self.DL.urlopen(sub_info['url'])
                 sub_info['data'] = uf.read().decode('utf-8')
-        return dict((l, sub_info['data']) for l, sub_info in subtitles.items())
+        return {l: sub_info['data'] for l, sub_info in subtitles.items()}


 @is_download_test
@@ -163,7 +161,7 @@ class TestVimeoSubtitles(BaseTestSubtitles):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
-        self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr']))
+        self.assertEqual(set(subtitles.keys()), {'de', 'en', 'es', 'fr'})
         self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
         self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')

@@ -186,7 +184,7 @@ class TestWallaSubtitles(BaseTestSubtitles):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
-        self.assertEqual(set(subtitles.keys()), set(['heb']))
+        self.assertEqual(set(subtitles.keys()), {'heb'})
         self.assertEqual(md5(subtitles['heb']), 'e758c5d7cb982f6bef14f377ec7a3920')

     def test_nosubtitles(self):
@@ -208,7 +206,7 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
-        self.assertEqual(set(subtitles.keys()), set(['cs']))
+        self.assertEqual(set(subtitles.keys()), {'cs'})
         self.assertTrue(len(subtitles['cs']) > 20000)

     def test_nosubtitles(self):
@@ -229,7 +227,7 @@ class TestLyndaSubtitles(BaseTestSubtitles):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
-        self.assertEqual(set(subtitles.keys()), set(['en']))
+        self.assertEqual(set(subtitles.keys()), {'en'})
         self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7')


@@ -242,7 +240,7 @@ class TestNPOSubtitles(BaseTestSubtitles):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
-        self.assertEqual(set(subtitles.keys()), set(['nl']))
+        self.assertEqual(set(subtitles.keys()), {'nl'})
         self.assertEqual(md5(subtitles['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')


@@ -252,13 +250,13 @@ class TestMTVSubtitles(BaseTestSubtitles):
     IE = ComedyCentralIE

     def getInfoDict(self):
-        return super(TestMTVSubtitles, self).getInfoDict()['entries'][0]
+        return super().getInfoDict()['entries'][0]

     def test_allsubtitles(self):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
-        self.assertEqual(set(subtitles.keys()), set(['en']))
+        self.assertEqual(set(subtitles.keys()), {'en'})
         self.assertEqual(md5(subtitles['en']), '78206b8d8a0cfa9da64dc026eea48961')


@@ -271,7 +269,7 @@ class TestNRKSubtitles(BaseTestSubtitles):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
-        self.assertEqual(set(subtitles.keys()), set(['no']))
+        self.assertEqual(set(subtitles.keys()), {'no'})
         self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2')


@@ -284,7 +282,7 @@ class TestRaiPlaySubtitles(BaseTestSubtitles):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
-        self.assertEqual(set(subtitles.keys()), set(['it']))
+        self.assertEqual(set(subtitles.keys()), {'it'})
         self.assertEqual(md5(subtitles['it']), 'b1d90a98755126b61e667567a1f6680a')

     def test_subtitles_array_key(self):
@@ -292,7 +290,7 @@ class TestRaiPlaySubtitles(BaseTestSubtitles):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
-        self.assertEqual(set(subtitles.keys()), set(['it']))
+        self.assertEqual(set(subtitles.keys()), {'it'})
         self.assertEqual(md5(subtitles['it']), '4b3264186fbb103508abe5311cfcb9cd')


@@ -305,7 +303,7 @@ class TestVikiSubtitles(BaseTestSubtitles):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
-        self.assertEqual(set(subtitles.keys()), set(['en']))
+        self.assertEqual(set(subtitles.keys()), {'en'})
         self.assertEqual(md5(subtitles['en']), '53cb083a5914b2d84ef1ab67b880d18a')


@@ -320,7 +318,7 @@ class TestThePlatformSubtitles(BaseTestSubtitles):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
-        self.assertEqual(set(subtitles.keys()), set(['en']))
+        self.assertEqual(set(subtitles.keys()), {'en'})
         self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')


@@ -333,7 +331,7 @@ class TestThePlatformFeedSubtitles(BaseTestSubtitles):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
-        self.assertEqual(set(subtitles.keys()), set(['en']))
+        self.assertEqual(set(subtitles.keys()), {'en'})
         self.assertEqual(md5(subtitles['en']), '48649a22e82b2da21c9a67a395eedade')
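The hunks above and below all apply one mechanical rewrite: set([...]) becomes a set literal and dict((k, v) for ...) becomes a dict comprehension. Both pairs are equivalent, as this illustrative snippet (not part of the patch) checks:

    langs = ['en', 'fr', 'es']
    assert set(['en']) == {'en'}
    assert dict((l, l.upper()) for l in langs) == {l: l.upper() for l in langs}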
@@ -348,7 +346,7 @@ class TestRtveSubtitles(BaseTestSubtitles):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
-        self.assertEqual(set(subtitles.keys()), set(['es']))
+        self.assertEqual(set(subtitles.keys()), {'es'})
         self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')


@@ -361,7 +359,7 @@ class TestDemocracynowSubtitles(BaseTestSubtitles):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
-        self.assertEqual(set(subtitles.keys()), set(['en']))
+        self.assertEqual(set(subtitles.keys()), {'en'})
         self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c')

     def test_subtitles_in_page(self):
@@ -369,7 +367,7 @@ class TestDemocracynowSubtitles(BaseTestSubtitles):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
-        self.assertEqual(set(subtitles.keys()), set(['en']))
+        self.assertEqual(set(subtitles.keys()), {'en'})
         self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c')


@@ -382,7 +380,7 @@ class TestPBSSubtitles(BaseTestSubtitles):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
-        self.assertEqual(set(subtitles.keys()), set(['en']))
+        self.assertEqual(set(subtitles.keys()), {'en'})

     def test_subtitles_dfxp_format(self):
         self.DL.params['writesubtitles'] = True
diff --git a/test/test_update.py.disabled b/test/test_update.py.disabled
index 1e8edf0f6..5f0794ae2 100644
--- a/test/test_update.py.disabled
+++ b/test/test_update.py.disabled
@@ -1,7 +1,4 @@
 #!/usr/bin/env python3
-
-from __future__ import unicode_literals
-
 # Allow direct execution
 import os
 import sys
diff --git a/test/test_utils.py b/test/test_utils.py
index c1228c74a..e0c862807 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -1,8 +1,4 @@
 #!/usr/bin/env python3
-# coding: utf-8
-
-from __future__ import unicode_literals
-
 # Allow direct execution
 import os
 import sys
@@ -266,7 +262,7 @@ class TestUtil(unittest.TestCase):

     def test_expand_path(self):
         def env(var):
-            return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var)
+            return f'%{var}%' if sys.platform == 'win32' else f'${var}'

         compat_setenv('yt_dlp_EXPATH_PATH', 'expanded')
         self.assertEqual(expand_path(env('yt_dlp_EXPATH_PATH')), 'expanded')
@@ -666,8 +662,7 @@ class TestUtil(unittest.TestCase):
         def get_page(pagenum):
             firstid = pagenum * pagesize
             upto = min(size, pagenum * pagesize + pagesize)
-            for i in range(firstid, upto):
-                yield i
+            yield from range(firstid, upto)

         pl = OnDemandPagedList(get_page, pagesize)
         got = pl.getslice(*sliceargs)
@@ -736,7 +731,7 @@ class TestUtil(unittest.TestCase):
             multipart_encode({b'field': b'value'}, boundary='AAAAAA')[0],
             b'--AAAAAA\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--AAAAAA--\r\n')
         self.assertEqual(
-            multipart_encode({'欄位'.encode('utf-8'): '值'.encode('utf-8')}, boundary='AAAAAA')[0],
+            multipart_encode({'欄位'.encode(): '值'.encode()}, boundary='AAAAAA')[0],
            b'--AAAAAA\r\nContent-Disposition: form-data; name="\xe6\xac\x84\xe4\xbd\x8d"\r\n\r\n\xe5\x80\xbc\r\n--AAAAAA--\r\n')
         self.assertRaises(
             ValueError, multipart_encode, {b'field': b'value'}, boundary='value')
@@ -1397,7 +1392,7 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
                     <p begin="-1" end="-1">Ignored, three</p>
                 </div>
             </body>
-            </tt>'''.encode('utf-8')
+            </tt>'''.encode()
         srt_data = '''1
 00:00:00,000 --> 00:00:01,000
 The following line contains Chinese characters and special symbols
@@ -1415,14 +1410,14 @@ Line
 '''
         self.assertEqual(dfxp2srt(dfxp_data), srt_data)

-        dfxp_data_no_default_namespace = '''<?xml version="1.0" encoding="UTF-8"?>
+        dfxp_data_no_default_namespace = b'''<?xml version="1.0" encoding="UTF-8"?>
             <tt xmlns:ttml="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
             <ttml:body>
                 <ttml:div xml:lang="en">
                     <ttml:p begin="0" end="1">The first line</ttml:p>
                 </ttml:div>
             </ttml:body>
-            </tt>'''.encode('utf-8')
+            </tt>'''
         srt_data = '''1
 00:00:00,000 --> 00:00:01,000
 The first line
@@ -1430,7 +1425,7 @@ The first line
 '''
         self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data)

-        dfxp_data_with_style = '''<?xml version="1.0" encoding="utf-8"?>
+        dfxp_data_with_style = b'''<?xml version="1.0" encoding="utf-8"?>
 <tt xmlns="http://www.w3.org/2006/10/ttaf1" xmlns:ttp="http://www.w3.org/2006/10/ttaf1#parameter" ttp:timeBase="media" xmlns:tts="http://www.w3.org/2006/10/ttaf1#style" xml:lang="en" xmlns:ttm="http://www.w3.org/2006/10/ttaf1#metadata">
   <head>
     <styling>
@@ -1448,7 +1443,7 @@
     <p begin="00:00:12.36" id="p2" end="00:00:15.28">inner
  style</p>
  </body>
-</tt>'''.encode('utf-8')
+</tt>'''
         srt_data = '''1
 00:00:02,080 --> 00:00:05,840
 <font color="white" face="sansSerif" size="16">default style<font color="red">custom style</font></font>
diff --git a/test/test_verbose_output.py b/test/test_verbose_output.py
index cc606115f..17aeafbc0 100644
--- a/test/test_verbose_output.py
+++ b/test/test_verbose_output.py
@@ -1,8 +1,4 @@
 #!/usr/bin/env python3
-# coding: utf-8
-
-from __future__ import unicode_literals
-
 import unittest

 import sys
diff --git a/test/test_write_annotations.py.disabled b/test/test_write_annotations.py.disabled
index 7e4d8bc5a..4173fd09d 100644
--- a/test/test_write_annotations.py.disabled
+++ b/test/test_write_annotations.py.disabled
@@ -1,7 +1,4 @@
 #!/usr/bin/env python3
-# coding: utf-8
-from __future__ import unicode_literals
-
 # Allow direct execution
 import os
 import sys
@@ -21,7 +18,7 @@ import yt_dlp.extractor

 class YoutubeDL(yt_dlp.YoutubeDL):
     def __init__(self, *args, **kwargs):
-        super(YoutubeDL, self).__init__(*args, **kwargs)
+        super().__init__(*args, **kwargs)
         self.to_stderr = self.to_screen

@@ -52,7 +49,7 @@ class TestAnnotations(unittest.TestCase):
         ydl.download([TEST_ID])
         self.assertTrue(os.path.exists(ANNOTATIONS_FILE))
         annoxml = None
-        with io.open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as annof:
+        with open(ANNOTATIONS_FILE, encoding='utf-8') as annof:
             annoxml = xml.etree.ElementTree.parse(annof)
         self.assertTrue(annoxml is not None, 'Failed to parse annotations XML')
         root = annoxml.getroot()
diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py
index 455192b1f..8691abb67 100644
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -1,6 +1,4 @@
 #!/usr/bin/env python3
-from __future__ import unicode_literals
-
 # Allow direct execution
 import os
 import sys
diff --git a/test/test_youtube_misc.py b/test/test_youtube_misc.py
index 402681cad..70d6d9949 100644
--- a/test/test_youtube_misc.py
+++ b/test/test_youtube_misc.py
@@ -1,6 +1,4 @@
 #!/usr/bin/env python3
-from __future__ import unicode_literals
-
 # Allow direct execution
 import os
 import sys
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index 6412acce0..d751d5396 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -1,14 +1,10 @@
 #!/usr/bin/env python3
-
-from __future__ import unicode_literals
-
 # Allow direct execution
 import os
 import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

-import io
 import re
 import string
 import urllib.request
@@ -149,7 +145,7 @@ def t_factory(name, sig_func, url_pattern):
             if not os.path.exists(fn):
                 urllib.request.urlretrieve(url, fn)

-            with io.open(fn, encoding='utf-8') as testf:
+            with open(fn, encoding='utf-8') as testf:
                 jscode = testf.read()
             self.assertEqual(sig_func(jscode, sig_input), expected_sig)

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 4bf5a8942..56f0346dc 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -1,8 +1,4 @@
 #!/usr/bin/env python3
-# coding: utf-8
-
-from __future__ import absolute_import, unicode_literals
-
 import collections
 import contextlib
 import datetime
@@ -165,7 +161,7 @@ if compat_os_name == 'nt':
     import ctypes


-class YoutubeDL(object):
+class YoutubeDL:
     """YoutubeDL class.

     YoutubeDL objects are the ones responsible of downloading the
@@ -501,7 +497,7 @@ class YoutubeDL(object):
                        care about HLS. (only for youtube)
     """
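An aside on the class-statement change just above: in Python 3 every class is new-style, so inheriting from object explicitly (class YoutubeDL(object):) is redundant, which is why the patch drops it everywhere. A tiny demonstration (illustrative only):

    class Example:  # implicitly a subclass of object in Python 3
        pass

    assert issubclass(Example, object)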

-    _NUMERIC_FIELDS = set((
+    _NUMERIC_FIELDS = {
         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
         'timestamp', 'release_timestamp',
         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
@@ -509,7 +505,7 @@ class YoutubeDL(object):
         'start_time', 'end_time', 'chapter_number', 'season_number',
         'episode_number', 'track_number', 'disc_number', 'release_year',
-    ))
+    }

     _format_fields = {
         # NB: Keep in sync with the docstring of extractor/common.py
@@ -576,7 +572,7 @@ class YoutubeDL(object):

         def check_deprecated(param, option, suggestion):
             if self.params.get(param) is not None:
-                self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
+                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                 return True
             return False
@@ -693,7 +689,7 @@ class YoutubeDL(object):
                     with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                         for line in archive_file:
                             self.archive.add(line.strip())
-            except IOError as ioe:
+            except OSError as ioe:
                 if ioe.errno != errno.ENOENT:
                     raise
             return False
@@ -990,11 +986,9 @@ class YoutubeDL(object):
         outtmpl_dict.update({
             k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
             if outtmpl_dict.get(k) is None})
-        for key, val in outtmpl_dict.items():
+        for _, val in outtmpl_dict.items():
             if isinstance(val, bytes):
-                self.report_warning(
-                    'Parameter outtmpl is bytes, but should be a unicode string. '
-                    'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
+                self.report_warning('Parameter outtmpl is bytes, but should be a unicode string')
         return outtmpl_dict

     def get_output_path(self, dir_type='', filename=None):
@@ -1013,7 +1007,7 @@ class YoutubeDL(object):
         # '%%' intact for template dict substitution step. Working around
         # with boundary-alike separator hack.
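A sketch of the 'boundary-alike separator hack' the two comments above describe; this is a simplified illustration, not the real implementation (which also expands paths between the two steps):

    import random
    from string import ascii_letters

    outtmpl = '100%% %(title)s'
    sep = ''.join(random.choice(ascii_letters) for _ in range(32))
    protected = outtmpl.replace('%%', f'%{sep}%')  # shield the literal %%
    # ... the '%(...)s' template-dict substitution would run here ...
    assert protected.replace(f'%{sep}%', '%%') == outtmpl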
         sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
-        outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
+        outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

         # outtmpl should be expand_path'ed before template dict substitution
         # because meta fields may contain env variables we don't want to
@@ -1173,7 +1167,7 @@ class YoutubeDL(object):

             fmt = outer_mobj.group('format')
             if fmt == 's' and value is not None and key in field_size_compat_map.keys():
-                fmt = '0{:d}d'.format(field_size_compat_map[key])
+                fmt = f'0{field_size_compat_map[key]:d}d'

             value = default if value is None else value if replacement is None else replacement

@@ -1188,7 +1182,7 @@ class YoutubeDL(object):
                 value = map(str, variadic(value) if '#' in flags else [value])
                 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
             elif fmt[-1] == 'B':  # bytes
-                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
+                value = f'%{str_fmt}'.encode() % str(value).encode('utf-8')
                 value, fmt = value.decode('utf-8', 'ignore'), 's'
             elif fmt[-1] == 'U':  # unicode normalized
                 value, fmt = unicodedata.normalize(
@@ -1301,7 +1295,7 @@ class YoutubeDL(object):
         if date is not None:
             dateRange = self.params.get('daterange', DateRange())
             if date not in dateRange:
-                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
+                return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
         view_count = info_dict.get('view_count')
         if view_count is not None:
             min_views = self.params.get('min_views')
@@ -1765,14 +1759,14 @@ class YoutubeDL(object):

         x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

-        self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
+        self.to_screen(f'[{ie_result["extractor"]}] playlist {playlist}: {msg % n_entries}')
         failures = 0
         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
         for i, entry_tuple in enumerate(entries, 1):
             playlist_index, entry = entry_tuple
             if 'playlist-index' in self.params.get('compat_opts', []):
                 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
-            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
+            self.to_screen(f'[download] Downloading video {i} of {n_entries}')
             # This __x_forwarded_for_ip thing is a bit ugly but requires
             # minimal changes
             if x_forwarded_for:
@@ -1940,7 +1934,7 @@ class YoutubeDL(object):
         def syntax_error(note, start):
             message = (
                 'Invalid format specification: '
-                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
+                '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
             return SyntaxError(message)

         PICKFIRST = 'PICKFIRST'
@@ -2044,7 +2038,7 @@ class YoutubeDL(object):
                         raise syntax_error('Expected a selector', start)
                     current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                 else:
-                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
+                    raise syntax_error(f'Operator not recognized: "{string}"', start)
             elif type == tokenize.ENDMARKER:
                 break
         if current_selector:
@@ -2244,7 +2238,7 @@ class YoutubeDL(object):
         except tokenize.TokenError:
             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

-        class TokenIterator(object):
+        class TokenIterator:
             def __init__(self, tokens):
                 self.tokens = tokens
                 self.counter = 0
@@ -2644,7 +2638,7 @@ class YoutubeDL(object):
             if max_downloads_reached:
                 break

-        write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download)
+        write_archive = {f.get('__write_download_archive', False) for f in formats_to_download}
         assert write_archive.issubset({True, False, 'ignore'})
         if True in write_archive and False not in write_archive:
             self.record_download_archive(info_dict)
@@ -2712,7 +2706,7 @@ class YoutubeDL(object):
         for lang in requested_langs:
             formats = available_subs.get(lang)
             if formats is None:
-                self.report_warning('%s subtitles not available for %s' % (lang, video_id))
+                self.report_warning(f'{lang} subtitles not available for {video_id}')
                 continue
             for ext in formats_preference:
                 if ext == 'best':
@@ -2755,7 +2749,7 @@ class YoutubeDL(object):
             tmpl = format_tmpl(tmpl)
             self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
             if self._ensure_dir_exists(filename):
-                with io.open(filename, 'a', encoding='utf-8') as f:
+                with open(filename, 'a', encoding='utf-8') as f:
                     f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')

     def __forced_printings(self, info_dict, filename, incomplete):
@@ -2920,11 +2914,11 @@ class YoutubeDL(object):
             else:
                 try:
                     self.to_screen('[info] Writing video annotations to: ' + annofn)
-                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
+                    with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                         annofile.write(info_dict['annotations'])
                 except (KeyError, TypeError):
                     self.report_warning('There are no annotations to write.')
-                except (OSError, IOError):
+                except OSError:
                     self.report_error('Cannot write annotations file: ' + annofn)
                     return
@@ -2943,13 +2937,13 @@ class YoutubeDL(object):
                 return True
             try:
                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
-                with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
-                             newline='\r\n' if link_type == 'url' else '\n') as linkfile:
+                with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
+                          newline='\r\n' if link_type == 'url' else '\n') as linkfile:
                     template_vars = {'url': url}
                     if link_type == 'desktop':
                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
-            except (OSError, IOError):
+            except OSError:
                 self.report_error(f'Cannot write internet shortcut {linkfn}')
                 return False
         return True
@@ -3014,10 +3008,10 @@ class YoutubeDL(object):
                 return False

             # Check extension
-            exts = set(format.get('ext') for format in formats)
+            exts = {format.get('ext') for format in formats}
             COMPATIBLE_EXTS = (
-                set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
-                set(('webm',)),
+                {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'},
+                {'webm'},
             )
             for ext_sets in COMPATIBLE_EXTS:
                 if ext_sets.issuperset(exts):
@@ -3050,7 +3044,7 @@ class YoutubeDL(object):
                     os.path.splitext(filename)[0]
                     if filename_real_ext in (old_ext, new_ext)
                     else filename)
-                return '%s.%s' % (filename_wo_ext, ext)
+                return f'{filename_wo_ext}.{ext}'

             # Ensure filename always has a correct extension for successful merge
             full_filename = correct_ext(full_filename)
@@ -3135,10 +3129,10 @@ class YoutubeDL(object):
             except network_exceptions as err:
                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
                 return
-            except (OSError, IOError) as err:
+            except OSError as err:
                 raise UnavailableVideoError(err)
             except (ContentTooShortError, ) as err:
-                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
+                self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
                 return

         if success and full_filename != '-':
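The repeated collapse of except (OSError, IOError) and bare except IOError into except OSError throughout this commit is safe: since Python 3.3 (PEP 3151), IOError, EnvironmentError, WindowsError and socket.error are all aliases of OSError. This can be verified directly:

    import socket

    assert IOError is OSError
    assert EnvironmentError is OSError
    assert socket.error is OSError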
@@ -3343,7 +3337,7 @@ class YoutubeDL(object):
                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
                 try:
                     os.remove(encodeFilename(old_filename))
-                except (IOError, OSError):
+                except OSError:
                     self.report_warning('Unable to remove downloaded original file')
                 if old_filename in infodict['__files_to_move']:
                     del infodict['__files_to_move'][old_filename]
@@ -3388,7 +3382,7 @@ class YoutubeDL(object):
                     break
             else:
                 return
-        return '%s %s' % (extractor.lower(), video_id)
+        return f'{extractor.lower()} {video_id}'

     def in_download_archive(self, info_dict):
         fn = self.params.get('download_archive')
@@ -3791,7 +3785,7 @@ class YoutubeDL(object):
         try:
             write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
             return True
-        except (OSError, IOError):
+        except OSError:
             self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
             return None
@@ -3812,9 +3806,9 @@ class YoutubeDL(object):
         else:
             try:
                 self.to_screen(f'[info] Writing {label} description to: {descfn}')
-                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
+                with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                     descfile.write(ie_result['description'])
-            except (OSError, IOError):
+            except OSError:
                 self.report_error(f'Cannot write {label} description file {descfn}')
                 return None
         return True
@@ -3848,12 +3842,12 @@ class YoutubeDL(object):
                 try:
                     # Use newline='' to prevent conversion of newline characters
                     # See https://github.com/ytdl-org/youtube-dl/issues/10268
-                    with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
+                    with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                         subfile.write(sub_info['data'])
                     sub_info['filepath'] = sub_filename
                     ret.append((sub_filename, sub_filename_final))
                     continue
-                except (OSError, IOError):
+                except OSError:
                     self.report_error(f'Cannot write video subtitles file {sub_filename}')
                     return None
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index 10dc221b4..91bf5c4ce 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -1,11 +1,8 @@
 #!/usr/bin/env python3
-# coding: utf-8
-
 f'You are using an unsupported version of Python. Only Python versions 3.6 and above are supported by yt-dlp'  # noqa: F541

 __license__ = 'Public Domain'

-import io
 import itertools
 import os
 import random
@@ -67,13 +64,12 @@ def get_urls(urls, batchfile, verbose):
                     'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D'))
             batchfd = sys.stdin
         else:
-            batchfd = io.open(
-                expand_path(batchfile),
-                'r', encoding='utf-8', errors='ignore')
+            batchfd = open(
+                expand_path(batchfile), encoding='utf-8', errors='ignore')
         batch_urls = read_batch_urls(batchfd)
         if verbose:
             write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
-    except IOError:
+    except OSError:
         sys.exit('ERROR: batch file %s could not be read' % batchfile)
     _enc = preferredencoding()
     return [
diff --git a/yt_dlp/__main__.py b/yt_dlp/__main__.py
index fb2726bd3..c9d275b86 100644
--- a/yt_dlp/__main__.py
+++ b/yt_dlp/__main__.py
@@ -1,6 +1,4 @@
 #!/usr/bin/env python3
-from __future__ import unicode_literals
-
 # Execute with
 # $ python -m yt_dlp

diff --git a/yt_dlp/aes.py b/yt_dlp/aes.py
index b37f0dd39..e5d73f740 100644
--- a/yt_dlp/aes.py
+++ b/yt_dlp/aes.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from math import ceil

 from .compat import (
diff --git a/yt_dlp/cache.py b/yt_dlp/cache.py
index e5cb193bc..f93ef85e7 100644
--- a/yt_dlp/cache.py
+++ b/yt_dlp/cache.py
@@ -1,7 +1,4 @@
-from __future__ import unicode_literals
-
 import errno
-import io
 import json
 import os
 import re
@@ -15,7 +12,7 @@ from .utils import (
 )


-class Cache(object):
+class Cache:
     def __init__(self, ydl):
         self._ydl = ydl

@@ -31,7 +28,7 @@ class Cache(object):
             'invalid section %r' % section
         assert re.match(r'^[a-zA-Z0-9_.-]+$', key), 'invalid key %r' % key
         return os.path.join(
-            self._get_root_dir(), section, '%s.%s' % (key, dtype))
+            self._get_root_dir(), section, f'{key}.{dtype}')

     @property
     def enabled(self):
@@ -54,8 +51,7 @@ class Cache(object):
             write_json_file(data, fn)
         except Exception:
             tb = traceback.format_exc()
-            self._ydl.report_warning(
-                'Writing cache to %r failed: %s' % (fn, tb))
+            self._ydl.report_warning(f'Writing cache to {fn!r} failed: {tb}')

     def load(self, section, key, dtype='json', default=None):
         assert dtype in ('json',)
@@ -66,17 +62,16 @@ class Cache(object):
         cache_fn = self._get_cache_fn(section, key, dtype)
         try:
             try:
-                with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
+                with open(cache_fn, encoding='utf-8') as cachef:
                     self._ydl.write_debug(f'Loading {section}.{key} from cache')
                     return json.load(cachef)
             except ValueError:
                 try:
                     file_size = os.path.getsize(cache_fn)
-                except (OSError, IOError) as oe:
+                except OSError as oe:
                     file_size = str(oe)
-                self._ydl.report_warning(
-                    'Cache retrieval from %s failed (%s)' % (cache_fn, file_size))
-        except IOError:
+                self._ydl.report_warning(f'Cache retrieval from {cache_fn} failed ({file_size})')
+        except OSError:
             pass  # No cache available

         return default
diff --git a/yt_dlp/compat.py b/yt_dlp/compat.py
index 5bac87c10..7a1500435 100644
--- a/yt_dlp/compat.py
+++ b/yt_dlp/compat.py
@@ -1,5 +1,3 @@
-# coding: utf-8
-
 import asyncio
 import base64
 import collections
diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py
index 3476595d3..1d92fd8ce 100644
--- a/yt_dlp/cookies.py
+++ b/yt_dlp/cookies.py
@@ -125,7 +125,7 @@ def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(),
     elif browser_name in CHROMIUM_BASED_BROWSERS:
         return _extract_chrome_cookies(browser_name, profile, keyring, logger)
     else:
-        raise ValueError('unknown browser: {}'.format(browser_name))
+        raise ValueError(f'unknown browser: {browser_name}')


 def _extract_firefox_cookies(profile, logger):
@@ -144,8 +144,8 @@ def _extract_firefox_cookies(profile, logger):

     cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
     if cookie_database_path is None:
-        raise FileNotFoundError('could not find firefox cookies database in {}'.format(search_root))
-    logger.debug('Extracting cookies from: "{}"'.format(cookie_database_path))
+        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
+    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

     with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
         cursor = None
@@ -164,7 +164,7 @@ def _extract_firefox_cookies(profile, logger):
                     path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                     comment=None, comment_url=None, rest={})
                 jar.set_cookie(cookie)
-            logger.info('Extracted {} cookies from firefox'.format(len(jar)))
+            logger.info(f'Extracted {len(jar)} cookies from firefox')
             return jar
         finally:
             if cursor is not None:
@@ -179,7 +179,7 @@ def _firefox_browser_dir():
     elif sys.platform == 'darwin':
         return os.path.expanduser('~/Library/Application Support/Firefox')
     else:
-        raise ValueError('unsupported platform: {}'.format(sys.platform))
+        raise ValueError(f'unsupported platform: {sys.platform}')


 def _get_chromium_based_browser_settings(browser_name):
@@ -219,7 +219,7 @@ def _get_chromium_based_browser_settings(browser_name):
         }[browser_name]

     else:
-        raise ValueError('unsupported platform: {}'.format(sys.platform))
+        raise ValueError(f'unsupported platform: {sys.platform}')

     # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
     # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
@@ -242,7 +242,7 @@


 def _extract_chrome_cookies(browser_name, profile, keyring, logger):
-    logger.info('Extracting cookies from {}'.format(browser_name))
+    logger.info(f'Extracting cookies from {browser_name}')

     if not SQLITE_AVAILABLE:
         logger.warning(('Cannot extract cookies from {} without sqlite3 support. '
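The cookies.py hunks that follow are again purely mechanical: str.format calls become f-strings with unchanged behaviour. An illustrative check (names invented for the example):

    browser_name, n = 'firefox', 42
    assert 'Extracted {} cookies from {}'.format(n, browser_name) == f'Extracted {n} cookies from {browser_name}'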
@@ -260,13 +260,13 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
         if config['supports_profiles']:
             search_root = os.path.join(config['browser_dir'], profile)
         else:
-            logger.error('{} does not support profiles'.format(browser_name))
+            logger.error(f'{browser_name} does not support profiles')
             search_root = config['browser_dir']

     cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
     if cookie_database_path is None:
-        raise FileNotFoundError('could not find {} cookies database in "{}"'.format(browser_name, search_root))
-    logger.debug('Extracting cookies from: "{}"'.format(cookie_database_path))
+        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
+    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

     decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring)
@@ -295,13 +295,13 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
                     unencrypted_cookies += 1
                 jar.set_cookie(cookie)
             if failed_cookies > 0:
-                failed_message = ' ({} could not be decrypted)'.format(failed_cookies)
+                failed_message = f' ({failed_cookies} could not be decrypted)'
             else:
                 failed_message = ''
-            logger.info('Extracted {} cookies from {}{}'.format(len(jar), browser_name, failed_message))
+            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
             counts = decryptor.cookie_counts.copy()
             counts['unencrypted'] = unencrypted_cookies
-            logger.debug('cookie version breakdown: {}'.format(counts))
+            logger.debug(f'cookie version breakdown: {counts}')
             return jar
         finally:
             if cursor is not None:
@@ -492,7 +492,7 @@ def _extract_safari_cookies(profile, logger):
     if profile is not None:
         logger.error('safari does not support profiles')
     if sys.platform != 'darwin':
-        raise ValueError('unsupported platform: {}'.format(sys.platform))
+        raise ValueError(f'unsupported platform: {sys.platform}')

     cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
@@ -506,7 +506,7 @@ def _extract_safari_cookies(profile, logger):
         cookies_data = f.read()

     jar = parse_safari_cookies(cookies_data, logger=logger)
-    logger.info('Extracted {} cookies from safari'.format(len(jar)))
+    logger.info(f'Extracted {len(jar)} cookies from safari')
     return jar
@@ -522,7 +522,7 @@ class DataParser:
     def read_bytes(self, num_bytes):
         if num_bytes < 0:
-            raise ParserError('invalid read of {} bytes'.format(num_bytes))
+            raise ParserError(f'invalid read of {num_bytes} bytes')
         end = self.cursor + num_bytes
         if end > len(self._data):
             raise ParserError('reached end of input')
@@ -533,7 +533,7 @@ class DataParser:
     def expect_bytes(self, expected_value, message):
         value = self.read_bytes(len(expected_value))
         if value != expected_value:
-            raise ParserError('unexpected value: {} != {} ({})'.format(value, expected_value, message))
+            raise ParserError(f'unexpected value: {value} != {expected_value} ({message})')

     def read_uint(self, big_endian=False):
         data_format = '>I' if big_endian else '<I'
[a stretch of the patch was lost in extraction here: the remainder of yt_dlp/cookies.py and the start of the yt_dlp/downloader/common.py diff]
                if retry > file_access_retries or err.errno not in (errno.EACCES, errno.EINVAL):
                    if not fatal:
@@ -486,4 +484,4 @@
         if exe is None:
             exe = os.path.basename(str_args[0])

-        self.write_debug('%s command line: %s' % (exe, shell_quote(str_args)))
+        self.write_debug(f'{exe} command line: {shell_quote(str_args)}')
diff --git a/yt_dlp/downloader/dash.py b/yt_dlp/downloader/dash.py
index a845ee7d3..64eb5e66a 100644
--- a/yt_dlp/downloader/dash.py
+++ b/yt_dlp/downloader/dash.py
@@ -1,4 +1,3 @@
-from __future__ import unicode_literals
 import time

 from ..downloader import get_suitable_downloader
@@ -46,7 +45,7 @@ class DashSegmentsFD(FragmentFD):

         if real_downloader:
             self.to_screen(
-                '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename()))
+                f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}')
             info_dict['fragments'] = list(fragments_to_download)
             fd = real_downloader(self.ydl, self.params)
             return fd.real_download(filename, info_dict)
diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py
index 71af705ea..b6dd32701 100644
--- a/yt_dlp/downloader/external.py
+++ b/yt_dlp/downloader/external.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import os.path
 import re
 import subprocess
@@ -56,7 +54,7 @@ class ExternalFD(FragmentFD):
             }
             if filename != '-':
                 fsize = os.path.getsize(encodeFilename(tmpfilename))
-                self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
+                self.to_screen(f'\r[{self.get_basename()}] Downloaded {fsize} bytes')
                 self.try_rename(tmpfilename, filename)
                 status.update({
                     'downloaded_bytes': fsize,
@@ -157,7 +155,7 @@ class ExternalFD(FragmentFD):
             fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index)
             try:
                 src, _ = self.sanitize_open(fragment_filename, 'rb')
-            except IOError as err:
+            except OSError as err:
                 if skip_unavailable_fragments and frag_index > 1:
                     self.report_skip_fragment(frag_index, err)
                     continue
@@ -179,7 +177,7 @@ class CurlFD(ExternalFD):
         cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed']
         if info_dict.get('http_headers') is not None:
             for key, val in info_dict['http_headers'].items():
-                cmd += ['--header', '%s: %s' % (key, val)]
+                cmd += ['--header', f'{key}: {val}']

         cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
         cmd += self._valueless_option('--silent', 'noprogress')
@@ -216,7 +214,7 @@ class AxelFD(ExternalFD):
         cmd = [self.exe, '-o', tmpfilename]
         if info_dict.get('http_headers') is not None:
             for key, val in info_dict['http_headers'].items():
-                cmd += ['-H', '%s: %s' % (key, val)]
+                cmd += ['-H', f'{key}: {val}']
         cmd += self._configuration_args()
         cmd += ['--', info_dict['url']]
         return cmd
@@ -229,7 +227,7 @@ class WgetFD(ExternalFD):
         cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies', '--compression=auto']
         if info_dict.get('http_headers') is not None:
             for key, val in info_dict['http_headers'].items():
-                cmd += ['--header', '%s: %s' % (key, val)]
+                cmd += ['--header', f'{key}: {val}']
         cmd += self._option('--limit-rate', 'ratelimit')
         retry = self._option('--tries', 'retries')
         if len(retry) == 2:
@@ -240,7 +238,7 @@ class WgetFD(ExternalFD):
         proxy = self.params.get('proxy')
         if proxy:
             for var in ('http_proxy', 'https_proxy'):
-                cmd += ['--execute', '%s=%s' % (var, proxy)]
+                cmd += ['--execute', f'{var}={proxy}']
         cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
         cmd += self._configuration_args()
         cmd += ['--', info_dict['url']]
@@ -271,7 +269,7 @@ class Aria2cFD(ExternalFD):

         if info_dict.get('http_headers') is not None:
             for key, val in info_dict['http_headers'].items():
-                cmd += ['--header', '%s: %s' % (key, val)]
+                cmd += ['--header', f'{key}: {val}']
         cmd += self._option('--max-overall-download-limit', 'ratelimit')
         cmd += self._option('--interface', 'source_address')
         cmd += self._option('--all-proxy', 'proxy')
@@ -289,10 +287,10 @@ class Aria2cFD(ExternalFD):
         dn = os.path.dirname(tmpfilename)
         if dn:
             if not os.path.isabs(dn):
-                dn = '.%s%s' % (os.path.sep, dn)
+                dn = f'.{os.path.sep}{dn}'
             cmd += ['--dir', dn + os.path.sep]
         if 'fragments' not in info_dict:
-            cmd += ['--out', '.%s%s' % (os.path.sep, os.path.basename(tmpfilename))]
+            cmd += ['--out', f'.{os.path.sep}{os.path.basename(tmpfilename)}']
         cmd += ['--auto-file-renaming=false']

         if 'fragments' in info_dict:
@@ -320,7 +318,7 @@ class HttpieFD(ExternalFD):

         if info_dict.get('http_headers') is not None:
             for key, val in info_dict['http_headers'].items():
-                cmd += ['%s:%s' % (key, val)]
+                cmd += [f'{key}:{val}']
         return cmd

@@ -393,7 +391,7 @@ class FFmpegFD(ExternalFD):
             headers = handle_youtubedl_headers(info_dict['http_headers'])
             args += [
                 '-headers',
-                ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
+                ''.join(f'{key}: {val}\r\n' for key, val in headers.items())]

         env = None
         proxy = self.params.get('proxy')
diff --git a/yt_dlp/downloader/f4m.py b/yt_dlp/downloader/f4m.py
index 0008b7c28..414071075 100644
--- a/yt_dlp/downloader/f4m.py
+++ b/yt_dlp/downloader/f4m.py
@@ -1,5 +1,3 @@
-from __future__ import division, unicode_literals
-
 import io
 import itertools
 import time
diff --git a/yt_dlp/downloader/fc2.py b/yt_dlp/downloader/fc2.py
index 157bcf23e..d503aac04 100644
--- a/yt_dlp/downloader/fc2.py
+++ b/yt_dlp/downloader/fc2.py
@@ -1,5 +1,3 @@
-from __future__ import division, unicode_literals
-
 import threading

 from .common import FileDownloader
diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py
index c45a8a476..217b89e3f 100644
--- a/yt_dlp/downloader/fragment.py
+++ b/yt_dlp/downloader/fragment.py
@@ -1,5 +1,3 @@
-from __future__ import division, unicode_literals
-
 import http.client
 import json
 import math
@@ -172,8 +170,7 @@ class FragmentFD(FileDownloader):
                 total_frags_str += ' (not including %d ad)' % ad_frags
         else:
             total_frags_str = 'unknown (live)'
-        self.to_screen(
-            '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
+        self.to_screen(f'[{self.FD_NAME}] Total fragments: {total_frags_str}')
         self.report_destination(ctx['filename'])
         dl = HttpQuietDownloader(
             self.ydl,
@@ -342,8 +339,7 @@ class FragmentFD(FileDownloader):
                 total_frags_str += ' (not including %d ad)' % ad_frags
         else:
             total_frags_str = 'unknown (live)'
-        self.to_screen(
-            '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
+        self.to_screen(f'[{self.FD_NAME}] Total fragments: {total_frags_str}')

         tmpfilename = self.temp_name(ctx['filename'])
diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py
index e932fd6ae..00695f93f 100644
--- a/yt_dlp/downloader/hls.py
+++ b/yt_dlp/downloader/hls.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 import io
 import binascii
@@ -102,8 +100,7 @@ class HlsFD(FragmentFD):
         if real_downloader and not real_downloader.supports_manifest(s):
             real_downloader = None
         if real_downloader:
-            self.to_screen(
-                '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename()))
+            self.to_screen(f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}')

         def is_ad_fragment_start(s):
             return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py
index a232168fa..03efbf1cd 100644
--- a/yt_dlp/downloader/http.py
+++ b/yt_dlp/downloader/http.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import os
 import ssl
 import time
@@ -221,10 +219,12 @@ class HttpFD(FileDownloader):
             min_data_len = self.params.get('min_filesize')
             max_data_len = self.params.get('max_filesize')
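Several of the external-downloader hunks above rebuild per-header command-line arguments with f-strings; a self-contained sketch of that pattern (hypothetical header values, not from the patch):

    http_headers = {'User-Agent': 'yt-dlp', 'Referer': 'https://example.com/'}
    cmd = ['curl', '--location']
    for key, val in http_headers.items():
        cmd += ['--header', f'{key}: {val}']
    assert cmd[3] == 'User-Agent: yt-dlp'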
self.params.get('max_filesize') if min_data_len is not None and data_len < min_data_len: - self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len)) + self.to_screen( + f'\r[download] File is smaller than min-filesize ({data_len} bytes < {min_data_len} bytes). Aborting.') return False if max_data_len is not None and data_len > max_data_len: - self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len)) + self.to_screen( + f'\r[download] File is larger than max-filesize ({data_len} bytes > {max_data_len} bytes). Aborting.') return False byte_counter = 0 + ctx.resume_len @@ -265,7 +265,7 @@ class HttpFD(FileDownloader): assert ctx.stream is not None ctx.filename = self.undo_temp_name(ctx.tmpfilename) self.report_destination(ctx.filename) - except (OSError, IOError) as err: + except OSError as err: self.report_error('unable to open for writing: %s' % str(err)) return False @@ -277,7 +277,7 @@ class HttpFD(FileDownloader): try: ctx.stream.write(data_block) - except (IOError, OSError) as err: + except OSError as err: self.to_stderr('\n') self.report_error('unable to write data: %s' % str(err)) return False diff --git a/yt_dlp/downloader/ism.py b/yt_dlp/downloader/ism.py index 2ba36085e..ca4ca3a19 100644 --- a/yt_dlp/downloader/ism.py +++ b/yt_dlp/downloader/ism.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import time import binascii import io diff --git a/yt_dlp/downloader/mhtml.py b/yt_dlp/downloader/mhtml.py index 54e711792..5a322f1db 100644 --- a/yt_dlp/downloader/mhtml.py +++ b/yt_dlp/downloader/mhtml.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import io import quopri import re diff --git a/yt_dlp/downloader/niconico.py b/yt_dlp/downloader/niconico.py index 521dfece3..0e6c177b7 100644 --- a/yt_dlp/downloader/niconico.py +++ b/yt_dlp/downloader/niconico.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import threading from .common import FileDownloader diff --git a/yt_dlp/downloader/rtmp.py b/yt_dlp/downloader/rtmp.py index 90f1acfd4..12aa04cf3 100644 --- a/yt_dlp/downloader/rtmp.py +++ b/yt_dlp/downloader/rtmp.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import os import re import subprocess diff --git a/yt_dlp/downloader/rtsp.py b/yt_dlp/downloader/rtsp.py index 7815d59d9..26dbd9ef7 100644 --- a/yt_dlp/downloader/rtsp.py +++ b/yt_dlp/downloader/rtsp.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import os import subprocess @@ -32,7 +30,7 @@ class RtspFD(FileDownloader): retval = subprocess.call(args) if retval == 0: fsize = os.path.getsize(encodeFilename(tmpfilename)) - self.to_screen('\r[%s] %s bytes' % (args[0], fsize)) + self.to_screen(f'\r[{args[0]}] {fsize} bytes') self.try_rename(tmpfilename, filename) self._hook_progress({ 'downloaded_bytes': fsize, diff --git a/yt_dlp/downloader/youtube_live_chat.py b/yt_dlp/downloader/youtube_live_chat.py index cfca686ee..36c82b03b 100644 --- a/yt_dlp/downloader/youtube_live_chat.py +++ b/yt_dlp/downloader/youtube_live_chat.py @@ -1,5 +1,3 @@ -from __future__ import division, unicode_literals - import json import time diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py index 6fe195e82..03f10ab23 100644 --- a/yt_dlp/extractor/abc.py +++ b/yt_dlp/extractor/abc.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import hashlib import hmac import re diff --git a/yt_dlp/extractor/abcnews.py 
b/yt_dlp/extractor/abcnews.py index 296b8cec1..a57295b13 100644 --- a/yt_dlp/extractor/abcnews.py +++ b/yt_dlp/extractor/abcnews.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .amp import AMPIE from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/abcotvs.py b/yt_dlp/extractor/abcotvs.py index 5bff46634..44a9f8ca5 100644 --- a/yt_dlp/extractor/abcotvs.py +++ b/yt_dlp/extractor/abcotvs.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/academicearth.py b/yt_dlp/extractor/academicearth.py index 34095501c..d9691cb5c 100644 --- a/yt_dlp/extractor/academicearth.py +++ b/yt_dlp/extractor/academicearth.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/acast.py b/yt_dlp/extractor/acast.py index 63587c5cf..f2f828f8e 100644 --- a/yt_dlp/extractor/acast.py +++ b/yt_dlp/extractor/acast.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/adn.py b/yt_dlp/extractor/adn.py index fca6e605d..b47345e3c 100644 --- a/yt_dlp/extractor/adn.py +++ b/yt_dlp/extractor/adn.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import binascii import json diff --git a/yt_dlp/extractor/adobeconnect.py b/yt_dlp/extractor/adobeconnect.py index e2e6f93f3..8963b128a 100644 --- a/yt_dlp/extractor/adobeconnect.py +++ b/yt_dlp/extractor/adobeconnect.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_parse_qs, diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py index 1292484c6..1bdc8587c 100644 --- a/yt_dlp/extractor/adobepass.py +++ b/yt_dlp/extractor/adobepass.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re import time diff --git a/yt_dlp/extractor/adobetv.py b/yt_dlp/extractor/adobetv.py index 3cfa1ff55..941254243 100644 --- a/yt_dlp/extractor/adobetv.py +++ b/yt_dlp/extractor/adobetv.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import functools import re diff --git a/yt_dlp/extractor/adultswim.py b/yt_dlp/extractor/adultswim.py index c97cfc161..1368954bc 100644 --- a/yt_dlp/extractor/adultswim.py +++ b/yt_dlp/extractor/adultswim.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .turner import TurnerBaseIE diff --git a/yt_dlp/extractor/aenetworks.py b/yt_dlp/extractor/aenetworks.py index 8025de5a3..86a10f2dc 100644 --- a/yt_dlp/extractor/aenetworks.py +++ b/yt_dlp/extractor/aenetworks.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .theplatform import ThePlatformIE from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py index 44bfb8bc2..b0fd158f6 100644 --- a/yt_dlp/extractor/afreecatv.py +++ b/yt_dlp/extractor/afreecatv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import re diff --git a/yt_dlp/extractor/airmozilla.py b/yt_dlp/extractor/airmozilla.py index 9e38136b4..669556b98 100644 --- a/yt_dlp/extractor/airmozilla.py +++ b/yt_dlp/extractor/airmozilla.py @@ -1,6 +1,3 @@ -# coding: utf-8 
-from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/aliexpress.py b/yt_dlp/extractor/aliexpress.py index 9722fe9ac..2e83f2eb6 100644 --- a/yt_dlp/extractor/aliexpress.py +++ b/yt_dlp/extractor/aliexpress.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/aljazeera.py b/yt_dlp/extractor/aljazeera.py index 7bcdb7afb..124bab0d9 100644 --- a/yt_dlp/extractor/aljazeera.py +++ b/yt_dlp/extractor/aljazeera.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/allocine.py b/yt_dlp/extractor/allocine.py index 403a277e9..1f881e2a0 100644 --- a/yt_dlp/extractor/allocine.py +++ b/yt_dlp/extractor/allocine.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/alphaporno.py b/yt_dlp/extractor/alphaporno.py index 3a6d99f6b..8d5b472d3 100644 --- a/yt_dlp/extractor/alphaporno.py +++ b/yt_dlp/extractor/alphaporno.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( parse_iso8601, diff --git a/yt_dlp/extractor/alsace20tv.py b/yt_dlp/extractor/alsace20tv.py index 4aae6fe74..d16ab496e 100644 --- a/yt_dlp/extractor/alsace20tv.py +++ b/yt_dlp/extractor/alsace20tv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/alura.py b/yt_dlp/extractor/alura.py index d2e2df270..b76ccb2a1 100644 --- a/yt_dlp/extractor/alura.py +++ b/yt_dlp/extractor/alura.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/amara.py b/yt_dlp/extractor/amara.py index 61d469574..5018710e0 100644 --- a/yt_dlp/extractor/amara.py +++ b/yt_dlp/extractor/amara.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from .youtube import YoutubeIE from .vimeo import VimeoIE diff --git a/yt_dlp/extractor/amazon.py b/yt_dlp/extractor/amazon.py index 07b1b1861..de4917adc 100644 --- a/yt_dlp/extractor/amazon.py +++ b/yt_dlp/extractor/amazon.py @@ -1,4 +1,3 @@ -# coding: utf-8 from .common import InfoExtractor from ..utils import int_or_none diff --git a/yt_dlp/extractor/amcnetworks.py b/yt_dlp/extractor/amcnetworks.py index e38e215d3..e04ecf65f 100644 --- a/yt_dlp/extractor/amcnetworks.py +++ b/yt_dlp/extractor/amcnetworks.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .theplatform import ThePlatformIE diff --git a/yt_dlp/extractor/americastestkitchen.py b/yt_dlp/extractor/americastestkitchen.py index 6e6099a03..f5747cf1e 100644 --- a/yt_dlp/extractor/americastestkitchen.py +++ b/yt_dlp/extractor/americastestkitchen.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/amp.py b/yt_dlp/extractor/amp.py index 24c684cad..73b72b085 100644 --- a/yt_dlp/extractor/amp.py +++ b/yt_dlp/extractor/amp.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import 
( determine_ext, diff --git a/yt_dlp/extractor/animelab.py b/yt_dlp/extractor/animelab.py index 1c2cc47dd..cd0d77805 100644 --- a/yt_dlp/extractor/animelab.py +++ b/yt_dlp/extractor/animelab.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/animeondemand.py b/yt_dlp/extractor/animeondemand.py index 2e674d58f..de49db4ea 100644 --- a/yt_dlp/extractor/animeondemand.py +++ b/yt_dlp/extractor/animeondemand.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/ant1newsgr.py b/yt_dlp/extractor/ant1newsgr.py index 1075b461e..cd0f36856 100644 --- a/yt_dlp/extractor/ant1newsgr.py +++ b/yt_dlp/extractor/ant1newsgr.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import urllib.parse diff --git a/yt_dlp/extractor/anvato.py b/yt_dlp/extractor/anvato.py index 0d444fc33..28fbd606e 100644 --- a/yt_dlp/extractor/anvato.py +++ b/yt_dlp/extractor/anvato.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import hashlib import json diff --git a/yt_dlp/extractor/anvato_token_generator/__init__.py b/yt_dlp/extractor/anvato_token_generator/__init__.py index 6e223db9f..6530caf53 100644 --- a/yt_dlp/extractor/anvato_token_generator/__init__.py +++ b/yt_dlp/extractor/anvato_token_generator/__init__.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .nfl import NFLTokenGenerator __all__ = [ diff --git a/yt_dlp/extractor/anvato_token_generator/common.py b/yt_dlp/extractor/anvato_token_generator/common.py index b959a903b..3800b5808 100644 --- a/yt_dlp/extractor/anvato_token_generator/common.py +++ b/yt_dlp/extractor/anvato_token_generator/common.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - class TokenGenerator: def generate(self, anvack, mcp_id): raise NotImplementedError('This method must be implemented by subclasses') diff --git a/yt_dlp/extractor/anvato_token_generator/nfl.py b/yt_dlp/extractor/anvato_token_generator/nfl.py index 97a2b245f..9ee4aa002 100644 --- a/yt_dlp/extractor/anvato_token_generator/nfl.py +++ b/yt_dlp/extractor/anvato_token_generator/nfl.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json from .common import TokenGenerator diff --git a/yt_dlp/extractor/aol.py b/yt_dlp/extractor/aol.py index 4766a2c77..b67db2adc 100644 --- a/yt_dlp/extractor/aol.py +++ b/yt_dlp/extractor/aol.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .yahoo import YahooIE diff --git a/yt_dlp/extractor/apa.py b/yt_dlp/extractor/apa.py index 1736cdf56..847be6edf 100644 --- a/yt_dlp/extractor/apa.py +++ b/yt_dlp/extractor/apa.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/aparat.py b/yt_dlp/extractor/aparat.py index 1057233cf..cd6cd1c79 100644 --- a/yt_dlp/extractor/aparat.py +++ b/yt_dlp/extractor/aparat.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( get_element_by_id, diff --git a/yt_dlp/extractor/appleconnect.py b/yt_dlp/extractor/appleconnect.py index 494f8330c..d00b0f906 100644 --- a/yt_dlp/extractor/appleconnect.py +++ b/yt_dlp/extractor/appleconnect.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import 
InfoExtractor
 from ..utils import (
     str_to_int,
diff --git a/yt_dlp/extractor/applepodcasts.py b/yt_dlp/extractor/applepodcasts.py
index 9139ff777..49bbeab82 100644
--- a/yt_dlp/extractor/applepodcasts.py
+++ b/yt_dlp/extractor/applepodcasts.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     clean_html,
diff --git a/yt_dlp/extractor/appletrailers.py b/yt_dlp/extractor/appletrailers.py
index 8140e332b..6b63f070d 100644
--- a/yt_dlp/extractor/appletrailers.py
+++ b/yt_dlp/extractor/appletrailers.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 import json
 
diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py
index 2ab3c1beb..c85d5297d 100644
--- a/yt_dlp/extractor/archiveorg.py
+++ b/yt_dlp/extractor/archiveorg.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 import json
 from .common import InfoExtractor
@@ -479,7 +476,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
 
     def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
         return self._parse_json(self._search_regex(
-            (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
+            (fr'{regex}\s*{self._YT_INITIAL_BOUNDARY_RE}',
              regex), webpage, name, default='{}'), video_id, fatal=False)
 
     def _extract_webpage_title(self, webpage):
@@ -597,7 +594,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
         response = self._call_cdx_api(
             video_id, f'https://www.youtube.com/watch?v={video_id}',
             filters=['mimetype:text/html'], collapse=['timestamp:6', 'digest'], query={'matchType': 'prefix'}) or []
-        all_captures = sorted([int_or_none(r['timestamp']) for r in response if int_or_none(r['timestamp']) is not None])
+        all_captures = sorted(int_or_none(r['timestamp']) for r in response if int_or_none(r['timestamp']) is not None)
 
         # Prefer the new polymer UI captures as we support extracting more metadata from them
         # WBM captures seem to all switch to this layout ~July 2020
diff --git a/yt_dlp/extractor/arcpublishing.py b/yt_dlp/extractor/arcpublishing.py
index 8880e5c95..2e3f3cc5f 100644
--- a/yt_dlp/extractor/arcpublishing.py
+++ b/yt_dlp/extractor/arcpublishing.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/ard.py b/yt_dlp/extractor/ard.py
index 7ea339b39..f294679ef 100644
--- a/yt_dlp/extractor/ard.py
+++ b/yt_dlp/extractor/ard.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 import re
 
diff --git a/yt_dlp/extractor/arkena.py b/yt_dlp/extractor/arkena.py
index 4f4f457c1..9da2bfd5e 100644
--- a/yt_dlp/extractor/arkena.py
+++ b/yt_dlp/extractor/arkena.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/arnes.py b/yt_dlp/extractor/arnes.py
index 050c252e3..96b134fa0 100644
--- a/yt_dlp/extractor/arnes.py
+++ b/yt_dlp/extractor/arnes.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import (
     compat_parse_qs,
diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py
index c2f2c1bd3..443b0d4b9 100644
--- a/yt_dlp/extractor/arte.py
+++ b/yt_dlp/extractor/arte.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/asiancrush.py b/yt_dlp/extractor/asiancrush.py
index 7f1940fca..23f310edb 100644
--- a/yt_dlp/extractor/asiancrush.py
+++ b/yt_dlp/extractor/asiancrush.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import functools
 import re
 
diff --git a/yt_dlp/extractor/atresplayer.py b/yt_dlp/extractor/atresplayer.py
index 465af4ed3..39d1f1cc5 100644
--- a/yt_dlp/extractor/atresplayer.py
+++ b/yt_dlp/extractor/atresplayer.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..compat import compat_HTTPError
 from ..utils import (
diff --git a/yt_dlp/extractor/atttechchannel.py b/yt_dlp/extractor/atttechchannel.py
index 8f93fb353..6ff4ec0ad 100644
--- a/yt_dlp/extractor/atttechchannel.py
+++ b/yt_dlp/extractor/atttechchannel.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import unified_strdate
 
diff --git a/yt_dlp/extractor/atvat.py b/yt_dlp/extractor/atvat.py
index 481a09737..2311837e9 100644
--- a/yt_dlp/extractor/atvat.py
+++ b/yt_dlp/extractor/atvat.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import datetime
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/audimedia.py b/yt_dlp/extractor/audimedia.py
index 6bd48ef15..c1c4f67d0 100644
--- a/yt_dlp/extractor/audimedia.py
+++ b/yt_dlp/extractor/audimedia.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
diff --git a/yt_dlp/extractor/audioboom.py b/yt_dlp/extractor/audioboom.py
index c51837b40..dc19a3874 100644
--- a/yt_dlp/extractor/audioboom.py
+++ b/yt_dlp/extractor/audioboom.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     clean_html,
diff --git a/yt_dlp/extractor/audiomack.py b/yt_dlp/extractor/audiomack.py
index 19775cf0f..5c4160fe4 100644
--- a/yt_dlp/extractor/audiomack.py
+++ b/yt_dlp/extractor/audiomack.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import itertools
 import time
 
diff --git a/yt_dlp/extractor/audius.py b/yt_dlp/extractor/audius.py
index fa64995d5..189d1224f 100644
--- a/yt_dlp/extractor/audius.py
+++ b/yt_dlp/extractor/audius.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import random
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/awaan.py b/yt_dlp/extractor/awaan.py
index f5e559c9f..d289f6be3 100644
--- a/yt_dlp/extractor/awaan.py
+++ b/yt_dlp/extractor/awaan.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import base64
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/aws.py b/yt_dlp/extractor/aws.py
index dccfeaf73..c2b22922b 100644
--- a/yt_dlp/extractor/aws.py
+++ b/yt_dlp/extractor/aws.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import datetime
 import hashlib
 import hmac
diff --git a/yt_dlp/extractor/azmedien.py b/yt_dlp/extractor/azmedien.py
index 0168340b9..d1686eed6 100644
--- a/yt_dlp/extractor/azmedien.py
+++ b/yt_dlp/extractor/azmedien.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/baidu.py b/yt_dlp/extractor/baidu.py
index 364fd9459..8786d67e0 100644
--- a/yt_dlp/extractor/baidu.py
+++ b/yt_dlp/extractor/baidu.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import
InfoExtractor from ..utils import unescapeHTML diff --git a/yt_dlp/extractor/banbye.py b/yt_dlp/extractor/banbye.py index 3d4d36ec3..92f567c5d 100644 --- a/yt_dlp/extractor/banbye.py +++ b/yt_dlp/extractor/banbye.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import math from .common import InfoExtractor diff --git a/yt_dlp/extractor/bandaichannel.py b/yt_dlp/extractor/bandaichannel.py index f1bcdef7a..2e3233376 100644 --- a/yt_dlp/extractor/bandaichannel.py +++ b/yt_dlp/extractor/bandaichannel.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .brightcove import BrightcoveNewIE from ..utils import extract_attributes diff --git a/yt_dlp/extractor/bandcamp.py b/yt_dlp/extractor/bandcamp.py index 745055e2d..5863eaeca 100644 --- a/yt_dlp/extractor/bandcamp.py +++ b/yt_dlp/extractor/bandcamp.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import random import re import time diff --git a/yt_dlp/extractor/bannedvideo.py b/yt_dlp/extractor/bannedvideo.py index 3db1151f6..ec9bdd8ca 100644 --- a/yt_dlp/extractor/bannedvideo.py +++ b/yt_dlp/extractor/bannedvideo.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py index 5bc8d3110..9cb019a49 100644 --- a/yt_dlp/extractor/bbc.py +++ b/yt_dlp/extractor/bbc.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import xml.etree.ElementTree import functools import itertools diff --git a/yt_dlp/extractor/beatport.py b/yt_dlp/extractor/beatport.py index e1cf8b4fe..f71f1f308 100644 --- a/yt_dlp/extractor/beatport.py +++ b/yt_dlp/extractor/beatport.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/beeg.py b/yt_dlp/extractor/beeg.py index 717fff3a6..5957e370a 100644 --- a/yt_dlp/extractor/beeg.py +++ b/yt_dlp/extractor/beeg.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/behindkink.py b/yt_dlp/extractor/behindkink.py index 2c97f9817..ca4498150 100644 --- a/yt_dlp/extractor/behindkink.py +++ b/yt_dlp/extractor/behindkink.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import url_basename diff --git a/yt_dlp/extractor/bellmedia.py b/yt_dlp/extractor/bellmedia.py index 904c17ed0..8f9849d9b 100644 --- a/yt_dlp/extractor/bellmedia.py +++ b/yt_dlp/extractor/bellmedia.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor diff --git a/yt_dlp/extractor/bet.py b/yt_dlp/extractor/bet.py index 2c7144235..6b867d135 100644 --- a/yt_dlp/extractor/bet.py +++ b/yt_dlp/extractor/bet.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .mtv import MTVServicesInfoExtractor from ..utils import unified_strdate diff --git a/yt_dlp/extractor/bfi.py b/yt_dlp/extractor/bfi.py index 60c8944b5..76f0516a4 100644 --- a/yt_dlp/extractor/bfi.py +++ b/yt_dlp/extractor/bfi.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/bfmtv.py b/yt_dlp/extractor/bfmtv.py index 501f69d80..48526e38b 100644 --- a/yt_dlp/extractor/bfmtv.py +++ b/yt_dlp/extractor/bfmtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ 
import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/bibeltv.py b/yt_dlp/extractor/bibeltv.py index 56c2bfee8..fd20aadad 100644 --- a/yt_dlp/extractor/bibeltv.py +++ b/yt_dlp/extractor/bibeltv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/bigflix.py b/yt_dlp/extractor/bigflix.py index 28e3e59f6..6b2797ca0 100644 --- a/yt_dlp/extractor/bigflix.py +++ b/yt_dlp/extractor/bigflix.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/bigo.py b/yt_dlp/extractor/bigo.py index ddf76ac55..f39e15002 100644 --- a/yt_dlp/extractor/bigo.py +++ b/yt_dlp/extractor/bigo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ExtractorError, urlencode_postdata diff --git a/yt_dlp/extractor/bild.py b/yt_dlp/extractor/bild.py index b8dfbd42b..f3dea33c4 100644 --- a/yt_dlp/extractor/bild.py +++ b/yt_dlp/extractor/bild.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index a9574758c..eb2dcb024 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -1,5 +1,3 @@ -# coding: utf-8 - import base64 import hashlib import itertools diff --git a/yt_dlp/extractor/biobiochiletv.py b/yt_dlp/extractor/biobiochiletv.py index dc86c57c5..180c9656e 100644 --- a/yt_dlp/extractor/biobiochiletv.py +++ b/yt_dlp/extractor/biobiochiletv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/biqle.py b/yt_dlp/extractor/biqle.py index 2b57bade3..3a4234491 100644 --- a/yt_dlp/extractor/biqle.py +++ b/yt_dlp/extractor/biqle.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from .vk import VKIE from ..compat import compat_b64decode diff --git a/yt_dlp/extractor/bitchute.py b/yt_dlp/extractor/bitchute.py index dcae6f4cc..c831092d4 100644 --- a/yt_dlp/extractor/bitchute.py +++ b/yt_dlp/extractor/bitchute.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import re diff --git a/yt_dlp/extractor/bitwave.py b/yt_dlp/extractor/bitwave.py index e6e093f59..bd8eac1f1 100644 --- a/yt_dlp/extractor/bitwave.py +++ b/yt_dlp/extractor/bitwave.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/blackboardcollaborate.py b/yt_dlp/extractor/blackboardcollaborate.py index 8ae294198..8f41c897a 100644 --- a/yt_dlp/extractor/blackboardcollaborate.py +++ b/yt_dlp/extractor/blackboardcollaborate.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import parse_iso8601 diff --git a/yt_dlp/extractor/bleacherreport.py b/yt_dlp/extractor/bleacherreport.py index d1bf8e829..8d8fabe33 100644 --- a/yt_dlp/extractor/bleacherreport.py +++ b/yt_dlp/extractor/bleacherreport.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from .amp import AMPIE from ..utils import ( diff --git a/yt_dlp/extractor/blinkx.py 
b/yt_dlp/extractor/blinkx.py index d70a3b30f..80531ccad 100644 --- a/yt_dlp/extractor/blinkx.py +++ b/yt_dlp/extractor/blinkx.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/blogger.py b/yt_dlp/extractor/blogger.py index dba131cb0..d7aa7f94e 100644 --- a/yt_dlp/extractor/blogger.py +++ b/yt_dlp/extractor/blogger.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from ..utils import ( diff --git a/yt_dlp/extractor/bloomberg.py b/yt_dlp/extractor/bloomberg.py index 2fbfad1ba..c0aaeae02 100644 --- a/yt_dlp/extractor/bloomberg.py +++ b/yt_dlp/extractor/bloomberg.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/bokecc.py b/yt_dlp/extractor/bokecc.py index 6a89d36f4..0c081750e 100644 --- a/yt_dlp/extractor/bokecc.py +++ b/yt_dlp/extractor/bokecc.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_parse_qs from ..utils import ExtractorError diff --git a/yt_dlp/extractor/bongacams.py b/yt_dlp/extractor/bongacams.py index 4e346e7b6..cbef0fc53 100644 --- a/yt_dlp/extractor/bongacams.py +++ b/yt_dlp/extractor/bongacams.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/bostonglobe.py b/yt_dlp/extractor/bostonglobe.py index 57882fbee..92f8ea2cb 100644 --- a/yt_dlp/extractor/bostonglobe.py +++ b/yt_dlp/extractor/bostonglobe.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/box.py b/yt_dlp/extractor/box.py index 8214086a6..5842de88a 100644 --- a/yt_dlp/extractor/box.py +++ b/yt_dlp/extractor/box.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/bpb.py b/yt_dlp/extractor/bpb.py index 98491975c..388f1f94f 100644 --- a/yt_dlp/extractor/bpb.py +++ b/yt_dlp/extractor/bpb.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/br.py b/yt_dlp/extractor/br.py index 0155827d8..faac442e8 100644 --- a/yt_dlp/extractor/br.py +++ b/yt_dlp/extractor/br.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/bravotv.py b/yt_dlp/extractor/bravotv.py index 139d51c09..d4895848e 100644 --- a/yt_dlp/extractor/bravotv.py +++ b/yt_dlp/extractor/bravotv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .adobepass import AdobePassIE diff --git a/yt_dlp/extractor/breakcom.py b/yt_dlp/extractor/breakcom.py index f38789f99..51c8c822f 100644 --- a/yt_dlp/extractor/breakcom.py +++ b/yt_dlp/extractor/breakcom.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from .youtube import YoutubeIE from ..utils import ( diff --git a/yt_dlp/extractor/breitbart.py b/yt_dlp/extractor/breitbart.py index e029aa627..a2b04fcce 100644 --- a/yt_dlp/extractor/breitbart.py +++ b/yt_dlp/extractor/breitbart.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git 
a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py index 60c853898..936c34e15 100644 --- a/yt_dlp/extractor/brightcove.py +++ b/yt_dlp/extractor/brightcove.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import re import struct diff --git a/yt_dlp/extractor/businessinsider.py b/yt_dlp/extractor/businessinsider.py index 73a57b1e4..4b3f5e68b 100644 --- a/yt_dlp/extractor/businessinsider.py +++ b/yt_dlp/extractor/businessinsider.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from .jwplatform import JWPlatformIE diff --git a/yt_dlp/extractor/buzzfeed.py b/yt_dlp/extractor/buzzfeed.py index ec411091e..1b4cba63e 100644 --- a/yt_dlp/extractor/buzzfeed.py +++ b/yt_dlp/extractor/buzzfeed.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re diff --git a/yt_dlp/extractor/byutv.py b/yt_dlp/extractor/byutv.py index f4d5086ed..eca2e294e 100644 --- a/yt_dlp/extractor/byutv.py +++ b/yt_dlp/extractor/byutv.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/c56.py b/yt_dlp/extractor/c56.py index a853c530c..1d98ea598 100644 --- a/yt_dlp/extractor/c56.py +++ b/yt_dlp/extractor/c56.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import js_to_json diff --git a/yt_dlp/extractor/cableav.py b/yt_dlp/extractor/cableav.py index 77efdf45a..3200b5677 100644 --- a/yt_dlp/extractor/cableav.py +++ b/yt_dlp/extractor/cableav.py @@ -1,4 +1,3 @@ -# coding: utf-8 from .common import InfoExtractor diff --git a/yt_dlp/extractor/callin.py b/yt_dlp/extractor/callin.py index 1f3b7cfff..fc5da7028 100644 --- a/yt_dlp/extractor/callin.py +++ b/yt_dlp/extractor/callin.py @@ -1,4 +1,3 @@ -# coding: utf-8 from .common import InfoExtractor from ..utils import ( traverse_obj, diff --git a/yt_dlp/extractor/caltrans.py b/yt_dlp/extractor/caltrans.py index 9ac740f7e..e52dfb170 100644 --- a/yt_dlp/extractor/caltrans.py +++ b/yt_dlp/extractor/caltrans.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/cam4.py b/yt_dlp/extractor/cam4.py index 2a3931fd0..4256b28e0 100644 --- a/yt_dlp/extractor/cam4.py +++ b/yt_dlp/extractor/cam4.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/camdemy.py b/yt_dlp/extractor/camdemy.py index 8f0c6c545..c7079e422 100644 --- a/yt_dlp/extractor/camdemy.py +++ b/yt_dlp/extractor/camdemy.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/cammodels.py b/yt_dlp/extractor/cammodels.py index 3dc19377b..32fbffcc2 100644 --- a/yt_dlp/extractor/cammodels.py +++ b/yt_dlp/extractor/cammodels.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/camwithher.py b/yt_dlp/extractor/camwithher.py index bbc5205fd..a0b3749ed 100644 --- a/yt_dlp/extractor/camwithher.py +++ b/yt_dlp/extractor/camwithher.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/canalalpha.py 
b/yt_dlp/extractor/canalalpha.py index 0365cb2f6..f2ec9355f 100644 --- a/yt_dlp/extractor/canalalpha.py +++ b/yt_dlp/extractor/canalalpha.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/canalc2.py b/yt_dlp/extractor/canalc2.py index 407cc8084..c9bb94c40 100644 --- a/yt_dlp/extractor/canalc2.py +++ b/yt_dlp/extractor/canalc2.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/canalplus.py b/yt_dlp/extractor/canalplus.py index 211ea267a..b184398e2 100644 --- a/yt_dlp/extractor/canalplus.py +++ b/yt_dlp/extractor/canalplus.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( # ExtractorError, diff --git a/yt_dlp/extractor/canvas.py b/yt_dlp/extractor/canvas.py index 8b9903774..8eff4a57c 100644 --- a/yt_dlp/extractor/canvas.py +++ b/yt_dlp/extractor/canvas.py @@ -1,4 +1,3 @@ -from __future__ import unicode_literals import json diff --git a/yt_dlp/extractor/carambatv.py b/yt_dlp/extractor/carambatv.py index 7e5cc90fb..087ea8aa0 100644 --- a/yt_dlp/extractor/carambatv.py +++ b/yt_dlp/extractor/carambatv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/cartoonnetwork.py b/yt_dlp/extractor/cartoonnetwork.py index 48b33617f..4dd7ac46d 100644 --- a/yt_dlp/extractor/cartoonnetwork.py +++ b/yt_dlp/extractor/cartoonnetwork.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .turner import TurnerBaseIE from ..utils import int_or_none diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py index fba8bf965..cac3f1e9d 100644 --- a/yt_dlp/extractor/cbc.py +++ b/yt_dlp/extractor/cbc.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import json import base64 diff --git a/yt_dlp/extractor/cbs.py b/yt_dlp/extractor/cbs.py index 2af36ea82..e32539c9e 100644 --- a/yt_dlp/extractor/cbs.py +++ b/yt_dlp/extractor/cbs.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .theplatform import ThePlatformFeedIE from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/cbsinteractive.py b/yt_dlp/extractor/cbsinteractive.py index 9d4f75435..7abeecf78 100644 --- a/yt_dlp/extractor/cbsinteractive.py +++ b/yt_dlp/extractor/cbsinteractive.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .cbs import CBSIE from ..utils import int_or_none diff --git a/yt_dlp/extractor/cbslocal.py b/yt_dlp/extractor/cbslocal.py index 3b7e1a8b9..c6495c95f 100644 --- a/yt_dlp/extractor/cbslocal.py +++ b/yt_dlp/extractor/cbslocal.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .anvato import AnvatoIE from .sendtonews import SendtoNewsIE from ..compat import compat_urlparse diff --git a/yt_dlp/extractor/cbsnews.py b/yt_dlp/extractor/cbsnews.py index 1285ed65e..76925b4f9 100644 --- a/yt_dlp/extractor/cbsnews.py +++ b/yt_dlp/extractor/cbsnews.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import zlib diff --git a/yt_dlp/extractor/cbssports.py b/yt_dlp/extractor/cbssports.py index b8a6e5967..56a255149 100644 --- a/yt_dlp/extractor/cbssports.py +++ b/yt_dlp/extractor/cbssports.py @@ -1,6 
+1,3 @@ -from __future__ import unicode_literals - - # from .cbs import CBSBaseIE from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/ccc.py b/yt_dlp/extractor/ccc.py index 36e6dff72..b11e1f74e 100644 --- a/yt_dlp/extractor/ccc.py +++ b/yt_dlp/extractor/ccc.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/ccma.py b/yt_dlp/extractor/ccma.py index 9dbaabfa0..ca739f8a1 100644 --- a/yt_dlp/extractor/ccma.py +++ b/yt_dlp/extractor/ccma.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/cctv.py b/yt_dlp/extractor/cctv.py index 0ed5f327b..623cbb342 100644 --- a/yt_dlp/extractor/cctv.py +++ b/yt_dlp/extractor/cctv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/cda.py b/yt_dlp/extractor/cda.py index 72c47050f..9b257bee9 100644 --- a/yt_dlp/extractor/cda.py +++ b/yt_dlp/extractor/cda.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import codecs import re import json diff --git a/yt_dlp/extractor/ceskatelevize.py b/yt_dlp/extractor/ceskatelevize.py index ddf66b207..331b350f1 100644 --- a/yt_dlp/extractor/ceskatelevize.py +++ b/yt_dlp/extractor/ceskatelevize.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/cgtn.py b/yt_dlp/extractor/cgtn.py index 89f173887..aaafa02d1 100644 --- a/yt_dlp/extractor/cgtn.py +++ b/yt_dlp/extractor/cgtn.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( try_get, diff --git a/yt_dlp/extractor/channel9.py b/yt_dlp/extractor/channel9.py index 90024dbba..90a1ab2be 100644 --- a/yt_dlp/extractor/channel9.py +++ b/yt_dlp/extractor/channel9.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/charlierose.py b/yt_dlp/extractor/charlierose.py index 42c9af263..27f8b33e5 100644 --- a/yt_dlp/extractor/charlierose.py +++ b/yt_dlp/extractor/charlierose.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import remove_end diff --git a/yt_dlp/extractor/chaturbate.py b/yt_dlp/extractor/chaturbate.py index 8da51f919..d39210bf7 100644 --- a/yt_dlp/extractor/chaturbate.py +++ b/yt_dlp/extractor/chaturbate.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/chilloutzone.py b/yt_dlp/extractor/chilloutzone.py index fd5202b9e..1a2f77c4e 100644 --- a/yt_dlp/extractor/chilloutzone.py +++ b/yt_dlp/extractor/chilloutzone.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/chingari.py b/yt_dlp/extractor/chingari.py index e6841fb8b..40613cfa3 100644 --- a/yt_dlp/extractor/chingari.py +++ b/yt_dlp/extractor/chingari.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import json diff --git a/yt_dlp/extractor/chirbit.py b/yt_dlp/extractor/chirbit.py index 8d75cdf19..452711d97 100644 --- a/yt_dlp/extractor/chirbit.py +++ b/yt_dlp/extractor/chirbit.py @@ 
-1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/cinchcast.py b/yt_dlp/extractor/cinchcast.py index b861d54b0..393df3698 100644 --- a/yt_dlp/extractor/cinchcast.py +++ b/yt_dlp/extractor/cinchcast.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( unified_strdate, diff --git a/yt_dlp/extractor/cinemax.py b/yt_dlp/extractor/cinemax.py index 2c3ff8d4f..54cab2285 100644 --- a/yt_dlp/extractor/cinemax.py +++ b/yt_dlp/extractor/cinemax.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .hbo import HBOBaseIE diff --git a/yt_dlp/extractor/ciscolive.py b/yt_dlp/extractor/ciscolive.py index 349c5eb50..066857817 100644 --- a/yt_dlp/extractor/ciscolive.py +++ b/yt_dlp/extractor/ciscolive.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools from .common import InfoExtractor diff --git a/yt_dlp/extractor/ciscowebex.py b/yt_dlp/extractor/ciscowebex.py index 882dae91b..e1aae9bda 100644 --- a/yt_dlp/extractor/ciscowebex.py +++ b/yt_dlp/extractor/ciscowebex.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/cjsw.py b/yt_dlp/extractor/cjsw.py index 1dea0d7c7..c37a3b848 100644 --- a/yt_dlp/extractor/cjsw.py +++ b/yt_dlp/extractor/cjsw.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/cliphunter.py b/yt_dlp/extractor/cliphunter.py index f2ca7a337..7e5fd3175 100644 --- a/yt_dlp/extractor/cliphunter.py +++ b/yt_dlp/extractor/cliphunter.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/clippit.py b/yt_dlp/extractor/clippit.py index a1a7a774c..006a713b2 100644 --- a/yt_dlp/extractor/clippit.py +++ b/yt_dlp/extractor/clippit.py @@ -1,7 +1,3 @@ -# coding: utf-8 - -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( parse_iso8601, diff --git a/yt_dlp/extractor/cliprs.py b/yt_dlp/extractor/cliprs.py index d55b26d59..567f77b94 100644 --- a/yt_dlp/extractor/cliprs.py +++ b/yt_dlp/extractor/cliprs.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .onet import OnetBaseIE diff --git a/yt_dlp/extractor/clipsyndicate.py b/yt_dlp/extractor/clipsyndicate.py index 6cdb42f5a..606444321 100644 --- a/yt_dlp/extractor/clipsyndicate.py +++ b/yt_dlp/extractor/clipsyndicate.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( find_xpath_attr, diff --git a/yt_dlp/extractor/closertotruth.py b/yt_dlp/extractor/closertotruth.py index 517e121e0..e78e26a11 100644 --- a/yt_dlp/extractor/closertotruth.py +++ b/yt_dlp/extractor/closertotruth.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/cloudflarestream.py b/yt_dlp/extractor/cloudflarestream.py index 2fdcfbb3a..0333d5def 100644 --- a/yt_dlp/extractor/cloudflarestream.py +++ b/yt_dlp/extractor/cloudflarestream.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import re diff --git 
a/yt_dlp/extractor/cloudy.py b/yt_dlp/extractor/cloudy.py
index 85ca20ecc..848643e26 100644
--- a/yt_dlp/extractor/cloudy.py
+++ b/yt_dlp/extractor/cloudy.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     str_to_int,
diff --git a/yt_dlp/extractor/clubic.py b/yt_dlp/extractor/clubic.py
index 98f9cb596..ce8621296 100644
--- a/yt_dlp/extractor/clubic.py
+++ b/yt_dlp/extractor/clubic.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     clean_html,
diff --git a/yt_dlp/extractor/clyp.py b/yt_dlp/extractor/clyp.py
index e6b2ac4d4..c64726ca2 100644
--- a/yt_dlp/extractor/clyp.py
+++ b/yt_dlp/extractor/clyp.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     float_or_none,
diff --git a/yt_dlp/extractor/cmt.py b/yt_dlp/extractor/cmt.py
index a4ddb9160..4eec066dd 100644
--- a/yt_dlp/extractor/cmt.py
+++ b/yt_dlp/extractor/cmt.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .mtv import MTVIE
 
 # TODO Remove - Reason: Outdated Site
diff --git a/yt_dlp/extractor/cnbc.py b/yt_dlp/extractor/cnbc.py
index da3730cc8..68fd025b7 100644
--- a/yt_dlp/extractor/cnbc.py
+++ b/yt_dlp/extractor/cnbc.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..utils import smuggle_url
 
diff --git a/yt_dlp/extractor/cnn.py b/yt_dlp/extractor/cnn.py
index af11d95b4..96482eaf5 100644
--- a/yt_dlp/extractor/cnn.py
+++ b/yt_dlp/extractor/cnn.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from .turner import TurnerBaseIE
 from ..utils import url_basename
diff --git a/yt_dlp/extractor/comedycentral.py b/yt_dlp/extractor/comedycentral.py
index 5a12ab5e6..05fc9f2b5 100644
--- a/yt_dlp/extractor/comedycentral.py
+++ b/yt_dlp/extractor/comedycentral.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .mtv import MTVServicesInfoExtractor
 
 
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 8da21a3dc..ef22c7876 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import base64
 import collections
 import xml.etree.ElementTree
@@ -92,7 +89,7 @@ from ..utils import (
 )
 
 
-class InfoExtractor(object):
+class InfoExtractor:
     """Information Extractor class.
 
     Information extractors are the classes that, given a URL, extract
@@ -628,7 +625,7 @@ class InfoExtractor(object):
         if country:
             self._x_forwarded_for_ip = GeoUtils.random_ipv4(country)
             self._downloader.write_debug(
-                'Using fake IP %s (%s) as X-Forwarded-For' % (self._x_forwarded_for_ip, country.upper()))
+                f'Using fake IP {self._x_forwarded_for_ip} ({country.upper()}) as X-Forwarded-For')
 
     def extract(self, url):
         """Extracts URL information and returns it in list of dicts."""
@@ -741,9 +738,9 @@ class InfoExtractor(object):
             self.report_download_webpage(video_id)
         elif note is not False:
             if video_id is None:
-                self.to_screen('%s' % (note,))
+                self.to_screen(str(note))
             else:
-                self.to_screen('%s: %s' % (video_id, note))
+                self.to_screen(f'{video_id}: {note}')
 
         # Some sites check X-Forwarded-For HTTP header in order to figure out
         # the origin of the client behind proxy. This allows bypassing geo
@@ -779,7 +776,7 @@ class InfoExtractor(object):
         if errnote is None:
             errnote = 'Unable to download webpage'
 
-        errmsg = '%s: %s' % (errnote, error_to_compat_str(err))
+        errmsg = f'{errnote}: {error_to_compat_str(err)}'
         if fatal:
             raise ExtractorError(errmsg, cause=err)
         else:
@@ -860,7 +857,7 @@ class InfoExtractor(object):
             dump = base64.b64encode(webpage_bytes).decode('ascii')
             self._downloader.to_screen(dump)
         if self.get_param('write_pages', False):
-            basen = '%s_%s' % (video_id, urlh.geturl())
+            basen = f'{video_id}_{urlh.geturl()}'
             trim_length = self.get_param('trim_file_name') or 240
             if len(basen) > trim_length:
                 h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
@@ -1098,10 +1095,10 @@ class InfoExtractor(object):
 
     def to_screen(self, msg, *args, **kwargs):
         """Print msg to screen, prefixing it with '[ie_name]'"""
-        self._downloader.to_screen('[%s] %s' % (self.IE_NAME, msg), *args, **kwargs)
+        self._downloader.to_screen(f'[{self.IE_NAME}] {msg}', *args, **kwargs)
 
     def write_debug(self, msg, *args, **kwargs):
-        self._downloader.write_debug('[%s] %s' % (self.IE_NAME, msg), *args, **kwargs)
+        self._downloader.write_debug(f'[{self.IE_NAME}] {msg}', *args, **kwargs)
 
     def get_param(self, name, default=None, *args, **kwargs):
         if self._downloader:
@@ -1138,7 +1135,7 @@ class InfoExtractor(object):
             method = 'any' if self.supports_login() else 'cookies'
         if method is not None:
             assert method in self._LOGIN_HINTS, 'Invalid login method'
-            msg = '%s. %s' % (msg, self._LOGIN_HINTS[method])
+            msg = f'{msg}. {self._LOGIN_HINTS[method]}'
         raise ExtractorError(msg, expected=True)
 
     def raise_geo_restricted(
@@ -1257,7 +1254,7 @@ class InfoExtractor(object):
             else:
                 raise netrc.NetrcParseError(
                     'No authenticators for %s' % netrc_machine)
-        except (IOError, netrc.NetrcParseError) as err:
+        except (OSError, netrc.NetrcParseError) as err:
             self.report_warning(
                 'parsing .netrc: %s' % error_to_compat_str(err))
 
@@ -3333,7 +3330,7 @@ class InfoExtractor(object):
                 http_f = f.copy()
                 del http_f['manifest_url']
                 http_url = re.sub(
-                    REPL_REGEX, protocol + r'://%s/\g<1>%s\3' % (http_host, qualities[i]), f['url'])
+                    REPL_REGEX, protocol + fr'://{http_host}/\g<1>{qualities[i]}\3', f['url'])
                 http_f.update({
                     'format_id': http_f['format_id'].replace('hls-', protocol + '-'),
                     'url': http_url,
@@ -3354,7 +3351,7 @@ class InfoExtractor(object):
         formats = []
 
         def manifest_url(manifest):
-            m_url = '%s/%s' % (http_base_url, manifest)
+            m_url = f'{http_base_url}/{manifest}'
             if query:
                 m_url += '?%s' % query
             return m_url
@@ -3391,7 +3388,7 @@ class InfoExtractor(object):
         for protocol in ('rtmp', 'rtsp'):
             if protocol not in skip_protocols:
                 formats.append({
-                    'url': '%s:%s' % (protocol, url_base),
+                    'url': f'{protocol}:{url_base}',
                     'format_id': protocol,
                     'protocol': protocol,
                 })
@@ -3557,7 +3554,7 @@ class InfoExtractor(object):
     def _int(self, v, name, fatal=False, **kwargs):
         res = int_or_none(v, **kwargs)
         if res is None:
-            msg = 'Failed to extract %s: Could not parse value %r' % (name, v)
+            msg = f'Failed to extract {name}: Could not parse value {v!r}'
             if fatal:
                 raise ExtractorError(msg)
             else:
@@ -3567,7 +3564,7 @@ class InfoExtractor(object):
     def _float(self, v, name, fatal=False, **kwargs):
         res = float_or_none(v, **kwargs)
         if res is None:
-            msg = 'Failed to extract %s: Could not parse value %r' % (name, v)
+            msg = f'Failed to extract {name}: Could not parse value {v!r}'
             if fatal:
                 raise ExtractorError(msg)
             else:
@@ -3685,7 +3682,7 @@ class InfoExtractor(object):
     def _merge_subtitle_items(subtitle_list1, subtitle_list2):
         """ Merge subtitle items for one language. Items with duplicated URLs/data will be dropped. """
-        list1_data = set((item.get('url'), item.get('data')) for item in subtitle_list1)
+        list1_data = {(item.get('url'), item.get('data')) for item in subtitle_list1}
         ret = list(subtitle_list1)
         ret.extend(item for item in subtitle_list2 if (item.get('url'), item.get('data')) not in list1_data)
         return ret
 
@@ -3798,7 +3795,7 @@ class SearchInfoExtractor(InfoExtractor):
         else:
             n = int(prefix)
             if n <= 0:
-                raise ExtractorError('invalid download number %s for query "%s"' % (n, query))
+                raise ExtractorError(f'invalid download number {n} for query "{query}"')
             elif n > self._MAX_RESULTS:
                 self.report_warning('%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n))
                 n = self._MAX_RESULTS
diff --git a/yt_dlp/extractor/commonmistakes.py b/yt_dlp/extractor/commonmistakes.py
index e0a9f5956..62bd51fd7 100644
--- a/yt_dlp/extractor/commonmistakes.py
+++ b/yt_dlp/extractor/commonmistakes.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import ExtractorError
 
diff --git a/yt_dlp/extractor/commonprotocols.py b/yt_dlp/extractor/commonprotocols.py
index 3708c6ad2..40475f7ec 100644
--- a/yt_dlp/extractor/commonprotocols.py
+++ b/yt_dlp/extractor/commonprotocols.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..compat import (
     compat_urlparse,
diff --git a/yt_dlp/extractor/condenast.py b/yt_dlp/extractor/condenast.py
index 54e7af8b0..cf6e40cb8 100644
--- a/yt_dlp/extractor/condenast.py
+++ b/yt_dlp/extractor/condenast.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/contv.py b/yt_dlp/extractor/contv.py
index 84b462d40..50648a536 100644
--- a/yt_dlp/extractor/contv.py
+++ b/yt_dlp/extractor/contv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     float_or_none,
diff --git a/yt_dlp/extractor/corus.py b/yt_dlp/extractor/corus.py
index 119461375..7b83c0390 100644
--- a/yt_dlp/extractor/corus.py
+++ b/yt_dlp/extractor/corus.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .theplatform import ThePlatformFeedIE
 from ..utils import (
     dict_get,
diff --git a/yt_dlp/extractor/coub.py b/yt_dlp/extractor/coub.py
index e90aa1954..b462acaf0 100644
--- a/yt_dlp/extractor/coub.py
+++ b/yt_dlp/extractor/coub.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
diff --git a/yt_dlp/extractor/cozytv.py b/yt_dlp/extractor/cozytv.py
index d49f1ca74..5ef5afcc2 100644
--- a/yt_dlp/extractor/cozytv.py
+++ b/yt_dlp/extractor/cozytv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import unified_strdate
 
diff --git a/yt_dlp/extractor/cpac.py b/yt_dlp/extractor/cpac.py
index 22741152c..e8975e5e2 100644
--- a/yt_dlp/extractor/cpac.py
+++ b/yt_dlp/extractor/cpac.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
diff --git a/yt_dlp/extractor/cracked.py b/yt_dlp/extractor/cracked.py
index f77a68ece..c6aabccc6 100644
--- a/yt_dlp/extractor/cracked.py
+++ b/yt_dlp/extractor/cracked.py
@@
-1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/crackle.py b/yt_dlp/extractor/crackle.py index db4962c42..319374f3b 100644 --- a/yt_dlp/extractor/crackle.py +++ b/yt_dlp/extractor/crackle.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals, division - import hashlib import hmac import re diff --git a/yt_dlp/extractor/craftsy.py b/yt_dlp/extractor/craftsy.py index ed2f4420e..307bfb946 100644 --- a/yt_dlp/extractor/craftsy.py +++ b/yt_dlp/extractor/craftsy.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .brightcove import BrightcoveNewIE from .common import InfoExtractor diff --git a/yt_dlp/extractor/crooksandliars.py b/yt_dlp/extractor/crooksandliars.py index 7fb782db7..c831a3ae0 100644 --- a/yt_dlp/extractor/crooksandliars.py +++ b/yt_dlp/extractor/crooksandliars.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/crowdbunker.py b/yt_dlp/extractor/crowdbunker.py index 72906afef..75d90b5c5 100644 --- a/yt_dlp/extractor/crowdbunker.py +++ b/yt_dlp/extractor/crowdbunker.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools from .common import InfoExtractor diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py index d7696bbd9..bb1dbbaad 100644 --- a/yt_dlp/extractor/crunchyroll.py +++ b/yt_dlp/extractor/crunchyroll.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import re import json diff --git a/yt_dlp/extractor/cspan.py b/yt_dlp/extractor/cspan.py index f51159bbe..cb1523617 100644 --- a/yt_dlp/extractor/cspan.py +++ b/yt_dlp/extractor/cspan.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/ctsnews.py b/yt_dlp/extractor/ctsnews.py index 679f1d92e..cec178f03 100644 --- a/yt_dlp/extractor/ctsnews.py +++ b/yt_dlp/extractor/ctsnews.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import unified_timestamp from .youtube import YoutubeIE diff --git a/yt_dlp/extractor/ctv.py b/yt_dlp/extractor/ctv.py index 756bcc2be..f125c1ce9 100644 --- a/yt_dlp/extractor/ctv.py +++ b/yt_dlp/extractor/ctv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/ctvnews.py b/yt_dlp/extractor/ctvnews.py index 952f4c747..ad3f0d8e4 100644 --- a/yt_dlp/extractor/ctvnews.py +++ b/yt_dlp/extractor/ctvnews.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/cultureunplugged.py b/yt_dlp/extractor/cultureunplugged.py index 9002e4cef..2fb22800f 100644 --- a/yt_dlp/extractor/cultureunplugged.py +++ b/yt_dlp/extractor/cultureunplugged.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import time from .common import InfoExtractor diff --git a/yt_dlp/extractor/curiositystream.py b/yt_dlp/extractor/curiositystream.py index b8abcf7a5..5b76b29ff 100644 --- a/yt_dlp/extractor/curiositystream.py +++ b/yt_dlp/extractor/curiositystream.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/cwtv.py b/yt_dlp/extractor/cwtv.py index 
73382431b..07239f39c 100644 --- a/yt_dlp/extractor/cwtv.py +++ b/yt_dlp/extractor/cwtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/cybrary.py b/yt_dlp/extractor/cybrary.py index c278f0fe0..7da581828 100644 --- a/yt_dlp/extractor/cybrary.py +++ b/yt_dlp/extractor/cybrary.py @@ -1,5 +1,4 @@ -# coding: utf-8 -from .common import InfoExtractor +from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/daftsex.py b/yt_dlp/extractor/daftsex.py index 6037fd9ca..0fe014f76 100644 --- a/yt_dlp/extractor/daftsex.py +++ b/yt_dlp/extractor/daftsex.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_b64decode from ..utils import ( diff --git a/yt_dlp/extractor/dailymail.py b/yt_dlp/extractor/dailymail.py index 67b88fd56..5451dbf00 100644 --- a/yt_dlp/extractor/dailymail.py +++ b/yt_dlp/extractor/dailymail.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/dailymotion.py b/yt_dlp/extractor/dailymotion.py index 9cb56185b..3b090d5e0 100644 --- a/yt_dlp/extractor/dailymotion.py +++ b/yt_dlp/extractor/dailymotion.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import json import re diff --git a/yt_dlp/extractor/damtomo.py b/yt_dlp/extractor/damtomo.py index 456cd35a4..962d9741b 100644 --- a/yt_dlp/extractor/damtomo.py +++ b/yt_dlp/extractor/damtomo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/daum.py b/yt_dlp/extractor/daum.py index 4362e92cb..a1f197b0b 100644 --- a/yt_dlp/extractor/daum.py +++ b/yt_dlp/extractor/daum.py @@ -1,7 +1,3 @@ -# coding: utf-8 - -from __future__ import unicode_literals - import itertools from .common import InfoExtractor diff --git a/yt_dlp/extractor/dbtv.py b/yt_dlp/extractor/dbtv.py index 8e73176a6..2beccd8b5 100644 --- a/yt_dlp/extractor/dbtv.py +++ b/yt_dlp/extractor/dbtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/dctp.py b/yt_dlp/extractor/dctp.py index e700f8d86..24bb6aca2 100644 --- a/yt_dlp/extractor/dctp.py +++ b/yt_dlp/extractor/dctp.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/deezer.py b/yt_dlp/extractor/deezer.py index 7ba02e552..bee1c7501 100644 --- a/yt_dlp/extractor/deezer.py +++ b/yt_dlp/extractor/deezer.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/defense.py b/yt_dlp/extractor/defense.py index 9fe144e14..7d73ea862 100644 --- a/yt_dlp/extractor/defense.py +++ b/yt_dlp/extractor/defense.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/democracynow.py b/yt_dlp/extractor/democracynow.py index 5c9c0ecdc..af327e6c6 100644 --- a/yt_dlp/extractor/democracynow.py +++ b/yt_dlp/extractor/democracynow.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import os.path diff --git 
a/yt_dlp/extractor/dfb.py b/yt_dlp/extractor/dfb.py index 97f70fc7b..5aca72988 100644 --- a/yt_dlp/extractor/dfb.py +++ b/yt_dlp/extractor/dfb.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import unified_strdate diff --git a/yt_dlp/extractor/dhm.py b/yt_dlp/extractor/dhm.py index aee72a6ed..3d42fc2b0 100644 --- a/yt_dlp/extractor/dhm.py +++ b/yt_dlp/extractor/dhm.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import parse_duration diff --git a/yt_dlp/extractor/digg.py b/yt_dlp/extractor/digg.py index 913c1750f..86e8a6fac 100644 --- a/yt_dlp/extractor/digg.py +++ b/yt_dlp/extractor/digg.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import js_to_json diff --git a/yt_dlp/extractor/digitalconcerthall.py b/yt_dlp/extractor/digitalconcerthall.py index 8398ae30e..c891ad0a6 100644 --- a/yt_dlp/extractor/digitalconcerthall.py +++ b/yt_dlp/extractor/digitalconcerthall.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/digiteka.py b/yt_dlp/extractor/digiteka.py index d63204778..5d244cb08 100644 --- a/yt_dlp/extractor/digiteka.py +++ b/yt_dlp/extractor/digiteka.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/discovery.py b/yt_dlp/extractor/discovery.py index fd3ad75c7..fd3fc8fb0 100644 --- a/yt_dlp/extractor/discovery.py +++ b/yt_dlp/extractor/discovery.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import random import string diff --git a/yt_dlp/extractor/discoverygo.py b/yt_dlp/extractor/discoverygo.py index 9e7b14a7d..7b4278c88 100644 --- a/yt_dlp/extractor/discoverygo.py +++ b/yt_dlp/extractor/discoverygo.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/discoveryvr.py b/yt_dlp/extractor/discoveryvr.py index cb63c2649..a021d986e 100644 --- a/yt_dlp/extractor/discoveryvr.py +++ b/yt_dlp/extractor/discoveryvr.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import parse_duration diff --git a/yt_dlp/extractor/disney.py b/yt_dlp/extractor/disney.py index 0ad7b1f46..f9af59a57 100644 --- a/yt_dlp/extractor/disney.py +++ b/yt_dlp/extractor/disney.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/dispeak.py b/yt_dlp/extractor/dispeak.py index 3d651f3ab..d4f3324e7 100644 --- a/yt_dlp/extractor/dispeak.py +++ b/yt_dlp/extractor/dispeak.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/dlive.py b/yt_dlp/extractor/dlive.py index 7410eb6c8..31b4a568f 100644 --- a/yt_dlp/extractor/dlive.py +++ b/yt_dlp/extractor/dlive.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/doodstream.py b/yt_dlp/extractor/doodstream.py index f692127c2..f1001c778 100644 --- a/yt_dlp/extractor/doodstream.py +++ b/yt_dlp/extractor/doodstream.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import string import random import time diff --git 
a/yt_dlp/extractor/dotsub.py b/yt_dlp/extractor/dotsub.py index 148605c0b..079f83750 100644 --- a/yt_dlp/extractor/dotsub.py +++ b/yt_dlp/extractor/dotsub.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( float_or_none, diff --git a/yt_dlp/extractor/douyutv.py b/yt_dlp/extractor/douyutv.py index 26a8d645c..477f4687c 100644 --- a/yt_dlp/extractor/douyutv.py +++ b/yt_dlp/extractor/douyutv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import time import hashlib import re diff --git a/yt_dlp/extractor/dplay.py b/yt_dlp/extractor/dplay.py index a25f27c3a..54f95a44a 100644 --- a/yt_dlp/extractor/dplay.py +++ b/yt_dlp/extractor/dplay.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import uuid diff --git a/yt_dlp/extractor/drbonanza.py b/yt_dlp/extractor/drbonanza.py index ea0f06d3d..dca8c89d0 100644 --- a/yt_dlp/extractor/drbonanza.py +++ b/yt_dlp/extractor/drbonanza.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( js_to_json, diff --git a/yt_dlp/extractor/dreisat.py b/yt_dlp/extractor/dreisat.py index 5a07c18f4..80a724607 100644 --- a/yt_dlp/extractor/dreisat.py +++ b/yt_dlp/extractor/dreisat.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .zdf import ZDFIE diff --git a/yt_dlp/extractor/drooble.py b/yt_dlp/extractor/drooble.py index 058425095..106e5c457 100644 --- a/yt_dlp/extractor/drooble.py +++ b/yt_dlp/extractor/drooble.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/dropbox.py b/yt_dlp/extractor/dropbox.py index 2559657ad..6ac0c713a 100644 --- a/yt_dlp/extractor/dropbox.py +++ b/yt_dlp/extractor/dropbox.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import os.path import re diff --git a/yt_dlp/extractor/dropout.py b/yt_dlp/extractor/dropout.py index 2fa61950c..475825eb8 100644 --- a/yt_dlp/extractor/dropout.py +++ b/yt_dlp/extractor/dropout.py @@ -1,4 +1,3 @@ -# coding: utf-8 from .common import InfoExtractor from .vimeo import VHXEmbedIE from ..utils import ( diff --git a/yt_dlp/extractor/drtuber.py b/yt_dlp/extractor/drtuber.py index 540b86a16..3149e319f 100644 --- a/yt_dlp/extractor/drtuber.py +++ b/yt_dlp/extractor/drtuber.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/drtv.py b/yt_dlp/extractor/drtv.py index 37e4d5b26..843e93072 100644 --- a/yt_dlp/extractor/drtv.py +++ b/yt_dlp/extractor/drtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import binascii import hashlib import re diff --git a/yt_dlp/extractor/dtube.py b/yt_dlp/extractor/dtube.py index ad247b7dd..25a98f625 100644 --- a/yt_dlp/extractor/dtube.py +++ b/yt_dlp/extractor/dtube.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from socket import timeout diff --git a/yt_dlp/extractor/duboku.py b/yt_dlp/extractor/duboku.py index a87597873..24403842d 100644 --- a/yt_dlp/extractor/duboku.py +++ b/yt_dlp/extractor/duboku.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/dumpert.py b/yt_dlp/extractor/dumpert.py index d9d9afdec..dc61115ff 100644 --- a/yt_dlp/extractor/dumpert.py +++ 
+++ b/yt_dlp/extractor/dumpert.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
diff --git a/yt_dlp/extractor/dvtv.py b/yt_dlp/extractor/dvtv.py
index 08663cffb..61d469f11 100644
--- a/yt_dlp/extractor/dvtv.py
+++ b/yt_dlp/extractor/dvtv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/dw.py b/yt_dlp/extractor/dw.py
index 6eaee07b4..ee2365ddd 100644
--- a/yt_dlp/extractor/dw.py
+++ b/yt_dlp/extractor/dw.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
diff --git a/yt_dlp/extractor/eagleplatform.py b/yt_dlp/extractor/eagleplatform.py
index f86731a0c..e2ecd4b7c 100644
--- a/yt_dlp/extractor/eagleplatform.py
+++ b/yt_dlp/extractor/eagleplatform.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/ebaumsworld.py b/yt_dlp/extractor/ebaumsworld.py
index c97682cd3..0854d0344 100644
--- a/yt_dlp/extractor/ebaumsworld.py
+++ b/yt_dlp/extractor/ebaumsworld.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/echomsk.py b/yt_dlp/extractor/echomsk.py
index 6b7cc652f..850eabbff 100644
--- a/yt_dlp/extractor/echomsk.py
+++ b/yt_dlp/extractor/echomsk.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/egghead.py b/yt_dlp/extractor/egghead.py
index b6b86768c..d5c954961 100644
--- a/yt_dlp/extractor/egghead.py
+++ b/yt_dlp/extractor/egghead.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
diff --git a/yt_dlp/extractor/ehow.py b/yt_dlp/extractor/ehow.py
index b1cd4f5d4..74469ce36 100644
--- a/yt_dlp/extractor/ehow.py
+++ b/yt_dlp/extractor/ehow.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import compat_urllib_parse_unquote
 
diff --git a/yt_dlp/extractor/eighttracks.py b/yt_dlp/extractor/eighttracks.py
index 9a44f89f3..3dd9ab1b3 100644
--- a/yt_dlp/extractor/eighttracks.py
+++ b/yt_dlp/extractor/eighttracks.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 import random
 
diff --git a/yt_dlp/extractor/einthusan.py b/yt_dlp/extractor/einthusan.py
index 7af279a53..37be68c61 100644
--- a/yt_dlp/extractor/einthusan.py
+++ b/yt_dlp/extractor/einthusan.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/eitb.py b/yt_dlp/extractor/eitb.py
index ee5ead18b..01a47f6fd 100644
--- a/yt_dlp/extractor/eitb.py
+++ b/yt_dlp/extractor/eitb.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     float_or_none,
diff --git a/yt_dlp/extractor/ellentube.py b/yt_dlp/extractor/ellentube.py
index d451bc048..bcd458cdf 100644
--- a/yt_dlp/extractor/ellentube.py
+++ b/yt_dlp/extractor/ellentube.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     clean_html,
diff --git a/yt_dlp/extractor/elonet.py b/yt_dlp/extractor/elonet.py
index 9c6aea28e..f99e12250 100644
--- a/yt_dlp/extractor/elonet.py
+++ b/yt_dlp/extractor/elonet.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import determine_ext
 
diff --git a/yt_dlp/extractor/elpais.py b/yt_dlp/extractor/elpais.py
index b89f6db62..7c6c88075 100644
--- a/yt_dlp/extractor/elpais.py
+++ b/yt_dlp/extractor/elpais.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import strip_jsonp, unified_strdate
 
diff --git a/yt_dlp/extractor/embedly.py b/yt_dlp/extractor/embedly.py
index a5820b21e..a8d1f3c55 100644
--- a/yt_dlp/extractor/embedly.py
+++ b/yt_dlp/extractor/embedly.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import compat_urllib_parse_unquote
 
diff --git a/yt_dlp/extractor/engadget.py b/yt_dlp/extractor/engadget.py
index 733bf322f..e7c5d7bf1 100644
--- a/yt_dlp/extractor/engadget.py
+++ b/yt_dlp/extractor/engadget.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/epicon.py b/yt_dlp/extractor/epicon.py
index cd19325bc..89424785e 100644
--- a/yt_dlp/extractor/epicon.py
+++ b/yt_dlp/extractor/epicon.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/eporner.py b/yt_dlp/extractor/eporner.py
index 25a0d9799..6bc70c5c6 100644
--- a/yt_dlp/extractor/eporner.py
+++ b/yt_dlp/extractor/eporner.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..utils import (
     encode_base_n,
diff --git a/yt_dlp/extractor/eroprofile.py b/yt_dlp/extractor/eroprofile.py
index 5d5e7f244..2b61f3be7 100644
--- a/yt_dlp/extractor/eroprofile.py
+++ b/yt_dlp/extractor/eroprofile.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/ertgr.py b/yt_dlp/extractor/ertgr.py
index 19ce23f01..507f0a5c1 100644
--- a/yt_dlp/extractor/ertgr.py
+++ b/yt_dlp/extractor/ertgr.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 import re
 
diff --git a/yt_dlp/extractor/escapist.py b/yt_dlp/extractor/escapist.py
index 4cd815ebc..5d9c46f72 100644
--- a/yt_dlp/extractor/escapist.py
+++ b/yt_dlp/extractor/escapist.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     determine_ext,
diff --git a/yt_dlp/extractor/espn.py b/yt_dlp/extractor/espn.py
index dc50f3b8b..8fad70e6b 100644
--- a/yt_dlp/extractor/espn.py
+++ b/yt_dlp/extractor/espn.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/esri.py b/yt_dlp/extractor/esri.py
index e9dcaeb1d..1736788db 100644
--- a/yt_dlp/extractor/esri.py
+++ b/yt_dlp/extractor/esri.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/europa.py b/yt_dlp/extractor/europa.py
index 60ab2ce13..ea20b4d4d 100644
--- a/yt_dlp/extractor/europa.py
+++ b/yt_dlp/extractor/europa.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
diff --git a/yt_dlp/extractor/europeantour.py b/yt_dlp/extractor/europeantour.py
index e28f067be..1995a745d 100644
--- a/yt_dlp/extractor/europeantour.py
+++ b/yt_dlp/extractor/europeantour.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/euscreen.py b/yt_dlp/extractor/euscreen.py
index 2759e7436..4435f08e0 100644
--- a/yt_dlp/extractor/euscreen.py
+++ b/yt_dlp/extractor/euscreen.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 from ..utils import (
diff --git a/yt_dlp/extractor/everyonesmixtape.py b/yt_dlp/extractor/everyonesmixtape.py
index 80cb032be..d26ff8ad3 100644
--- a/yt_dlp/extractor/everyonesmixtape.py
+++ b/yt_dlp/extractor/everyonesmixtape.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
diff --git a/yt_dlp/extractor/expotv.py b/yt_dlp/extractor/expotv.py
index 95a897782..92eaf4248 100644
--- a/yt_dlp/extractor/expotv.py
+++ b/yt_dlp/extractor/expotv.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
diff --git a/yt_dlp/extractor/expressen.py b/yt_dlp/extractor/expressen.py
index dc8b855d2..a1b8e9bc9 100644
--- a/yt_dlp/extractor/expressen.py
+++ b/yt_dlp/extractor/expressen.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 0cb686304..cd3934a70 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -1,6 +1,4 @@
 # flake8: noqa
-from __future__ import unicode_literals
-
 from .abc import (
     ABCIE,
     ABCIViewIE,
diff --git a/yt_dlp/extractor/extremetube.py b/yt_dlp/extractor/extremetube.py
index acd4090fa..99520b6a0 100644
--- a/yt_dlp/extractor/extremetube.py
+++ b/yt_dlp/extractor/extremetube.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from ..utils import str_to_int
 from .keezmovies import KeezMoviesIE
 
diff --git a/yt_dlp/extractor/eyedotv.py b/yt_dlp/extractor/eyedotv.py
index f62ddebae..d8b068e9c 100644
--- a/yt_dlp/extractor/eyedotv.py
+++ b/yt_dlp/extractor/eyedotv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     xpath_text,
diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py
index 5e0e2facf..2e69dce0f 100644
--- a/yt_dlp/extractor/facebook.py
+++ b/yt_dlp/extractor/facebook.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 import re
 
diff --git a/yt_dlp/extractor/fancode.py b/yt_dlp/extractor/fancode.py
index 7ea16c61d..9716e581a 100644
--- a/yt_dlp/extractor/fancode.py
+++ b/yt_dlp/extractor/fancode.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 from ..compat import compat_str
diff --git a/yt_dlp/extractor/faz.py b/yt_dlp/extractor/faz.py
index 312ee2aee..cc12fda2b 100644
--- a/yt_dlp/extractor/faz.py
+++ b/yt_dlp/extractor/faz.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/fc2.py b/yt_dlp/extractor/fc2.py
index 54a83aa16..a4c9793bb 100644
--- a/yt_dlp/extractor/fc2.py
+++ b/yt_dlp/extractor/fc2.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/fczenit.py b/yt_dlp/extractor/fczenit.py
index 8db7c5963..df40888e1 100644
--- a/yt_dlp/extractor/fczenit.py
+++ b/yt_dlp/extractor/fczenit.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
diff --git a/yt_dlp/extractor/filmmodu.py b/yt_dlp/extractor/filmmodu.py
index 2746876d5..d74131192 100644
--- a/yt_dlp/extractor/filmmodu.py
+++ b/yt_dlp/extractor/filmmodu.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import int_or_none
 
diff --git a/yt_dlp/extractor/filmon.py b/yt_dlp/extractor/filmon.py
index 7b43ecc0f..7040231be 100644
--- a/yt_dlp/extractor/filmon.py
+++ b/yt_dlp/extractor/filmon.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import (
     compat_str,
diff --git a/yt_dlp/extractor/filmweb.py b/yt_dlp/extractor/filmweb.py
index 5e323b4f8..cfea1f2fb 100644
--- a/yt_dlp/extractor/filmweb.py
+++ b/yt_dlp/extractor/filmweb.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/firsttv.py b/yt_dlp/extractor/firsttv.py
index ccad173b7..99c27e0c3 100644
--- a/yt_dlp/extractor/firsttv.py
+++ b/yt_dlp/extractor/firsttv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import (
     compat_str,
diff --git a/yt_dlp/extractor/fivetv.py b/yt_dlp/extractor/fivetv.py
index d6bebd19b..448c332b3 100644
--- a/yt_dlp/extractor/fivetv.py
+++ b/yt_dlp/extractor/fivetv.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..utils import int_or_none
 
diff --git a/yt_dlp/extractor/flickr.py b/yt_dlp/extractor/flickr.py
index 2ed6c2bdc..552ecd43a 100644
--- a/yt_dlp/extractor/flickr.py
+++ b/yt_dlp/extractor/flickr.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import (
     compat_str,
diff --git a/yt_dlp/extractor/folketinget.py b/yt_dlp/extractor/folketinget.py
index b3df93f28..0e69fa32f 100644
--- a/yt_dlp/extractor/folketinget.py
+++ b/yt_dlp/extractor/folketinget.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import compat_parse_qs
 from ..utils import (
diff --git a/yt_dlp/extractor/footyroom.py b/yt_dlp/extractor/footyroom.py
index 118325b6d..4a1316b50 100644
--- a/yt_dlp/extractor/footyroom.py
+++ b/yt_dlp/extractor/footyroom.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from .streamable import StreamableIE
 
diff --git a/yt_dlp/extractor/formula1.py b/yt_dlp/extractor/formula1.py
index 67662e6de..0a8ef850e 100644
--- a/yt_dlp/extractor/formula1.py
+++ b/yt_dlp/extractor/formula1.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/fourtube.py b/yt_dlp/extractor/fourtube.py
index d4d955b6b..c6af100f3 100644
--- a/yt_dlp/extractor/fourtube.py
+++ b/yt_dlp/extractor/fourtube.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/fox.py b/yt_dlp/extractor/fox.py
index 4c52b9ac6..5996e86bb 100644
--- a/yt_dlp/extractor/fox.py
+++ b/yt_dlp/extractor/fox.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 import uuid
 
diff --git a/yt_dlp/extractor/fox9.py b/yt_dlp/extractor/fox9.py
index 91f8f7b8a..dfbafa7dd 100644
--- a/yt_dlp/extractor/fox9.py
+++ b/yt_dlp/extractor/fox9.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/foxgay.py b/yt_dlp/extractor/foxgay.py
index 1c53e0642..4abc2cfd0 100644
--- a/yt_dlp/extractor/foxgay.py
+++ b/yt_dlp/extractor/foxgay.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import itertools
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/foxnews.py b/yt_dlp/extractor/foxnews.py
index 18fa0a5ef..cee4d6b49 100644
--- a/yt_dlp/extractor/foxnews.py
+++ b/yt_dlp/extractor/foxnews.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .amp import AMPIE
diff --git a/yt_dlp/extractor/foxsports.py b/yt_dlp/extractor/foxsports.py
index 2b2cb6c6f..f9d7fe52a 100644
--- a/yt_dlp/extractor/foxsports.py
+++ b/yt_dlp/extractor/foxsports.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/fptplay.py b/yt_dlp/extractor/fptplay.py
index c23fe6c53..1872d8a1c 100644
--- a/yt_dlp/extractor/fptplay.py
+++ b/yt_dlp/extractor/fptplay.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import hashlib
 import time
 import urllib.parse
diff --git a/yt_dlp/extractor/franceculture.py b/yt_dlp/extractor/franceculture.py
index 9dc28d801..6bd9912f3 100644
--- a/yt_dlp/extractor/franceculture.py
+++ b/yt_dlp/extractor/franceculture.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 from .common import InfoExtractor
 from ..utils import (
diff --git a/yt_dlp/extractor/franceinter.py b/yt_dlp/extractor/franceinter.py
index ae822a50e..779249b84 100644
--- a/yt_dlp/extractor/franceinter.py
+++ b/yt_dlp/extractor/franceinter.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import month_by_name
 
diff --git a/yt_dlp/extractor/francetv.py b/yt_dlp/extractor/francetv.py
index 347a766d8..5902eaca0 100644
--- a/yt_dlp/extractor/francetv.py
+++ b/yt_dlp/extractor/francetv.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..utils import (
     determine_ext,
diff --git a/yt_dlp/extractor/freesound.py b/yt_dlp/extractor/freesound.py
index 138b6bc58..9724dbdf0 100644
--- a/yt_dlp/extractor/freesound.py
+++ b/yt_dlp/extractor/freesound.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/freespeech.py b/yt_dlp/extractor/freespeech.py
index ea9c3e317..aea551379 100644
--- a/yt_dlp/extractor/freespeech.py
+++ b/yt_dlp/extractor/freespeech.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from .youtube import YoutubeIE
 
diff --git a/yt_dlp/extractor/frontendmasters.py b/yt_dlp/extractor/frontendmasters.py
index fc67a8437..e0529b7ba 100644
--- a/yt_dlp/extractor/frontendmasters.py
+++ b/yt_dlp/extractor/frontendmasters.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/fujitv.py b/yt_dlp/extractor/fujitv.py
index 4fdfe12ab..15d75a972 100644
--- a/yt_dlp/extractor/fujitv.py
+++ b/yt_dlp/extractor/fujitv.py
@@ -1,5 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
 from ..utils import HEADRequest
 from .common import InfoExtractor
 
diff --git a/yt_dlp/extractor/funimation.py b/yt_dlp/extractor/funimation.py
index 6aa9bc9ce..1e3309605 100644
--- a/yt_dlp/extractor/funimation.py
+++ b/yt_dlp/extractor/funimation.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import random
 import re
 import string
diff --git a/yt_dlp/extractor/funk.py b/yt_dlp/extractor/funk.py
index 2c5cfe864..539d719c5 100644
--- a/yt_dlp/extractor/funk.py
+++ b/yt_dlp/extractor/funk.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from .nexx import NexxIE
 from ..utils import (
diff --git a/yt_dlp/extractor/fusion.py b/yt_dlp/extractor/fusion.py
index a3f44b812..46bda49ea 100644
--- a/yt_dlp/extractor/fusion.py
+++ b/yt_dlp/extractor/fusion.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     determine_ext,
diff --git a/yt_dlp/extractor/fxnetworks.py b/yt_dlp/extractor/fxnetworks.py
index 00e67426b..370b0a597 100644
--- a/yt_dlp/extractor/fxnetworks.py
+++ b/yt_dlp/extractor/fxnetworks.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .adobepass import AdobePassIE
 from ..utils import (
     extract_attributes,
diff --git a/yt_dlp/extractor/gab.py b/yt_dlp/extractor/gab.py
index 9ba0b1ca1..7ed81f761 100644
--- a/yt_dlp/extractor/gab.py
+++ b/yt_dlp/extractor/gab.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/gaia.py b/yt_dlp/extractor/gaia.py
index 5b0195c63..4ace0544a 100644
--- a/yt_dlp/extractor/gaia.py
+++ b/yt_dlp/extractor/gaia.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..compat import (
     compat_str,
diff --git a/yt_dlp/extractor/gameinformer.py b/yt_dlp/extractor/gameinformer.py
index f1b96c172..2664edb81 100644
--- a/yt_dlp/extractor/gameinformer.py
+++ b/yt_dlp/extractor/gameinformer.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .brightcove import BrightcoveNewIE
 from .common import InfoExtractor
 from ..utils import (
diff --git a/yt_dlp/extractor/gamejolt.py b/yt_dlp/extractor/gamejolt.py
index a13e528f5..440b832fc 100644
--- a/yt_dlp/extractor/gamejolt.py
+++ b/yt_dlp/extractor/gamejolt.py
@@ -1,4 +1,3 @@
-# coding: utf-8
 import itertools
 import json
 import math
diff --git a/yt_dlp/extractor/gamespot.py b/yt_dlp/extractor/gamespot.py
index 7a1beae3c..e1d317377 100644
--- a/yt_dlp/extractor/gamespot.py
+++ b/yt_dlp/extractor/gamespot.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .once import OnceIE
 from ..compat import compat_urllib_parse_unquote
 
diff --git a/yt_dlp/extractor/gamestar.py b/yt_dlp/extractor/gamestar.py
index e882fa671..e9966f532 100644
--- a/yt_dlp/extractor/gamestar.py
+++ b/yt_dlp/extractor/gamestar.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
diff --git a/yt_dlp/extractor/gaskrank.py b/yt_dlp/extractor/gaskrank.py
index 03acd2a73..76ddcc40e 100644
--- a/yt_dlp/extractor/gaskrank.py
+++ b/yt_dlp/extractor/gaskrank.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 from .common import InfoExtractor
 from ..utils import (
diff --git a/yt_dlp/extractor/gazeta.py b/yt_dlp/extractor/gazeta.py
index 367187080..c6868a672 100644
--- a/yt_dlp/extractor/gazeta.py
+++ b/yt_dlp/extractor/gazeta.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/gdcvault.py b/yt_dlp/extractor/gdcvault.py
index c3ad6b4ce..2878bbd88 100644
--- a/yt_dlp/extractor/gdcvault.py
+++ b/yt_dlp/extractor/gdcvault.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/gedidigital.py b/yt_dlp/extractor/gedidigital.py
index ec386c218..c878daff8 100644
--- a/yt_dlp/extractor/gedidigital.py
+++ b/yt_dlp/extractor/gedidigital.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index fd620217e..f44f19a54 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
 import os
 import re
 import xml.etree.ElementTree
@@ -2628,7 +2624,7 @@ class GenericIE(InfoExtractor):
 
             entries.append({
                 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
-                'title': '%s - %s' % (title, n.tag),
+                'title': f'{title} - {n.tag}',
                 'url': compat_urlparse.urljoin(url, url_n.text),
                 'duration': float_or_none(n.find('./duration').text),
             })
@@ -2650,7 +2646,7 @@
 
         for o in range(len(newmagic) - 1, -1, -1):
             new = ''
-            l = (o + sum([int(n) for n in license[o:]])) % 32
+            l = (o + sum(int(n) for n in license[o:])) % 32
 
             for i in range(0, len(newmagic)):
                 if i == o:
@@ -3772,7 +3768,7 @@
         else:
             for num, entry in enumerate(entries, start=1):
                 entry.update({
-                    'id': '%s-%s' % (video_id, num),
+                    'id': f'{video_id}-{num}',
                     'title': '%s (%d)' % (video_title, num),
                 })
             for entry in entries:
diff --git a/yt_dlp/extractor/gettr.py b/yt_dlp/extractor/gettr.py
index 327a4d0b8..9bd6200b6 100644
--- a/yt_dlp/extractor/gettr.py
+++ b/yt_dlp/extractor/gettr.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     bool_or_none,
diff --git a/yt_dlp/extractor/gfycat.py b/yt_dlp/extractor/gfycat.py
index 2ad03e2b2..7373c574f 100644
--- a/yt_dlp/extractor/gfycat.py
+++ b/yt_dlp/extractor/gfycat.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/giantbomb.py b/yt_dlp/extractor/giantbomb.py
index 1920923fc..5d6b208aa 100644
--- a/yt_dlp/extractor/giantbomb.py
+++ b/yt_dlp/extractor/giantbomb.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import json
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/giga.py b/yt_dlp/extractor/giga.py
index 5a9992a27..9e835a6da 100644
--- a/yt_dlp/extractor/giga.py
+++ b/yt_dlp/extractor/giga.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import itertools
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/gigya.py b/yt_dlp/extractor/gigya.py
index 412178492..c5bc86bb4 100644
--- a/yt_dlp/extractor/gigya.py
+++ b/yt_dlp/extractor/gigya.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 
 from .common import InfoExtractor
 from ..utils import (
diff --git a/yt_dlp/extractor/glide.py b/yt_dlp/extractor/glide.py
index 12af859be..2bffb26dc 100644
--- a/yt_dlp/extractor/glide.py
+++ b/yt_dlp/extractor/glide.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/globo.py b/yt_dlp/extractor/globo.py
index f6aaae1e9..8915ebf48 100644
--- a/yt_dlp/extractor/globo.py
+++ b/yt_dlp/extractor/globo.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import base64
 import hashlib
 import json
diff --git a/yt_dlp/extractor/glomex.py b/yt_dlp/extractor/glomex.py
index d9ef4338f..85ffa4c05 100644
--- a/yt_dlp/extractor/glomex.py
+++ b/yt_dlp/extractor/glomex.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 import urllib.parse
 
diff --git a/yt_dlp/extractor/go.py b/yt_dlp/extractor/go.py
index f92e16600..07d13d1c3 100644
--- a/yt_dlp/extractor/go.py
+++ b/yt_dlp/extractor/go.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .adobepass import AdobePassIE
diff --git a/yt_dlp/extractor/godtube.py b/yt_dlp/extractor/godtube.py
index 96e68b4d2..697540155 100644
--- a/yt_dlp/extractor/godtube.py
+++ b/yt_dlp/extractor/godtube.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..utils import (
     parse_duration,
diff --git a/yt_dlp/extractor/gofile.py b/yt_dlp/extractor/gofile.py
index 858bac52c..b491b46a5 100644
--- a/yt_dlp/extractor/gofile.py
+++ b/yt_dlp/extractor/gofile.py
@@ -1,4 +1,3 @@
-# coding: utf-8
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
diff --git a/yt_dlp/extractor/golem.py b/yt_dlp/extractor/golem.py
index 47a068e74..8416b5aa4 100644
--- a/yt_dlp/extractor/golem.py
+++ b/yt_dlp/extractor/golem.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import (
     compat_str,
diff --git a/yt_dlp/extractor/googledrive.py b/yt_dlp/extractor/googledrive.py
index 7b5bf280f..c0905f86a 100644
--- a/yt_dlp/extractor/googledrive.py
+++ b/yt_dlp/extractor/googledrive.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/googlepodcasts.py b/yt_dlp/extractor/googlepodcasts.py
index 25631e213..8b2351ba8 100644
--- a/yt_dlp/extractor/googlepodcasts.py
+++ b/yt_dlp/extractor/googlepodcasts.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/googlesearch.py b/yt_dlp/extractor/googlesearch.py
index 4b8b1bcbb..67ca0e5e0 100644
--- a/yt_dlp/extractor/googlesearch.py
+++ b/yt_dlp/extractor/googlesearch.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import itertools
 import re
 
diff --git a/yt_dlp/extractor/gopro.py b/yt_dlp/extractor/gopro.py
index 10cc1aec1..14d6b2187 100644
--- a/yt_dlp/extractor/gopro.py
+++ b/yt_dlp/extractor/gopro.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
diff --git a/yt_dlp/extractor/goshgay.py b/yt_dlp/extractor/goshgay.py
index 377981d3e..9a1f32b7e 100644
--- a/yt_dlp/extractor/goshgay.py
+++ b/yt_dlp/extractor/goshgay.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import (
     compat_parse_qs,
diff --git a/yt_dlp/extractor/gotostage.py b/yt_dlp/extractor/gotostage.py
index 6aa96106a..112293bef 100644
--- a/yt_dlp/extractor/gotostage.py
+++ b/yt_dlp/extractor/gotostage.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
diff --git a/yt_dlp/extractor/gputechconf.py b/yt_dlp/extractor/gputechconf.py
index 73dc62c49..2d13bf491 100644
--- a/yt_dlp/extractor/gputechconf.py
+++ b/yt_dlp/extractor/gputechconf.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/gronkh.py b/yt_dlp/extractor/gronkh.py
index c9f1dd256..52bbf3bc7 100644
--- a/yt_dlp/extractor/gronkh.py
+++ b/yt_dlp/extractor/gronkh.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import unified_strdate
 
diff --git a/yt_dlp/extractor/groupon.py b/yt_dlp/extractor/groupon.py
index a6da90931..362d3ff83 100644
--- a/yt_dlp/extractor/groupon.py
+++ b/yt_dlp/extractor/groupon.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/hbo.py b/yt_dlp/extractor/hbo.py
index 68df748f5..f54628665 100644
--- a/yt_dlp/extractor/hbo.py
+++ b/yt_dlp/extractor/hbo.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/hearthisat.py b/yt_dlp/extractor/hearthisat.py
index a3d6a055f..9aa1325af 100644
--- a/yt_dlp/extractor/hearthisat.py
+++ b/yt_dlp/extractor/hearthisat.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..utils import (
     determine_ext,
diff --git a/yt_dlp/extractor/heise.py b/yt_dlp/extractor/heise.py
index cbe564a3c..84e5d3023 100644
--- a/yt_dlp/extractor/heise.py
+++ b/yt_dlp/extractor/heise.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from .kaltura import KalturaIE
 from .youtube import YoutubeIE
diff --git a/yt_dlp/extractor/hellporno.py b/yt_dlp/extractor/hellporno.py
index 92d32cdcc..fd0327228 100644
--- a/yt_dlp/extractor/hellporno.py
+++ b/yt_dlp/extractor/hellporno.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
diff --git a/yt_dlp/extractor/helsinki.py b/yt_dlp/extractor/helsinki.py
index 575fb332a..b7c826055 100644
--- a/yt_dlp/extractor/helsinki.py
+++ b/yt_dlp/extractor/helsinki.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import js_to_json
 
diff --git a/yt_dlp/extractor/hentaistigma.py b/yt_dlp/extractor/hentaistigma.py
index 86a93de4d..ca5ffc2ae 100644
--- a/yt_dlp/extractor/hentaistigma.py
+++ b/yt_dlp/extractor/hentaistigma.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/hgtv.py b/yt_dlp/extractor/hgtv.py
index a4f332565..c40017db1 100644
--- a/yt_dlp/extractor/hgtv.py
+++ b/yt_dlp/extractor/hgtv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/hidive.py b/yt_dlp/extractor/hidive.py
index 46d7d62ab..a6a71d630 100644
--- a/yt_dlp/extractor/hidive.py
+++ b/yt_dlp/extractor/hidive.py
@@ -1,4 +1,3 @@
-# coding: utf-8
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/historicfilms.py b/yt_dlp/extractor/historicfilms.py
index 56343e98f..c428feede 100644
--- a/yt_dlp/extractor/historicfilms.py
+++ b/yt_dlp/extractor/historicfilms.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import parse_duration
 
diff --git a/yt_dlp/extractor/hitbox.py b/yt_dlp/extractor/hitbox.py
index 0470d0a99..a7e4424b6 100644
--- a/yt_dlp/extractor/hitbox.py
+++ b/yt_dlp/extractor/hitbox.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/hitrecord.py b/yt_dlp/extractor/hitrecord.py
index fd5dc2935..902af44fa 100644
--- a/yt_dlp/extractor/hitrecord.py
+++ b/yt_dlp/extractor/hitrecord.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
diff --git a/yt_dlp/extractor/hketv.py b/yt_dlp/extractor/hketv.py
index 1f3502b90..4c616d1dd 100644
--- a/yt_dlp/extractor/hketv.py
+++ b/yt_dlp/extractor/hketv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
diff --git a/yt_dlp/extractor/hotnewhiphop.py b/yt_dlp/extractor/hotnewhiphop.py
index 4703e1894..f8570cb86 100644
--- a/yt_dlp/extractor/hotnewhiphop.py
+++ b/yt_dlp/extractor/hotnewhiphop.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import compat_b64decode
 from ..utils import (
diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py
index a0ce1f10a..d82e1aead 100644
--- a/yt_dlp/extractor/hotstar.py
+++ b/yt_dlp/extractor/hotstar.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import hashlib
 import hmac
 import re
diff --git a/yt_dlp/extractor/howcast.py b/yt_dlp/extractor/howcast.py
index 7e36b85ad..59cf80f1a 100644
--- a/yt_dlp/extractor/howcast.py
+++ b/yt_dlp/extractor/howcast.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import parse_iso8601
 
diff --git a/yt_dlp/extractor/howstuffworks.py b/yt_dlp/extractor/howstuffworks.py
index cf90ab3c9..c49c0899e 100644
--- a/yt_dlp/extractor/howstuffworks.py
+++ b/yt_dlp/extractor/howstuffworks.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     find_xpath_attr,
diff --git a/yt_dlp/extractor/hrfensehen.py b/yt_dlp/extractor/hrfensehen.py
index e39ded254..6f7ed9b4b 100644
--- a/yt_dlp/extractor/hrfensehen.py
+++ b/yt_dlp/extractor/hrfensehen.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 import re
 
diff --git a/yt_dlp/extractor/hrti.py b/yt_dlp/extractor/hrti.py
index 36d600773..773ae0c9a 100644
--- a/yt_dlp/extractor/hrti.py
+++ b/yt_dlp/extractor/hrti.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/hse.py b/yt_dlp/extractor/hse.py
index 9144ff8dc..9faf46a5d 100644
--- a/yt_dlp/extractor/hse.py
+++ b/yt_dlp/extractor/hse.py
@@ -1,4 +1,3 @@
-# coding: utf-8
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
diff --git a/yt_dlp/extractor/huajiao.py b/yt_dlp/extractor/huajiao.py
index 4ca275dda..c498fa330 100644
--- a/yt_dlp/extractor/huajiao.py
+++ b/yt_dlp/extractor/huajiao.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     parse_duration,
diff --git a/yt_dlp/extractor/huffpost.py b/yt_dlp/extractor/huffpost.py
index 54385bafa..7286dbcd7 100644
--- a/yt_dlp/extractor/huffpost.py
+++ b/yt_dlp/extractor/huffpost.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/hungama.py b/yt_dlp/extractor/hungama.py
index 821b16e5d..938a24296 100644
--- a/yt_dlp/extractor/hungama.py
+++ b/yt_dlp/extractor/hungama.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/huya.py b/yt_dlp/extractor/huya.py
index 4e96f22fa..9dd5e41b3 100644
--- a/yt_dlp/extractor/huya.py
+++ b/yt_dlp/extractor/huya.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import hashlib
 import random
 
diff --git a/yt_dlp/extractor/hypem.py b/yt_dlp/extractor/hypem.py
index 9ca28d632..54db7b3eb 100644
--- a/yt_dlp/extractor/hypem.py
+++ b/yt_dlp/extractor/hypem.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import int_or_none
 
diff --git a/yt_dlp/extractor/ichinanalive.py b/yt_dlp/extractor/ichinanalive.py
index cb39f821c..ffff36cc1 100644
--- a/yt_dlp/extractor/ichinanalive.py
+++ b/yt_dlp/extractor/ichinanalive.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import ExtractorError, str_or_none, traverse_obj, unified_strdate
 from ..compat import compat_str
diff --git a/yt_dlp/extractor/ign.py b/yt_dlp/extractor/ign.py
index c826eb3ba..bfb1e9d64 100644
--- a/yt_dlp/extractor/ign.py
+++ b/yt_dlp/extractor/ign.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/iheart.py b/yt_dlp/extractor/iheart.py
index b54c05eeb..2c6a5b6a1 100644
--- a/yt_dlp/extractor/iheart.py
+++ b/yt_dlp/extractor/iheart.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     clean_html,
diff --git a/yt_dlp/extractor/imdb.py b/yt_dlp/extractor/imdb.py
index 96cee2e2f..74cab7dc1 100644
--- a/yt_dlp/extractor/imdb.py
+++ b/yt_dlp/extractor/imdb.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import base64
 import json
 import re
diff --git a/yt_dlp/extractor/imggaming.py b/yt_dlp/extractor/imggaming.py
index ce7b21ab2..5b8bfda96 100644
--- a/yt_dlp/extractor/imggaming.py
+++ b/yt_dlp/extractor/imggaming.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/imgur.py b/yt_dlp/extractor/imgur.py
index dfa473752..a3bb47615 100644
--- a/yt_dlp/extractor/imgur.py
+++ b/yt_dlp/extractor/imgur.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/ina.py b/yt_dlp/extractor/ina.py
index b3b2683cb..56038f1ca 100644
--- a/yt_dlp/extractor/ina.py
+++ b/yt_dlp/extractor/ina.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     determine_ext,
diff --git a/yt_dlp/extractor/inc.py b/yt_dlp/extractor/inc.py
index d5b258a0f..9b3fe9ac1 100644
--- a/yt_dlp/extractor/inc.py
+++ b/yt_dlp/extractor/inc.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from .kaltura import KalturaIE
 
diff --git a/yt_dlp/extractor/indavideo.py b/yt_dlp/extractor/indavideo.py
index 4c16243ec..fb041a182 100644
--- a/yt_dlp/extractor/indavideo.py
+++ b/yt_dlp/extractor/indavideo.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/infoq.py b/yt_dlp/extractor/infoq.py
index 347cc5154..abf7d36ef 100644
--- a/yt_dlp/extractor/infoq.py
+++ b/yt_dlp/extractor/infoq.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
 from ..compat import (
     compat_b64decode,
     compat_urllib_parse_unquote,
diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py
index 970f2c8ab..05000e2fb 100644
--- a/yt_dlp/extractor/instagram.py
+++ b/yt_dlp/extractor/instagram.py
@@ -1,5 +1,3 @@
-# coding: utf-8
-
 import itertools
 import hashlib
 import json
diff --git a/yt_dlp/extractor/internazionale.py b/yt_dlp/extractor/internazionale.py
index 45e2af690..c8f70785f 100644
--- a/yt_dlp/extractor/internazionale.py
+++ b/yt_dlp/extractor/internazionale.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import unified_timestamp
 
diff --git a/yt_dlp/extractor/internetvideoarchive.py b/yt_dlp/extractor/internetvideoarchive.py
index 880918cd7..6a8e30d73 100644
--- a/yt_dlp/extractor/internetvideoarchive.py
+++ b/yt_dlp/extractor/internetvideoarchive.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import json
 import re
 
diff --git a/yt_dlp/extractor/iprima.py b/yt_dlp/extractor/iprima.py
index 1a2038453..5e0b523dc 100644
--- a/yt_dlp/extractor/iprima.py
+++ b/yt_dlp/extractor/iprima.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 import time
 
diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py
index 14877d405..b755aab07 100644
--- a/yt_dlp/extractor/iqiyi.py
+++ b/yt_dlp/extractor/iqiyi.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import hashlib
 import itertools
 import re
diff --git a/yt_dlp/extractor/itprotv.py b/yt_dlp/extractor/itprotv.py
index 64cb4e69a..4ac12603a 100644
--- a/yt_dlp/extractor/itprotv.py
+++ b/yt_dlp/extractor/itprotv.py
@@ -1,5 +1,3 @@
-# coding: utf-8
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/itv.py b/yt_dlp/extractor/itv.py
index f1591403f..26d77a469 100644
--- a/yt_dlp/extractor/itv.py
+++ b/yt_dlp/extractor/itv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/ivi.py b/yt_dlp/extractor/ivi.py
index 5f8a046e0..699746943 100644
--- a/yt_dlp/extractor/ivi.py
+++ b/yt_dlp/extractor/ivi.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 import re
 
diff --git a/yt_dlp/extractor/ivideon.py b/yt_dlp/extractor/ivideon.py
index 44b220846..538a961b7 100644
--- a/yt_dlp/extractor/ivideon.py
+++ b/yt_dlp/extractor/ivideon.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse_urlencode,
diff --git a/yt_dlp/extractor/iwara.py b/yt_dlp/extractor/iwara.py
index c0e01e352..974b4be7d 100644
--- a/yt_dlp/extractor/iwara.py
+++ b/yt_dlp/extractor/iwara.py
@@ -1,5 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/izlesene.py b/yt_dlp/extractor/izlesene.py
index f8fca6c8f..6520ecf6d 100644
--- a/yt_dlp/extractor/izlesene.py
+++ b/yt_dlp/extractor/izlesene.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import (
     compat_str,
diff --git a/yt_dlp/extractor/jable.py b/yt_dlp/extractor/jable.py
index b294aee70..6840654cc 100644
--- a/yt_dlp/extractor/jable.py
+++ b/yt_dlp/extractor/jable.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/jamendo.py b/yt_dlp/extractor/jamendo.py
index 755d9703b..5dc2c25e6 100644
--- a/yt_dlp/extractor/jamendo.py
+++ b/yt_dlp/extractor/jamendo.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import hashlib
 import random
 
diff --git a/yt_dlp/extractor/jeuxvideo.py b/yt_dlp/extractor/jeuxvideo.py
index 77c0f520c..56ea15cf9 100644
--- a/yt_dlp/extractor/jeuxvideo.py
+++ b/yt_dlp/extractor/jeuxvideo.py
@@ -1,8 +1,3 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/joj.py b/yt_dlp/extractor/joj.py
index 7350f537c..a01411be1 100644
--- a/yt_dlp/extractor/joj.py
+++ b/yt_dlp/extractor/joj.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/jove.py b/yt_dlp/extractor/jove.py
index 4b7dfc526..245fe73d4 100644
--- a/yt_dlp/extractor/jove.py
+++ b/yt_dlp/extractor/jove.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
diff --git a/yt_dlp/extractor/jwplatform.py b/yt_dlp/extractor/jwplatform.py
index 5aa508bf9..8dbbb2926 100644
--- a/yt_dlp/extractor/jwplatform.py
+++ b/yt_dlp/extractor/jwplatform.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/kakao.py b/yt_dlp/extractor/kakao.py
index 483ab7128..8ad1d9efd 100644
--- a/yt_dlp/extractor/kakao.py
+++ b/yt_dlp/extractor/kakao.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import compat_HTTPError
 from ..utils import (
diff --git a/yt_dlp/extractor/kaltura.py b/yt_dlp/extractor/kaltura.py
index f6dfc9caa..f9b9c5c78 100644
--- a/yt_dlp/extractor/kaltura.py
+++ b/yt_dlp/extractor/kaltura.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 import base64
 
diff --git a/yt_dlp/extractor/kanalplay.py b/yt_dlp/extractor/kanalplay.py
index 5e24f7e21..ef74014c0 100644
--- a/yt_dlp/extractor/kanalplay.py
+++ b/yt_dlp/extractor/kanalplay.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
diff --git a/yt_dlp/extractor/karaoketv.py b/yt_dlp/extractor/karaoketv.py
index bfccf89b0..381dc00ad 100644
--- a/yt_dlp/extractor/karaoketv.py
+++ b/yt_dlp/extractor/karaoketv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/karrierevideos.py b/yt_dlp/extractor/karrierevideos.py
index 7b291e0a0..28d4841aa 100644
--- a/yt_dlp/extractor/karrierevideos.py
+++ b/yt_dlp/extractor/karrierevideos.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import compat_urlparse
 from ..utils import (
diff --git a/yt_dlp/extractor/keezmovies.py b/yt_dlp/extractor/keezmovies.py
index 06dbcbb40..79f9c7fa7 100644
--- a/yt_dlp/extractor/keezmovies.py
+++ b/yt_dlp/extractor/keezmovies.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/kelbyone.py b/yt_dlp/extractor/kelbyone.py
index 20c26cf48..dea056c12 100644
--- a/yt_dlp/extractor/kelbyone.py
+++ b/yt_dlp/extractor/kelbyone.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import int_or_none
 
diff --git a/yt_dlp/extractor/ketnet.py b/yt_dlp/extractor/ketnet.py
index e0599d02f..ab6276727 100644
--- a/yt_dlp/extractor/ketnet.py
+++ b/yt_dlp/extractor/ketnet.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .canvas import CanvasIE
 from .common import InfoExtractor
 from ..compat import compat_urllib_parse_unquote
diff --git a/yt_dlp/extractor/khanacademy.py b/yt_dlp/extractor/khanacademy.py
index 87e520378..83cfeadba 100644
--- a/yt_dlp/extractor/khanacademy.py
+++ b/yt_dlp/extractor/khanacademy.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import json
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/kickstarter.py b/yt_dlp/extractor/kickstarter.py
index d4da8f484..c0d851d96 100644
--- a/yt_dlp/extractor/kickstarter.py
+++ b/yt_dlp/extractor/kickstarter.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import smuggle_url
 
diff --git a/yt_dlp/extractor/kinja.py b/yt_dlp/extractor/kinja.py
index 1be8b4809..c00abfbc1 100644
--- a/yt_dlp/extractor/kinja.py
+++ b/yt_dlp/extractor/kinja.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/kinopoisk.py b/yt_dlp/extractor/kinopoisk.py
index cdbb642e2..84a2489a3 100644
--- a/yt_dlp/extractor/kinopoisk.py
+++ b/yt_dlp/extractor/kinopoisk.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     dict_get,
diff --git a/yt_dlp/extractor/konserthusetplay.py b/yt_dlp/extractor/konserthusetplay.py
index dd42bb2f2..1e177c363 100644
--- a/yt_dlp/extractor/konserthusetplay.py
+++ b/yt_dlp/extractor/konserthusetplay.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     determine_ext,
diff --git a/yt_dlp/extractor/koo.py b/yt_dlp/extractor/koo.py
index 088db1cb0..892d355ba 100644
--- a/yt_dlp/extractor/koo.py
+++ b/yt_dlp/extractor/koo.py
@@ -1,5 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
     clean_html,
diff --git a/yt_dlp/extractor/krasview.py b/yt_dlp/extractor/krasview.py
index d27d052ff..4323aa429 100644
--- a/yt_dlp/extractor/krasview.py
+++ b/yt_dlp/extractor/krasview.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/ku6.py b/yt_dlp/extractor/ku6.py
index a574408e5..31b4ea0c6 100644
--- a/yt_dlp/extractor/ku6.py
+++ b/yt_dlp/extractor/ku6.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/kusi.py b/yt_dlp/extractor/kusi.py
index 707fe1821..f1221ef1b 100644
--- a/yt_dlp/extractor/kusi.py
+++ b/yt_dlp/extractor/kusi.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import random
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/kuwo.py b/yt_dlp/extractor/kuwo.py
index 460a4252f..0c9518e66 100644
--- a/yt_dlp/extractor/kuwo.py
+++ b/yt_dlp/extractor/kuwo.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/la7.py b/yt_dlp/extractor/la7.py
index de985e450..5d52decdb 100644
--- a/yt_dlp/extractor/la7.py
+++ b/yt_dlp/extractor/la7.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/laola1tv.py b/yt_dlp/extractor/laola1tv.py
index b5d27c2f0..4014a9256 100644
--- a/yt_dlp/extractor/laola1tv.py
+++ b/yt_dlp/extractor/laola1tv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 import re
 
diff --git a/yt_dlp/extractor/lastfm.py b/yt_dlp/extractor/lastfm.py
index 5215717e8..7ba666d06 100644
--- a/yt_dlp/extractor/lastfm.py
+++ b/yt_dlp/extractor/lastfm.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/lbry.py b/yt_dlp/extractor/lbry.py
index 5d5457c53..953ce2e18 100644
--- a/yt_dlp/extractor/lbry.py
+++ b/yt_dlp/extractor/lbry.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import functools
 import json
 
diff --git a/yt_dlp/extractor/lci.py b/yt_dlp/extractor/lci.py
index 920872f5c..81cf88b6c 100644
--- a/yt_dlp/extractor/lci.py
+++ b/yt_dlp/extractor/lci.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/lcp.py b/yt_dlp/extractor/lcp.py
index ade27a99e..87543d56f 100644
--- a/yt_dlp/extractor/lcp.py
+++ b/yt_dlp/extractor/lcp.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from .arkena import ArkenaIE
 
diff --git a/yt_dlp/extractor/lecture2go.py b/yt_dlp/extractor/lecture2go.py
index 81b5d41be..bee4e7587 100644
--- a/yt_dlp/extractor/lecture2go.py
+++ b/yt_dlp/extractor/lecture2go.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/lecturio.py b/yt_dlp/extractor/lecturio.py
index 0ee1eeb4d..c3d0cb193 100644
--- a/yt_dlp/extractor/lecturio.py
+++ b/yt_dlp/extractor/lecturio.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/leeco.py b/yt_dlp/extractor/leeco.py
index d5e11423c..258e396cb 100644
--- a/yt_dlp/extractor/leeco.py
+++ b/yt_dlp/extractor/leeco.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import datetime
 import hashlib
 import re
diff --git a/yt_dlp/extractor/lego.py b/yt_dlp/extractor/lego.py
index 901f43bcf..7d0238a1f 100644
--- a/yt_dlp/extractor/lego.py
+++ b/yt_dlp/extractor/lego.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import uuid
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/lemonde.py b/yt_dlp/extractor/lemonde.py
index 3306892e8..c916791af 100644
--- a/yt_dlp/extractor/lemonde.py
+++ b/yt_dlp/extractor/lemonde.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/lenta.py b/yt_dlp/extractor/lenta.py
index 2ebd4e577..10aac984e 100644
--- a/yt_dlp/extractor/lenta.py
+++ b/yt_dlp/extractor/lenta.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/libraryofcongress.py b/yt_dlp/extractor/libraryofcongress.py
index 03f205144..afe3c98a1 100644
--- a/yt_dlp/extractor/libraryofcongress.py
+++ b/yt_dlp/extractor/libraryofcongress.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/libsyn.py b/yt_dlp/extractor/libsyn.py
index d1fcda4ef..8245a3481 100644
--- a/yt_dlp/extractor/libsyn.py
+++ b/yt_dlp/extractor/libsyn.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..utils import (
     clean_html,
diff --git a/yt_dlp/extractor/lifenews.py b/yt_dlp/extractor/lifenews.py
index 49a0a5989..8c7d2064d 100644
--- a/yt_dlp/extractor/lifenews.py
+++ b/yt_dlp/extractor/lifenews.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/limelight.py b/yt_dlp/extractor/limelight.py
index b20681ad1..25667fc07 100644
--- a/yt_dlp/extractor/limelight.py
+++ b/yt_dlp/extractor/limelight.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/line.py b/yt_dlp/extractor/line.py
index 987c43430..63b6c002a 100644
--- a/yt_dlp/extractor/line.py
+++ b/yt_dlp/extractor/line.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
diff --git a/yt_dlp/extractor/linkedin.py b/yt_dlp/extractor/linkedin.py
index 0f57bfa06..27f1080b4 100644
--- a/yt_dlp/extractor/linkedin.py
+++ b/yt_dlp/extractor/linkedin.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from itertools import zip_longest
 import re
 
diff --git a/yt_dlp/extractor/linuxacademy.py b/yt_dlp/extractor/linuxacademy.py
index 6aff88e13..bf22855a9 100644
--- a/yt_dlp/extractor/linuxacademy.py
+++ b/yt_dlp/extractor/linuxacademy.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import json
 import random
 
diff --git a/yt_dlp/extractor/litv.py b/yt_dlp/extractor/litv.py
index 16b475a44..31826ac99 100644
--- a/yt_dlp/extractor/litv.py
+++ b/yt_dlp/extractor/litv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/livejournal.py b/yt_dlp/extractor/livejournal.py
index 3a9f4553f..96bd8b233 100644
--- a/yt_dlp/extractor/livejournal.py
+++ b/yt_dlp/extractor/livejournal.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import int_or_none
diff --git a/yt_dlp/extractor/livestream.py b/yt_dlp/extractor/livestream.py
index 45bf26d26..4b90c22c5 100644
--- a/yt_dlp/extractor/livestream.py
+++ b/yt_dlp/extractor/livestream.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 import itertools
 
diff --git a/yt_dlp/extractor/lnkgo.py b/yt_dlp/extractor/lnkgo.py
index bd2dffac0..3bb52777f 100644
--- a/yt_dlp/extractor/lnkgo.py
+++ b/yt_dlp/extractor/lnkgo.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..utils import (
     clean_html,
diff --git a/yt_dlp/extractor/localnews8.py b/yt_dlp/extractor/localnews8.py
index c3e9d10fa..6f3f02c70 100644
--- a/yt_dlp/extractor/localnews8.py
+++ b/yt_dlp/extractor/localnews8.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/lovehomeporn.py b/yt_dlp/extractor/lovehomeporn.py
index ca4b5f375..ba5a13acd 100644
--- a/yt_dlp/extractor/lovehomeporn.py
+++ b/yt_dlp/extractor/lovehomeporn.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
 from .nuevo import NuevoBaseIE
 
 
diff --git a/yt_dlp/extractor/lrt.py b/yt_dlp/extractor/lrt.py
index 4024aef73..53076b839 100644
--- a/yt_dlp/extractor/lrt.py
+++ b/yt_dlp/extractor/lrt.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..utils import (
     clean_html,
diff --git a/yt_dlp/extractor/lynda.py b/yt_dlp/extractor/lynda.py
index ce304743f..1ae7f9d4f 100644
--- a/yt_dlp/extractor/lynda.py
+++ b/yt_dlp/extractor/lynda.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/m6.py b/yt_dlp/extractor/m6.py
index 9806875e8..9dcc60164 100644
--- a/yt_dlp/extractor/m6.py
+++ b/yt_dlp/extractor/m6.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/magentamusik360.py b/yt_dlp/extractor/magentamusik360.py
index 5c274902f..5d0cb3bfb 100644
--- a/yt_dlp/extractor/magentamusik360.py
+++ b/yt_dlp/extractor/magentamusik360.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/mailru.py b/yt_dlp/extractor/mailru.py
index 5d9f80bb3..5f30d0eaa 100644
--- a/yt_dlp/extractor/mailru.py
+++ b/yt_dlp/extractor/mailru.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import itertools
 import json
 import re
diff --git a/yt_dlp/extractor/mainstreaming.py b/yt_dlp/extractor/mainstreaming.py
index 0f349a7a3..c144c7592 100644
--- a/yt_dlp/extractor/mainstreaming.py
+++ b/yt_dlp/extractor/mainstreaming.py
@@ -1,4 +1,3 @@
-# coding: utf-8
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/malltv.py b/yt_dlp/extractor/malltv.py
index fadfd9338..bfd6008b3 100644
--- a/yt_dlp/extractor/malltv.py
+++ b/yt_dlp/extractor/malltv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     clean_html,
diff --git a/yt_dlp/extractor/mangomolo.py b/yt_dlp/extractor/mangomolo.py
index 68ce138b3..a392e9b54 100644
--- a/yt_dlp/extractor/mangomolo.py
+++ b/yt_dlp/extractor/mangomolo.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import (
     compat_b64decode,
diff --git a/yt_dlp/extractor/manoto.py b/yt_dlp/extractor/manoto.py
index d12aa5f60..dc8653f5d 100644
--- a/yt_dlp/extractor/manoto.py
+++ b/yt_dlp/extractor/manoto.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     clean_html,
a/yt_dlp/extractor/manyvids.py b/yt_dlp/extractor/manyvids.py index bd24f8853..1f537d267 100644 --- a/yt_dlp/extractor/manyvids.py +++ b/yt_dlp/extractor/manyvids.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/maoritv.py b/yt_dlp/extractor/maoritv.py index 0d23fec75..67780eafc 100644 --- a/yt_dlp/extractor/maoritv.py +++ b/yt_dlp/extractor/maoritv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/markiza.py b/yt_dlp/extractor/markiza.py index def960a0c..53ed79158 100644 --- a/yt_dlp/extractor/markiza.py +++ b/yt_dlp/extractor/markiza.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/massengeschmacktv.py b/yt_dlp/extractor/massengeschmacktv.py index b381d31b4..4508e4391 100644 --- a/yt_dlp/extractor/massengeschmacktv.py +++ b/yt_dlp/extractor/massengeschmacktv.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/matchtv.py b/yt_dlp/extractor/matchtv.py index e003b8d25..94ae20b26 100644 --- a/yt_dlp/extractor/matchtv.py +++ b/yt_dlp/extractor/matchtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import random from .common import InfoExtractor diff --git a/yt_dlp/extractor/mdr.py b/yt_dlp/extractor/mdr.py index 3ca174c2b..b44cf809a 100644 --- a/yt_dlp/extractor/mdr.py +++ b/yt_dlp/extractor/mdr.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( diff --git a/yt_dlp/extractor/medaltv.py b/yt_dlp/extractor/medaltv.py index 59cc30736..527b50cb0 100644 --- a/yt_dlp/extractor/medaltv.py +++ b/yt_dlp/extractor/medaltv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/mediaite.py b/yt_dlp/extractor/mediaite.py index b670f0d61..0f9079b11 100644 --- a/yt_dlp/extractor/mediaite.py +++ b/yt_dlp/extractor/mediaite.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor diff --git a/yt_dlp/extractor/mediaklikk.py b/yt_dlp/extractor/mediaklikk.py index 18ff3befa..f9a449377 100644 --- a/yt_dlp/extractor/mediaklikk.py +++ b/yt_dlp/extractor/mediaklikk.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from ..utils import ( unified_strdate ) diff --git a/yt_dlp/extractor/medialaan.py b/yt_dlp/extractor/medialaan.py index 788acf7fb..297f8c4b2 100644 --- a/yt_dlp/extractor/medialaan.py +++ b/yt_dlp/extractor/medialaan.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/mediaset.py b/yt_dlp/extractor/mediaset.py index d6b456c5d..60c454dda 100644 --- a/yt_dlp/extractor/mediaset.py +++ b/yt_dlp/extractor/mediaset.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import re diff --git a/yt_dlp/extractor/mediasite.py b/yt_dlp/extractor/mediasite.py index fbf9223b2..30464bad0 100644 --- a/yt_dlp/extractor/mediasite.py +++ b/yt_dlp/extractor/mediasite.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import json diff --git 
a/yt_dlp/extractor/medici.py b/yt_dlp/extractor/medici.py index cd910238e..328ccd2c9 100644 --- a/yt_dlp/extractor/medici.py +++ b/yt_dlp/extractor/medici.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( unified_strdate, diff --git a/yt_dlp/extractor/megaphone.py b/yt_dlp/extractor/megaphone.py index 5bafa6cf4..0c150ef45 100644 --- a/yt_dlp/extractor/megaphone.py +++ b/yt_dlp/extractor/megaphone.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/megatvcom.py b/yt_dlp/extractor/megatvcom.py index 0d6793acd..ec481d016 100644 --- a/yt_dlp/extractor/megatvcom.py +++ b/yt_dlp/extractor/megatvcom.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/meipai.py b/yt_dlp/extractor/meipai.py index 2445b8b39..95b6dfe52 100644 --- a/yt_dlp/extractor/meipai.py +++ b/yt_dlp/extractor/meipai.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/melonvod.py b/yt_dlp/extractor/melonvod.py index bd8cf13ab..0cbc961c4 100644 --- a/yt_dlp/extractor/melonvod.py +++ b/yt_dlp/extractor/melonvod.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/meta.py b/yt_dlp/extractor/meta.py index cdb46e163..7c11e6017 100644 --- a/yt_dlp/extractor/meta.py +++ b/yt_dlp/extractor/meta.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from .pladform import PladformIE from ..utils import ( diff --git a/yt_dlp/extractor/metacafe.py b/yt_dlp/extractor/metacafe.py index 7b2d4a003..31fec86d2 100644 --- a/yt_dlp/extractor/metacafe.py +++ b/yt_dlp/extractor/metacafe.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json import re diff --git a/yt_dlp/extractor/metacritic.py b/yt_dlp/extractor/metacritic.py index 1424288e7..543bdffad 100644 --- a/yt_dlp/extractor/metacritic.py +++ b/yt_dlp/extractor/metacritic.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/mgoon.py b/yt_dlp/extractor/mgoon.py index 184c311be..c41c51384 100644 --- a/yt_dlp/extractor/mgoon.py +++ b/yt_dlp/extractor/mgoon.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/mgtv.py b/yt_dlp/extractor/mgtv.py index 4ac70ea57..96f3fb982 100644 --- a/yt_dlp/extractor/mgtv.py +++ b/yt_dlp/extractor/mgtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import time import uuid diff --git a/yt_dlp/extractor/miaopai.py b/yt_dlp/extractor/miaopai.py index cf0610bdf..329ce3658 100644 --- a/yt_dlp/extractor/miaopai.py +++ b/yt_dlp/extractor/miaopai.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/microsoftstream.py b/yt_dlp/extractor/microsoftstream.py index 4d5a9df1f..2dde82a75 100644 --- a/yt_dlp/extractor/microsoftstream.py +++ b/yt_dlp/extractor/microsoftstream.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import 
unicode_literals - from base64 import b64decode from .common import InfoExtractor diff --git a/yt_dlp/extractor/microsoftvirtualacademy.py b/yt_dlp/extractor/microsoftvirtualacademy.py index 9255a7964..f15f00ee5 100644 --- a/yt_dlp/extractor/microsoftvirtualacademy.py +++ b/yt_dlp/extractor/microsoftvirtualacademy.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/mildom.py b/yt_dlp/extractor/mildom.py index 4de8e9ef4..c7a61dfa0 100644 --- a/yt_dlp/extractor/mildom.py +++ b/yt_dlp/extractor/mildom.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import json import uuid diff --git a/yt_dlp/extractor/minds.py b/yt_dlp/extractor/minds.py index 9da07207b..393d20604 100644 --- a/yt_dlp/extractor/minds.py +++ b/yt_dlp/extractor/minds.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/ministrygrid.py b/yt_dlp/extractor/ministrygrid.py index 8ad9239c5..053c6726c 100644 --- a/yt_dlp/extractor/ministrygrid.py +++ b/yt_dlp/extractor/ministrygrid.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/minoto.py b/yt_dlp/extractor/minoto.py index 603ce940b..e799cd3bc 100644 --- a/yt_dlp/extractor/minoto.py +++ b/yt_dlp/extractor/minoto.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/miomio.py b/yt_dlp/extractor/miomio.py index 40f72d66f..a0a041ea5 100644 --- a/yt_dlp/extractor/miomio.py +++ b/yt_dlp/extractor/miomio.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import random from .common import InfoExtractor diff --git a/yt_dlp/extractor/mirrativ.py b/yt_dlp/extractor/mirrativ.py index 2111de615..8192f2b46 100644 --- a/yt_dlp/extractor/mirrativ.py +++ b/yt_dlp/extractor/mirrativ.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/mit.py b/yt_dlp/extractor/mit.py index 60e456978..38cc0c274 100644 --- a/yt_dlp/extractor/mit.py +++ b/yt_dlp/extractor/mit.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re import json diff --git a/yt_dlp/extractor/mitele.py b/yt_dlp/extractor/mitele.py index b5937233b..12b2b2432 100644 --- a/yt_dlp/extractor/mitele.py +++ b/yt_dlp/extractor/mitele.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .telecinco import TelecincoIE from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/mixch.py b/yt_dlp/extractor/mixch.py index 31f450dfa..3f430a717 100644 --- a/yt_dlp/extractor/mixch.py +++ b/yt_dlp/extractor/mixch.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/mixcloud.py b/yt_dlp/extractor/mixcloud.py index b19e59b1a..796f268f4 100644 --- a/yt_dlp/extractor/mixcloud.py +++ b/yt_dlp/extractor/mixcloud.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import itertools from .common import InfoExtractor diff --git a/yt_dlp/extractor/mlb.py b/yt_dlp/extractor/mlb.py index b69301d97..5fb97083a 100644 --- a/yt_dlp/extractor/mlb.py +++ 
b/yt_dlp/extractor/mlb.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/mlssoccer.py b/yt_dlp/extractor/mlssoccer.py index 1d6d4b804..9383f1358 100644 --- a/yt_dlp/extractor/mlssoccer.py +++ b/yt_dlp/extractor/mlssoccer.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/mnet.py b/yt_dlp/extractor/mnet.py index 0e26ca1b3..65e3d476a 100644 --- a/yt_dlp/extractor/mnet.py +++ b/yt_dlp/extractor/mnet.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/moevideo.py b/yt_dlp/extractor/moevideo.py index a3f1b3866..fda08cae9 100644 --- a/yt_dlp/extractor/moevideo.py +++ b/yt_dlp/extractor/moevideo.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/mofosex.py b/yt_dlp/extractor/mofosex.py index 5234cac02..66a098c97 100644 --- a/yt_dlp/extractor/mofosex.py +++ b/yt_dlp/extractor/mofosex.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/mojvideo.py b/yt_dlp/extractor/mojvideo.py index 16d94052b..d47ad0742 100644 --- a/yt_dlp/extractor/mojvideo.py +++ b/yt_dlp/extractor/mojvideo.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/morningstar.py b/yt_dlp/extractor/morningstar.py index 71a22a614..e9fcfe3e2 100644 --- a/yt_dlp/extractor/morningstar.py +++ b/yt_dlp/extractor/morningstar.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor diff --git a/yt_dlp/extractor/motherless.py b/yt_dlp/extractor/motherless.py index 111c7c544..9e53a8a97 100644 --- a/yt_dlp/extractor/motherless.py +++ b/yt_dlp/extractor/motherless.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import datetime import re diff --git a/yt_dlp/extractor/motorsport.py b/yt_dlp/extractor/motorsport.py index c9d1ab64d..b292aeb9a 100644 --- a/yt_dlp/extractor/motorsport.py +++ b/yt_dlp/extractor/motorsport.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_urlparse, diff --git a/yt_dlp/extractor/movieclips.py b/yt_dlp/extractor/movieclips.py index 5453da1ac..4777f440e 100644 --- a/yt_dlp/extractor/movieclips.py +++ b/yt_dlp/extractor/movieclips.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( smuggle_url, diff --git a/yt_dlp/extractor/moviepilot.py b/yt_dlp/extractor/moviepilot.py index 4605d3481..ca541567a 100644 --- a/yt_dlp/extractor/moviepilot.py +++ b/yt_dlp/extractor/moviepilot.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .dailymotion import DailymotionIE from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/moviezine.py b/yt_dlp/extractor/moviezine.py index 730da4bd7..5757322d6 100644 --- a/yt_dlp/extractor/moviezine.py +++ b/yt_dlp/extractor/moviezine.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor diff --git 
a/yt_dlp/extractor/movingimage.py b/yt_dlp/extractor/movingimage.py index 4f62d628a..cdd8ba4dc 100644 --- a/yt_dlp/extractor/movingimage.py +++ b/yt_dlp/extractor/movingimage.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( unescapeHTML, diff --git a/yt_dlp/extractor/msn.py b/yt_dlp/extractor/msn.py index f34e2102c..6f4935e51 100644 --- a/yt_dlp/extractor/msn.py +++ b/yt_dlp/extractor/msn.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/mtv.py b/yt_dlp/extractor/mtv.py index cff314e27..3ef851e0b 100644 --- a/yt_dlp/extractor/mtv.py +++ b/yt_dlp/extractor/mtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/muenchentv.py b/yt_dlp/extractor/muenchentv.py index a53929e1b..b9681d1bd 100644 --- a/yt_dlp/extractor/muenchentv.py +++ b/yt_dlp/extractor/muenchentv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/murrtube.py b/yt_dlp/extractor/murrtube.py index 1eb5de660..508d51247 100644 --- a/yt_dlp/extractor/murrtube.py +++ b/yt_dlp/extractor/murrtube.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import json diff --git a/yt_dlp/extractor/musescore.py b/yt_dlp/extractor/musescore.py index 09fadf8d9..289ae5733 100644 --- a/yt_dlp/extractor/musescore.py +++ b/yt_dlp/extractor/musescore.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/musicdex.py b/yt_dlp/extractor/musicdex.py index 05f722091..4d8e74f6b 100644 --- a/yt_dlp/extractor/musicdex.py +++ b/yt_dlp/extractor/musicdex.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( date_from_str, diff --git a/yt_dlp/extractor/mwave.py b/yt_dlp/extractor/mwave.py index a67276596..0cbb16736 100644 --- a/yt_dlp/extractor/mwave.py +++ b/yt_dlp/extractor/mwave.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/mxplayer.py b/yt_dlp/extractor/mxplayer.py index 3c2afd838..cdc340a80 100644 --- a/yt_dlp/extractor/mxplayer.py +++ b/yt_dlp/extractor/mxplayer.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_str from ..utils import try_get diff --git a/yt_dlp/extractor/mychannels.py b/yt_dlp/extractor/mychannels.py index d820d4eb8..8a70c1f7b 100644 --- a/yt_dlp/extractor/mychannels.py +++ b/yt_dlp/extractor/mychannels.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor diff --git a/yt_dlp/extractor/myspace.py b/yt_dlp/extractor/myspace.py index 4227d4248..63d36c30a 100644 --- a/yt_dlp/extractor/myspace.py +++ b/yt_dlp/extractor/myspace.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/myspass.py b/yt_dlp/extractor/myspass.py index 1775d5f0b..28ac982d6 100644 --- a/yt_dlp/extractor/myspass.py +++ b/yt_dlp/extractor/myspass.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from 
.common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/myvi.py b/yt_dlp/extractor/myvi.py index 75d286365..b31cf4493 100644 --- a/yt_dlp/extractor/myvi.py +++ b/yt_dlp/extractor/myvi.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/myvideoge.py b/yt_dlp/extractor/myvideoge.py index 0a1d7d0cb..513d4cb77 100644 --- a/yt_dlp/extractor/myvideoge.py +++ b/yt_dlp/extractor/myvideoge.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import js_to_json diff --git a/yt_dlp/extractor/myvidster.py b/yt_dlp/extractor/myvidster.py index 2117d302d..c91f294bf 100644 --- a/yt_dlp/extractor/myvidster.py +++ b/yt_dlp/extractor/myvidster.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/n1.py b/yt_dlp/extractor/n1.py index fdb7f32db..cc0ff533e 100644 --- a/yt_dlp/extractor/n1.py +++ b/yt_dlp/extractor/n1.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/nate.py b/yt_dlp/extractor/nate.py index 072faf6ea..c83b2acbd 100644 --- a/yt_dlp/extractor/nate.py +++ b/yt_dlp/extractor/nate.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools from .common import InfoExtractor diff --git a/yt_dlp/extractor/nationalgeographic.py b/yt_dlp/extractor/nationalgeographic.py index ee12e2b47..f22317d56 100644 --- a/yt_dlp/extractor/nationalgeographic.py +++ b/yt_dlp/extractor/nationalgeographic.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from .fox import FOXIE from ..utils import ( diff --git a/yt_dlp/extractor/naver.py b/yt_dlp/extractor/naver.py index a6821ba86..a230d9cdd 100644 --- a/yt_dlp/extractor/naver.py +++ b/yt_dlp/extractor/naver.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/nba.py b/yt_dlp/extractor/nba.py index 359cc52b7..e95c1b795 100644 --- a/yt_dlp/extractor/nba.py +++ b/yt_dlp/extractor/nba.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import functools import re diff --git a/yt_dlp/extractor/nbc.py b/yt_dlp/extractor/nbc.py index 109403440..8aab80a0f 100644 --- a/yt_dlp/extractor/nbc.py +++ b/yt_dlp/extractor/nbc.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import base64 import json import re diff --git a/yt_dlp/extractor/ndr.py b/yt_dlp/extractor/ndr.py index 1917254b8..de0142ccf 100644 --- a/yt_dlp/extractor/ndr.py +++ b/yt_dlp/extractor/ndr.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/ndtv.py b/yt_dlp/extractor/ndtv.py index bc3eb9160..fbb033169 100644 --- a/yt_dlp/extractor/ndtv.py +++ b/yt_dlp/extractor/ndtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_urllib_parse_unquote_plus diff --git a/yt_dlp/extractor/nebula.py b/yt_dlp/extractor/nebula.py index 77f253519..eccf740aa 100644 --- a/yt_dlp/extractor/nebula.py +++ b/yt_dlp/extractor/nebula.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools 
import json import time diff --git a/yt_dlp/extractor/nerdcubed.py b/yt_dlp/extractor/nerdcubed.py index 9feccc672..7c801b5d3 100644 --- a/yt_dlp/extractor/nerdcubed.py +++ b/yt_dlp/extractor/nerdcubed.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import datetime from .common import InfoExtractor diff --git a/yt_dlp/extractor/neteasemusic.py b/yt_dlp/extractor/neteasemusic.py index 57b4774b6..4def7e76b 100644 --- a/yt_dlp/extractor/neteasemusic.py +++ b/yt_dlp/extractor/neteasemusic.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from hashlib import md5 from base64 import b64encode from datetime import datetime diff --git a/yt_dlp/extractor/netzkino.py b/yt_dlp/extractor/netzkino.py index 4ad0d8e96..49b29b67c 100644 --- a/yt_dlp/extractor/netzkino.py +++ b/yt_dlp/extractor/netzkino.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/newgrounds.py b/yt_dlp/extractor/newgrounds.py index 6525a6d8a..ba24720e3 100644 --- a/yt_dlp/extractor/newgrounds.py +++ b/yt_dlp/extractor/newgrounds.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import re diff --git a/yt_dlp/extractor/newstube.py b/yt_dlp/extractor/newstube.py index 479141ae0..20db46057 100644 --- a/yt_dlp/extractor/newstube.py +++ b/yt_dlp/extractor/newstube.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import hashlib diff --git a/yt_dlp/extractor/newsy.py b/yt_dlp/extractor/newsy.py index cf3164100..9fde6c079 100644 --- a/yt_dlp/extractor/newsy.py +++ b/yt_dlp/extractor/newsy.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( js_to_json, diff --git a/yt_dlp/extractor/nextmedia.py b/yt_dlp/extractor/nextmedia.py index 7bd1290bf..1f83089fc 100644 --- a/yt_dlp/extractor/nextmedia.py +++ b/yt_dlp/extractor/nextmedia.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( diff --git a/yt_dlp/extractor/nexx.py b/yt_dlp/extractor/nexx.py index a521bb6e4..01376be3d 100644 --- a/yt_dlp/extractor/nexx.py +++ b/yt_dlp/extractor/nexx.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import hashlib import random import re diff --git a/yt_dlp/extractor/nfb.py b/yt_dlp/extractor/nfb.py index a12e503de..79c6aaf0c 100644 --- a/yt_dlp/extractor/nfb.py +++ b/yt_dlp/extractor/nfb.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import int_or_none diff --git a/yt_dlp/extractor/nfhsnetwork.py b/yt_dlp/extractor/nfhsnetwork.py index 802f6caf0..e6f98b036 100644 --- a/yt_dlp/extractor/nfhsnetwork.py +++ b/yt_dlp/extractor/nfhsnetwork.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/nfl.py b/yt_dlp/extractor/nfl.py index 821276a31..e5810b346 100644 --- a/yt_dlp/extractor/nfl.py +++ b/yt_dlp/extractor/nfl.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/nhk.py b/yt_dlp/extractor/nhk.py index 3b8efc3e6..cf2ec7b79 100644 --- a/yt_dlp/extractor/nhk.py +++ b/yt_dlp/extractor/nhk.py @@ -1,5 
+1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/nhl.py b/yt_dlp/extractor/nhl.py index d3a5e17e9..884f9e2ae 100644 --- a/yt_dlp/extractor/nhl.py +++ b/yt_dlp/extractor/nhl.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/nick.py b/yt_dlp/extractor/nick.py index ba7da7602..2a228d8de 100644 --- a/yt_dlp/extractor/nick.py +++ b/yt_dlp/extractor/nick.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .mtv import MTVServicesInfoExtractor from ..utils import update_url_query diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py index 4eb6ed070..e60556a4d 100644 --- a/yt_dlp/extractor/niconico.py +++ b/yt_dlp/extractor/niconico.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import datetime import functools import itertools diff --git a/yt_dlp/extractor/ninecninemedia.py b/yt_dlp/extractor/ninecninemedia.py index 781842721..462caf466 100644 --- a/yt_dlp/extractor/ninecninemedia.py +++ b/yt_dlp/extractor/ninecninemedia.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( float_or_none, diff --git a/yt_dlp/extractor/ninegag.py b/yt_dlp/extractor/ninegag.py index 14390823b..00ca95ea2 100644 --- a/yt_dlp/extractor/ninegag.py +++ b/yt_dlp/extractor/ninegag.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/ninenow.py b/yt_dlp/extractor/ninenow.py index 6043674ba..b970f8ccb 100644 --- a/yt_dlp/extractor/ninenow.py +++ b/yt_dlp/extractor/ninenow.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/nintendo.py b/yt_dlp/extractor/nintendo.py index ff8f70ba6..ed839af25 100644 --- a/yt_dlp/extractor/nintendo.py +++ b/yt_dlp/extractor/nintendo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/nitter.py b/yt_dlp/extractor/nitter.py index 8bb709cd7..251bf444f 100644 --- a/yt_dlp/extractor/nitter.py +++ b/yt_dlp/extractor/nitter.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( diff --git a/yt_dlp/extractor/njpwworld.py b/yt_dlp/extractor/njpwworld.py index 68c8c8e52..e761cf257 100644 --- a/yt_dlp/extractor/njpwworld.py +++ b/yt_dlp/extractor/njpwworld.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/nobelprize.py b/yt_dlp/extractor/nobelprize.py index 4dfdb09d6..35b64530f 100644 --- a/yt_dlp/extractor/nobelprize.py +++ b/yt_dlp/extractor/nobelprize.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( js_to_json, diff --git a/yt_dlp/extractor/noco.py b/yt_dlp/extractor/noco.py index 28af909d5..583d399cc 100644 --- a/yt_dlp/extractor/noco.py +++ b/yt_dlp/extractor/noco.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import time import hashlib diff 
--git a/yt_dlp/extractor/nonktube.py b/yt_dlp/extractor/nonktube.py index ca1424e06..f191be33b 100644 --- a/yt_dlp/extractor/nonktube.py +++ b/yt_dlp/extractor/nonktube.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .nuevo import NuevoBaseIE diff --git a/yt_dlp/extractor/noodlemagazine.py b/yt_dlp/extractor/noodlemagazine.py index 2f170bbfe..3e04da67e 100644 --- a/yt_dlp/extractor/noodlemagazine.py +++ b/yt_dlp/extractor/noodlemagazine.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( parse_duration, diff --git a/yt_dlp/extractor/noovo.py b/yt_dlp/extractor/noovo.py index b40770d07..acbb74c6e 100644 --- a/yt_dlp/extractor/noovo.py +++ b/yt_dlp/extractor/noovo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .brightcove import BrightcoveNewIE from .common import InfoExtractor from ..compat import compat_str diff --git a/yt_dlp/extractor/normalboots.py b/yt_dlp/extractor/normalboots.py index 61fe571df..07babcd2c 100644 --- a/yt_dlp/extractor/normalboots.py +++ b/yt_dlp/extractor/normalboots.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from .jwplatform import JWPlatformIE diff --git a/yt_dlp/extractor/nosvideo.py b/yt_dlp/extractor/nosvideo.py index 53c500c35..b6d3ea40c 100644 --- a/yt_dlp/extractor/nosvideo.py +++ b/yt_dlp/extractor/nosvideo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/nova.py b/yt_dlp/extractor/nova.py index 00a64f88d..6875d26ba 100644 --- a/yt_dlp/extractor/nova.py +++ b/yt_dlp/extractor/nova.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/novaplay.py b/yt_dlp/extractor/novaplay.py index bfb2c8751..4f1a84651 100644 --- a/yt_dlp/extractor/novaplay.py +++ b/yt_dlp/extractor/novaplay.py @@ -1,4 +1,3 @@ -# coding: utf-8 from .common import InfoExtractor from ..utils import int_or_none, parse_duration, parse_iso8601 diff --git a/yt_dlp/extractor/nowness.py b/yt_dlp/extractor/nowness.py index b2c715f41..fc9043bce 100644 --- a/yt_dlp/extractor/nowness.py +++ b/yt_dlp/extractor/nowness.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .brightcove import ( BrightcoveLegacyIE, BrightcoveNewIE, diff --git a/yt_dlp/extractor/noz.py b/yt_dlp/extractor/noz.py index bdc2efcd7..22cb08e8a 100644 --- a/yt_dlp/extractor/noz.py +++ b/yt_dlp/extractor/noz.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_urllib_parse_unquote, diff --git a/yt_dlp/extractor/npo.py b/yt_dlp/extractor/npo.py index a8aaef6f1..0b5f32c2e 100644 --- a/yt_dlp/extractor/npo.py +++ b/yt_dlp/extractor/npo.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/npr.py b/yt_dlp/extractor/npr.py index 49f062d7a..6d93f154c 100644 --- a/yt_dlp/extractor/npr.py +++ b/yt_dlp/extractor/npr.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/nrk.py b/yt_dlp/extractor/nrk.py index 0cf26d598..553c55132 100644 --- a/yt_dlp/extractor/nrk.py +++ b/yt_dlp/extractor/nrk.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from 
__future__ import unicode_literals - import itertools import random import re diff --git a/yt_dlp/extractor/nrl.py b/yt_dlp/extractor/nrl.py index 0bd5086ae..798d03417 100644 --- a/yt_dlp/extractor/nrl.py +++ b/yt_dlp/extractor/nrl.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/ntvcojp.py b/yt_dlp/extractor/ntvcojp.py index c9af91188..422ec6eb0 100644 --- a/yt_dlp/extractor/ntvcojp.py +++ b/yt_dlp/extractor/ntvcojp.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/ntvde.py b/yt_dlp/extractor/ntvde.py index 035582ee8..d252ced86 100644 --- a/yt_dlp/extractor/ntvde.py +++ b/yt_dlp/extractor/ntvde.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/ntvru.py b/yt_dlp/extractor/ntvru.py index c47d1dfa4..c8df110e8 100644 --- a/yt_dlp/extractor/ntvru.py +++ b/yt_dlp/extractor/ntvru.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/nuevo.py b/yt_dlp/extractor/nuevo.py index be1e09d37..ec54041f1 100644 --- a/yt_dlp/extractor/nuevo.py +++ b/yt_dlp/extractor/nuevo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/nuvid.py b/yt_dlp/extractor/nuvid.py index 84fb97d6a..fafcc8f4b 100644 --- a/yt_dlp/extractor/nuvid.py +++ b/yt_dlp/extractor/nuvid.py @@ -1,5 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/nytimes.py b/yt_dlp/extractor/nytimes.py index 99964737d..f388688c4 100644 --- a/yt_dlp/extractor/nytimes.py +++ b/yt_dlp/extractor/nytimes.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import hmac import hashlib import base64 diff --git a/yt_dlp/extractor/nzherald.py b/yt_dlp/extractor/nzherald.py index e5601b495..7c9efd922 100644 --- a/yt_dlp/extractor/nzherald.py +++ b/yt_dlp/extractor/nzherald.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .brightcove import BrightcoveNewIE from .common import InfoExtractor diff --git a/yt_dlp/extractor/nzz.py b/yt_dlp/extractor/nzz.py index 61ee77adb..ac3b73156 100644 --- a/yt_dlp/extractor/nzz.py +++ b/yt_dlp/extractor/nzz.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/odatv.py b/yt_dlp/extractor/odatv.py index 314527f98..24ab93942 100644 --- a/yt_dlp/extractor/odatv.py +++ b/yt_dlp/extractor/odatv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/odnoklassniki.py b/yt_dlp/extractor/odnoklassniki.py index 293f1aa60..36a7f5f4e 100644 --- a/yt_dlp/extractor/odnoklassniki.py +++ b/yt_dlp/extractor/odnoklassniki.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/oktoberfesttv.py b/yt_dlp/extractor/oktoberfesttv.py index 276567436..e0ac8563a 100644 --- a/yt_dlp/extractor/oktoberfesttv.py +++ 
b/yt_dlp/extractor/oktoberfesttv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/olympics.py b/yt_dlp/extractor/olympics.py
index 784f282c7..85f17a2f4 100644
--- a/yt_dlp/extractor/olympics.py
+++ b/yt_dlp/extractor/olympics.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
diff --git a/yt_dlp/extractor/on24.py b/yt_dlp/extractor/on24.py
index d4d824430..779becc70 100644
--- a/yt_dlp/extractor/on24.py
+++ b/yt_dlp/extractor/on24.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
diff --git a/yt_dlp/extractor/once.py b/yt_dlp/extractor/once.py
index 3e44b7829..460b82d02 100644
--- a/yt_dlp/extractor/once.py
+++ b/yt_dlp/extractor/once.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/ondemandkorea.py b/yt_dlp/extractor/ondemandkorea.py
index e933ea2cc..84687ef47 100644
--- a/yt_dlp/extractor/ondemandkorea.py
+++ b/yt_dlp/extractor/ondemandkorea.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/onefootball.py b/yt_dlp/extractor/onefootball.py
index 826faadd2..41815bef1 100644
--- a/yt_dlp/extractor/onefootball.py
+++ b/yt_dlp/extractor/onefootball.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/onet.py b/yt_dlp/extractor/onet.py
index 95177a213..ea46d7def 100644
--- a/yt_dlp/extractor/onet.py
+++ b/yt_dlp/extractor/onet.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/onionstudios.py b/yt_dlp/extractor/onionstudios.py
index cf5c39e66..9776b4d97 100644
--- a/yt_dlp/extractor/onionstudios.py
+++ b/yt_dlp/extractor/onionstudios.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/ooyala.py b/yt_dlp/extractor/ooyala.py
index 20cfa0a87..77017f08b 100644
--- a/yt_dlp/extractor/ooyala.py
+++ b/yt_dlp/extractor/ooyala.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import base64
 import re
 
diff --git a/yt_dlp/extractor/opencast.py b/yt_dlp/extractor/opencast.py
index cf8d91717..c640224dd 100644
--- a/yt_dlp/extractor/opencast.py
+++ b/yt_dlp/extractor/opencast.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/openload.py b/yt_dlp/extractor/openload.py
index c19d04900..41ef2e892 100644
--- a/yt_dlp/extractor/openload.py
+++ b/yt_dlp/extractor/openload.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 import os
 import subprocess
@@ -50,7 +47,7 @@ def cookie_jar_to_list(cookie_jar):
     return [cookie_to_dict(cookie) for cookie in cookie_jar]
 
 
-class PhantomJSwrapper(object):
+class PhantomJSwrapper:
     """PhantomJS wrapper class
 
     This class is experimental.
@@ -136,7 +133,7 @@ class PhantomJSwrapper(object):
         for name in self._TMP_FILE_NAMES:
             try:
                 os.remove(self._TMP_FILES[name].name)
-            except (IOError, OSError, KeyError):
+            except (OSError, KeyError):
                 pass
 
     def _save_cookies(self, url):
@@ -217,9 +214,9 @@ class PhantomJSwrapper(object):
         f.write(self._TEMPLATE.format(**replaces).encode('utf-8'))
 
         if video_id is None:
-            self.extractor.to_screen('%s' % (note2,))
+            self.extractor.to_screen(f'{note2}')
         else:
-            self.extractor.to_screen('%s: %s' % (video_id, note2))
+            self.extractor.to_screen(f'{video_id}: {note2}')
 
         p = Popen(
             [self.exe, '--ssl-protocol=any', self._TMP_FILES['script'].name],
diff --git a/yt_dlp/extractor/openrec.py b/yt_dlp/extractor/openrec.py
index 5eb1cdbad..7546c12fb 100644
--- a/yt_dlp/extractor/openrec.py
+++ b/yt_dlp/extractor/openrec.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
diff --git a/yt_dlp/extractor/ora.py b/yt_dlp/extractor/ora.py
index 422d0b330..09b121422 100644
--- a/yt_dlp/extractor/ora.py
+++ b/yt_dlp/extractor/ora.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 from .common import InfoExtractor
 from ..compat import compat_urlparse
diff --git a/yt_dlp/extractor/orf.py b/yt_dlp/extractor/orf.py
index 0628977a0..56309ffcb 100644
--- a/yt_dlp/extractor/orf.py
+++ b/yt_dlp/extractor/orf.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import functools
 import re
 
diff --git a/yt_dlp/extractor/outsidetv.py b/yt_dlp/extractor/outsidetv.py
index c5333b08c..b1fcbd6a7 100644
--- a/yt_dlp/extractor/outsidetv.py
+++ b/yt_dlp/extractor/outsidetv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/packtpub.py b/yt_dlp/extractor/packtpub.py
index 62c52cd6e..51778d8a2 100644
--- a/yt_dlp/extractor/packtpub.py
+++ b/yt_dlp/extractor/packtpub.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import json
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/palcomp3.py b/yt_dlp/extractor/palcomp3.py
index d0a62fb17..4b0801c1a 100644
--- a/yt_dlp/extractor/palcomp3.py
+++ b/yt_dlp/extractor/palcomp3.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
diff --git a/yt_dlp/extractor/pandoratv.py b/yt_dlp/extractor/pandoratv.py
index 623005338..3747f31d2 100644
--- a/yt_dlp/extractor/pandoratv.py
+++ b/yt_dlp/extractor/pandoratv.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..compat import (
     compat_str,
diff --git a/yt_dlp/extractor/paramountplus.py b/yt_dlp/extractor/paramountplus.py
index 94a9319ea..7987d77c6 100644
--- a/yt_dlp/extractor/paramountplus.py
+++ b/yt_dlp/extractor/paramountplus.py
@@ -1,4 +1,3 @@
-from __future__ import unicode_literals
 import itertools
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/parliamentliveuk.py b/yt_dlp/extractor/parliamentliveuk.py
index 974d65482..38cb03164 100644
--- a/yt_dlp/extractor/parliamentliveuk.py
+++ b/yt_dlp/extractor/parliamentliveuk.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 import uuid
 
diff --git a/yt_dlp/extractor/parlview.py b/yt_dlp/extractor/parlview.py
index c85eaa7dc..f31ae576c 100644
--- a/yt_dlp/extractor/parlview.py
+++ 
b/yt_dlp/extractor/parlview.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py index 963a0d6fb..cce9843d4 100644 --- a/yt_dlp/extractor/patreon.py +++ b/yt_dlp/extractor/patreon.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools from .common import InfoExtractor diff --git a/yt_dlp/extractor/pbs.py b/yt_dlp/extractor/pbs.py index e48a2b8e0..4e6674e85 100644 --- a/yt_dlp/extractor/pbs.py +++ b/yt_dlp/extractor/pbs.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/pearvideo.py b/yt_dlp/extractor/pearvideo.py index 1d777221c..d552e0966 100644 --- a/yt_dlp/extractor/pearvideo.py +++ b/yt_dlp/extractor/pearvideo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/peekvids.py b/yt_dlp/extractor/peekvids.py index 4bf68559a..f1c4469d6 100644 --- a/yt_dlp/extractor/peekvids.py +++ b/yt_dlp/extractor/peekvids.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/peertube.py b/yt_dlp/extractor/peertube.py index 9d6b82178..0d3bc18a8 100644 --- a/yt_dlp/extractor/peertube.py +++ b/yt_dlp/extractor/peertube.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import re diff --git a/yt_dlp/extractor/peertv.py b/yt_dlp/extractor/peertv.py index 002d33a88..821abe496 100644 --- a/yt_dlp/extractor/peertv.py +++ b/yt_dlp/extractor/peertv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import js_to_json diff --git a/yt_dlp/extractor/peloton.py b/yt_dlp/extractor/peloton.py index 7d832253f..8e50ffc7f 100644 --- a/yt_dlp/extractor/peloton.py +++ b/yt_dlp/extractor/peloton.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re diff --git a/yt_dlp/extractor/people.py b/yt_dlp/extractor/people.py index 6ca95715e..c5143c3ed 100644 --- a/yt_dlp/extractor/people.py +++ b/yt_dlp/extractor/people.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/performgroup.py b/yt_dlp/extractor/performgroup.py index c00d39375..824495f40 100644 --- a/yt_dlp/extractor/performgroup.py +++ b/yt_dlp/extractor/performgroup.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import int_or_none diff --git a/yt_dlp/extractor/periscope.py b/yt_dlp/extractor/periscope.py index 1a292b8ac..fc8591a2c 100644 --- a/yt_dlp/extractor/periscope.py +++ b/yt_dlp/extractor/periscope.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/philharmoniedeparis.py b/yt_dlp/extractor/philharmoniedeparis.py index 9f4899c09..22164caaa 100644 --- a/yt_dlp/extractor/philharmoniedeparis.py +++ b/yt_dlp/extractor/philharmoniedeparis.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git 
a/yt_dlp/extractor/phoenix.py b/yt_dlp/extractor/phoenix.py index e3ea01443..5fa133afe 100644 --- a/yt_dlp/extractor/phoenix.py +++ b/yt_dlp/extractor/phoenix.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .youtube import YoutubeIE diff --git a/yt_dlp/extractor/photobucket.py b/yt_dlp/extractor/photobucket.py index 53aebe2d9..71e9a4805 100644 --- a/yt_dlp/extractor/photobucket.py +++ b/yt_dlp/extractor/photobucket.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/piapro.py b/yt_dlp/extractor/piapro.py index ae160623b..d8d9c7801 100644 --- a/yt_dlp/extractor/piapro.py +++ b/yt_dlp/extractor/piapro.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( diff --git a/yt_dlp/extractor/picarto.py b/yt_dlp/extractor/picarto.py index adf21fda8..54999a832 100644 --- a/yt_dlp/extractor/picarto.py +++ b/yt_dlp/extractor/picarto.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/piksel.py b/yt_dlp/extractor/piksel.py index 84c3de2f0..14a540859 100644 --- a/yt_dlp/extractor/piksel.py +++ b/yt_dlp/extractor/piksel.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/pinkbike.py b/yt_dlp/extractor/pinkbike.py index 9f3501f77..313b5cce0 100644 --- a/yt_dlp/extractor/pinkbike.py +++ b/yt_dlp/extractor/pinkbike.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/pinterest.py b/yt_dlp/extractor/pinterest.py index 80e9cd00e..171f9e4eb 100644 --- a/yt_dlp/extractor/pinterest.py +++ b/yt_dlp/extractor/pinterest.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/pixivsketch.py b/yt_dlp/extractor/pixivsketch.py index f0ad0b24a..bfdb8b24e 100644 --- a/yt_dlp/extractor/pixivsketch.py +++ b/yt_dlp/extractor/pixivsketch.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/pladform.py b/yt_dlp/extractor/pladform.py index 99ade85ec..301f5c838 100644 --- a/yt_dlp/extractor/pladform.py +++ b/yt_dlp/extractor/pladform.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/planetmarathi.py b/yt_dlp/extractor/planetmarathi.py index 07ac15b54..03b9d6aaa 100644 --- a/yt_dlp/extractor/planetmarathi.py +++ b/yt_dlp/extractor/planetmarathi.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( try_get, diff --git a/yt_dlp/extractor/platzi.py b/yt_dlp/extractor/platzi.py index 17f52e7f4..29d3210ac 100644 --- a/yt_dlp/extractor/platzi.py +++ b/yt_dlp/extractor/platzi.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_b64decode, diff --git a/yt_dlp/extractor/playfm.py b/yt_dlp/extractor/playfm.py index 4298cbe30..e895ba480 100644 --- a/yt_dlp/extractor/playfm.py +++ 
b/yt_dlp/extractor/playfm.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/playplustv.py b/yt_dlp/extractor/playplustv.py index cad2c3a0f..05dbaf066 100644 --- a/yt_dlp/extractor/playplustv.py +++ b/yt_dlp/extractor/playplustv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/plays.py b/yt_dlp/extractor/plays.py index ddfc6f148..700dfe407 100644 --- a/yt_dlp/extractor/plays.py +++ b/yt_dlp/extractor/plays.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/playstuff.py b/yt_dlp/extractor/playstuff.py index 5a329957f..b424ba187 100644 --- a/yt_dlp/extractor/playstuff.py +++ b/yt_dlp/extractor/playstuff.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/playtvak.py b/yt_dlp/extractor/playtvak.py index 30c8a599e..f7e5ddbe7 100644 --- a/yt_dlp/extractor/playtvak.py +++ b/yt_dlp/extractor/playtvak.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_urlparse, diff --git a/yt_dlp/extractor/playvid.py b/yt_dlp/extractor/playvid.py index e1c406b6c..5ffefc934 100644 --- a/yt_dlp/extractor/playvid.py +++ b/yt_dlp/extractor/playvid.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/playwire.py b/yt_dlp/extractor/playwire.py index 9c9e597b5..ab7f71493 100644 --- a/yt_dlp/extractor/playwire.py +++ b/yt_dlp/extractor/playwire.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( dict_get, diff --git a/yt_dlp/extractor/pluralsight.py b/yt_dlp/extractor/pluralsight.py index 2a5e0e488..b50152ad8 100644 --- a/yt_dlp/extractor/pluralsight.py +++ b/yt_dlp/extractor/pluralsight.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import collections import json import os diff --git a/yt_dlp/extractor/plutotv.py b/yt_dlp/extractor/plutotv.py index 26aff1af5..6e8f46fa3 100644 --- a/yt_dlp/extractor/plutotv.py +++ b/yt_dlp/extractor/plutotv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import uuid diff --git a/yt_dlp/extractor/podomatic.py b/yt_dlp/extractor/podomatic.py index 673a3ab94..985bfae9d 100644 --- a/yt_dlp/extractor/podomatic.py +++ b/yt_dlp/extractor/podomatic.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/pokemon.py b/yt_dlp/extractor/pokemon.py index b411390e2..eef0d02ca 100644 --- a/yt_dlp/extractor/pokemon.py +++ b/yt_dlp/extractor/pokemon.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/pokergo.py b/yt_dlp/extractor/pokergo.py index c9e2fed12..5c7baadf2 100644 --- a/yt_dlp/extractor/pokergo.py +++ b/yt_dlp/extractor/pokergo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 from .common import InfoExtractor diff --git a/yt_dlp/extractor/polsatgo.py b/yt_dlp/extractor/polsatgo.py index 
1e3f46c07..e44d951e6 100644
--- a/yt_dlp/extractor/polsatgo.py
+++ b/yt_dlp/extractor/polsatgo.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from uuid import uuid4
 import json
 
diff --git a/yt_dlp/extractor/polskieradio.py b/yt_dlp/extractor/polskieradio.py
index b2b3eb29c..514753b64 100644
--- a/yt_dlp/extractor/polskieradio.py
+++ b/yt_dlp/extractor/polskieradio.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import itertools
 import json
 import math
diff --git a/yt_dlp/extractor/popcorntimes.py b/yt_dlp/extractor/popcorntimes.py
index 5f9d0e720..ed741a07b 100644
--- a/yt_dlp/extractor/popcorntimes.py
+++ b/yt_dlp/extractor/popcorntimes.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..compat import (
     compat_b64decode,
diff --git a/yt_dlp/extractor/popcorntv.py b/yt_dlp/extractor/popcorntv.py
index 66d2e5094..77984626f 100644
--- a/yt_dlp/extractor/popcorntv.py
+++ b/yt_dlp/extractor/popcorntv.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..utils import (
     extract_attributes,
diff --git a/yt_dlp/extractor/porn91.py b/yt_dlp/extractor/porn91.py
index 20eac647a..af4a0dc9c 100644
--- a/yt_dlp/extractor/porn91.py
+++ b/yt_dlp/extractor/porn91.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     parse_duration,
diff --git a/yt_dlp/extractor/porncom.py b/yt_dlp/extractor/porncom.py
index 83df22141..2ebd3fa09 100644
--- a/yt_dlp/extractor/porncom.py
+++ b/yt_dlp/extractor/porncom.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/pornez.py b/yt_dlp/extractor/pornez.py
index 713dc0080..df0e44a69 100644
--- a/yt_dlp/extractor/pornez.py
+++ b/yt_dlp/extractor/pornez.py
@@ -1,5 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import int_or_none
 
diff --git a/yt_dlp/extractor/pornflip.py b/yt_dlp/extractor/pornflip.py
index accf45269..26536bc65 100644
--- a/yt_dlp/extractor/pornflip.py
+++ b/yt_dlp/extractor/pornflip.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
diff --git a/yt_dlp/extractor/pornhd.py b/yt_dlp/extractor/pornhd.py
index 9dbd72f1d..06a44ddd1 100644
--- a/yt_dlp/extractor/pornhd.py
+++ b/yt_dlp/extractor/pornhd.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..utils import (
     determine_ext,
diff --git a/yt_dlp/extractor/pornhub.py b/yt_dlp/extractor/pornhub.py
index 17c8c9100..d296ccacb 100644
--- a/yt_dlp/extractor/pornhub.py
+++ b/yt_dlp/extractor/pornhub.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import functools
 import itertools
 import math
diff --git a/yt_dlp/extractor/pornotube.py b/yt_dlp/extractor/pornotube.py
index 1b5b9a320..e0960f4c6 100644
--- a/yt_dlp/extractor/pornotube.py
+++ b/yt_dlp/extractor/pornotube.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import json
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/pornovoisines.py b/yt_dlp/extractor/pornovoisines.py
index 18459fc94..96d2da7c7 100644
--- a/yt_dlp/extractor/pornovoisines.py
+++ b/yt_dlp/extractor/pornovoisines.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
diff --git a/yt_dlp/extractor/pornoxo.py b/yt_dlp/extractor/pornoxo.py
index 489dc2b25..5104d8a49 100644
--- a/yt_dlp/extractor/pornoxo.py
+++ b/yt_dlp/extractor/pornoxo.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..utils import (
     str_to_int,
diff --git a/yt_dlp/extractor/presstv.py b/yt_dlp/extractor/presstv.py
index bfb2eb71e..26ce74a59 100644
--- a/yt_dlp/extractor/presstv.py
+++ b/yt_dlp/extractor/presstv.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..utils import remove_start
 
diff --git a/yt_dlp/extractor/projectveritas.py b/yt_dlp/extractor/projectveritas.py
index 9e9867ba5..e4aa4bd35 100644
--- a/yt_dlp/extractor/projectveritas.py
+++ b/yt_dlp/extractor/projectveritas.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
diff --git a/yt_dlp/extractor/prosiebensat1.py b/yt_dlp/extractor/prosiebensat1.py
index e89bbfd27..cb5ada1b9 100644
--- a/yt_dlp/extractor/prosiebensat1.py
+++ b/yt_dlp/extractor/prosiebensat1.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from hashlib import sha1
diff --git a/yt_dlp/extractor/prx.py b/yt_dlp/extractor/prx.py
index 80561b80a..5bb183270 100644
--- a/yt_dlp/extractor/prx.py
+++ b/yt_dlp/extractor/prx.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import itertools
 from .common import InfoExtractor, SearchInfoExtractor
 from ..utils import (
diff --git a/yt_dlp/extractor/puhutv.py b/yt_dlp/extractor/puhutv.py
index ca71665e0..a5dac1dff 100644
--- a/yt_dlp/extractor/puhutv.py
+++ b/yt_dlp/extractor/puhutv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import (
     compat_HTTPError,
diff --git a/yt_dlp/extractor/puls4.py b/yt_dlp/extractor/puls4.py
index 80091b85f..3c13d1f56 100644
--- a/yt_dlp/extractor/puls4.py
+++ b/yt_dlp/extractor/puls4.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .prosiebensat1 import ProSiebenSat1BaseIE
 from ..utils import (
     unified_strdate,
diff --git a/yt_dlp/extractor/pyvideo.py b/yt_dlp/extractor/pyvideo.py
index 869619723..7b25166b2 100644
--- a/yt_dlp/extractor/pyvideo.py
+++ b/yt_dlp/extractor/pyvideo.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/qqmusic.py b/yt_dlp/extractor/qqmusic.py
index 0106d166f..fa2454df4 100644
--- a/yt_dlp/extractor/qqmusic.py
+++ b/yt_dlp/extractor/qqmusic.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import random
 import re
 import time
diff --git a/yt_dlp/extractor/r7.py b/yt_dlp/extractor/r7.py
index e2202d603..b459efceb 100644
--- a/yt_dlp/extractor/r7.py
+++ b/yt_dlp/extractor/r7.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import int_or_none
 
diff --git a/yt_dlp/extractor/radiko.py b/yt_dlp/extractor/radiko.py
index 1e60de153..a0f5ebdd0 100644
--- a/yt_dlp/extractor/radiko.py
+++ b/yt_dlp/extractor/radiko.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 import base64
 import calendar
diff --git a/yt_dlp/extractor/radiobremen.py b/yt_dlp/extractor/radiobremen.py
index 2c35f9845..99ba050d0 100644
--- a/yt_dlp/extractor/radiobremen.py
+++ b/yt_dlp/extractor/radiobremen.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/radiocanada.py b/yt_dlp/extractor/radiocanada.py
index 4b4445c30..dd6f899a4 100644
--- a/yt_dlp/extractor/radiocanada.py
+++ b/yt_dlp/extractor/radiocanada.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..compat import compat_HTTPError
 from ..utils import (
diff --git a/yt_dlp/extractor/radiode.py b/yt_dlp/extractor/radiode.py
index 038287363..befb0b72b 100644
--- a/yt_dlp/extractor/radiode.py
+++ b/yt_dlp/extractor/radiode.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/radiofrance.py b/yt_dlp/extractor/radiofrance.py
index 082238bbc..8fef54dab 100644
--- a/yt_dlp/extractor/radiofrance.py
+++ b/yt_dlp/extractor/radiofrance.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/radiojavan.py b/yt_dlp/extractor/radiojavan.py
index 3f74f0c01..6a6118899 100644
--- a/yt_dlp/extractor/radiojavan.py
+++ b/yt_dlp/extractor/radiojavan.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/radiokapital.py b/yt_dlp/extractor/radiokapital.py
index 2e93e034f..8f9737ac3 100644
--- a/yt_dlp/extractor/radiokapital.py
+++ b/yt_dlp/extractor/radiokapital.py
@@ -1,5 +1,3 @@
-# coding: utf-8
-
 from .common import InfoExtractor
 from ..utils import (
     clean_html,
diff --git a/yt_dlp/extractor/radiozet.py b/yt_dlp/extractor/radiozet.py
index 2e1ff36c2..67520172e 100644
--- a/yt_dlp/extractor/radiozet.py
+++ b/yt_dlp/extractor/radiozet.py
@@ -1,4 +1,3 @@
-# coding: utf-8
 from .common import InfoExtractor
 from ..utils import (
     traverse_obj,
diff --git a/yt_dlp/extractor/rai.py b/yt_dlp/extractor/rai.py
index 7c72d60c6..31199e32e 100644
--- a/yt_dlp/extractor/rai.py
+++ b/yt_dlp/extractor/rai.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/raywenderlich.py b/yt_dlp/extractor/raywenderlich.py
index f04d51f7b..e0e3c3ead 100644
--- a/yt_dlp/extractor/raywenderlich.py
+++ b/yt_dlp/extractor/raywenderlich.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/rbmaradio.py b/yt_dlp/extractor/rbmaradio.py
index 9642fbbe1..86c63dbb7 100644
--- a/yt_dlp/extractor/rbmaradio.py
+++ b/yt_dlp/extractor/rbmaradio.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
diff --git a/yt_dlp/extractor/rcs.py b/yt_dlp/extractor/rcs.py
index ace611bc9..abbc167c0 100644
--- a/yt_dlp/extractor/rcs.py
+++ b/yt_dlp/extractor/rcs.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/rcti.py b/yt_dlp/extractor/rcti.py
index ac42e58d9..0cfecbc9a 100644
--- a/yt_dlp/extractor/rcti.py
+++ b/yt_dlp/extractor/rcti.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 import random
 import time
diff --git a/yt_dlp/extractor/rds.py b/yt_dlp/extractor/rds.py
index 0c497856e..9a2e0d985 100644
--- a/yt_dlp/extractor/rds.py
+++ b/yt_dlp/extractor/rds.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     parse_duration,
diff --git a/yt_dlp/extractor/redbulltv.py b/yt_dlp/extractor/redbulltv.py
index 756a3666b..2f0e41c5b 100644
--- a/yt_dlp/extractor/redbulltv.py
+++ b/yt_dlp/extractor/redbulltv.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..compat import compat_HTTPError
 from ..utils import (
diff --git a/yt_dlp/extractor/redgifs.py b/yt_dlp/extractor/redgifs.py
index 55196b768..e3712a1d6 100644
--- a/yt_dlp/extractor/redgifs.py
+++ b/yt_dlp/extractor/redgifs.py
@@ -1,4 +1,3 @@
-# coding: utf-8
 import functools
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/redtube.py b/yt_dlp/extractor/redtube.py
index 7fee54fee..ab7c505da 100644
--- a/yt_dlp/extractor/redtube.py
+++ b/yt_dlp/extractor/redtube.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/regiotv.py b/yt_dlp/extractor/regiotv.py
index e250a52f0..6114841fb 100644
--- a/yt_dlp/extractor/regiotv.py
+++ b/yt_dlp/extractor/regiotv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 from ..utils import (
diff --git a/yt_dlp/extractor/rentv.py b/yt_dlp/extractor/rentv.py
index 7c8909d95..ab47ee552 100644
--- a/yt_dlp/extractor/rentv.py
+++ b/yt_dlp/extractor/rentv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
diff --git a/yt_dlp/extractor/restudy.py b/yt_dlp/extractor/restudy.py
index d47fb45ca..cd3c20d7a 100644
--- a/yt_dlp/extractor/restudy.py
+++ b/yt_dlp/extractor/restudy.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/reuters.py b/yt_dlp/extractor/reuters.py
index 9dc482d21..1428b7cc9 100644
--- a/yt_dlp/extractor/reuters.py
+++ b/yt_dlp/extractor/reuters.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/reverbnation.py b/yt_dlp/extractor/reverbnation.py
index 4cb99c244..06b6c3c2f 100644
--- a/yt_dlp/extractor/reverbnation.py
+++ b/yt_dlp/extractor/reverbnation.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     qualities,
diff --git a/yt_dlp/extractor/rice.py b/yt_dlp/extractor/rice.py
index cf2bb1b51..9ca47f3d4 100644
--- a/yt_dlp/extractor/rice.py
+++ b/yt_dlp/extractor/rice.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/rmcdecouverte.py b/yt_dlp/extractor/rmcdecouverte.py
index 8bfce3416..8d29b302b 100644
--- a/yt_dlp/extractor/rmcdecouverte.py
+++ b/yt_dlp/extractor/rmcdecouverte.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from .brightcove import BrightcoveLegacyIE
 from ..compat import (
diff --git a/yt_dlp/extractor/rockstargames.py b/yt_dlp/extractor/rockstargames.py
index cd6904bc9..5f1db0f05 100644
--- a/yt_dlp/extractor/rockstargames.py
+++ b/yt_dlp/extractor/rockstargames.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
diff --git a/yt_dlp/extractor/rokfin.py b/yt_dlp/extractor/rokfin.py
index 0fd65db4b..d7e8ba620 100644
--- a/yt_dlp/extractor/rokfin.py
+++ b/yt_dlp/extractor/rokfin.py
@@ -1,4 +1,3 @@
-# coding: utf-8
 import itertools
 from datetime import datetime
 
diff --git a/yt_dlp/extractor/roosterteeth.py b/yt_dlp/extractor/roosterteeth.py
index a55dd4f8b..011dadfaa 100644
--- a/yt_dlp/extractor/roosterteeth.py
+++ b/yt_dlp/extractor/roosterteeth.py
@@ -1,4 +1,3 @@
-# coding: utf-8
 from .common import InfoExtractor
 from ..compat import compat_HTTPError
 from ..utils import (
diff --git a/yt_dlp/extractor/rottentomatoes.py b/yt_dlp/extractor/rottentomatoes.py
index 14c8e8236..f133c851b 100644
--- a/yt_dlp/extractor/rottentomatoes.py
+++ b/yt_dlp/extractor/rottentomatoes.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from .internetvideoarchive import InternetVideoArchiveIE
 
diff --git a/yt_dlp/extractor/rozhlas.py b/yt_dlp/extractor/rozhlas.py
index fccf69401..a8189676f 100644
--- a/yt_dlp/extractor/rozhlas.py
+++ b/yt_dlp/extractor/rozhlas.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
diff --git a/yt_dlp/extractor/rtbf.py b/yt_dlp/extractor/rtbf.py
index 4b61fdb17..a300a2482 100644
--- a/yt_dlp/extractor/rtbf.py
+++ b/yt_dlp/extractor/rtbf.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/rte.py b/yt_dlp/extractor/rte.py
index 1fbc72915..93faf1b32 100644
--- a/yt_dlp/extractor/rte.py
+++ b/yt_dlp/extractor/rte.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/rtl2.py b/yt_dlp/extractor/rtl2.py
index e29171474..afa0d33cf 100644
--- a/yt_dlp/extractor/rtl2.py
+++ b/yt_dlp/extractor/rtl2.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/rtlnl.py b/yt_dlp/extractor/rtlnl.py
index 9eaa06f25..ed89554ab 100644
--- a/yt_dlp/extractor/rtlnl.py
+++ b/yt_dlp/extractor/rtlnl.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
diff --git a/yt_dlp/extractor/rtnews.py b/yt_dlp/extractor/rtnews.py
index 68b6044b6..6be9945f7 100644
--- a/yt_dlp/extractor/rtnews.py
+++ b/yt_dlp/extractor/rtnews.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/rtp.py b/yt_dlp/extractor/rtp.py
index c165ade78..5928a207a 100644
--- a/yt_dlp/extractor/rtp.py
+++ b/yt_dlp/extractor/rtp.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import js_to_json
 import re
diff --git a/yt_dlp/extractor/rtrfm.py b/yt_dlp/extractor/rtrfm.py
index 93d51e8ed..7381d8202 100644
--- a/yt_dlp/extractor/rtrfm.py
+++ b/yt_dlp/extractor/rtrfm.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/rts.py b/yt_dlp/extractor/rts.py
index 865a73024..e5ba1a26b 100644
--- a/yt_dlp/extractor/rts.py
+++ b/yt_dlp/extractor/rts.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .srgssr import SRGSSRIE
diff --git a/yt_dlp/extractor/rtve.py b/yt_dlp/extractor/rtve.py
index 7a1dc6f32..e5837e8c8 100644
--- a/yt_dlp/extractor/rtve.py
+++ b/yt_dlp/extractor/rtve.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import base64
 import io
 import sys
diff --git a/yt_dlp/extractor/rtvnh.py b/yt_dlp/extractor/rtvnh.py
index 6a00f7007..58af3dda2 100644
--- a/yt_dlp/extractor/rtvnh.py
+++ b/yt_dlp/extractor/rtvnh.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import ExtractorError
 
diff --git a/yt_dlp/extractor/rtvs.py b/yt_dlp/extractor/rtvs.py
index 3ea0f1883..fb06efa4b 100644
--- a/yt_dlp/extractor/rtvs.py
+++ b/yt_dlp/extractor/rtvs.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/ruhd.py b/yt_dlp/extractor/ruhd.py
index 3c8053a26..abaa3f9ea 100644
--- a/yt_dlp/extractor/ruhd.py
+++ b/yt_dlp/extractor/ruhd.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/rule34video.py b/yt_dlp/extractor/rule34video.py
index a602a9f33..bb113d822 100644
--- a/yt_dlp/extractor/rule34video.py
+++ b/yt_dlp/extractor/rule34video.py
@@ -1,5 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
 import re
 
 from ..utils import parse_duration
diff --git a/yt_dlp/extractor/rumble.py b/yt_dlp/extractor/rumble.py
index a0d5f88d9..50c383d79 100644
--- a/yt_dlp/extractor/rumble.py
+++ b/yt_dlp/extractor/rumble.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import itertools
 import re
 
diff --git a/yt_dlp/extractor/rutube.py b/yt_dlp/extractor/rutube.py
index 2f753b41f..ecfcea939 100644
--- a/yt_dlp/extractor/rutube.py
+++ b/yt_dlp/extractor/rutube.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 import itertools
 
diff --git a/yt_dlp/extractor/rutv.py b/yt_dlp/extractor/rutv.py
index 0ea8253fa..adf78ddb0 100644
--- a/yt_dlp/extractor/rutv.py
+++ b/yt_dlp/extractor/rutv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/ruutu.py b/yt_dlp/extractor/ruutu.py
index 5a30e3360..f5dadf278 100644
--- a/yt_dlp/extractor/ruutu.py
+++ b/yt_dlp/extractor/ruutu.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 import re
 
diff --git a/yt_dlp/extractor/ruv.py b/yt_dlp/extractor/ruv.py
index d806ed068..12499d6ca 100644
--- a/yt_dlp/extractor/ruv.py
+++ b/yt_dlp/extractor/ruv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     determine_ext,
diff --git a/yt_dlp/extractor/safari.py b/yt_dlp/extractor/safari.py
index 7b4571daa..450a661e9 100644
--- a/yt_dlp/extractor/safari.py
+++ b/yt_dlp/extractor/safari.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 import re
 
diff --git a/yt_dlp/extractor/saitosan.py b/yt_dlp/extractor/saitosan.py
index 621335ca0..d2f60e92f 100644
--- a/yt_dlp/extractor/saitosan.py
+++ b/yt_dlp/extractor/saitosan.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import ExtractorError, try_get
 
diff --git a/yt_dlp/extractor/samplefocus.py b/yt_dlp/extractor/samplefocus.py
index 806c3c354..e9f5c227b 100644
--- a/yt_dlp/extractor/samplefocus.py
+++ b/yt_dlp/extractor/samplefocus.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/sapo.py b/yt_dlp/extractor/sapo.py
index df202a3a4..9a601a01c 100644
--- a/yt_dlp/extractor/sapo.py
+++ b/yt_dlp/extractor/sapo.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/savefrom.py b/yt_dlp/extractor/savefrom.py
index 98efdc2a4..9c9e74b6d 100644
--- a/yt_dlp/extractor/savefrom.py
+++ b/yt_dlp/extractor/savefrom.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import os.path
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/sbs.py b/yt_dlp/extractor/sbs.py
index 4090f6385..711524406 100644
--- a/yt_dlp/extractor/sbs.py
+++ b/yt_dlp/extractor/sbs.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     smuggle_url,
diff --git a/yt_dlp/extractor/screencast.py b/yt_dlp/extractor/screencast.py
index 69a0d01f3..e3dbaab69 100644
--- a/yt_dlp/extractor/screencast.py
+++ b/yt_dlp/extractor/screencast.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import (
     compat_parse_qs,
diff --git a/yt_dlp/extractor/screencastomatic.py b/yt_dlp/extractor/screencastomatic.py
index 0afdc1715..f2f281f47 100644
--- a/yt_dlp/extractor/screencastomatic.py
+++ b/yt_dlp/extractor/screencastomatic.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     get_element_by_class,
diff --git a/yt_dlp/extractor/scrippsnetworks.py b/yt_dlp/extractor/scrippsnetworks.py
index 84918b67f..c3cee6e4a 100644
--- a/yt_dlp/extractor/scrippsnetworks.py
+++ b/yt_dlp/extractor/scrippsnetworks.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 import hashlib
 
diff --git a/yt_dlp/extractor/scte.py b/yt_dlp/extractor/scte.py
index 7215cf5d1..d839ffcde 100644
--- a/yt_dlp/extractor/scte.py
+++ b/yt_dlp/extractor/scte.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/seeker.py b/yt_dlp/extractor/seeker.py
index e5c18c7a5..65eb16a09 100644
--- a/yt_dlp/extractor/seeker.py
+++ b/yt_dlp/extractor/seeker.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/senategov.py b/yt_dlp/extractor/senategov.py
index b295184a1..bced14328 100644
--- a/yt_dlp/extractor/senategov.py
+++ b/yt_dlp/extractor/senategov.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/sendtonews.py b/yt_dlp/extractor/sendtonews.py
index 858547b54..cf4b93d45 100644
--- a/yt_dlp/extractor/sendtonews.py
+++ b/yt_dlp/extractor/sendtonews.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/servus.py b/yt_dlp/extractor/servus.py
index 1610ddc2c..ac030ea41 100644
--- a/yt_dlp/extractor/servus.py
+++ b/yt_dlp/extractor/servus.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     determine_ext,
diff --git a/yt_dlp/extractor/sevenplus.py b/yt_dlp/extractor/sevenplus.py
index 9867961f0..8e95bc230 100644
--- a/yt_dlp/extractor/sevenplus.py
+++ b/yt_dlp/extractor/sevenplus.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 import re
 
diff --git a/yt_dlp/extractor/sexu.py b/yt_dlp/extractor/sexu.py
index 3df51520b..000f7e166 100644
--- a/yt_dlp/extractor/sexu.py
+++ b/yt_dlp/extractor/sexu.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/seznamzpravy.py b/yt_dlp/extractor/seznamzpravy.py
index eef4975cb..891bfcfee 100644
--- a/yt_dlp/extractor/seznamzpravy.py
+++ b/yt_dlp/extractor/seznamzpravy.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/shahid.py b/yt_dlp/extractor/shahid.py
index ab45d9ce4..53ca86b73 100644
--- a/yt_dlp/extractor/shahid.py
+++ b/yt_dlp/extractor/shahid.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 import math
 import re
diff --git a/yt_dlp/extractor/shared.py b/yt_dlp/extractor/shared.py
index 93ab2a167..5bc097b0d 100644
--- a/yt_dlp/extractor/shared.py
+++ b/yt_dlp/extractor/shared.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import (
     compat_b64decode,
diff --git a/yt_dlp/extractor/shemaroome.py b/yt_dlp/extractor/shemaroome.py
index 45c12915a..c0780abe2 100644
--- a/yt_dlp/extractor/shemaroome.py
+++ b/yt_dlp/extractor/shemaroome.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..aes import aes_cbc_decrypt, unpad_pkcs7
 from ..compat import (
diff --git a/yt_dlp/extractor/showroomlive.py b/yt_dlp/extractor/showroomlive.py
index 1aada69ac..cd681a035 100644
--- a/yt_dlp/extractor/showroomlive.py
+++ b/yt_dlp/extractor/showroomlive.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
diff --git a/yt_dlp/extractor/simplecast.py b/yt_dlp/extractor/simplecast.py
index 857e9414f..ecbb6123b 100644
--- a/yt_dlp/extractor/simplecast.py
+++ b/yt_dlp/extractor/simplecast.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/sina.py b/yt_dlp/extractor/sina.py
index b62b0c3e5..d30d57d85 100644
--- a/yt_dlp/extractor/sina.py
+++ b/yt_dlp/extractor/sina.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..utils import (
     HEADRequest,
diff --git a/yt_dlp/extractor/sixplay.py b/yt_dlp/extractor/sixplay.py
index fd747f59b..b7b7d7d7f 100644
--- a/yt_dlp/extractor/sixplay.py
+++ b/yt_dlp/extractor/sixplay.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..compat import (
     compat_str,
diff --git a/yt_dlp/extractor/skeb.py b/yt_dlp/extractor/skeb.py
index 81aecb311..e02f8cef0 100644
--- a/yt_dlp/extractor/skeb.py
+++ b/yt_dlp/extractor/skeb.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import ExtractorError, determine_ext, parse_qs, traverse_obj
 
diff --git a/yt_dlp/extractor/sky.py b/yt_dlp/extractor/sky.py
index ad1e62d88..0a8b6cc76 100644
--- a/yt_dlp/extractor/sky.py
+++ b/yt_dlp/extractor/sky.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/skyit.py b/yt_dlp/extractor/skyit.py
index ddb43c075..438fb60e3 100644
--- a/yt_dlp/extractor/skyit.py
+++ b/yt_dlp/extractor/skyit.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import (
     compat_parse_qs,
diff --git a/yt_dlp/extractor/skylinewebcams.py b/yt_dlp/extractor/skylinewebcams.py
index 47bbb7632..4292bb2ae 100644
--- a/yt_dlp/extractor/skylinewebcams.py
+++ b/yt_dlp/extractor/skylinewebcams.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/skynewsarabia.py b/yt_dlp/extractor/skynewsarabia.py
index fffc9aa22..6264b04bb 100644
--- a/yt_dlp/extractor/skynewsarabia.py
+++ b/yt_dlp/extractor/skynewsarabia.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
diff --git a/yt_dlp/extractor/skynewsau.py b/yt_dlp/extractor/skynewsau.py
index 8e079ee31..43a9c82cf 100644
--- a/yt_dlp/extractor/skynewsau.py
+++ b/yt_dlp/extractor/skynewsau.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     try_get,
diff --git a/yt_dlp/extractor/slideshare.py b/yt_dlp/extractor/slideshare.py
index 9b3ad0ad4..ab9dad0ec 100644
--- a/yt_dlp/extractor/slideshare.py
+++ b/yt_dlp/extractor/slideshare.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import json
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/slideslive.py b/yt_dlp/extractor/slideslive.py
index df6084647..72ca56057 100644
--- a/yt_dlp/extractor/slideslive.py
+++ b/yt_dlp/extractor/slideslive.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     bool_or_none,
diff --git a/yt_dlp/extractor/slutload.py b/yt_dlp/extractor/slutload.py
index 661f9e59d..8e6e89c9a 100644
--- a/yt_dlp/extractor/slutload.py
+++ b/yt_dlp/extractor/slutload.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/snotr.py b/yt_dlp/extractor/snotr.py
index 0bb548255..6889f1929 100644
--- a/yt_dlp/extractor/snotr.py
+++ b/yt_dlp/extractor/snotr.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..utils import (
     parse_duration,
diff --git a/yt_dlp/extractor/sohu.py b/yt_dlp/extractor/sohu.py
index 3bff5c595..c3a135955 100644
--- a/yt_dlp/extractor/sohu.py
+++ b/yt_dlp/extractor/sohu.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/sonyliv.py b/yt_dlp/extractor/sonyliv.py
index 5b6849fc9..771f890cc 100644
--- a/yt_dlp/extractor/sonyliv.py
+++ b/yt_dlp/extractor/sonyliv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import datetime
 import math
 import random
diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py
index 749e6dda3..6dfa50c60 100644
--- a/yt_dlp/extractor/soundcloud.py
+++ b/yt_dlp/extractor/soundcloud.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import itertools
 import re
 import json
diff --git a/yt_dlp/extractor/soundgasm.py b/yt_dlp/extractor/soundgasm.py
index d608eb7a7..9e59c7c0e 100644
--- a/yt_dlp/extractor/soundgasm.py
+++ b/yt_dlp/extractor/soundgasm.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/southpark.py b/yt_dlp/extractor/southpark.py
index 942a52dcf..855f1d6d3 100644
--- a/yt_dlp/extractor/southpark.py
+++ b/yt_dlp/extractor/southpark.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .mtv import MTVServicesInfoExtractor
 
 
diff --git a/yt_dlp/extractor/sovietscloset.py b/yt_dlp/extractor/sovietscloset.py
index 4bc2263f0..fc5a492a6 100644
--- a/yt_dlp/extractor/sovietscloset.py
+++ b/yt_dlp/extractor/sovietscloset.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     try_get,
diff --git a/yt_dlp/extractor/spankbang.py b/yt_dlp/extractor/spankbang.py
index dd849ae13..1aa8eaba1 100644
--- a/yt_dlp/extractor/spankbang.py
+++ b/yt_dlp/extractor/spankbang.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/spankwire.py b/yt_dlp/extractor/spankwire.py
index e97c1d23e..603f17e9d 100644
--- a/yt_dlp/extractor/spankwire.py
+++ b/yt_dlp/extractor/spankwire.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/spiegel.py b/yt_dlp/extractor/spiegel.py
index 58f2ed353..3701e295a 100644
--- a/yt_dlp/extractor/spiegel.py
+++ b/yt_dlp/extractor/spiegel.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from .jwplatform import JWPlatformIE
 
diff --git a/yt_dlp/extractor/spiegeltv.py b/yt_dlp/extractor/spiegeltv.py
index 6ccf4c342..69942334e 100644
--- a/yt_dlp/extractor/spiegeltv.py
+++ b/yt_dlp/extractor/spiegeltv.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from .nexx import NexxIE
 
diff --git a/yt_dlp/extractor/spike.py b/yt_dlp/extractor/spike.py
index 5805f3d44..5c1c78d8f 100644
--- a/yt_dlp/extractor/spike.py
+++ b/yt_dlp/extractor/spike.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .mtv import MTVServicesInfoExtractor
 
 
diff --git a/yt_dlp/extractor/sport5.py b/yt_dlp/extractor/sport5.py
index 35c57d62a..f4ac98b6e 100644
--- a/yt_dlp/extractor/sport5.py
+++ b/yt_dlp/extractor/sport5.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..utils import ExtractorError
 
diff --git a/yt_dlp/extractor/sportbox.py b/yt_dlp/extractor/sportbox.py
index b9017fd2a..1041cc7d1 100644
--- a/yt_dlp/extractor/sportbox.py
+++ b/yt_dlp/extractor/sportbox.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/sportdeutschland.py b/yt_dlp/extractor/sportdeutschland.py
index 15b488ab7..75074b310 100644
--- a/yt_dlp/extractor/sportdeutschland.py
+++ b/yt_dlp/extractor/sportdeutschland.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     clean_html,
diff --git a/yt_dlp/extractor/spotify.py b/yt_dlp/extractor/spotify.py
index 826f98cff..3b8dea8f4 100644
--- a/yt_dlp/extractor/spotify.py
+++ b/yt_dlp/extractor/spotify.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 import re
 
diff --git a/yt_dlp/extractor/spreaker.py b/yt_dlp/extractor/spreaker.py
index 6c7e40ae4..36a9bd291 100644
--- a/yt_dlp/extractor/spreaker.py
+++ b/yt_dlp/extractor/spreaker.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import itertools
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/springboardplatform.py b/yt_dlp/extractor/springboardplatform.py
index 49ac1f559..8e156bf1a 100644
--- a/yt_dlp/extractor/springboardplatform.py
+++ b/yt_dlp/extractor/springboardplatform.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/sprout.py b/yt_dlp/extractor/sprout.py
index e243732f2..444a6c270 100644
--- a/yt_dlp/extractor/sprout.py
+++ b/yt_dlp/extractor/sprout.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .adobepass import AdobePassIE
 from ..utils import (
     int_or_none,
diff --git a/yt_dlp/extractor/srgssr.py b/yt_dlp/extractor/srgssr.py
index f9919816d..6dd312985 100644
--- a/yt_dlp/extractor/srgssr.py
+++ b/yt_dlp/extractor/srgssr.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
diff --git a/yt_dlp/extractor/srmediathek.py b/yt_dlp/extractor/srmediathek.py
index 359dadaa3..3cc39870f 100644
--- a/yt_dlp/extractor/srmediathek.py
+++ b/yt_dlp/extractor/srmediathek.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .ard import ARDMediathekBaseIE
 from ..utils import (
     ExtractorError,
diff --git a/yt_dlp/extractor/stanfordoc.py b/yt_dlp/extractor/stanfordoc.py
index 0003075ac..be0f4afc1 100644
--- a/yt_dlp/extractor/stanfordoc.py
+++ b/yt_dlp/extractor/stanfordoc.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/startv.py b/yt_dlp/extractor/startv.py
index 411320ede..bb6e8f1ea 100644
--- a/yt_dlp/extractor/startv.py
+++ b/yt_dlp/extractor/startv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import (
     compat_str,
diff --git a/yt_dlp/extractor/steam.py b/yt_dlp/extractor/steam.py
index 4ed0fb592..ab22fdbc6 100644
--- a/yt_dlp/extractor/steam.py
+++ b/yt_dlp/extractor/steam.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/stitcher.py b/yt_dlp/extractor/stitcher.py
index 822782507..2fd200f87 100644
--- a/yt_dlp/extractor/stitcher.py
+++ b/yt_dlp/extractor/stitcher.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
diff --git a/yt_dlp/extractor/storyfire.py b/yt_dlp/extractor/storyfire.py
index e18a59a49..716190220 100644
--- a/yt_dlp/extractor/storyfire.py
+++ b/yt_dlp/extractor/storyfire.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import functools
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/streamable.py b/yt_dlp/extractor/streamable.py
index 808129649..a2935b04b 100644
--- a/yt_dlp/extractor/streamable.py
+++ b/yt_dlp/extractor/streamable.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/streamanity.py b/yt_dlp/extractor/streamanity.py
index 2e2d5eedf..f8c37c0dd 100644
--- a/yt_dlp/extractor/streamanity.py
+++ b/yt_dlp/extractor/streamanity.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/streamcloud.py b/yt_dlp/extractor/streamcloud.py
index b97bb4374..728980921 100644
--- a/yt_dlp/extractor/streamcloud.py
+++ b/yt_dlp/extractor/streamcloud.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/streamcz.py b/yt_dlp/extractor/streamcz.py
index 4cb9923e2..85fc3a3c3 100644
--- a/yt_dlp/extractor/streamcz.py
+++ b/yt_dlp/extractor/streamcz.py
@@ -1,4 +1,3 @@
-# coding: utf-8
 import json
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/streamff.py b/yt_dlp/extractor/streamff.py
index 6b190bb3b..93c42942c 100644
--- a/yt_dlp/extractor/streamff.py
+++ b/yt_dlp/extractor/streamff.py
@@ -1,4 +1,3 @@
-# coding: utf-8
 from .common import InfoExtractor
 from ..utils import int_or_none, parse_iso8601
 
diff --git a/yt_dlp/extractor/streetvoice.py b/yt_dlp/extractor/streetvoice.py
index f21681ae7..a32c8bc37 100644
--- a/yt_dlp/extractor/streetvoice.py
+++ b/yt_dlp/extractor/streetvoice.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
diff --git a/yt_dlp/extractor/stretchinternet.py b/yt_dlp/extractor/stretchinternet.py
index ec08eae55..e438dee11 100644
--- a/yt_dlp/extractor/stretchinternet.py
+++ b/yt_dlp/extractor/stretchinternet.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/stripchat.py b/yt_dlp/extractor/stripchat.py
index 0d4a0ce4c..a7c7b0649 100644
--- a/yt_dlp/extractor/stripchat.py
+++ b/yt_dlp/extractor/stripchat.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import (
     compat_str,
diff --git a/yt_dlp/extractor/stv.py b/yt_dlp/extractor/stv.py
index ba5661d74..618dc4329 100644
--- a/yt_dlp/extractor/stv.py
+++ b/yt_dlp/extractor/stv.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..utils import (
     compat_str,
diff --git a/yt_dlp/extractor/sunporno.py b/yt_dlp/extractor/sunporno.py
index 59b77bf92..19498701c 100644
--- a/yt_dlp/extractor/sunporno.py
+++ b/yt_dlp/extractor/sunporno.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/sverigesradio.py b/yt_dlp/extractor/sverigesradio.py
index aa0691f0d..4a4b5cf7e 100644
--- a/yt_dlp/extractor/sverigesradio.py
+++ b/yt_dlp/extractor/sverigesradio.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     determine_ext,
diff --git a/yt_dlp/extractor/svt.py b/yt_dlp/extractor/svt.py
index 8ca62e370..e0c436b67 100644
--- a/yt_dlp/extractor/svt.py
+++ b/yt_dlp/extractor/svt.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/swrmediathek.py b/yt_dlp/extractor/swrmediathek.py
index 0f615979e..deebdd1a4 100644
--- a/yt_dlp/extractor/swrmediathek.py
+++ b/yt_dlp/extractor/swrmediathek.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     parse_duration,
diff --git a/yt_dlp/extractor/syfy.py b/yt_dlp/extractor/syfy.py
index def7e5a2c..c79d27a0d 100644
--- a/yt_dlp/extractor/syfy.py
+++ b/yt_dlp/extractor/syfy.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .adobepass import AdobePassIE
 from ..utils import (
     update_url_query,
diff --git a/yt_dlp/extractor/sztvhu.py b/yt_dlp/extractor/sztvhu.py
index cfad33146..1cbc2a3cf 100644
--- a/yt_dlp/extractor/sztvhu.py
+++ b/yt_dlp/extractor/sztvhu.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/tagesschau.py b/yt_dlp/extractor/tagesschau.py
index 6e03d0a7d..9b9513f07 100644
--- a/yt_dlp/extractor/tagesschau.py
+++ b/yt_dlp/extractor/tagesschau.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/tass.py b/yt_dlp/extractor/tass.py
index 6d336da78..d20dacfc1 100644
--- a/yt_dlp/extractor/tass.py
+++ b/yt_dlp/extractor/tass.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/tastytrade.py b/yt_dlp/extractor/tastytrade.py
index 7fe96bd5f..bb26926e8 100644
--- a/yt_dlp/extractor/tastytrade.py
+++ b/yt_dlp/extractor/tastytrade.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from .ooyala import OoyalaIE
 
diff --git a/yt_dlp/extractor/tbs.py b/yt_dlp/extractor/tbs.py
index c7d62ff4e..808c6c73d 100644
--- a/yt_dlp/extractor/tbs.py
+++ b/yt_dlp/extractor/tbs.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .turner import TurnerBaseIE
diff --git a/yt_dlp/extractor/tdslifeway.py b/yt_dlp/extractor/tdslifeway.py
index 101c6ee31..3623a68c8 100644
--- a/yt_dlp/extractor/tdslifeway.py
+++ b/yt_dlp/extractor/tdslifeway.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/teachable.py b/yt_dlp/extractor/teachable.py
index 232eaa521..e480d7610 100644
--- a/yt_dlp/extractor/teachable.py
+++ b/yt_dlp/extractor/teachable.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/teachertube.py b/yt_dlp/extractor/teachertube.py
index e22f0114c..2bf836abd 100644
--- a/yt_dlp/extractor/teachertube.py
+++ b/yt_dlp/extractor/teachertube.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/teachingchannel.py b/yt_dlp/extractor/teachingchannel.py
index 624cdb3ad..275f6d1f9 100644
--- a/yt_dlp/extractor/teachingchannel.py
+++ b/yt_dlp/extractor/teachingchannel.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/teamcoco.py b/yt_dlp/extractor/teamcoco.py
index 5793b711f..840702ed9 100644
--- a/yt_dlp/extractor/teamcoco.py
+++ b/yt_dlp/extractor/teamcoco.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 
 from .turner import TurnerBaseIE
diff --git a/yt_dlp/extractor/teamtreehouse.py b/yt_dlp/extractor/teamtreehouse.py
index 64522ec4c..dd802db5b 100644
--- a/yt_dlp/extractor/teamtreehouse.py
+++ b/yt_dlp/extractor/teamtreehouse.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/techtalks.py b/yt_dlp/extractor/techtalks.py
index 78f07319b..d37de360b 100644
--- a/yt_dlp/extractor/techtalks.py
+++ b/yt_dlp/extractor/techtalks.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/tele13.py b/yt_dlp/extractor/tele13.py
index f8a27550e..8e35bc85f 100644
--- a/yt_dlp/extractor/tele13.py
+++ b/yt_dlp/extractor/tele13.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from .youtube import YoutubeIE
 from ..utils import (
diff --git a/yt_dlp/extractor/tele5.py b/yt_dlp/extractor/tele5.py
index c7beee153..58d343b44 100644
--- a/yt_dlp/extractor/tele5.py
+++ b/yt_dlp/extractor/tele5.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .dplay import DPlayIE
 from ..compat import compat_urlparse
 from ..utils import (
diff --git a/yt_dlp/extractor/telebruxelles.py b/yt_dlp/extractor/telebruxelles.py
index 9e8c89bd6..8d87b6ec1 100644
--- a/yt_dlp/extractor/telebruxelles.py
+++ b/yt_dlp/extractor/telebruxelles.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/telecinco.py b/yt_dlp/extractor/telecinco.py
index eecd6a5c9..a9c0755f4 100644
--- a/yt_dlp/extractor/telecinco.py
+++ b/yt_dlp/extractor/telecinco.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 import re
 
diff --git a/yt_dlp/extractor/telegraaf.py b/yt_dlp/extractor/telegraaf.py
index 2dc020537..bc9a8d608 100644
--- a/yt_dlp/extractor/telegraaf.py
+++ b/yt_dlp/extractor/telegraaf.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     determine_ext,
diff --git a/yt_dlp/extractor/telemb.py b/yt_dlp/extractor/telemb.py
index ac2d603b6..7e444c0d0 100644
--- a/yt_dlp/extractor/telemb.py
+++ b/yt_dlp/extractor/telemb.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/telemundo.py b/yt_dlp/extractor/telemundo.py
index ebcecf55f..64954b8f1 100644
--- a/yt_dlp/extractor/telemundo.py
+++ b/yt_dlp/extractor/telemundo.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     try_get,
diff --git a/yt_dlp/extractor/telequebec.py b/yt_dlp/extractor/telequebec.py
index 4bef2fe76..e89137269 100644
--- a/yt_dlp/extractor/telequebec.py
+++ b/yt_dlp/extractor/telequebec.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
diff --git a/yt_dlp/extractor/teletask.py b/yt_dlp/extractor/teletask.py
index b9e2ef8ca..a73dd68fb 100644
--- a/yt_dlp/extractor/teletask.py
+++ b/yt_dlp/extractor/teletask.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/telewebion.py b/yt_dlp/extractor/telewebion.py
index 1207b1a1b..550549f05 100644
--- a/yt_dlp/extractor/telewebion.py
+++ b/yt_dlp/extractor/telewebion.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/tennistv.py b/yt_dlp/extractor/tennistv.py
index 58fdecebe..80acaf190 100644
--- a/yt_dlp/extractor/tennistv.py
+++ b/yt_dlp/extractor/tennistv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/tenplay.py b/yt_dlp/extractor/tenplay.py
index 5c7b54531..fc4781447 100644
--- a/yt_dlp/extractor/tenplay.py
+++ b/yt_dlp/extractor/tenplay.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from datetime import datetime
 import base64
 
diff --git a/yt_dlp/extractor/testurl.py b/yt_dlp/extractor/testurl.py
index 8bc512a9c..140fa4a96 100644
--- a/yt_dlp/extractor/testurl.py
+++ b/yt_dlp/extractor/testurl.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/tf1.py b/yt_dlp/extractor/tf1.py
index 44785bc65..4cf0322b3 100644
--- a/yt_dlp/extractor/tf1.py
+++ b/yt_dlp/extractor/tf1.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/tfo.py b/yt_dlp/extractor/tfo.py
index 0631cb7ab..a24789cb3 100644
--- a/yt_dlp/extractor/tfo.py
+++ b/yt_dlp/extractor/tfo.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/theintercept.py b/yt_dlp/extractor/theintercept.py
index f23b58713..a991a4dfd 100644
--- a/yt_dlp/extractor/theintercept.py
+++ b/yt_dlp/extractor/theintercept.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
diff --git a/yt_dlp/extractor/theplatform.py b/yt_dlp/extractor/theplatform.py
index c2729f12d..bf7efc013 100644
--- a/yt_dlp/extractor/theplatform.py
+++ b/yt_dlp/extractor/theplatform.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 import time
 import hmac
diff --git a/yt_dlp/extractor/thestar.py b/yt_dlp/extractor/thestar.py
index c3f118894..293c34c06 100644
--- a/yt_dlp/extractor/thestar.py
+++ b/yt_dlp/extractor/thestar.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/thesun.py b/yt_dlp/extractor/thesun.py
index 15d4a6932..ba5848283 100644
--- a/yt_dlp/extractor/thesun.py
+++ b/yt_dlp/extractor/thesun.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/theta.py b/yt_dlp/extractor/theta.py
index 8b6d70a9f..3ec6b9711 100644
--- a/yt_dlp/extractor/theta.py
+++ b/yt_dlp/extractor/theta.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import try_get
 
diff --git a/yt_dlp/extractor/theweatherchannel.py b/yt_dlp/extractor/theweatherchannel.py
index 9e506c9e0..9e94cd1ea 100644
--- a/yt_dlp/extractor/theweatherchannel.py
+++ b/yt_dlp/extractor/theweatherchannel.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 
 from .theplatform import ThePlatformIE
diff --git a/yt_dlp/extractor/thisamericanlife.py b/yt_dlp/extractor/thisamericanlife.py
index 91e45f2c3..9a3d79840 100644
--- a/yt_dlp/extractor/thisamericanlife.py
+++ b/yt_dlp/extractor/thisamericanlife.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/thisav.py b/yt_dlp/extractor/thisav.py
index 6bb00b3ab..b1cd57d1f 100644
--- a/yt_dlp/extractor/thisav.py
+++ b/yt_dlp/extractor/thisav.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..utils import remove_end
 
diff --git a/yt_dlp/extractor/thisoldhouse.py b/yt_dlp/extractor/thisoldhouse.py
index 8a1d17311..55b6413ae 100644
--- a/yt_dlp/extractor/thisoldhouse.py
+++ b/yt_dlp/extractor/thisoldhouse.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import HEADRequest
 
diff --git a/yt_dlp/extractor/threeqsdn.py b/yt_dlp/extractor/threeqsdn.py
index 00a51dccd..1c0baf5ed 100644
--- a/yt_dlp/extractor/threeqsdn.py
+++ b/yt_dlp/extractor/threeqsdn.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/threespeak.py b/yt_dlp/extractor/threespeak.py
index fe6a9554a..ce28a37c0 100644
--- a/yt_dlp/extractor/threespeak.py
+++ b/yt_dlp/extractor/threespeak.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py
index 987b0c43b..4ba993582 100644
--- a/yt_dlp/extractor/tiktok.py
+++ b/yt_dlp/extractor/tiktok.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import itertools
 import random
 import string
diff --git a/yt_dlp/extractor/tinypic.py b/yt_dlp/extractor/tinypic.py
index 39056e52e..216208cbd 100644
--- a/yt_dlp/extractor/tinypic.py
+++ b/yt_dlp/extractor/tinypic.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/tmz.py b/yt_dlp/extractor/tmz.py
index aee2273b8..a8c91f617 100644
--- a/yt_dlp/extractor/tmz.py
+++ b/yt_dlp/extractor/tmz.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/tnaflix.py b/yt_dlp/extractor/tnaflix.py
index d7617f708..6b766f3cc 100644
--- a/yt_dlp/extractor/tnaflix.py
+++ b/yt_dlp/extractor/tnaflix.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/toggle.py b/yt_dlp/extractor/toggle.py
index eb873495f..51a51d84b 100644
--- a/yt_dlp/extractor/toggle.py
+++ b/yt_dlp/extractor/toggle.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 import re
 
diff --git a/yt_dlp/extractor/tokentube.py b/yt_dlp/extractor/tokentube.py
index 579623fed..a30cabb3c 100644
--- a/yt_dlp/extractor/tokentube.py
+++ b/yt_dlp/extractor/tokentube.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import functools
 import re
 
diff --git a/yt_dlp/extractor/tonline.py b/yt_dlp/extractor/tonline.py
index 9b6a40db5..720282663 100644
--- a/yt_dlp/extractor/tonline.py
+++ b/yt_dlp/extractor/tonline.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import int_or_none, join_nonempty
 
diff --git a/yt_dlp/extractor/toongoggles.py b/yt_dlp/extractor/toongoggles.py
index df13d64c0..1b8fc3acd 100644
--- a/yt_dlp/extractor/toongoggles.py
+++ b/yt_dlp/extractor/toongoggles.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
diff --git a/yt_dlp/extractor/toutv.py b/yt_dlp/extractor/toutv.py
index 1d5da1040..349c0bded 100644
--- a/yt_dlp/extractor/toutv.py
+++ b/yt_dlp/extractor/toutv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 
 from .radiocanada import RadioCanadaIE
diff --git a/yt_dlp/extractor/toypics.py b/yt_dlp/extractor/toypics.py
index f705a06c9..bc7336186 100644
--- a/yt_dlp/extractor/toypics.py
+++ b/yt_dlp/extractor/toypics.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 import re
 
diff --git a/yt_dlp/extractor/traileraddict.py b/yt_dlp/extractor/traileraddict.py
index 514f4793e..5c4a138c4 100644
--- a/yt_dlp/extractor/traileraddict.py
+++ b/yt_dlp/extractor/traileraddict.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/trilulilu.py b/yt_dlp/extractor/trilulilu.py
index a800449e9..fb97be737 100644
--- a/yt_dlp/extractor/trilulilu.py
+++ b/yt_dlp/extractor/trilulilu.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
diff --git a/yt_dlp/extractor/trovo.py b/yt_dlp/extractor/trovo.py
index 65ea13ddb..3487f3acc 100644
--- a/yt_dlp/extractor/trovo.py
+++ b/yt_dlp/extractor/trovo.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import itertools
 import json
 
diff --git a/yt_dlp/extractor/trueid.py b/yt_dlp/extractor/trueid.py
index fc98303ab..696343627 100644
--- a/yt_dlp/extractor/trueid.py
+++ b/yt_dlp/extractor/trueid.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import compat_HTTPError
 from ..utils import (
diff --git a/yt_dlp/extractor/trunews.py b/yt_dlp/extractor/trunews.py
index cca5b5ceb..d5ce86ece 100644
--- a/yt_dlp/extractor/trunews.py
+++ b/yt_dlp/extractor/trunews.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/trutv.py b/yt_dlp/extractor/trutv.py
index c09ff897c..ea0f2f40e 100644
--- a/yt_dlp/extractor/trutv.py
+++ b/yt_dlp/extractor/trutv.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .turner import TurnerBaseIE
 from ..utils import (
     int_or_none,
diff --git a/yt_dlp/extractor/tube8.py b/yt_dlp/extractor/tube8.py
index db93b0182..32e80d9d2 100644
--- a/yt_dlp/extractor/tube8.py
+++ b/yt_dlp/extractor/tube8.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from ..utils import (
diff --git a/yt_dlp/extractor/tubitv.py b/yt_dlp/extractor/tubitv.py
index 31feb9a70..9c8e1ac87 100644
--- a/yt_dlp/extractor/tubitv.py
+++ b/yt_dlp/extractor/tubitv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/tudou.py b/yt_dlp/extractor/tudou.py
index 7421378a8..69774ee38 100644
--- a/yt_dlp/extractor/tudou.py
+++ b/yt_dlp/extractor/tudou.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/tumblr.py b/yt_dlp/extractor/tumblr.py
index 8086f613d..5d6615100 100644
--- a/yt_dlp/extractor/tumblr.py
+++ b/yt_dlp/extractor/tumblr.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
diff --git a/yt_dlp/extractor/tunein.py b/yt_dlp/extractor/tunein.py
index 7e51de89e..e3d3f2a96 100644
--- a/yt_dlp/extractor/tunein.py
+++ b/yt_dlp/extractor/tunein.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/tunepk.py b/yt_dlp/extractor/tunepk.py
index 9d42651ce..2973d15ec 100644
--- a/yt_dlp/extractor/tunepk.py
+++ b/yt_dlp/extractor/tunepk.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
diff --git a/yt_dlp/extractor/turbo.py b/yt_dlp/extractor/turbo.py
index f6bbf2529..e3f8941c4 100644
--- a/yt_dlp/extractor/turbo.py
+++ b/yt_dlp/extractor/turbo.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/turner.py b/yt_dlp/extractor/turner.py
index 519dc323c..568b6de49 100644
--- a/yt_dlp/extractor/turner.py
+++ b/yt_dlp/extractor/turner.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .adobepass import AdobePassIE
diff --git a/yt_dlp/extractor/tv2.py b/yt_dlp/extractor/tv2.py
index 977da30fe..391baa6c5 100644
--- a/yt_dlp/extractor/tv2.py
+++ b/yt_dlp/extractor/tv2.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/tv2dk.py b/yt_dlp/extractor/tv2dk.py
index ec5cbdf03..0af286312 100644
--- a/yt_dlp/extractor/tv2dk.py
+++ b/yt_dlp/extractor/tv2dk.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import json
 import re
 
diff --git a/yt_dlp/extractor/tv2hu.py b/yt_dlp/extractor/tv2hu.py
index f2104358b..6ac07716b 100644
--- a/yt_dlp/extractor/tv2hu.py
+++ b/yt_dlp/extractor/tv2hu.py
@@ -1,6 +1,4 @@
 # encoding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     traverse_obj,
diff --git a/yt_dlp/extractor/tv4.py b/yt_dlp/extractor/tv4.py
index 4043e6366..e8cdd5c8c 100644
--- a/yt_dlp/extractor/tv4.py
+++ b/yt_dlp/extractor/tv4.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/tv5mondeplus.py b/yt_dlp/extractor/tv5mondeplus.py
index a0832d28f..d449cdc04 100644
--- a/yt_dlp/extractor/tv5mondeplus.py
+++ b/yt_dlp/extractor/tv5mondeplus.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     determine_ext,
diff --git a/yt_dlp/extractor/tv5unis.py b/yt_dlp/extractor/tv5unis.py
index 398b85db5..978255b17 100644
--- a/yt_dlp/extractor/tv5unis.py
+++ b/yt_dlp/extractor/tv5unis.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
diff --git a/yt_dlp/extractor/tva.py b/yt_dlp/extractor/tva.py
index 52a4ddf32..9afe23328 100644
--- a/yt_dlp/extractor/tva.py
+++ b/yt_dlp/extractor/tva.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     float_or_none,
diff --git a/yt_dlp/extractor/tvanouvelles.py b/yt_dlp/extractor/tvanouvelles.py
index 1086176a2..b9f5e110e 100644
--- a/yt_dlp/extractor/tvanouvelles.py
+++ b/yt_dlp/extractor/tvanouvelles.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/tvc.py b/yt_dlp/extractor/tvc.py
index 008f64cc2..4ccc8f522 100644
--- a/yt_dlp/extractor/tvc.py
+++ b/yt_dlp/extractor/tvc.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/tver.py b/yt_dlp/extractor/tver.py
index f23af1f14..19236f8e8 100644
--- a/yt_dlp/extractor/tver.py
+++ b/yt_dlp/extractor/tver.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
diff --git a/yt_dlp/extractor/tvigle.py b/yt_dlp/extractor/tvigle.py
index aa25ba0dc..cc1d35dc2 100644
--- a/yt_dlp/extractor/tvigle.py
+++ b/yt_dlp/extractor/tvigle.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
diff --git a/yt_dlp/extractor/tvland.py b/yt_dlp/extractor/tvland.py
index 9ebf57f74..481d5eb19 100644
--- a/yt_dlp/extractor/tvland.py
+++ b/yt_dlp/extractor/tvland.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .mtv import MTVServicesInfoExtractor
 
 # TODO: Remove - Reason not used anymore - Service moved to youtube
diff --git a/yt_dlp/extractor/tvn24.py b/yt_dlp/extractor/tvn24.py
index de0fb5063..22b605823 100644
--- a/yt_dlp/extractor/tvn24.py
+++ b/yt_dlp/extractor/tvn24.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
diff --git a/yt_dlp/extractor/tvnet.py b/yt_dlp/extractor/tvnet.py
index aa1e9d923..5820bb4a7 100644
--- a/yt_dlp/extractor/tvnet.py
+++ b/yt_dlp/extractor/tvnet.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/tvnoe.py b/yt_dlp/extractor/tvnoe.py
index 26a5aeae4..712fbb275 100644
--- a/yt_dlp/extractor/tvnoe.py
+++ b/yt_dlp/extractor/tvnoe.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     clean_html,
diff --git a/yt_dlp/extractor/tvnow.py b/yt_dlp/extractor/tvnow.py
index b31818477..4aa558d83 100644
--- a/yt_dlp/extractor/tvnow.py
+++ b/yt_dlp/extractor/tvnow.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/tvopengr.py b/yt_dlp/extractor/tvopengr.py
index a11cdc6b0..aded261f3 100644
--- a/yt_dlp/extractor/tvopengr.py
+++ b/yt_dlp/extractor/tvopengr.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/tvp.py b/yt_dlp/extractor/tvp.py
index 48e2c6e76..69168f655 100644
--- a/yt_dlp/extractor/tvp.py
+++ b/yt_dlp/extractor/tvp.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import itertools
 import random
 import re
diff --git a/yt_dlp/extractor/tvplay.py b/yt_dlp/extractor/tvplay.py
index b5dbc5526..f815b5137 100644
--- a/yt_dlp/extractor/tvplay.py
+++ b/yt_dlp/extractor/tvplay.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/tvplayer.py b/yt_dlp/extractor/tvplayer.py
index 5970596b2..31d70b6b8 100644
--- a/yt_dlp/extractor/tvplayer.py
+++ b/yt_dlp/extractor/tvplayer.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import (
     compat_HTTPError,
diff --git a/yt_dlp/extractor/tweakers.py b/yt_dlp/extractor/tweakers.py
index 2b10d9bca..6d1f92bbb 100644
--- a/yt_dlp/extractor/tweakers.py
+++ b/yt_dlp/extractor/tweakers.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
diff --git a/yt_dlp/extractor/twentyfourvideo.py b/yt_dlp/extractor/twentyfourvideo.py
index ae19e11e1..baeb85d47 100644
--- a/yt_dlp/extractor/twentyfourvideo.py
+++ b/yt_dlp/extractor/twentyfourvideo.py
@@ -1,7 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..utils import (
     parse_iso8601,
diff --git a/yt_dlp/extractor/twentymin.py b/yt_dlp/extractor/twentymin.py
index a42977f39..616c3c36e 100644
--- a/yt_dlp/extractor/twentymin.py
+++ b/yt_dlp/extractor/twentymin.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/twentythreevideo.py b/yt_dlp/extractor/twentythreevideo.py
index e8cf5a1e9..290c3761e 100644
--- a/yt_dlp/extractor/twentythreevideo.py
+++ b/yt_dlp/extractor/twentythreevideo.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..utils import int_or_none
 
diff --git a/yt_dlp/extractor/twitcasting.py b/yt_dlp/extractor/twitcasting.py
index 7f3fa0735..3d6a12265 100644
--- a/yt_dlp/extractor/twitcasting.py
+++ b/yt_dlp/extractor/twitcasting.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import itertools
 import re
 
diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py
index 10de74c8e..834350d12 100644
--- a/yt_dlp/extractor/twitch.py
+++ b/yt_dlp/extractor/twitch.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import collections
 import itertools
 import json
diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py
index 8ccc38e24..af6750333 100644
--- a/yt_dlp/extractor/twitter.py
+++ b/yt_dlp/extractor/twitter.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/udemy.py b/yt_dlp/extractor/udemy.py
index 77485247f..d35cd0d43 100644
--- a/yt_dlp/extractor/udemy.py
+++ b/yt_dlp/extractor/udemy.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/udn.py b/yt_dlp/extractor/udn.py
index 2c8e5c7b4..4fa74b9e8 100644
--- a/yt_dlp/extractor/udn.py
+++ b/yt_dlp/extractor/udn.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/ufctv.py b/yt_dlp/extractor/ufctv.py
index 3d74ba071..2c1c5e0ff 100644
--- a/yt_dlp/extractor/ufctv.py
+++ b/yt_dlp/extractor/ufctv.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .imggaming import ImgGamingBaseIE
 
 
diff --git a/yt_dlp/extractor/ukcolumn.py b/yt_dlp/extractor/ukcolumn.py
index d2626f0d3..aade79f20 100644
--- a/yt_dlp/extractor/ukcolumn.py
+++ b/yt_dlp/extractor/ukcolumn.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from ..utils import (
     unescapeHTML,
     urljoin,
diff --git a/yt_dlp/extractor/uktvplay.py b/yt_dlp/extractor/uktvplay.py
index f28fd514d..abea07ab5 100644
--- a/yt_dlp/extractor/uktvplay.py
+++ b/yt_dlp/extractor/uktvplay.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 
 
diff --git a/yt_dlp/extractor/umg.py b/yt_dlp/extractor/umg.py
index c1b65d189..e6ed656b9 100644
--- a/yt_dlp/extractor/umg.py
+++ b/yt_dlp/extractor/umg.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
diff --git a/yt_dlp/extractor/unistra.py b/yt_dlp/extractor/unistra.py
index 685d74f35..083c87209 100644
--- a/yt_dlp/extractor/unistra.py
+++ b/yt_dlp/extractor/unistra.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/unity.py b/yt_dlp/extractor/unity.py
index 73daacf29..d1b0ecbf3 100644
--- a/yt_dlp/extractor/unity.py
+++ b/yt_dlp/extractor/unity.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from .youtube import YoutubeIE
 
diff --git a/yt_dlp/extractor/uol.py b/yt_dlp/extractor/uol.py
index 1baee0b10..e3d9127d8 100644
--- a/yt_dlp/extractor/uol.py
+++ b/yt_dlp/extractor/uol.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import (
     compat_str,
diff --git a/yt_dlp/extractor/uplynk.py b/yt_dlp/extractor/uplynk.py
index 9adb96943..04c96f388 100644
--- a/yt_dlp/extractor/uplynk.py
+++ b/yt_dlp/extractor/uplynk.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
diff --git a/yt_dlp/extractor/urort.py b/yt_dlp/extractor/urort.py
index 020425fc7..296799d38 100644
--- a/yt_dlp/extractor/urort.py
+++ b/yt_dlp/extractor/urort.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse,
diff --git a/yt_dlp/extractor/urplay.py b/yt_dlp/extractor/urplay.py
index eb2ab26e1..30bd3dcbf 100644
--- a/yt_dlp/extractor/urplay.py
+++ b/yt_dlp/extractor/urplay.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     dict_get,
diff --git a/yt_dlp/extractor/usanetwork.py b/yt_dlp/extractor/usanetwork.py
index d953e460b..d6b58a51c 100644
--- a/yt_dlp/extractor/usanetwork.py
+++ b/yt_dlp/extractor/usanetwork.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .nbc import NBCIE
 
 
diff --git a/yt_dlp/extractor/usatoday.py b/yt_dlp/extractor/usatoday.py
index b2103448d..3243f3e3b 100644
--- a/yt_dlp/extractor/usatoday.py
+++ b/yt_dlp/extractor/usatoday.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
diff --git a/yt_dlp/extractor/ustream.py b/yt_dlp/extractor/ustream.py
index 4a7a8f879..fff21667a 100644
--- a/yt_dlp/extractor/ustream.py
+++ b/yt_dlp/extractor/ustream.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import random
 import re
 
diff --git a/yt_dlp/extractor/ustudio.py b/yt_dlp/extractor/ustudio.py
index 92509d1bf..fd5dad0fc 100644
--- a/yt_dlp/extractor/ustudio.py
+++ b/yt_dlp/extractor/ustudio.py
@@ -1,6 +1,3 @@
-from __future__ import unicode_literals
-
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
diff --git
a/yt_dlp/extractor/utreon.py b/yt_dlp/extractor/utreon.py index 4986635f2..1213ae1bf 100644 --- a/yt_dlp/extractor/utreon.py +++ b/yt_dlp/extractor/utreon.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( dict_get, diff --git a/yt_dlp/extractor/varzesh3.py b/yt_dlp/extractor/varzesh3.py index 32655b96d..2c13cbdc0 100644 --- a/yt_dlp/extractor/varzesh3.py +++ b/yt_dlp/extractor/varzesh3.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/vbox7.py b/yt_dlp/extractor/vbox7.py index 8152acefd..76c844cb8 100644 --- a/yt_dlp/extractor/vbox7.py +++ b/yt_dlp/extractor/vbox7.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/veehd.py b/yt_dlp/extractor/veehd.py index a6dc3c8d8..5ecd88726 100644 --- a/yt_dlp/extractor/veehd.py +++ b/yt_dlp/extractor/veehd.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re import json diff --git a/yt_dlp/extractor/veo.py b/yt_dlp/extractor/veo.py index d87bb5b47..25d462a7d 100644 --- a/yt_dlp/extractor/veo.py +++ b/yt_dlp/extractor/veo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/veoh.py b/yt_dlp/extractor/veoh.py index d9afb5617..70280ae85 100644 --- a/yt_dlp/extractor/veoh.py +++ b/yt_dlp/extractor/veoh.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/vesti.py b/yt_dlp/extractor/vesti.py index 002047dbf..e9731a941 100644 --- a/yt_dlp/extractor/vesti.py +++ b/yt_dlp/extractor/vesti.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/vevo.py b/yt_dlp/extractor/vevo.py index 8a0f29259..bc0187511 100644 --- a/yt_dlp/extractor/vevo.py +++ b/yt_dlp/extractor/vevo.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re import json diff --git a/yt_dlp/extractor/vgtv.py b/yt_dlp/extractor/vgtv.py index 9d6090b08..6564b7b0b 100644 --- a/yt_dlp/extractor/vgtv.py +++ b/yt_dlp/extractor/vgtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/vh1.py b/yt_dlp/extractor/vh1.py index 862c5c7dc..41b8a4607 100644 --- a/yt_dlp/extractor/vh1.py +++ b/yt_dlp/extractor/vh1.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .mtv import MTVServicesInfoExtractor # TODO Remove - Reason: Outdated Site diff --git a/yt_dlp/extractor/vice.py b/yt_dlp/extractor/vice.py index c8c30559e..abb4a6fa0 100644 --- a/yt_dlp/extractor/vice.py +++ b/yt_dlp/extractor/vice.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import hashlib import json diff --git a/yt_dlp/extractor/vidbit.py b/yt_dlp/extractor/vidbit.py index 91f45b7cc..2813032db 100644 --- a/yt_dlp/extractor/vidbit.py +++ b/yt_dlp/extractor/vidbit.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( diff --git a/yt_dlp/extractor/viddler.py b/yt_dlp/extractor/viddler.py index ecc48246f..f491b67ef 
100644 --- a/yt_dlp/extractor/viddler.py +++ b/yt_dlp/extractor/viddler.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( float_or_none, diff --git a/yt_dlp/extractor/videa.py b/yt_dlp/extractor/videa.py index 90d705092..251eb78fe 100644 --- a/yt_dlp/extractor/videa.py +++ b/yt_dlp/extractor/videa.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import random import re import string diff --git a/yt_dlp/extractor/videocampus_sachsen.py b/yt_dlp/extractor/videocampus_sachsen.py index 96e98573f..fe9e061ae 100644 --- a/yt_dlp/extractor/videocampus_sachsen.py +++ b/yt_dlp/extractor/videocampus_sachsen.py @@ -1,4 +1,3 @@ -# coding: utf-8 from .common import InfoExtractor diff --git a/yt_dlp/extractor/videodetective.py b/yt_dlp/extractor/videodetective.py index fe70db713..7928a41c2 100644 --- a/yt_dlp/extractor/videodetective.py +++ b/yt_dlp/extractor/videodetective.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from .internetvideoarchive import InternetVideoArchiveIE diff --git a/yt_dlp/extractor/videofyme.py b/yt_dlp/extractor/videofyme.py index cd3f50a63..1d1c8f7b7 100644 --- a/yt_dlp/extractor/videofyme.py +++ b/yt_dlp/extractor/videofyme.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/videomore.py b/yt_dlp/extractor/videomore.py index 17ef3b1b9..09d12d192 100644 --- a/yt_dlp/extractor/videomore.py +++ b/yt_dlp/extractor/videomore.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/videopress.py b/yt_dlp/extractor/videopress.py index 6376ff096..3c5e27a9d 100644 --- a/yt_dlp/extractor/videopress.py +++ b/yt_dlp/extractor/videopress.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py index 6bfb8d442..599996bf9 100644 --- a/yt_dlp/extractor/vidio.py +++ b/yt_dlp/extractor/vidio.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/vidlii.py b/yt_dlp/extractor/vidlii.py index a63919ff2..b9845affd 100644 --- a/yt_dlp/extractor/vidlii.py +++ b/yt_dlp/extractor/vidlii.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/vidzi.py b/yt_dlp/extractor/vidzi.py index 42ea4952c..efa9be116 100644 --- a/yt_dlp/extractor/vidzi.py +++ b/yt_dlp/extractor/vidzi.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/vier.py b/yt_dlp/extractor/vier.py index 94aa350e7..eab894ab6 100644 --- a/yt_dlp/extractor/vier.py +++ b/yt_dlp/extractor/vier.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import itertools diff --git a/yt_dlp/extractor/viewlift.py b/yt_dlp/extractor/viewlift.py index 4627f66fd..d081a2f12 100644 --- a/yt_dlp/extractor/viewlift.py +++ b/yt_dlp/extractor/viewlift.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json import re diff --git a/yt_dlp/extractor/viidea.py b/yt_dlp/extractor/viidea.py index 0da06818b..157ce4d8f 100644 --- 
a/yt_dlp/extractor/viidea.py +++ b/yt_dlp/extractor/viidea.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/viki.py b/yt_dlp/extractor/viki.py index 8a930798d..a922b195c 100644 --- a/yt_dlp/extractor/viki.py +++ b/yt_dlp/extractor/viki.py @@ -1,5 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals import hashlib import hmac import json diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index a00b387f3..b2c929373 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import functools import re diff --git a/yt_dlp/extractor/vimm.py b/yt_dlp/extractor/vimm.py index 060b92ba6..3522b8e33 100644 --- a/yt_dlp/extractor/vimm.py +++ b/yt_dlp/extractor/vimm.py @@ -1,4 +1,3 @@ -# coding: utf-8 from .common import InfoExtractor diff --git a/yt_dlp/extractor/vimple.py b/yt_dlp/extractor/vimple.py index c74b43766..a8b16dd29 100644 --- a/yt_dlp/extractor/vimple.py +++ b/yt_dlp/extractor/vimple.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import int_or_none diff --git a/yt_dlp/extractor/vine.py b/yt_dlp/extractor/vine.py index e59b1037b..bbf43a83f 100644 --- a/yt_dlp/extractor/vine.py +++ b/yt_dlp/extractor/vine.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/viqeo.py b/yt_dlp/extractor/viqeo.py index be7dfa814..d214223e9 100644 --- a/yt_dlp/extractor/viqeo.py +++ b/yt_dlp/extractor/viqeo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/viu.py b/yt_dlp/extractor/viu.py index ba627ca5b..63b6fd3a1 100644 --- a/yt_dlp/extractor/viu.py +++ b/yt_dlp/extractor/viu.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import json import uuid diff --git a/yt_dlp/extractor/vk.py b/yt_dlp/extractor/vk.py index cbc315961..402508aa3 100644 --- a/yt_dlp/extractor/vk.py +++ b/yt_dlp/extractor/vk.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import collections import re diff --git a/yt_dlp/extractor/vlive.py b/yt_dlp/extractor/vlive.py index ae35c976c..c60801417 100644 --- a/yt_dlp/extractor/vlive.py +++ b/yt_dlp/extractor/vlive.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import json diff --git a/yt_dlp/extractor/vodlocker.py b/yt_dlp/extractor/vodlocker.py index 02c9617d2..1c7236ed3 100644 --- a/yt_dlp/extractor/vodlocker.py +++ b/yt_dlp/extractor/vodlocker.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/vodpl.py b/yt_dlp/extractor/vodpl.py index 9e919708e..8af1572d0 100644 --- a/yt_dlp/extractor/vodpl.py +++ b/yt_dlp/extractor/vodpl.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .onet import OnetBaseIE diff --git a/yt_dlp/extractor/vodplatform.py b/yt_dlp/extractor/vodplatform.py index 74d2257e7..2b45dcd86 100644 --- a/yt_dlp/extractor/vodplatform.py +++ b/yt_dlp/extractor/vodplatform.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from 
..utils import unescapeHTML diff --git a/yt_dlp/extractor/voicerepublic.py b/yt_dlp/extractor/voicerepublic.py index a52e40afa..e8cbd0e32 100644 --- a/yt_dlp/extractor/voicerepublic.py +++ b/yt_dlp/extractor/voicerepublic.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/voicy.py b/yt_dlp/extractor/voicy.py index 37c7d5685..e4570a03a 100644 --- a/yt_dlp/extractor/voicy.py +++ b/yt_dlp/extractor/voicy.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/voot.py b/yt_dlp/extractor/voot.py index a9b66b95c..7ac38a813 100644 --- a/yt_dlp/extractor/voot.py +++ b/yt_dlp/extractor/voot.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/voxmedia.py b/yt_dlp/extractor/voxmedia.py index 661208125..a7bf298aa 100644 --- a/yt_dlp/extractor/voxmedia.py +++ b/yt_dlp/extractor/voxmedia.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from .once import OnceIE from ..compat import compat_urllib_parse_unquote diff --git a/yt_dlp/extractor/vrak.py b/yt_dlp/extractor/vrak.py index daa247cce..198c0a294 100644 --- a/yt_dlp/extractor/vrak.py +++ b/yt_dlp/extractor/vrak.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/vrt.py b/yt_dlp/extractor/vrt.py index 10dc94abc..26f48bf67 100644 --- a/yt_dlp/extractor/vrt.py +++ b/yt_dlp/extractor/vrt.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( extract_attributes, diff --git a/yt_dlp/extractor/vrv.py b/yt_dlp/extractor/vrv.py index 00e1006c4..35662753e 100644 --- a/yt_dlp/extractor/vrv.py +++ b/yt_dlp/extractor/vrv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import json import hashlib diff --git a/yt_dlp/extractor/vshare.py b/yt_dlp/extractor/vshare.py index b4874ac39..8ef75d30e 100644 --- a/yt_dlp/extractor/vshare.py +++ b/yt_dlp/extractor/vshare.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/vtm.py b/yt_dlp/extractor/vtm.py index 093f1aa69..6381fd311 100644 --- a/yt_dlp/extractor/vtm.py +++ b/yt_dlp/extractor/vtm.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/vuclip.py b/yt_dlp/extractor/vuclip.py index 55e087bdb..0e562983d 100644 --- a/yt_dlp/extractor/vuclip.py +++ b/yt_dlp/extractor/vuclip.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/vupload.py b/yt_dlp/extractor/vupload.py index b561f63f7..23ea70c77 100644 --- a/yt_dlp/extractor/vupload.py +++ b/yt_dlp/extractor/vupload.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( parse_duration, diff --git a/yt_dlp/extractor/vvvvid.py b/yt_dlp/extractor/vvvvid.py index 3faa90fbd..ccc44d08a 100644 --- 
a/yt_dlp/extractor/vvvvid.py +++ b/yt_dlp/extractor/vvvvid.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/vyborymos.py b/yt_dlp/extractor/vyborymos.py index 4d93666c5..386518795 100644 --- a/yt_dlp/extractor/vyborymos.py +++ b/yt_dlp/extractor/vyborymos.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str diff --git a/yt_dlp/extractor/vzaar.py b/yt_dlp/extractor/vzaar.py index 54f88bba8..7ce0ba9f5 100644 --- a/yt_dlp/extractor/vzaar.py +++ b/yt_dlp/extractor/vzaar.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/wakanim.py b/yt_dlp/extractor/wakanim.py index a70a71961..155008f8c 100644 --- a/yt_dlp/extractor/wakanim.py +++ b/yt_dlp/extractor/wakanim.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from urllib.parse import unquote from .common import InfoExtractor diff --git a/yt_dlp/extractor/walla.py b/yt_dlp/extractor/walla.py index 00f081bca..6b954c5cc 100644 --- a/yt_dlp/extractor/walla.py +++ b/yt_dlp/extractor/walla.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/wasdtv.py b/yt_dlp/extractor/wasdtv.py index 38c10dc62..bf1ad65b2 100644 --- a/yt_dlp/extractor/wasdtv.py +++ b/yt_dlp/extractor/wasdtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/washingtonpost.py b/yt_dlp/extractor/washingtonpost.py index 9d6ae2870..7274eaa39 100644 --- a/yt_dlp/extractor/washingtonpost.py +++ b/yt_dlp/extractor/washingtonpost.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/wat.py b/yt_dlp/extractor/wat.py index 9ff4523db..2ad664890 100644 --- a/yt_dlp/extractor/wat.py +++ b/yt_dlp/extractor/wat.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/watchbox.py b/yt_dlp/extractor/watchbox.py index d19d80102..e41148d4a 100644 --- a/yt_dlp/extractor/watchbox.py +++ b/yt_dlp/extractor/watchbox.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/watchindianporn.py b/yt_dlp/extractor/watchindianporn.py index a86819173..3ded2d1d4 100644 --- a/yt_dlp/extractor/watchindianporn.py +++ b/yt_dlp/extractor/watchindianporn.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/wdr.py b/yt_dlp/extractor/wdr.py index ef58a66c3..d0ad69477 100644 --- a/yt_dlp/extractor/wdr.py +++ b/yt_dlp/extractor/wdr.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/webcaster.py b/yt_dlp/extractor/webcaster.py index a858e992c..374fe35cd 100644 --- a/yt_dlp/extractor/webcaster.py +++ b/yt_dlp/extractor/webcaster.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - 
import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/webofstories.py b/yt_dlp/extractor/webofstories.py index f2b8d19b4..fde9300b0 100644 --- a/yt_dlp/extractor/webofstories.py +++ b/yt_dlp/extractor/webofstories.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/weibo.py b/yt_dlp/extractor/weibo.py index dafa2af3b..d5a52ce20 100644 --- a/yt_dlp/extractor/weibo.py +++ b/yt_dlp/extractor/weibo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor import json diff --git a/yt_dlp/extractor/weiqitv.py b/yt_dlp/extractor/weiqitv.py index 7e0befd39..c9ff64154 100644 --- a/yt_dlp/extractor/weiqitv.py +++ b/yt_dlp/extractor/weiqitv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/whowatch.py b/yt_dlp/extractor/whowatch.py index e4b610d00..21574471c 100644 --- a/yt_dlp/extractor/whowatch.py +++ b/yt_dlp/extractor/whowatch.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/willow.py b/yt_dlp/extractor/willow.py index 4d3d62f95..6c71e9a04 100644 --- a/yt_dlp/extractor/willow.py +++ b/yt_dlp/extractor/willow.py @@ -1,4 +1,3 @@ -# coding: utf-8 from ..utils import ExtractorError from .common import InfoExtractor diff --git a/yt_dlp/extractor/wimtv.py b/yt_dlp/extractor/wimtv.py index ea953bf77..6e7ec3436 100644 --- a/yt_dlp/extractor/wimtv.py +++ b/yt_dlp/extractor/wimtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/wistia.py b/yt_dlp/extractor/wistia.py index a170966c3..8f0e7949b 100644 --- a/yt_dlp/extractor/wistia.py +++ b/yt_dlp/extractor/wistia.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/worldstarhiphop.py b/yt_dlp/extractor/worldstarhiphop.py index 82587b4ce..c6948a1eb 100644 --- a/yt_dlp/extractor/worldstarhiphop.py +++ b/yt_dlp/extractor/worldstarhiphop.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/wppilot.py b/yt_dlp/extractor/wppilot.py index 3003a0f10..6349e5326 100644 --- a/yt_dlp/extractor/wppilot.py +++ b/yt_dlp/extractor/wppilot.py @@ -1,5 +1,3 @@ -# coding: utf-8 - from .common import InfoExtractor from ..utils import ( try_get, diff --git a/yt_dlp/extractor/wsj.py b/yt_dlp/extractor/wsj.py index 67236f377..8be3645e3 100644 --- a/yt_dlp/extractor/wsj.py +++ b/yt_dlp/extractor/wsj.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/wwe.py b/yt_dlp/extractor/wwe.py index bebc77bb5..9bbd477c3 100644 --- a/yt_dlp/extractor/wwe.py +++ b/yt_dlp/extractor/wwe.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/xbef.py b/yt_dlp/extractor/xbef.py index 4c41e98b2..ac69528a3 100644 --- a/yt_dlp/extractor/xbef.py +++ b/yt_dlp/extractor/xbef.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_urllib_parse_unquote diff --git 
a/yt_dlp/extractor/xboxclips.py b/yt_dlp/extractor/xboxclips.py index 9bac982f8..235b567d9 100644 --- a/yt_dlp/extractor/xboxclips.py +++ b/yt_dlp/extractor/xboxclips.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/xfileshare.py b/yt_dlp/extractor/xfileshare.py index cd97c77dc..28b6ecb6e 100644 --- a/yt_dlp/extractor/xfileshare.py +++ b/yt_dlp/extractor/xfileshare.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/xhamster.py b/yt_dlp/extractor/xhamster.py index 9d4ed47d4..ff15d3707 100644 --- a/yt_dlp/extractor/xhamster.py +++ b/yt_dlp/extractor/xhamster.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import itertools import re diff --git a/yt_dlp/extractor/xiami.py b/yt_dlp/extractor/xiami.py index 769aab331..71b2956a8 100644 --- a/yt_dlp/extractor/xiami.py +++ b/yt_dlp/extractor/xiami.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_urllib_parse_unquote from ..utils import int_or_none diff --git a/yt_dlp/extractor/ximalaya.py b/yt_dlp/extractor/ximalaya.py index 802d1bb1b..c3447fba0 100644 --- a/yt_dlp/extractor/ximalaya.py +++ b/yt_dlp/extractor/ximalaya.py @@ -1,7 +1,3 @@ -# coding: utf-8 - -from __future__ import unicode_literals - import itertools import re diff --git a/yt_dlp/extractor/xinpianchang.py b/yt_dlp/extractor/xinpianchang.py index 9832d2398..96e23bb8d 100644 --- a/yt_dlp/extractor/xinpianchang.py +++ b/yt_dlp/extractor/xinpianchang.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/xminus.py b/yt_dlp/extractor/xminus.py index 36e5ead1e..5f113810f 100644 --- a/yt_dlp/extractor/xminus.py +++ b/yt_dlp/extractor/xminus.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import time diff --git a/yt_dlp/extractor/xnxx.py b/yt_dlp/extractor/xnxx.py index 27f991627..14beb1347 100644 --- a/yt_dlp/extractor/xnxx.py +++ b/yt_dlp/extractor/xnxx.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/xstream.py b/yt_dlp/extractor/xstream.py index 792843df5..42bffb071 100644 --- a/yt_dlp/extractor/xstream.py +++ b/yt_dlp/extractor/xstream.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/xtube.py b/yt_dlp/extractor/xtube.py index abd319188..93a6a3f33 100644 --- a/yt_dlp/extractor/xtube.py +++ b/yt_dlp/extractor/xtube.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import itertools import re diff --git a/yt_dlp/extractor/xuite.py b/yt_dlp/extractor/xuite.py index 0276c0dbb..52423a327 100644 --- a/yt_dlp/extractor/xuite.py +++ b/yt_dlp/extractor/xuite.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/xvideos.py b/yt_dlp/extractor/xvideos.py index d5261b6ab..50b939496 100644 --- a/yt_dlp/extractor/xvideos.py +++ b/yt_dlp/extractor/xvideos.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git 
a/yt_dlp/extractor/xxxymovies.py b/yt_dlp/extractor/xxxymovies.py index 0d536015c..e3e3a9fe6 100644 --- a/yt_dlp/extractor/xxxymovies.py +++ b/yt_dlp/extractor/xxxymovies.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( parse_duration, diff --git a/yt_dlp/extractor/yahoo.py b/yt_dlp/extractor/yahoo.py index 20504de2c..3fe6192bf 100644 --- a/yt_dlp/extractor/yahoo.py +++ b/yt_dlp/extractor/yahoo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import hashlib import itertools import re diff --git a/yt_dlp/extractor/yandexdisk.py b/yt_dlp/extractor/yandexdisk.py index c15f3a4f3..d87a7f9be 100644 --- a/yt_dlp/extractor/yandexdisk.py +++ b/yt_dlp/extractor/yandexdisk.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/yandexmusic.py b/yt_dlp/extractor/yandexmusic.py index a3558cc12..8ea416a1d 100644 --- a/yt_dlp/extractor/yandexmusic.py +++ b/yt_dlp/extractor/yandexmusic.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import hashlib import itertools diff --git a/yt_dlp/extractor/yandexvideo.py b/yt_dlp/extractor/yandexvideo.py index 7d3966bf1..37ff514b3 100644 --- a/yt_dlp/extractor/yandexvideo.py +++ b/yt_dlp/extractor/yandexvideo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import re diff --git a/yt_dlp/extractor/yapfiles.py b/yt_dlp/extractor/yapfiles.py index cfb368de9..8fabdf81c 100644 --- a/yt_dlp/extractor/yapfiles.py +++ b/yt_dlp/extractor/yapfiles.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/yesjapan.py b/yt_dlp/extractor/yesjapan.py index 681338c96..b45fa8f14 100644 --- a/yt_dlp/extractor/yesjapan.py +++ b/yt_dlp/extractor/yesjapan.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( HEADRequest, diff --git a/yt_dlp/extractor/yinyuetai.py b/yt_dlp/extractor/yinyuetai.py index 1fd8d35c6..b28c39380 100644 --- a/yt_dlp/extractor/yinyuetai.py +++ b/yt_dlp/extractor/yinyuetai.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ExtractorError diff --git a/yt_dlp/extractor/ynet.py b/yt_dlp/extractor/ynet.py index c4ae4d88e..444785947 100644 --- a/yt_dlp/extractor/ynet.py +++ b/yt_dlp/extractor/ynet.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import json diff --git a/yt_dlp/extractor/youjizz.py b/yt_dlp/extractor/youjizz.py index 111623ffe..cd12be500 100644 --- a/yt_dlp/extractor/youjizz.py +++ b/yt_dlp/extractor/youjizz.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/youku.py b/yt_dlp/extractor/youku.py index b50579915..45856fbbe 100644 --- a/yt_dlp/extractor/youku.py +++ b/yt_dlp/extractor/youku.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import random import re import string diff --git a/yt_dlp/extractor/younow.py b/yt_dlp/extractor/younow.py index 583aea38d..76d89f3ce 100644 --- a/yt_dlp/extractor/younow.py +++ b/yt_dlp/extractor/younow.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools from .common 
import InfoExtractor diff --git a/yt_dlp/extractor/youporn.py b/yt_dlp/extractor/youporn.py index 5feb568e7..5aea82295 100644 --- a/yt_dlp/extractor/youporn.py +++ b/yt_dlp/extractor/youporn.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/yourporn.py b/yt_dlp/extractor/yourporn.py index 98347491e..38f42a991 100644 --- a/yt_dlp/extractor/yourporn.py +++ b/yt_dlp/extractor/yourporn.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/yourupload.py b/yt_dlp/extractor/yourupload.py index 9fa772838..def63293a 100644 --- a/yt_dlp/extractor/yourupload.py +++ b/yt_dlp/extractor/yourupload.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import urljoin diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index f284487b8..21c6143bd 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -1,7 +1,3 @@ -# coding: utf-8 - -from __future__ import unicode_literals - import calendar import copy import datetime @@ -452,7 +448,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): return None # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323 sapisidhash = hashlib.sha1( - f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest() + f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest() return f'SAPISIDHASH {time_now}_{sapisidhash}' def _call_api(self, ep, query, video_id, fatal=True, headers=None, @@ -466,14 +462,14 @@ class YoutubeBaseInfoExtractor(InfoExtractor): if headers: real_headers.update(headers) return self._download_json( - 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep), + f'https://{api_hostname or self._get_innertube_host(default_client)}/youtubei/v1/{ep}', video_id=video_id, fatal=fatal, note=note, errnote=errnote, data=json.dumps(data).encode('utf8'), headers=real_headers, query={'key': api_key or self._extract_api_key(), 'prettyPrint': 'false'}) def extract_yt_initial_data(self, item_id, webpage, fatal=True): data = self._search_regex( - (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE), + (fr'{self._YT_INITIAL_DATA_RE}\s*{self._YT_INITIAL_BOUNDARY_RE}', self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal) if data: return self._parse_json(data, item_id, fatal=fatal) @@ -657,7 +653,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): warnings.append([alert_type, alert_message]) for alert_type, alert_message in (warnings + errors[:-1]): - self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once) + self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once) if errors: raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected) @@ -2214,10 +2210,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): qs = parse_qs(url) if qs.get('list', [None])[0]: return False - return super(YoutubeIE, cls).suitable(url) + return super().suitable(url) def __init__(self, *args, **kwargs): - super(YoutubeIE, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self._code_cache = {} self._player_cache = {} @@ -2413,8 +2409,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): player_id = self._extract_player_info(player_url) # Read from filesystem cache - func_id = 'js_%s_%s' % ( - player_id, 
self._signature_cache_id(example_sig)) + func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}' assert os.path.basename(func_id) == func_id cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id) @@ -2441,7 +2436,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): starts = '' if start == 0 else str(start) ends = (':%d' % (end + step)) if end + step >= 0 else ':' steps = '' if step == 1 else (':%d' % step) - return 's[%s%s%s]' % (starts, ends, steps) + return f's[{starts}{ends}{steps}]' step = None # Quelch pyflakes warnings - start will be set when step is set @@ -2603,7 +2598,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # cpn generation algorithm is reverse engineered from base.js. # In fact it works even with dummy cpn. CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_' - cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))) + cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)) qs.update({ 'ver': ['2'], @@ -2714,7 +2709,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _extract_yt_initial_variable(self, webpage, regex, video_id, name): return self._parse_json(self._search_regex( - (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE), + (fr'{regex}\s*{self._YT_INITIAL_BOUNDARY_RE}', regex), webpage, name, default='{}'), video_id, fatal=False) def _extract_comment(self, comment_renderer, parent=None): @@ -2812,8 +2807,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): comment_entries_iter = self._comment_entries( comment_replies_renderer, ytcfg, video_id, parent=comment.get('id'), tracker=tracker) - for reply_comment in itertools.islice(comment_entries_iter, min(max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))): - yield reply_comment + yield from itertools.islice(comment_entries_iter, min( + max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))) # Keeps track of counts across recursive calls if not tracker: @@ -2955,7 +2950,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): requested_clients = [] default = ['android', 'web'] allowed_clients = sorted( - [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'], + (client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'), key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True) for client in self._configuration_arg('player_client'): if client in allowed_clients: @@ -3865,8 +3860,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): # TODO: add support for nested playlists so each shelf is processed # as separate playlist # TODO: this includes only first N items - for entry in self._grid_entries(renderer): - yield entry + yield from self._grid_entries(renderer) renderer = content.get('horizontalListRenderer') if renderer: # TODO @@ -3886,8 +3880,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): title = self._get_text(shelf_renderer, 'title') yield self.url_result(shelf_url, video_title=title) # Shelf may not contain shelf URL, fallback to extraction from content - for entry in self._shelf_entries_from_content(shelf_renderer): - yield entry + yield from self._shelf_entries_from_content(shelf_renderer) def _playlist_entries(self, video_list_renderer): for content in video_list_renderer['contents']: @@ -3965,8 +3958,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): renderer = content.get('backstagePostThreadRenderer') if not isinstance(renderer, dict): continue - for entry in 
self._post_thread_entries(renderer): - yield entry + yield from self._post_thread_entries(renderer) r''' # unused def _rich_grid_entries(self, contents): @@ -4036,8 +4028,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): parent_renderer = ( try_get(tab_content, lambda x: x['sectionListRenderer'], dict) or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {}) - for entry in extract_entries(parent_renderer): - yield entry + yield from extract_entries(parent_renderer) continuation = continuation_list[0] for page_num in itertools.count(1): @@ -4046,7 +4037,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): headers = self.generate_api_headers( ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data) response = self._extract_response( - item_id='%s page %s' % (item_id, page_num), + item_id=f'{item_id} page {page_num}', query=continuation, headers=headers, ytcfg=ytcfg, check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints')) @@ -4070,8 +4061,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): continue continuation_renderer = value continuation_list = [None] - for entry in known_continuation_renderers[key](continuation_renderer): - yield entry + yield from known_continuation_renderers[key](continuation_renderer) continuation = continuation_list[0] or self._extract_continuation(continuation_renderer) break if continuation_renderer: @@ -4097,8 +4087,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): continue video_items_renderer = {known_renderers[key][1]: continuation_items} continuation_list = [None] - for entry in known_renderers[key][0](video_items_renderer): - yield entry + yield from known_renderers[key][0](video_items_renderer) continuation = continuation_list[0] or self._extract_continuation(video_items_renderer) break if video_items_renderer: @@ -4470,7 +4459,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): ('continuationContents', ), ) display_id = f'query "{query}"' - check_get_keys = tuple(set(keys[0] for keys in content_keys)) + check_get_keys = tuple({keys[0] for keys in content_keys}) ytcfg = self._download_ytcfg(default_client, display_id) if not self.skip_webpage else {} self._report_playlist_authcheck(ytcfg, fatal=False) @@ -5180,8 +5169,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): @classmethod def suitable(cls, url): - return False if YoutubeIE.suitable(url) else super( - YoutubeTabIE, cls).suitable(url) + return False if YoutubeIE.suitable(url) else super().suitable(url) _URL_RE = re.compile(rf'(?P
    <pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')
     
    @@ -5228,7 +5216,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
     
             # Handle both video/playlist URLs
             qs = parse_qs(url)
    -        video_id, playlist_id = [qs.get(key, [None])[0] for key in ('v', 'list')]
    +        video_id, playlist_id = (qs.get(key, [None])[0] for key in ('v', 'list'))
     
             if not video_id and mobj['not_channel'].startswith('watch'):
                 if not playlist_id:
    @@ -5414,7 +5402,7 @@ class YoutubePlaylistIE(InfoExtractor):
             qs = parse_qs(url)
             if qs.get('v', [None])[0]:
                 return False
    -        return super(YoutubePlaylistIE, cls).suitable(url)
    +        return super().suitable(url)
     
         def _real_extract(self, url):
             playlist_id = self._match_id(url)
    @@ -5883,5 +5871,5 @@ class YoutubeTruncatedIDIE(InfoExtractor):
         def _real_extract(self, url):
             video_id = self._match_id(url)
             raise ExtractorError(
    -            'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
    +            f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.',
                 expected=True)
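
The youtube.py hunks above are typical of this cleanup: %-formatting becomes f-strings, the redundant 'utf-8' argument to str.encode() is dropped (UTF-8 is already the default), and per-item generator loops become yield from. As a sanity check of the SAPISIDHASH change, here is a minimal standalone sketch of that header construction; the cookie value passed in below is a made-up placeholder, not a real SAPISID:

import hashlib
import time

def sapisidhash_header(sapisid, origin='https://www.youtube.com'):
    # SHA-1 over "<timestamp> <SAPISID> <origin>"; .encode() without an
    # argument already encodes as UTF-8, so the patch changes no behavior.
    time_now = round(time.time())
    digest = hashlib.sha1(f'{time_now} {sapisid} {origin}'.encode()).hexdigest()
    return f'SAPISIDHASH {time_now}_{digest}'

print(sapisidhash_header('dummy-cookie-value'))
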
    diff --git a/yt_dlp/extractor/zapiks.py b/yt_dlp/extractor/zapiks.py
    index 161b011ab..a1546fd88 100644
    --- a/yt_dlp/extractor/zapiks.py
    +++ b/yt_dlp/extractor/zapiks.py
    @@ -1,6 +1,3 @@
    -# coding: utf-8
    -from __future__ import unicode_literals
    -
     import re
     
     from .common import InfoExtractor
    diff --git a/yt_dlp/extractor/zattoo.py b/yt_dlp/extractor/zattoo.py
    index 8614ca23d..16f827a7e 100644
    --- a/yt_dlp/extractor/zattoo.py
    +++ b/yt_dlp/extractor/zattoo.py
    @@ -1,6 +1,3 @@
    -# coding: utf-8
    -from __future__ import unicode_literals
    -
     import re
     from uuid import uuid4
     
    diff --git a/yt_dlp/extractor/zdf.py b/yt_dlp/extractor/zdf.py
    index 5f4d26622..a388ff562 100644
    --- a/yt_dlp/extractor/zdf.py
    +++ b/yt_dlp/extractor/zdf.py
    @@ -1,6 +1,3 @@
    -# coding: utf-8
    -from __future__ import unicode_literals
    -
     import re
     
     from .common import InfoExtractor
    diff --git a/yt_dlp/extractor/zee5.py b/yt_dlp/extractor/zee5.py
    index 9e411d83f..9ff36052e 100644
    --- a/yt_dlp/extractor/zee5.py
    +++ b/yt_dlp/extractor/zee5.py
    @@ -1,6 +1,3 @@
    -# coding: utf-8
    -from __future__ import unicode_literals
    -
     import json
     
     from .common import InfoExtractor
    @@ -96,14 +93,14 @@ class Zee5IE(InfoExtractor):
         def _perform_login(self, username, password):
             if len(username) == 10 and username.isdigit() and self._USER_TOKEN is None:
                 self.report_login()
    -            otp_request_json = self._download_json('https://b2bapi.zee5.com/device/sendotp_v1.php?phoneno=91{}'.format(username),
    +            otp_request_json = self._download_json(f'https://b2bapi.zee5.com/device/sendotp_v1.php?phoneno=91{username}',
                                                        None, note='Sending OTP')
                 if otp_request_json['code'] == 0:
                     self.to_screen(otp_request_json['message'])
                 else:
                     raise ExtractorError(otp_request_json['message'], expected=True)
                 otp_code = self._get_tfa_info('OTP')
    -            otp_verify_json = self._download_json('https://b2bapi.zee5.com/device/verifyotp_v1.php?phoneno=91{}&otp={}&guest_token={}&platform=web'.format(username, otp_code, self._DEVICE_ID),
    +            otp_verify_json = self._download_json(f'https://b2bapi.zee5.com/device/verifyotp_v1.php?phoneno=91{username}&otp={otp_code}&guest_token={self._DEVICE_ID}&platform=web',
                                                       None, note='Verifying OTP', fatal=False)
                 if not otp_verify_json:
                     raise ExtractorError('Unable to verify OTP.', expected=True)
    @@ -227,13 +224,13 @@ class Zee5SeriesIE(InfoExtractor):
                 'X-Access-Token': access_token_request['token'],
                 'Referer': 'https://www.zee5.com/',
             }
    -        show_url = 'https://gwapi.zee5.com/content/tvshow/{}?translation=en&country=IN'.format(show_id)
    +        show_url = f'https://gwapi.zee5.com/content/tvshow/{show_id}?translation=en&country=IN'
     
             page_num = 0
             show_json = self._download_json(show_url, video_id=show_id, headers=headers)
             for season in show_json.get('seasons') or []:
                 season_id = try_get(season, lambda x: x['id'], compat_str)
    -            next_url = 'https://gwapi.zee5.com/content/tvshow/?season_id={}&type=episode&translation=en&country=IN&on_air=false&asset_subtype=tvshow&page=1&limit=100'.format(season_id)
    +            next_url = f'https://gwapi.zee5.com/content/tvshow/?season_id={season_id}&type=episode&translation=en&country=IN&on_air=false&asset_subtype=tvshow&page=1&limit=100'
                 while next_url:
                     page_num += 1
                     episodes_json = self._download_json(
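
The Zee5 changes are purely cosmetic as well: each str.format() call is replaced by an f-string that renders the same URL. A quick equivalence check, using a hypothetical 10-digit phone number of the kind the login path expects:

phone = '9876543210'  # hypothetical value, only for illustration
old = 'https://b2bapi.zee5.com/device/sendotp_v1.php?phoneno=91{}'.format(phone)
new = f'https://b2bapi.zee5.com/device/sendotp_v1.php?phoneno=91{phone}'
assert old == new  # identical output, so the rewrite cannot change behavior
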
    diff --git a/yt_dlp/extractor/zhihu.py b/yt_dlp/extractor/zhihu.py
    index 278a9438e..70eb3ccd1 100644
    --- a/yt_dlp/extractor/zhihu.py
    +++ b/yt_dlp/extractor/zhihu.py
    @@ -1,6 +1,3 @@
    -# coding: utf-8
    -from __future__ import unicode_literals
    -
     from .common import InfoExtractor
     from ..utils import format_field, float_or_none, int_or_none
     
    diff --git a/yt_dlp/extractor/zingmp3.py b/yt_dlp/extractor/zingmp3.py
    index 419bf30d8..42a8ac056 100644
    --- a/yt_dlp/extractor/zingmp3.py
    +++ b/yt_dlp/extractor/zingmp3.py
    @@ -1,6 +1,3 @@
    -# coding: utf-8
    -from __future__ import unicode_literals
    -
     import hashlib
     import hmac
     import urllib.parse
    diff --git a/yt_dlp/extractor/zoom.py b/yt_dlp/extractor/zoom.py
    index c00548839..a455f8c04 100644
    --- a/yt_dlp/extractor/zoom.py
    +++ b/yt_dlp/extractor/zoom.py
    @@ -1,7 +1,3 @@
    -# coding: utf-8
    -from __future__ import unicode_literals
    -
    -
     from .common import InfoExtractor
     from ..utils import (
         ExtractorError,
    diff --git a/yt_dlp/extractor/zype.py b/yt_dlp/extractor/zype.py
    index 7663cb36b..6f2fbb9e9 100644
    --- a/yt_dlp/extractor/zype.py
    +++ b/yt_dlp/extractor/zype.py
    @@ -1,6 +1,3 @@
    -# coding: utf-8
    -from __future__ import unicode_literals
    -
     import re
     
     from .common import InfoExtractor
    diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py
    index 350b44dd0..3695a282d 100644
    --- a/yt_dlp/jsinterp.py
    +++ b/yt_dlp/jsinterp.py
    @@ -71,7 +71,7 @@ class LocalNameSpace(MutableMapping):
             return f'LocalNameSpace{self.stack}'
     
     
    -class JSInterpreter(object):
    +class JSInterpreter:
         def __init__(self, code, objects=None):
             if objects is None:
                 objects = {}
    @@ -232,7 +232,7 @@ class JSInterpreter(object):
                 for default in (False, True):
                     matched = False
                     for item in items:
    -                    case, stmt = [i.strip() for i in self._separate(item, ':', 1)]
    +                    case, stmt = (i.strip() for i in self._separate(item, ':', 1))
                         if default:
                             matched = matched or case == 'default'
                         elif not matched:
    @@ -268,10 +268,10 @@ class JSInterpreter(object):
                 expr = expr[:start] + json.dumps(ret) + expr[end:]
     
             for op, opfunc in _ASSIGN_OPERATORS:
    -            m = re.match(r'''(?x)
    -                (?P<out>%s)(?:\[(?P<index>[^\]]+?)\])?
    -                \s*%s
    -                (?P<expr>.*)$''' % (_NAME_RE, re.escape(op)), expr)
    +            m = re.match(rf'''(?x)
    +                (?P<out>{_NAME_RE})(?:\[(?P<index>[^\]]+?)\])?
    +                \s*{re.escape(op)}
    +                (?P<expr>.*)$''', expr)
                 if not m:
                     continue
                 right_val = self.interpret_expression(m.group('expr'), local_vars, allow_recursion)
    @@ -451,9 +451,9 @@ class JSInterpreter(object):
         m = re.match(r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]*)\)$' % _NAME_RE, expr)
             if m:
                 fname = m.group('func')
    -            argvals = tuple([
    +            argvals = tuple(
                     int(v) if v.isdigit() else local_vars[v]
    -                for v in self._separate(m.group('args'))])
    +                for v in self._separate(m.group('args')))
                 if fname in local_vars:
                     return local_vars[fname](argvals)
                 elif fname not in self._functions:
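
In the jsinterp.py assignment matcher, the named groups out, index and expr feed the m.group(...) lookups that follow the match. A minimal sketch of how the rewritten rf-string pattern behaves, with a simplified stand-in for the module's _NAME_RE:

import re

_NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*'  # stand-in, not the exact yt-dlp pattern
op = '+='
m = re.match(rf'''(?x)
    (?P<out>{_NAME_RE})(?:\[(?P<index>[^\]]+?)\])?
    \s*{re.escape(op)}
    (?P<expr>.*)$''', 'counter += 1')
print(m.group('out'), m.group('index'), m.group('expr').strip())  # counter None 1

The switch/case tweak above is similar in spirit: tuple unpacking of case, stmt works the same from a generator expression as from the list comprehension it replaces.
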
    diff --git a/yt_dlp/options.py b/yt_dlp/options.py
    index 8839b44d4..c434e32b9 100644
    --- a/yt_dlp/options.py
    +++ b/yt_dlp/options.py
    @@ -1,5 +1,3 @@
    -from __future__ import unicode_literals
    -
     import os.path
     import optparse
     import re
    @@ -124,7 +122,7 @@ class _YoutubeDLOptionParser(optparse.OptionParser):
             try:
                 return super()._match_long_opt(opt)
             except optparse.AmbiguousOptionError as e:
    -            if len(set(self._long_opt[p] for p in e.possibilities)) == 1:
    +            if len({self._long_opt[p] for p in e.possibilities}) == 1:
                     return e.possibilities[0]
                 raise
     
    @@ -189,9 +187,9 @@ def create_parser():
             out_dict = dict(getattr(parser.values, option.dest))
             multiple_args = not isinstance(value, str)
             if multiple_keys:
    -            allowed_keys = r'(%s)(,(%s))*' % (allowed_keys, allowed_keys)
    +            allowed_keys = fr'({allowed_keys})(,({allowed_keys}))*'
             mobj = re.match(
    -            r'(?i)(?P<keys>%s)%s(?P<val>.*)$' % (allowed_keys, delimiter),
    +            fr'(?i)(?P<keys>{allowed_keys}){delimiter}(?P<val>.*)$',
                 value[0] if multiple_args else value)
             if mobj is not None:
                 keys, val = mobj.group('keys').split(','), mobj.group('val')
    @@ -201,7 +199,7 @@ def create_parser():
                 keys, val = [default_key], value
             else:
                 raise optparse.OptionValueError(
    -                'wrong %s formatting; it should be %s, not "%s"' % (opt_str, option.metavar, value))
    +                f'wrong {opt_str} formatting; it should be {option.metavar}, not "{value}"')
             try:
                 keys = map(process_key, keys) if process_key else keys
                 val = process(val) if process else val
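
The options.py rewrite keeps the multi-key grammar intact: allowed_keys is first expanded into a comma-separated alternation, then matched against the raw option value. A sketch of the keys/val groups in action (the key pattern and input here are illustrative, not the real option spec):

import re

allowed_keys = r'[a-zA-Z_]+'  # illustrative key pattern
delimiter = ':'
allowed_keys = fr'({allowed_keys})(,({allowed_keys}))*'  # one key or a comma list
mobj = re.match(fr'(?i)(?P<keys>{allowed_keys}){delimiter}(?P<val>.*)$',
                'title,uploader:%(name)s')
print(mobj.group('keys').split(','), mobj.group('val'))  # ['title', 'uploader'] %(name)s
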
    diff --git a/yt_dlp/postprocessor/common.py b/yt_dlp/postprocessor/common.py
    index 8420ee864..3f55b24f2 100644
    --- a/yt_dlp/postprocessor/common.py
    +++ b/yt_dlp/postprocessor/common.py
    @@ -1,5 +1,3 @@
    -from __future__ import unicode_literals
    -
     import functools
     import itertools
     import json
    @@ -73,7 +71,7 @@ class PostProcessor(metaclass=PostProcessorMetaClass):
         def to_screen(self, text, prefix=True, *args, **kwargs):
             tag = '[%s] ' % self.PP_NAME if prefix else ''
             if self._downloader:
    -            return self._downloader.to_screen('%s%s' % (tag, text), *args, **kwargs)
    +            return self._downloader.to_screen(f'{tag}{text}', *args, **kwargs)
     
         def report_warning(self, text, *args, **kwargs):
             if self._downloader:
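
The PostProcessor logging change is the same f-string substitution. A toy stand-in (not the real yt-dlp class) showing the prefixed output shape:

class DemoPP:
    PP_NAME = 'Demo'

    def to_screen(self, text, prefix=True):
        tag = f'[{self.PP_NAME}] ' if prefix else ''
        print(f'{tag}{text}')

DemoPP().to_screen('Post-processing done')  # prints: [Demo] Post-processing done
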
    diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py
    index 057007f2e..2fca97784 100644
    --- a/yt_dlp/postprocessor/embedthumbnail.py
    +++ b/yt_dlp/postprocessor/embedthumbnail.py
    @@ -1,6 +1,3 @@
    -# coding: utf-8
    -from __future__ import unicode_literals
    -
     import base64
     import imghdr
     import os
    @@ -61,7 +58,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
             return int(mobj.group('w')), int(mobj.group('h'))
     
         def _report_run(self, exe, filename):
    -        self.to_screen('%s: Adding thumbnail to "%s"' % (exe, filename))
    +        self.to_screen(f'{exe}: Adding thumbnail to "{filename}"')
     
         @PostProcessor._restrict_to(images=False)
         def run(self, info):
    diff --git a/yt_dlp/postprocessor/exec.py b/yt_dlp/postprocessor/exec.py
    index 63f4d23f2..6621889d5 100644
    --- a/yt_dlp/postprocessor/exec.py
    +++ b/yt_dlp/postprocessor/exec.py
    @@ -1,5 +1,3 @@
    -from __future__ import unicode_literals
    -
     import subprocess
     
     from .common import PostProcessor
    diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py
    index 78c6f9107..3175c8d10 100644
    --- a/yt_dlp/postprocessor/ffmpeg.py
    +++ b/yt_dlp/postprocessor/ffmpeg.py
    @@ -1,7 +1,4 @@
    -from __future__ import unicode_literals
    -
     import collections
    -import io
     import itertools
     import os
     import subprocess
    @@ -73,11 +70,9 @@ class FFmpegPostProcessor(PostProcessor):
                 raise FFmpegPostProcessorError('ffmpeg not found. Please install or provide the path using --ffmpeg-location')
     
             required_version = '10-0' if self.basename == 'avconv' else '1.0'
    -        if is_outdated_version(
    -                self._versions[self.basename], required_version):
    -            warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
    -                self.basename, self.basename, required_version)
    -            self.report_warning(warning)
    +        if is_outdated_version(self._versions[self.basename], required_version):
    +            self.report_warning(f'Your copy of {self.basename} is outdated, update {self.basename} '
    +                                f'to version {required_version} or newer if you encounter any errors')
     
         @staticmethod
         def get_versions_and_features(downloader=None):
    @@ -147,8 +142,8 @@ class FFmpegPostProcessor(PostProcessor):
                     if basename in ('ffmpeg', 'ffprobe'):
                         prefer_ffmpeg = True
     
    -            self._paths = dict(
    -                (p, os.path.join(dirname, p)) for p in programs)
    +            self._paths = {
    +                p: os.path.join(dirname, p) for p in programs}
                 if basename:
                     self._paths[basename] = location
     
    @@ -211,13 +206,13 @@ class FFmpegPostProcessor(PostProcessor):
                         encodeFilename(self.executable, True),
                         encodeArgument('-i')]
                 cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True))
    -            self.write_debug('%s command line: %s' % (self.basename, shell_quote(cmd)))
    +            self.write_debug(f'{self.basename} command line: {shell_quote(cmd)}')
                 handle = Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                 stdout_data, stderr_data = handle.communicate_or_kill()
                 expected_ret = 0 if self.probe_available else 1
                 if handle.wait() != expected_ret:
                     return None
    -        except (IOError, OSError):
    +        except OSError:
                 return None
             output = (stdout_data if self.probe_available else stderr_data).decode('ascii', 'ignore')
             if self.probe_available:
    @@ -539,7 +534,7 @@ class FFmpegVideoConvertorPP(FFmpegPostProcessor):
         _ACTION = 'converting'
     
         def __init__(self, downloader=None, preferedformat=None):
    -        super(FFmpegVideoConvertorPP, self).__init__(downloader)
    +        super().__init__(downloader)
             self._preferedformats = preferedformat.lower().split('/')
     
         def _target_ext(self, source_ext):
    @@ -585,7 +580,7 @@ class FFmpegVideoRemuxerPP(FFmpegVideoConvertorPP):
     
     class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
         def __init__(self, downloader=None, already_have_subtitle=False):
    -        super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
    +        super().__init__(downloader)
             self._already_have_subtitle = already_have_subtitle
     
         @PostProcessor._restrict_to(images=False)
    @@ -713,7 +708,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
     
         @staticmethod
         def _get_chapter_opts(chapters, metadata_filename):
    -        with io.open(metadata_filename, 'wt', encoding='utf-8') as f:
    +        with open(metadata_filename, 'wt', encoding='utf-8') as f:
                 def ffmpeg_escape(text):
                     return re.sub(r'([\\=;#\n])', r'\\\1', text)
     
    @@ -899,7 +894,7 @@ class FFmpegFixupTimestampPP(FFmpegFixupPostProcessor):
     
         def __init__(self, downloader=None, trim=0.001):
             # "trim" should be used when the video contains unintended packets
    -        super(FFmpegFixupTimestampPP, self).__init__(downloader)
    +        super().__init__(downloader)
             assert isinstance(trim, (int, float))
             self.trim = str(trim)
     
    @@ -937,7 +932,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
         SUPPORTED_EXTS = ('srt', 'vtt', 'ass', 'lrc')
     
         def __init__(self, downloader=None, format=None):
    -        super(FFmpegSubtitlesConvertorPP, self).__init__(downloader)
    +        super().__init__(downloader)
             self.format = format
     
         def run(self, info):
    @@ -979,7 +974,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
                     with open(dfxp_file, 'rb') as f:
                         srt_data = dfxp2srt(f.read())
     
    -                with io.open(srt_file, 'wt', encoding='utf-8') as f:
    +                with open(srt_file, 'wt', encoding='utf-8') as f:
                         f.write(srt_data)
                     old_file = srt_file
     
    @@ -996,7 +991,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
     
                 self.run_ffmpeg(old_file, new_file, ['-f', new_format])
     
    -            with io.open(new_file, 'rt', encoding='utf-8') as f:
    +            with open(new_file, encoding='utf-8') as f:
                     subs[lang] = {
                         'ext': new_ext,
                         'data': f.read(),
    @@ -1059,7 +1054,7 @@ class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor):
         SUPPORTED_EXTS = ('jpg', 'png', 'webp')
     
         def __init__(self, downloader=None, format=None):
    -        super(FFmpegThumbnailsConvertorPP, self).__init__(downloader)
    +        super().__init__(downloader)
             self.format = format
     
         @staticmethod
    @@ -1090,7 +1085,7 @@ class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor):
         def convert_thumbnail(self, thumbnail_filename, target_ext):
             thumbnail_conv_filename = replace_extension(thumbnail_filename, target_ext)
     
    -        self.to_screen('Converting thumbnail "%s" to %s' % (thumbnail_filename, target_ext))
    +        self.to_screen(f'Converting thumbnail "{thumbnail_filename}" to {target_ext}')
             self.real_run_ffmpeg(
                 [(thumbnail_filename, ['-f', 'image2', '-pattern_type', 'none'])],
                 [(thumbnail_conv_filename.replace('%', '%%'), self._options(target_ext))])
    diff --git a/yt_dlp/postprocessor/movefilesafterdownload.py b/yt_dlp/postprocessor/movefilesafterdownload.py
    index 1064a8cb8..bc3d15ca4 100644
    --- a/yt_dlp/postprocessor/movefilesafterdownload.py
    +++ b/yt_dlp/postprocessor/movefilesafterdownload.py
    @@ -1,4 +1,3 @@
    -from __future__ import unicode_literals
     import os
     import shutil
     
    @@ -47,7 +46,7 @@ class MoveFilesAfterDownloadPP(PostProcessor):
                             % (oldfile, newfile))
                         continue
                 make_dir(newfile, PostProcessingError)
    -            self.to_screen('Moving file "%s" to "%s"' % (oldfile, newfile))
    +            self.to_screen(f'Moving file "{oldfile}" to "{newfile}"')
                 shutil.move(oldfile, newfile)  # os.rename cannot move between volumes
     
             info['filepath'] = finalpath
    diff --git a/yt_dlp/postprocessor/sponskrub.py b/yt_dlp/postprocessor/sponskrub.py
    index 59cf0e0c3..38089de08 100644
    --- a/yt_dlp/postprocessor/sponskrub.py
    +++ b/yt_dlp/postprocessor/sponskrub.py
    @@ -1,4 +1,3 @@
    -from __future__ import unicode_literals
     import os
     import shlex
     import subprocess
    diff --git a/yt_dlp/postprocessor/xattrpp.py b/yt_dlp/postprocessor/xattrpp.py
    index 93acd6d13..5ad8509e7 100644
    --- a/yt_dlp/postprocessor/xattrpp.py
    +++ b/yt_dlp/postprocessor/xattrpp.py
    @@ -1,5 +1,3 @@
    -from __future__ import unicode_literals
    -
     from .common import PostProcessor
     from ..compat import compat_os_name
     from ..utils import (
    diff --git a/yt_dlp/socks.py b/yt_dlp/socks.py
    index 5d4adbe72..ffa960e03 100644
    --- a/yt_dlp/socks.py
    +++ b/yt_dlp/socks.py
    @@ -1,8 +1,5 @@
     # Public Domain SOCKS proxy protocol implementation
     # Adapted from https://gist.github.com/bluec0re/cafd3764412967417fd3
    -
    -from __future__ import unicode_literals
    -
     # References:
     # SOCKS4 protocol http://www.openssh.com/txt/socks4.protocol
     # SOCKS4A protocol http://www.openssh.com/txt/socks4a.protocol
    @@ -33,7 +30,7 @@ SOCKS5_USER_AUTH_VERSION = 0x01
     SOCKS5_USER_AUTH_SUCCESS = 0x00
     
     
    -class Socks4Command(object):
    +class Socks4Command:
         CMD_CONNECT = 0x01
         CMD_BIND = 0x02
     
    @@ -42,14 +39,14 @@ class Socks5Command(Socks4Command):
         CMD_UDP_ASSOCIATE = 0x03
     
     
    -class Socks5Auth(object):
    +class Socks5Auth:
         AUTH_NONE = 0x00
         AUTH_GSSAPI = 0x01
         AUTH_USER_PASS = 0x02
         AUTH_NO_ACCEPTABLE = 0xFF  # For server response
     
     
    -class Socks5AddressType(object):
    +class Socks5AddressType:
         ATYP_IPV4 = 0x01
         ATYP_DOMAINNAME = 0x03
         ATYP_IPV6 = 0x04
    @@ -61,14 +58,14 @@ class ProxyError(socket.error):
         def __init__(self, code=None, msg=None):
             if code is not None and msg is None:
                 msg = self.CODES.get(code) or 'unknown error'
    -        super(ProxyError, self).__init__(code, msg)
    +        super().__init__(code, msg)
     
     
     class InvalidVersionError(ProxyError):
         def __init__(self, expected_version, got_version):
    -        msg = ('Invalid response version from server. Expected {0:02x} got '
    -               '{1:02x}'.format(expected_version, got_version))
    -        super(InvalidVersionError, self).__init__(0, msg)
    +        msg = ('Invalid response version from server. Expected {:02x} got '
    +               '{:02x}'.format(expected_version, got_version))
    +        super().__init__(0, msg)
     
     
     class Socks4Error(ProxyError):
    @@ -98,7 +95,7 @@ class Socks5Error(ProxyError):
         }
     
     
    -class ProxyType(object):
    +class ProxyType:
         SOCKS4 = 0
         SOCKS4A = 1
         SOCKS5 = 2
    @@ -111,7 +108,7 @@ Proxy = collections.namedtuple('Proxy', (
     class sockssocket(socket.socket):
         def __init__(self, *args, **kwargs):
             self._proxy = None
    -        super(sockssocket, self).__init__(*args, **kwargs)
    +        super().__init__(*args, **kwargs)
     
         def setproxy(self, proxytype, addr, port, rdns=True, username=None, password=None):
             assert proxytype in (ProxyType.SOCKS4, ProxyType.SOCKS4A, ProxyType.SOCKS5)
    @@ -123,13 +120,13 @@ class sockssocket(socket.socket):
             while len(data) < cnt:
                 cur = self.recv(cnt - len(data))
                 if not cur:
    -                raise EOFError('{0} bytes missing'.format(cnt - len(data)))
    +                raise EOFError(f'{cnt - len(data)} bytes missing')
                 data += cur
             return data
     
         def _recv_bytes(self, cnt):
             data = self.recvall(cnt)
    -        return compat_struct_unpack('!{0}B'.format(cnt), data)
    +        return compat_struct_unpack(f'!{cnt}B', data)
     
         @staticmethod
         def _len_and_data(data):
    @@ -143,7 +140,7 @@ class sockssocket(socket.socket):
         def _resolve_address(self, destaddr, default, use_remote_dns):
             try:
                 return socket.inet_aton(destaddr)
    -        except socket.error:
    +        except OSError:
                 if use_remote_dns and self._proxy.remote_dns:
                     return default
                 else:
    @@ -185,7 +182,7 @@ class sockssocket(socket.socket):
                 auth_methods.append(Socks5Auth.AUTH_USER_PASS)
     
             packet += compat_struct_pack('!B', len(auth_methods))
    -        packet += compat_struct_pack('!{0}B'.format(len(auth_methods)), *auth_methods)
    +        packet += compat_struct_pack(f'!{len(auth_methods)}B', *auth_methods)
     
             self.sendall(packet)
     
    diff --git a/yt_dlp/update.py b/yt_dlp/update.py
    index f6ac207a1..7db260e96 100644
    --- a/yt_dlp/update.py
    +++ b/yt_dlp/update.py
    @@ -1,5 +1,3 @@
    -from __future__ import unicode_literals
    -
     import hashlib
     import json
     import os
    @@ -111,11 +109,11 @@ def run_update(ydl):
         }
     
         def get_bin_info(bin_or_exe, version):
    -        label = version_labels['%s_%s' % (bin_or_exe, version)]
    +        label = version_labels[f'{bin_or_exe}_{version}']
             return next((i for i in version_info['assets'] if i['name'] == 'yt-dlp%s' % label), {})
     
         def get_sha256sum(bin_or_exe, version):
    -        filename = 'yt-dlp%s' % version_labels['%s_%s' % (bin_or_exe, version)]
    +        filename = 'yt-dlp%s' % version_labels[f'{bin_or_exe}_{version}']
             urlh = next(
                 (i for i in version_info['assets'] if i['name'] in ('SHA2-256SUMS')),
                 {}).get('browser_download_url')
    @@ -136,7 +134,7 @@ def run_update(ydl):
             try:
                 if os.path.exists(filename + '.old'):
                     os.remove(filename + '.old')
    -        except (IOError, OSError):
    +        except OSError:
                 return report_unable('remove the old version')
     
             try:
    @@ -147,13 +145,13 @@ def run_update(ydl):
                 urlh = ydl._opener.open(url)
                 newcontent = urlh.read()
                 urlh.close()
    -        except (IOError, OSError):
    +        except OSError:
                 return report_network_error('download latest version')
     
             try:
                 with open(filename + '.new', 'wb') as outf:
                     outf.write(newcontent)
    -        except (IOError, OSError):
    +        except OSError:
                 return report_permission_error(f'{filename}.new')
     
             expected_sum = get_sha256sum(variant, arch)
    @@ -168,11 +166,11 @@ def run_update(ydl):
     
             try:
                 os.rename(filename, filename + '.old')
    -        except (IOError, OSError):
    +        except OSError:
                 return report_unable('move current version')
             try:
                 os.rename(filename + '.new', filename)
    -        except (IOError, OSError):
    +        except OSError:
                 report_unable('overwrite current version')
                 os.rename(filename + '.old', filename)
                 return
    @@ -195,7 +193,7 @@ def run_update(ydl):
                 urlh = ydl._opener.open(url)
                 newcontent = urlh.read()
                 urlh.close()
    -        except (IOError, OSError):
    +        except OSError:
                 return report_network_error('download the latest version')
     
             expected_sum = get_sha256sum(variant, pack_type)
    @@ -207,7 +205,7 @@ def run_update(ydl):
             try:
                 with open(filename, 'wb') as outf:
                     outf.write(newcontent)
    -        except (IOError, OSError):
    +        except OSError:
                 return report_unable('overwrite current version')
     
             ydl.to_screen('Updated yt-dlp to version %s; Restart yt-dlp to use the new version' % version_id)
    diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
    index 3f70b1f60..91e1a9870 100644
    --- a/yt_dlp/utils.py
    +++ b/yt_dlp/utils.py
    @@ -1,8 +1,4 @@
     #!/usr/bin/env python3
    -# coding: utf-8
    -
    -from __future__ import unicode_literals
    -
     import asyncio
     import atexit
     import base64
    @@ -311,7 +307,7 @@ def write_json_file(obj, fn):
     def find_xpath_attr(node, xpath, key, val=None):
         """ Find the xpath xpath[@key=val] """
         assert re.match(r'^[a-zA-Z_-]+$', key)
    -    expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
    +    expr = xpath + ('[@%s]' % key if val is None else f"[@{key}='{val}']")
         return node.find(expr)
     
     # On python2.6 the xml.etree.ElementTree.Element methods don't support
    @@ -374,7 +370,7 @@ def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
             if default is not NO_DEFAULT:
                 return default
             elif fatal:
    -            name = '%s[@%s]' % (xpath, key) if name is None else name
    +            name = f'{xpath}[@{key}]' if name is None else name
                 raise ExtractorError('Could not find XML attribute %s' % name)
             else:
                 return None
    @@ -443,15 +439,15 @@ def get_elements_text_and_html_by_attribute(attribute, value, html, escape_value
         attribute in the passed HTML document
         """
     
    -    value_quote_optional = '' if re.match(r'''[\s"'`=<>]''', value) else '?'
    +    quote = '' if re.match(r'''[\s"'`=<>]''', value) else '?'
     
         value = re.escape(value) if escape_value else value
     
    -    partial_element_re = r'''(?x)
    +    partial_element_re = rf'''(?x)
             <(?P<tag>[a-zA-Z0-9:._-]+)
              (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
    -         \s%(attribute)s\s*=\s*(?P<_q>['"]%(vqo)s)(?-x:%(value)s)(?P=_q)
    -        ''' % {'attribute': re.escape(attribute), 'value': value, 'vqo': value_quote_optional}
    +         \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
    +        '''
     
         for m in re.finditer(partial_element_re, html):
             content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])
    @@ -644,7 +640,7 @@ def sanitize_open(filename, open_mode):
                 except LockingUnsupportedError:
                     stream = open(filename, open_mode)
                 return (stream, filename)
    -        except (IOError, OSError) as err:
    +        except OSError as err:
                 if attempt or err.errno in (errno.EACCES,):
                     raise
                 old_filename, filename = filename, sanitize_path(filename)
    @@ -853,7 +849,7 @@ class Popen(subprocess.Popen):
             _startupinfo = None
     
         def __init__(self, *args, **kwargs):
    -        super(Popen, self).__init__(*args, **kwargs, startupinfo=self._startupinfo)
    +        super().__init__(*args, **kwargs, startupinfo=self._startupinfo)
     
         def communicate_or_kill(self, *args, **kwargs):
             return process_communicate_or_kill(self, *args, **kwargs)
    @@ -1013,7 +1009,7 @@ class ExtractorError(YoutubeDLError):
             self.ie = ie
             self.exc_info = sys.exc_info()  # preserve original exception
     
    -        super(ExtractorError, self).__init__(''.join((
    +        super().__init__(''.join((
                 format_field(ie, template='[%s] '),
                 format_field(video_id, template='%s: '),
                 msg,
    @@ -1029,7 +1025,7 @@ class ExtractorError(YoutubeDLError):
     
     class UnsupportedError(ExtractorError):
         def __init__(self, url):
    -        super(UnsupportedError, self).__init__(
    +        super().__init__(
                 'Unsupported URL: %s' % url, expected=True)
             self.url = url
     
    @@ -1048,7 +1044,7 @@ class GeoRestrictedError(ExtractorError):
     
         def __init__(self, msg, countries=None, **kwargs):
             kwargs['expected'] = True
    -        super(GeoRestrictedError, self).__init__(msg, **kwargs)
    +        super().__init__(msg, **kwargs)
             self.countries = countries
     
     
    @@ -1062,7 +1058,7 @@ class DownloadError(YoutubeDLError):
     
         def __init__(self, msg, exc_info=None):
             """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
    -        super(DownloadError, self).__init__(msg)
    +        super().__init__(msg)
             self.exc_info = exc_info
     
     
    @@ -1156,9 +1152,7 @@ class ContentTooShortError(YoutubeDLError):
         """
     
         def __init__(self, downloaded, expected):
    -        super(ContentTooShortError, self).__init__(
    -            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
    -        )
    +        super().__init__(f'Downloaded {downloaded} bytes, expected {expected} bytes')
             # Both in bytes
             self.downloaded = downloaded
             self.expected = expected
    @@ -1166,7 +1160,7 @@ class ContentTooShortError(YoutubeDLError):
     
     class XAttrMetadataError(YoutubeDLError):
         def __init__(self, code=None, msg='Unknown error'):
    -        super(XAttrMetadataError, self).__init__(msg)
    +        super().__init__(msg)
             self.code = code
             self.msg = msg
     
    @@ -1202,7 +1196,7 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
                 ip_addrs = [addr for addr in addrs if addr[0] == af]
                 if addrs and not ip_addrs:
                     ip_version = 'v4' if af == socket.AF_INET else 'v6'
    -                raise socket.error(
    +                raise OSError(
                         "No remote IP%s addresses available for connect, can't use '%s' as source address"
                         % (ip_version, source_address[0]))
                 for res in ip_addrs:
    @@ -1216,14 +1210,14 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
                         sock.connect(sa)
                         err = None  # Explicitly break reference cycle
                         return sock
    -                except socket.error as _:
    +                except OSError as _:
                         err = _
                         if sock is not None:
                             sock.close()
                 if err is not None:
                     raise err
                 else:
    -                raise socket.error('getaddrinfo returns an empty list')
    +                raise OSError('getaddrinfo returns an empty list')
             if hasattr(hc, '_create_connection'):
                 hc._create_connection = _create_connection
             hc.source_address = (source_address, 0)
    @@ -1235,7 +1229,7 @@ def handle_youtubedl_headers(headers):
         filtered_headers = headers
     
         if 'Youtubedl-no-compression' in filtered_headers:
    -        filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
    +        filtered_headers = {k: v for k, v in filtered_headers.items() if k.lower() != 'accept-encoding'}
             del filtered_headers['Youtubedl-no-compression']
     
         return filtered_headers
    @@ -1327,14 +1321,14 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
                 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
                 try:
                     uncompressed = io.BytesIO(gz.read())
    -            except IOError as original_ioerror:
    +            except OSError as original_ioerror:
                     # There may be junk add the end of the file
                     # See http://stackoverflow.com/q/4928560/35070 for details
                     for i in range(1, 1024):
                         try:
                             gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                             uncompressed = io.BytesIO(gz.read())
    -                    except IOError:
    +                    except OSError:
                             continue
                         break
                     else:
    @@ -1474,7 +1468,7 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
                 if cookie.expires is None:
                     cookie.expires = 0
     
    -        with io.open(filename, 'w', encoding='utf-8') as f:
    +        with open(filename, 'w', encoding='utf-8') as f:
                 f.write(self._HEADER)
                 now = time.time()
                 for cookie in self:
    @@ -1530,7 +1524,7 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
                 return line
     
             cf = io.StringIO()
    -        with io.open(filename, encoding='utf-8') as f:
    +        with open(filename, encoding='utf-8') as f:
                 for line in f:
                     try:
                         cf.write(prepare_line(line))
    @@ -1612,8 +1606,7 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
     
             CONTENT_HEADERS = ("content-length", "content-type")
             # NB: don't use dict comprehension for python 2.6 compatibility
    -        newheaders = dict((k, v) for k, v in req.headers.items()
    -                          if k.lower() not in CONTENT_HEADERS)
    +        newheaders = {k: v for k, v in req.headers.items() if k.lower() not in CONTENT_HEADERS}
             return compat_urllib_request.Request(
                 newurl, headers=newheaders, origin_req_host=req.origin_req_host,
                 unverifiable=True)
    @@ -1657,7 +1650,7 @@ def parse_iso8601(date_str, delimiter='T', timezone=None):
             timezone, date_str = extract_timezone(date_str)
     
         try:
    -        date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    +        date_format = f'%Y-%m-%d{delimiter}%H:%M:%S'
             dt = datetime.datetime.strptime(date_str, date_format) - timezone
             return calendar.timegm(dt.timetuple())
         except ValueError:
    @@ -1839,7 +1832,7 @@ def hyphenate_date(date_str):
             return date_str
     
     
    -class DateRange(object):
    +class DateRange:
         """Represents a time interval between two dates"""
     
         def __init__(self, start=None, end=None):
    @@ -1867,7 +1860,7 @@ class DateRange(object):
             return self.start <= date <= self.end
     
         def __str__(self):
    -        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
    +        return f'{self.start.isoformat()} - {self.end.isoformat()}'
     
     
     def platform_name():
    @@ -2012,7 +2005,7 @@ else:
                 raise LockingUnsupportedError()
     
     
    -class locked_file(object):
    +class locked_file:
         locked = False
     
         def __init__(self, filename, mode, block=True, encoding=None):
    @@ -2039,7 +2032,7 @@ class locked_file(object):
             try:
                 _lock_file(self.f, exclusive, self.block)
                 self.locked = True
    -        except IOError:
    +        except OSError:
                 self.f.close()
                 raise
             if 'w' in self.mode:
    @@ -2510,14 +2503,14 @@ def parse_duration(s):
     def prepend_extension(filename, ext, expected_real_ext=None):
         name, real_ext = os.path.splitext(filename)
         return (
    -        '{0}.{1}{2}'.format(name, ext, real_ext)
    +        f'{name}.{ext}{real_ext}'
             if not expected_real_ext or real_ext[1:] == expected_real_ext
    -        else '{0}.{1}'.format(filename, ext))
    +        else f'{filename}.{ext}')
     
     
     def replace_extension(filename, ext, expected_real_ext=None):
         name, real_ext = os.path.splitext(filename)
    -    return '{0}.{1}'.format(
    +    return '{}.{}'.format(
             name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
             ext)
     
    @@ -2700,6 +2693,7 @@ class PagedList:
     
     class OnDemandPagedList(PagedList):
         """Download pages until a page with less than maximum results"""
    +
         def _getslice(self, start, end):
             for pagenum in itertools.count(start // self._pagesize):
                 firstid = pagenum * self._pagesize
    @@ -2740,6 +2734,7 @@ class OnDemandPagedList(PagedList):
     
     class InAdvancePagedList(PagedList):
         """PagedList with total number of pages known in advance"""
    +
         def __init__(self, pagefunc, pagecount, pagesize):
             PagedList.__init__(self, pagefunc, pagesize, True)
             self._pagecount = pagecount
    @@ -2994,10 +2989,10 @@ def strip_jsonp(code):
     def js_to_json(code, vars={}):
         # vars is a dict of var, val pairs to substitute
         COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
    -    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    +    SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
         INTEGER_TABLE = (
    -        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
    -        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    +        (fr'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16),
    +        (fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
         )
     
         def fix_kv(m):
    @@ -3518,7 +3513,7 @@ def dfxp2srt(dfxp_data):
         styles = {}
         default_style = {}
     
    -    class TTMLPElementParser(object):
    +    class TTMLPElementParser:
             _out = ''
             _unclosed_elements = []
             _applied_styles = []
    @@ -3703,7 +3698,7 @@ def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compa
         return cli_configuration_args(argdict, keys, default, use_compat)
     
     
    -class ISO639Utils(object):
    +class ISO639Utils:
         # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
         _lang_map = {
             'aa': 'aar',
    @@ -3908,7 +3903,7 @@ class ISO639Utils(object):
                     return short_name
     
     
    -class ISO3166Utils(object):
    +class ISO3166Utils:
         # From http://data.okfn.org/data/core/country-list
         _country_map = {
             'AF': 'Afghanistan',
    @@ -4168,7 +4163,7 @@ class ISO3166Utils(object):
             return cls._country_map.get(code.upper())
     
     
    -class GeoUtils(object):
    +class GeoUtils:
         # Major IPv4 address blocks per country
         _country_ip_map = {
             'AD': '46.172.224.0/19',
    @@ -4605,7 +4600,7 @@ def decode_png(png_data):
         header = png_data[8:]
     
         if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
    -        raise IOError('Not a valid PNG file.')
    +        raise OSError('Not a valid PNG file.')
     
         int_map = {1: '>B', 2: '>H', 4: '>I'}
         unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
    @@ -4642,7 +4637,7 @@ def decode_png(png_data):
                 idat += chunk['data']
     
         if not idat:
    -        raise IOError('Unable to read PNG data.')
    +        raise OSError('Unable to read PNG data.')
     
         decompressed_data = bytearray(zlib.decompress(idat))
     
    @@ -4730,7 +4725,7 @@ def write_xattr(path, key, value):
     
             try:
                 setxattr(path, key, value)
    -        except EnvironmentError as e:
    +        except OSError as e:
                 raise XAttrMetadataError(e.errno, e.strerror)
     
         except ImportError:
    @@ -4744,7 +4739,7 @@ def write_xattr(path, key, value):
                 try:
                     with open(ads_fn, 'wb') as f:
                         f.write(value)
    -            except EnvironmentError as e:
    +            except OSError as e:
                     raise XAttrMetadataError(e.errno, e.strerror)
             else:
                 user_has_setfattr = check_executable('setfattr', ['--version'])
    @@ -4767,7 +4762,7 @@ def write_xattr(path, key, value):
                     try:
                         p = Popen(
                             cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
    -                except EnvironmentError as e:
    +                except OSError as e:
                         raise XAttrMetadataError(e.errno, e.strerror)
                     stdout, stderr = p.communicate_or_kill()
                     stderr = stderr.decode('utf-8', 'replace')
    @@ -4923,7 +4918,7 @@ def make_dir(path, to_screen=None):
             if dn and not os.path.exists(dn):
                 os.makedirs(dn)
             return True
    -    except (OSError, IOError) as err:
    +    except OSError as err:
             if callable(to_screen) is not None:
                 to_screen('unable to create directory ' + error_to_compat_str(err))
             return False
    @@ -5155,7 +5150,7 @@ def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
         """
         _keys = ('width', 'height')
         max_dimensions = max(
    -        [tuple(format.get(k) or 0 for k in _keys) for format in formats],
    +        (tuple(format.get(k) or 0 for k in _keys) for format in formats),
             default=(0, 0))
         if not max_dimensions[0]:
             return thumbnails
    @@ -5220,7 +5215,7 @@ class Config:
         def read_file(filename, default=[]):
             try:
                 optionf = open(filename)
    -        except IOError:
    +        except OSError:
                 return default  # silently skip if file is not present
             try:
                 # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
    @@ -5232,7 +5227,7 @@ class Config:
     
         @staticmethod
         def hide_login_info(opts):
    -        PRIVATE_OPTS = set(['-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'])
    +        PRIVATE_OPTS = {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'}
             eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
     
             def _scrub_eq(o):
    diff --git a/yt_dlp/webvtt.py b/yt_dlp/webvtt.py
    index c78078f17..e0d7f6743 100644
    --- a/yt_dlp/webvtt.py
    +++ b/yt_dlp/webvtt.py
    @@ -1,6 +1,3 @@
    -# coding: utf-8
    -from __future__ import unicode_literals, print_function, division
    -
     """
     A partial parser for WebVTT segments. Interprets enough of the WebVTT stream
     to be able to assemble a single stand-alone subtitle file, suitably adjusting
    @@ -20,7 +17,7 @@ from .compat import (
     )
     
     
    -class _MatchParser(object):
    +class _MatchParser:
         """
         An object that maintains the current parsing position and allows
         conveniently advancing it as syntax elements are successfully parsed.
    @@ -69,7 +66,7 @@ class _MatchChildParser(_MatchParser):
         """
     
         def __init__(self, parent):
    -        super(_MatchChildParser, self).__init__(parent._data)
    +        super().__init__(parent._data)
             self.__parent = parent
             self._pos = parent._pos
     
    @@ -83,7 +80,7 @@ class _MatchChildParser(_MatchParser):
     
     class ParseError(Exception):
         def __init__(self, parser):
    -        super(ParseError, self).__init__("Parse error at position %u (near %r)" % (
    +        super().__init__("Parse error at position %u (near %r)" % (
                 parser._pos, parser._data[parser._pos:parser._pos + 20]
             ))
     
    @@ -126,7 +123,7 @@ def _format_ts(ts):
         return '%02u:%02u:%02u.%03u' % timetuple_from_msec(int((ts + 45) // 90))
     
     
    -class Block(object):
    +class Block:
         """
         An abstract WebVTT block.
         """
    diff --git a/ytdlp_plugins/extractor/sample.py b/ytdlp_plugins/extractor/sample.py
    index d99b7ca33..82c0af459 100644
    --- a/ytdlp_plugins/extractor/sample.py
    +++ b/ytdlp_plugins/extractor/sample.py
    @@ -1,5 +1,3 @@
    -# coding: utf-8
    -
     # ⚠ Don't use relative imports
     from yt_dlp.extractor.common import InfoExtractor
     
    diff --git a/ytdlp_plugins/postprocessor/sample.py b/ytdlp_plugins/postprocessor/sample.py
    index 6ba49266e..4563e1c11 100644
    --- a/ytdlp_plugins/postprocessor/sample.py
    +++ b/ytdlp_plugins/postprocessor/sample.py
    @@ -1,5 +1,3 @@
    -# coding: utf-8
    -
     # ⚠ Don't use relative imports
     from yt_dlp.postprocessor.common import PostProcessor
     
    
    From f82711587cee043cb2496fe180b5cc0e07c06eda Mon Sep 17 00:00:00 2001
    From: pukkandan 
    Date: Tue, 12 Apr 2022 04:02:57 +0530
    Subject: [PATCH 0943/2552] [cleanup] Sort imports
    
    Using https://github.com/PyCQA/isort
    
        isort -m VERTICAL_HANGING_INDENT --py 36 -l 80 --rr -n --tc .
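
    For illustration, these flags select isort's "vertical hanging indent"
    style: multi-line imports get one name per line, a trailing comma (--tc)
    and the closing parenthesis on its own line, with names sorted
    case-sensitively within each section. A minimal before/after sketch of
    the effect, using an import line taken from this very patch:

        # before: one over-long import line
        from yt_dlp.utils import ExtractorError, int_or_none, match_filter_func, LazyList

        # after: vertical hanging indent, uppercase names sorting first
        from yt_dlp.utils import (
            ExtractorError,
            LazyList,
            int_or_none,
            match_filter_func,
        )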
    ---
     .gitignore                                    |   1 +
     devscripts/bash-completion.py                 |   2 +-
     devscripts/check-porn.py                      |   5 +-
     devscripts/fish-completion.py                 |   2 +-
     devscripts/generate_aes_testdata.py           |   6 +-
     devscripts/make_lazy_extractors.py            |   4 +-
     devscripts/make_readme.py                     |   2 +-
     devscripts/make_supportedsites.py             |   1 -
     devscripts/update-formulae.py                 |   1 -
     devscripts/update-version.py                  |   5 +-
     devscripts/zsh-completion.py                  |   2 +-
     pyinst.py                                     |  12 +-
     setup.py                                      |   6 +-
     test/helper.py                                |  13 +-
     test/test_InfoExtractor.py                    |  14 +-
     test/test_YoutubeDL.py                        |  17 +-
     test/test_YoutubeDLCookieJar.py               |   1 +
     test/test_aes.py                              |  16 +-
     test/test_age_restriction.py                  |   3 +-
     test/test_all_urls.py                         |   9 +-
     test/test_cache.py                            |   5 +-
     test/test_compat.py                           |   5 +-
     test/test_cookies.py                          |   4 +-
     test/test_download.py                         |  15 +-
     test/test_downloader_http.py                  |   4 +-
     test/test_execution.py                        |   6 +-
     test/test_http.py                             |   6 +-
     test/test_iqiyi_sdk_interpreter.py            |   2 +
     test/test_jsinterp.py                         |   1 +
     test/test_netrc.py                            |   1 +
     test/test_overwrites.py                       |   4 +-
     test/test_post_hooks.py                       |   4 +-
     test/test_postprocessors.py                   |   2 +-
     test/test_socks.py                            |  12 +-
     test/test_subtitles.py                        |  28 +--
     test/test_update.py.disabled                  |   2 +
     test/test_utils.py                            |  95 ++++----
     test/test_verbose_output.py                   |   6 +-
     test/test_write_annotations.py.disabled       |   8 +-
     test/test_youtube_lists.py                    |   6 +-
     test/test_youtube_misc.py                     |   1 +
     test/test_youtube_signature.py                |   5 +-
     yt_dlp/YoutubeDL.py                           | 124 +++++-----
     yt_dlp/__init__.py                            |  42 ++--
     yt_dlp/aes.py                                 |  12 +-
     yt_dlp/cache.py                               |   5 +-
     yt_dlp/cookies.py                             |  12 +-
     yt_dlp/downloader/__init__.py                 |  14 +-
     yt_dlp/downloader/common.py                   |  18 +-
     yt_dlp/downloader/dash.py                     |   3 +-
     yt_dlp/downloader/external.py                 |  17 +-
     yt_dlp/downloader/f4m.py                      |  11 +-
     yt_dlp/downloader/fragment.py                 |   8 +-
     yt_dlp/downloader/hls.py                      |  20 +-
     yt_dlp/downloader/http.py                     |  13 +-
     yt_dlp/downloader/ism.py                      |   7 +-
     yt_dlp/downloader/mhtml.py                    |   7 +-
     yt_dlp/downloader/rtmp.py                     |   4 +-
     yt_dlp/downloader/rtsp.py                     |   5 +-
     yt_dlp/downloader/websocket.py                |   2 +-
     yt_dlp/downloader/youtube_live_chat.py        |   7 +-
     yt_dlp/extractor/abematv.py                   |  28 +--
     yt_dlp/extractor/common.py                    |  23 +-
     yt_dlp/extractor/commonprotocols.py           |   4 +-
     yt_dlp/extractor/generic.py                   | 215 +++++++++---------
     yt_dlp/extractor/mtv.py                       |   4 +-
     yt_dlp/extractor/noz.py                       |   4 +-
     yt_dlp/extractor/openload.py                  |   8 +-
     yt_dlp/extractor/youtube.py                   |   7 +-
     yt_dlp/jsinterp.py                            |   7 +-
     yt_dlp/minicurses.py                          |   2 +-
     yt_dlp/options.py                             |  29 +--
     yt_dlp/postprocessor/__init__.py              |  15 +-
     yt_dlp/postprocessor/common.py                |   2 +-
     yt_dlp/postprocessor/embedthumbnail.py        |  13 +-
     yt_dlp/postprocessor/exec.py                  |   6 +-
     yt_dlp/postprocessor/ffmpeg.py                |  16 +-
     yt_dlp/postprocessor/modify_chapters.py       |  12 +-
     .../postprocessor/movefilesafterdownload.py   |   2 +-
     yt_dlp/postprocessor/sponskrub.py             |   6 +-
     yt_dlp/postprocessor/sponsorblock.py          |   2 +-
     yt_dlp/postprocessor/xattrpp.py               |   4 +-
     yt_dlp/socks.py                               |   6 +-
     yt_dlp/update.py                              |   3 +-
     yt_dlp/utils.py                               |  22 +-
     yt_dlp/webvtt.py                              |   8 +-
     86 files changed, 504 insertions(+), 619 deletions(-)
    
    diff --git a/.gitignore b/.gitignore
    index c815538e8..92f9029e3 100644
    --- a/.gitignore
    +++ b/.gitignore
    @@ -82,6 +82,7 @@ updates_key.pem
     *.egg-info
     .tox
     *.class
    +*.isorted
     
     # Generated
     AUTHORS
    diff --git a/devscripts/bash-completion.py b/devscripts/bash-completion.py
    index 23a9a5781..73d698c39 100755
    --- a/devscripts/bash-completion.py
    +++ b/devscripts/bash-completion.py
    @@ -1,7 +1,7 @@
     #!/usr/bin/env python3
     import os
    -from os.path import dirname as dirn
     import sys
    +from os.path import dirname as dirn
     
     sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
     import yt_dlp
    diff --git a/devscripts/check-porn.py b/devscripts/check-porn.py
    index 6188f68ec..08f663e4b 100644
    --- a/devscripts/check-porn.py
    +++ b/devscripts/check-porn.py
    @@ -10,11 +10,12 @@ pass the list filename as the only argument
     # Allow direct execution
     import os
     import sys
    +
     sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
     
     from test.helper import gettestcases
    -from yt_dlp.utils import compat_urllib_parse_urlparse
    -from yt_dlp.utils import compat_urllib_request
    +
    +from yt_dlp.utils import compat_urllib_parse_urlparse, compat_urllib_request
     
     if len(sys.argv) > 1:
         METHOD = 'LIST'
    diff --git a/devscripts/fish-completion.py b/devscripts/fish-completion.py
    index d958a5d6b..c318b69e4 100755
    --- a/devscripts/fish-completion.py
    +++ b/devscripts/fish-completion.py
    @@ -1,8 +1,8 @@
     #!/usr/bin/env python3
     import optparse
     import os
    -from os.path import dirname as dirn
     import sys
    +from os.path import dirname as dirn
     
     sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
     import yt_dlp
    diff --git a/devscripts/generate_aes_testdata.py b/devscripts/generate_aes_testdata.py
    index 308c74a20..c7d83f1a7 100644
    --- a/devscripts/generate_aes_testdata.py
    +++ b/devscripts/generate_aes_testdata.py
    @@ -1,13 +1,13 @@
     #!/usr/bin/env python3
     import codecs
    -import subprocess
    -
     import os
    +import subprocess
     import sys
    +
     sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
     
    -from yt_dlp.utils import intlist_to_bytes
     from yt_dlp.aes import aes_encrypt, key_expansion
    +from yt_dlp.utils import intlist_to_bytes
     
     secret_msg = b'Secret message goes here'
     
    diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
    index 24e8cfa5b..6d5f96cf0 100644
    --- a/devscripts/make_lazy_extractors.py
    +++ b/devscripts/make_lazy_extractors.py
    @@ -1,8 +1,8 @@
     #!/usr/bin/env python3
    -from inspect import getsource
     import os
    -from os.path import dirname as dirn
     import sys
    +from inspect import getsource
    +from os.path import dirname as dirn
     
     sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
     
    diff --git a/devscripts/make_readme.py b/devscripts/make_readme.py
    index 5d85bcc63..1719ac8e4 100755
    --- a/devscripts/make_readme.py
    +++ b/devscripts/make_readme.py
    @@ -2,8 +2,8 @@
     
     # yt-dlp --help | make_readme.py
     # This must be run in a console of correct width
    -import sys
     import re
    +import sys
     
     README_FILE = 'README.md'
     helptext = sys.stdin.read()
    diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py
    index 26d25704e..0a0d08f56 100644
    --- a/devscripts/make_supportedsites.py
    +++ b/devscripts/make_supportedsites.py
    @@ -3,7 +3,6 @@ import optparse
     import os
     import sys
     
    -
     # Import yt_dlp
     ROOT_DIR = os.path.join(os.path.dirname(__file__), '..')
     sys.path.insert(0, ROOT_DIR)
    diff --git a/devscripts/update-formulae.py b/devscripts/update-formulae.py
    index 3a0bef52e..6424f5d9b 100644
    --- a/devscripts/update-formulae.py
    +++ b/devscripts/update-formulae.py
    @@ -8,7 +8,6 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
     
     from yt_dlp.compat import compat_urllib_request
     
    -
     # usage: python3 ./devscripts/update-formulae.py <path-to-formulae-rb> <version>
     # version can be either 0-aligned (yt-dlp version) or normalized (PyPl version)
     
    diff --git a/devscripts/update-version.py b/devscripts/update-version.py
    index 233cdaa76..991cfb2af 100644
    --- a/devscripts/update-version.py
    +++ b/devscripts/update-version.py
    @@ -1,8 +1,7 @@
     #!/usr/bin/env python3
    -from datetime import datetime
    -import sys
     import subprocess
    -
    +import sys
    +from datetime import datetime
     
     with open('yt_dlp/version.py') as f:
         exec(compile(f.read(), 'yt_dlp/version.py', 'exec'))
    diff --git a/devscripts/zsh-completion.py b/devscripts/zsh-completion.py
    index 677fe7373..2d5ac2a45 100755
    --- a/devscripts/zsh-completion.py
    +++ b/devscripts/zsh-completion.py
    @@ -1,7 +1,7 @@
     #!/usr/bin/env python3
     import os
    -from os.path import dirname as dirn
     import sys
    +from os.path import dirname as dirn
     
     sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
     import yt_dlp
    diff --git a/pyinst.py b/pyinst.py
    index 1f72bd4be..9e8128e09 100644
    --- a/pyinst.py
    +++ b/pyinst.py
    @@ -2,14 +2,20 @@
     import os
     import platform
     import sys
    -from PyInstaller.utils.hooks import collect_submodules
     
    +from PyInstaller.utils.hooks import collect_submodules
     
     OS_NAME = platform.system()
     if OS_NAME == 'Windows':
         from PyInstaller.utils.win32.versioninfo import (
    -        VarStruct, VarFileInfo, StringStruct, StringTable,
    -        StringFileInfo, FixedFileInfo, VSVersionInfo, SetVersion,
    +        FixedFileInfo,
    +        SetVersion,
    +        StringFileInfo,
    +        StringStruct,
    +        StringTable,
    +        VarFileInfo,
    +        VarStruct,
    +        VSVersionInfo,
         )
     elif OS_NAME == 'Darwin':
         pass
    diff --git a/setup.py b/setup.py
    index 9eab7f1d7..45f4d6b49 100644
    --- a/setup.py
    +++ b/setup.py
    @@ -1,13 +1,13 @@
     #!/usr/bin/env python3
     import os.path
    -import warnings
     import sys
    +import warnings
     
     try:
    -    from setuptools import setup, Command, find_packages
    +    from setuptools import Command, find_packages, setup
         setuptools_available = True
     except ImportError:
    -    from distutils.core import setup, Command
    +    from distutils.core import Command, setup
         setuptools_available = False
     from distutils.spawn import spawn
     
    diff --git a/test/helper.py b/test/helper.py
    index d940e327c..81e53ed74 100644
    --- a/test/helper.py
    +++ b/test/helper.py
    @@ -3,21 +3,14 @@ import hashlib
     import json
     import os.path
     import re
    -import types
     import ssl
     import sys
    +import types
     
     import yt_dlp.extractor
     from yt_dlp import YoutubeDL
    -from yt_dlp.compat import (
    -    compat_os_name,
    -    compat_str,
    -)
    -from yt_dlp.utils import (
    -    preferredencoding,
    -    write_string,
    -)
    -
    +from yt_dlp.compat import compat_os_name, compat_str
    +from yt_dlp.utils import preferredencoding, write_string
     
     if 'pytest' in sys.modules:
         import pytest
    diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
    index 4fd21bed4..173b62920 100644
    --- a/test/test_InfoExtractor.py
    +++ b/test/test_InfoExtractor.py
    @@ -3,15 +3,21 @@
     import os
     import sys
     import unittest
    +
     sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
     
    +import threading
     from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
    +
     from yt_dlp.compat import compat_etree_fromstring, compat_http_server
    -from yt_dlp.extractor.common import InfoExtractor
     from yt_dlp.extractor import YoutubeIE, get_info_extractor
    -from yt_dlp.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
    -import threading
    -
    +from yt_dlp.extractor.common import InfoExtractor
    +from yt_dlp.utils import (
    +    ExtractorError,
    +    RegexNotFoundError,
    +    encode_data_uri,
    +    strip_jsonp,
    +)
     
     TEAPOT_RESPONSE_STATUS = 418
     TEAPOT_RESPONSE_BODY = "

    418 I'm a teapot

    " diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 480c7539c..051a203ac 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -3,18 +3,29 @@ import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import copy import json - from test.helper import FakeYDL, assertRegexpMatches + from yt_dlp import YoutubeDL -from yt_dlp.compat import compat_os_name, compat_setenv, compat_str, compat_urllib_error +from yt_dlp.compat import ( + compat_os_name, + compat_setenv, + compat_str, + compat_urllib_error, +) from yt_dlp.extractor import YoutubeIE from yt_dlp.extractor.common import InfoExtractor from yt_dlp.postprocessor.common import PostProcessor -from yt_dlp.utils import ExtractorError, int_or_none, match_filter_func, LazyList +from yt_dlp.utils import ( + ExtractorError, + LazyList, + int_or_none, + match_filter_func, +) TEST_URL = 'http://localhost/sample.mp4' diff --git a/test/test_YoutubeDLCookieJar.py b/test/test_YoutubeDLCookieJar.py index 1e5bedcae..13a4569b2 100644 --- a/test/test_YoutubeDLCookieJar.py +++ b/test/test_YoutubeDLCookieJar.py @@ -4,6 +4,7 @@ import re import sys import tempfile import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from yt_dlp.utils import YoutubeDLCookieJar diff --git a/test/test_aes.py b/test/test_aes.py index 34584a04f..1c1238c8b 100644 --- a/test/test_aes.py +++ b/test/test_aes.py @@ -3,26 +3,28 @@ import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import base64 + from yt_dlp.aes import ( - aes_decrypt, - aes_encrypt, - aes_ecb_encrypt, - aes_ecb_decrypt, + BLOCK_SIZE_BYTES, aes_cbc_decrypt, aes_cbc_decrypt_bytes, aes_cbc_encrypt, aes_ctr_decrypt, aes_ctr_encrypt, + aes_decrypt, + aes_decrypt_text, + aes_ecb_decrypt, + aes_ecb_encrypt, + aes_encrypt, aes_gcm_decrypt_and_verify, aes_gcm_decrypt_and_verify_bytes, - aes_decrypt_text, - BLOCK_SIZE_BYTES, ) from yt_dlp.compat import compat_pycrypto_AES from yt_dlp.utils import bytes_to_intlist, intlist_to_bytes -import base64 # the encrypted data can be generate with 'devscripts/generate_aes_testdata.py' diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py index 50d16a729..e1012f69b 100644 --- a/test/test_age_restriction.py +++ b/test/test_age_restriction.py @@ -3,9 +3,10 @@ import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import try_rm, is_download_test +from test.helper import is_download_test, try_rm from yt_dlp import YoutubeDL diff --git a/test/test_all_urls.py b/test/test_all_urls.py index d70da8cae..b6019554e 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -1,19 +1,16 @@ #!/usr/bin/env python3 # Allow direct execution +import collections import os import sys import unittest -import collections + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import gettestcases -from yt_dlp.extractor import ( - FacebookIE, - gen_extractors, - YoutubeIE, -) +from yt_dlp.extractor import FacebookIE, YoutubeIE, gen_extractors class TestAllURLsMatching(unittest.TestCase): diff --git a/test/test_cache.py b/test/test_cache.py index 4e4641eba..14e54ba20 100644 --- a/test/test_cache.py +++ b/test/test_cache.py @@ -1,14 +1,15 @@ #!/usr/bin/env python3 -import shutil - # Allow direct execution import os +import shutil import sys import unittest + 
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import FakeYDL + from yt_dlp.cache import Cache diff --git a/test/test_compat.py b/test/test_compat.py index 31524c5ab..20dab9573 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -3,14 +3,15 @@ import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from yt_dlp.compat import ( - compat_getenv, - compat_setenv, compat_etree_fromstring, compat_expanduser, + compat_getenv, + compat_setenv, compat_str, compat_struct_unpack, compat_urllib_parse_unquote, diff --git a/test/test_cookies.py b/test/test_cookies.py index 842ebcb99..5bfaec367 100644 --- a/test/test_cookies.py +++ b/test/test_cookies.py @@ -6,10 +6,10 @@ from yt_dlp.cookies import ( LinuxChromeCookieDecryptor, MacChromeCookieDecryptor, WindowsChromeCookieDecryptor, - parse_safari_cookies, - pbkdf2_sha1, _get_linux_desktop_environment, _LinuxDesktopEnvironment, + parse_safari_cookies, + pbkdf2_sha1, ) diff --git a/test/test_download.py b/test/test_download.py index 3c6b55d98..9a83bee2f 100755 --- a/test/test_download.py +++ b/test/test_download.py @@ -1,8 +1,12 @@ #!/usr/bin/env python3 # Allow direct execution +import hashlib +import json import os +import socket import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import ( @@ -16,24 +20,19 @@ from test.helper import ( try_rm, ) - -import hashlib -import json -import socket - import yt_dlp.YoutubeDL from yt_dlp.compat import ( compat_http_client, - compat_urllib_error, compat_HTTPError, + compat_urllib_error, ) +from yt_dlp.extractor import get_info_extractor from yt_dlp.utils import ( DownloadError, ExtractorError, - format_bytes, UnavailableVideoError, + format_bytes, ) -from yt_dlp.extractor import get_info_extractor RETRIES = 3 diff --git a/test/test_downloader_http.py b/test/test_downloader_http.py index c511909c7..c33308064 100644 --- a/test/test_downloader_http.py +++ b/test/test_downloader_http.py @@ -4,14 +4,16 @@ import os import re import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import threading from test.helper import http_server_port, try_rm + from yt_dlp import YoutubeDL from yt_dlp.compat import compat_http_server from yt_dlp.downloader.http import HttpFD from yt_dlp.utils import encodeFilename -import threading TEST_DIR = os.path.dirname(os.path.abspath(__file__)) diff --git a/test/test_execution.py b/test/test_execution.py index 623f08165..6a3e9944b 100644 --- a/test/test_execution.py +++ b/test/test_execution.py @@ -1,9 +1,9 @@ #!/usr/bin/env python3 -import unittest - -import sys import os import subprocess +import sys +import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from yt_dlp.utils import encodeArgument diff --git a/test/test_http.py b/test/test_http.py index 2106220eb..029996ca9 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -3,13 +3,15 @@ import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import ssl +import threading from test.helper import http_server_port + from yt_dlp import YoutubeDL from yt_dlp.compat import compat_http_server, compat_urllib_request -import ssl -import threading TEST_DIR = os.path.dirname(os.path.abspath(__file__)) diff --git a/test/test_iqiyi_sdk_interpreter.py b/test/test_iqiyi_sdk_interpreter.py index 
57a7ed3a8..4b82b7187 100644 --- a/test/test_iqiyi_sdk_interpreter.py +++ b/test/test_iqiyi_sdk_interpreter.py @@ -3,9 +3,11 @@ import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import FakeYDL, is_download_test + from yt_dlp.extractor import IqiyiIE diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index 10a465cf9..872c58c8f 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -3,6 +3,7 @@ import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from yt_dlp.jsinterp import JSInterpreter diff --git a/test/test_netrc.py b/test/test_netrc.py index adc3a0ed1..f7a0b33d2 100644 --- a/test/test_netrc.py +++ b/test/test_netrc.py @@ -1,6 +1,7 @@ import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) diff --git a/test/test_overwrites.py b/test/test_overwrites.py index 8e0548db5..39741b65c 100644 --- a/test/test_overwrites.py +++ b/test/test_overwrites.py @@ -1,14 +1,14 @@ #!/usr/bin/env python3 import os -from os.path import join import subprocess import sys import unittest +from os.path import join + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import is_download_test, try_rm - root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) download_file = join(root_dir, 'test.webm') diff --git a/test/test_post_hooks.py b/test/test_post_hooks.py index 020203f2f..e84a08f29 100644 --- a/test/test_post_hooks.py +++ b/test/test_post_hooks.py @@ -2,9 +2,11 @@ import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import get_params, try_rm, is_download_test +from test.helper import get_params, is_download_test, try_rm + import yt_dlp.YoutubeDL from yt_dlp.utils import DownloadError diff --git a/test/test_postprocessors.py b/test/test_postprocessors.py index e5893f7d2..9d8a4dcc5 100644 --- a/test/test_postprocessors.py +++ b/test/test_postprocessors.py @@ -13,7 +13,7 @@ from yt_dlp.postprocessor import ( FFmpegThumbnailsConvertorPP, MetadataFromFieldPP, MetadataParserPP, - ModifyChaptersPP + ModifyChaptersPP, ) diff --git a/test/test_socks.py b/test/test_socks.py index 02723b469..546f0d73d 100644 --- a/test/test_socks.py +++ b/test/test_socks.py @@ -3,20 +3,14 @@ import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import random import subprocess +from test.helper import FakeYDL, get_params, is_download_test -from test.helper import ( - FakeYDL, - get_params, - is_download_test, -) -from yt_dlp.compat import ( - compat_str, - compat_urllib_request, -) +from yt_dlp.compat import compat_str, compat_urllib_request @is_download_test diff --git a/test/test_subtitles.py b/test/test_subtitles.py index 0be1842da..362b67cef 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -3,29 +3,29 @@ import os import sys import unittest -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import FakeYDL, md5, is_download_test +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from test.helper import FakeYDL, is_download_test, md5 from yt_dlp.extractor import ( - YoutubeIE, - DailymotionIE, - TedTalkIE, - VimeoIE, - WallaIE, - CeskaTelevizeIE, - LyndaIE, NPOIE, + NRKTVIE, PBSIE, + CeskaTelevizeIE, 
ComedyCentralIE, - NRKTVIE, + DailymotionIE, + DemocracynowIE, + LyndaIE, RaiPlayIE, - VikiIE, - ThePlatformIE, - ThePlatformFeedIE, RTVEALaCartaIE, - DemocracynowIE, + TedTalkIE, + ThePlatformFeedIE, + ThePlatformIE, + VikiIE, + VimeoIE, + WallaIE, + YoutubeIE, ) diff --git a/test/test_update.py.disabled b/test/test_update.py.disabled index 5f0794ae2..389b8ffe5 100644 --- a/test/test_update.py.disabled +++ b/test/test_update.py.disabled @@ -3,10 +3,12 @@ import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import json + from yt_dlp.update import rsa_verify diff --git a/test/test_utils.py b/test/test_utils.py index e0c862807..7909dc61c 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -3,6 +3,7 @@ import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -12,75 +13,95 @@ import itertools import json import xml.etree.ElementTree +from yt_dlp.compat import ( + compat_chr, + compat_etree_fromstring, + compat_getenv, + compat_HTMLParseError, + compat_os_name, + compat_setenv, +) from yt_dlp.utils import ( + Config, + DateRange, + ExtractorError, + InAdvancePagedList, + LazyList, + OnDemandPagedList, age_restricted, args_to_str, - encode_base_n, + base_url, caesar, clean_html, clean_podcast_url, - Config, + cli_bool_option, + cli_option, + cli_valueless_option, date_from_str, datetime_from_str, - DateRange, detect_exe_version, determine_ext, + dfxp2srt, dict_get, + encode_base_n, encode_compat_str, encodeFilename, escape_rfc3986, escape_url, + expand_path, extract_attributes, - ExtractorError, find_xpath_attr, fix_xml_ampersands, - format_bytes, float_or_none, - get_element_by_class, + format_bytes, get_element_by_attribute, - get_elements_by_class, - get_elements_by_attribute, - get_element_html_by_class, + get_element_by_class, get_element_html_by_attribute, - get_elements_html_by_class, + get_element_html_by_class, + get_element_text_and_html_by_tag, + get_elements_by_attribute, + get_elements_by_class, get_elements_html_by_attribute, + get_elements_html_by_class, get_elements_text_and_html_by_attribute, - get_element_text_and_html_by_tag, - InAdvancePagedList, int_or_none, intlist_to_bytes, + iri_to_uri, is_html, js_to_json, limit_length, locked_file, + lowercase_escape, + match_str, merge_dicts, mimetype2ext, month_by_name, multipart_encode, ohdave_rsa_encrypt, - OnDemandPagedList, orderedSet, parse_age_limit, + parse_bitrate, + parse_codecs, + parse_count, + parse_dfxp_time_expr, parse_duration, parse_filesize, - parse_count, parse_iso8601, - parse_resolution, - parse_bitrate, parse_qs, + parse_resolution, pkcs1pad, + prepend_extension, read_batch_urls, + remove_end, + remove_quotes, + remove_start, + render_table, + replace_extension, + rot47, sanitize_filename, sanitize_path, sanitize_url, sanitized_Request, - expand_path, - prepend_extension, - replace_extension, - remove_start, - remove_end, - remove_quotes, - rot47, shell_quote, smuggle_url, str_to_int, @@ -92,38 +113,18 @@ from yt_dlp.utils import ( unified_strdate, unified_timestamp, unsmuggle_url, + update_url_query, uppercase_escape, - lowercase_escape, url_basename, url_or_none, - base_url, - urljoin, urlencode_postdata, + urljoin, urshift, - update_url_query, version_tuple, - xpath_with_ns, + xpath_attr, xpath_element, xpath_text, - xpath_attr, - render_table, - match_str, - parse_dfxp_time_expr, - dfxp2srt, - cli_option, - cli_valueless_option, - cli_bool_option, - parse_codecs, - 
iri_to_uri, - LazyList, -) -from yt_dlp.compat import ( - compat_chr, - compat_etree_fromstring, - compat_getenv, - compat_HTMLParseError, - compat_os_name, - compat_setenv, + xpath_with_ns, ) diff --git a/test/test_verbose_output.py b/test/test_verbose_output.py index 17aeafbc0..1213a9726 100644 --- a/test/test_verbose_output.py +++ b/test/test_verbose_output.py @@ -1,9 +1,9 @@ #!/usr/bin/env python3 -import unittest - -import sys import os import subprocess +import sys +import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) diff --git a/test/test_write_annotations.py.disabled b/test/test_write_annotations.py.disabled index 4173fd09d..bf13efe2c 100644 --- a/test/test_write_annotations.py.disabled +++ b/test/test_write_annotations.py.disabled @@ -3,17 +3,15 @@ import os import sys import unittest -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -from test.helper import get_params, try_rm, is_download_test +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import io - import xml.etree.ElementTree +from test.helper import get_params, is_download_test, try_rm -import yt_dlp.YoutubeDL import yt_dlp.extractor +import yt_dlp.YoutubeDL class YoutubeDL(yt_dlp.YoutubeDL): diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index 8691abb67..66611e236 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -3,14 +3,12 @@ import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import FakeYDL, is_download_test -from yt_dlp.extractor import ( - YoutubeIE, - YoutubeTabIE, -) +from yt_dlp.extractor import YoutubeIE, YoutubeTabIE @is_download_test diff --git a/test/test_youtube_misc.py b/test/test_youtube_misc.py index 70d6d9949..36f8be689 100644 --- a/test/test_youtube_misc.py +++ b/test/test_youtube_misc.py @@ -3,6 +3,7 @@ import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index d751d5396..ca23c910d 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -3,16 +3,17 @@ import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import re import string import urllib.request - from test.helper import FakeYDL, is_download_test + +from yt_dlp.compat import compat_str from yt_dlp.extractor import YoutubeIE from yt_dlp.jsinterp import JSInterpreter -from yt_dlp.compat import compat_str _SIG_TESTS = [ ( diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 56f0346dc..a8bb7f45c 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -12,6 +12,7 @@ import locale import operator import os import platform +import random import re import shutil import subprocess @@ -20,13 +21,12 @@ import tempfile import time import tokenize import traceback -import random import unicodedata import urllib.request - from enum import Enum from string import ascii_letters +from .cache import Cache from .compat import ( compat_brotli, compat_get_terminal_size, @@ -39,74 +39,100 @@ from .compat import ( windows_enable_vt_mode, ) from .cookies import load_cookies +from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name +from .downloader.rtmp import rtmpdump_version +from .extractor 
import _LAZY_LOADER +from .extractor import _PLUGIN_CLASSES as plugin_extractors +from .extractor import gen_extractor_classes, get_info_extractor +from .extractor.openload import PhantomJSwrapper +from .minicurses import format_text +from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors +from .postprocessor import ( + EmbedThumbnailPP, + FFmpegFixupDuplicateMoovPP, + FFmpegFixupDurationPP, + FFmpegFixupM3u8PP, + FFmpegFixupM4aPP, + FFmpegFixupStretchedPP, + FFmpegFixupTimestampPP, + FFmpegMergerPP, + FFmpegPostProcessor, + MoveFilesAfterDownloadPP, + get_postprocessor, +) +from .update import detect_variant from .utils import ( + DEFAULT_OUTTMPL, + LINK_TEMPLATES, + NO_DEFAULT, + OUTTMPL_TYPES, + POSTPROCESS_WHEN, + STR_FORMAT_RE_TMPL, + STR_FORMAT_TYPES, + ContentTooShortError, + DateRange, + DownloadCancelled, + DownloadError, + EntryNotInPlaylist, + ExistingVideoReached, + ExtractorError, + GeoRestrictedError, + HEADRequest, + InAdvancePagedList, + ISO3166Utils, + LazyList, + MaxDownloadsReached, + PagedList, + PerRequestProxyHandler, + Popen, + PostProcessingError, + ReExtractInfo, + RejectedVideoReached, + SameFileError, + UnavailableVideoError, + YoutubeDLCookieProcessor, + YoutubeDLHandler, + YoutubeDLRedirectHandler, age_restricted, args_to_str, - ContentTooShortError, date_from_str, - DateRange, - DEFAULT_OUTTMPL, determine_ext, determine_protocol, - DownloadCancelled, - DownloadError, encode_compat_str, encodeFilename, - EntryNotInPlaylist, error_to_compat_str, - ExistingVideoReached, expand_path, - ExtractorError, filter_dict, float_or_none, format_bytes, - format_field, format_decimal_suffix, + format_field, formatSeconds, - GeoRestrictedError, get_domain, has_certifi, - HEADRequest, - InAdvancePagedList, int_or_none, iri_to_uri, - ISO3166Utils, join_nonempty, - LazyList, - LINK_TEMPLATES, locked_file, make_dir, make_HTTPS_handler, - MaxDownloadsReached, merge_headers, network_exceptions, - NO_DEFAULT, number_of_digits, orderedSet, - OUTTMPL_TYPES, - PagedList, parse_filesize, - PerRequestProxyHandler, platform_name, - Popen, - POSTPROCESS_WHEN, - PostProcessingError, preferredencoding, prepend_extension, - ReExtractInfo, register_socks_protocols, - RejectedVideoReached, remove_terminal_sequences, render_table, replace_extension, - SameFileError, sanitize_filename, sanitize_path, sanitize_url, sanitized_Request, std_headers, - STR_FORMAT_RE_TMPL, - STR_FORMAT_TYPES, str_or_none, strftime_or_none, subtitles_filename, @@ -115,47 +141,13 @@ from .utils import ( to_high_limit_path, traverse_obj, try_get, - UnavailableVideoError, url_basename, variadic, version_tuple, write_json_file, write_string, - YoutubeDLCookieProcessor, - YoutubeDLHandler, - YoutubeDLRedirectHandler, -) -from .cache import Cache -from .minicurses import format_text -from .extractor import ( - gen_extractor_classes, - get_info_extractor, - _LAZY_LOADER, - _PLUGIN_CLASSES as plugin_extractors -) -from .extractor.openload import PhantomJSwrapper -from .downloader import ( - FFmpegFD, - get_suitable_downloader, - shorten_protocol_name -) -from .downloader.rtmp import rtmpdump_version -from .postprocessor import ( - get_postprocessor, - EmbedThumbnailPP, - FFmpegFixupDuplicateMoovPP, - FFmpegFixupDurationPP, - FFmpegFixupM3u8PP, - FFmpegFixupM4aPP, - FFmpegFixupStretchedPP, - FFmpegFixupTimestampPP, - FFmpegMergerPP, - FFmpegPostProcessor, - MoveFilesAfterDownloadPP, - _PLUGIN_CLASSES as plugin_postprocessors ) -from .update import detect_variant -from .version import __version__, RELEASE_GIT_HEAD 
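The reordered from-import blocks above follow one convention throughout the series: inside a parenthesized import, names are grouped by type, ALL_CAPS constants first, then CamelCase classes and exceptions, then lowercase functions, alphabetically within each group. This matches isort's default order_by_type behaviour (an inference; the patch itself does not name the tool). A small illustration, not copied from any hunk:

# Before the cleanup (insertion order):
from yt_dlp.utils import (
    age_restricted,
    ExtractorError,
    NO_DEFAULT,
    LazyList,
)

# After (constants, then classes, then functions):
from yt_dlp.utils import (
    NO_DEFAULT,
    ExtractorError,
    LazyList,
    age_restricted,
)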
+from .version import RELEASE_GIT_HEAD, __version__ if compat_os_name == 'nt': import ctypes @@ -3666,9 +3658,9 @@ class YoutubeDL: ) or 'none' write_debug('exe versions: %s' % exe_str) + from .cookies import SECRETSTORAGE_AVAILABLE, SQLITE_AVAILABLE from .downloader.websocket import has_websockets from .postprocessor.embedthumbnail import has_mutagen - from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE lib_str = join_nonempty( compat_brotli and compat_brotli.__name__, diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 91bf5c4ce..f339e4cd1 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -9,48 +9,44 @@ import random import re import sys +from .compat import compat_getpass, compat_os_name, compat_shlex_quote +from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS +from .downloader import FileDownloader +from .extractor import gen_extractors, list_extractors +from .extractor.adobepass import MSO_INFO +from .extractor.common import InfoExtractor from .options import parseOpts -from .compat import ( - compat_getpass, - compat_os_name, - compat_shlex_quote, +from .postprocessor import ( + FFmpegExtractAudioPP, + FFmpegSubtitlesConvertorPP, + FFmpegThumbnailsConvertorPP, + FFmpegVideoConvertorPP, + FFmpegVideoRemuxerPP, + MetadataFromFieldPP, + MetadataParserPP, ) -from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS +from .update import run_update from .utils import ( + NO_DEFAULT, DateRange, - decodeOption, DownloadCancelled, DownloadError, + GeoUtils, + SameFileError, + decodeOption, expand_path, float_or_none, - GeoUtils, int_or_none, match_filter_func, - NO_DEFAULT, parse_duration, preferredencoding, read_batch_urls, render_table, - SameFileError, setproctitle, std_headers, traverse_obj, write_string, ) -from .update import run_update -from .downloader import FileDownloader -from .extractor import gen_extractors, list_extractors -from .extractor.common import InfoExtractor -from .extractor.adobepass import MSO_INFO -from .postprocessor import ( - FFmpegExtractAudioPP, - FFmpegSubtitlesConvertorPP, - FFmpegThumbnailsConvertorPP, - FFmpegVideoConvertorPP, - FFmpegVideoRemuxerPP, - MetadataFromFieldPP, - MetadataParserPP, -) from .YoutubeDL import YoutubeDL diff --git a/yt_dlp/aes.py b/yt_dlp/aes.py index e5d73f740..01818df61 100644 --- a/yt_dlp/aes.py +++ b/yt_dlp/aes.py @@ -1,15 +1,7 @@ from math import ceil -from .compat import ( - compat_b64decode, - compat_ord, - compat_pycrypto_AES, -) -from .utils import ( - bytes_to_intlist, - intlist_to_bytes, -) - +from .compat import compat_b64decode, compat_ord, compat_pycrypto_AES +from .utils import bytes_to_intlist, intlist_to_bytes if compat_pycrypto_AES: def aes_cbc_decrypt_bytes(data, key, iv): diff --git a/yt_dlp/cache.py b/yt_dlp/cache.py index f93ef85e7..0cac3ee88 100644 --- a/yt_dlp/cache.py +++ b/yt_dlp/cache.py @@ -6,10 +6,7 @@ import shutil import traceback from .compat import compat_getenv -from .utils import ( - expand_path, - write_json_file, -) +from .utils import expand_path, write_json_file class Cache: diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index 1d92fd8ce..6ff9f6f2d 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -16,17 +16,9 @@ from .aes import ( aes_gcm_decrypt_and_verify_bytes, unpad_pkcs7, ) -from .compat import ( - compat_b64decode, - compat_cookiejar_Cookie, -) +from .compat import compat_b64decode, compat_cookiejar_Cookie from .minicurses import MultilinePrinter, QuietMultilinePrinter -from .utils import ( - error_to_str, - expand_path, - Popen, - 
YoutubeDLCookieJar, -) +from .utils import Popen, YoutubeDLCookieJar, error_to_str, expand_path try: import sqlite3 diff --git a/yt_dlp/downloader/__init__.py b/yt_dlp/downloader/__init__.py index f5abfd5df..5aba303dd 100644 --- a/yt_dlp/downloader/__init__.py +++ b/yt_dlp/downloader/__init__.py @@ -1,8 +1,5 @@ from ..compat import compat_str -from ..utils import ( - determine_protocol, - NO_DEFAULT -) +from ..utils import NO_DEFAULT, determine_protocol def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=None, to_stdout=False): @@ -27,21 +24,18 @@ def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=N # Some of these require get_suitable_downloader from .common import FileDownloader from .dash import DashSegmentsFD +from .external import FFmpegFD, get_external_downloader from .f4m import F4mFD from .fc2 import FC2LiveFD from .hls import HlsFD from .http import HttpFD -from .rtmp import RtmpFD -from .rtsp import RtspFD from .ism import IsmFD from .mhtml import MhtmlFD from .niconico import NiconicoDmcFD +from .rtmp import RtmpFD +from .rtsp import RtspFD from .websocket import WebSocketFragmentFD from .youtube_live_chat import YoutubeLiveChatFD -from .external import ( - get_external_downloader, - FFmpegFD, -) PROTOCOL_MAP = { 'rtmp': RtmpFD, diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index d42539931..3033926ae 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -1,26 +1,26 @@ +import errno import os +import random import re import time -import random -import errno +from ..minicurses import ( + BreaklineStatusPrinter, + MultilineLogger, + MultilinePrinter, + QuietMultilinePrinter, +) from ..utils import ( + LockingUnsupportedError, decodeArgument, encodeFilename, error_to_compat_str, format_bytes, - LockingUnsupportedError, sanitize_open, shell_quote, timeconvert, timetuple_from_msec, ) -from ..minicurses import ( - MultilineLogger, - MultilinePrinter, - QuietMultilinePrinter, - BreaklineStatusPrinter -) class FileDownloader: diff --git a/yt_dlp/downloader/dash.py b/yt_dlp/downloader/dash.py index 64eb5e66a..e6efae485 100644 --- a/yt_dlp/downloader/dash.py +++ b/yt_dlp/downloader/dash.py @@ -1,8 +1,7 @@ import time -from ..downloader import get_suitable_downloader from .fragment import FragmentFD - +from ..downloader import get_suitable_downloader from ..utils import urljoin diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index b6dd32701..6c5616c60 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -5,23 +5,20 @@ import sys import time from .fragment import FragmentFD -from ..compat import ( - compat_setenv, - compat_str, -) -from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS +from ..compat import compat_setenv, compat_str +from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor from ..utils import ( + Popen, + _configuration_args, + check_executable, classproperty, + cli_bool_option, cli_option, cli_valueless_option, - cli_bool_option, - _configuration_args, determine_ext, - encodeFilename, encodeArgument, + encodeFilename, handle_youtubedl_headers, - check_executable, - Popen, remove_end, ) diff --git a/yt_dlp/downloader/f4m.py b/yt_dlp/downloader/f4m.py index 414071075..12ecec008 100644 --- a/yt_dlp/downloader/f4m.py +++ b/yt_dlp/downloader/f4m.py @@ -6,16 +6,13 @@ from .fragment import FragmentFD from ..compat import ( compat_b64decode, compat_etree_fromstring, - compat_urlparse, - 
compat_urllib_error, - compat_urllib_parse_urlparse, compat_struct_pack, compat_struct_unpack, + compat_urllib_error, + compat_urllib_parse_urlparse, + compat_urlparse, ) -from ..utils import ( - fix_xml_ampersands, - xpath_text, -) +from ..utils import fix_xml_ampersands, xpath_text class DataTruncatedError(Exception): diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index 217b89e3f..a2a2fe950 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -13,15 +13,11 @@ except ImportError: from .common import FileDownloader from .http import HttpFD from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 -from ..compat import ( - compat_os_name, - compat_urllib_error, - compat_struct_pack, -) +from ..compat import compat_os_name, compat_struct_pack, compat_urllib_error from ..utils import ( DownloadError, - error_to_compat_str, encodeFilename, + error_to_compat_str, sanitized_Request, traverse_obj, ) diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index 00695f93f..2d65f48ae 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -1,21 +1,13 @@ -import re -import io import binascii +import io +import re -from ..downloader import get_suitable_downloader -from .fragment import FragmentFD from .external import FFmpegFD - -from ..compat import ( - compat_pycrypto_AES, - compat_urlparse, -) -from ..utils import ( - parse_m3u8_attributes, - update_url_query, - bug_reports_message, -) +from .fragment import FragmentFD from .. import webvtt +from ..compat import compat_pycrypto_AES, compat_urlparse +from ..downloader import get_suitable_downloader +from ..utils import bug_reports_message, parse_m3u8_attributes, update_url_query class HlsFD(FragmentFD): diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index 03efbf1cd..d590dbfbd 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -1,24 +1,21 @@ import os +import random import ssl import time -import random from .common import FileDownloader -from ..compat import ( - compat_urllib_error, - compat_http_client -) +from ..compat import compat_http_client, compat_urllib_error from ..utils import ( ContentTooShortError, + ThrottledDownload, + XAttrMetadataError, + XAttrUnavailableError, encodeFilename, int_or_none, parse_http_range, sanitized_Request, - ThrottledDownload, try_call, write_xattr, - XAttrMetadataError, - XAttrUnavailableError, ) RESPONSE_READ_EXCEPTIONS = (TimeoutError, ConnectionError, ssl.SSLError, compat_http_client.HTTPException) diff --git a/yt_dlp/downloader/ism.py b/yt_dlp/downloader/ism.py index ca4ca3a19..82ed51e88 100644 --- a/yt_dlp/downloader/ism.py +++ b/yt_dlp/downloader/ism.py @@ -1,13 +1,10 @@ -import time import binascii import io import struct +import time from .fragment import FragmentFD -from ..compat import ( - compat_urllib_error, -) - +from ..compat import compat_urllib_error u8 = struct.Struct('>B') u88 = struct.Struct('>Bx') diff --git a/yt_dlp/downloader/mhtml.py b/yt_dlp/downloader/mhtml.py index 5a322f1db..7bc3ab049 100644 --- a/yt_dlp/downloader/mhtml.py +++ b/yt_dlp/downloader/mhtml.py @@ -4,12 +4,7 @@ import re import uuid from .fragment import FragmentFD -from ..utils import ( - escapeHTML, - formatSeconds, - srt_subtitles_timecode, - urljoin, -) +from ..utils import escapeHTML, formatSeconds, srt_subtitles_timecode, urljoin from ..version import __version__ as YT_DLP_VERSION diff --git a/yt_dlp/downloader/rtmp.py b/yt_dlp/downloader/rtmp.py index 12aa04cf3..3464eeef9 100644 --- 
a/yt_dlp/downloader/rtmp.py +++ b/yt_dlp/downloader/rtmp.py @@ -6,11 +6,11 @@ import time from .common import FileDownloader from ..compat import compat_str from ..utils import ( + Popen, check_executable, - encodeFilename, encodeArgument, + encodeFilename, get_exe_version, - Popen, ) diff --git a/yt_dlp/downloader/rtsp.py b/yt_dlp/downloader/rtsp.py index 26dbd9ef7..e89269fed 100644 --- a/yt_dlp/downloader/rtsp.py +++ b/yt_dlp/downloader/rtsp.py @@ -2,10 +2,7 @@ import os import subprocess from .common import FileDownloader -from ..utils import ( - check_executable, - encodeFilename, -) +from ..utils import check_executable, encodeFilename class RtspFD(FileDownloader): diff --git a/yt_dlp/downloader/websocket.py b/yt_dlp/downloader/websocket.py index daac34884..96d113846 100644 --- a/yt_dlp/downloader/websocket.py +++ b/yt_dlp/downloader/websocket.py @@ -1,6 +1,6 @@ +import asyncio import os import signal -import asyncio import threading try: diff --git a/yt_dlp/downloader/youtube_live_chat.py b/yt_dlp/downloader/youtube_live_chat.py index 36c82b03b..7f06dfb48 100644 --- a/yt_dlp/downloader/youtube_live_chat.py +++ b/yt_dlp/downloader/youtube_live_chat.py @@ -3,13 +3,8 @@ import time from .fragment import FragmentFD from ..compat import compat_urllib_error -from ..utils import ( - try_get, - dict_get, - int_or_none, - RegexNotFoundError, -) from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE +from ..utils import RegexNotFoundError, dict_get, int_or_none, try_get class YoutubeLiveChatFD(FragmentFD): diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py index c7db05475..0dc8dea26 100644 --- a/yt_dlp/extractor/abematv.py +++ b/yt_dlp/extractor/abematv.py @@ -1,35 +1,31 @@ -import io -import json -import time +import base64 +import binascii import hashlib import hmac +import io +import json import re import struct +import time import urllib.response import uuid -from base64 import urlsafe_b64encode -from binascii import unhexlify from .common import InfoExtractor from ..aes import aes_ecb_decrypt -from ..compat import ( - compat_urllib_parse_urlparse, - compat_urllib_request, -) +from ..compat import compat_urllib_parse_urlparse, compat_urllib_request from ..utils import ( ExtractorError, + bytes_to_intlist, decode_base, int_or_none, + intlist_to_bytes, request_to_url, time_seconds, - update_url_query, traverse_obj, - intlist_to_bytes, - bytes_to_intlist, + update_url_query, urljoin, ) - # NOTE: network handler related code is temporary thing until network stack overhaul PRs are merged (#2861/#2862) def add_opener(ydl, handler): @@ -130,7 +126,7 @@ class AbemaLicenseHandler(compat_urllib_request.BaseHandler): encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff)) h = hmac.new( - unhexlify(self.HKEY), + binascii.unhexlify(self.HKEY), (license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'), digestmod=hashlib.sha256) enckey = bytes_to_intlist(h.digest()) @@ -238,7 +234,7 @@ class AbemaTVIE(AbemaTVBaseIE): def mix_twist(nonce): nonlocal tmp - mix_once(urlsafe_b64encode(tmp).rstrip(b'=') + nonce) + mix_once(base64.urlsafe_b64encode(tmp).rstrip(b'=') + nonce) mix_once(self._SECRETKEY) mix_tmp(time_struct.tm_mon) @@ -247,7 +243,7 @@ class AbemaTVIE(AbemaTVBaseIE): mix_twist(ts_1hour_str) mix_tmp(time_struct.tm_hour % 5) - return urlsafe_b64encode(tmp).rstrip(b'=').decode('utf-8') + return base64.urlsafe_b64encode(tmp).rstrip(b'=').decode('utf-8') def _get_device_token(self): if self._USERTOKEN: diff --git 
a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index ef22c7876..10b297708 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1,16 +1,16 @@ import base64 import collections -import xml.etree.ElementTree import hashlib import itertools import json +import math import netrc import os import random import re import sys import time -import math +import xml.etree.ElementTree from ..compat import ( compat_cookiejar_Cookie, @@ -29,11 +29,15 @@ from ..compat import ( compat_urlparse, ) from ..downloader import FileDownloader -from ..downloader.f4m import ( - get_base_url, - remove_encrypted_media, -) +from ..downloader.f4m import get_base_url, remove_encrypted_media from ..utils import ( + JSON_LD_RE, + NO_DEFAULT, + ExtractorError, + GeoRestrictedError, + GeoUtils, + RegexNotFoundError, + UnsupportedError, age_restricted, base_url, bug_reports_message, @@ -44,20 +48,15 @@ from ..utils import ( encode_data_uri, error_to_compat_str, extract_attributes, - ExtractorError, filter_dict, fix_xml_ampersands, float_or_none, format_field, - GeoRestrictedError, - GeoUtils, int_or_none, join_nonempty, js_to_json, - JSON_LD_RE, mimetype2ext, network_exceptions, - NO_DEFAULT, orderedSet, parse_bitrate, parse_codecs, @@ -65,7 +64,6 @@ from ..utils import ( parse_iso8601, parse_m3u8_attributes, parse_resolution, - RegexNotFoundError, sanitize_filename, sanitized_Request, str_or_none, @@ -74,7 +72,6 @@ from ..utils import ( traverse_obj, try_get, unescapeHTML, - UnsupportedError, unified_strdate, unified_timestamp, update_Request, diff --git a/yt_dlp/extractor/commonprotocols.py b/yt_dlp/extractor/commonprotocols.py index 40475f7ec..e8f19b9e0 100644 --- a/yt_dlp/extractor/commonprotocols.py +++ b/yt_dlp/extractor/commonprotocols.py @@ -1,7 +1,5 @@ from .common import InfoExtractor -from ..compat import ( - compat_urlparse, -) +from ..compat import compat_urlparse class RtmpIE(InfoExtractor): diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index f44f19a54..c708b4cee 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -2,8 +2,107 @@ import os import re import xml.etree.ElementTree +from .ant1newsgr import Ant1NewsGrEmbedIE +from .anvato import AnvatoIE +from .apa import APAIE +from .arcpublishing import ArcPublishingIE +from .arkena import ArkenaIE +from .arte import ArteTVEmbedIE +from .bitchute import BitChuteIE +from .blogger import BloggerIE +from .brightcove import BrightcoveLegacyIE, BrightcoveNewIE +from .channel9 import Channel9IE +from .cloudflarestream import CloudflareStreamIE from .common import InfoExtractor +from .commonprotocols import RtmpIE +from .condenast import CondeNastIE +from .dailymail import DailyMailIE +from .dailymotion import DailymotionIE +from .dbtv import DBTVIE +from .digiteka import DigitekaIE +from .drtuber import DrTuberIE +from .eagleplatform import EaglePlatformIE +from .ertgr import ERTWebtvEmbedIE +from .expressen import ExpressenIE +from .facebook import FacebookIE +from .foxnews import FoxNewsIE +from .gedidigital import GediDigitalIE +from .gfycat import GfycatIE +from .glomex import GlomexEmbedIE +from .googledrive import GoogleDriveIE +from .indavideo import IndavideoEmbedIE +from .instagram import InstagramIE +from .joj import JojIE +from .jwplatform import JWPlatformIE +from .kaltura import KalturaIE +from .kinja import KinjaEmbedIE +from .limelight import LimelightBaseIE +from .mainstreaming import MainStreamingIE +from .medialaan import MedialaanIE +from .mediaset import MediasetIE 
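An import block the size of the generic.py one above is sorted mechanically rather than by hand. A hedged sketch of reproducing such a reordering with isort's Python API (the project's actual isort configuration is an assumption, not part of this patch):

import isort

unsorted = (
    'from .youtube import YoutubeIE\n'
    'from .anvato import AnvatoIE\n'
)
# isort.code() returns the source with its imports alphabetized, which is
# the transformation applied across these hunks
print(isort.code(unsorted))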
+from .mediasite import MediasiteIE +from .megaphone import MegaphoneIE +from .megatvcom import MegaTVComEmbedIE +from .mofosex import MofosexEmbedIE +from .mtv import MTVServicesEmbeddedIE +from .myvi import MyviIE +from .nbc import NBCSportsVPlayerIE +from .nexx import NexxEmbedIE, NexxIE +from .odnoklassniki import OdnoklassnikiIE +from .onionstudios import OnionStudiosIE +from .ooyala import OoyalaIE +from .panopto import PanoptoBaseIE +from .peertube import PeerTubeIE +from .piksel import PikselIE +from .pladform import PladformIE +from .pornhub import PornHubIE +from .rcs import RCSEmbedsIE +from .redtube import RedTubeIE +from .rumble import RumbleEmbedIE +from .rutube import RutubeIE +from .rutv import RUTVIE +from .ruutu import RuutuIE +from .senategov import SenateISVPIE +from .simplecast import SimplecastIE +from .soundcloud import SoundcloudEmbedIE +from .spankwire import SpankwireIE +from .sportbox import SportBoxIE +from .springboardplatform import SpringboardPlatformIE +from .svt import SVTIE +from .teachable import TeachableIE +from .ted import TedEmbedIE +from .theplatform import ThePlatformIE +from .threeqsdn import ThreeQSDNIE +from .tnaflix import TNAFlixNetworkEmbedIE +from .tube8 import Tube8IE +from .tunein import TuneInBaseIE +from .tvc import TVCIE +from .tvopengr import TVOpenGrEmbedIE +from .tvp import TVPEmbedIE +from .twentymin import TwentyMinutenIE +from .udn import UDNEmbedIE +from .ustream import UstreamIE +from .vbox7 import Vbox7IE +from .vice import ViceIE +from .videa import VideaIE +from .videomore import VideomoreIE +from .videopress import VideoPressIE +from .viewlift import ViewLiftEmbedIE +from .vimeo import VHXEmbedIE, VimeoIE +from .viqeo import ViqeoIE +from .vk import VKIE +from .vshare import VShareIE +from .vzaar import VzaarIE +from .washingtonpost import WashingtonPostIE +from .webcaster import WebcasterFeedIE +from .wimtv import WimTVIE +from .wistia import WistiaIE +from .xfileshare import XFileShareIE +from .xhamster import XHamsterEmbedIE +from .yapfiles import YapFilesIE +from .youporn import YouPornIE from .youtube import YoutubeIE +from .zype import ZypeIE from ..compat import ( compat_etree_fromstring, compat_str, @@ -11,15 +110,16 @@ from ..compat import ( compat_urlparse, ) from ..utils import ( + KNOWN_EXTENSIONS, + ExtractorError, + HEADRequest, + UnsupportedError, determine_ext, dict_get, - ExtractorError, float_or_none, - HEADRequest, int_or_none, is_html, js_to_json, - KNOWN_EXTENSIONS, merge_dicts, mimetype2ext, orderedSet, @@ -31,120 +131,11 @@ from ..utils import ( unescapeHTML, unified_timestamp, unsmuggle_url, - UnsupportedError, url_or_none, xpath_attr, xpath_text, xpath_with_ns, ) -from .commonprotocols import RtmpIE -from .brightcove import ( - BrightcoveLegacyIE, - BrightcoveNewIE, -) -from .nexx import ( - NexxIE, - NexxEmbedIE, -) -from .nbc import NBCSportsVPlayerIE -from .ooyala import OoyalaIE -from .rutv import RUTVIE -from .tvc import TVCIE -from .sportbox import SportBoxIE -from .myvi import MyviIE -from .condenast import CondeNastIE -from .udn import UDNEmbedIE -from .senategov import SenateISVPIE -from .svt import SVTIE -from .pornhub import PornHubIE -from .xhamster import XHamsterEmbedIE -from .tnaflix import TNAFlixNetworkEmbedIE -from .drtuber import DrTuberIE -from .redtube import RedTubeIE -from .tube8 import Tube8IE -from .mofosex import MofosexEmbedIE -from .spankwire import SpankwireIE -from .youporn import YouPornIE -from .vimeo import ( - VimeoIE, - VHXEmbedIE, -) -from .dailymotion import 
DailymotionIE -from .dailymail import DailyMailIE -from .onionstudios import OnionStudiosIE -from .viewlift import ViewLiftEmbedIE -from .mtv import MTVServicesEmbeddedIE -from .pladform import PladformIE -from .videomore import VideomoreIE -from .webcaster import WebcasterFeedIE -from .googledrive import GoogleDriveIE -from .jwplatform import JWPlatformIE -from .digiteka import DigitekaIE -from .arkena import ArkenaIE -from .instagram import InstagramIE -from .threeqsdn import ThreeQSDNIE -from .theplatform import ThePlatformIE -from .kaltura import KalturaIE -from .eagleplatform import EaglePlatformIE -from .facebook import FacebookIE -from .soundcloud import SoundcloudEmbedIE -from .tunein import TuneInBaseIE -from .vbox7 import Vbox7IE -from .dbtv import DBTVIE -from .piksel import PikselIE -from .videa import VideaIE -from .twentymin import TwentyMinutenIE -from .ustream import UstreamIE -from .arte import ArteTVEmbedIE -from .videopress import VideoPressIE -from .rutube import RutubeIE -from .glomex import GlomexEmbedIE -from .megatvcom import MegaTVComEmbedIE -from .ant1newsgr import Ant1NewsGrEmbedIE -from .limelight import LimelightBaseIE -from .anvato import AnvatoIE -from .washingtonpost import WashingtonPostIE -from .wistia import WistiaIE -from .mediaset import MediasetIE -from .joj import JojIE -from .megaphone import MegaphoneIE -from .vzaar import VzaarIE -from .channel9 import Channel9IE -from .vshare import VShareIE -from .mediasite import MediasiteIE -from .springboardplatform import SpringboardPlatformIE -from .ted import TedEmbedIE -from .yapfiles import YapFilesIE -from .vice import ViceIE -from .xfileshare import XFileShareIE -from .cloudflarestream import CloudflareStreamIE -from .peertube import PeerTubeIE -from .teachable import TeachableIE -from .indavideo import IndavideoEmbedIE -from .apa import APAIE -from .foxnews import FoxNewsIE -from .viqeo import ViqeoIE -from .expressen import ExpressenIE -from .zype import ZypeIE -from .odnoklassniki import OdnoklassnikiIE -from .vk import VKIE -from .kinja import KinjaEmbedIE -from .gedidigital import GediDigitalIE -from .rcs import RCSEmbedsIE -from .bitchute import BitChuteIE -from .rumble import RumbleEmbedIE -from .arcpublishing import ArcPublishingIE -from .medialaan import MedialaanIE -from .simplecast import SimplecastIE -from .wimtv import WimTVIE -from .tvopengr import TVOpenGrEmbedIE -from .ertgr import ERTWebtvEmbedIE -from .tvp import TVPEmbedIE -from .blogger import BloggerIE -from .mainstreaming import MainStreamingIE -from .gfycat import GfycatIE -from .panopto import PanoptoBaseIE -from .ruutu import RuutuIE class GenericIE(InfoExtractor): diff --git a/yt_dlp/extractor/mtv.py b/yt_dlp/extractor/mtv.py index 3ef851e0b..d161c33c1 100644 --- a/yt_dlp/extractor/mtv.py +++ b/yt_dlp/extractor/mtv.py @@ -1,9 +1,7 @@ import re from .common import InfoExtractor -from ..compat import ( - compat_str, -) +from ..compat import compat_str from ..utils import ( ExtractorError, find_xpath_attr, diff --git a/yt_dlp/extractor/noz.py b/yt_dlp/extractor/noz.py index 22cb08e8a..b42a56f7e 100644 --- a/yt_dlp/extractor/noz.py +++ b/yt_dlp/extractor/noz.py @@ -1,13 +1,11 @@ from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_unquote, -) from ..utils import ( int_or_none, find_xpath_attr, xpath_text, update_url_query, ) +from ..compat import compat_urllib_parse_unquote class NozIE(InfoExtractor): diff --git a/yt_dlp/extractor/openload.py b/yt_dlp/extractor/openload.py index 41ef2e892..f2600aaa4 
100644 --- a/yt_dlp/extractor/openload.py +++ b/yt_dlp/extractor/openload.py @@ -3,16 +3,14 @@ import os import subprocess import tempfile -from ..compat import ( - compat_urlparse, -) +from ..compat import compat_urlparse from ..utils import ( + ExtractorError, + Popen, check_executable, encodeArgument, - ExtractorError, get_exe_version, is_outdated_version, - Popen, ) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 21c6143bd..dee1b2315 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -10,9 +10,9 @@ import os.path import random import re import sys +import threading import time import traceback -import threading from .common import InfoExtractor, SearchInfoExtractor from ..compat import ( @@ -27,12 +27,13 @@ from ..compat import ( ) from ..jsinterp import JSInterpreter from ..utils import ( + NO_DEFAULT, + ExtractorError, bug_reports_message, clean_html, datetime_from_str, dict_get, error_to_compat_str, - ExtractorError, float_or_none, format_field, get_first, @@ -42,7 +43,6 @@ from ..utils import ( js_to_json, mimetype2ext, network_exceptions, - NO_DEFAULT, orderedSet, parse_codecs, parse_count, @@ -68,7 +68,6 @@ from ..utils import ( variadic, ) - # any clients starting with _ cannot be explicity requested by the user INNERTUBE_CLIENTS = { 'web': { diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py index 3695a282d..001836887 100644 --- a/yt_dlp/jsinterp.py +++ b/yt_dlp/jsinterp.py @@ -1,12 +1,9 @@ -from collections.abc import MutableMapping import json import operator import re +from collections.abc import MutableMapping -from .utils import ( - ExtractorError, - remove_quotes, -) +from .utils import ExtractorError, remove_quotes _OPERATORS = [ ('|', operator.or_), diff --git a/yt_dlp/minicurses.py b/yt_dlp/minicurses.py index d7a8ffddd..9fd679a48 100644 --- a/yt_dlp/minicurses.py +++ b/yt_dlp/minicurses.py @@ -1,7 +1,7 @@ import functools from threading import Lock -from .utils import supports_terminal_sequences, write_string +from .utils import supports_terminal_sequences, write_string CONTROL_SEQUENCES = { 'DOWN': '\n', diff --git a/yt_dlp/options.py b/yt_dlp/options.py index c434e32b9..243beab4d 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1,26 +1,11 @@ -import os.path import optparse +import os.path import re import shlex import sys -from .compat import ( - compat_expanduser, - compat_get_terminal_size, - compat_getenv, -) -from .utils import ( - Config, - expand_path, - get_executable_path, - OUTTMPL_TYPES, - POSTPROCESS_WHEN, - remove_end, - write_string, -) +from .compat import compat_expanduser, compat_get_terminal_size, compat_getenv from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS -from .version import __version__ - from .downloader.external import list_external_downloaders from .postprocessor import ( FFmpegExtractAudioPP, @@ -30,6 +15,16 @@ from .postprocessor import ( SponsorBlockPP, ) from .postprocessor.modify_chapters import DEFAULT_SPONSORBLOCK_CHAPTER_TITLE +from .utils import ( + OUTTMPL_TYPES, + POSTPROCESS_WHEN, + Config, + expand_path, + get_executable_path, + remove_end, + write_string, +) +from .version import __version__ def parseOpts(overrideArguments=None, ignore_config_files='if_override'): diff --git a/yt_dlp/postprocessor/__init__.py b/yt_dlp/postprocessor/__init__.py index e47631eb6..f168be46a 100644 --- a/yt_dlp/postprocessor/__init__.py +++ b/yt_dlp/postprocessor/__init__.py @@ -1,27 +1,25 @@ # flake8: noqa: F401 -from ..utils import load_plugins - from .common import 
PostProcessor from .embedthumbnail import EmbedThumbnailPP -from .exec import ExecPP, ExecAfterDownloadPP +from .exec import ExecAfterDownloadPP, ExecPP from .ffmpeg import ( - FFmpegPostProcessor, - FFmpegCopyStreamPP, FFmpegConcatPP, + FFmpegCopyStreamPP, FFmpegEmbedSubtitlePP, FFmpegExtractAudioPP, FFmpegFixupDuplicateMoovPP, FFmpegFixupDurationPP, - FFmpegFixupStretchedPP, - FFmpegFixupTimestampPP, FFmpegFixupM3u8PP, FFmpegFixupM4aPP, + FFmpegFixupStretchedPP, + FFmpegFixupTimestampPP, FFmpegMergerPP, FFmpegMetadataPP, + FFmpegPostProcessor, + FFmpegSplitChaptersPP, FFmpegSubtitlesConvertorPP, FFmpegThumbnailsConvertorPP, - FFmpegSplitChaptersPP, FFmpegVideoConvertorPP, FFmpegVideoRemuxerPP, ) @@ -35,6 +33,7 @@ from .movefilesafterdownload import MoveFilesAfterDownloadPP from .sponskrub import SponSkrubPP from .sponsorblock import SponsorBlockPP from .xattrpp import XAttrMetadataPP +from ..utils import load_plugins _PLUGIN_CLASSES = load_plugins('postprocessor', 'PP', globals()) diff --git a/yt_dlp/postprocessor/common.py b/yt_dlp/postprocessor/common.py index 3f55b24f2..ce6dec2f5 100644 --- a/yt_dlp/postprocessor/common.py +++ b/yt_dlp/postprocessor/common.py @@ -6,10 +6,10 @@ import time import urllib.error from ..utils import ( + PostProcessingError, _configuration_args, encodeFilename, network_exceptions, - PostProcessingError, sanitized_Request, write_string, ) diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py index 2fca97784..5469f25e0 100644 --- a/yt_dlp/postprocessor/embedthumbnail.py +++ b/yt_dlp/postprocessor/embedthumbnail.py @@ -1,11 +1,11 @@ import base64 import imghdr import os -import subprocess import re +import subprocess try: - from mutagen.flac import Picture, FLAC + from mutagen.flac import FLAC, Picture from mutagen.mp4 import MP4, MP4Cover from mutagen.oggopus import OggOpus from mutagen.oggvorbis import OggVorbis @@ -14,17 +14,14 @@ except ImportError: has_mutagen = False from .common import PostProcessor -from .ffmpeg import ( - FFmpegPostProcessor, - FFmpegThumbnailsConvertorPP, -) +from .ffmpeg import FFmpegPostProcessor, FFmpegThumbnailsConvertorPP from ..utils import ( + Popen, + PostProcessingError, check_executable, encodeArgument, encodeFilename, error_to_compat_str, - Popen, - PostProcessingError, prepend_extension, shell_quote, ) diff --git a/yt_dlp/postprocessor/exec.py b/yt_dlp/postprocessor/exec.py index 6621889d5..cfc83167c 100644 --- a/yt_dlp/postprocessor/exec.py +++ b/yt_dlp/postprocessor/exec.py @@ -2,11 +2,7 @@ import subprocess from .common import PostProcessor from ..compat import compat_shlex_quote -from ..utils import ( - encodeArgument, - PostProcessingError, - variadic, -) +from ..utils import PostProcessingError, encodeArgument, variadic class ExecPP(PostProcessor): diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 3175c8d10..69182618b 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -1,27 +1,26 @@ import collections import itertools +import json import os +import re import subprocess import time -import re -import json from .common import AudioConversionError, PostProcessor - from ..compat import compat_str from ..utils import ( + ISO639Utils, + Popen, + PostProcessingError, + _get_exe_version_output, + detect_exe_version, determine_ext, dfxp2srt, encodeArgument, encodeFilename, float_or_none, - _get_exe_version_output, - detect_exe_version, is_outdated_version, - ISO639Utils, orderedSet, - Popen, - PostProcessingError, 
prepend_extension, replace_extension, shell_quote, @@ -30,7 +29,6 @@ from ..utils import ( write_json_file, ) - EXT_TO_OUT_FORMATS = { 'aac': 'adts', 'flac': 'flac', diff --git a/yt_dlp/postprocessor/modify_chapters.py b/yt_dlp/postprocessor/modify_chapters.py index 22506bc21..7e2c23288 100644 --- a/yt_dlp/postprocessor/modify_chapters.py +++ b/yt_dlp/postprocessor/modify_chapters.py @@ -3,17 +3,9 @@ import heapq import os from .common import PostProcessor -from .ffmpeg import ( - FFmpegPostProcessor, - FFmpegSubtitlesConvertorPP -) +from .ffmpeg import FFmpegPostProcessor, FFmpegSubtitlesConvertorPP from .sponsorblock import SponsorBlockPP -from ..utils import ( - orderedSet, - PostProcessingError, - prepend_extension, -) - +from ..utils import PostProcessingError, orderedSet, prepend_extension _TINY_CHAPTER_DURATION = 1 DEFAULT_SPONSORBLOCK_CHAPTER_TITLE = '[SponsorBlock]: %(category_names)l' diff --git a/yt_dlp/postprocessor/movefilesafterdownload.py b/yt_dlp/postprocessor/movefilesafterdownload.py index bc3d15ca4..436d13227 100644 --- a/yt_dlp/postprocessor/movefilesafterdownload.py +++ b/yt_dlp/postprocessor/movefilesafterdownload.py @@ -3,10 +3,10 @@ import shutil from .common import PostProcessor from ..utils import ( + PostProcessingError, decodeFilename, encodeFilename, make_dir, - PostProcessingError, ) diff --git a/yt_dlp/postprocessor/sponskrub.py b/yt_dlp/postprocessor/sponskrub.py index 38089de08..1a9f5dc66 100644 --- a/yt_dlp/postprocessor/sponskrub.py +++ b/yt_dlp/postprocessor/sponskrub.py @@ -4,15 +4,15 @@ import subprocess from .common import PostProcessor from ..utils import ( + Popen, + PostProcessingError, check_executable, cli_option, encodeArgument, encodeFilename, + prepend_extension, shell_quote, str_or_none, - Popen, - PostProcessingError, - prepend_extension, ) diff --git a/yt_dlp/postprocessor/sponsorblock.py b/yt_dlp/postprocessor/sponsorblock.py index 7943014e2..501e30320 100644 --- a/yt_dlp/postprocessor/sponsorblock.py +++ b/yt_dlp/postprocessor/sponsorblock.py @@ -1,6 +1,6 @@ -from hashlib import sha256 import json import re +from hashlib import sha256 from .ffmpeg import FFmpegPostProcessor from ..compat import compat_urllib_parse_urlencode diff --git a/yt_dlp/postprocessor/xattrpp.py b/yt_dlp/postprocessor/xattrpp.py index 5ad8509e7..3c431941b 100644 --- a/yt_dlp/postprocessor/xattrpp.py +++ b/yt_dlp/postprocessor/xattrpp.py @@ -1,11 +1,11 @@ from .common import PostProcessor from ..compat import compat_os_name from ..utils import ( - hyphenate_date, - write_xattr, PostProcessingError, XAttrMetadataError, XAttrUnavailableError, + hyphenate_date, + write_xattr, ) diff --git a/yt_dlp/socks.py b/yt_dlp/socks.py index ffa960e03..56fab08ab 100644 --- a/yt_dlp/socks.py +++ b/yt_dlp/socks.py @@ -9,11 +9,7 @@ import collections import socket -from .compat import ( - compat_ord, - compat_struct_pack, - compat_struct_unpack, -) +from .compat import compat_ord, compat_struct_pack, compat_struct_unpack __author__ = 'Timo Schmid ' diff --git a/yt_dlp/update.py b/yt_dlp/update.py index 7db260e96..eea08ce43 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -8,8 +8,7 @@ import traceback from zipimport import zipimporter from .compat import compat_realpath -from .utils import encode_compat_str, Popen, write_string - +from .utils import Popen, encode_compat_str, write_string from .version import __version__ diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 91e1a9870..25ac864f3 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -9,8 +9,8 @@ import collections 
import contextlib import ctypes import datetime -import email.utils import email.header +import email.utils import errno import functools import gzip @@ -22,11 +22,13 @@ import itertools import json import locale import math +import mimetypes import operator import os import platform import random import re +import shlex import socket import ssl import subprocess @@ -34,16 +36,11 @@ import sys import tempfile import time import traceback +import urllib.parse import xml.etree.ElementTree import zlib -import mimetypes -import urllib.parse -import shlex from .compat import ( - compat_HTMLParseError, - compat_HTMLParser, - compat_HTTPError, compat_brotli, compat_chr, compat_cookiejar, @@ -51,7 +48,10 @@ from .compat import ( compat_expanduser, compat_html_entities, compat_html_entities_html5, + compat_HTMLParseError, + compat_HTMLParser, compat_http_client, + compat_HTTPError, compat_os_name, compat_parse_qs, compat_shlex_quote, @@ -59,18 +59,14 @@ from .compat import ( compat_struct_pack, compat_struct_unpack, compat_urllib_error, + compat_urllib_parse_unquote_plus, compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, - compat_urllib_parse_unquote_plus, compat_urllib_request, compat_urlparse, compat_websockets, ) - -from .socks import ( - ProxyType, - sockssocket, -) +from .socks import ProxyType, sockssocket try: import certifi diff --git a/yt_dlp/webvtt.py b/yt_dlp/webvtt.py index e0d7f6743..3180eafde 100644 --- a/yt_dlp/webvtt.py +++ b/yt_dlp/webvtt.py @@ -8,13 +8,11 @@ Regular expressions based on the W3C WebVTT specification in RFC 8216 §3.5 . """ -import re import io +import re + +from .compat import compat_Match, compat_Pattern from .utils import int_or_none, timetuple_from_msec -from .compat import ( - compat_Pattern, - compat_Match, -) class _MatchParser: From e5a998f3684e7c56f9cf1c07c4e176e891d96509 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 12 Apr 2022 05:31:54 +0530 Subject: [PATCH 0944/2552] [cleanup] Misc cleanup (#2173) Authored by: fstirlitz, pukkandan --- Changelog.md | 2 +- README.md | 2 +- devscripts/bash-completion.py | 4 ++-- devscripts/fish-completion.py | 4 ++-- devscripts/make_lazy_extractors.py | 3 +-- devscripts/zsh-completion.py | 4 ++-- test/test_overwrites.py | 5 ++--- yt_dlp/YoutubeDL.py | 4 +++- yt_dlp/compat.py | 2 +- yt_dlp/extractor/abematv.py | 1 + yt_dlp/extractor/sonyliv.py | 30 +++++++++++++++++++--------- yt_dlp/postprocessor/sponsorblock.py | 4 ++-- yt_dlp/utils.py | 14 ++++++------- 13 files changed, 46 insertions(+), 33 deletions(-) diff --git a/Changelog.md b/Changelog.md index a4cf0e92e..3fb6260b8 100644 --- a/Changelog.md +++ b/Changelog.md @@ -15,7 +15,7 @@ * Use certificates from `certifi` if installed by [coletdjnz](https://github.com/coletdjnz) * Treat multiple `--match-filters` as OR -* File locking improvevemnts: +* File locking improvements: * Do not lock downloading file on Windows * Do not prevent download if locking is unsupported * Do not truncate files before locking by [jakeogh](https://github.com/jakeogh), [pukkandan](https://github.com/pukkandan) diff --git a/README.md b/README.md index f4b55f6d7..8a8477c9b 100644 --- a/README.md +++ b/README.md @@ -1748,7 +1748,7 @@ with YoutubeDL(ydl_opts) as ydl: ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc']) ``` -Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L197). +Most likely, you'll want to use various options. 
For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L181). Here's a more complete example demonstrating various functionality: diff --git a/devscripts/bash-completion.py b/devscripts/bash-completion.py index 73d698c39..27ec7ca7a 100755 --- a/devscripts/bash-completion.py +++ b/devscripts/bash-completion.py @@ -1,9 +1,9 @@ #!/usr/bin/env python3 import os import sys -from os.path import dirname as dirn -sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + import yt_dlp BASH_COMPLETION_FILE = "completions/bash/yt-dlp" diff --git a/devscripts/fish-completion.py b/devscripts/fish-completion.py index c318b69e4..dcb1d6582 100755 --- a/devscripts/fish-completion.py +++ b/devscripts/fish-completion.py @@ -2,9 +2,9 @@ import optparse import os import sys -from os.path import dirname as dirn -sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + import yt_dlp from yt_dlp.utils import shell_quote diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py index 6d5f96cf0..5e2070602 100644 --- a/devscripts/make_lazy_extractors.py +++ b/devscripts/make_lazy_extractors.py @@ -2,9 +2,8 @@ import os import sys from inspect import getsource -from os.path import dirname as dirn -sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) lazy_extractors_filename = sys.argv[1] if len(sys.argv) > 1 else 'yt_dlp/extractor/lazy_extractors.py' if os.path.exists(lazy_extractors_filename): diff --git a/devscripts/zsh-completion.py b/devscripts/zsh-completion.py index 2d5ac2a45..06660d8fd 100755 --- a/devscripts/zsh-completion.py +++ b/devscripts/zsh-completion.py @@ -1,9 +1,9 @@ #!/usr/bin/env python3 import os import sys -from os.path import dirname as dirn -sys.path.insert(0, dirn(dirn(os.path.abspath(__file__)))) +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + import yt_dlp ZSH_COMPLETION_FILE = "completions/zsh/_yt-dlp" diff --git a/test/test_overwrites.py b/test/test_overwrites.py index 39741b65c..a6d5bae40 100644 --- a/test/test_overwrites.py +++ b/test/test_overwrites.py @@ -3,14 +3,13 @@ import os import subprocess import sys import unittest -from os.path import join sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import is_download_test, try_rm root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -download_file = join(root_dir, 'test.webm') +download_file = os.path.join(root_dir, 'test.webm') @is_download_test @@ -44,7 +43,7 @@ class TestOverwrites(unittest.TestCase): self.assertTrue(os.path.getsize(download_file) > 1) def tearDown(self): - try_rm(join(root_dir, 'test.webm')) + try_rm(os.path.join(root_dir, 'test.webm')) if __name__ == '__main__': diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index a8bb7f45c..eaf2d9216 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -888,6 +888,7 @@ class YoutubeDL: SUPPRESS = 'light black' def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False): + text = str(text) if test_encoding: original_text = text # handle.encoding can be None. 
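The _format_text() hunk above makes two small behavioural changes: text is coerced with str(), so callers may pass non-string objects, and the style check is widened from the concrete Styles class to any Enum, presumably so other enum-based style sets are accepted (the commit message does not say). A simplified, self-contained sketch of the resulting logic, with a hypothetical enum standing in for YoutubeDL.Styles:

from enum import Enum


class Color(Enum):  # stand-in only, not the real Styles
    EMPHASIS = 'light blue'


def format_text_sketch(text, style):
    text = str(text)              # accept any object, not just str
    if isinstance(style, Enum):   # unwrap any Enum member, not one class
        style = style.value
    return f'<{style}>{text}</{style}>'


print(format_text_sketch(42, Color.EMPHASIS))  # <light blue>42</light blue>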
See https://github.com/yt-dlp/yt-dlp/issues/2711 @@ -895,7 +896,7 @@ class YoutubeDL: text = text.encode(encoding, 'ignore').decode(encoding) if fallback is not None and text != original_text: text = fallback - if isinstance(f, self.Styles): + if isinstance(f, Enum): f = f.value return format_text(text, f) if allow_colors else text if fallback is None else fallback @@ -1708,6 +1709,7 @@ class YoutubeDL: entries.append(entry) try: if entry is not None: + # TODO: Add auto-generated fields self._match_entry(entry, incomplete=True, silent=True) except (ExistingVideoReached, RejectedVideoReached): broken = True diff --git a/yt_dlp/compat.py b/yt_dlp/compat.py index 7a1500435..9d3a6bbfd 100644 --- a/yt_dlp/compat.py +++ b/yt_dlp/compat.py @@ -196,7 +196,7 @@ compat_urllib_request = urllib.request compat_urlparse = compat_urllib_parse = urllib.parse -# To be removed +# To be removed - Do not use compat_basestring = str compat_collections_abc = collections.abc diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py index 0dc8dea26..1b9deeae8 100644 --- a/yt_dlp/extractor/abematv.py +++ b/yt_dlp/extractor/abematv.py @@ -28,6 +28,7 @@ from ..utils import ( # NOTE: network handler related code is temporary thing until network stack overhaul PRs are merged (#2861/#2862) + def add_opener(ydl, handler): ''' Add a handler for opening URLs, like _download_webpage ''' # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426 diff --git a/yt_dlp/extractor/sonyliv.py b/yt_dlp/extractor/sonyliv.py index 771f890cc..17d28478f 100644 --- a/yt_dlp/extractor/sonyliv.py +++ b/yt_dlp/extractor/sonyliv.py @@ -1,4 +1,5 @@ import datetime +import json import math import random import time @@ -82,21 +83,32 @@ class SonyLIVIE(InfoExtractor): raise ExtractorError(f'Invalid username/password; {self._LOGIN_HINT}') self.report_login() - data = '''{"mobileNumber":"%s","channelPartnerID":"MSMIND","country":"IN","timestamp":"%s", - "otpSize":6,"loginType":"REGISTERORSIGNIN","isMobileMandatory":true} - ''' % (username, datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%MZ")) otp_request_json = self._download_json( 'https://apiv2.sonyliv.com/AGL/1.6/A/ENG/WEB/IN/HR/CREATEOTP-V2', - None, note='Sending OTP', data=data.encode(), headers=self._HEADERS) + None, note='Sending OTP', headers=self._HEADERS, data=json.dumps({ + 'mobileNumber': username, + 'channelPartnerID': 'MSMIND', + 'country': 'IN', + 'timestamp': datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%MZ'), + 'otpSize': 6, + 'loginType': 'REGISTERORSIGNIN', + 'isMobileMandatory': True, + }).encode()) if otp_request_json['resultCode'] == 'KO': raise ExtractorError(otp_request_json['message'], expected=True) - otp_code = self._get_tfa_info('OTP') - data = '''{"channelPartnerID":"MSMIND","mobileNumber":"%s","country":"IN","otp":"%s", - "dmaId":"IN","ageConfirmation":true,"timestamp":"%s","isMobileMandatory":true} - ''' % (username, otp_code, datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%MZ")) + otp_verify_json = self._download_json( 'https://apiv2.sonyliv.com/AGL/2.0/A/ENG/WEB/IN/HR/CONFIRMOTP-V2', - None, note='Verifying OTP', data=data.encode(), headers=self._HEADERS) + None, note='Verifying OTP', headers=self._HEADERS, data=json.dumps({ + 'channelPartnerID': 'MSMIND', + 'mobileNumber': username, + 'country': 'IN', + 'otp': self._get_tfa_info('OTP'), + 'dmaId': 'IN', + 'ageConfirmation': True, + 'timestamp': datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%MZ'), + 'isMobileMandatory': True, + }).encode()) if 
otp_verify_json['resultCode'] == 'KO': raise ExtractorError(otp_request_json['message'], expected=True) self._HEADERS['authorization'] = otp_verify_json['resultObj']['accessToken'] diff --git a/yt_dlp/postprocessor/sponsorblock.py b/yt_dlp/postprocessor/sponsorblock.py index 501e30320..7749ffe05 100644 --- a/yt_dlp/postprocessor/sponsorblock.py +++ b/yt_dlp/postprocessor/sponsorblock.py @@ -1,6 +1,6 @@ +import hashlib import json import re -from hashlib import sha256 from .ffmpeg import FFmpegPostProcessor from ..compat import compat_urllib_parse_urlencode @@ -84,7 +84,7 @@ class SponsorBlockPP(FFmpegPostProcessor): return sponsor_chapters def _get_sponsor_segments(self, video_id, service): - hash = sha256(video_id.encode('ascii')).hexdigest() + hash = hashlib.sha256(video_id.encode('ascii')).hexdigest() # SponsorBlock API recommends using first 4 hash characters. url = f'{self._API_URL}/api/skipSegments/{hash[:4]}?' + compat_urllib_parse_urlencode({ 'service': service, diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 25ac864f3..35e8d1d5b 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4793,12 +4793,12 @@ def random_birthday(year_field, month_field, day_field): # Templates for internet shortcut files, which are plain text files. -DOT_URL_LINK_TEMPLATE = ''' +DOT_URL_LINK_TEMPLATE = '''\ [InternetShortcut] URL=%(url)s -'''.lstrip() +''' -DOT_WEBLOC_LINK_TEMPLATE = ''' +DOT_WEBLOC_LINK_TEMPLATE = '''\ @@ -4807,16 +4807,16 @@ DOT_WEBLOC_LINK_TEMPLATE = ''' \t%(url)s -'''.lstrip() +''' -DOT_DESKTOP_LINK_TEMPLATE = ''' +DOT_DESKTOP_LINK_TEMPLATE = '''\ [Desktop Entry] Encoding=UTF-8 Name=%(filename)s Type=Link URL=%(url)s Icon=text-html -'''.lstrip() +''' LINK_TEMPLATES = { 'url': DOT_URL_LINK_TEMPLATE, @@ -4872,7 +4872,7 @@ def iri_to_uri(iri): def to_high_limit_path(path): if sys.platform in ['win32', 'cygwin']: # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited. 
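Both spellings of the prefix are equivalent: the old `r'\\?\ '.rstrip()` builds the four characters `\\?\` by stripping a trailing space from a raw string, while the new `'\\\\?\\'` writes them directly with escaped backslashes. A minimal, hedged sketch of the extended-length-path idiom follows; the function name here is illustrative, not yt-dlp's API:

```python
import os
import sys


def to_extended_length_path(path):
    # On Windows, prefixing an absolute path with \\?\ tells the Win32 API
    # to bypass the legacy 260-character MAX_PATH limit; on other platforms
    # the path is returned unchanged
    if sys.platform in ('win32', 'cygwin'):
        return '\\\\?\\' + os.path.abspath(path)
    return path


print(to_extended_length_path('videos/a very long file name.mp4'))
```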
- return r'\\?\ '.rstrip() + os.path.abspath(path) + return '\\\\?\\' + os.path.abspath(path) return path From b07897ef5bcdb865991d8601faef64a451da39fc Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 12 Apr 2022 05:23:27 +0530 Subject: [PATCH 0945/2552] [utils] certifi: Make sure the pem file exists Closes #3353 --- yt_dlp/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 35e8d1d5b..966548466 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -70,7 +70,8 @@ from .socks import ProxyType, sockssocket try: import certifi - has_certifi = True + # The certificate may not be bundled in executable + has_certifi = os.path.exists(certifi.where()) except ImportError: has_certifi = False From 66cf3e1001b6d9a2829fe834c3f9103b0890918e Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 12 Apr 2022 05:27:17 +0530 Subject: [PATCH 0946/2552] [EmbedSubtitle] Enable for more video extensions Closes #3382 --- yt_dlp/postprocessor/ffmpeg.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 69182618b..4b61693a2 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -577,14 +577,16 @@ class FFmpegVideoRemuxerPP(FFmpegVideoConvertorPP): class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): + SUPPORTED_EXTS = ('mp4', 'mov', 'm4a', 'webm', 'mkv', 'mka') + def __init__(self, downloader=None, already_have_subtitle=False): super().__init__(downloader) self._already_have_subtitle = already_have_subtitle @PostProcessor._restrict_to(images=False) def run(self, info): - if info['ext'] not in ('mp4', 'webm', 'mkv'): - self.to_screen('Subtitles can only be embedded in mp4, webm or mkv files') + if info['ext'] not in self.SUPPORTED_EXTS: + self.to_screen(f'Subtitles can only be embedded in {", ".join(self.SUPPORTED_EXTS)} files') return [], info subtitles = info.get('requested_subtitles') if not subtitles: From 743f39750cccf53bc320e057a6ed05e301e8ed48 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 12 Apr 2022 19:57:08 +0530 Subject: [PATCH 0947/2552] Fix bug in 66cf3e1001b6d9a2829fe834c3f9103b0890918e --- yt_dlp/postprocessor/ffmpeg.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 4b61693a2..6fe1b6cdd 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -187,8 +187,7 @@ class FFmpegPostProcessor(PostProcessor): yield from ('-dn', '-ignore_unknown') if copy: yield from ('-c', 'copy') - # For some reason, '-c copy -map 0' is not enough to copy subtitles - if ext in ('mp4', 'mov'): + if ext in ('mp4', 'mov', 'm4a'): yield from ('-c:s', 'mov_text') def get_audio_codec(self, path): From cda1bc51973c89b72b916dcc40dbe3d7f457097d Mon Sep 17 00:00:00 2001 From: Akmal <72781956+Wikidepia@users.noreply.github.com> Date: Wed, 13 Apr 2022 08:21:23 +0700 Subject: [PATCH 0948/2552] [facebook] Improve thumbnail extraction (#3392) Authored by: Wikidepia --- yt_dlp/extractor/facebook.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index 2e69dce0f..f15a36424 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -525,7 +525,8 @@ class FacebookIE(InfoExtractor): info = { 'id': v_id, 'formats': formats, - 'thumbnail': try_get(video, lambda x: x['thumbnailImage']['uri']), + 'thumbnail': traverse_obj( + video, 
('thumbnailImage', 'uri'), ('preferred_thumbnail', 'image', 'uri')), 'uploader_id': try_get(video, lambda x: x['owner']['id']), 'timestamp': int_or_none(video.get('publish_time')), 'duration': float_or_none(video.get('playable_duration_in_ms'), 1000), From a49e777d592ea8f0a21832b08ba2e70456d9914e Mon Sep 17 00:00:00 2001 From: Felix S Date: Thu, 14 Apr 2022 13:22:47 +0000 Subject: [PATCH 0949/2552] [spotify] Detect iframe embeds (#3430) Authored by: fstirlitz --- yt_dlp/extractor/generic.py | 6 ++++++ yt_dlp/extractor/spotify.py | 15 ++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index c708b4cee..8192fbb86 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -67,6 +67,7 @@ from .simplecast import SimplecastIE from .soundcloud import SoundcloudEmbedIE from .spankwire import SpankwireIE from .sportbox import SportBoxIE +from .spotify import SpotifyBaseIE from .springboardplatform import SpringboardPlatformIE from .svt import SVTIE from .teachable import TeachableIE @@ -3164,6 +3165,11 @@ class GenericIE(InfoExtractor): if sportbox_urls: return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie=SportBoxIE.ie_key()) + # Look for embedded Spotify player + spotify_urls = SpotifyBaseIE._extract_embed_urls(webpage) + if spotify_urls: + return self.playlist_from_matches(spotify_urls, video_id, video_title) + # Look for embedded XHamster player xhamster_urls = XHamsterEmbedIE._extract_urls(webpage) if xhamster_urls: diff --git a/yt_dlp/extractor/spotify.py b/yt_dlp/extractor/spotify.py index 3b8dea8f4..3128825e5 100644 --- a/yt_dlp/extractor/spotify.py +++ b/yt_dlp/extractor/spotify.py @@ -19,7 +19,7 @@ class SpotifyBaseIE(InfoExtractor): 'MinimalShow': '13ee079672fad3f858ea45a55eb109553b4fb0969ed793185b2e34cbb6ee7cc0', 'ShowEpisodes': 'e0e5ce27bd7748d2c59b4d44ba245a8992a05be75d6fabc3b20753fc8857444d', } - _VALID_URL_TEMPL = r'https?://open\.spotify\.com/%s/(?P[^/?&#]+)' + _VALID_URL_TEMPL = r'https?://open\.spotify\.com/(?:embed-podcast/|embed/|)%s/(?P[^/?&#]+)' def _real_initialize(self): self._ACCESS_TOKEN = self._download_json( @@ -93,11 +93,17 @@ class SpotifyBaseIE(InfoExtractor): 'series': series, } + @classmethod + def _extract_embed_urls(cls, webpage): + return re.findall( + r']+src="(https?://open\.spotify.com/embed/[^"]+)"', + webpage) + class SpotifyIE(SpotifyBaseIE): IE_NAME = 'spotify' _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'episode' - _TEST = { + _TESTS = [{ 'url': 'https://open.spotify.com/episode/4Z7GAJ50bgctf6uclHlWKo', 'md5': '74010a1e3fa4d9e1ab3aa7ad14e42d3b', 'info_dict': { @@ -109,7 +115,10 @@ class SpotifyIE(SpotifyBaseIE): 'release_date': '20201217', 'series': "The Guardian's Audio Long Reads", } - } + }, { + 'url': 'https://open.spotify.com/embed/episode/4TvCsKKs2thXmarHigWvXE?si=7eatS8AbQb6RxqO2raIuWA', + 'only_matching': True, + }] def _real_extract(self, url): episode_id = self._match_id(url) From 583910682f75022b13fbc3ca21a1f5a04ce5599b Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 14 Apr 2022 20:44:44 +0530 Subject: [PATCH 0950/2552] [chingari] Fix archiving and tests --- yt_dlp/extractor/chingari.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/chingari.py b/yt_dlp/extractor/chingari.py index 40613cfa3..7e8c0bfc9 100644 --- a/yt_dlp/extractor/chingari.py +++ b/yt_dlp/extractor/chingari.py @@ -45,6 +45,8 @@ class ChingariBaseIE(InfoExtractor): return { 'id': id, + 'extractor_key': 
ChingariIE.ie_key(), + 'extractor': 'Chingari', 'title': compat_urllib_parse_unquote_plus(clean_html(post_data.get('caption'))), 'description': compat_urllib_parse_unquote_plus(clean_html(post_data.get('caption'))), 'duration': media_data.get('duration'), @@ -102,11 +104,11 @@ class ChingariUserIE(ChingariBaseIE): _VALID_URL = r'https?://(?:www\.)?chingari\.io/(?!share/post)(?P[^/?]+)' _TESTS = [{ 'url': 'https://chingari.io/dada1023', - 'playlist_mincount': 3, 'info_dict': { 'id': 'dada1023', }, - 'entries': [{ + 'params': {'playlistend': 3}, + 'playlist': [{ 'url': 'https://chingari.io/share/post?id=614781f3ade60b3a0bfff42a', 'info_dict': { 'id': '614781f3ade60b3a0bfff42a', From affc4fefea9119f132cc757a6d9e797f3b03e448 Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Fri, 15 Apr 2022 16:22:03 +1200 Subject: [PATCH 0951/2552] [youtube] Fix episode metadata extraction --- yt_dlp/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index dee1b2315..431230948 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3618,7 +3618,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN': info['location'] = stl else: - mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl) + mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl) if mobj: info.update({ 'series': mobj.group(1), From 3b9d9f437469e651d5c65a0fa89d65bd2b95c738 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 15 Apr 2022 10:36:41 +0530 Subject: [PATCH 0952/2552] Do not change fragment chunk-size when `--test` Closes #3434 --- yt_dlp/downloader/fragment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index a2a2fe950..e5bc23e54 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -177,7 +177,7 @@ class FragmentFD(FileDownloader): 'ratelimit': self.params.get('ratelimit'), 'retries': self.params.get('retries', 0), 'nopart': self.params.get('nopart', False), - 'test': self.params.get('test', False), + 'test': False, } ) tmpfilename = self.temp_name(ctx['filename']) From abfecb7bc13efe8031a6c07cdefcf706db33014c Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 15 Apr 2022 17:05:49 +0530 Subject: [PATCH 0953/2552] [utils] Fix WebSocketsWrapper Bug in 3cea3edd1ac1101bd709dfa0305509028118b163 Closes #3422 --- yt_dlp/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 966548466..34a938362 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -5258,6 +5258,7 @@ class Config: class WebSocketsWrapper(): """Wraps websockets module to use in non-async scopes""" + pool = None def __init__(self, url, headers=None, connect=True): self.loop = asyncio.events.new_event_loop() From e06bd8800fb98e9dc1537e9f1ebf3aaeea5d9b8c Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 15 Apr 2022 23:43:07 +0530 Subject: [PATCH 0954/2552] Fix `--skip-unavailable-fragments` Bug in d71fd412495af9ebccef807379859a0baa97ddee Closes #3437 --- yt_dlp/downloader/fragment.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index e5bc23e54..9012a1795 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -134,6 +134,8 @@ class FragmentFD(FileDownloader): return True def _read_fragment(self, ctx): + if not ctx.get('fragment_filename_sanitized'): + return None try: 
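        # The guard added above makes _read_fragment() return None when no
        # fragment file was ever produced for this fragment, instead of
        # failing on the missing 'fragment_filename_sanitized' context key
        # (the --skip-unavailable-fragments regression tracked in #3437)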
down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb') except FileNotFoundError: From c854208ccf7938fa58b3bfbee6cb5bfd6432f11a Mon Sep 17 00:00:00 2001 From: "Lesmiscore (Naoya Ozaki)" Date: Sat, 16 Apr 2022 21:11:09 +0900 Subject: [PATCH 0955/2552] [downloader/fragment] Make single thread download work for --live-from-start (#3446) Authored by: Lesmiscore --- yt_dlp/downloader/fragment.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index 9012a1795..2a97cfd16 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -521,8 +521,13 @@ class FragmentFD(FileDownloader): for fragment in fragments: if not interrupt_trigger[0]: break - download_fragment(fragment, ctx) - result = append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), fragment['frag_index'], ctx) + try: + download_fragment(fragment, ctx) + result = append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), fragment['frag_index'], ctx) + except KeyboardInterrupt: + if info_dict.get('is_live'): + break + raise if not result: return False From 2e25ce3a05bbbe8a448eb35d1d79865837ec0481 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 17 Apr 2022 22:52:22 +0530 Subject: [PATCH 0956/2552] [niconico] Set `expected_protocol` to a public field Closes #3440 --- yt_dlp/extractor/niconico.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py index e60556a4d..353ae1c72 100644 --- a/yt_dlp/extractor/niconico.py +++ b/yt_dlp/extractor/niconico.py @@ -212,7 +212,7 @@ class NiconicoIE(InfoExtractor): def _get_heartbeat_info(self, info_dict): video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/') - dmc_protocol = info_dict['_expected_protocol'] + dmc_protocol = info_dict['expected_protocol'] api_data = ( info_dict.get('_api_data') @@ -366,7 +366,7 @@ class NiconicoIE(InfoExtractor): 'width': traverse_obj(video_quality, ('metadata', 'resolution', 'width')), 'quality': -2 if 'low' in video_quality['id'] else None, 'protocol': 'niconico_dmc', - '_expected_protocol': dmc_protocol, + 'expected_protocol': dmc_protocol, # XXX: This is not a documented field 'http_headers': { 'Origin': 'https://www.nicovideo.jp', 'Referer': 'https://www.nicovideo.jp/watch/' + video_id, From 3d3bb1688bfc5373105e6bf7c3d4729cf3f78788 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 17 Apr 2022 23:19:53 +0530 Subject: [PATCH 0957/2552] [docs] Improve embedding docs and other minor fixes --- CONTRIBUTING.md | 10 +-- README.md | 147 +++++++++++++++++++++++---------- yt_dlp/__init__.py | 5 +- yt_dlp/extractor/kakao.py | 1 + yt_dlp/postprocessor/common.py | 3 +- 5 files changed, 116 insertions(+), 50 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index eff6becac..19888cff4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -374,21 +374,21 @@ When extracting metadata try to do so from multiple sources. For example if `tit #### Example -Say `meta` from the previous example has a `title` and you are about to extract it. Since `title` is a mandatory meta field you should end up with something like: +Say `meta` from the previous example has a `title` and you are about to extract it like: ```python -title = meta['title'] +title = meta.get('title') ``` -If `title` disappears from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. 
That's expected. +If `title` disappears from `meta` in future due to some changes on the hoster's side the title extraction would fail. -Assume that you have some another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario: +Assume that you have some another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback like: ```python title = meta.get('title') or self._og_search_title(webpage) ``` -This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`. +This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`, making the extractor more robust. ### Regular expressions diff --git a/README.md b/README.md index 8a8477c9b..197d7b49b 100644 --- a/README.md +++ b/README.md @@ -148,6 +148,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu * youtube-dl tries to remove some superfluous punctuations from filenames. While this can sometimes be helpfull, it is often undesirable. So yt-dlp tries to keep the fields in the filenames as close to their original values as possible. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior For ease of use, a few more compat options are available: + * `--compat-options all`: Use all compat options * `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams` * `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect` @@ -166,7 +167,7 @@ You can simply download the [correct binary file](#release-files) for your OS [![Linux](https://img.shields.io/badge/-Linux/MacOS/BSD-red.svg?style=for-the-badge&logo=linux)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp) [![Source Tarball](https://img.shields.io/badge/-Source_tar-green.svg?style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz) [![Other variants](https://img.shields.io/badge/-Other-grey.svg?style=for-the-badge)](#release-files) -[![ALl versions](https://img.shields.io/badge/-All_Versions-lightgrey.svg?style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/releases) +[![All versions](https://img.shields.io/badge/-All_Versions-lightgrey.svg?style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/releases) Note: The manpages, shell completion files etc. are available in the [source tarball](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz) @@ -485,7 +486,7 @@ You can also fork the project on github and run your fork's [build workflow](.gi -R, --retries RETRIES Number of retries (default is 10), or "infinite" --file-access-retries RETRIES Number of times to retry on file access - error (default is 10), or "infinite" + error (default is 3), or "infinite" --fragment-retries RETRIES Number of retries for a fragment (default is 10), or "infinite" (DASH, hlsnative and ISM) @@ -925,8 +926,8 @@ You can also fork the project on github and run your fork's [build workflow](.gi same codecs and number of streams to be concatable. The "pl_video:" prefix can be used with "--paths" and "--output" to set - the output filename for the split files. - See "OUTPUT TEMPLATE" for details + the output filename for the concatenated + files. See "OUTPUT TEMPLATE" for details --fixup POLICY Automatically correct known faults of the file. 
One of never (do nothing), warn (only emit a warning), detect_or_warn (the @@ -1063,8 +1064,9 @@ You can configure yt-dlp by placing any supported command line option to a confi * `%APPDATA%/yt-dlp/config.txt` * `~/yt-dlp.conf` * `~/yt-dlp.conf.txt` - + `%XDG_CONFIG_HOME%` defaults to `~/.config` if undefined. On windows, `%APPDATA%` generally points to `C:\Users\\AppData\Roaming` and `~` points to `%HOME%` if present, `%USERPROFILE%` (generally `C:\Users\`), or `%HOMEDRIVE%%HOMEPATH%` + 1. **System Configuration**: `/etc/yt-dlp.conf` For example, with the following configuration file yt-dlp will always extract the audio, not copy the mtime, use a proxy and save all videos under `YouTube` directory in your home directory: @@ -1121,6 +1123,7 @@ The simplest usage of `-o` is not to set any template arguments when downloading It may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [Python string formatting operations](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. The field names themselves (the part inside the parenthesis) can also have some special formatting: + 1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do python slicing using `:`. Eg: `%(tags.0)s`, `%(subtitles.en.-1.ext)s`, `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. `%()s` refers to the entire infodict. Note that all the fields that become available using this method are not listed below. Use `-j` to see such fields 1. **Addition**: Addition and subtraction of numeric fields can be done using `+` and `-` respectively. Eg: `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d` @@ -1601,7 +1604,9 @@ The general syntax of `--parse-metadata FROM:TO` is to give the name of a field Note that any field created by this can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--add-metadata`. This option also has a few special uses: + * You can download an additional URL based on the metadata of the currently downloaded video. To do this, set the field `additional_urls` to the URL that you want to download. Eg: `--parse-metadata "description:(?Phttps?://www\.vimeo\.com/\d+)` will download the first vimeo video found in the description + * You can use this to change the metadata that is embedded in the media file. To do this, set the value of the corresponding field with a `meta_` prefix. For example, any value you set to `meta_description` field will be added to the `description` field in the file. For example, you can use this to set a different "description" and "synopsis". To modify the metadata of individual streams, use the `meta_` prefix (Eg: `meta1_language`). Any value set to the `meta_` field will overwrite all default values. **Note**: Metadata modification happens before format selection, post-extraction and other post-processing operations. Some fields may be added or changed during these steps, overriding your changes. 
@@ -1743,19 +1748,72 @@ From a Python program, you can embed yt-dlp in a more powerful fashion, like thi
 ```python
 from yt_dlp import YoutubeDL
-ydl_opts = {'format': 'bestaudio'}
-with YoutubeDL(ydl_opts) as ydl:
-    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
+URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']
+with YoutubeDL() as ydl:
+    ydl.download(URLS)
 ```
 Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L181).
-Here's a more complete example demonstrating various functionality:
+**Tip**: If you are porting your code from youtube-dl to yt-dlp, one important point to look out for is that we do not guarantee the return value of `YoutubeDL.extract_info` to be json serializable, or even be a dictionary. It will be dictionary-like, but if you want to ensure it is a serializable dictionary, pass it through `YoutubeDL.sanitize_info` as shown in the example above
+
+## Embedding examples
+
+### Extracting information
 ```python
 import json
 import yt_dlp
+URL = 'https://www.youtube.com/watch?v=BaW_jenozKc'
+
+# ℹ️ See help(yt_dlp.YoutubeDL) for a list of available options and public functions
+ydl_opts = {}
+with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+    info = ydl.extract_info(URL, download=False)
+
+    # ℹ️ ydl.sanitize_info makes the info json-serializable
+    print(json.dumps(ydl.sanitize_info(info)))
+```
+### Download from info-json
+
+```python
+import yt_dlp
+
+INFO_FILE = 'path/to/video.info.json'
+
+with yt_dlp.YoutubeDL() as ydl:
+    error_code = ydl.download_with_info_file(INFO_FILE)
+
+print('Some videos failed to download' if error_code
+      else 'All videos successfully downloaded')
+```
+
+### Extract audio
+
+```python
+import yt_dlp
+
+URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']
+
+ydl_opts = {
+    'format': 'm4a/bestaudio/best',
+    # ℹ️ See help(yt_dlp.postprocessor) for a list of available Postprocessors and their arguments
+    'postprocessors': [{  # Extract audio using ffmpeg
+        'key': 'FFmpegExtractAudio',
+        'preferredcodec': 'm4a',
+    }]
+}
+
+with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+    error_code = ydl.download(URLS)
+```
+### Adding logger and progress hook
+
+```python
+import yt_dlp
+
+URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']
 class MyLogger:
     def debug(self, msg):
@@ -1776,23 +1834,51 @@ class MyLogger:
         print(msg)
-# ℹ️ See the docstring of yt_dlp.postprocessor.common.PostProcessor
+# ℹ️ See "progress_hooks" in help(yt_dlp.YoutubeDL)
+def my_hook(d):
+    if d['status'] == 'finished':
+        print('Done downloading, now post-processing ...')
+
+
+ydl_opts = {
+    'logger': MyLogger(),
+    'progress_hooks': [my_hook],
+}
+
+with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+    ydl.download(URLS)
+```
+
+### Add a custom PostProcessor
+
+```python
+import yt_dlp
+
+URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']
+
+# ℹ️ See help(yt_dlp.postprocessor.PostProcessor)
 class MyCustomPP(yt_dlp.postprocessor.PostProcessor):
-    # ℹ️ See docstring of yt_dlp.postprocessor.common.PostProcessor.run
     def run(self, info):
         self.to_screen('Doing stuff')
         return [], info
+with yt_dlp.YoutubeDL() as ydl:
+    ydl.add_post_processor(MyCustomPP())
+    ydl.download(URLS)
+```
+### Use a custom format selector
+
+```python
+import yt_dlp
+
+URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']
+
 def format_selector(ctx):
     """ Select the best video
and the best audio that won't result in an mkv. - This is just an example and does not handle all cases """ + NOTE: This is just an example and does not handle all cases """ # formats are already sorted worst to best formats = ctx.get('formats')[::-1] @@ -1807,8 +1893,8 @@ def format_selector(ctx): best_audio = next(f for f in formats if ( f['acodec'] != 'none' and f['vcodec'] == 'none' and f['ext'] == audio_ext)) + # These are the minimum required fields for a merged format yield { - # These are the minimum required fields for a merged format 'format_id': f'{best_video["format_id"]}+{best_audio["format_id"]}', 'ext': best_video['ext'], 'requested_formats': [best_video, best_audio], @@ -1817,36 +1903,14 @@ def format_selector(ctx): } -# ℹ️ See docstring of yt_dlp.YoutubeDL for a description of the options ydl_opts = { 'format': format_selector, - 'postprocessors': [{ - # Embed metadata in video using ffmpeg. - # ℹ️ See yt_dlp.postprocessor.FFmpegMetadataPP for the arguments it accepts - 'key': 'FFmpegMetadata', - 'add_chapters': True, - 'add_metadata': True, - }], - 'logger': MyLogger(), - 'progress_hooks': [my_hook], - # Add custom headers - 'http_headers': {'Referer': 'https://www.google.com'} } - -# ℹ️ See the public functions in yt_dlp.YoutubeDL for for other available functions. -# Eg: "ydl.download", "ydl.download_with_info_file" with yt_dlp.YoutubeDL(ydl_opts) as ydl: - ydl.add_post_processor(MyCustomPP()) - info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc') - - # ℹ️ ydl.sanitize_info makes the info json-serializable - print(json.dumps(ydl.sanitize_info(info))) + ydl.download(URLS) ``` -**Tip**: If you are porting your code from youtube-dl to yt-dlp, one important point to look out for is that we do not guarantee the return value of `YoutubeDL.extract_info` to be json serializable, or even be a dictionary. 
It will be dictionary-like, but if you want to ensure it is a serializable dictionary, pass it through `YoutubeDL.sanitize_info` as shown in the example above - - # DEPRECATED OPTIONS @@ -1960,8 +2024,7 @@ These options may no longer work as intended These options were deprecated since 2014 and have now been entirely removed -A, --auto-number -o "%(autonumber)s-%(id)s.%(ext)s" - -t, --title -o "%(title)s-%(id)s.%(ext)s" - -l, --literal -o accepts literal names + -t, -l, --title, --literal -o "%(title)s-%(id)s.%(ext)s" # CONTRIBUTING See [CONTRIBUTING.md](CONTRIBUTING.md#contributing-to-yt-dlp) for instructions on [Opening an Issue](CONTRIBUTING.md#opening-an-issue) and [Contributing code to the project](CONTRIBUTING.md#developer-instructions) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index f339e4cd1..24991e19b 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -397,7 +397,8 @@ def validate_options(opts): # Conflicting options report_conflict('--dateafter', 'dateafter', '--date', 'date', default=None) report_conflict('--datebefore', 'datebefore', '--date', 'date', default=None) - report_conflict('--exec-before-download', 'exec_before_dl_cmd', '"--exec before_dl:"', 'exec_cmd', opts.exec_cmd.get('before_dl')) + report_conflict('--exec-before-download', 'exec_before_dl_cmd', + '"--exec before_dl:"', 'exec_cmd', val2=opts.exec_cmd.get('before_dl')) report_conflict('--id', 'useid', '--output', 'outtmpl', val2=opts.outtmpl.get('default')) report_conflict('--remux-video', 'remuxvideo', '--recode-video', 'recodevideo') report_conflict('--sponskrub', 'sponskrub', '--remove-chapters', 'remove_chapters') @@ -412,7 +413,7 @@ def validate_options(opts): report_conflict('--embed-subs', 'embedsubtitles') report_conflict('--embed-thumbnail', 'embedthumbnail') report_conflict('--extract-audio', 'extractaudio') - report_conflict('--fixup', 'fixup', val1=(opts.fixup or '').lower() in ('', 'never', 'ignore'), default='never') + report_conflict('--fixup', 'fixup', val1=opts.fixup not in (None, 'never', 'ignore'), default='never') report_conflict('--recode-video', 'recodevideo') report_conflict('--remove-chapters', 'remove_chapters', default=[]) report_conflict('--remux-video', 'remuxvideo') diff --git a/yt_dlp/extractor/kakao.py b/yt_dlp/extractor/kakao.py index 8ad1d9efd..a5014d931 100644 --- a/yt_dlp/extractor/kakao.py +++ b/yt_dlp/extractor/kakao.py @@ -105,6 +105,7 @@ class KakaoIE(InfoExtractor): resp = self._parse_json(e.cause.read().decode(), video_id) if resp.get('code') == 'GeoBlocked': self.raise_geo_restricted() + raise fmt_url = traverse_obj(fmt_url_json, ('videoLocation', 'url')) if not fmt_url: diff --git a/yt_dlp/postprocessor/common.py b/yt_dlp/postprocessor/common.py index ce6dec2f5..fdea3a7ea 100644 --- a/yt_dlp/postprocessor/common.py +++ b/yt_dlp/postprocessor/common.py @@ -83,7 +83,8 @@ class PostProcessor(metaclass=PostProcessorMetaClass): write_string(f'DeprecationWarning: {text}') def report_error(self, text, *args, **kwargs): - # Exists only for compatibility. Do not use + self.deprecation_warning('"yt_dlp.postprocessor.PostProcessor.report_error" is deprecated. 
' + 'raise "yt_dlp.utils.PostProcessingError" instead') if self._downloader: return self._downloader.report_error(text, *args, **kwargs) From b6dc37fe2aee167bf11f863f960a4888f4886718 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 18 Apr 2022 02:12:48 +0530 Subject: [PATCH 0958/2552] [test] Convert warnings into errors * And fix some existing warnings Authored by: fstirlitz --- devscripts/run_tests.bat | 1 + devscripts/run_tests.sh | 2 +- test/test_http.py | 5 +++-- yt-dlp.cmd | 2 +- yt-dlp.sh | 2 +- yt_dlp/compat.py | 2 +- yt_dlp/extractor/gedidigital.py | 4 ++-- yt_dlp/extractor/gfycat.py | 2 +- yt_dlp/extractor/wimtv.py | 4 ++-- 9 files changed, 13 insertions(+), 11 deletions(-) diff --git a/devscripts/run_tests.bat b/devscripts/run_tests.bat index b8bb393d9..190d23918 100644 --- a/devscripts/run_tests.bat +++ b/devscripts/run_tests.bat @@ -13,4 +13,5 @@ if ["%~1"]==[""] ( exit /b 1 ) +set PYTHONWARNINGS=error pytest %test_set% diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh index c9a75ba00..e9904ae35 100755 --- a/devscripts/run_tests.sh +++ b/devscripts/run_tests.sh @@ -11,4 +11,4 @@ else exit 1 fi -python3 -m pytest "$test_set" +python3 -bb -Werror -m pytest "$test_set" diff --git a/test/test_http.py b/test/test_http.py index 029996ca9..d99be8be4 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -66,8 +66,9 @@ class TestHTTPS(unittest.TestCase): certfn = os.path.join(TEST_DIR, 'testcert.pem') self.httpd = compat_http_server.HTTPServer( ('127.0.0.1', 0), HTTPTestRequestHandler) - self.httpd.socket = ssl.wrap_socket( - self.httpd.socket, certfile=certfn, server_side=True) + sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) + sslctx.load_cert_chain(certfn, None) + self.httpd.socket = sslctx.wrap_socket(self.httpd.socket, server_side=True) self.port = http_server_port(self.httpd) self.server_thread = threading.Thread(target=self.httpd.serve_forever) self.server_thread.daemon = True diff --git a/yt-dlp.cmd b/yt-dlp.cmd index 2b651a41e..aa4500f9f 100644 --- a/yt-dlp.cmd +++ b/yt-dlp.cmd @@ -1 +1 @@ -@py "%~dp0yt_dlp\__main__.py" %* \ No newline at end of file +@py -bb -Werror -Xdev "%~dp0yt_dlp\__main__.py" %* diff --git a/yt-dlp.sh b/yt-dlp.sh index 71a9aa163..0321a3362 100755 --- a/yt-dlp.sh +++ b/yt-dlp.sh @@ -1,2 +1,2 @@ #!/bin/sh -exec python3 "$(dirname "$(realpath "$0")")/yt_dlp/__main__.py" "$@" +exec "${PYTHON:-python3}" -bb -Werror -Xdev "$(dirname "$(realpath "$0")")/yt_dlp/__main__.py" "$@" diff --git a/yt_dlp/compat.py b/yt_dlp/compat.py index 9d3a6bbfd..df0c54606 100644 --- a/yt_dlp/compat.py +++ b/yt_dlp/compat.py @@ -159,7 +159,7 @@ def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.pytho startupinfo = subprocess.STARTUPINFO() startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW try: - subprocess.Popen('', shell=True, startupinfo=startupinfo) + subprocess.Popen('', shell=True, startupinfo=startupinfo).wait() WINDOWS_VT_MODE = True except Exception: pass diff --git a/yt_dlp/extractor/gedidigital.py b/yt_dlp/extractor/gedidigital.py index c878daff8..4ae5362b4 100644 --- a/yt_dlp/extractor/gedidigital.py +++ b/yt_dlp/extractor/gedidigital.py @@ -11,7 +11,7 @@ from ..utils import ( class GediDigitalIE(InfoExtractor): - _VALID_URL = r'''(?x)(?P(?:https?:)//video\. + _VALID_URL = r'''(?x:(?P(?:https?:)//video\. 
(?: (?: (?:espresso\.)?repubblica @@ -33,7 +33,7 @@ class GediDigitalIE(InfoExtractor): |corrierealpi |lasentinella )\.gelocal - )\.it(?:/[^/]+){2,4}/(?P\d+))(?:$|[?&].*)''' + )\.it(?:/[^/]+){2,4}/(?P\d+))(?:$|[?&].*))''' _TESTS = [{ 'url': 'https://video.lastampa.it/politica/il-paradosso-delle-regionali-la-lega-vince-ma-sembra-aver-perso/121559/121683', 'md5': '84658d7fb9e55a6e57ecc77b73137494', diff --git a/yt_dlp/extractor/gfycat.py b/yt_dlp/extractor/gfycat.py index 7373c574f..60f06ccd7 100644 --- a/yt_dlp/extractor/gfycat.py +++ b/yt_dlp/extractor/gfycat.py @@ -10,7 +10,7 @@ from ..utils import ( class GfycatIE(InfoExtractor): - _VALID_URL = r'(?i)https?://(?:(?:www|giant|thumbs)\.)?gfycat\.com/(?:ru/|ifr/|gifs/detail/)?(?P[^-/?#\."\']+)' + _VALID_URL = r'https?://(?:(?:www|giant|thumbs)\.)?gfycat\.com/(?i:ru/|ifr/|gifs/detail/)?(?P[^-/?#\."\']+)' _TESTS = [{ 'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher', 'info_dict': { diff --git a/yt_dlp/extractor/wimtv.py b/yt_dlp/extractor/wimtv.py index 6e7ec3436..263844d72 100644 --- a/yt_dlp/extractor/wimtv.py +++ b/yt_dlp/extractor/wimtv.py @@ -12,14 +12,14 @@ from ..utils import ( class WimTVIE(InfoExtractor): _player = None _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}' - _VALID_URL = r'''(?x) + _VALID_URL = r'''(?x: https?://platform.wim.tv/ (?: (?:embed/)?\? |\#/webtv/.+?/ ) (?Pvod|live|cast)[=/] - (?P%s).*?''' % _UUID_RE + (?P%s).*?)''' % _UUID_RE _TESTS = [{ # vod stream 'url': 'https://platform.wim.tv/embed/?vod=db29fb32-bade-47b6-a3a6-cb69fe80267a', From 19a0394044bfad36cd665450271b8eb048a41c02 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 18 Apr 2022 02:28:28 +0530 Subject: [PATCH 0959/2552] [cleanup] Misc cleanup and refactor (#2173) --- devscripts/make_readme.py | 25 +++--- setup.cfg | 4 +- supportedsites.md | 4 +- test/test_compat.py | 10 ++- test/test_execution.py | 11 ++- test/test_utils.py | 22 +++-- test/test_verbose_output.py | 12 ++- test/test_write_annotations.py.disabled | 1 - test/test_youtube_signature.py | 5 +- yt_dlp/YoutubeDL.py | 34 ++++---- yt_dlp/__init__.py | 3 +- yt_dlp/aes.py | 2 +- yt_dlp/cache.py | 5 +- yt_dlp/compat.py | 23 +++--- yt_dlp/cookies.py | 35 ++++---- yt_dlp/downloader/common.py | 98 ++++++++++------------ yt_dlp/downloader/fragment.py | 8 +- yt_dlp/downloader/websocket.py | 5 +- yt_dlp/extractor/__init__.py | 13 ++- yt_dlp/extractor/cpac.py | 7 -- yt_dlp/extractor/extractors.py | 3 +- yt_dlp/extractor/openload.py | 9 +-- yt_dlp/extractor/rtve.py | 7 +- yt_dlp/extractor/spotify.py | 2 + yt_dlp/extractor/youtube.py | 10 +-- yt_dlp/jsinterp.py | 41 +++------- yt_dlp/options.py | 72 +++++++++-------- yt_dlp/postprocessor/common.py | 2 +- yt_dlp/postprocessor/metadataparser.py | 22 +++-- yt_dlp/utils.py | 103 +++++++++--------------- yt_dlp/webvtt.py | 10 +-- 31 files changed, 263 insertions(+), 345 deletions(-) diff --git a/devscripts/make_readme.py b/devscripts/make_readme.py index 1719ac8e4..1401c2e5a 100755 --- a/devscripts/make_readme.py +++ b/devscripts/make_readme.py @@ -6,22 +6,25 @@ import re import sys README_FILE = 'README.md' -helptext = sys.stdin.read() +OPTIONS_START = 'General Options:' +OPTIONS_END = 'CONFIGURATION' +EPILOG_START = 'See full documentation' + + +helptext = sys.stdin.read() if isinstance(helptext, bytes): helptext = helptext.decode('utf-8') -with open(README_FILE, encoding='utf-8') as f: - oldreadme = f.read() +start, end = helptext.index(f'\n {OPTIONS_START}'), helptext.index(f'\n{EPILOG_START}') +options = re.sub(r'(?m)^ (\w.+)$', 
r'## \1', helptext[start + 1: end + 1]) -header = oldreadme[:oldreadme.index('## General Options:')] -footer = oldreadme[oldreadme.index('# CONFIGURATION'):] +with open(README_FILE, encoding='utf-8') as f: + readme = f.read() -options = helptext[helptext.index(' General Options:'):] -options = re.sub(r'(?m)^ (\w.+)$', r'## \1', options) -options = options + '\n' +header = readme[:readme.index(f'## {OPTIONS_START}')] +footer = readme[readme.index(f'# {OPTIONS_END}'):] with open(README_FILE, 'w', encoding='utf-8') as f: - f.write(header) - f.write(options) - f.write(footer) + for part in (header, options, footer): + f.write(part) diff --git a/setup.cfg b/setup.cfg index 59372d93a..5fe95226a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,5 +2,5 @@ universal = True [flake8] -exclude = yt_dlp/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git,venv,devscripts/create-github-release.py,devscripts/release.sh,devscripts/show-downloads-statistics.py -ignore = E402,E501,E731,E741,W503 \ No newline at end of file +exclude = devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git,venv +ignore = E402,E501,E731,E741,W503 diff --git a/supportedsites.md b/supportedsites.md index eac7842a3..746a93de6 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -1147,8 +1147,8 @@ - **Sport5** - **SportBox** - **SportDeutschland** - - **spotify** - - **spotify:show** + - **spotify**: Spotify episodes + - **spotify:show**: Spotify shows - **Spreaker** - **SpreakerPage** - **SpreakerShow** diff --git a/test/test_compat.py b/test/test_compat.py index 20dab9573..29e7384f0 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -35,10 +35,12 @@ class TestCompat(unittest.TestCase): def test_compat_expanduser(self): old_home = os.environ.get('HOME') - test_str = r'C:\Documents and Settings\тест\Application Data' - compat_setenv('HOME', test_str) - self.assertEqual(compat_expanduser('~'), test_str) - compat_setenv('HOME', old_home or '') + test_str = R'C:\Documents and Settings\тест\Application Data' + try: + compat_setenv('HOME', test_str) + self.assertEqual(compat_expanduser('~'), test_str) + finally: + compat_setenv('HOME', old_home or '') def test_all_present(self): import yt_dlp.compat diff --git a/test/test_execution.py b/test/test_execution.py index 6a3e9944b..6efd432e9 100644 --- a/test/test_execution.py +++ b/test/test_execution.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +import contextlib import os import subprocess import sys @@ -22,14 +23,14 @@ class TestExecution(unittest.TestCase): subprocess.check_call([sys.executable, '-c', 'import yt_dlp'], cwd=rootDir) def test_module_exec(self): - subprocess.check_call([sys.executable, '-m', 'yt_dlp', '--version'], cwd=rootDir, stdout=_DEV_NULL) + subprocess.check_call([sys.executable, '-m', 'yt_dlp', '--ignore-config', '--version'], cwd=rootDir, stdout=_DEV_NULL) def test_main_exec(self): - subprocess.check_call([sys.executable, 'yt_dlp/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL) + subprocess.check_call([sys.executable, 'yt_dlp/__main__.py', '--ignore-config', '--version'], cwd=rootDir, stdout=_DEV_NULL) def test_cmdline_umlauts(self): p = subprocess.Popen( - [sys.executable, 'yt_dlp/__main__.py', encodeArgument('ä'), '--version'], + [sys.executable, 'yt_dlp/__main__.py', '--ignore-config', encodeArgument('ä'), '--version'], cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE) _, stderr = p.communicate() self.assertFalse(stderr) @@ 
-39,10 +40,8 @@ class TestExecution(unittest.TestCase): subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', 'yt_dlp/extractor/lazy_extractors.py'], cwd=rootDir, stdout=_DEV_NULL) subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL) finally: - try: + with contextlib.suppress(OSError): os.remove('yt_dlp/extractor/lazy_extractors.py') - except OSError: - pass if __name__ == '__main__': diff --git a/test/test_utils.py b/test/test_utils.py index 7909dc61c..5e220087b 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 # Allow direct execution +import contextlib import os import sys import unittest @@ -267,11 +268,18 @@ class TestUtil(unittest.TestCase): compat_setenv('yt_dlp_EXPATH_PATH', 'expanded') self.assertEqual(expand_path(env('yt_dlp_EXPATH_PATH')), 'expanded') - self.assertEqual(expand_path(env('HOME')), compat_getenv('HOME')) - self.assertEqual(expand_path('~'), compat_getenv('HOME')) - self.assertEqual( - expand_path('~/%s' % env('yt_dlp_EXPATH_PATH')), - '%s/expanded' % compat_getenv('HOME')) + + old_home = os.environ.get('HOME') + test_str = R'C:\Documents and Settings\тест\Application Data' + try: + compat_setenv('HOME', test_str) + self.assertEqual(expand_path(env('HOME')), compat_getenv('HOME')) + self.assertEqual(expand_path('~'), compat_getenv('HOME')) + self.assertEqual( + expand_path('~/%s' % env('yt_dlp_EXPATH_PATH')), + '%s/expanded' % compat_getenv('HOME')) + finally: + compat_setenv('HOME', old_home or '') def test_prepend_extension(self): self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext') @@ -1814,10 +1822,8 @@ Line 1 else: self.assertFalse(testing_write, f'{test_mode} is not blocked by {lock_mode}') finally: - try: + with contextlib.suppress(OSError): os.remove(FILE) - except Exception: - pass if __name__ == '__main__': diff --git a/test/test_verbose_output.py b/test/test_verbose_output.py index 1213a9726..657994074 100644 --- a/test/test_verbose_output.py +++ b/test/test_verbose_output.py @@ -13,7 +13,8 @@ class TestVerboseOutput(unittest.TestCase): def test_private_info_arg(self): outp = subprocess.Popen( [ - sys.executable, 'yt_dlp/__main__.py', '-v', + sys.executable, 'yt_dlp/__main__.py', + '-v', '--ignore-config', '--username', 'johnsmith@gmail.com', '--password', 'my_secret_password', ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) @@ -26,7 +27,8 @@ class TestVerboseOutput(unittest.TestCase): def test_private_info_shortarg(self): outp = subprocess.Popen( [ - sys.executable, 'yt_dlp/__main__.py', '-v', + sys.executable, 'yt_dlp/__main__.py', + '-v', '--ignore-config', '-u', 'johnsmith@gmail.com', '-p', 'my_secret_password', ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) @@ -39,7 +41,8 @@ class TestVerboseOutput(unittest.TestCase): def test_private_info_eq(self): outp = subprocess.Popen( [ - sys.executable, 'yt_dlp/__main__.py', '-v', + sys.executable, 'yt_dlp/__main__.py', + '-v', '--ignore-config', '--username=johnsmith@gmail.com', '--password=my_secret_password', ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) @@ -52,7 +55,8 @@ class TestVerboseOutput(unittest.TestCase): def test_private_info_shortarg_eq(self): outp = subprocess.Popen( [ - sys.executable, 'yt_dlp/__main__.py', '-v', + sys.executable, 'yt_dlp/__main__.py', + '-v', '--ignore-config', '-u=johnsmith@gmail.com', '-p=my_secret_password', ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) diff --git 
a/test/test_write_annotations.py.disabled b/test/test_write_annotations.py.disabled index bf13efe2c..cca60561f 100644 --- a/test/test_write_annotations.py.disabled +++ b/test/test_write_annotations.py.disabled @@ -6,7 +6,6 @@ import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -import io import xml.etree.ElementTree from test.helper import get_params, is_download_test, try_rm diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index ca23c910d..2c2013295 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 # Allow direct execution +import contextlib import os import sys import unittest @@ -127,11 +128,9 @@ class TestSignature(unittest.TestCase): os.mkdir(self.TESTDATA_DIR) def tearDown(self): - try: + with contextlib.suppress(OSError): for f in os.listdir(self.TESTDATA_DIR): os.remove(f) - except OSError: - pass def t_factory(name, sig_func, url_pattern): diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index eaf2d9216..155b5a063 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -23,7 +23,6 @@ import tokenize import traceback import unicodedata import urllib.request -from enum import Enum from string import ascii_letters from .cache import Cache @@ -82,6 +81,7 @@ from .utils import ( ISO3166Utils, LazyList, MaxDownloadsReached, + Namespace, PagedList, PerRequestProxyHandler, Popen, @@ -878,14 +878,15 @@ class YoutubeDL: raise DownloadError(message, exc_info) self._download_retcode = 1 - class Styles(Enum): - HEADERS = 'yellow' - EMPHASIS = 'light blue' - ID = 'green' - DELIM = 'blue' - ERROR = 'red' - WARNING = 'yellow' - SUPPRESS = 'light black' + Styles = Namespace( + HEADERS='yellow', + EMPHASIS='light blue', + ID='green', + DELIM='blue', + ERROR='red', + WARNING='yellow', + SUPPRESS='light black', + ) def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False): text = str(text) @@ -896,8 +897,6 @@ class YoutubeDL: text = text.encode(encoding, 'ignore').decode(encoding) if fallback is not None and text != original_text: text = fallback - if isinstance(f, Enum): - f = f.value return format_text(text, f) if allow_colors else text if fallback is None else fallback def _format_screen(self, *args, **kwargs): @@ -1760,7 +1759,8 @@ class YoutubeDL: playlist_index, entry = entry_tuple if 'playlist-index' in self.params.get('compat_opts', []): playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1 - self.to_screen(f'[download] Downloading video {i} of {n_entries}') + self.to_screen('[download] Downloading video %s of %s' % ( + self._format_screen(i, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS))) # This __x_forwarded_for_ip thing is a bit ugly but requires # minimal changes if x_forwarded_for: @@ -2337,11 +2337,9 @@ class YoutubeDL: if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None: # Working around out-of-range timestamp values (e.g. 
negative ones on Windows, # see http://bugs.python.org/issue1646728) - try: + with contextlib.suppress(ValueError, OverflowError, OSError): upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key]) info_dict[date_key] = upload_date.strftime('%Y%m%d') - except (ValueError, OverflowError, OSError): - pass live_keys = ('is_live', 'was_live') live_status = info_dict.get('live_status') @@ -3631,10 +3629,8 @@ class YoutubeDL: if re.match('[0-9a-f]+', out): write_debug('Git HEAD: %s' % out) except Exception: - try: + with contextlib.suppress(Exception): sys.exc_clear() - except Exception: - pass def python_implementation(): impl_name = platform.python_implementation() @@ -3651,7 +3647,7 @@ class YoutubeDL: exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self) ffmpeg_features = {key for key, val in ffmpeg_features.items() if val} if ffmpeg_features: - exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features) + exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features)) exe_versions['rtmpdump'] = rtmpdump_version() exe_versions['phantomjs'] = PhantomJSwrapper._version() diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 24991e19b..9ea13ad37 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -404,7 +404,8 @@ def validate_options(opts): report_conflict('--sponskrub', 'sponskrub', '--remove-chapters', 'remove_chapters') report_conflict('--sponskrub', 'sponskrub', '--sponsorblock-mark', 'sponsorblock_mark') report_conflict('--sponskrub', 'sponskrub', '--sponsorblock-remove', 'sponsorblock_remove') - report_conflict('--sponskrub-cut', 'sponskrub_cut', '--split-chapter', 'split_chapters', val1=opts.sponskrub and opts.sponskrub_cut) + report_conflict('--sponskrub-cut', 'sponskrub_cut', '--split-chapter', 'split_chapters', + val1=opts.sponskrub and opts.sponskrub_cut) # Conflicts with --allow-unplayable-formats report_conflict('--add-metadata', 'addmetadata') diff --git a/yt_dlp/aes.py b/yt_dlp/aes.py index 01818df61..603f3d187 100644 --- a/yt_dlp/aes.py +++ b/yt_dlp/aes.py @@ -493,7 +493,7 @@ def ghash(subkey, data): last_y = [0] * BLOCK_SIZE_BYTES for i in range(0, len(data), BLOCK_SIZE_BYTES): - block = data[i : i + BLOCK_SIZE_BYTES] # noqa: E203 + block = data[i: i + BLOCK_SIZE_BYTES] last_y = block_product(xor(last_y, block), subkey) return last_y diff --git a/yt_dlp/cache.py b/yt_dlp/cache.py index 0cac3ee88..e3f8a7dab 100644 --- a/yt_dlp/cache.py +++ b/yt_dlp/cache.py @@ -1,3 +1,4 @@ +import contextlib import errno import json import os @@ -57,7 +58,7 @@ class Cache: return default cache_fn = self._get_cache_fn(section, key, dtype) - try: + with contextlib.suppress(OSError): try: with open(cache_fn, encoding='utf-8') as cachef: self._ydl.write_debug(f'Loading {section}.{key} from cache') @@ -68,8 +69,6 @@ class Cache: except OSError as oe: file_size = str(oe) self._ydl.report_warning(f'Cache retrieval from {cache_fn} failed ({file_size})') - except OSError: - pass # No cache available return default diff --git a/yt_dlp/compat.py b/yt_dlp/compat.py index df0c54606..f18c6cce2 100644 --- a/yt_dlp/compat.py +++ b/yt_dlp/compat.py @@ -1,6 +1,7 @@ import asyncio import base64 import collections +import contextlib import ctypes import getpass import html @@ -54,14 +55,11 @@ if compat_os_name == 'nt': def compat_shlex_quote(s): return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"') else: - from shlex import quote as compat_shlex_quote + from shlex import quote as compat_shlex_quote # noqa: F401 def compat_ord(c): - 
if type(c) is int: - return c - else: - return ord(c) + return c if isinstance(c, int) else ord(c) def compat_setenv(key, value, env=os.environ): @@ -118,16 +116,17 @@ except ImportError: # Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl # See https://github.com/yt-dlp/yt-dlp/issues/792 # https://docs.python.org/3/library/os.path.html#os.path.expanduser -if compat_os_name in ('nt', 'ce') and 'HOME' in os.environ: - _userhome = os.environ['HOME'] - +if compat_os_name in ('nt', 'ce'): def compat_expanduser(path): - if not path.startswith('~'): + HOME = os.environ.get('HOME') + if not HOME: + return os.path.expanduser(path) + elif not path.startswith('~'): return path i = path.replace('\\', '/', 1).find('/') # ~user if i < 0: i = len(path) - userhome = os.path.join(os.path.dirname(_userhome), path[1:i]) if i > 1 else _userhome + userhome = os.path.join(os.path.dirname(HOME), path[1:i]) if i > 1 else HOME return userhome + path[i:] else: compat_expanduser = os.path.expanduser @@ -158,11 +157,9 @@ def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.pytho global WINDOWS_VT_MODE startupinfo = subprocess.STARTUPINFO() startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW - try: + with contextlib.suppress(Exception): subprocess.Popen('', shell=True, startupinfo=startupinfo).wait() WINDOWS_VT_MODE = True - except Exception: - pass # Deprecated diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index 6ff9f6f2d..8a4baa5bb 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -167,7 +167,7 @@ def _firefox_browser_dir(): if sys.platform in ('linux', 'linux2'): return os.path.expanduser('~/.mozilla/firefox') elif sys.platform == 'win32': - return os.path.expandvars(r'%APPDATA%\Mozilla\Firefox\Profiles') + return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles') elif sys.platform == 'darwin': return os.path.expanduser('~/Library/Application Support/Firefox') else: @@ -191,12 +191,12 @@ def _get_chromium_based_browser_settings(browser_name): appdata_local = os.path.expandvars('%LOCALAPPDATA%') appdata_roaming = os.path.expandvars('%APPDATA%') browser_dir = { - 'brave': os.path.join(appdata_local, r'BraveSoftware\Brave-Browser\User Data'), - 'chrome': os.path.join(appdata_local, r'Google\Chrome\User Data'), - 'chromium': os.path.join(appdata_local, r'Chromium\User Data'), - 'edge': os.path.join(appdata_local, r'Microsoft\Edge\User Data'), - 'opera': os.path.join(appdata_roaming, r'Opera Software\Opera Stable'), - 'vivaldi': os.path.join(appdata_local, r'Vivaldi\User Data'), + 'brave': os.path.join(appdata_local, R'BraveSoftware\Brave-Browser\User Data'), + 'chrome': os.path.join(appdata_local, R'Google\Chrome\User Data'), + 'chromium': os.path.join(appdata_local, R'Chromium\User Data'), + 'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'), + 'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'), + 'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'), }[browser_name] elif sys.platform == 'darwin': @@ -237,8 +237,8 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger): logger.info(f'Extracting cookies from {browser_name}') if not SQLITE_AVAILABLE: - logger.warning(('Cannot extract cookies from {} without sqlite3 support. ' - 'Please use a python interpreter compiled with sqlite3 support').format(browser_name)) + logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. 
' + 'Please use a python interpreter compiled with sqlite3 support') return YoutubeDLCookieJar() config = _get_chromium_based_browser_settings(browser_name) @@ -269,8 +269,7 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger): cursor.connection.text_factory = bytes column_names = _get_column_names(cursor, 'cookies') secure_column = 'is_secure' if 'is_secure' in column_names else 'secure' - cursor.execute('SELECT host_key, name, value, encrypted_value, path, ' - 'expires_utc, {} FROM cookies'.format(secure_column)) + cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies') jar = YoutubeDLCookieJar() failed_cookies = 0 unencrypted_cookies = 0 @@ -346,11 +345,11 @@ class ChromeCookieDecryptor: """ def decrypt(self, encrypted_value): - raise NotImplementedError + raise NotImplementedError('Must be implemented by sub classes') @property def cookie_counts(self): - raise NotImplementedError + raise NotImplementedError('Must be implemented by sub classes') def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None): @@ -361,8 +360,7 @@ def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring= elif sys.platform == 'win32': return WindowsChromeCookieDecryptor(browser_root, logger) else: - raise NotImplementedError('Chrome cookie decryption is not supported ' - 'on this platform: {}'.format(sys.platform)) + raise NotImplementedError(f'Chrome cookie decryption is not supported on this platform: {sys.platform}') class LinuxChromeCookieDecryptor(ChromeCookieDecryptor): @@ -546,8 +544,7 @@ class DataParser: def skip(self, num_bytes, description='unknown'): if num_bytes > 0: - self._logger.debug('skipping {} bytes ({}): {}'.format( - num_bytes, description, self.read_bytes(num_bytes))) + self._logger.debug(f'skipping {num_bytes} bytes ({description}): {self.read_bytes(num_bytes)!r}') elif num_bytes < 0: raise ParserError(f'invalid skip of {num_bytes} bytes') @@ -784,8 +781,8 @@ def _get_kwallet_password(browser_keyring_name, logger): stdout, stderr = proc.communicate_or_kill() if proc.returncode != 0: - logger.error('kwallet-query failed with return code {}. Please consult ' - 'the kwallet-query man page for details'.format(proc.returncode)) + logger.error(f'kwallet-query failed with return code {proc.returncode}. 
Please consult ' + 'the kwallet-query man page for details') return b'' else: if stdout.lower().startswith(b'failed to read'): diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index 3033926ae..3e5396988 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -1,3 +1,4 @@ +import contextlib import errno import os import random @@ -12,6 +13,7 @@ from ..minicurses import ( ) from ..utils import ( LockingUnsupportedError, + Namespace, decodeArgument, encodeFilename, error_to_compat_str, @@ -70,12 +72,30 @@ class FileDownloader: def __init__(self, ydl, params): """Create a FileDownloader object with the given options.""" - self.ydl = ydl + self._set_ydl(ydl) self._progress_hooks = [] self.params = params self._prepare_multiline_status() self.add_progress_hook(self.report_progress) + def _set_ydl(self, ydl): + self.ydl = ydl + + for func in ( + 'deprecation_warning', + 'report_error', + 'report_file_already_downloaded', + 'report_warning', + 'to_console_title', + 'to_stderr', + 'trouble', + 'write_debug', + ): + setattr(self, func, getattr(ydl, func)) + + def to_screen(self, *args, **kargs): + self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs) + @staticmethod def format_seconds(seconds): time = timetuple_from_msec(seconds * 1000) @@ -157,27 +177,6 @@ class FileDownloader: multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) return int(round(number * multiplier)) - def to_screen(self, *args, **kargs): - self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs) - - def to_stderr(self, message): - self.ydl.to_stderr(message) - - def to_console_title(self, message): - self.ydl.to_console_title(message) - - def trouble(self, *args, **kargs): - self.ydl.trouble(*args, **kargs) - - def report_warning(self, *args, **kargs): - self.ydl.report_warning(*args, **kargs) - - def report_error(self, *args, **kargs): - self.ydl.report_error(*args, **kargs) - - def write_debug(self, *args, **kargs): - self.ydl.write_debug(*args, **kargs) - def slow_down(self, start_time, now, byte_counter): """Sleep if the download speed is over the rate limit.""" rate_limit = self.params.get('ratelimit') @@ -263,10 +262,8 @@ class FileDownloader: # Ignore obviously invalid dates if filetime == 0: return - try: + with contextlib.suppress(Exception): os.utime(filename, (time.time(), filetime)) - except Exception: - pass return filetime def report_destination(self, filename): @@ -287,18 +284,18 @@ class FileDownloader: def _finish_multiline_status(self): self._multiline.end() - _progress_styles = { - 'downloaded_bytes': 'light blue', - 'percent': 'light blue', - 'eta': 'yellow', - 'speed': 'green', - 'elapsed': 'bold white', - 'total_bytes': '', - 'total_bytes_estimate': '', - } + ProgressStyles = Namespace( + downloaded_bytes='light blue', + percent='light blue', + eta='yellow', + speed='green', + elapsed='bold white', + total_bytes='', + total_bytes_estimate='', + ) def _report_progress_status(self, s, default_template): - for name, style in self._progress_styles.items(): + for name, style in self.ProgressStyles._asdict().items(): name = f'_{name}_str' if name not in s: continue @@ -391,10 +388,6 @@ class FileDownloader: '[download] Got server HTTP error: %s. Retrying (attempt %d of %s) ...' 
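
The `_set_ydl` rewrite above replaces seven hand-written forwarding methods with a loop that copies the `YoutubeDL` object's bound methods onto the downloader. A stripped-down sketch of the same pattern (class and method names here are illustrative only):

```python
class Logger:
    def report_warning(self, msg):
        print(f'WARNING: {msg}')


class Downloader:
    def __init__(self, logger):
        # A bound method is an ordinary attribute, so getattr/setattr
        # forwards it without writing a wrapper per method
        for name in ('report_warning',):
            setattr(self, name, getattr(logger, name))


Downloader(Logger()).report_warning('delegated')  # prints: WARNING: delegated
```
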
% (error_to_compat_str(err), count, self.format_retries(retries))) - def report_file_already_downloaded(self, *args, **kwargs): - """Report file has already been fully downloaded.""" - return self.ydl.report_file_already_downloaded(*args, **kwargs) - def report_unable_to_resume(self): """Report it was impossible to resume download.""" self.to_screen('[download] Unable to resume') @@ -433,25 +426,16 @@ class FileDownloader: self._finish_multiline_status() return True, False - if subtitle is False: - min_sleep_interval = self.params.get('sleep_interval') - if min_sleep_interval: - max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval) - sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval) - self.to_screen( - '[download] Sleeping %s seconds ...' % ( - int(sleep_interval) if sleep_interval.is_integer() - else '%.2f' % sleep_interval)) - time.sleep(sleep_interval) + if subtitle: + sleep_interval = self.params.get('sleep_interval_subtitles') or 0 else: - sleep_interval_sub = 0 - if type(self.params.get('sleep_interval_subtitles')) is int: - sleep_interval_sub = self.params.get('sleep_interval_subtitles') - if sleep_interval_sub > 0: - self.to_screen( - '[download] Sleeping %s seconds ...' % ( - sleep_interval_sub)) - time.sleep(sleep_interval_sub) + min_sleep_interval = self.params.get('sleep_interval') or 0 + sleep_interval = random.uniform( + min_sleep_interval, self.params.get('max_sleep_interval', min_sleep_interval)) + if sleep_interval > 0: + self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds ...') + time.sleep(sleep_interval) + ret = self.real_download(filename, info_dict) self._finish_multiline_status() return ret, True diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index 2a97cfd16..390c840bb 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -1,3 +1,4 @@ +import contextlib import http.client import json import math @@ -310,10 +311,8 @@ class FragmentFD(FileDownloader): if self.params.get('updatetime', True): filetime = ctx.get('fragment_filetime') if filetime: - try: + with contextlib.suppress(Exception): os.utime(ctx['filename'], (time.time(), filetime)) - except Exception: - pass downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename'])) self._hook_progress({ @@ -523,7 +522,8 @@ class FragmentFD(FileDownloader): break try: download_fragment(fragment, ctx) - result = append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), fragment['frag_index'], ctx) + result = append_fragment( + decrypt_fragment(fragment, self._read_fragment(ctx)), fragment['frag_index'], ctx) except KeyboardInterrupt: if info_dict.get('is_live'): break diff --git a/yt_dlp/downloader/websocket.py b/yt_dlp/downloader/websocket.py index 96d113846..6b190cd90 100644 --- a/yt_dlp/downloader/websocket.py +++ b/yt_dlp/downloader/websocket.py @@ -1,4 +1,5 @@ import asyncio +import contextlib import os import signal import threading @@ -29,11 +30,9 @@ class FFmpegSinkFD(FileDownloader): except (BrokenPipeError, OSError): pass finally: - try: + with contextlib.suppress(OSError): stdin.flush() stdin.close() - except OSError: - pass os.kill(os.getpid(), signal.SIGINT) class FFmpegStdinFD(FFmpegFD): diff --git a/yt_dlp/extractor/__init__.py b/yt_dlp/extractor/__init__.py index b35484246..6288c5c6b 100644 --- a/yt_dlp/extractor/__init__.py +++ b/yt_dlp/extractor/__init__.py @@ -1,24 +1,23 @@ +import contextlib import os from ..utils import load_plugins _LAZY_LOADER = False if not 
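
The consolidated sleep logic above relies on `random.uniform(a, a)` degenerating to exactly `a`, so an unset maximum falls back to a fixed sleep; a follow-up bugfix later in this section switches the `max_sleep_interval` lookup to `or` so a stored `None` is handled too. In isolation:

```python
import random

min_sleep = 2.0
max_sleep = None
interval = random.uniform(min_sleep, max_sleep or min_sleep)
assert interval == 2.0    # uniform(a, a) == a + (a - a) * random() == a
```
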
os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'): - try: - from .lazy_extractors import * + with contextlib.suppress(ImportError): + from .lazy_extractors import * # noqa: F403 from .lazy_extractors import _ALL_CLASSES _LAZY_LOADER = True - except ImportError: - pass if not _LAZY_LOADER: - from .extractors import * - _ALL_CLASSES = [ + from .extractors import * # noqa: F403 + _ALL_CLASSES = [ # noqa: F811 klass for name, klass in globals().items() if name.endswith('IE') and name != 'GenericIE' ] - _ALL_CLASSES.append(GenericIE) + _ALL_CLASSES.append(GenericIE) # noqa: F405 _PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals()) _ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES diff --git a/yt_dlp/extractor/cpac.py b/yt_dlp/extractor/cpac.py index e8975e5e2..65ac2497f 100644 --- a/yt_dlp/extractor/cpac.py +++ b/yt_dlp/extractor/cpac.py @@ -9,13 +9,6 @@ from ..utils import ( urljoin, ) -# compat_range -try: - if callable(xrange): - range = xrange -except (NameError, TypeError): - pass - class CPACIE(InfoExtractor): IE_NAME = 'cpac' diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index cd3934a70..d67b2eeec 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1,4 +1,5 @@ -# flake8: noqa +# flake8: noqa: F401 + from .abc import ( ABCIE, ABCIViewIE, diff --git a/yt_dlp/extractor/openload.py b/yt_dlp/extractor/openload.py index f2600aaa4..61e3a8b86 100644 --- a/yt_dlp/extractor/openload.py +++ b/yt_dlp/extractor/openload.py @@ -1,3 +1,4 @@ +import contextlib import json import os import subprocess @@ -31,13 +32,11 @@ def cookie_to_dict(cookie): cookie_dict['secure'] = cookie.secure if cookie.discard is not None: cookie_dict['discard'] = cookie.discard - try: + with contextlib.suppress(TypeError): if (cookie.has_nonstandard_attr('httpOnly') or cookie.has_nonstandard_attr('httponly') or cookie.has_nonstandard_attr('HttpOnly')): cookie_dict['httponly'] = True - except TypeError: - pass return cookie_dict @@ -129,10 +128,8 @@ class PhantomJSwrapper: def __del__(self): for name in self._TMP_FILE_NAMES: - try: + with contextlib.suppress(OSError, KeyError): os.remove(self._TMP_FILES[name].name) - except (OSError, KeyError): - pass def _save_cookies(self, url): cookies = cookie_jar_to_list(self.extractor._downloader.cookiejar) diff --git a/yt_dlp/extractor/rtve.py b/yt_dlp/extractor/rtve.py index e5837e8c8..42a602968 100644 --- a/yt_dlp/extractor/rtve.py +++ b/yt_dlp/extractor/rtve.py @@ -1,6 +1,5 @@ import base64 import io -import sys from .common import InfoExtractor from ..compat import ( @@ -17,8 +16,6 @@ from ..utils import ( try_get, ) -_bytes_to_chr = (lambda x: x) if sys.version_info[0] == 2 else (lambda x: map(chr, x)) - class RTVEALaCartaIE(InfoExtractor): IE_NAME = 'rtve.es:alacarta' @@ -87,7 +84,7 @@ class RTVEALaCartaIE(InfoExtractor): alphabet = [] e = 0 d = 0 - for l in _bytes_to_chr(alphabet_data): + for l in alphabet_data.decode('iso-8859-1'): if d == 0: alphabet.append(l) d = e = (e + 1) % 4 @@ -97,7 +94,7 @@ class RTVEALaCartaIE(InfoExtractor): f = 0 e = 3 b = 1 - for letter in _bytes_to_chr(url_data): + for letter in url_data.decode('iso-8859-1'): if f == 0: l = int(letter) * 10 f = 1 diff --git a/yt_dlp/extractor/spotify.py b/yt_dlp/extractor/spotify.py index 3128825e5..a2068a1b6 100644 --- a/yt_dlp/extractor/spotify.py +++ b/yt_dlp/extractor/spotify.py @@ -102,6 +102,7 @@ class SpotifyBaseIE(InfoExtractor): class SpotifyIE(SpotifyBaseIE): IE_NAME = 'spotify' + IE_DESC = 'Spotify episodes' _VALID_URL = 
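
The rtve change above retires the Python 2 era `_bytes_to_chr` helper: iterating `bytes` in Python 3 yields integers, and ISO-8859-1 maps every byte to the code point of the same value, so decoding reproduces the old character-by-character view directly:

```python
data = bytes([65, 0xE9, 0xFF])
assert [chr(b) for b in data] == list(data.decode('iso-8859-1'))  # ['A', 'é', 'ÿ']
```
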
SpotifyBaseIE._VALID_URL_TEMPL % 'episode' _TESTS = [{ 'url': 'https://open.spotify.com/episode/4Z7GAJ50bgctf6uclHlWKo', @@ -131,6 +132,7 @@ class SpotifyIE(SpotifyBaseIE): class SpotifyShowIE(SpotifyBaseIE): IE_NAME = 'spotify:show' + IE_DESC = 'Spotify shows' _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'show' _TEST = { 'url': 'https://open.spotify.com/show/4PM9Ke6l66IRNpottHKV9M', diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 431230948..7da54e088 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3586,17 +3586,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor): headers=self.generate_api_headers(ytcfg=master_ytcfg), note='Downloading initial data API JSON') - try: - # This will error if there is no livechat + try: # This will error if there is no livechat initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'] + except (KeyError, IndexError, TypeError): + pass + else: info.setdefault('subtitles', {})['live_chat'] = [{ - 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies + 'url': f'https://www.youtube.com/watch?v={video_id}', # url is needed to set cookies 'video_id': video_id, 'ext': 'json', 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay', }] - except (KeyError, IndexError, TypeError): - pass if initial_data: info['chapters'] = ( diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py index 001836887..70857b798 100644 --- a/yt_dlp/jsinterp.py +++ b/yt_dlp/jsinterp.py @@ -1,7 +1,8 @@ +import collections +import contextlib import json import operator import re -from collections.abc import MutableMapping from .utils import ExtractorError, remove_quotes @@ -35,38 +36,17 @@ class JS_Continue(ExtractorError): ExtractorError.__init__(self, 'Invalid continue') -class LocalNameSpace(MutableMapping): - def __init__(self, *stack): - self.stack = tuple(stack) - - def __getitem__(self, key): - for scope in self.stack: - if key in scope: - return scope[key] - raise KeyError(key) - +class LocalNameSpace(collections.ChainMap): def __setitem__(self, key, value): - for scope in self.stack: + for scope in self.maps: if key in scope: scope[key] = value - break - else: - self.stack[0][key] = value - return value + return + self.maps[0][key] = value def __delitem__(self, key): raise NotImplementedError('Deleting is not supported') - def __iter__(self): - for scope in self.stack: - yield from scope - - def __len__(self, key): - return len(iter(self)) - - def __repr__(self): - return f'LocalNameSpace{self.stack}' - class JSInterpreter: def __init__(self, code, objects=None): @@ -302,10 +282,8 @@ class JSInterpreter: if var_m: return local_vars[var_m.group('name')] - try: + with contextlib.suppress(ValueError): return json.loads(expr) - except ValueError: - pass m = re.match( r'(?P%s)\[(?P.+)\]$' % _NAME_RE, expr) @@ -521,14 +499,13 @@ class JSInterpreter: def build_function(self, argnames, code, *global_stack): global_stack = list(global_stack) or [{}] - local_vars = global_stack.pop(0) def resf(args, **kwargs): - local_vars.update({ + global_stack[0].update({ **dict(zip(argnames, args)), **kwargs }) - var_stack = LocalNameSpace(local_vars, *global_stack) + var_stack = LocalNameSpace(*global_stack) for stmt in self._separate(code.replace('\n', ''), ';'): ret, should_abort = self.interpret_statement(stmt, var_stack) if should_abort: diff --git a/yt_dlp/options.py b/yt_dlp/options.py 
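
The jsinterp rewrite above swaps a hand-rolled `MutableMapping` for `collections.ChainMap`, which already provides lookup, iteration and length across the scope stack; only assignment needs overriding, since a plain `ChainMap` always writes to `maps[0]` while the interpreter must update the innermost scope that already defines the name:

```python
import collections


class LocalNameSpace(collections.ChainMap):
    def __setitem__(self, key, value):
        for scope in self.maps:
            if key in scope:
                scope[key] = value
                return
        self.maps[0][key] = value


outer = {'x': 1}
ns = LocalNameSpace({}, outer)
ns['x'] = 2    # updates the defining (outer) scope
ns['y'] = 3    # new names land in the innermost scope
assert outer == {'x': 2} and ns.maps[0] == {'y': 3}
```
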
index 243beab4d..0c042caf4 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -21,6 +21,7 @@ from .utils import ( Config, expand_path, get_executable_path, + join_nonempty, remove_end, write_string, ) @@ -109,9 +110,43 @@ def parseOpts(overrideArguments=None, ignore_config_files='if_override'): return parser, opts, args +class _YoutubeDLHelpFormatter(optparse.IndentedHelpFormatter): + def __init__(self): + # No need to wrap help messages if we're on a wide console + max_width = compat_get_terminal_size().columns or 80 + # 47% is chosen because that is how README.md is currently formatted + # and moving help text even further to the right is undesirable. + # This can be reduced in the future to get a prettier output + super().__init__(width=max_width, max_help_position=int(0.47 * max_width)) + + @staticmethod + def format_option_strings(option): + """ ('-o', '--option') -> -o, --format METAVAR """ + opts = join_nonempty( + option._short_opts and option._short_opts[0], + option._long_opts and option._long_opts[0], + delim=', ') + if option.takes_value(): + opts += f' {option.metavar}' + return opts + + class _YoutubeDLOptionParser(optparse.OptionParser): # optparse is deprecated since python 3.2. So assume a stable interface even for private methods + def __init__(self): + super().__init__( + prog='yt-dlp', + version=__version__, + usage='%prog [OPTIONS] URL [URL...]', + epilog='See full documentation at https://github.com/yt-dlp/yt-dlp#readme', + formatter=_YoutubeDLHelpFormatter(), + conflict_handler='resolve', + ) + + def _get_args(self, args): + return sys.argv[1:] if args is None else list(args) + def _match_long_opt(self, opt): """Improve ambigious argument resolution by comparing option objects instead of argument strings""" try: @@ -123,23 +158,6 @@ class _YoutubeDLOptionParser(optparse.OptionParser): def create_parser(): - def _format_option_string(option): - ''' ('-o', '--option') -> -o, --format METAVAR''' - - opts = [] - - if option._short_opts: - opts.append(option._short_opts[0]) - if option._long_opts: - opts.append(option._long_opts[0]) - if len(opts) > 1: - opts.insert(1, ', ') - - if option.takes_value(): - opts.append(' %s' % option.metavar) - - return ''.join(opts) - def _list_from_options_callback(option, opt_str, value, parser, append=True, delim=',', process=str.strip): # append can be True, False or -1 (prepend) current = list(getattr(parser.values, option.dest)) if append else [] @@ -204,23 +222,7 @@ def create_parser(): out_dict[key] = out_dict.get(key, []) + [val] if append else val setattr(parser.values, option.dest, out_dict) - # No need to wrap help messages if we're on a wide console - columns = compat_get_terminal_size().columns - max_width = columns if columns else 80 - # 47% is chosen because that is how README.md is currently formatted - # and moving help text even further to the right is undesirable. 
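
`format_option_strings` above is rebuilt around `join_nonempty` from `yt_dlp.utils`. A minimal stand-in for the behavior relied on here (the real helper accepts further options):

```python
def join_nonempty(*values, delim='-'):
    # Drop falsy entries, join the rest
    return delim.join(map(str, filter(None, values)))


assert join_nonempty('-o', '--output', delim=', ') == '-o, --output'
assert join_nonempty(None, '--output', delim=', ') == '--output'  # no short opt
```
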
- # This can be reduced in the future to get a prettier output - max_help_position = int(0.47 * max_width) - - fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position) - fmt.format_option_strings = _format_option_string - - parser = _YoutubeDLOptionParser( - version=__version__, - formatter=fmt, - usage='%prog [OPTIONS] URL [URL...]', - conflict_handler='resolve' - ) + parser = _YoutubeDLOptionParser() general = optparse.OptionGroup(parser, 'General Options') general.add_option( @@ -1048,7 +1050,7 @@ def create_parser(): verbosity.add_option( '-C', '--call-home', dest='call_home', action='store_true', default=False, - # help='[Broken] Contact the yt-dlp server for debugging') + # help='Contact the yt-dlp server for debugging') help=optparse.SUPPRESS_HELP) verbosity.add_option( '--no-call-home', diff --git a/yt_dlp/postprocessor/common.py b/yt_dlp/postprocessor/common.py index fdea3a7ea..519d06138 100644 --- a/yt_dlp/postprocessor/common.py +++ b/yt_dlp/postprocessor/common.py @@ -69,8 +69,8 @@ class PostProcessor(metaclass=PostProcessorMetaClass): return name[6:] if name[:6].lower() == 'ffmpeg' else name def to_screen(self, text, prefix=True, *args, **kwargs): - tag = '[%s] ' % self.PP_NAME if prefix else '' if self._downloader: + tag = '[%s] ' % self.PP_NAME if prefix else '' return self._downloader.to_screen(f'{tag}{text}', *args, **kwargs) def report_warning(self, text, *args, **kwargs): diff --git a/yt_dlp/postprocessor/metadataparser.py b/yt_dlp/postprocessor/metadataparser.py index 5bc435da3..98885bd19 100644 --- a/yt_dlp/postprocessor/metadataparser.py +++ b/yt_dlp/postprocessor/metadataparser.py @@ -1,29 +1,25 @@ import re -from enum import Enum from .common import PostProcessor +from ..utils import Namespace class MetadataParserPP(PostProcessor): - class Actions(Enum): - INTERPRET = 'interpretter' - REPLACE = 'replacer' - def __init__(self, downloader, actions): - PostProcessor.__init__(self, downloader) + super().__init__(self, downloader) self._actions = [] for f in actions: - action = f[0] - assert isinstance(action, self.Actions) - self._actions.append(getattr(self, action.value)(*f[1:])) + action, *args = f + assert action in self.Actions + self._actions.append(action(*args)) @classmethod def validate_action(cls, action, *data): - ''' Each action can be: + """Each action can be: (Actions.INTERPRET, from, to) OR (Actions.REPLACE, field, search, replace) - ''' - if not isinstance(action, cls.Actions): + """ + if action not in cls.Actions: raise ValueError(f'{action!r} is not a valid action') getattr(cls, action.value)(cls, *data) # So this can raise error to validate @@ -99,6 +95,8 @@ class MetadataParserPP(PostProcessor): search_re = re.compile(search) return f + Actions = Namespace(INTERPRET=interpretter, REPLACE=replacer) + class MetadataFromFieldPP(MetadataParserPP): @classmethod diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 34a938362..cf52fb2b6 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -70,6 +70,7 @@ from .socks import ProxyType, sockssocket try: import certifi + # The certificate may not be bundled in executable has_certifi = os.path.exists(certifi.where()) except ImportError: @@ -282,22 +283,16 @@ def write_json_file(obj, fn): if sys.platform == 'win32': # Need to remove existing file on Windows, else os.rename raises # WindowsError or FileExistsError. 
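
`MetadataParserPP.Actions` above becomes a `Namespace`, a helper this patch adds at the bottom of `yt_dlp/utils.py`. It is simply a `namedtuple` instantiated on the spot, which is why `action in self.Actions` works: membership on a tuple tests the stored values.

```python
import collections


def Namespace(**kwargs):
    # Iterating the dict yields the field names; the same dict supplies the values
    return collections.namedtuple('Namespace', kwargs)(**kwargs)


Actions = Namespace(INTERPRET='interpret', REPLACE='replace')
assert Actions.INTERPRET == 'interpret'    # dotted, read-only access
assert 'replace' in Actions                # membership checks values
assert Actions._asdict() == {'INTERPRET': 'interpret', 'REPLACE': 'replace'}
```
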
- try: + with contextlib.suppress(OSError): os.unlink(fn) - except OSError: - pass - try: + with contextlib.suppress(OSError): mask = os.umask(0) os.umask(mask) os.chmod(tf.name, 0o666 & ~mask) - except OSError: - pass os.rename(tf.name, fn) except Exception: - try: + with contextlib.suppress(OSError): os.remove(tf.name) - except OSError: - pass raise @@ -575,12 +570,9 @@ def extract_attributes(html_element): }. """ parser = HTMLAttributeParser() - try: + with contextlib.suppress(compat_HTMLParseError): parser.feed(html_element) parser.close() - # Older Python may throw HTMLParseError in case of malformed HTML - except compat_HTMLParseError: - pass return parser.attrs @@ -800,10 +792,8 @@ def _htmlentity_transform(entity_with_semicolon): else: base = 10 # See https://github.com/ytdl-org/youtube-dl/issues/7518 - try: + with contextlib.suppress(ValueError): return compat_chr(int(numstr, base)) - except ValueError: - pass # Unknown entity in name, return its literal representation return '&%s;' % entity @@ -812,7 +802,7 @@ def _htmlentity_transform(entity_with_semicolon): def unescapeHTML(s): if s is None: return None - assert type(s) == compat_str + assert isinstance(s, str) return re.sub( r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s) @@ -865,7 +855,7 @@ def get_subprocess_encoding(): def encodeFilename(s, for_subprocess=False): - assert type(s) == str + assert isinstance(s, str) return s @@ -924,10 +914,8 @@ def _ssl_load_windows_store_certs(ssl_context, storename): except PermissionError: return for cert in certs: - try: + with contextlib.suppress(ssl.SSLError): ssl_context.load_verify_locations(cadata=cert) - except ssl.SSLError: - pass def make_HTTPS_handler(params, **kwargs): @@ -1391,7 +1379,7 @@ def make_socks_conn_class(base_class, socks_proxy): def connect(self): self.sock = sockssocket() self.sock.setproxy(*proxy_args) - if type(self.timeout) in (int, float): + if isinstance(self.timeout, (int, float)): self.sock.settimeout(self.timeout) self.sock.connect((self.host, self.port)) @@ -1526,9 +1514,7 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar): try: cf.write(prepare_line(line)) except compat_cookiejar.LoadError as e: - write_string( - 'WARNING: skipping cookie file entry due to %s: %r\n' - % (e, line), sys.stderr) + write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n') continue cf.seek(0) self._really_load(cf, filename, ignore_discard, ignore_expires) @@ -1646,12 +1632,10 @@ def parse_iso8601(date_str, delimiter='T', timezone=None): if timezone is None: timezone, date_str = extract_timezone(date_str) - try: + with contextlib.suppress(ValueError): date_format = f'%Y-%m-%d{delimiter}%H:%M:%S' dt = datetime.datetime.strptime(date_str, date_format) - timezone return calendar.timegm(dt.timetuple()) - except ValueError: - pass def date_formats(day_first=True): @@ -1671,17 +1655,13 @@ def unified_strdate(date_str, day_first=True): _, date_str = extract_timezone(date_str) for expression in date_formats(day_first): - try: + with contextlib.suppress(ValueError): upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') - except ValueError: - pass if upload_date is None: timetuple = email.utils.parsedate_tz(date_str) if timetuple: - try: + with contextlib.suppress(ValueError): upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d') - except ValueError: - pass if upload_date is not None: return compat_str(upload_date) @@ -1709,11 +1689,9 @@ def unified_timestamp(date_str, day_first=True): date_str = 
m.group(1) for expression in date_formats(day_first): - try: + with contextlib.suppress(ValueError): dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta) return calendar.timegm(dt.timetuple()) - except ValueError: - pass timetuple = email.utils.parsedate_tz(date_str) if timetuple: return calendar.timegm(timetuple) + pm_delta * 3600 @@ -1879,9 +1857,8 @@ def get_windows_version(): def write_string(s, out=None, encoding=None): - if out is None: - out = sys.stderr - assert type(s) == compat_str + assert isinstance(s, str) + out = out or sys.stderr if 'b' in getattr(out, 'mode', ''): byt = s.encode(encoding or preferredencoding(), 'ignore') @@ -2483,18 +2460,10 @@ def parse_duration(s): else: return None - duration = 0 - if secs: - duration += float(secs) - if mins: - duration += float(mins) * 60 - if hours: - duration += float(hours) * 60 * 60 - if days: - duration += float(days) * 24 * 60 * 60 if ms: - duration += float(ms.replace(':', '.')) - return duration + ms = ms.replace(':', '.') + return sum(float(part or 0) * mult for part, mult in ( + (days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1))) def prepend_extension(filename, ext, expected_real_ext=None): @@ -2957,9 +2926,10 @@ TV_PARENTAL_GUIDELINES = { def parse_age_limit(s): - if type(s) == int: + # isinstance(False, int) is True. So type() must be used instead + if type(s) is int: return s if 0 <= s <= 21 else None - if not isinstance(s, str): + elif not isinstance(s, str): return None m = re.match(r'^(?P\d{1,2})\+?$', s) if m: @@ -3227,7 +3197,7 @@ def parse_codecs(codecs_str): if not tcodec: tcodec = full_codec else: - write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr) + write_string(f'WARNING: Unknown codec {full_codec}\n') if vcodec or acodec or tcodec: return { 'vcodec': vcodec or 'none', @@ -4934,7 +4904,7 @@ def get_executable_path(): def load_plugins(name, suffix, namespace): classes = {} - try: + with contextlib.suppress(FileNotFoundError): plugins_spec = importlib.util.spec_from_file_location( name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py')) plugins = importlib.util.module_from_spec(plugins_spec) @@ -4947,8 +4917,6 @@ def load_plugins(name, suffix, namespace): continue klass = getattr(plugins, name) classes[name] = namespace[name] = klass - except FileNotFoundError: - pass return classes @@ -4957,13 +4925,14 @@ def traverse_obj( casesense=True, is_user_input=False, traverse_string=False): ''' Traverse nested list/dict/tuple @param path_list A list of paths which are checked one by one. - Each path is a list of keys where each key is a string, - a function, a tuple of strings/None or "...". - When a fuction is given, it takes the key and value as arguments - and returns whether the key matches or not. When a tuple is given, - all the keys given in the tuple are traversed, and - "..." 
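
The comment added to `parse_age_limit` above deserves spelling out: `bool` is a subclass of `int`, so an `isinstance` check would accept `True`/`False` and silently parse them as ages 1 and 0. The exact-type check rejects them:

```python
assert isinstance(True, int)    # bool subclasses int
assert type(True) is not int    # but its exact type is bool
assert 0 <= True <= 21          # True would otherwise pass as age 1
```
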
traverses all the keys in the object - "None" returns the object without traversal + Each path is a list of keys where each key is a: + - None: Do nothing + - string: A dictionary key + - int: An index into a list + - tuple: A list of keys all of which will be traversed + - Ellipsis: Fetch all values in the object + - Function: Takes the key and value as arguments + and returns whether the key matches or not @param default Default value to return @param expected_type Only accept final value of this type (Can also be any callable) @param get_all Return all the values obtained from a path or only the first one @@ -5253,7 +5222,7 @@ class Config: yield from self.own_args or [] def parse_args(self): - return self._parser.parse_args(list(self.all_args)) + return self._parser.parse_args(self.all_args) class WebSocketsWrapper(): @@ -5339,3 +5308,7 @@ class classproperty: def __get__(self, _, cls): return self.f(cls) + + +def Namespace(**kwargs): + return collections.namedtuple('Namespace', kwargs)(**kwargs) diff --git a/yt_dlp/webvtt.py b/yt_dlp/webvtt.py index 3180eafde..741622b25 100644 --- a/yt_dlp/webvtt.py +++ b/yt_dlp/webvtt.py @@ -103,14 +103,8 @@ def _parse_ts(ts): Convert a parsed WebVTT timestamp (a re.Match obtained from _REGEX_TS) into an MPEG PES timestamp: a tick counter at 90 kHz resolution. """ - - h, min, s, ms = ts.groups() - return 90 * ( - int(h or 0) * 3600000 + # noqa: W504,E221,E222 - int(min) * 60000 + # noqa: W504,E221,E222 - int(s) * 1000 + # noqa: W504,E221,E222 - int(ms) # noqa: W504,E221,E222 - ) + return 90 * sum( + int(part or 0) * mult for part, mult in zip(ts.groups(), (3600_000, 60_000, 1000, 1))) def _format_ts(ts): From 77f9033095cd8e1092a80db67f2b577cf13f95a8 Mon Sep 17 00:00:00 2001 From: felix Date: Fri, 4 Feb 2022 14:37:02 +0100 Subject: [PATCH 0960/2552] [compat] Split into sub-modules (#2173) Authored by: fstirlitz, pukkandan --- test/test_compat.py | 8 - yt_dlp/compat.py | 302 ------------------------------ yt_dlp/compat/__init__.py | 129 +++++++++++++ yt_dlp/compat/_deprecated.py | 47 +++++ yt_dlp/compat/_legacy.py | 54 ++++++ yt_dlp/compat/asyncio/__init__.py | 16 ++ yt_dlp/compat/asyncio/tasks.py | 8 + yt_dlp/compat/re.py | 14 ++ yt_dlp/downloader/websocket.py | 2 +- yt_dlp/extractor/common.py | 5 +- yt_dlp/webvtt.py | 7 +- 11 files changed, 274 insertions(+), 318 deletions(-) delete mode 100644 yt_dlp/compat.py create mode 100644 yt_dlp/compat/__init__.py create mode 100644 yt_dlp/compat/_deprecated.py create mode 100644 yt_dlp/compat/_legacy.py create mode 100644 yt_dlp/compat/asyncio/__init__.py create mode 100644 yt_dlp/compat/asyncio/tasks.py create mode 100644 yt_dlp/compat/re.py diff --git a/test/test_compat.py b/test/test_compat.py index 29e7384f0..8e40a4180 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -42,14 +42,6 @@ class TestCompat(unittest.TestCase): finally: compat_setenv('HOME', old_home or '') - def test_all_present(self): - import yt_dlp.compat - all_names = yt_dlp.compat.__all__ - present_names = set(filter( - lambda c: '_' in c and not c.startswith('_'), - dir(yt_dlp.compat))) - {'unicode_literals'} - self.assertEqual(all_names, sorted(present_names)) - def test_compat_urllib_parse_unquote(self): self.assertEqual(compat_urllib_parse_unquote('abc%20def'), 'abc def') self.assertEqual(compat_urllib_parse_unquote('%7e/abc+def'), '~/abc+def') diff --git a/yt_dlp/compat.py b/yt_dlp/compat.py deleted file mode 100644 index f18c6cce2..000000000 --- a/yt_dlp/compat.py +++ /dev/null @@ -1,302 +0,0 @@ -import asyncio -import 
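
`parse_duration` and `webvtt._parse_ts` above now share one shape: pair each captured field with its unit multiplier and sum, with missing groups counting as zero. For the WebVTT case (multipliers in milliseconds, scaled to 90 kHz ticks afterwards):

```python
groups = ('1', '02', '03', '500')    # h, min, s, ms as captured by the regex
ms = sum(int(part or 0) * mult
         for part, mult in zip(groups, (3600_000, 60_000, 1000, 1)))
assert ms == 3_723_500               # 01:02:03.500
assert 90 * ms == 335_115_000        # MPEG PES ticks at 90 kHz
```
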
base64 -import collections -import contextlib -import ctypes -import getpass -import html -import html.parser -import http -import http.client -import http.cookiejar -import http.cookies -import http.server -import itertools -import os -import re -import shlex -import shutil -import socket -import struct -import subprocess -import sys -import tokenize -import urllib -import xml.etree.ElementTree as etree -from subprocess import DEVNULL - - -# HTMLParseError has been deprecated in Python 3.3 and removed in -# Python 3.5. Introducing dummy exception for Python >3.5 for compatible -# and uniform cross-version exception handling -class compat_HTMLParseError(Exception): - pass - - -# compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE -# will not work since ctypes.WINFUNCTYPE does not exist in UNIX machines -def compat_ctypes_WINFUNCTYPE(*args, **kwargs): - return ctypes.WINFUNCTYPE(*args, **kwargs) - - -class _TreeBuilder(etree.TreeBuilder): - def doctype(self, name, pubid, system): - pass - - -def compat_etree_fromstring(text): - return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) - - -compat_os_name = os._name if os.name == 'java' else os.name - - -if compat_os_name == 'nt': - def compat_shlex_quote(s): - return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"') -else: - from shlex import quote as compat_shlex_quote # noqa: F401 - - -def compat_ord(c): - return c if isinstance(c, int) else ord(c) - - -def compat_setenv(key, value, env=os.environ): - env[key] = value - - -if compat_os_name == 'nt' and sys.version_info < (3, 8): - # os.path.realpath on Windows does not follow symbolic links - # prior to Python 3.8 (see https://bugs.python.org/issue9949) - def compat_realpath(path): - while os.path.islink(path): - path = os.path.abspath(os.readlink(path)) - return path -else: - compat_realpath = os.path.realpath - - -try: - compat_Pattern = re.Pattern -except AttributeError: - compat_Pattern = type(re.compile('')) - - -try: - compat_Match = re.Match -except AttributeError: - compat_Match = type(re.compile('').match('')) - - -try: - compat_asyncio_run = asyncio.run # >= 3.7 -except AttributeError: - def compat_asyncio_run(coro): - try: - loop = asyncio.get_event_loop() - except RuntimeError: - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - loop.run_until_complete(coro) - - asyncio.run = compat_asyncio_run - - -try: # >= 3.7 - asyncio.tasks.all_tasks -except AttributeError: - asyncio.tasks.all_tasks = asyncio.tasks.Task.all_tasks - -try: - import websockets as compat_websockets -except ImportError: - compat_websockets = None - -# Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl -# See https://github.com/yt-dlp/yt-dlp/issues/792 -# https://docs.python.org/3/library/os.path.html#os.path.expanduser -if compat_os_name in ('nt', 'ce'): - def compat_expanduser(path): - HOME = os.environ.get('HOME') - if not HOME: - return os.path.expanduser(path) - elif not path.startswith('~'): - return path - i = path.replace('\\', '/', 1).find('/') # ~user - if i < 0: - i = len(path) - userhome = os.path.join(os.path.dirname(HOME), path[1:i]) if i > 1 else HOME - return userhome + path[i:] -else: - compat_expanduser = os.path.expanduser - - -try: - from Cryptodome.Cipher import AES as compat_pycrypto_AES -except ImportError: - try: - from Crypto.Cipher import AES as compat_pycrypto_AES - except ImportError: - compat_pycrypto_AES = None - -try: - import brotlicffi as compat_brotli -except ImportError: - try: - import brotli as 
compat_brotli - except ImportError: - compat_brotli = None - -WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None - - -def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.python.org/issue30075 - if compat_os_name != 'nt': - return - global WINDOWS_VT_MODE - startupinfo = subprocess.STARTUPINFO() - startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW - with contextlib.suppress(Exception): - subprocess.Popen('', shell=True, startupinfo=startupinfo).wait() - WINDOWS_VT_MODE = True - - -# Deprecated - -compat_b64decode = base64.b64decode -compat_chr = chr -compat_cookiejar = http.cookiejar -compat_cookiejar_Cookie = http.cookiejar.Cookie -compat_cookies_SimpleCookie = http.cookies.SimpleCookie -compat_get_terminal_size = shutil.get_terminal_size -compat_getenv = os.getenv -compat_getpass = getpass.getpass -compat_html_entities = html.entities -compat_html_entities_html5 = html.entities.html5 -compat_HTMLParser = html.parser.HTMLParser -compat_http_client = http.client -compat_http_server = http.server -compat_HTTPError = urllib.error.HTTPError -compat_itertools_count = itertools.count -compat_parse_qs = urllib.parse.parse_qs -compat_str = str -compat_struct_pack = struct.pack -compat_struct_unpack = struct.unpack -compat_tokenize_tokenize = tokenize.tokenize -compat_urllib_error = urllib.error -compat_urllib_parse_unquote = urllib.parse.unquote -compat_urllib_parse_unquote_plus = urllib.parse.unquote_plus -compat_urllib_parse_urlencode = urllib.parse.urlencode -compat_urllib_parse_urlparse = urllib.parse.urlparse -compat_urllib_request = urllib.request -compat_urlparse = compat_urllib_parse = urllib.parse - - -# To be removed - Do not use - -compat_basestring = str -compat_collections_abc = collections.abc -compat_cookies = http.cookies -compat_etree_Element = etree.Element -compat_etree_register_namespace = etree.register_namespace -compat_filter = filter -compat_input = input -compat_integer_types = (int, ) -compat_kwargs = lambda kwargs: kwargs -compat_map = map -compat_numeric_types = (int, float, complex) -compat_print = print -compat_shlex_split = shlex.split -compat_socket_create_connection = socket.create_connection -compat_Struct = struct.Struct -compat_subprocess_get_DEVNULL = lambda: DEVNULL -compat_urllib_parse_quote = urllib.parse.quote -compat_urllib_parse_quote_plus = urllib.parse.quote_plus -compat_urllib_parse_unquote_to_bytes = urllib.parse.unquote_to_bytes -compat_urllib_parse_urlunparse = urllib.parse.urlunparse -compat_urllib_request_DataHandler = urllib.request.DataHandler -compat_urllib_response = urllib.response -compat_urlretrieve = urllib.request.urlretrieve -compat_xml_parse_error = etree.ParseError -compat_xpath = lambda xpath: xpath -compat_zip = zip -workaround_optparse_bug9161 = lambda: None - - -# Set public objects - -__all__ = [ - 'WINDOWS_VT_MODE', - 'compat_HTMLParseError', - 'compat_HTMLParser', - 'compat_HTTPError', - 'compat_Match', - 'compat_Pattern', - 'compat_Struct', - 'compat_asyncio_run', - 'compat_b64decode', - 'compat_basestring', - 'compat_brotli', - 'compat_chr', - 'compat_collections_abc', - 'compat_cookiejar', - 'compat_cookiejar_Cookie', - 'compat_cookies', - 'compat_cookies_SimpleCookie', - 'compat_ctypes_WINFUNCTYPE', - 'compat_etree_Element', - 'compat_etree_fromstring', - 'compat_etree_register_namespace', - 'compat_expanduser', - 'compat_filter', - 'compat_get_terminal_size', - 'compat_getenv', - 'compat_getpass', - 'compat_html_entities', - 'compat_html_entities_html5', - 'compat_http_client', - 
'compat_http_server', - 'compat_input', - 'compat_integer_types', - 'compat_itertools_count', - 'compat_kwargs', - 'compat_map', - 'compat_numeric_types', - 'compat_ord', - 'compat_os_name', - 'compat_parse_qs', - 'compat_print', - 'compat_pycrypto_AES', - 'compat_realpath', - 'compat_setenv', - 'compat_shlex_quote', - 'compat_shlex_split', - 'compat_socket_create_connection', - 'compat_str', - 'compat_struct_pack', - 'compat_struct_unpack', - 'compat_subprocess_get_DEVNULL', - 'compat_tokenize_tokenize', - 'compat_urllib_error', - 'compat_urllib_parse', - 'compat_urllib_parse_quote', - 'compat_urllib_parse_quote_plus', - 'compat_urllib_parse_unquote', - 'compat_urllib_parse_unquote_plus', - 'compat_urllib_parse_unquote_to_bytes', - 'compat_urllib_parse_urlencode', - 'compat_urllib_parse_urlparse', - 'compat_urllib_parse_urlunparse', - 'compat_urllib_request', - 'compat_urllib_request_DataHandler', - 'compat_urllib_response', - 'compat_urlparse', - 'compat_urlretrieve', - 'compat_websockets', - 'compat_xml_parse_error', - 'compat_xpath', - 'compat_zip', - 'windows_enable_vt_mode', - 'workaround_optparse_bug9161', -] diff --git a/yt_dlp/compat/__init__.py b/yt_dlp/compat/__init__.py new file mode 100644 index 000000000..7a0e82992 --- /dev/null +++ b/yt_dlp/compat/__init__.py @@ -0,0 +1,129 @@ +import contextlib +import os +import subprocess +import sys +import types +import xml.etree.ElementTree as etree + +from . import re +from ._deprecated import * # noqa: F401, F403 + + +# HTMLParseError has been deprecated in Python 3.3 and removed in +# Python 3.5. Introducing dummy exception for Python >3.5 for compatible +# and uniform cross-version exception handling +class compat_HTMLParseError(Exception): + pass + + +class _TreeBuilder(etree.TreeBuilder): + def doctype(self, name, pubid, system): + pass + + +def compat_etree_fromstring(text): + return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) + + +compat_os_name = os._name if os.name == 'java' else os.name + + +if compat_os_name == 'nt': + def compat_shlex_quote(s): + return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"') +else: + from shlex import quote as compat_shlex_quote # noqa: F401 + + +def compat_ord(c): + return c if isinstance(c, int) else ord(c) + + +def compat_setenv(key, value, env=os.environ): + env[key] = value + + +if compat_os_name == 'nt' and sys.version_info < (3, 8): + # os.path.realpath on Windows does not follow symbolic links + # prior to Python 3.8 (see https://bugs.python.org/issue9949) + def compat_realpath(path): + while os.path.islink(path): + path = os.path.abspath(os.readlink(path)) + return path +else: + compat_realpath = os.path.realpath + + +try: + import websockets as compat_websockets +except ImportError: + compat_websockets = None + +# Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl +# See https://github.com/yt-dlp/yt-dlp/issues/792 +# https://docs.python.org/3/library/os.path.html#os.path.expanduser +if compat_os_name in ('nt', 'ce'): + def compat_expanduser(path): + HOME = os.environ.get('HOME') + if not HOME: + return os.path.expanduser(path) + elif not path.startswith('~'): + return path + i = path.replace('\\', '/', 1).find('/') # ~user + if i < 0: + i = len(path) + userhome = os.path.join(os.path.dirname(HOME), path[1:i]) if i > 1 else HOME + return userhome + path[i:] +else: + compat_expanduser = os.path.expanduser + + +try: + from Cryptodome.Cipher import AES as compat_pycrypto_AES +except ImportError: + try: + from 
Crypto.Cipher import AES as compat_pycrypto_AES + except ImportError: + compat_pycrypto_AES = None + +try: + import brotlicffi as compat_brotli +except ImportError: + try: + import brotli as compat_brotli + except ImportError: + compat_brotli = None + +WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None + + +def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.python.org/issue30075 + if compat_os_name != 'nt': + return + global WINDOWS_VT_MODE + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + with contextlib.suppress(Exception): + subprocess.Popen('', shell=True, startupinfo=startupinfo).wait() + WINDOWS_VT_MODE = True + + +class _PassthroughLegacy(types.ModuleType): + def __getattr__(self, attr): + import importlib + with contextlib.suppress(ImportError): + return importlib.import_module(f'.{attr}', __name__) + + legacy = importlib.import_module('._legacy', __name__) + if not hasattr(legacy, attr): + raise AttributeError(f'module {__name__} has no attribute {attr}') + + # XXX: Implement this the same way as other DeprecationWarnings without circular import + import warnings + warnings.warn(DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=2) + return getattr(legacy, attr) + + +# Python 3.6 does not have module level __getattr__ +# https://peps.python.org/pep-0562/ +sys.modules[__name__].__class__ = _PassthroughLegacy diff --git a/yt_dlp/compat/_deprecated.py b/yt_dlp/compat/_deprecated.py new file mode 100644 index 000000000..f84439825 --- /dev/null +++ b/yt_dlp/compat/_deprecated.py @@ -0,0 +1,47 @@ +"""Deprecated - New code should avoid these""" + +import base64 +import getpass +import html +import html.parser +import http +import http.client +import http.cookiejar +import http.cookies +import http.server +import itertools +import os +import shutil +import struct +import tokenize +import urllib + +compat_b64decode = base64.b64decode +compat_chr = chr +compat_cookiejar = http.cookiejar +compat_cookiejar_Cookie = http.cookiejar.Cookie +compat_cookies_SimpleCookie = http.cookies.SimpleCookie +compat_get_terminal_size = shutil.get_terminal_size +compat_getenv = os.getenv +compat_getpass = getpass.getpass +compat_html_entities = html.entities +compat_html_entities_html5 = html.entities.html5 +compat_HTMLParser = html.parser.HTMLParser +compat_http_client = http.client +compat_http_server = http.server +compat_HTTPError = urllib.error.HTTPError +compat_itertools_count = itertools.count +compat_parse_qs = urllib.parse.parse_qs +compat_str = str +compat_struct_pack = struct.pack +compat_struct_unpack = struct.unpack +compat_tokenize_tokenize = tokenize.tokenize +compat_urllib_error = urllib.error +compat_urllib_parse_unquote = urllib.parse.unquote +compat_urllib_parse_unquote_plus = urllib.parse.unquote_plus +compat_urllib_parse_urlencode = urllib.parse.urlencode +compat_urllib_parse_urlparse = urllib.parse.urlparse +compat_urllib_request = urllib.request +compat_urlparse = compat_urllib_parse = urllib.parse + +__all__ = [x for x in globals() if x.startswith('compat_')] diff --git a/yt_dlp/compat/_legacy.py b/yt_dlp/compat/_legacy.py new file mode 100644 index 000000000..f185b7e2f --- /dev/null +++ b/yt_dlp/compat/_legacy.py @@ -0,0 +1,54 @@ +""" Do not use! 
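
`_PassthroughLegacy` above works around Python 3.6 lacking module-level `__getattr__` (PEP 562): assigning to a module object's `__class__` installs a `ModuleType` subclass whose `__getattr__` fires only for attributes normal lookup cannot find. The same trick in miniature, on a throwaway module:

```python
import types


class _Shim(types.ModuleType):
    def __getattr__(self, attr):    # called only when normal lookup fails
        return f'lazily resolved {attr}'


mod = types.ModuleType('demo')
mod.__class__ = _Shim               # what the patch does to sys.modules[__name__]
assert mod.anything == 'lazily resolved anything'
assert mod.__name__ == 'demo'       # existing attributes are served as usual
```
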
""" + +import collections +import ctypes +import http +import http.client +import http.cookiejar +import http.cookies +import http.server +import shlex +import socket +import struct +import urllib +import xml.etree.ElementTree as etree +from subprocess import DEVNULL + +from .asyncio import run as compat_asyncio_run # noqa: F401 +from .re import Pattern as compat_Pattern # noqa: F401 +from .re import match as compat_Match # noqa: F401 + + +# compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE +# will not work since ctypes.WINFUNCTYPE does not exist in UNIX machines +def compat_ctypes_WINFUNCTYPE(*args, **kwargs): + return ctypes.WINFUNCTYPE(*args, **kwargs) + + +compat_basestring = str +compat_collections_abc = collections.abc +compat_cookies = http.cookies +compat_etree_Element = etree.Element +compat_etree_register_namespace = etree.register_namespace +compat_filter = filter +compat_input = input +compat_integer_types = (int, ) +compat_kwargs = lambda kwargs: kwargs +compat_map = map +compat_numeric_types = (int, float, complex) +compat_print = print +compat_shlex_split = shlex.split +compat_socket_create_connection = socket.create_connection +compat_Struct = struct.Struct +compat_subprocess_get_DEVNULL = lambda: DEVNULL +compat_urllib_parse_quote = urllib.parse.quote +compat_urllib_parse_quote_plus = urllib.parse.quote_plus +compat_urllib_parse_unquote_to_bytes = urllib.parse.unquote_to_bytes +compat_urllib_parse_urlunparse = urllib.parse.urlunparse +compat_urllib_request_DataHandler = urllib.request.DataHandler +compat_urllib_response = urllib.response +compat_urlretrieve = urllib.request.urlretrieve +compat_xml_parse_error = etree.ParseError +compat_xpath = lambda xpath: xpath +compat_zip = zip +workaround_optparse_bug9161 = lambda: None diff --git a/yt_dlp/compat/asyncio/__init__.py b/yt_dlp/compat/asyncio/__init__.py new file mode 100644 index 000000000..0e8c6cad3 --- /dev/null +++ b/yt_dlp/compat/asyncio/__init__.py @@ -0,0 +1,16 @@ +# flake8: noqa: F405 + +from asyncio import * # noqa: F403 + +from . 
import tasks # noqa: F401 + +try: + run # >= 3.7 +except NameError: + def run(coro): + try: + loop = get_event_loop() + except RuntimeError: + loop = new_event_loop() + set_event_loop(loop) + loop.run_until_complete(coro) diff --git a/yt_dlp/compat/asyncio/tasks.py b/yt_dlp/compat/asyncio/tasks.py new file mode 100644 index 000000000..cb31e52fa --- /dev/null +++ b/yt_dlp/compat/asyncio/tasks.py @@ -0,0 +1,8 @@ +# flake8: noqa: F405 + +from asyncio.tasks import * # noqa: F403 + +try: # >= 3.7 + all_tasks +except NameError: + all_tasks = Task.all_tasks diff --git a/yt_dlp/compat/re.py b/yt_dlp/compat/re.py new file mode 100644 index 000000000..e8a6fabbd --- /dev/null +++ b/yt_dlp/compat/re.py @@ -0,0 +1,14 @@ +# flake8: noqa: F405 + +from re import * # F403 + +try: + Pattern # >= 3.7 +except NameError: + Pattern = type(compile('')) + + +try: + Match # >= 3.7 +except NameError: + Match = type(compile('').match('')) diff --git a/yt_dlp/downloader/websocket.py b/yt_dlp/downloader/websocket.py index 6b190cd90..8465f9713 100644 --- a/yt_dlp/downloader/websocket.py +++ b/yt_dlp/downloader/websocket.py @@ -1,4 +1,3 @@ -import asyncio import contextlib import os import signal @@ -15,6 +14,7 @@ else: from .common import FileDownloader from .external import FFmpegFD +from ..compat import asyncio class FFmpegSinkFD(FileDownloader): diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 10b297708..3ee5e257c 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -7,7 +7,6 @@ import math import netrc import os import random -import re import sys import time import xml.etree.ElementTree @@ -20,13 +19,13 @@ from ..compat import ( compat_getpass, compat_http_client, compat_os_name, - compat_Pattern, compat_str, compat_urllib_error, compat_urllib_parse_unquote, compat_urllib_parse_urlencode, compat_urllib_request, compat_urlparse, + re, ) from ..downloader import FileDownloader from ..downloader.f4m import get_base_url, remove_encrypted_media @@ -1198,7 +1197,7 @@ class InfoExtractor: """ if string is None: mobj = None - elif isinstance(pattern, (str, compat_Pattern)): + elif isinstance(pattern, (str, re.Pattern)): mobj = re.search(pattern, string, flags) else: for p in pattern: diff --git a/yt_dlp/webvtt.py b/yt_dlp/webvtt.py index 741622b25..4c222ba8e 100644 --- a/yt_dlp/webvtt.py +++ b/yt_dlp/webvtt.py @@ -9,9 +9,8 @@ in RFC 8216 §3.5 . 
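
`compat/asyncio` and `compat/re` above share a probe-and-backfill idiom: star-import the stdlib module, reference a name, and define it only if `NameError` fires. `compat/re.py` condensed:

```python
from re import *  # noqa: F403

try:
    Pattern  # noqa: F405  # public in the re module only since Python 3.7
except NameError:
    Pattern = type(compile(''))  # noqa: F405

assert isinstance(compile(r'\d+'), Pattern)  # noqa: F405
```
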
""" import io -import re -from .compat import compat_Match, compat_Pattern +from .compat import re from .utils import int_or_none, timetuple_from_msec @@ -26,7 +25,7 @@ class _MatchParser: self._pos = 0 def match(self, r): - if isinstance(r, compat_Pattern): + if isinstance(r, re.Pattern): return r.match(self._data, self._pos) if isinstance(r, str): if self._data.startswith(r, self._pos): @@ -37,7 +36,7 @@ class _MatchParser: def advance(self, by): if by is None: amt = 0 - elif isinstance(by, compat_Match): + elif isinstance(by, re.Match): amt = len(by.group(0)) elif isinstance(by, str): amt = len(by) From 43cc91ad759d3950c99a905f0ee4937cade10e5c Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 18 Apr 2022 04:39:25 +0530 Subject: [PATCH 0961/2552] bugfix for 19a0394044bfad36cd665450271b8eb048a41c02, 3d3bb1688bfc5373105e6bf7c3d4729cf3f78788 --- README.md | 15 ++++++++------- yt_dlp/downloader/common.py | 2 +- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 197d7b49b..be713569c 100644 --- a/README.md +++ b/README.md @@ -60,6 +60,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t * [EXTRACTOR ARGUMENTS](#extractor-arguments) * [PLUGINS](#plugins) * [EMBEDDING YT-DLP](#embedding-yt-dlp) + * [Embedding examples](#embedding-examples) * [DEPRECATED OPTIONS](#deprecated-options) * [CONTRIBUTING](CONTRIBUTING.md#contributing-to-yt-dlp) * [Opening an Issue](CONTRIBUTING.md#opening-an-issue) @@ -1755,11 +1756,11 @@ with YoutubeDL() as ydl: Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L181). -**Tip**: If you are porting your code from youtube-dl to yt-dlp, one important point to look out for is that we do not guarantee the return value of `YoutubeDL.extract_info` to be json serializable, or even be a dictionary. It will be dictionary-like, but if you want to ensure it is a serializable dictionary, pass it through `YoutubeDL.sanitize_info` as shown in the example above +**Tip**: If you are porting your code from youtube-dl to yt-dlp, one important point to look out for is that we do not guarantee the return value of `YoutubeDL.extract_info` to be json serializable, or even be a dictionary. 
It will be dictionary-like, but if you want to ensure it is a serializable dictionary, pass it through `YoutubeDL.sanitize_info` as shown in the [example below](#extracting-information) ## Embedding examples -### Extracting information +#### Extracting information ```python import json @@ -1775,7 +1776,7 @@ with yt_dlp.YoutubeDL(ydl_opts) as ydl: # ℹ️ ydl.sanitize_info makes the info json-serializable print(json.dumps(ydl.sanitize_info(info))) ``` -### Download from info-json +#### Download using an info-json ```python import yt_dlp @@ -1789,7 +1790,7 @@ print('Some videos failed to download' if error_code else 'All videos successfully downloaded') ``` -### Extract audio +#### Extract audio ```python import yt_dlp @@ -1808,7 +1809,7 @@ ydl_opts = { with yt_dlp.YoutubeDL(ydl_opts) as ydl: error_code = ydl.download(URLS) ``` -### Adding logger and progress hook +#### Adding logger and progress hook ```python import yt_dlp @@ -1849,7 +1850,7 @@ with yt_dlp.YoutubeDL(ydl_opts) as ydl: ydl.download(URLS) ``` -### Add a custom PostProcessor +#### Add a custom PostProcessor ```python import yt_dlp @@ -1869,7 +1870,7 @@ with yt_dlp.YoutubeDL() as ydl: ``` -### Use a custom format selector +#### Use a custom format selector ```python import yt_dlp diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index 3e5396988..022a9cd17 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -431,7 +431,7 @@ class FileDownloader: else: min_sleep_interval = self.params.get('sleep_interval') or 0 sleep_interval = random.uniform( - min_sleep_interval, self.params.get('max_sleep_interval', min_sleep_interval)) + min_sleep_interval, self.params.get('max_sleep_interval') or min_sleep_interval) if sleep_interval > 0: self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds ...') time.sleep(sleep_interval) From 1e9969f4f517eab4077f0b03eee9ef3afa493486 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 19 Apr 2022 02:57:20 +0530 Subject: [PATCH 0962/2552] bugfix for a44ca5a470e09b5170fc9c3a46733f050fadbfae, 19a0394044bfad36cd665450271b8eb048a41c02, 77f9033095cd8e1092a80db67f2b577cf13f95a8 Closes #3472 --- yt_dlp/extractor/facebook.py | 6 ++---- yt_dlp/postprocessor/ffmpeg.py | 2 +- yt_dlp/postprocessor/metadataparser.py | 6 +++--- yt_dlp/utils.py | 2 +- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index f15a36424..de45f9298 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -394,10 +394,8 @@ class FacebookIE(InfoExtractor): r'handleWithCustomApplyEach\(\s*ScheduledApplyEach\s*,\s*(\{.+?\})\s*\);', webpage)] post = traverse_obj(post_data, ( ..., 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or [] - media = traverse_obj( - post, - (..., 'attachments', ..., 'media', lambda _, m: str(m['id']) == video_id and m['__typename'] == 'Video'), - expected_type=dict) + media = traverse_obj(post, (..., 'attachments', ..., lambda k, v: ( + k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict) title = get_first(media, ('title', 'text')) description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text')) uploader_data = get_first(media, 'owner') or get_first(post, ('node', 'actors', ...)) or {} diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 6fe1b6cdd..d909149ef 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ 
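
The one-line downloader fix above turns on a classic pitfall: the default of `dict.get` applies only when the key is absent, so a key explicitly stored as `None` sails straight past it. Coercing with `or` also covers the stored-`None` case:

```python
params = {'max_sleep_interval': None}
assert params.get('max_sleep_interval', 5) is None     # default NOT applied
assert (params.get('max_sleep_interval') or 5) == 5    # falsy value coerced
```
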
b/yt_dlp/postprocessor/ffmpeg.py @@ -1151,7 +1151,7 @@ class FFmpegConcatPP(FFmpegPostProcessor): entries = info.get('entries') or [] if not any(entries) or (self._only_multi_video and info['_type'] != 'multi_video'): return [], info - elif traverse_obj(entries, (..., 'requested_downloads', lambda _, v: len(v) > 1)): + elif traverse_obj(entries, (..., lambda k, v: k == 'requested_downloads' and len(v) > 1)): raise PostProcessingError('Concatenation is not supported when downloading multiple separate formats') in_files = traverse_obj(entries, (..., 'requested_downloads', 0, 'filepath')) or [] diff --git a/yt_dlp/postprocessor/metadataparser.py b/yt_dlp/postprocessor/metadataparser.py index 98885bd19..51b927b91 100644 --- a/yt_dlp/postprocessor/metadataparser.py +++ b/yt_dlp/postprocessor/metadataparser.py @@ -6,12 +6,12 @@ from ..utils import Namespace class MetadataParserPP(PostProcessor): def __init__(self, downloader, actions): - super().__init__(self, downloader) + super().__init__(downloader) self._actions = [] for f in actions: action, *args = f assert action in self.Actions - self._actions.append(action(*args)) + self._actions.append(action(self, *args)) @classmethod def validate_action(cls, action, *data): @@ -21,7 +21,7 @@ class MetadataParserPP(PostProcessor): """ if action not in cls.Actions: raise ValueError(f'{action!r} is not a valid action') - getattr(cls, action.value)(cls, *data) # So this can raise error to validate + action(cls, *data) # So this can raise error to validate @staticmethod def field_to_template(tmpl): diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index cf52fb2b6..e1db7b868 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -import asyncio import atexit import base64 import binascii @@ -41,6 +40,7 @@ import xml.etree.ElementTree import zlib from .compat import ( + asyncio, compat_brotli, compat_chr, compat_cookiejar, From fdfc8149e168ba769cd16b380287383491635d0e Mon Sep 17 00:00:00 2001 From: Lesmiscore Date: Tue, 19 Apr 2022 11:06:55 +0900 Subject: [PATCH 0963/2552] [openrec:movie] Enable fallback for /movie/ URLs Closes #3474 --- yt_dlp/extractor/openrec.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/openrec.py b/yt_dlp/extractor/openrec.py index 7546c12fb..6c1eb8f3a 100644 --- a/yt_dlp/extractor/openrec.py +++ b/yt_dlp/extractor/openrec.py @@ -35,8 +35,8 @@ class OpenRecBaseIE(InfoExtractor): raise ExtractorError(f'Failed to extract {name} info') formats = list(self._expand_media(video_id, get_first(movie_stores, 'media'))) - if not formats and is_live: - # archived livestreams + if not formats: + # archived livestreams or subscriber-only videos cookies = self._get_cookies('https://www.openrec.tv/') detail = self._download_json( f'https://apiv5.openrec.tv/api/v5/movies/{video_id}/detail', video_id, From 6f638d325e1878df304822c6bf4e231e06dae89a Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 19 Apr 2022 14:54:12 +0530 Subject: [PATCH 0964/2552] Fix `Makefile` Closes #3467, #35 Authored by: putnam --- Makefile | 9 ++++++--- yt_dlp/extractor/anvato.py | 11 +---------- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/Makefile b/Makefile index b6cb27bb0..9b58c9008 100644 --- a/Makefile +++ b/Makefile @@ -59,15 +59,18 @@ test: offlinetest: codetest $(PYTHON) -m pytest -k "not download" +# XXX: This is hard to maintain +CODE_FOLDERS = yt_dlp yt_dlp/downloader yt_dlp/extractor yt_dlp/postprocessor yt_dlp/compat \ + yt_dlp/compat/asyncio 
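
The metadataparser fix above is easy to misread: `super().__init__` is already bound, so `super().__init__(self, downloader)` passed `self` twice and shifted every argument one parameter to the right. The corrected shape in isolation:

```python
class Base:
    def __init__(self, downloader):
        self.downloader = downloader


class Sub(Base):
    def __init__(self, downloader):
        super().__init__(downloader)    # not super().__init__(self, downloader)


assert Sub('ydl').downloader == 'ydl'
```
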
yt_dlp/extractor/anvato_token_generator yt-dlp: yt_dlp/*.py yt_dlp/*/*.py mkdir -p zip - for d in yt_dlp yt_dlp/downloader yt_dlp/extractor yt_dlp/postprocessor ; do \ + for d in $(CODE_FOLDERS) ; do \ mkdir -p zip/$$d ;\ cp -pPR $$d/*.py zip/$$d/ ;\ done - touch -t 200001010101 zip/yt_dlp/*.py zip/yt_dlp/*/*.py + touch -t 200001010101 zip/yt_dlp/*.py zip/yt_dlp/*/*.py zip/yt_dlp/*/*/*.py mv zip/yt_dlp/__main__.py zip/ - cd zip ; zip -q ../yt-dlp yt_dlp/*.py yt_dlp/*/*.py __main__.py + cd zip ; zip -q ../yt-dlp yt_dlp/*.py yt_dlp/*/*.py yt_dlp/*/*/*.py __main__.py rm -rf zip echo '#!$(PYTHON)' > yt-dlp cat yt-dlp.zip >> yt-dlp diff --git a/yt_dlp/extractor/anvato.py b/yt_dlp/extractor/anvato.py index 28fbd606e..09dfffdb0 100644 --- a/yt_dlp/extractor/anvato.py +++ b/yt_dlp/extractor/anvato.py @@ -5,6 +5,7 @@ import random import re import time +from .anvato_token_generator import NFLTokenGenerator from .common import InfoExtractor from ..aes import aes_encrypt from ..compat import compat_str @@ -19,16 +20,6 @@ from ..utils import ( unsmuggle_url, ) -# This import causes a ModuleNotFoundError on some systems for unknown reason. -# See issues: -# https://github.com/yt-dlp/yt-dlp/issues/35 -# https://github.com/ytdl-org/youtube-dl/issues/27449 -# https://github.com/animelover1984/youtube-dl/issues/17 -try: - from .anvato_token_generator import NFLTokenGenerator -except ImportError: - NFLTokenGenerator = None - def md5_text(s): if not isinstance(s, compat_str): From 2d3b3feb7e69df0840d06fc1c8b27c5f26de054f Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 19 Apr 2022 22:44:11 +0530 Subject: [PATCH 0965/2552] [Olympics] Fix format extension Closes #3481 --- yt_dlp/extractor/olympics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/olympics.py b/yt_dlp/extractor/olympics.py index 85f17a2f4..42ea94905 100644 --- a/yt_dlp/extractor/olympics.py +++ b/yt_dlp/extractor/olympics.py @@ -53,7 +53,7 @@ class OlympicsReplayIE(InfoExtractor): }) m3u8_url = self._download_json( f'https://olympics.com/tokenGenerator?url={m3u8_url}', uuid, note='Downloading m3u8 url') - formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, uuid, m3u8_id='hls') + formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, uuid, 'mp4', m3u8_id='hls') self._sort_formats(formats) return { From e08585b0f84368e2cb8c78b271116a2d13f6e032 Mon Sep 17 00:00:00 2001 From: mehq <11481344+mehq@users.noreply.github.com> Date: Wed, 20 Apr 2022 15:43:15 +0600 Subject: [PATCH 0966/2552] [Gofile] Support password-protected links (#3488) Closes #3465 Authored by: mehq --- yt_dlp/extractor/gofile.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/gofile.py b/yt_dlp/extractor/gofile.py index b491b46a5..ddbce2ee8 100644 --- a/yt_dlp/extractor/gofile.py +++ b/yt_dlp/extractor/gofile.py @@ -1,3 +1,5 @@ +import hashlib + from .common import InfoExtractor from ..utils import ( ExtractorError, @@ -37,6 +39,15 @@ class GofileIE(InfoExtractor): 'id': 'TMjXd9', }, 'playlist_count': 1, + }, { + 'url': 'https://gofile.io/d/gqOtRf', + 'info_dict': { + 'id': 'gqOtRf', + }, + 'playlist_mincount': 1, + 'params': { + 'videopassword': 'password', + }, }] _TOKEN = None @@ -52,14 +63,22 @@ class GofileIE(InfoExtractor): self._set_cookie('gofile.io', 'accountToken', self._TOKEN) def _entries(self, file_id): - files = self._download_json('https://api.gofile.io/getContent', 'Gofile', note='Getting filelist', query={ + query_params = { 
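# The optional 'password' parameter added below is the SHA-256 hex digest of
# the raw --video-password value; for illustration, assuming the password is
# literally 'password':
#   hashlib.sha256('password'.encode('utf-8')).hexdigest()
#   == '5e884898da28047151d0e56f8dc6292773603d0d6aabbdd62a11ef721d1542d8'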
'contentId': file_id, 'token': self._TOKEN, 'websiteToken': 12345, - }) + } + password = self.get_param('videopassword') + if password: + query_params['password'] = hashlib.sha256(password.encode('utf-8')).hexdigest() + files = self._download_json( + 'https://api.gofile.io/getContent', file_id, note='Getting filelist', query=query_params) status = files['status'] - if status != 'ok': + if status == 'error-passwordRequired': + raise ExtractorError( + 'This video is protected by a password, use the --video-password option', expected=True) + elif status != 'ok': raise ExtractorError(f'{self.IE_NAME} said: status {status}', expected=True) found_files = False From 62f6f1cbf253240a026a70538b5b58945563fc90 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 20 Apr 2022 02:25:33 +0530 Subject: [PATCH 0967/2552] Don't imply `-s` for later stages of `-O` --- README.md | 7 ++++--- yt_dlp/__init__.py | 15 ++++++++------- yt_dlp/options.py | 3 ++- yt_dlp/utils.py | 2 +- 4 files changed, 15 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index be713569c..d401acb21 100644 --- a/README.md +++ b/README.md @@ -690,9 +690,10 @@ You can also fork the project on github and run your fork's [build workflow](.gi print it, separated by a ":". Supported values of "WHEN" are the same as that of --use-postprocessor, and "video" (default). - Implies --quiet and --simulate (unless - --no-simulate is used). This option can be - used multiple times + Implies --quiet. Implies --simulate unless + --no-simulate or later stages of WHEN are + used. This option can be used multiple + times --print-to-file [WHEN:]TEMPLATE FILE Append given template to the file. The values of WHEN and TEMPLATE are same as diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 9ea13ad37..dc2f905c7 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -28,6 +28,7 @@ from .postprocessor import ( from .update import run_update from .utils import ( NO_DEFAULT, + POSTPROCESS_WHEN, DateRange, DownloadCancelled, DownloadError, @@ -618,11 +619,11 @@ def parse_options(argv=None): postprocessors = list(get_postprocessors(opts)) - any_getting = (any(opts.forceprint.values()) or opts.dumpjson or opts.dump_single_json - or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail - or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration) - - any_printing = opts.print_json + print_only = bool(opts.forceprint) and all(k not in opts.forceprint for k in POSTPROCESS_WHEN[2:]) + any_getting = any(getattr(opts, k) for k in ( + 'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename', + 'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl' + )) final_ext = ( opts.recodevideo if opts.recodevideo in FFmpegVideoConvertorPP.SUPPORTED_EXTS @@ -640,7 +641,7 @@ def parse_options(argv=None): 'ap_mso': opts.ap_mso, 'ap_username': opts.ap_username, 'ap_password': opts.ap_password, - 'quiet': (opts.quiet or any_getting or any_printing), + 'quiet': opts.quiet or any_getting or opts.print_json or bool(opts.forceprint), 'no_warnings': opts.no_warnings, 'forceurl': opts.geturl, 'forcetitle': opts.gettitle, @@ -655,7 +656,7 @@ def parse_options(argv=None): 'forcejson': opts.dumpjson or opts.print_json, 'dump_single_json': opts.dump_single_json, 'force_write_download_archive': opts.force_write_download_archive, - 'simulate': (any_getting or None) if opts.simulate is None else opts.simulate, + 'simulate': (print_only or any_getting or None) if opts.simulate is None else opts.simulate, 
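# How 'simulate' resolves above: --print implies --simulate only while every
# requested print stage precedes the download, i.e. none of them is in
# POSTPROCESS_WHEN[2:] ('before_dl' and later). Illustrative behaviour:
#   --print title                 -> simulate is implied
#   --print after_move:filepath   -> the download still happens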
'skip_download': opts.skip_download, 'format': opts.format, 'allow_unplayable_formats': opts.allow_unplayable_formats, diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 0c042caf4..73bc88b89 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -932,7 +932,8 @@ def create_parser(): }, help=( 'Field name or output template to print to screen, optionally prefixed with when to print it, separated by a ":". ' 'Supported values of "WHEN" are the same as that of --use-postprocessor, and "video" (default). ' - 'Implies --quiet and --simulate (unless --no-simulate is used). This option can be used multiple times')) + 'Implies --quiet. Implies --simulate unless --no-simulate or later stages of WHEN are used. ' + 'This option can be used multiple times')) verbosity.add_option( '--print-to-file', metavar='[WHEN:]TEMPLATE FILE', dest='print_to_file', default={}, type='str', nargs=2, diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index e1db7b868..ccea3c4e6 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -3013,7 +3013,7 @@ def qualities(quality_ids): return q -POSTPROCESS_WHEN = {'pre_process', 'after_filter', 'before_dl', 'after_move', 'post_process', 'after_video', 'playlist'} +POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'before_dl', 'after_move', 'post_process', 'after_video', 'playlist') DEFAULT_OUTTMPL = { From 9b8ee23b99de91f9e463050baddfd76fa6580ad6 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 21 Apr 2022 00:35:57 +0530 Subject: [PATCH 0968/2552] [dependencies] Create module with all dependency imports --- test/test_aes.py | 6 +- yt_dlp/YoutubeDL.py | 22 ++------ yt_dlp/aes.py | 9 +-- yt_dlp/compat/__init__.py | 21 ------- yt_dlp/compat/_legacy.py | 3 + yt_dlp/cookies.py | 35 +++--------- yt_dlp/dependencies.py | 77 ++++++++++++++++++++++++++ yt_dlp/downloader/hls.py | 5 +- yt_dlp/downloader/websocket.py | 10 +--- yt_dlp/extractor/fc2.py | 4 +- yt_dlp/extractor/twitcasting.py | 4 +- yt_dlp/postprocessor/embedthumbnail.py | 20 +++---- yt_dlp/utils.py | 25 +++------ 13 files changed, 128 insertions(+), 113 deletions(-) create mode 100644 yt_dlp/dependencies.py diff --git a/test/test_aes.py b/test/test_aes.py index 1c1238c8b..c934104e3 100644 --- a/test/test_aes.py +++ b/test/test_aes.py @@ -23,7 +23,7 @@ from yt_dlp.aes import ( aes_gcm_decrypt_and_verify, aes_gcm_decrypt_and_verify_bytes, ) -from yt_dlp.compat import compat_pycrypto_AES +from yt_dlp.dependencies import Cryptodome_AES from yt_dlp.utils import bytes_to_intlist, intlist_to_bytes # the encrypted data can be generate with 'devscripts/generate_aes_testdata.py' @@ -45,7 +45,7 @@ class TestAES(unittest.TestCase): data = b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\x27\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd' decrypted = intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist(data), self.key, self.iv)) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) - if compat_pycrypto_AES: + if Cryptodome_AES: decrypted = aes_cbc_decrypt_bytes(data, intlist_to_bytes(self.key), intlist_to_bytes(self.iv)) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) @@ -75,7 +75,7 @@ class TestAES(unittest.TestCase): decrypted = intlist_to_bytes(aes_gcm_decrypt_and_verify( bytes_to_intlist(data), self.key, bytes_to_intlist(authentication_tag), self.iv[:12])) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) - if compat_pycrypto_AES: + if Cryptodome_AES: decrypted = aes_gcm_decrypt_and_verify_bytes( data, intlist_to_bytes(self.key), authentication_tag, intlist_to_bytes(self.iv[:12])) 
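# The branch above only runs when pycryptodome is importable, so the native
# AES-GCM path is verified in addition to the pure-Python fallback exercised
# earlier; both must yield the same plaintext.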
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 155b5a063..9acd88171 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -27,10 +27,8 @@ from string import ascii_letters from .cache import Cache from .compat import ( - compat_brotli, compat_get_terminal_size, compat_os_name, - compat_pycrypto_AES, compat_shlex_quote, compat_str, compat_urllib_error, @@ -109,7 +107,6 @@ from .utils import ( format_field, formatSeconds, get_domain, - has_certifi, int_or_none, iri_to_uri, join_nonempty, @@ -3656,20 +3653,11 @@ class YoutubeDL: ) or 'none' write_debug('exe versions: %s' % exe_str) - from .cookies import SECRETSTORAGE_AVAILABLE, SQLITE_AVAILABLE - from .downloader.websocket import has_websockets - from .postprocessor.embedthumbnail import has_mutagen - - lib_str = join_nonempty( - compat_brotli and compat_brotli.__name__, - has_certifi and 'certifi', - compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0], - SECRETSTORAGE_AVAILABLE and 'secretstorage', - has_mutagen and 'mutagen', - SQLITE_AVAILABLE and 'sqlite', - has_websockets and 'websockets', - delim=', ') or 'none' - write_debug('Optional libraries: %s' % lib_str) + from .dependencies import available_dependencies + + write_debug('Optional libraries: %s' % (', '.join(sorted({ + module.__name__.split('.')[0] for module in available_dependencies.values() + })) or 'none')) self._setup_opener() proxy_map = {} diff --git a/yt_dlp/aes.py b/yt_dlp/aes.py index 603f3d187..ba3baf3de 100644 --- a/yt_dlp/aes.py +++ b/yt_dlp/aes.py @@ -1,16 +1,17 @@ from math import ceil -from .compat import compat_b64decode, compat_ord, compat_pycrypto_AES +from .compat import compat_b64decode, compat_ord +from .dependencies import Cryptodome_AES from .utils import bytes_to_intlist, intlist_to_bytes -if compat_pycrypto_AES: +if Cryptodome_AES: def aes_cbc_decrypt_bytes(data, key, iv): """ Decrypt bytes with AES-CBC using pycryptodome """ - return compat_pycrypto_AES.new(key, compat_pycrypto_AES.MODE_CBC, iv).decrypt(data) + return Cryptodome_AES.new(key, Cryptodome_AES.MODE_CBC, iv).decrypt(data) def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce): """ Decrypt bytes with AES-GCM using pycryptodome """ - return compat_pycrypto_AES.new(key, compat_pycrypto_AES.MODE_GCM, nonce).decrypt_and_verify(data, tag) + return Cryptodome_AES.new(key, Cryptodome_AES.MODE_GCM, nonce).decrypt_and_verify(data, tag) else: def aes_cbc_decrypt_bytes(data, key, iv): diff --git a/yt_dlp/compat/__init__.py b/yt_dlp/compat/__init__.py index 7a0e82992..56a65bb6c 100644 --- a/yt_dlp/compat/__init__.py +++ b/yt_dlp/compat/__init__.py @@ -54,11 +54,6 @@ else: compat_realpath = os.path.realpath -try: - import websockets as compat_websockets -except ImportError: - compat_websockets = None - # Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl # See https://github.com/yt-dlp/yt-dlp/issues/792 # https://docs.python.org/3/library/os.path.html#os.path.expanduser @@ -78,22 +73,6 @@ else: compat_expanduser = os.path.expanduser -try: - from Cryptodome.Cipher import AES as compat_pycrypto_AES -except ImportError: - try: - from Crypto.Cipher import AES as compat_pycrypto_AES - except ImportError: - compat_pycrypto_AES = None - -try: - import brotlicffi as compat_brotli -except ImportError: - try: - import brotli as compat_brotli - except ImportError: - compat_brotli = None - WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None diff --git 
a/yt_dlp/compat/_legacy.py b/yt_dlp/compat/_legacy.py index f185b7e2f..ce24760e5 100644 --- a/yt_dlp/compat/_legacy.py +++ b/yt_dlp/compat/_legacy.py @@ -17,6 +17,9 @@ from subprocess import DEVNULL from .asyncio import run as compat_asyncio_run # noqa: F401 from .re import Pattern as compat_Pattern # noqa: F401 from .re import match as compat_Match # noqa: F401 +from ..dependencies import Cryptodome_AES as compat_pycrypto_AES # noqa: F401 +from ..dependencies import brotli as compat_brotli # noqa: F401 +from ..dependencies import websockets as compat_websockets # noqa: F401 # compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index 8a4baa5bb..621c91e86 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -17,31 +17,14 @@ from .aes import ( unpad_pkcs7, ) from .compat import compat_b64decode, compat_cookiejar_Cookie +from .dependencies import ( + _SECRETSTORAGE_UNAVAILABLE_REASON, + secretstorage, + sqlite3, +) from .minicurses import MultilinePrinter, QuietMultilinePrinter from .utils import Popen, YoutubeDLCookieJar, error_to_str, expand_path -try: - import sqlite3 - SQLITE_AVAILABLE = True -except ImportError: - # although sqlite3 is part of the standard library, it is possible to compile python without - # sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544 - SQLITE_AVAILABLE = False - - -try: - import secretstorage - SECRETSTORAGE_AVAILABLE = True -except ImportError: - SECRETSTORAGE_AVAILABLE = False - SECRETSTORAGE_UNAVAILABLE_REASON = ( - 'as the `secretstorage` module is not installed. ' - 'Please install by running `python3 -m pip install secretstorage`.') -except Exception as _err: - SECRETSTORAGE_AVAILABLE = False - SECRETSTORAGE_UNAVAILABLE_REASON = f'as the `secretstorage` module could not be initialized. {_err}' - - CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'} SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'} @@ -122,7 +105,7 @@ def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), def _extract_firefox_cookies(profile, logger): logger.info('Extracting cookies from firefox') - if not SQLITE_AVAILABLE: + if not sqlite3: logger.warning('Cannot extract cookies from firefox without sqlite3 support. ' 'Please use a python interpreter compiled with sqlite3 support') return YoutubeDLCookieJar() @@ -236,7 +219,7 @@ def _get_chromium_based_browser_settings(browser_name): def _extract_chrome_cookies(browser_name, profile, keyring, logger): logger.info(f'Extracting cookies from {browser_name}') - if not SQLITE_AVAILABLE: + if not sqlite3: logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. 
' 'Please use a python interpreter compiled with sqlite3 support') return YoutubeDLCookieJar() @@ -806,8 +789,8 @@ def _get_kwallet_password(browser_keyring_name, logger): def _get_gnome_keyring_password(browser_keyring_name, logger): - if not SECRETSTORAGE_AVAILABLE: - logger.error(f'secretstorage not available {SECRETSTORAGE_UNAVAILABLE_REASON}') + if not secretstorage: + logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}') return b'' # the Gnome keyring does not seem to organise keys in the same way as KWallet, # using `dbus-monitor` during startup, it can be observed that chromium lists all keys diff --git a/yt_dlp/dependencies.py b/yt_dlp/dependencies.py new file mode 100644 index 000000000..99cc6e29c --- /dev/null +++ b/yt_dlp/dependencies.py @@ -0,0 +1,77 @@ +# flake8: noqa: F401 + +try: + import brotlicffi as brotli +except ImportError: + try: + import brotli + except ImportError: + brotli = None + + +try: + import certifi +except ImportError: + certifi = None +else: + from os.path import exists as _path_exists + + # The certificate may not be bundled in executable + if not _path_exists(certifi.where()): + certifi = None + + +try: + from Cryptodome.Cipher import AES as Cryptodome_AES +except ImportError: + try: + from Crypto.Cipher import AES as Cryptodome_AES + except ImportError: + Cryptodome_AES = None + + +try: + import mutagen +except ImportError: + mutagen = None + + +secretstorage = None +try: + import secretstorage + _SECRETSTORAGE_UNAVAILABLE_REASON = None +except ImportError: + _SECRETSTORAGE_UNAVAILABLE_REASON = ( + 'as the `secretstorage` module is not installed. ' + 'Please install by running `python3 -m pip install secretstorage`') +except Exception as _err: + _SECRETSTORAGE_UNAVAILABLE_REASON = f'as the `secretstorage` module could not be initialized. {_err}' + + +try: + import sqlite3 +except ImportError: + # although sqlite3 is part of the standard library, it is possible to compile python without + # sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544 + sqlite3 = None + + +try: + import websockets +except (ImportError, SyntaxError): + # websockets 3.10 on python 3.6 causes SyntaxError + # See https://github.com/yt-dlp/yt-dlp/issues/2633 + websockets = None + + +all_dependencies = {k: v for k, v in globals().items() if not k.startswith('_')} + + +available_dependencies = {k: v for k, v in all_dependencies.items() if v} + + +__all__ = [ + 'all_dependencies', + 'available_dependencies', + *all_dependencies.keys(), +] diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index 2d65f48ae..694c843f3 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -5,7 +5,8 @@ import re from .external import FFmpegFD from .fragment import FragmentFD from .. 
import webvtt -from ..compat import compat_pycrypto_AES, compat_urlparse +from ..compat import compat_urlparse +from ..dependencies import Cryptodome_AES from ..downloader import get_suitable_downloader from ..utils import bug_reports_message, parse_m3u8_attributes, update_url_query @@ -60,7 +61,7 @@ class HlsFD(FragmentFD): s = urlh.read().decode('utf-8', 'ignore') can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None - if can_download and not compat_pycrypto_AES and '#EXT-X-KEY:METHOD=AES-128' in s: + if can_download and not Cryptodome_AES and '#EXT-X-KEY:METHOD=AES-128' in s: if FFmpegFD.available(): can_download, message = False, 'The stream has AES-128 encryption and pycryptodomex is not available' else: diff --git a/yt_dlp/downloader/websocket.py b/yt_dlp/downloader/websocket.py index 8465f9713..eb1b99b45 100644 --- a/yt_dlp/downloader/websocket.py +++ b/yt_dlp/downloader/websocket.py @@ -3,18 +3,10 @@ import os import signal import threading -try: - import websockets -except (ImportError, SyntaxError): - # websockets 3.10 on python 3.6 causes SyntaxError - # See https://github.com/yt-dlp/yt-dlp/issues/2633 - has_websockets = False -else: - has_websockets = True - from .common import FileDownloader from .external import FFmpegFD from ..compat import asyncio +from ..dependencies import websockets class FFmpegSinkFD(FileDownloader): diff --git a/yt_dlp/extractor/fc2.py b/yt_dlp/extractor/fc2.py index a4c9793bb..225677b00 100644 --- a/yt_dlp/extractor/fc2.py +++ b/yt_dlp/extractor/fc2.py @@ -4,10 +4,10 @@ from .common import InfoExtractor from ..compat import ( compat_parse_qs, ) +from ..dependencies import websockets from ..utils import ( ExtractorError, WebSocketsWrapper, - has_websockets, js_to_json, sanitized_Request, std_headers, @@ -170,7 +170,7 @@ class FC2LiveIE(InfoExtractor): }] def _real_extract(self, url): - if not has_websockets: + if not websockets: raise ExtractorError('websockets library is not available. 
Please install it.', expected=True) video_id = self._match_id(url) webpage = self._download_webpage('https://live.fc2.com/%s/' % video_id, video_id) diff --git a/yt_dlp/extractor/twitcasting.py b/yt_dlp/extractor/twitcasting.py index 3d6a12265..07565383a 100644 --- a/yt_dlp/extractor/twitcasting.py +++ b/yt_dlp/extractor/twitcasting.py @@ -2,7 +2,7 @@ import itertools import re from .common import InfoExtractor -from ..downloader.websocket import has_websockets +from ..dependencies import websockets from ..utils import ( clean_html, ExtractorError, @@ -161,7 +161,7 @@ class TwitCastingIE(InfoExtractor): note='Downloading source quality m3u8', headers=self._M3U8_HEADERS, fatal=False)) - if has_websockets: + if websockets: qq = qualities(['base', 'mobilesource', 'main']) streams = traverse_obj(stream_server_data, ('llfmp4', 'streams')) or {} for mode, ws_url in streams.items(): diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py index 5469f25e0..c5ea76893 100644 --- a/yt_dlp/postprocessor/embedthumbnail.py +++ b/yt_dlp/postprocessor/embedthumbnail.py @@ -4,17 +4,9 @@ import os import re import subprocess -try: - from mutagen.flac import FLAC, Picture - from mutagen.mp4 import MP4, MP4Cover - from mutagen.oggopus import OggOpus - from mutagen.oggvorbis import OggVorbis - has_mutagen = True -except ImportError: - has_mutagen = False - from .common import PostProcessor from .ffmpeg import FFmpegPostProcessor, FFmpegThumbnailsConvertorPP +from ..dependencies import mutagen from ..utils import ( Popen, PostProcessingError, @@ -26,6 +18,12 @@ from ..utils import ( shell_quote, ) +if mutagen: + from mutagen.flac import FLAC, Picture + from mutagen.mp4 import MP4, MP4Cover + from mutagen.oggopus import OggOpus + from mutagen.oggvorbis import OggVorbis + class EmbedThumbnailPPError(PostProcessingError): pass @@ -121,7 +119,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor): elif info['ext'] in ['m4a', 'mp4', 'mov']: prefer_atomicparsley = 'embed-thumbnail-atomicparsley' in self.get_param('compat_opts', []) # Method 1: Use mutagen - if not has_mutagen or prefer_atomicparsley: + if not mutagen or prefer_atomicparsley: success = False else: try: @@ -194,7 +192,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor): raise EmbedThumbnailPPError(f'Unable to embed using ffprobe & ffmpeg; {err}') elif info['ext'] in ['ogg', 'opus', 'flac']: - if not has_mutagen: + if not mutagen: raise EmbedThumbnailPPError('module mutagen was not found. 
Please install using `python -m pip install mutagen`') self._report_run('mutagen', filename) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index ccea3c4e6..7f0c055ac 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -41,7 +41,6 @@ import zlib from .compat import ( asyncio, - compat_brotli, compat_chr, compat_cookiejar, compat_etree_fromstring, @@ -64,18 +63,10 @@ from .compat import ( compat_urllib_parse_urlparse, compat_urllib_request, compat_urlparse, - compat_websockets, ) +from .dependencies import brotli, certifi, websockets from .socks import ProxyType, sockssocket -try: - import certifi - - # The certificate may not be bundled in executable - has_certifi = os.path.exists(certifi.where()) -except ImportError: - has_certifi = False - def register_socks_protocols(): # "Register" SOCKS protocols @@ -138,7 +129,7 @@ def random_user_agent(): SUPPORTED_ENCODINGS = [ 'gzip', 'deflate' ] -if compat_brotli: +if brotli: SUPPORTED_ENCODINGS.append('br') std_headers = { @@ -1267,7 +1258,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): def brotli(data): if not data: return data - return compat_brotli.decompress(data) + return brotli.decompress(data) def http_request(self, req): # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not @@ -5231,7 +5222,7 @@ class WebSocketsWrapper(): def __init__(self, url, headers=None, connect=True): self.loop = asyncio.events.new_event_loop() - self.conn = compat_websockets.connect( + self.conn = websockets.connect( url, extra_headers=headers, ping_interval=None, close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf')) if connect: @@ -5294,9 +5285,6 @@ class WebSocketsWrapper(): }) -has_websockets = bool(compat_websockets) - - def merge_headers(*dicts): """Merge dicts of http headers case insensitively, prioritizing the latter ones""" return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))} @@ -5312,3 +5300,8 @@ class classproperty: def Namespace(**kwargs): return collections.namedtuple('Namespace', kwargs)(**kwargs) + + +# Deprecated +has_certifi = bool(certifi) +has_websockets = bool(websockets) From 7774db5bf9cb1d3ff1d09f0ba5d431643fdff030 Mon Sep 17 00:00:00 2001 From: Evan Spensley <94762716+evansp@users.noreply.github.com> Date: Thu, 21 Apr 2022 17:26:10 -0400 Subject: [PATCH 0969/2552] [EmbedThumbnail] Disable thumbnail conversion for mkv (#3512) Closes #3209 Authored by: evansp --- yt_dlp/postprocessor/embedthumbnail.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py index c5ea76893..caa841b2e 100644 --- a/yt_dlp/postprocessor/embedthumbnail.py +++ b/yt_dlp/postprocessor/embedthumbnail.py @@ -79,12 +79,10 @@ class EmbedThumbnailPP(FFmpegPostProcessor): original_thumbnail = thumbnail_filename = info['thumbnails'][idx]['filepath'] - # Convert unsupported thumbnail formats to PNG (see #25687, #25717) - # Original behavior was to convert to JPG, but since JPG is a lossy - # format, there will be some additional data loss. - # PNG, on the other hand, is lossless. 
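# Rationale for the change below: Matroska (mkv/mka) attaches the thumbnail
# file as-is with an explicit mimetype, so conversion is skipped for it;
# other containers still get unsupported formats converted to lossless PNG.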
thumbnail_ext = os.path.splitext(thumbnail_filename)[1][1:]
-        if thumbnail_ext not in ('jpg', 'jpeg', 'png'):
+        # Convert unsupported thumbnail formats (see #25687, #25717)
+        # PNG is preferred since JPEG is lossy
+        if info['ext'] not in ('mkv', 'mka') and thumbnail_ext not in ('jpg', 'jpeg', 'png'):
             thumbnail_filename = convertor.convert_thumbnail(thumbnail_filename, 'png')
             thumbnail_ext = 'png'
 
@@ -102,7 +100,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
 
         elif info['ext'] in ['mkv', 'mka']:
             options = list(self.stream_copy_opts())
 
-            mimetype = 'image/%s' % ('png' if thumbnail_ext == 'png' else 'jpeg')
+            mimetype = 'image/%s' % ('jpeg' if thumbnail_ext in ('jpg', 'jpeg') else thumbnail_ext)
             old_stream, new_stream = self.get_stream_number(
                 filename, ('tags', 'mimetype'), mimetype)
             if old_stream is not None:

From d14b920c330b00ca50a66bf471b9f901ebc16212 Mon Sep 17 00:00:00 2001
From: Ha Tien Loi
Date: Fri, 22 Apr 2022 20:45:52 +0700
Subject: [PATCH 0970/2552] [PearVideo] Add fallback for formats (#3438)

Closes #3425
Authored by: hatienl0i261299
---
 yt_dlp/extractor/pearvideo.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/yt_dlp/extractor/pearvideo.py b/yt_dlp/extractor/pearvideo.py
index d552e0966..e76305acd 100644
--- a/yt_dlp/extractor/pearvideo.py
+++ b/yt_dlp/extractor/pearvideo.py
@@ -4,6 +4,7 @@ from .common import InfoExtractor
 from ..utils import (
     qualities,
     unified_timestamp,
+    traverse_obj,
 )
 
 
@@ -36,6 +37,14 @@ class PearVideoIE(InfoExtractor):
         } for mobj in re.finditer(
             r'(?P<id>[a-zA-Z]+)Url\s*=\s*(["\'])(?P<url>(?:https?:)?//.+?)\2', webpage)]
+        if not formats:
+            info = self._download_json(
+                'https://www.pearvideo.com/videoStatus.jsp', video_id=video_id,
+                query={'contId': video_id}, headers={'Referer': url})
+            formats = [{
+                'format_id': k,
+                'url': v.replace(info['systemTime'], f'cont-{video_id}') if k == 'srcUrl' else v
+            } for k, v in traverse_obj(info, ('videoInfo', 'videos'), default={}).items() if v]
         self._sort_formats(formats)
 
         title = self._search_regex(

From b0f636beb46411d454e4f14ae5372d672c798701 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Sat, 23 Apr 2022 22:15:00 +0530
Subject: [PATCH 0971/2552] [Sponsorblock] Don't crash when duration is
 unknown

Closes #3529
---
 yt_dlp/postprocessor/sponsorblock.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/postprocessor/sponsorblock.py b/yt_dlp/postprocessor/sponsorblock.py
index 7749ffe05..7f75561db 100644
--- a/yt_dlp/postprocessor/sponsorblock.py
+++ b/yt_dlp/postprocessor/sponsorblock.py
@@ -38,7 +38,7 @@ class SponsorBlockPP(FFmpegPostProcessor):
             return [], info
 
         self.to_screen('Fetching SponsorBlock segments')
-        info['sponsorblock_chapters'] = self._get_sponsor_chapters(info, info['duration'])
+        info['sponsorblock_chapters'] = self._get_sponsor_chapters(info, info.get('duration'))
         return [], info
 
     def _get_sponsor_chapters(self, info, duration):

From 90f42294096d4fc38fb4355564c083733d638b0d Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Sat, 23 Apr 2022 22:15:38 +0530
Subject: [PATCH 0972/2552] [telegram] Fix metadata extraction

Closes #3528
---
 yt_dlp/extractor/telegram.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/yt_dlp/extractor/telegram.py b/yt_dlp/extractor/telegram.py
index 2dfa261e9..bb9ca8c45 100644
--- a/yt_dlp/extractor/telegram.py
+++ b/yt_dlp/extractor/telegram.py
@@ -1,4 +1,5 @@
 from .common import InfoExtractor
+from ..utils import clean_html, get_element_by_class
 
 
 class TelegramEmbedIE(InfoExtractor):
@@ -17,8 +18,8 @@ class TelegramEmbedIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-        webpage_embed = self._download_webpage(f'{url}?embed=1', video_id)
+        webpage = self._download_webpage(url, video_id, query={'embed': 0})
+        webpage_embed = self._download_webpage(url, video_id, query={'embed': 1}, note='Downloading embed page')
 
         formats = [{
             'url': self._proto_relative_url(self._search_regex(
@@ -29,9 +30,12 @@ class TelegramEmbedIE(InfoExtractor):
 
         return {
             'id': video_id,
-            'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, fatal=True),
-            'description': self._html_search_meta(['og:description', 'twitter:description'], webpage, fatal=True),
-            'thumbnail': self._search_regex(r'tgme_widget_message_video_thumb"[^>]+background-image:url\(\'([^\']+)\'\)',
-                webpage_embed, 'thumbnail'),
+            'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None),
+            'description': self._html_search_meta(
+                ['og:description', 'twitter:description'], webpage,
+                default=clean_html(get_element_by_class('tgme_widget_message_text', webpage_embed))),
+            'thumbnail': self._search_regex(
+                r'tgme_widget_message_video_thumb"[^>]+background-image:url\(\'([^\']+)\'\)',
+                webpage_embed, 'thumbnail'),
             'formats': formats,
         }

From 6534298b120b282e3ef258d82baa7c1ff7552269 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Sat, 23 Apr 2022 22:32:23 +0530
Subject: [PATCH 0973/2552] [build] Avoid use of `install -D`

Closes #3429
---
 Makefile | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/Makefile b/Makefile
index 9b58c9008..146df1906 100644
--- a/Makefile
+++ b/Makefile
@@ -43,11 +43,16 @@ SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then ech
 MARKDOWN = $(shell if [ `pandoc -v | head -n1 | cut -d" " -f2 | head -c1` = "2" ]; then echo markdown-smart; else echo markdown; fi)
 
 install: lazy-extractors yt-dlp yt-dlp.1 completions
-	install -Dm755 yt-dlp $(DESTDIR)$(BINDIR)/yt-dlp
-	install -Dm644 yt-dlp.1 $(DESTDIR)$(MANDIR)/man1/yt-dlp.1
-	install -Dm644 completions/bash/yt-dlp $(DESTDIR)$(SHAREDIR)/bash-completion/completions/yt-dlp
-	install -Dm644 completions/zsh/_yt-dlp $(DESTDIR)$(SHAREDIR)/zsh/site-functions/_yt-dlp
-	install -Dm644 completions/fish/yt-dlp.fish $(DESTDIR)$(SHAREDIR)/fish/vendor_completions.d/yt-dlp.fish
+	mkdir -p $(DESTDIR)$(BINDIR)
+	install -m755 yt-dlp $(DESTDIR)$(BINDIR)/yt-dlp
+	mkdir -p $(DESTDIR)$(MANDIR)/man1
+	install -m644 yt-dlp.1 $(DESTDIR)$(MANDIR)/man1/yt-dlp.1
+	mkdir -p $(DESTDIR)$(SHAREDIR)/bash-completion/completions
+	install -m644 completions/bash/yt-dlp $(DESTDIR)$(SHAREDIR)/bash-completion/completions/yt-dlp
+	mkdir -p $(DESTDIR)$(SHAREDIR)/zsh/site-functions
+	install -m644 completions/zsh/_yt-dlp $(DESTDIR)$(SHAREDIR)/zsh/site-functions/_yt-dlp
+	mkdir -p $(DESTDIR)$(SHAREDIR)/fish/vendor_completions.d
+	install -m644 completions/fish/yt-dlp.fish $(DESTDIR)$(SHAREDIR)/fish/vendor_completions.d/yt-dlp.fish
 
 codetest:
 	flake8 .
From a1ddaa899ca8693f31f34770f7263ace7e8c8841 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 22 Apr 2022 13:16:24 +0530 Subject: [PATCH 0974/2552] [hotstar] Refactor extractors Closes #3517 --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/hotstar.py | 231 ++++++++++++++++++--------------- 2 files changed, 124 insertions(+), 108 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index d67b2eeec..a4ccf07a4 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -616,6 +616,7 @@ from .hitrecord import HitRecordIE from .hotnewhiphop import HotNewHipHopIE from .hotstar import ( HotStarIE, + HotStarPrefixIE, HotStarPlaylistIE, HotStarSeriesIE, ) diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index d82e1aead..fe16de665 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -14,6 +14,7 @@ from ..utils import ( determine_ext, ExtractorError, int_or_none, + join_nonempty, str_or_none, try_get, url_or_none, @@ -21,6 +22,8 @@ from ..utils import ( class HotStarBaseIE(InfoExtractor): + _BASE_URL = 'https://www.hotstar.com' + _API_URL = 'https://api.hotstar.com' _AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee' def _call_api_impl(self, path, video_id, query, st=None, cookies=None): @@ -33,7 +36,7 @@ class HotStarBaseIE(InfoExtractor): token = cookies.get('userUP').value else: token = self._download_json( - 'https://api.hotstar.com/um/v3/users', + f'{self._API_URL}/um/v3/users', video_id, note='Downloading token', data=json.dumps({"device_ids": [{"id": compat_str(uuid.uuid4()), "type": "device_id"}]}).encode('utf-8'), headers={ @@ -43,12 +46,13 @@ class HotStarBaseIE(InfoExtractor): })['user_identity'] response = self._download_json( - 'https://api.hotstar.com/' + path, video_id, headers={ + f'{self._API_URL}/{path}', video_id, query=query, + headers={ 'hotstarauth': auth, 'x-hs-appversion': '6.72.2', 'x-hs-platform': 'web', 'x-hs-usertoken': token, - }, query=query) + }) if response['message'] != "Playback URL's fetched successfully": raise ExtractorError( @@ -56,17 +60,20 @@ class HotStarBaseIE(InfoExtractor): return response['data'] def _call_api(self, path, video_id, query_name='contentId'): - return self._download_json('https://api.hotstar.com/' + path, video_id=video_id, query={ - query_name: video_id, - 'tas': 10000, - }, headers={ - 'x-country-code': 'IN', - 'x-platform-code': 'PCTV', - }) + return self._download_json( + f'{self._API_URL}/{path}', video_id=video_id, + query={ + query_name: video_id, + 'tas': 10000, + }, headers={ + 'x-country-code': 'IN', + 'x-platform-code': 'PCTV', + }) - def _call_api_v2(self, path, video_id, st=None, cookies=None): + def _call_api_v2(self, path, video_id, st=None): + cookies = self._get_cookies(self._BASE_URL) return self._call_api_impl( - '%s/content/%s' % (path, video_id), video_id, st=st, cookies=cookies, query={ + f'{path}/content/{video_id}', video_id, st=st, cookies=cookies, query={ 'desired-config': 'audio_channel:stereo|container:fmp4|dynamic_range:hdr|encryption:plain|ladder:tv|package:dash|resolution:fhd|subs-tag:HotstarVIP|video_codec:h265', 'device-id': cookies.get('device_id').value if cookies.get('device_id') else compat_str(uuid.uuid4()), 'os-name': 'Windows', @@ -77,24 +84,15 @@ class HotStarBaseIE(InfoExtractor): class HotStarIE(HotStarBaseIE): IE_NAME = 'hotstar' _VALID_URL = r'''(?x) - (?: - hotstar\:| - https?://(?:www\.)?hotstar\.com(?:/in)?/(?!in/) - ) - (?: - 
(?P<type>movies|sports|episode|(?P<tv>tv))
-                        (?:
-                            \:|
-                            /[^/?#]+/
-                            (?(tv)
-                                (?:[^/?#]+/){2}|
-                                (?:[^/?#]+/)*
-                            )
-                        )|
-                        [^/?#]+/
-                    )?
-                    (?P<id>\d{10})
-                    '''
+                    https?://(?:www\.)?hotstar\.com(?:/in)?/(?!in/)
+                    (?:
+                        (?P<type>movies|sports|episode|(?P<tv>tv))/
+                        (?(tv)(?:[^/?#]+/){2}|[^?#]*)
+                    )?
+                    [^/?#]+/
+                    (?P<id>\d{10})
+                    '''
+
     _TESTS = [{
         'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273',
         'info_dict': {
             'id': '1000076273',
             'ext': 'mp4',
             'title': 'Can You Not Spread Rumours?',
             'description': 'md5:c957d8868e9bc793ccb813691cc4c434',
             'timestamp': 1447248600,
             'upload_date': '20151111',
             'duration': 381,
+            'episode': 'Can You Not Spread Rumours?',
         },
-    }, {
-        'url': 'hotstar:1000076273',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157',
-        'info_dict': {
-            'id': '1000057157',
-            'ext': 'mp4',
-            'title': 'Radha Gopalam',
-            'description': 'md5:be3bc342cc120bbc95b3b0960e2b0d22',
-            'timestamp': 1140805800,
-            'upload_date': '20060224',
-            'duration': 9182,
-        },
-    }, {
-        'url': 'hotstar:movies:1000057157',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.hotstar.com/in/sports/cricket/follow-the-blues-2021/recap-eng-fight-back-on-day-2/1260066104',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.hotstar.com/in/sports/football/most-costly-pl-transfers-ft-grealish/1260065956',
-        'only_matching': True,
-    }, {
-        # contentData
-        'url': 'hotstar:sports:1260065956',
-        'only_matching': True,
-    }, {
-        # contentData
-        'url': 'hotstar:sports:1260066104',
-        'only_matching': True,
     }, {
         'url': 'https://www.hotstar.com/tv/ek-bhram-sarvagun-sampanna/s-2116/janhvi-targets-suman/1000234847',
         'info_dict': {
             'id': '1000234847',
             'ext': 'mp4',
             'title': 'Janhvi Targets Suman',
             'description': 'md5:78a85509348910bd1ca31be898c5796b',
             'timestamp': 1556670600,
             'upload_date': '20190501',
             'duration': 1219,
             'channel': 'StarPlus',
             'channel_id': 3,
             'series': 'Ek Bhram - Sarvagun Sampanna',
             'season': 'Chapter 1',
             'season_number': 1,
             'season_id': 6771,
             'episode': 'Janhvi Targets Suman',
             'episode_number': 8,
-        },
+        }
     }, {
-        'url': 'hotstar:episode:1000234847',
+        'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.hotstar.com/in/sports/cricket/follow-the-blues-2021/recap-eng-fight-back-on-day-2/1260066104',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.hotstar.com/in/sports/football/most-costly-pl-transfers-ft-grealish/1260065956',
         'only_matching': True,
     }]
     _GEO_BYPASS = False
+
     _TYPE = {
         'movies': 'movie',
         'sports': 'match',
         'episode': 'episode',
         'tv': 'episode',
         None: 'content',
     }
 
+    _IGNORE_MAP = {
+        'res': 'resolution',
+        'vcodec': 'video_codec',
+        'dr': 'dynamic_range',
+    }
+
+    @classmethod
+    def _video_url(cls, video_id, video_type=None, *, slug='ignore_me', root=None):
+        assert None in (video_type, root)
+        if not root:
+            root = join_nonempty(cls._BASE_URL, video_type, delim='/')
+        return f'{root}/{slug}/{video_id}'
+
     def _real_extract(self, url):
-        mobj = self._match_valid_url(url)
-        video_id = mobj.group('id')
-        video_type = mobj.group('type')
-        cookies = self._get_cookies(url)
+        video_id, video_type = self._match_valid_url(url).group('id', 'type')
         video_type = self._TYPE.get(video_type, video_type)
-        video_data = self._call_api(f'o/v1/{video_type}/detail', video_id)['body']['results']['item']
-        title = video_data['title']
 
+        video_data = self._call_api(f'o/v1/{video_type}/detail', video_id)['body']['results']['item']
         if not self.get_param('allow_unplayable_formats') and video_data.get('drmProtected'):
             self.report_drm(video_id)
 
-        headers = {'Referer': 'https://www.hotstar.com/in'}
-        formats = []
-        subs = {}
+        # See https://github.com/yt-dlp/yt-dlp/issues/396
+        st = self._download_webpage_handle(f'{self._BASE_URL}/in', video_id)[1].headers.get('x-origin-date')
+
         geo_restricted = False
-        _, urlh = self._download_webpage_handle('https://www.hotstar.com/in', video_id)
-        # Required to fix https://github.com/yt-dlp/yt-dlp/issues/396
-        st = urlh.headers.get('x-origin-date')
+        formats, subs = [], {}
+        headers = {'Referer': f'{self._BASE_URL}/in'}
+
         # change to v2 in the future
-        playback_sets = self._call_api_v2('play/v1/playback', video_id, st=st, cookies=cookies)['playBackSets']
+        playback_sets = self._call_api_v2('play/v1/playback', video_id, st=st)['playBackSets']
         for playback_set in playback_sets:
             if not isinstance(playback_set, dict):
                 continue
-            dr = re.search(r'dynamic_range:(?P<dr>[a-z]+)', playback_set.get('tagsCombination')).group('dr')
+            tags = str_or_none(playback_set.get('tagsCombination')) or ''
+            if any(f'{prefix}:{ignore}' in tags
+                   for key, prefix in self._IGNORE_MAP.items()
+                   for ignore in self._configuration_arg(key)):
+                continue
+
             format_url = url_or_none(playback_set.get('playbackUrl'))
             if not format_url:
                 continue
-            format_url = re.sub(
-                r'(?<=//staragvod)(\d)', r'web\1', format_url)
-            tags = str_or_none(playback_set.get('tagsCombination')) or ''
-            ingored_res, ignored_vcodec, ignored_dr = self._configuration_arg('res'), self._configuration_arg('vcodec'), self._configuration_arg('dr')
-            if any(f'resolution:{ig_res}' in tags for ig_res in ingored_res) or any(f'video_codec:{ig_vc}' in tags for ig_vc in ignored_vcodec) or any(f'dynamic_range:{ig_dr}' in tags for ig_dr in ignored_dr):
-                continue
+
+            format_url = re.sub(r'(?<=//staragvod)(\d)', r'web\1', format_url)
+            dr = re.search(r'dynamic_range:(?P<dr>[a-z]+)', playback_set.get('tagsCombination')).group('dr')
             ext = determine_ext(format_url)
+
+            current_formats, current_subs = [], {}
             try:
                 if 'package:hls' in tags or ext == 'm3u8':
@@ -215,8 +201,7 @@ class HotStarIE(HotStarBaseIE):
                     current_formats, current_subs = self._extract_mpd_formats_and_subtitles(
                         format_url, video_id, mpd_id=f'{dr}-dash', headers=headers)
                 elif ext == 'f4m':
-                    # produce broken files
-                    pass
+                    pass  # XXX: produce broken files
                 else:
                     current_formats = [{
                         'url': format_url,
@@ -227,6 +212,7 @@ class HotStarIE(HotStarBaseIE):
                 if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
                     geo_restricted = True
                 continue
+
             if tags and 'encryption:plain' not in tags:
                 for f in current_formats:
                     f['has_drm'] = True
@@ -235,18 +221,19 @@ class HotStarIE(HotStarBaseIE):
                 for f in current_formats:
                     if not f.get('langauge'):
                         f['language'] = lang
+
             formats.extend(current_formats)
             subs = self._merge_subtitles(subs, current_subs)
+
         if not formats and geo_restricted:
             self.raise_geo_restricted(countries=['IN'], metadata_available=True)
         self._sort_formats(formats)
-
         for f in formats:
             f.setdefault('http_headers', {}).update(headers)
 
         return {
             'id': video_id,
-            'title': title,
+            'title': video_data.get('title'),
             'description': video_data.get('description'),
             'duration': int_or_none(video_data.get('duration')),
             'timestamp': int_or_none(video_data.get('broadcastDate') or video_data.get('startDate')),
@@ -258,14 +245,48 @@ class HotStarIE(HotStarBaseIE):
             'season': video_data.get('seasonName'),
             'season_number': int_or_none(video_data.get('seasonNo')),
             'season_id': video_data.get('seasonId'),
-            'episode': title,
+            'episode': video_data.get('title'),
             'episode_number': int_or_none(video_data.get('episodeNo')),
-            'http_headers': {
-                'Referer': 'https://www.hotstar.com/in',
-            }
         }
 
 
+class HotStarPrefixIE(InfoExtractor):
+    """ The "hotstar:" prefix is no longer in use, but this is kept for backward compatibility """
+    IE_DESC = False
+    _VALID_URL = r'hotstar:(?:(?P<type>\w+):)?(?P<id>\d+)$'
+    _TESTS = [{
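# How the legacy prefix resolves (illustrative): 'hotstar:movies:1000057157'
# is re-dispatched through HotStarIE._video_url(), which fills the unused
# slug with 'ignore_me', producing
# 'https://www.hotstar.com/movies/ignore_me/1000057157'.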
+        'url': 'hotstar:1000076273',
+        'only_matching': True,
+    }, {
+        'url': 'hotstar:movies:1000057157',
+        'info_dict': {
+            'id': '1000057157',
+            'ext': 'mp4',
+            'title': 'Radha Gopalam',
+            'description': 'md5:be3bc342cc120bbc95b3b0960e2b0d22',
+            'timestamp': 1140805800,
+            'upload_date': '20060224',
+            'duration': 9182,
+            'episode': 'Radha Gopalam',
+        },
+    }, {
+        'url': 'hotstar:episode:1000234847',
+        'only_matching': True,
+    }, {
+        # contentData
+        'url': 'hotstar:sports:1260065956',
+        'only_matching': True,
+    }, {
+        # contentData
+        'url': 'hotstar:sports:1260066104',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id, video_type = self._match_valid_url(url).group('id', 'type')
+        return self.url_result(HotStarIE._video_url(video_id, video_type), HotStarIE, video_id)
+
+
 class HotStarPlaylistIE(HotStarBaseIE):
     IE_NAME = 'hotstar:playlist'
     _VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)'
@@ -285,11 +306,8 @@ class HotStarPlaylistIE(HotStarBaseIE):
         collection = self._call_api('o/v1/tray/find', playlist_id, 'uqId')['body']['results']
 
         entries = [
-            self.url_result(
-                'https://www.hotstar.com/%s' % video['contentId'],
-                ie=HotStarIE.ie_key(), video_id=video['contentId'])
-            for video in collection['assets']['items']
-            if video.get('contentId')]
+            self.url_result(HotStarIE._video_url(video['contentId']), HotStarIE, video['contentId'])
+            for video in collection['assets']['items'] if video.get('contentId')]
 
         return self.playlist_result(entries, playlist_id)
 
@@ -323,16 +341,13 @@ class HotStarSeriesIE(HotStarBaseIE):
             'x-country-code': 'IN',
             'x-platform-code': 'PCTV',
         }
-        detail_json = self._download_json('https://api.hotstar.com/o/v1/show/detail?contentId=' + series_id,
-                                          video_id=series_id, headers=headers)
-        id = compat_str(try_get(detail_json, lambda x: x['body']['results']['item']['id'], int))
-        item_json = self._download_json('https://api.hotstar.com/o/v1/tray/g/1/items?etid=0&tao=0&tas=10000&eid=' + id,
-                                        video_id=series_id, headers=headers)
-        entries = [
-            self.url_result(
-                '%s/ignoreme/%d' % (url, video['contentId']),
-                ie=HotStarIE.ie_key(), video_id=video['contentId'])
-            for video in item_json['body']['results']['items']
-            if video.get('contentId')]
+        detail_json = self._download_json(
+            f'{self._API_URL}/o/v1/show/detail?contentId={series_id}', series_id, headers=headers)
+        id = try_get(detail_json, lambda x: x['body']['results']['item']['id'], int)
+        item_json = self._download_json(
+            f'{self._API_URL}/o/v1/tray/g/1/items?etid=0&tao=0&tas=10000&eid={id}', series_id, headers=headers)
 
-        return self.playlist_result(entries, series_id)
+        return self.playlist_result([
+            self.url_result(HotStarIE._video_url(video['contentId'], root=url), HotStarIE, video['contentId'])
+            for video in item_json['body']['results']['items'] if video.get('contentId')
+        ], series_id)

From 52c2af8298ef1593adf6843d47fe6e9daf2a1758 Mon Sep 17 00:00:00 2001
From: Teemu Ikonen
Date: Sun, 24 Apr 2022 04:18:04 +0300
Subject: [PATCH 0975/2552] [icareus] Add extractor (#3320)

Authored by: tpikonen, pukkandan
---
 yt_dlp/extractor/extractors.py |   1 +
 yt_dlp/extractor/icareus.py    | 180 +++++++++++++++++++++++++++++++++
 2 files changed, 181 insertions(+)
 create mode 100644 yt_dlp/extractor/icareus.py

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index a4ccf07a4..952738884 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -640,6 +640,7 @@ from .hungama import (
     HungamaAlbumPlaylistIE,
 )
 from .hypem import HypemIE
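# New extractors become visible to yt-dlp through this import list; the
# lazy-extractor build enumerates the module's imports, so registering
# IcareusIE below should be picked up automatically.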
+from .icareus import IcareusIE
 from .ichinanalive import (
     IchinanaLiveIE,
     IchinanaLiveClipIE,
diff --git a/yt_dlp/extractor/icareus.py b/yt_dlp/extractor/icareus.py
new file mode 100644
index 000000000..dc7a2f0ba
--- /dev/null
+++ b/yt_dlp/extractor/icareus.py
@@ -0,0 +1,180 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    determine_ext,
+    get_element_by_class,
+    int_or_none,
+    merge_dicts,
+    parse_bitrate,
+    parse_resolution,
+    remove_end,
+    str_or_none,
+    url_or_none,
+    urlencode_postdata,
+)
+
+
+class IcareusIE(InfoExtractor):
+    _DOMAINS = '|'.join(map(re.escape, (
+        'asahitv.fi',
+        'helsinkikanava.fi',
+        'hyvinvointitv.fi',
+        'inez.fi',
+        'permanto.fi',
+        'suite.icareus.com',
+        'videos.minifiddlers.org',
+    )))
+    _VALID_URL = rf'(?P<base_url>https?://(?:www\.)?(?:{_DOMAINS}))/[^?#]+/player/[^?#]+\?(?:[^#]+&)?(?:assetId|eventId)=(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.helsinkikanava.fi/fi_FI/web/helsinkikanava/player/vod?assetId=68021894',
+        'md5': 'ca0b62ffc814a5411dfa6349cf5adb8a',
+        'info_dict': {
+            'id': '68021894',
+            'ext': 'mp4',
+            'title': 'Perheiden parhaaksi',
+            'description': 'md5:295785ea408e5ac00708766465cc1325',
+            'thumbnail': 'https://www.helsinkikanava.fi/image/image_gallery?img_id=68022501',
+            'upload_date': '20200924',
+            'timestamp': 1600938300,
+        },
+    }, {  # Recorded livestream
+        'url': 'https://www.helsinkikanava.fi/fi/web/helsinkikanava/player/event/view?eventId=76241489',
+        'md5': '014327e69dfa7b949fcc861f6d162d6d',
+        'info_dict': {
+            'id': '76258304',
+            'ext': 'mp4',
+            'title': 'Helsingin kaupungin ja HUSin tiedotustilaisuus koronaepidemiatilanteesta 24.11.2020',
+            'description': 'md5:3129d041c6fbbcdc7fe68d9a938fef1c',
+            'thumbnail': 'https://icareus-suite.secure2.footprint.net/image/image_gallery?img_id=76288630',
+            'upload_date': '20201124',
+            'timestamp': 1606206600,
+        },
+    }, {  # Non-m3u8 stream
+        'url': 'https://suite.icareus.com/fi/web/westend-indians/player/vod?assetId=47567389',
+        'md5': '72fc04ee971bbedc44405cdf16c990b6',
+        'info_dict': {
+            'id': '47567389',
+            'ext': 'mp4',
+            'title': 'Omatoiminen harjoittelu - Laukominen',
+            'description': '',
+            'thumbnail': 'https://suite.icareus.com/image/image_gallery?img_id=47568162',
+            'upload_date': '20200319',
+            'timestamp': 1584658080,
+        },
+    }, {
+        'url': 'https://asahitv.fi/fi/web/asahi/player/vod?assetId=89415818',
+        'only_matching': True
+    }, {
+        'url': 'https://hyvinvointitv.fi/fi/web/hyvinvointitv/player/vod?assetId=89149730',
+        'only_matching': True
+    }, {
+        'url': 'https://inez.fi/fi/web/inez-media/player/vod?assetId=71328822',
+        'only_matching': True
+    }, {
+        'url': 'https://www.permanto.fi/fi/web/alfatv/player/vod?assetId=135497515',
+        'only_matching': True
+    }, {
+        'url': 'https://videos.minifiddlers.org/web/international-minifiddlers/player/vod?assetId=1982759',
+        'only_matching': True
+    }]
+
+    def _real_extract(self, url):
+        base_url, temp_id = self._match_valid_url(url).groups()
+        webpage = self._download_webpage(url, temp_id)
+
+        video_id = self._search_regex(r"_icareus\['itemId'\]\s*=\s*'(\d+)'", webpage, 'video_id')
+        organization_id = self._search_regex(r"_icareus\['organizationId'\]\s*=\s*'(\d+)'", webpage, 'organization_id')
+
+        assets = self._download_json(
+            self._search_regex(r'var\s+publishingServiceURL\s*=\s*"(http[^"]+)";', webpage, 'api_base'),
+            video_id, data=urlencode_postdata({
+                'version': '03',
+                'action': 'getAssetPlaybackUrls',
+                'organizationId': organization_id,
+                'assetId': video_id,
+                'token': 
self._search_regex(r"_icareus\['token'\]\s*=\s*'([a-f0-9]+)'", webpage, 'icareus_token'), + })) + + subtitles = { + remove_end(sdesc.split(' ')[0], ':'): [{'url': url_or_none(surl)}] + for _, sdesc, surl in assets.get('subtitles') or [] + } + + formats = [{ + 'format': item.get('name'), + 'format_id': 'audio', + 'vcodec': 'none', + 'url': url_or_none(item['url']), + 'tbr': int_or_none(self._search_regex( + r'\((\d+)\s*k\)', item.get('name') or '', 'audio bitrate', default=None)), + } for item in assets.get('audio_urls') or [] if url_or_none(item.get('url'))] + + for item in assets.get('urls') or []: + video_url = url_or_none(item.get('url')) + if video_url is None: + continue + ext = determine_ext(video_url) + if ext == 'm3u8': + fmts, subs = self._extract_m3u8_formats_and_subtitles( + video_url, video_id, 'mp4', m3u8_id='hls', fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + else: + fmt = item.get('name') + formats.append({ + 'url': video_url, + 'format': fmt, + 'tbr': parse_bitrate(fmt), + 'format_id': str_or_none(item.get('id')), + **parse_resolution(fmt), + }) + + info, token, live_title = self._search_json_ld(webpage, video_id, default={}), None, None + if not info: + token = self._search_regex( + r'data\s*:\s*{action:"getAsset".*?token:\'([a-f0-9]+)\'}', webpage, 'token', default=None) + if not token: + live_title = get_element_by_class('unpublished-info-item future-event-title', webpage) + + if token: + metadata = self._download_json( + f'{base_url}/icareus-suite-api-portlet/publishing', + video_id, fatal=False, data=urlencode_postdata({ + 'version': '03', + 'action': 'getAsset', + 'organizationId': organization_id, + 'assetId': video_id, + 'languageId': 'en_US', + 'userId': '0', + 'token': token, + })) or {} + info = { + 'title': metadata.get('name'), + 'description': metadata.get('description'), + 'timestamp': int_or_none(metadata.get('date'), scale=1000), + 'duration': int_or_none(metadata.get('duration')), + 'thumbnail': url_or_none(metadata.get('thumbnailMedium')), + } + elif live_title: # Recorded livestream + info = { + 'title': live_title, + 'description': get_element_by_class('unpublished-info-item future-event-description', webpage), + 'timestamp': int_or_none(self._search_regex( + r'var startEvent\s*=\s*(\d+);', webpage, 'uploadDate', fatal=False), scale=1000), + } + + thumbnails = info.get('thumbnails') or [{ + 'url': url_or_none(info.get('thumbnail') or assets.get('thumbnail')) + }] + + self._sort_formats(formats) + return merge_dicts({ + 'id': video_id, + 'title': None, + 'formats': formats, + 'subtitles': subtitles, + 'description': clean_html(info.get('description')), + 'thumbnails': thumbnails if thumbnails[0]['url'] else None, + }, info) From 96b49af01c63dbdf88c2711bb2fb6e83d7345b02 Mon Sep 17 00:00:00 2001 From: Yipten Date: Sat, 23 Apr 2022 22:40:20 -0400 Subject: [PATCH 0976/2552] [bandcamp] Exclude merch links (#3368) Closes #3318 Authored by: Yipten --- yt_dlp/extractor/bandcamp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/bandcamp.py b/yt_dlp/extractor/bandcamp.py index 5863eaeca..6f806d84e 100644 --- a/yt_dlp/extractor/bandcamp.py +++ b/yt_dlp/extractor/bandcamp.py @@ -436,7 +436,7 @@ class BandcampUserIE(InfoExtractor): uploader = self._match_id(url) webpage = self._download_webpage(url, uploader) - discography_data = (re.findall(r'
<li data-item-id=["\'][^>]+>\s*<a href=["\']([^"\']+)', webpage)
+        discography_data = (re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\'](?![^"\'/]*?/merch)([^"\']+)', webpage)
                             or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
 
         return self.playlist_from_matches(

From acbc64225006964cf52d316e007a77a1b5e2975b Mon Sep 17 00:00:00 2001
From: Lesmiscore
Date: Mon, 25 Apr 2022 00:44:30 +0900
Subject: [PATCH 0977/2552] [utils] WebSocketsWrapper: Ignore warnings at
 websockets instantiation

This also fixes crash caused by moving asyncio to .compat.

Authored by: Lesmiscore
Thanks: J.Chung at Discord (581418557871620106)
---
 yt_dlp/utils.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 7f0c055ac..844b9cb19 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -36,6 +36,7 @@ import tempfile
 import time
 import traceback
 import urllib.parse
+import warnings
 import xml.etree.ElementTree
 import zlib
 
@@ -5221,17 +5222,23 @@ class WebSocketsWrapper():
     pool = None
 
     def __init__(self, url, headers=None, connect=True):
-        self.loop = asyncio.events.new_event_loop()
-        self.conn = websockets.connect(
-            url, extra_headers=headers, ping_interval=None,
-            close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf'))
+        self.loop = asyncio.new_event_loop()
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            # https://github.com/aaugustin/websockets/blob/9c87d43f1d7bbf6847350087aae74fd35f73a642/src/websockets/legacy/client.py#L480
+            # the reason to keep giving `loop` parameter: we aren't in async function
+            self.conn = websockets.connect(
+                url, extra_headers=headers, ping_interval=None,
+                close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf'))
         if connect:
             self.__enter__()
         atexit.register(self.__exit__, None, None, None)
 
     def __enter__(self):
         if not self.pool:
-            self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore")
+                self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
         return self
 
     def send(self, *args):

From 69b59b4b4b52e496df980d8d21ad5ff670089c0b Mon Sep 17 00:00:00 2001
From: Lesmiscore
Date: Mon, 25 Apr 2022 00:45:19 +0900
Subject: [PATCH 0978/2552] [downloader/fc2] Stop heartbeating once FFmpeg
 finishes

Authored by: Lesmiscore
---
 yt_dlp/downloader/fc2.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/downloader/fc2.py b/yt_dlp/downloader/fc2.py
index d503aac04..f9763debb 100644
--- a/yt_dlp/downloader/fc2.py
+++ b/yt_dlp/downloader/fc2.py
@@ -18,6 +18,9 @@ class FC2LiveFD(FileDownloader):
         heartbeat_state = [None, 1]
 
         def heartbeat():
+            if heartbeat_state[1] < 0:
+                return
+
             try:
                 heartbeat_state[1] += 1
                 ws.send('{"name":"heartbeat","arguments":{},"id":%d}' % heartbeat_state[1])
@@ -36,4 +39,8 @@ class FC2LiveFD(FileDownloader):
             'ws': None,
             'protocol': 'live_ffmpeg',
         })
-        return FFmpegFD(self.ydl, self.params or {}).download(filename, new_info_dict)
+        try:
+            return FFmpegFD(self.ydl, self.params or {}).download(filename, new_info_dict)
+        finally:
+            # stop heartbeating
+            heartbeat_state[1] = -1

From 9cd080508db2daf625994f9aef29790f4bca7996 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Tue, 26 Apr 2022 05:35:06 +0530
Subject: [PATCH 0979/2552] Revert
acbc64225006964cf52d316e007a77a1b5e2975b Reverts "[utils] WebSocketsWrapper: Ignore warnings at websockets instantiation" The warning should not be suppressed. We need to address it --- yt_dlp/utils.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 844b9cb19..90f070b6d 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -36,7 +36,6 @@ import tempfile import time import traceback import urllib.parse -import warnings import xml.etree.ElementTree import zlib @@ -5222,23 +5221,18 @@ class WebSocketsWrapper(): pool = None def __init__(self, url, headers=None, connect=True): - self.loop = asyncio.new_event_loop() - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - # https://github.com/aaugustin/websockets/blob/9c87d43f1d7bbf6847350087aae74fd35f73a642/src/websockets/legacy/client.py#L480 - # the reason to keep giving `loop` parameter: we aren't in async function - self.conn = websockets.connect( - url, extra_headers=headers, ping_interval=None, - close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf')) + self.loop = asyncio.events.new_event_loop() + # XXX: "loop" is deprecated + self.conn = websockets.connect( + url, extra_headers=headers, ping_interval=None, + close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf')) if connect: self.__enter__() atexit.register(self.__exit__, None, None, None) def __enter__(self): if not self.pool: - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop) + self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop) return self def send(self, *args): @@ -5258,7 +5252,7 @@ class WebSocketsWrapper(): # for contributors: If there's any new library using asyncio needs to be run in non-async, move these function out of this class @staticmethod def run_with_loop(main, loop): - if not asyncio.iscoroutine(main): + if not asyncio.coroutines.iscoroutine(main): raise ValueError(f'a coroutine was expected, got {main!r}') try: @@ -5278,6 +5272,7 @@ class WebSocketsWrapper(): for task in to_cancel: task.cancel() + # XXX: "loop" is removed in python 3.10+ loop.run_until_complete( asyncio.tasks.gather(*to_cancel, loop=loop, return_exceptions=True)) From 9196cbfe8bb7a6eb46037735b76f21963dfdc61a Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 24 Apr 2022 21:58:18 +0530 Subject: [PATCH 0980/2552] [compat] Ensure submodules are correctly wrapped --- test/test_compat.py | 7 +++++ yt_dlp/compat/__init__.py | 30 ++++++--------------- yt_dlp/compat/asyncio/__init__.py | 4 +++ yt_dlp/compat/asyncio/tasks.py | 5 ++++ yt_dlp/compat/compat_utils.py | 44 +++++++++++++++++++++++++++++++ yt_dlp/compat/re.py | 5 ++++ 6 files changed, 73 insertions(+), 22 deletions(-) create mode 100644 yt_dlp/compat/compat_utils.py diff --git a/test/test_compat.py b/test/test_compat.py index 8e40a4180..9b185853d 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -7,6 +7,7 @@ import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from yt_dlp import compat from yt_dlp.compat import ( compat_etree_fromstring, compat_expanduser, @@ -21,6 +22,12 @@ from yt_dlp.compat import ( class TestCompat(unittest.TestCase): + def test_compat_passthrough(self): + with self.assertWarns(DeprecationWarning): + compat.compat_basestring + + compat.asyncio.events # Must not raise error + def test_compat_getenv(self): test_str = 'тест' 
compat_setenv('yt_dlp_COMPAT_GETENV', test_str) diff --git a/yt_dlp/compat/__init__.py b/yt_dlp/compat/__init__.py index 56a65bb6c..3c395f6d9 100644 --- a/yt_dlp/compat/__init__.py +++ b/yt_dlp/compat/__init__.py @@ -2,11 +2,18 @@ import contextlib import os import subprocess import sys -import types +import warnings import xml.etree.ElementTree as etree from . import re from ._deprecated import * # noqa: F401, F403 +from .compat_utils import passthrough_module + + +# XXX: Implement this the same way as other DeprecationWarnings without circular import +passthrough_module(__name__, '._legacy', callback=lambda attr: warnings.warn( + DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=2)) +del passthrough_module # HTMLParseError has been deprecated in Python 3.3 and removed in @@ -85,24 +92,3 @@ def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.pytho with contextlib.suppress(Exception): subprocess.Popen('', shell=True, startupinfo=startupinfo).wait() WINDOWS_VT_MODE = True - - -class _PassthroughLegacy(types.ModuleType): - def __getattr__(self, attr): - import importlib - with contextlib.suppress(ImportError): - return importlib.import_module(f'.{attr}', __name__) - - legacy = importlib.import_module('._legacy', __name__) - if not hasattr(legacy, attr): - raise AttributeError(f'module {__name__} has no attribute {attr}') - - # XXX: Implement this the same way as other DeprecationWarnings without circular import - import warnings - warnings.warn(DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=2) - return getattr(legacy, attr) - - -# Python 3.6 does not have module level __getattr__ -# https://peps.python.org/pep-0562/ -sys.modules[__name__].__class__ = _PassthroughLegacy diff --git a/yt_dlp/compat/asyncio/__init__.py b/yt_dlp/compat/asyncio/__init__.py index 0e8c6cad3..21b494499 100644 --- a/yt_dlp/compat/asyncio/__init__.py +++ b/yt_dlp/compat/asyncio/__init__.py @@ -3,6 +3,10 @@ from asyncio import * # noqa: F403 from . 
import tasks # noqa: F401 +from ..compat_utils import passthrough_module + +passthrough_module(__name__, 'asyncio') +del passthrough_module try: run # >= 3.7 diff --git a/yt_dlp/compat/asyncio/tasks.py b/yt_dlp/compat/asyncio/tasks.py index cb31e52fa..9d98fdfeb 100644 --- a/yt_dlp/compat/asyncio/tasks.py +++ b/yt_dlp/compat/asyncio/tasks.py @@ -2,6 +2,11 @@ from asyncio.tasks import * # noqa: F403 +from ..compat_utils import passthrough_module + +passthrough_module(__name__, 'asyncio.tasks') +del passthrough_module + try: # >= 3.7 all_tasks except NameError: diff --git a/yt_dlp/compat/compat_utils.py b/yt_dlp/compat/compat_utils.py new file mode 100644 index 000000000..938daf926 --- /dev/null +++ b/yt_dlp/compat/compat_utils.py @@ -0,0 +1,44 @@ +import contextlib +import importlib +import sys +import types + + +def _is_package(module): + try: + module.__getattribute__('__path__') + except AttributeError: + return False + return True + + +_NO_ATTRIBUTE = object() + + +def passthrough_module(parent, child, *, callback=lambda _: None): + parent_module = importlib.import_module(parent) + child_module = importlib.import_module(child, parent) + + class PassthroughModule(types.ModuleType): + def __getattr__(self, attr): + if _is_package(parent_module): + with contextlib.suppress(ImportError): + return importlib.import_module(f'.{attr}', parent) + + ret = _NO_ATTRIBUTE + with contextlib.suppress(AttributeError): + ret = getattr(child_module, attr) + + if _is_package(child_module): + with contextlib.suppress(ImportError): + ret = importlib.import_module(f'.{attr}', child) + + if ret is _NO_ATTRIBUTE: + raise AttributeError(f'module {parent} has no attribute {attr}') + + callback(attr) + return ret + + # Python 3.6 does not have module level __getattr__ + # https://peps.python.org/pep-0562/ + sys.modules[parent].__class__ = PassthroughModule diff --git a/yt_dlp/compat/re.py b/yt_dlp/compat/re.py index e8a6fabbd..d4532950a 100644 --- a/yt_dlp/compat/re.py +++ b/yt_dlp/compat/re.py @@ -2,6 +2,11 @@ from re import * # F403 +from .compat_utils import passthrough_module + +passthrough_module(__name__, 're') +del passthrough_module + try: Pattern # >= 3.7 except NameError: From 059bc4db1975698dca53278a0fcc23d428b7658a Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 26 Apr 2022 05:45:18 +0530 Subject: [PATCH 0981/2552] [compat/asyncio] Use `asyncio.all_tasks` --- Makefile | 2 +- yt_dlp/compat/{asyncio/__init__.py => asyncio.py} | 8 ++++++-- yt_dlp/compat/asyncio/tasks.py | 13 ------------- yt_dlp/utils.py | 8 ++++---- 4 files changed, 11 insertions(+), 20 deletions(-) rename yt_dlp/compat/{asyncio/__init__.py => asyncio.py} (74%) delete mode 100644 yt_dlp/compat/asyncio/tasks.py diff --git a/Makefile b/Makefile index 146df1906..0e911feba 100644 --- a/Makefile +++ b/Makefile @@ -66,7 +66,7 @@ offlinetest: codetest # XXX: This is hard to maintain CODE_FOLDERS = yt_dlp yt_dlp/downloader yt_dlp/extractor yt_dlp/postprocessor yt_dlp/compat \ - yt_dlp/compat/asyncio yt_dlp/extractor/anvato_token_generator + yt_dlp/extractor/anvato_token_generator yt-dlp: yt_dlp/*.py yt_dlp/*/*.py mkdir -p zip for d in $(CODE_FOLDERS) ; do \ diff --git a/yt_dlp/compat/asyncio/__init__.py b/yt_dlp/compat/asyncio.py similarity index 74% rename from yt_dlp/compat/asyncio/__init__.py rename to yt_dlp/compat/asyncio.py index 21b494499..f80dc192d 100644 --- a/yt_dlp/compat/asyncio/__init__.py +++ b/yt_dlp/compat/asyncio.py @@ -2,8 +2,7 @@ from asyncio import * # noqa: F403 -from . 
import tasks # noqa: F401 -from ..compat_utils import passthrough_module +from .compat_utils import passthrough_module passthrough_module(__name__, 'asyncio') del passthrough_module @@ -18,3 +17,8 @@ except NameError: loop = new_event_loop() set_event_loop(loop) loop.run_until_complete(coro) + +try: + all_tasks # >= 3.7 +except NameError: + all_tasks = Task.all_tasks diff --git a/yt_dlp/compat/asyncio/tasks.py b/yt_dlp/compat/asyncio/tasks.py deleted file mode 100644 index 9d98fdfeb..000000000 --- a/yt_dlp/compat/asyncio/tasks.py +++ /dev/null @@ -1,13 +0,0 @@ -# flake8: noqa: F405 - -from asyncio.tasks import * # noqa: F403 - -from ..compat_utils import passthrough_module - -passthrough_module(__name__, 'asyncio.tasks') -del passthrough_module - -try: # >= 3.7 - all_tasks -except NameError: - all_tasks = Task.all_tasks diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 90f070b6d..0171394fc 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -5221,7 +5221,7 @@ class WebSocketsWrapper(): pool = None def __init__(self, url, headers=None, connect=True): - self.loop = asyncio.events.new_event_loop() + self.loop = asyncio.new_event_loop() # XXX: "loop" is deprecated self.conn = websockets.connect( url, extra_headers=headers, ping_interval=None, @@ -5252,7 +5252,7 @@ class WebSocketsWrapper(): # for contributors: If there's any new library using asyncio needs to be run in non-async, move these function out of this class @staticmethod def run_with_loop(main, loop): - if not asyncio.coroutines.iscoroutine(main): + if not asyncio.iscoroutine(main): raise ValueError(f'a coroutine was expected, got {main!r}') try: @@ -5264,7 +5264,7 @@ class WebSocketsWrapper(): @staticmethod def _cancel_all_tasks(loop): - to_cancel = asyncio.tasks.all_tasks(loop) + to_cancel = asyncio.all_tasks(loop) if not to_cancel: return @@ -5274,7 +5274,7 @@ class WebSocketsWrapper(): # XXX: "loop" is removed in python 3.10+ loop.run_until_complete( - asyncio.tasks.gather(*to_cancel, loop=loop, return_exceptions=True)) + asyncio.gather(*to_cancel, loop=loop, return_exceptions=True)) for task in to_cancel: if task.cancelled(): From 7ab56be2c7309a2d11d4ee28c71f8fb29da21ef7 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 26 Apr 2022 15:11:01 +0530 Subject: [PATCH 0982/2552] [build] Ensure `compat._legacy` is packed in executables Fixes https://github.com/yt-dlp/yt-dlp/commit/9196cbfe8bb7a6eb46037735b76f21963dfdc61a#commitcomment-72192406 --- pyinst.py | 1 + setup.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/pyinst.py b/pyinst.py index 9e8128e09..c63d879a0 100644 --- a/pyinst.py +++ b/pyinst.py @@ -47,6 +47,7 @@ def main(): '--noconfirm', *dependency_options(), *opts, + '--collect-submodules=yt_dlp', 'yt_dlp/__main__.py', ] print(f'Running PyInstaller with {opts}') diff --git a/setup.py b/setup.py index 45f4d6b49..141cb238f 100644 --- a/setup.py +++ b/setup.py @@ -48,6 +48,8 @@ if sys.argv[1:2] == ['py2exe']: 'dist_dir': './dist', 'excludes': ['Crypto', 'Cryptodome'], # py2exe cannot import Crypto 'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'], + # Modules that are only imported dynamically must be added here + 'includes': ['yt_dlp.compat._legacy'], } }, 'zipfile': None From 00828e2c9311b90d317fa054883dd63e21fffa78 Mon Sep 17 00:00:00 2001 From: Elyse Date: Tue, 26 Apr 2022 04:54:56 -0500 Subject: [PATCH 0983/2552] [downloader/ffmpeg] Specify headers for each URL (#3553) Closes #2696 Authored by: elyse0 --- yt_dlp/downloader/external.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) 
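[Editor's note] The key point is that ffmpeg applies input options such as `-headers` only to the `-i` that immediately follows them, so the headers must be interleaved before every HTTP input. A minimal sketch of the resulting argument list — the `inputs` pairs and output name are illustrative, not yt-dlp's actual variables:

```python
# Sketch only: build an ffmpeg command where each HTTP input gets its own
# headers. ffmpeg treats -headers as a per-input option, applied to the next -i.
inputs = [  # assumed example data
    ('https://example.com/video.m3u8', {'Referer': 'https://example.com/'}),
    ('https://example.com/audio.m3u8', {'Origin': 'https://example.com'}),
]
args = ['ffmpeg']
for url, headers in inputs:
    # Trailing \r\n after each header avoids ffmpeg's "No trailing CRLF" warning
    args += ['-headers', ''.join(f'{k}: {v}\r\n' for k, v in headers.items())]
    args += ['-i', url]
args += ['-c', 'copy', 'out.mp4']
```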
diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 6c5616c60..da38e502d 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -382,13 +382,15 @@ class FFmpegFD(ExternalFD): # if end_time: # args += ['-t', compat_str(end_time - start_time)] - if info_dict.get('http_headers') is not None and re.match(r'^https?://', urls[0]): - # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv: - # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header. - headers = handle_youtubedl_headers(info_dict['http_headers']) - args += [ + http_headers = None + if info_dict.get('http_headers'): + youtubedl_headers = handle_youtubedl_headers(info_dict['http_headers']) + http_headers = [ + # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv: + # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header. '-headers', - ''.join(f'{key}: {val}\r\n' for key, val in headers.items())] + ''.join(f'{key}: {val}\r\n' for key, val in youtubedl_headers.items()) + ] env = None proxy = self.params.get('proxy') @@ -441,6 +443,11 @@ class FFmpegFD(ExternalFD): args += ['-rtmp_conn', conn] for i, url in enumerate(urls): + # We need to specify headers for each http input stream + # otherwise, it will only be applied to the first. + # https://github.com/yt-dlp/yt-dlp/issues/2696 + if http_headers is not None and re.match(r'^https?://', url): + args += http_headers args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', url] args += ['-c', 'copy'] From ca04e1bf49153abea3b4762f5b92056aa60f6f91 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 27 Apr 2022 01:18:50 +0530 Subject: [PATCH 0984/2552] [Metadata] Remove filename from attached info-json --- yt_dlp/postprocessor/ffmpeg.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index d909149ef..500fc1950 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -799,8 +799,11 @@ class FFmpegMetadataPP(FFmpegPostProcessor): yield ('-map', '-0:%d' % old_stream) new_stream -= 1 - yield ('-attach', infofn, - '-metadata:s:%d' % new_stream, 'mimetype=application/json') + yield ( + '-attach', infofn, + f'-metadata:s:{new_stream}', 'mimetype=application/json', + f'-metadata:s:{new_stream}', 'filename=info.json', + ) class FFmpegMergerPP(FFmpegPostProcessor): From e13945a2fe34bbec5581b02a7f1dc308bad6f3e7 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 27 Apr 2022 05:36:06 +0530 Subject: [PATCH 0985/2552] [ffmpeg] Fix features detection --- yt_dlp/postprocessor/ffmpeg.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 500fc1950..bb7a630c6 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -146,7 +146,8 @@ class FFmpegPostProcessor(PostProcessor): self._paths[basename] = location self._versions = {} - executables = {'basename': ('ffmpeg', 'avconv'), 'probe_basename': ('ffprobe', 'avprobe')} + # NB: probe must be first for _features to be poulated correctly + executables = {'probe_basename': ('ffprobe', 'avprobe'), 'basename': ('ffmpeg', 'avconv')} if prefer_ffmpeg is False: executables = {k: v[::-1] for k, v in executables.items()} for var, prefs in executables.items(): From 4f8095235321632ac2785dda2f038bc2aedba4d9 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 27 Apr 2022 08:24:25 +0530 Subject: [PATCH 
0986/2552] [cleanup] Delete unused extractors --- yt_dlp/extractor/blinkx.py | 84 ---------- yt_dlp/extractor/discoveryvr.py | 56 ------- yt_dlp/extractor/everyonesmixtape.py | 73 --------- yt_dlp/extractor/fxnetworks.py | 74 --------- yt_dlp/extractor/kanalplay.py | 92 ----------- yt_dlp/extractor/noco.py | 225 --------------------------- yt_dlp/extractor/spiegeltv.py | 15 -- yt_dlp/extractor/tastytrade.py | 41 ----- yt_dlp/extractor/tudou.py | 45 ------ yt_dlp/extractor/vidzi.py | 65 -------- 10 files changed, 770 deletions(-) delete mode 100644 yt_dlp/extractor/blinkx.py delete mode 100644 yt_dlp/extractor/discoveryvr.py delete mode 100644 yt_dlp/extractor/everyonesmixtape.py delete mode 100644 yt_dlp/extractor/fxnetworks.py delete mode 100644 yt_dlp/extractor/kanalplay.py delete mode 100644 yt_dlp/extractor/noco.py delete mode 100644 yt_dlp/extractor/spiegeltv.py delete mode 100644 yt_dlp/extractor/tastytrade.py delete mode 100644 yt_dlp/extractor/tudou.py delete mode 100644 yt_dlp/extractor/vidzi.py diff --git a/yt_dlp/extractor/blinkx.py b/yt_dlp/extractor/blinkx.py deleted file mode 100644 index 80531ccad..000000000 --- a/yt_dlp/extractor/blinkx.py +++ /dev/null @@ -1,84 +0,0 @@ -import json - -from .common import InfoExtractor -from ..utils import ( - remove_start, - int_or_none, -) - - -class BlinkxIE(InfoExtractor): - _VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P[^?]+)' - IE_NAME = 'blinkx' - - _TEST = { - 'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ', - 'md5': '337cf7a344663ec79bf93a526a2e06c7', - 'info_dict': { - 'id': 'Da0Gw3xc', - 'ext': 'mp4', - 'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News', - 'uploader': 'IGN News', - 'upload_date': '20150217', - 'timestamp': 1424215740, - 'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.', - 'duration': 47.743333, - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - display_id = video_id[:8] - - api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' - + 'video=%s' % video_id) - data_json = self._download_webpage(api_url, display_id) - data = json.loads(data_json)['api']['results'][0] - duration = None - thumbnails = [] - formats = [] - for m in data['media']: - if m['type'] == 'jpg': - thumbnails.append({ - 'url': m['link'], - 'width': int(m['w']), - 'height': int(m['h']), - }) - elif m['type'] == 'original': - duration = float(m['d']) - elif m['type'] == 'youtube': - yt_id = m['link'] - self.to_screen('Youtube video detected: %s' % yt_id) - return self.url_result(yt_id, 'Youtube', video_id=yt_id) - elif m['type'] in ('flv', 'mp4'): - vcodec = remove_start(m['vcodec'], 'ff') - acodec = remove_start(m['acodec'], 'ff') - vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000) - abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000) - tbr = vbr + abr if vbr and abr else None - format_id = '%s-%sk-%s' % (vcodec, tbr, m['w']) - formats.append({ - 'format_id': format_id, - 'url': m['link'], - 'vcodec': vcodec, - 'acodec': acodec, - 'abr': abr, - 'vbr': vbr, - 'tbr': tbr, - 'width': int_or_none(m.get('w')), - 'height': int_or_none(m.get('h')), - }) - - self._sort_formats(formats) - - return { - 'id': display_id, - 'fullid': video_id, - 'title': data['title'], - 'formats': formats, - 'uploader': data.get('channel_name'), - 'timestamp': data.get('pubdate_epoch'), - 
'description': data.get('description'), - 'thumbnails': thumbnails, - 'duration': duration, - } diff --git a/yt_dlp/extractor/discoveryvr.py b/yt_dlp/extractor/discoveryvr.py deleted file mode 100644 index a021d986e..000000000 --- a/yt_dlp/extractor/discoveryvr.py +++ /dev/null @@ -1,56 +0,0 @@ -from .common import InfoExtractor -from ..utils import parse_duration - - -class DiscoveryVRIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?discoveryvr\.com/watch/(?P[^/?#]+)' - _TEST = { - 'url': 'http://www.discoveryvr.com/watch/discovery-vr-an-introduction', - 'md5': '32b1929798c464a54356378b7912eca4', - 'info_dict': { - 'id': 'discovery-vr-an-introduction', - 'ext': 'mp4', - 'title': 'Discovery VR - An Introduction', - 'description': 'md5:80d418a10efb8899d9403e61d8790f06', - } - } - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - bootstrap_data = self._search_regex( - r'root\.DVR\.bootstrapData\s+=\s+"({.+?})";', - webpage, 'bootstrap data') - bootstrap_data = self._parse_json( - bootstrap_data.encode('utf-8').decode('unicode_escape'), - display_id) - videos = self._parse_json(bootstrap_data['videos'], display_id)['allVideos'] - video_data = next(video for video in videos if video.get('slug') == display_id) - - series = video_data.get('showTitle') - title = episode = video_data.get('title') or series - if series and series != title: - title = '%s - %s' % (series, title) - - formats = [] - for f, format_id in (('cdnUriM3U8', 'mobi'), ('webVideoUrlSd', 'sd'), ('webVideoUrlHd', 'hd')): - f_url = video_data.get(f) - if not f_url: - continue - formats.append({ - 'format_id': format_id, - 'url': f_url, - }) - - return { - 'id': display_id, - 'display_id': display_id, - 'title': title, - 'description': video_data.get('description'), - 'thumbnail': video_data.get('thumbnail'), - 'duration': parse_duration(video_data.get('runTime')), - 'formats': formats, - 'episode': episode, - 'series': series, - } diff --git a/yt_dlp/extractor/everyonesmixtape.py b/yt_dlp/extractor/everyonesmixtape.py deleted file mode 100644 index d26ff8ad3..000000000 --- a/yt_dlp/extractor/everyonesmixtape.py +++ /dev/null @@ -1,73 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - sanitized_Request, -) - - -class EveryonesMixtapeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?everyonesmixtape\.com/#/mix/(?P[0-9a-zA-Z]+)(?:/(?P[0-9]))?$' - - _TESTS = [{ - 'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi/5', - 'info_dict': { - 'id': '5bfseWNmlds', - 'ext': 'mp4', - 'title': "Passion Pit - \"Sleepyhead\" (Official Music Video)", - 'uploader': 'FKR.TV', - 'uploader_id': 'frenchkissrecords', - 'description': "Music video for \"Sleepyhead\" from Passion Pit's debut EP Chunk Of Change.\nBuy on iTunes: https://itunes.apple.com/us/album/chunk-of-change-ep/id300087641\n\nDirected by The Wilderness.\n\nhttp://www.passionpitmusic.com\nhttp://www.frenchkissrecords.com", - 'upload_date': '20081015' - }, - 'params': { - 'skip_download': True, # This is simply YouTube - } - }, { - 'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi', - 'info_dict': { - 'id': 'm7m0jJAbMQi', - 'title': 'Driving', - }, - 'playlist_count': 24 - }] - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - playlist_id = mobj.group('id') - - pllist_url = 'http://everyonesmixtape.com/mixtape.php?a=getMixes&u=-1&linked=%s&explore=' % playlist_id - pllist_req = sanitized_Request(pllist_url) - 
pllist_req.add_header('X-Requested-With', 'XMLHttpRequest') - - playlist_list = self._download_json( - pllist_req, playlist_id, note='Downloading playlist metadata') - try: - playlist_no = next(playlist['id'] - for playlist in playlist_list - if playlist['code'] == playlist_id) - except StopIteration: - raise ExtractorError('Playlist id not found') - - pl_url = 'http://everyonesmixtape.com/mixtape.php?a=getMix&id=%s&userId=null&code=' % playlist_no - pl_req = sanitized_Request(pl_url) - pl_req.add_header('X-Requested-With', 'XMLHttpRequest') - playlist = self._download_json( - pl_req, playlist_id, note='Downloading playlist info') - - entries = [{ - '_type': 'url', - 'url': t['url'], - 'title': t['title'], - } for t in playlist['tracks']] - - if mobj.group('songnr'): - songnr = int(mobj.group('songnr')) - 1 - return entries[songnr] - - playlist_title = playlist['mixData']['name'] - return { - '_type': 'playlist', - 'id': playlist_id, - 'title': playlist_title, - 'entries': entries, - } diff --git a/yt_dlp/extractor/fxnetworks.py b/yt_dlp/extractor/fxnetworks.py deleted file mode 100644 index 370b0a597..000000000 --- a/yt_dlp/extractor/fxnetworks.py +++ /dev/null @@ -1,74 +0,0 @@ -from .adobepass import AdobePassIE -from ..utils import ( - extract_attributes, - int_or_none, - parse_age_limit, - smuggle_url, - update_url_query, -) - - -class FXNetworksIE(AdobePassIE): - _VALID_URL = r'https?://(?:www\.)?(?:fxnetworks|simpsonsworld)\.com/video/(?P\d+)' - _TESTS = [{ - 'url': 'http://www.fxnetworks.com/video/1032565827847', - 'md5': '8d99b97b4aa7a202f55b6ed47ea7e703', - 'info_dict': { - 'id': 'dRzwHC_MMqIv', - 'ext': 'mp4', - 'title': 'First Look: Better Things - Season 2', - 'description': 'Because real life is like a fart. Watch this FIRST LOOK to see what inspired the new season of Better Things.', - 'age_limit': 14, - 'uploader': 'NEWA-FNG-FX', - 'upload_date': '20170825', - 'timestamp': 1503686274, - 'episode_number': 0, - 'season_number': 2, - 'series': 'Better Things', - }, - 'add_ie': ['ThePlatform'], - }, { - 'url': 'http://www.simpsonsworld.com/video/716094019682', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - if 'The content you are trying to access is not available in your region.' 
in webpage: - self.raise_geo_restricted() - video_data = extract_attributes(self._search_regex( - r'()', webpage, 'video data')) - player_type = self._search_regex(r'playerType\s*=\s*[\'"]([^\'"]+)', webpage, 'player type', default=None) - release_url = video_data['rel'] - title = video_data['data-title'] - rating = video_data.get('data-rating') - query = { - 'mbr': 'true', - } - if player_type == 'movies': - query.update({ - 'manifest': 'm3u', - }) - else: - query.update({ - 'switch': 'http', - }) - if video_data.get('data-req-auth') == '1': - resource = self._get_mvpd_resource( - video_data['data-channel'], title, - video_data.get('data-guid'), rating) - query['auth'] = self._extract_mvpd_auth(url, video_id, 'fx', resource) - - return { - '_type': 'url_transparent', - 'id': video_id, - 'title': title, - 'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}), - 'series': video_data.get('data-show-title'), - 'episode_number': int_or_none(video_data.get('data-episode')), - 'season_number': int_or_none(video_data.get('data-season')), - 'thumbnail': video_data.get('data-large-thumb'), - 'age_limit': parse_age_limit(rating), - 'ie_key': 'ThePlatform', - } diff --git a/yt_dlp/extractor/kanalplay.py b/yt_dlp/extractor/kanalplay.py deleted file mode 100644 index ef74014c0..000000000 --- a/yt_dlp/extractor/kanalplay.py +++ /dev/null @@ -1,92 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - float_or_none, - srt_subtitles_timecode, -) - - -class KanalPlayIE(InfoExtractor): - IE_DESC = 'Kanal 5/9/11 Play' - _VALID_URL = r'https?://(?:www\.)?kanal(?P5|9|11)play\.se/(?:#!/)?(?:play/)?program/\d+/video/(?P\d+)' - _TESTS = [{ - 'url': 'http://www.kanal5play.se/#!/play/program/3060212363/video/3270012277', - 'info_dict': { - 'id': '3270012277', - 'ext': 'flv', - 'title': 'Saknar både dusch och avlopp', - 'description': 'md5:6023a95832a06059832ae93bc3c7efb7', - 'duration': 2636.36, - }, - 'params': { - # rtmp download - 'skip_download': True, - } - }, { - 'url': 'http://www.kanal9play.se/#!/play/program/335032/video/246042', - 'only_matching': True, - }, { - 'url': 'http://www.kanal11play.se/#!/play/program/232835958/video/367135199', - 'only_matching': True, - }] - - def _fix_subtitles(self, subs): - return '\r\n\r\n'.join( - '%s\r\n%s --> %s\r\n%s' - % ( - num, - srt_subtitles_timecode(item['startMillis'] / 1000.0), - srt_subtitles_timecode(item['endMillis'] / 1000.0), - item['text'], - ) for num, item in enumerate(subs, 1)) - - def _get_subtitles(self, channel_id, video_id): - subs = self._download_json( - 'http://www.kanal%splay.se/api/subtitles/%s' % (channel_id, video_id), - video_id, 'Downloading subtitles JSON', fatal=False) - return {'sv': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]} if subs else {} - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - channel_id = mobj.group('channel_id') - - video = self._download_json( - 'http://www.kanal%splay.se/api/getVideo?format=FLASH&videoId=%s' % (channel_id, video_id), - video_id) - - reasons_for_no_streams = video.get('reasonsForNoStreams') - if reasons_for_no_streams: - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, '\n'.join(reasons_for_no_streams)), - expected=True) - - title = video['title'] - description = video.get('description') - duration = float_or_none(video.get('length'), 1000) - thumbnail = video.get('posterUrl') - - stream_base_url = video['streamBaseUrl'] - - formats = [{ - 'url': stream_base_url, - 
'play_path': stream['source'], - 'ext': 'flv', - 'tbr': float_or_none(stream.get('bitrate'), 1000), - 'rtmp_real_time': True, - } for stream in video['streams']] - self._sort_formats(formats) - - subtitles = {} - if video.get('hasSubtitle'): - subtitles = self.extract_subtitles(channel_id, video_id) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'formats': formats, - 'subtitles': subtitles, - } diff --git a/yt_dlp/extractor/noco.py b/yt_dlp/extractor/noco.py deleted file mode 100644 index 583d399cc..000000000 --- a/yt_dlp/extractor/noco.py +++ /dev/null @@ -1,225 +0,0 @@ -import re -import time -import hashlib - -from .common import InfoExtractor -from ..compat import ( - compat_str, -) -from ..utils import ( - clean_html, - ExtractorError, - int_or_none, - float_or_none, - parse_iso8601, - parse_qs, - sanitized_Request, - urlencode_postdata, -) - - -class NocoIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P\d+)' - _LOGIN_URL = 'https://noco.tv/do.php' - _API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s' - _SUB_LANG_TEMPLATE = '&sub_lang=%s' - _NETRC_MACHINE = 'noco' - - _TESTS = [ - { - 'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/', - 'md5': '0a993f0058ddbcd902630b2047ef710e', - 'info_dict': { - 'id': '11538', - 'ext': 'mp4', - 'title': 'Ami Ami Idol - Hello! France', - 'description': 'md5:4eaab46ab68fa4197a317a88a53d3b86', - 'upload_date': '20140412', - 'uploader': 'Nolife', - 'uploader_id': 'NOL', - 'duration': 2851.2, - }, - 'skip': 'Requires noco account', - }, - { - 'url': 'http://noco.tv/emission/12610/lbl42/the-guild/s01e01-wake-up-call', - 'md5': 'c190f1f48e313c55838f1f412225934d', - 'info_dict': { - 'id': '12610', - 'ext': 'mp4', - 'title': 'The Guild #1 - Wake-Up Call', - 'timestamp': 1403863200, - 'upload_date': '20140627', - 'uploader': 'LBL42', - 'uploader_id': 'LBL', - 'duration': 233.023, - }, - 'skip': 'Requires noco account', - } - ] - - def _perform_login(self, username, password): - login = self._download_json( - self._LOGIN_URL, None, 'Logging in', - data=urlencode_postdata({ - 'a': 'login', - 'cookie': '1', - 'username': username, - 'password': password, - }), - headers={ - 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', - }) - - if 'erreur' in login: - raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True) - - @staticmethod - def _ts(): - return int(time.time() * 1000) - - def _call_api(self, path, video_id, note, sub_lang=None): - ts = compat_str(self._ts() + self._ts_offset) - tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest() - url = self._API_URL_TEMPLATE % (path, ts, tk) - if sub_lang: - url += self._SUB_LANG_TEMPLATE % sub_lang - - request = sanitized_Request(url) - request.add_header('Referer', self._referer) - - resp = self._download_json(request, video_id, note) - - if isinstance(resp, dict) and resp.get('error'): - self._raise_error(resp['error'], resp['description']) - - return resp - - def _raise_error(self, error, description): - raise ExtractorError( - '%s returned error: %s - %s' % (self.IE_NAME, error, description), - expected=True) - - def _real_extract(self, url): - video_id = self._match_id(url) - - # Timestamp adjustment offset between server time and local time - # must be calculated in order to use timestamps closest to server's - # in all API requests 
(see https://github.com/ytdl-org/youtube-dl/issues/7864) - webpage = self._download_webpage(url, video_id) - - player_url = self._search_regex( - r'(["\'])(?Phttps?://noco\.tv/(?:[^/]+/)+NocoPlayer.+?\.swf.*?)\1', - webpage, 'noco player', group='player', - default='http://noco.tv/cdata/js/player/NocoPlayer-v1.2.40.swf') - - qs = parse_qs(player_url) - ts = int_or_none(qs.get('ts', [None])[0]) - self._ts_offset = ts - self._ts() if ts else 0 - self._referer = player_url - - medias = self._call_api( - 'shows/%s/medias' % video_id, - video_id, 'Downloading video JSON') - - show = self._call_api( - 'shows/by_id/%s' % video_id, - video_id, 'Downloading show JSON')[0] - - options = self._call_api( - 'users/init', video_id, - 'Downloading user options JSON')['options'] - audio_lang_pref = options.get('audio_language') or options.get('language', 'fr') - - if audio_lang_pref == 'original': - audio_lang_pref = show['original_lang'] - if len(medias) == 1: - audio_lang_pref = list(medias.keys())[0] - elif audio_lang_pref not in medias: - audio_lang_pref = 'fr' - - qualities = self._call_api( - 'qualities', - video_id, 'Downloading qualities JSON') - - formats = [] - - for audio_lang, audio_lang_dict in medias.items(): - preference = 1 if audio_lang == audio_lang_pref else 0 - for sub_lang, lang_dict in audio_lang_dict['video_list'].items(): - for format_id, fmt in lang_dict['quality_list'].items(): - format_id_extended = 'audio-%s_sub-%s_%s' % (audio_lang, sub_lang, format_id) - - video = self._call_api( - 'shows/%s/video/%s/%s' % (video_id, format_id.lower(), audio_lang), - video_id, 'Downloading %s video JSON' % format_id_extended, - sub_lang if sub_lang != 'none' else None) - - file_url = video['file'] - if not file_url: - continue - - if file_url in ['forbidden', 'not found']: - popmessage = video['popmessage'] - self._raise_error(popmessage['title'], popmessage['message']) - - formats.append({ - 'url': file_url, - 'format_id': format_id_extended, - 'width': int_or_none(fmt.get('res_width')), - 'height': int_or_none(fmt.get('res_lines')), - 'abr': int_or_none(fmt.get('audiobitrate'), 1000), - 'vbr': int_or_none(fmt.get('videobitrate'), 1000), - 'filesize': int_or_none(fmt.get('filesize')), - 'format_note': qualities[format_id].get('quality_name'), - 'quality': qualities[format_id].get('priority'), - 'language_preference': preference, - }) - - self._sort_formats(formats) - - timestamp = parse_iso8601(show.get('online_date_start_utc'), ' ') - - if timestamp is not None and timestamp < 0: - timestamp = None - - uploader = show.get('partner_name') - uploader_id = show.get('partner_key') - duration = float_or_none(show.get('duration_ms'), 1000) - - thumbnails = [] - for thumbnail_key, thumbnail_url in show.items(): - m = re.search(r'^screenshot_(?P\d+)x(?P\d+)$', thumbnail_key) - if not m: - continue - thumbnails.append({ - 'url': thumbnail_url, - 'width': int(m.group('width')), - 'height': int(m.group('height')), - }) - - episode = show.get('show_TT') or show.get('show_OT') - family = show.get('family_TT') or show.get('family_OT') - episode_number = show.get('episode_number') - - title = '' - if family: - title += family - if episode_number: - title += ' #' + compat_str(episode_number) - if episode: - title += ' - ' + compat_str(episode) - - description = show.get('show_resume') or show.get('family_resume') - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnails': thumbnails, - 'timestamp': timestamp, - 'uploader': uploader, - 'uploader_id': uploader_id, - 
'duration': duration, - 'formats': formats, - } diff --git a/yt_dlp/extractor/spiegeltv.py b/yt_dlp/extractor/spiegeltv.py deleted file mode 100644 index 69942334e..000000000 --- a/yt_dlp/extractor/spiegeltv.py +++ /dev/null @@ -1,15 +0,0 @@ -from .common import InfoExtractor -from .nexx import NexxIE - - -class SpiegeltvIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?spiegel\.tv/videos/(?P\d+)' - _TEST = { - 'url': 'http://www.spiegel.tv/videos/161681-flug-mh370/', - 'only_matching': True, - } - - def _real_extract(self, url): - return self.url_result( - 'https://api.nexx.cloud/v3/748/videos/byid/%s' - % self._match_id(url), ie=NexxIE.ie_key()) diff --git a/yt_dlp/extractor/tastytrade.py b/yt_dlp/extractor/tastytrade.py deleted file mode 100644 index bb26926e8..000000000 --- a/yt_dlp/extractor/tastytrade.py +++ /dev/null @@ -1,41 +0,0 @@ -from .common import InfoExtractor -from .ooyala import OoyalaIE - - -class TastyTradeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tastytrade\.com/tt/shows/[^/]+/episodes/(?P[^/?#&]+)' - - _TESTS = [{ - 'url': 'https://www.tastytrade.com/tt/shows/market-measures/episodes/correlation-in-short-volatility-06-28-2017', - 'info_dict': { - 'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM', - 'ext': 'mp4', - 'title': 'A History of Teaming', - 'description': 'md5:2a9033db8da81f2edffa4c99888140b3', - 'duration': 422.255, - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': ['Ooyala'], - }, { - 'url': 'https://www.tastytrade.com/tt/shows/daily-dose/episodes/daily-dose-06-30-2017', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - ooyala_code = self._search_regex( - r'data-media-id=(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'ooyala code', group='code') - - info = self._search_json_ld(webpage, display_id, fatal=False) - info.update({ - '_type': 'url_transparent', - 'ie_key': OoyalaIE.ie_key(), - 'url': 'ooyala:%s' % ooyala_code, - 'display_id': display_id, - }) - return info diff --git a/yt_dlp/extractor/tudou.py b/yt_dlp/extractor/tudou.py deleted file mode 100644 index 69774ee38..000000000 --- a/yt_dlp/extractor/tudou.py +++ /dev/null @@ -1,45 +0,0 @@ -from .common import InfoExtractor - - -class TudouPlaylistIE(InfoExtractor): - IE_NAME = 'tudou:playlist' - _VALID_URL = r'https?://(?:www\.)?tudou\.com/listplay/(?P[\w-]{11})\.html' - _TESTS = [{ - 'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo.html', - 'info_dict': { - 'id': 'zzdE77v6Mmo', - }, - 'playlist_mincount': 209, - }] - - def _real_extract(self, url): - playlist_id = self._match_id(url) - playlist_data = self._download_json( - 'http://www.tudou.com/tvp/plist.action?lcode=%s' % playlist_id, playlist_id) - entries = [self.url_result( - 'http://www.tudou.com/programs/view/%s' % item['icode'], - 'Tudou', item['icode'], - item['kw']) for item in playlist_data['items']] - return self.playlist_result(entries, playlist_id) - - -class TudouAlbumIE(InfoExtractor): - IE_NAME = 'tudou:album' - _VALID_URL = r'https?://(?:www\.)?tudou\.com/album(?:cover|play)/(?P[\w-]{11})' - _TESTS = [{ - 'url': 'http://www.tudou.com/albumplay/v5qckFJvNJg.html', - 'info_dict': { - 'id': 'v5qckFJvNJg', - }, - 'playlist_mincount': 45, - }] - - def _real_extract(self, url): - album_id = self._match_id(url) - album_data = self._download_json( - 'http://www.tudou.com/tvp/alist.action?acode=%s' % album_id, album_id) - entries = [self.url_result( - 'http://www.tudou.com/programs/view/%s' % item['icode'], - 'Tudou', 
item['icode'], - item['kw']) for item in album_data['items']] - return self.playlist_result(entries, album_id) diff --git a/yt_dlp/extractor/vidzi.py b/yt_dlp/extractor/vidzi.py deleted file mode 100644 index efa9be116..000000000 --- a/yt_dlp/extractor/vidzi.py +++ /dev/null @@ -1,65 +0,0 @@ -import re - -from .common import InfoExtractor -from ..utils import ( - decode_packed_codes, - js_to_json, - NO_DEFAULT, - PACKED_CODES_RE, -) - - -class VidziIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vidzi\.(?:tv|cc|si|nu)/(?:embed-)?(?P[0-9a-zA-Z]+)' - _TESTS = [{ - 'url': 'http://vidzi.tv/cghql9yq6emu.html', - 'md5': '4f16c71ca0c8c8635ab6932b5f3f1660', - 'info_dict': { - 'id': 'cghql9yq6emu', - 'ext': 'mp4', - 'title': 'youtube-dl test video 1\\\\2\'3/4<5\\\\6ä7↭', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - 'url': 'http://vidzi.tv/embed-4z2yb0rzphe9-600x338.html', - 'only_matching': True, - }, { - 'url': 'http://vidzi.cc/cghql9yq6emu.html', - 'only_matching': True, - }, { - 'url': 'https://vidzi.si/rph9gztxj1et.html', - 'only_matching': True, - }, { - 'url': 'http://vidzi.nu/cghql9yq6emu.html', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage( - 'http://vidzi.tv/%s' % video_id, video_id) - title = self._html_search_regex( - r'(?s)
<h2 class="video-title">(.*?)</h2>
    ', webpage, 'title') - - codes = [webpage] - codes.extend([ - decode_packed_codes(mobj.group(0)).replace('\\\'', '\'') - for mobj in re.finditer(PACKED_CODES_RE, webpage)]) - for num, code in enumerate(codes, 1): - jwplayer_data = self._parse_json( - self._search_regex( - r'setup\(([^)]+)\)', code, 'jwplayer data', - default=NO_DEFAULT if num == len(codes) else '{}'), - video_id, transform_source=lambda s: js_to_json( - re.sub(r'\s*\+\s*window\[.+?\]', '', s))) - if jwplayer_data: - break - - info_dict = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False) - info_dict['title'] = title - - return info_dict From c1714454313e01c94a7e55e1cb99d439ff933a43 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 27 Apr 2022 13:45:45 +0530 Subject: [PATCH 0987/2552] [cleanup,build] Cleanup some build-related code Fixes an issue in 7ab56be2c7309a2d11d4ee28c71f8fb29da21ef7 --- Makefile | 5 +++- devscripts/bash-completion.py | 2 +- devscripts/fish-completion.py | 2 +- devscripts/make_issue_template.py | 24 +++++++++--------- devscripts/make_lazy_extractors.py | 2 +- devscripts/zsh-completion.py | 2 +- pyinst.py | 39 ++++++++++++++++++------------ setup.py | 22 ++++++++++++----- 8 files changed, 60 insertions(+), 38 deletions(-) diff --git a/Makefile b/Makefile index 0e911feba..179aaff57 100644 --- a/Makefile +++ b/Makefile @@ -22,7 +22,9 @@ clean-dist: rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \ yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS .mailmap clean-cache: - find . \( -name "*.pyc" -o -name "*.class" \) -delete + find . \( \ + -type d -name .pytest_cache -o -type d -name __pycache__ -o -name "*.pyc" -o -name "*.class" \ + \) -prune -exec rm -rf {} \; completion-bash: completions/bash/yt-dlp completion-fish: completions/fish/yt-dlp.fish @@ -131,6 +133,7 @@ yt-dlp.tar.gz: all --exclude '*.pyo' \ --exclude '*~' \ --exclude '__pycache__' \ + --exclude '.pytest_cache' \ --exclude '.git' \ -- \ README.md supportedsites.md Changelog.md LICENSE \ diff --git a/devscripts/bash-completion.py b/devscripts/bash-completion.py index 27ec7ca7a..268e8a2ae 100755 --- a/devscripts/bash-completion.py +++ b/devscripts/bash-completion.py @@ -24,5 +24,5 @@ def build_completion(opt_parser): f.write(filled_template) -parser = yt_dlp.parseOpts()[0] +parser = yt_dlp.parseOpts(ignore_config_files=True)[0] build_completion(parser) diff --git a/devscripts/fish-completion.py b/devscripts/fish-completion.py index dcb1d6582..d9c0048e2 100755 --- a/devscripts/fish-completion.py +++ b/devscripts/fish-completion.py @@ -44,5 +44,5 @@ def build_completion(opt_parser): f.write(filled_template) -parser = yt_dlp.parseOpts()[0] +parser = yt_dlp.parseOpts(ignore_config_files=True)[0] build_completion(parser) diff --git a/devscripts/make_issue_template.py b/devscripts/make_issue_template.py index 878b94166..811a3e9b5 100644 --- a/devscripts/make_issue_template.py +++ b/devscripts/make_issue_template.py @@ -3,6 +3,17 @@ import io import optparse +def read(fname): + with open(fname, encoding='utf-8') as f: + return f.read() + + +# Get the version from yt_dlp/version.py without importing the package +def read_version(fname): + exec(compile(read(fname), fname, 'exec')) + return locals()['__version__'] + + def main(): parser = optparse.OptionParser(usage='%prog INFILE OUTFILE') options, args = parser.parse_args() @@ -10,18 +21,9 @@ def main(): parser.error('Expected an input and an output 
filename') infile, outfile = args - - with open(infile, encoding='utf-8') as inf: - issue_template_tmpl = inf.read() - - # Get the version from yt_dlp/version.py without importing the package - exec(compile(open('yt_dlp/version.py').read(), - 'yt_dlp/version.py', 'exec')) - - out = issue_template_tmpl % {'version': locals()['__version__']} - with open(outfile, 'w', encoding='utf-8') as outf: - outf.write(out) + outf.write( + read(infile) % {'version': read_version('yt_dlp/version.py')}) if __name__ == '__main__': diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py index 5e2070602..6dc8fed90 100644 --- a/devscripts/make_lazy_extractors.py +++ b/devscripts/make_lazy_extractors.py @@ -21,7 +21,7 @@ from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor if os.path.exists(plugins_blocked_dirname): os.rename(plugins_blocked_dirname, plugins_dirname) -with open('devscripts/lazy_load_template.py') as f: +with open('devscripts/lazy_load_template.py', encoding='utf-8') as f: module_template = f.read() CLASS_PROPERTIES = ['ie_key', 'working', '_match_valid_url', 'suitable', '_match_id', 'get_temp_id'] diff --git a/devscripts/zsh-completion.py b/devscripts/zsh-completion.py index 06660d8fd..59faea06a 100755 --- a/devscripts/zsh-completion.py +++ b/devscripts/zsh-completion.py @@ -43,5 +43,5 @@ def build_completion(opt_parser): f.write(template) -parser = yt_dlp.parseOpts()[0] +parser = yt_dlp.parseOpts(ignore_config_files=True)[0] build_completion(parser) diff --git a/pyinst.py b/pyinst.py index c63d879a0..bc3c58ff8 100644 --- a/pyinst.py +++ b/pyinst.py @@ -3,7 +3,7 @@ import os import platform import sys -from PyInstaller.utils.hooks import collect_submodules +from PyInstaller.__main__ import run as run_pyinstaller OS_NAME = platform.system() if OS_NAME == 'Windows': @@ -20,18 +20,22 @@ if OS_NAME == 'Windows': elif OS_NAME == 'Darwin': pass else: - raise Exception('{OS_NAME} is not supported') + raise Exception(f'{OS_NAME} is not supported') ARCH = platform.architecture()[0][:2] def main(): opts = parse_options() - version = read_version() + version = read_version('yt_dlp/version.py') + + onedir = '--onedir' in opts or '-D' in opts + if not onedir and '-F' not in opts and '--onefile' not in opts: + opts.append('--onefile') suffix = '_macos' if OS_NAME == 'Darwin' else '_x86' if ARCH == '32' else '' final_file = 'dist/%syt-dlp%s%s' % ( - 'yt-dlp/' if '--onedir' in opts else '', suffix, '.exe' if OS_NAME == 'Windows' else '') + 'yt-dlp/' if onedir else '', suffix, '.exe' if OS_NAME == 'Windows' else '') print(f'Building yt-dlp v{version} {ARCH}bit for {OS_NAME} with options {opts}') print('Remember to update the version using "devscripts/update-version.py"') @@ -45,17 +49,16 @@ def main(): '--icon=devscripts/logo.ico', '--upx-exclude=vcruntime140.dll', '--noconfirm', + # NB: Modules that are only imported dynamically must be added here. 
+ # --collect-submodules may not work correctly if user has a yt-dlp installed via PIP + '--hidden-import=yt_dlp.compat._legacy', *dependency_options(), *opts, - '--collect-submodules=yt_dlp', 'yt_dlp/__main__.py', ] - print(f'Running PyInstaller with {opts}') - - import PyInstaller.__main__ - - PyInstaller.__main__.run(opts) + print(f'Running PyInstaller with {opts}') + run_pyinstaller(opts) set_version_info(final_file, version) @@ -66,12 +69,14 @@ def parse_options(): if ARCH != opts[0]: raise Exception(f'{opts[0]}bit executable cannot be built on a {ARCH}bit system') opts = opts[1:] - return opts or ['--onefile'] + return opts -def read_version(): - exec(compile(open('yt_dlp/version.py').read(), 'yt_dlp/version.py', 'exec')) - return locals()['__version__'] +# Get the version from yt_dlp/version.py without importing the package +def read_version(fname): + with open(fname, encoding='utf-8') as f: + exec(compile(f.read(), fname, 'exec')) + return locals()['__version__'] def version_to_list(version): @@ -80,10 +85,12 @@ def version_to_list(version): def dependency_options(): - dependencies = [pycryptodome_module(), 'mutagen', 'brotli', 'certifi'] + collect_submodules('websockets') - excluded_modules = ['test', 'ytdlp_plugins', 'youtube-dl', 'youtube-dlc'] + # Due to the current implementation, these are auto-detected, but explicitly add them just in case + dependencies = [pycryptodome_module(), 'mutagen', 'brotli', 'certifi', 'websockets'] + excluded_modules = ['test', 'ytdlp_plugins', 'youtube_dl', 'youtube_dlc'] yield from (f'--hidden-import={module}' for module in dependencies) + yield '--collect-submodules=websockets' yield from (f'--exclude-module={module}' for module in excluded_modules) diff --git a/setup.py b/setup.py index 141cb238f..89b819b1a 100644 --- a/setup.py +++ b/setup.py @@ -11,18 +11,28 @@ except ImportError: setuptools_available = False from distutils.spawn import spawn + +def read(fname): + with open(fname, encoding='utf-8') as f: + return f.read() + + # Get the version from yt_dlp/version.py without importing the package -exec(compile(open('yt_dlp/version.py').read(), 'yt_dlp/version.py', 'exec')) +def read_version(fname): + exec(compile(read(fname), fname, 'exec')) + return locals()['__version__'] + +VERSION = read_version('yt_dlp/version.py') DESCRIPTION = 'A youtube-dl fork with additional features and patches' LONG_DESCRIPTION = '\n\n'.join(( 'Official repository: ', '**PS**: Some links in this document will not work since this is a copy of the README.md from Github', - open('README.md', encoding='utf-8').read())) + read('README.md'))) -REQUIREMENTS = open('requirements.txt', encoding='utf-8').read().splitlines() +REQUIREMENTS = read('requirements.txt').splitlines() if sys.argv[1:2] == ['py2exe']: @@ -34,11 +44,11 @@ if sys.argv[1:2] == ['py2exe']: 'console': [{ 'script': './yt_dlp/__main__.py', 'dest_base': 'yt-dlp', - 'version': __version__, + 'version': VERSION, 'description': DESCRIPTION, 'comments': LONG_DESCRIPTION.split('\n')[0], 'product_name': 'yt-dlp', - 'product_version': __version__, + 'product_version': VERSION, }], 'options': { 'py2exe': { @@ -107,7 +117,7 @@ else: setup( name='yt-dlp', - version=__version__, + version=VERSION, maintainer='pukkandan', maintainer_email='pukkandan.ytdlp@gmail.com', description=DESCRIPTION, From 83bfb5e2907ffb00fd54de0720650f5ae7ba03dd Mon Sep 17 00:00:00 2001 From: ekangmonyet <71442331+ekangmonyet@users.noreply.github.com> Date: Thu, 28 Apr 2022 00:44:29 +0800 Subject: [PATCH 0988/2552] [Niconico] Support 2FA (#3559) 
Authored by: ekangmonyet --- yt_dlp/extractor/niconico.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py index 353ae1c72..a80b544f8 100644 --- a/yt_dlp/extractor/niconico.py +++ b/yt_dlp/extractor/niconico.py @@ -7,8 +7,6 @@ import time from .common import InfoExtractor, SearchInfoExtractor from ..compat import ( - compat_parse_qs, - compat_urllib_parse_urlparse, compat_HTTPError, ) from ..utils import ( @@ -32,6 +30,7 @@ from ..utils import ( update_url_query, url_or_none, urlencode_postdata, + urljoin, ) @@ -192,7 +191,7 @@ class NiconicoIE(InfoExtractor): self._request_webpage( 'https://account.nicovideo.jp/login', None, note='Acquiring Login session') - urlh = self._request_webpage( + page = self._download_webpage( 'https://account.nicovideo.jp/login/redirector?show_button_twitter=1&site=niconico&show_button_facebook=1', None, note='Logging in', errnote='Unable to log in', data=urlencode_postdata(login_form_strs), @@ -200,14 +199,27 @@ class NiconicoIE(InfoExtractor): 'Referer': 'https://account.nicovideo.jp/login', 'Content-Type': 'application/x-www-form-urlencoded', }) - if urlh is False: - login_ok = False - else: - parts = compat_urllib_parse_urlparse(urlh.geturl()) - if compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login': - login_ok = False + if 'oneTimePw' in page: + post_url = self._search_regex( + r']+action=(["\'])(?P.+?)\1', page, 'post url', group='url') + page = self._download_webpage( + urljoin('https://account.nicovideo.jp', post_url), None, + note='Performing MFA', errnote='Unable to complete MFA', + data=urlencode_postdata({ + 'otp': self._get_tfa_info('6 digits code') + }), headers={ + 'Content-Type': 'application/x-www-form-urlencoded', + }) + if 'oneTimePw' in page or 'formError' in page: + err_msg = self._html_search_regex( + r'formError["\']+>(.*?)', page, 'form_error', + default='There\'s an error but the message can\'t be parsed.', + flags=re.DOTALL) + self.report_warning(f'Unable to log in: MFA challenge failed, "{err_msg}"') + return False + login_ok = 'class="notice error"' not in page if not login_ok: - self.report_warning('unable to log in: bad username or password') + self.report_warning('Unable to log in: bad username or password') return login_ok def _get_heartbeat_info(self, info_dict): From 997378f9df7ca25a370e13b265205962e986373b Mon Sep 17 00:00:00 2001 From: Lesmiscore Date: Thu, 28 Apr 2022 01:59:45 +0900 Subject: [PATCH 0989/2552] [twitcasting] Pass headers for each formats (#3568) Authored by: Lesmiscore --- yt_dlp/extractor/twitcasting.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/twitcasting.py b/yt_dlp/extractor/twitcasting.py index 07565383a..0dbb97a36 100644 --- a/yt_dlp/extractor/twitcasting.py +++ b/yt_dlp/extractor/twitcasting.py @@ -187,6 +187,7 @@ class TwitCastingIE(InfoExtractor): infodict = { # No problem here since there's only one manifest 'formats': formats, + 'http_headers': self._M3U8_HEADERS, } else: infodict = { From 779da8e31b411d7bb088f246210eeb608adc314b Mon Sep 17 00:00:00 2001 From: Elyse Date: Wed, 27 Apr 2022 13:01:35 -0500 Subject: [PATCH 0990/2552] [extractor] Update dash `manifest_url` after redirects (#3563) Closes #2696 Authored by: elyse0 --- yt_dlp/extractor/common.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 3ee5e257c..c60474c7b 100644 --- a/yt_dlp/extractor/common.py 
+++ b/yt_dlp/extractor/common.py @@ -2676,7 +2676,10 @@ class InfoExtractor: mpd_doc, urlh = res if mpd_doc is None: return [], {} - mpd_base_url = base_url(urlh.geturl()) + + # We could have been redirected to a new url when we retrieved our mpd file. + mpd_url = urlh.geturl() + mpd_base_url = base_url(mpd_url) return self._parse_mpd_formats_and_subtitles( mpd_doc, mpd_id, mpd_base_url, mpd_url) From b3602f68245588fbedc23917be2fae2780dacf05 Mon Sep 17 00:00:00 2001 From: Evan Spensley <94762716+evansp@users.noreply.github.com> Date: Wed, 27 Apr 2022 17:30:24 -0400 Subject: [PATCH 0991/2552] [InfoQ] Don't fail on missing audio format (#3573) Closes #3441 Authored by: evansp --- yt_dlp/extractor/infoq.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/infoq.py b/yt_dlp/extractor/infoq.py index abf7d36ef..6b31701eb 100644 --- a/yt_dlp/extractor/infoq.py +++ b/yt_dlp/extractor/infoq.py @@ -4,8 +4,10 @@ from ..compat import ( compat_urlparse, ) from ..utils import ( + ExtractorError, determine_ext, update_url_query, + traverse_obj, ) from .bokecc import BokeCCBaseIE @@ -34,6 +36,7 @@ class InfoQIE(BokeCCBaseIE): 'ext': 'flv', 'description': 'md5:308d981fb28fa42f49f9568322c683ff', }, + 'skip': 'Sorry, the page you visited does not exist', }, { 'url': 'https://www.infoq.com/presentations/Simple-Made-Easy', 'md5': '0e34642d4d9ef44bf86f66f6399672db', @@ -86,8 +89,10 @@ class InfoQIE(BokeCCBaseIE): }] def _extract_http_audio(self, webpage, video_id): - fields = self._form_hidden_inputs('mp3Form', webpage) - http_audio_url = fields.get('filename') + try: + http_audio_url = traverse_obj(self._form_hidden_inputs('mp3Form', webpage), 'filename') + except ExtractorError: + http_audio_url = None if not http_audio_url: return [] From a076c1f97a2fd42555578741323d215010eea767 Mon Sep 17 00:00:00 2001 From: Elyse Date: Wed, 27 Apr 2022 17:50:01 -0500 Subject: [PATCH 0992/2552] [extractor] Update `manifest_url`s after redirect (#3575) Authored by: elyse0 --- yt_dlp/extractor/common.py | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index c60474c7b..8c2fd7fea 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1982,17 +1982,19 @@ class InfoExtractor: def _extract_f4m_formats(self, manifest_url, video_id, preference=None, quality=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), fatal=True, m3u8_id=None, data=None, headers={}, query={}): - manifest = self._download_xml( + res = self._download_xml_handle( manifest_url, video_id, 'Downloading f4m manifest', 'Unable to download f4m manifest', # Some manifests may be malformed, e.g. 
prosiebensat1 generated manifests # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244) transform_source=transform_source, fatal=fatal, data=data, headers=headers, query=query) - - if manifest is False: + if res is False: return [] + manifest, urlh = res + manifest_url = urlh.geturl() + return self._parse_f4m_formats( manifest, manifest_url, video_id, preference=preference, quality=quality, f4m_id=f4m_id, transform_source=transform_source, fatal=fatal, m3u8_id=m3u8_id) @@ -2400,12 +2402,14 @@ class InfoExtractor: return '/'.join(out) def _extract_smil_formats_and_subtitles(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None): - smil = self._download_smil(smil_url, video_id, fatal=fatal, transform_source=transform_source) - - if smil is False: + res = self._download_smil(smil_url, video_id, fatal=fatal, transform_source=transform_source) + if res is False: assert not fatal return [], {} + smil, urlh = res + smil_url = urlh.geturl() + namespace = self._parse_smil_namespace(smil) fmts = self._parse_smil_formats( @@ -2422,13 +2426,17 @@ class InfoExtractor: return fmts def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None): - smil = self._download_smil(smil_url, video_id, fatal=fatal) - if smil is False: + res = self._download_smil(smil_url, video_id, fatal=fatal) + if res is False: return {} + + smil, urlh = res + smil_url = urlh.geturl() + return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params) def _download_smil(self, smil_url, video_id, fatal=True, transform_source=None): - return self._download_xml( + return self._download_xml_handle( smil_url, video_id, 'Downloading SMIL file', 'Unable to download SMIL file', fatal=fatal, transform_source=transform_source) @@ -2607,11 +2615,15 @@ class InfoExtractor: return subtitles def _extract_xspf_playlist(self, xspf_url, playlist_id, fatal=True): - xspf = self._download_xml( + res = self._download_xml_handle( xspf_url, playlist_id, 'Downloading xpsf playlist', 'Unable to download xspf manifest', fatal=fatal) - if xspf is False: + if res is False: return [] + + xspf, urlh = res + xspf_url = urlh.geturl() + return self._parse_xspf( xspf, playlist_id, xspf_url=xspf_url, xspf_base_url=base_url(xspf_url)) From 4877f9055c68e5da7d91b03bfb384de79440dc89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Giedrius=20Statkevi=C4=8Dius?= Date: Thu, 28 Apr 2022 14:38:36 +0300 Subject: [PATCH 0993/2552] [lrt] Support livestreams (#3555) Authored by: GiedriusS --- yt_dlp/extractor/extractors.py | 7 +++-- yt_dlp/extractor/lrt.py | 55 +++++++++++++++++++++++++++++----- 2 files changed, 52 insertions(+), 10 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 952738884..070d5cc65 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -640,7 +640,7 @@ from .hungama import ( HungamaAlbumPlaylistIE, ) from .hypem import HypemIE -from .icareus import IcareusIE +from .icareus import IcareusIE from .ichinanalive import ( IchinanaLiveIE, IchinanaLiveClipIE, @@ -814,7 +814,10 @@ from .lnkgo import ( ) from .localnews8 import LocalNews8IE from .lovehomeporn import LoveHomePornIE -from .lrt import LRTIE +from .lrt import ( + LRTVODIE, + LRTStreamIE +) from .lynda import ( LyndaIE, LyndaCourseIE diff --git a/yt_dlp/extractor/lrt.py b/yt_dlp/extractor/lrt.py index 53076b839..a49fd592f 100644 --- a/yt_dlp/extractor/lrt.py +++ b/yt_dlp/extractor/lrt.py @@ -2,16 +2,58 @@ from .common import InfoExtractor from ..utils import ( 
clean_html,
     merge_dicts,
+    traverse_obj,
+    url_or_none,
 )
 
 
-class LRTIE(InfoExtractor):
-    IE_NAME = 'lrt.lt'
+class LRTBaseIE(InfoExtractor):
+    def _extract_js_var(self, webpage, var_name, default=None):
+        return self._search_regex(
+            fr'{var_name}\s*=\s*(["\'])((?:(?!\1).)+)\1',
+            webpage, var_name.replace('_', ' '), default, group=2)
+
+
+class LRTStreamIE(LRTBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?lrt\.lt/mediateka/tiesiogiai/(?P<id>[\w-]+)'
+    _TESTS = [{
+        'url': 'https://www.lrt.lt/mediateka/tiesiogiai/lrt-opus',
+        'info_dict': {
+            'id': 'lrt-opus',
+            'live_status': 'is_live',
+            'title': 're:^LRT Opus.+$',
+            'ext': 'mp4'
+        }
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        streams_data = self._download_json(self._extract_js_var(webpage, 'tokenURL'), video_id)
+
+        formats, subtitles = [], {}
+        for stream_url in traverse_obj(streams_data, (
+                'response', 'data', lambda k, _: k.startswith('content')), expected_type=url_or_none):
+            fmts, subs = self._extract_m3u8_formats_and_subtitles(stream_url, video_id, 'mp4', m3u8_id='hls', live=True)
+            formats.extend(fmts)
+            subtitles = self._merge_subtitles(subtitles, subs)
+        self._sort_formats(formats)
+
+        stream_title = self._extract_js_var(webpage, 'video_title', 'LRT')
+        return {
+            'id': video_id,
+            'formats': formats,
+            'subtitles': subtitles,
+            'is_live': True,
+            'title': f'{self._og_search_title(webpage)} - {stream_title}'
+        }
+
+
+class LRTVODIE(LRTBaseIE):
     _VALID_URL = r'https?://(?:www\.)?lrt\.lt(?P<path>/mediateka/irasas/(?P<id>[0-9]+))'
     _TESTS = [{
         # m3u8 download
         'url': 'https://www.lrt.lt/mediateka/irasas/2000127261/greita-ir-gardu-sicilijos-ikvepta-klasikiniu-makaronu-su-baklazanais-vakariene',
-        'md5': '85cb2bb530f31d91a9c65b479516ade4',
         'info_dict': {
             'id': '2000127261',
             'ext': 'mp4',
@@ -20,6 +62,8 @@ class LRTIE(InfoExtractor):
             'duration': 3035,
             'timestamp': 1604079000,
             'upload_date': '20201030',
+            'tags': ['LRT TELEVIZIJA', 'Beatos virtuvė', 'Beata Nicholson', 'Makaronai', 'Baklažanai', 'Vakarienė', 'Receptas'],
+            'thumbnail': 'https://www.lrt.lt/img/2020/10/30/764041-126478-1287x836.jpg'
         },
     }, {
         # direct mp3 download
@@ -36,11 +80,6 @@ class LRTIE(InfoExtractor):
         },
     }]
 
-    def _extract_js_var(self, webpage, var_name, default):
-        return self._search_regex(
-            r'%s\s*=\s*(["\'])((?:(?!\1).)+)\1' % var_name,
-            webpage, var_name.replace('_', ' '), default, group=2)
-
     def _real_extract(self, url):
         path, video_id = self._match_valid_url(url).groups()
         webpage = self._download_webpage(url, video_id)

From 0a5a191a2a33e3b305aaf684576b7129ba5173a0 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Wed, 27 Apr 2022 21:52:57 +0530
Subject: [PATCH 0994/2552] Improve `--clean-infojson`

It should not remove fields that may be needed for `--load-infojson`.
Eg: `_ffmpeg_args`, `_has_drm` --- test/test_YoutubeDL.py | 2 +- yt_dlp/YoutubeDL.py | 15 ++++++++------- yt_dlp/downloader/external.py | 7 +++++-- yt_dlp/extractor/common.py | 6 ++++-- yt_dlp/extractor/nbc.py | 2 +- yt_dlp/extractor/radiko.py | 2 +- yt_dlp/extractor/turner.py | 2 +- 7 files changed, 21 insertions(+), 15 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 051a203ac..1133f6165 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -661,7 +661,7 @@ class TestYoutubeDL(unittest.TestCase): 'duration': 100000, 'playlist_index': 1, 'playlist_autonumber': 2, - '_last_playlist_index': 100, + '__last_playlist_index': 100, 'n_entries': 10, 'formats': [{'id': 'id 1'}, {'id': 'id 2'}, {'id': 'id 3'}] } diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 9acd88171..eadc5d7ec 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -954,7 +954,7 @@ class YoutubeDL: self.to_screen('Deleting existing file') def raise_no_formats(self, info, forced=False, *, msg=None): - has_drm = info.get('__has_drm') + has_drm = info.get('_has_drm') ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg) msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!' if forced or not ignored: @@ -1052,7 +1052,7 @@ class YoutubeDL: # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences # of %(field)s to %(field)0Nd for backward compatibility field_size_compat_map = { - 'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0), + 'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0), 'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0), 'autonumber': self.params.get('autonumber_size') or 5, } @@ -1764,7 +1764,7 @@ class YoutubeDL: entry['__x_forwarded_for_ip'] = x_forwarded_for extra = { 'n_entries': n_entries, - '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries), + '__last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries), 'playlist_count': ie_result.get('playlist_count'), 'playlist_index': playlist_index, 'playlist_autonumber': i, @@ -2436,10 +2436,11 @@ class YoutubeDL: else: formats = info_dict['formats'] - info_dict['__has_drm'] = any(f.get('has_drm') for f in formats) + # or None ensures --clean-infojson removes it + info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None if not self.params.get('allow_unplayable_formats'): formats = [f for f in formats if not f.get('has_drm')] - if info_dict['__has_drm'] and all( + if info_dict['_has_drm'] and all( f.get('acodec') == f.get('vcodec') == 'none' for f in formats): self.report_warning( 'This video is DRM protected and only images are available for download. 
' @@ -3266,9 +3267,9 @@ class YoutubeDL: info_dict.setdefault('_type', 'video') if remove_private_keys: - reject = lambda k, v: v is None or (k.startswith('_') and k != '_type') or k in { + reject = lambda k, v: v is None or k.startswith('__') or k in { 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries', - 'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber', + 'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber', } else: reject = lambda k, v: False diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index da38e502d..4fe56bb95 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -20,6 +20,7 @@ from ..utils import ( encodeFilename, handle_youtubedl_headers, remove_end, + traverse_obj, ) @@ -363,9 +364,11 @@ class FFmpegFD(ExternalFD): if not self.params.get('verbose'): args += ['-hide_banner'] - args += info_dict.get('_ffmpeg_args', []) + args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args'), default=[]) - # This option exists only for compatibility. Extractors should use `_ffmpeg_args` instead + # These exists only for compatibility. Extractors should use + # info_dict['downloader_options']['ffmpeg_args'] instead + args += info_dict.get('_ffmpeg_args') seekable = info_dict.get('_seekable') if seekable is not None: # setting -seekable prevents ffmpeg from guessing if the server diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 8c2fd7fea..63f7b5d4a 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -208,8 +208,10 @@ class InfoExtractor: * no_resume The server does not support resuming the (HTTP or RTMP) download. Boolean. * has_drm The format has DRM and cannot be downloaded. 
Boolean
-                    * downloader_options A dictionary of downloader options as
-                                 described in FileDownloader (For internal use only)
+                    * downloader_options A dictionary of downloader options
+                                 (For internal use only)
+                                 * http_chunk_size Chunk size for HTTP downloads
+                                 * ffmpeg_args     Extra arguments for ffmpeg downloader
                     RTMP formats can also have the additional fields: page_url,
                     app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn,
                     rtmp_protocol, rtmp_real_time
diff --git a/yt_dlp/extractor/nbc.py b/yt_dlp/extractor/nbc.py
index 8aab80a0f..365c2e60d 100644
--- a/yt_dlp/extractor/nbc.py
+++ b/yt_dlp/extractor/nbc.py
@@ -579,7 +579,7 @@ class NBCOlympicsStreamIE(AdobePassIE):
         for f in formats:
             # -http_seekable requires ffmpeg 4.3+ but it doesnt seem possible to
             # download with ffmpeg without this option
-            f['_ffmpeg_args'] = ['-seekable', '0', '-http_seekable', '0', '-icy', '0']
+            f['downloader_options'] = {'ffmpeg_args': ['-seekable', '0', '-http_seekable', '0', '-icy', '0']}
         self._sort_formats(formats)
 
         return {
diff --git a/yt_dlp/extractor/radiko.py b/yt_dlp/extractor/radiko.py
index a0f5ebdd0..651cfe63b 100644
--- a/yt_dlp/extractor/radiko.py
+++ b/yt_dlp/extractor/radiko.py
@@ -125,7 +125,7 @@ class RadikoBaseIE(InfoExtractor):
                 # Prioritize live radio vs playback based on extractor
                 sf['preference'] = 100 if is_onair else -100
                 if not is_onair and url_attrib['timefree'] == '1' and time_to_skip:
-                    sf['_ffmpeg_args'] = ['-ss', time_to_skip]
+                    sf['downloader_options'] = {'ffmpeg_args': ['-ss', time_to_skip]}
                 formats.extend(subformats)
 
         self._sort_formats(formats)
diff --git a/yt_dlp/extractor/turner.py b/yt_dlp/extractor/turner.py
index 568b6de49..fae8b51e7 100644
--- a/yt_dlp/extractor/turner.py
+++ b/yt_dlp/extractor/turner.py
@@ -141,7 +141,7 @@ class TurnerBaseIE(AdobePassIE):
                     m3u8_id=format_id or 'hls', fatal=False)
                 if '/secure/' in video_url and '?hdnea=' in video_url:
                     for f in m3u8_formats:
-                        f['_ffmpeg_args'] = ['-seekable', '0']
+                        f['downloader_options'] = {'ffmpeg_args': ['-seekable', '0']}
                 formats.extend(m3u8_formats)
             elif ext == 'f4m':
                 formats.extend(self._extract_f4m_formats(

From 59f943cd5097e9bdbc3cb3e6b5675e43d369341a Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Thu, 28 Apr 2022 19:11:04 +0530
Subject: [PATCH 0995/2552] [utils] `write_string`: Workaround newline issue in
 `conhost`

On windows `conhost`, when `WINDOWS_VT_MODE` is enabled, `\n` is not
actually sent if the window is exactly the length of the printed line, and
the line does not end with a white-space character. So the line-break
disappears when resizing the window.
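The workaround is to pad every line-break with a space, so the printed
line always ends in a white-space character. As a rough standalone sketch
of the idea (the `emit` helper here is hypothetical; the actual change is
the two-line addition to `write_string` in the diff below):

    def emit(s, out, windows_vt_mode=True):
        # Keep every line ending in white-space so conhost does not swallow
        # the '\n' when the text exactly fills the window width
        if windows_vt_mode:
            s = s.replace('\n', ' \n')
        out.write(s)
        out.flush()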
Fixes #1863 --- yt_dlp/YoutubeDL.py | 2 +- yt_dlp/utils.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index eadc5d7ec..4351699b6 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3580,7 +3580,7 @@ class YoutubeDL: def get_encoding(stream): ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)) if not supports_terminal_sequences(stream): - from .compat import WINDOWS_VT_MODE + from .compat import WINDOWS_VT_MODE # Must be imported locally ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)' return ret diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 0171394fc..7faee62ac 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -1851,6 +1851,10 @@ def write_string(s, out=None, encoding=None): assert isinstance(s, str) out = out or sys.stderr + from .compat import WINDOWS_VT_MODE # Must be imported locally + if WINDOWS_VT_MODE: + s = s.replace('\n', ' \n') + if 'b' in getattr(out, 'mode', ''): byt = s.encode(encoding or preferredencoding(), 'ignore') out.write(byt) From 492272fed630e3cd4e7649afc03f4084e58df174 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 28 Apr 2022 20:03:26 +0530 Subject: [PATCH 0996/2552] `--match-filter -` to interactively ask for each video --- README.md | 4 +++- yt_dlp/YoutubeDL.py | 15 ++++++++++++++- yt_dlp/minicurses.py | 1 + yt_dlp/options.py | 3 ++- yt_dlp/utils.py | 12 ++++++++---- 5 files changed, 28 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index d401acb21..ca931aba3 100644 --- a/README.md +++ b/README.md @@ -451,7 +451,9 @@ You can also fork the project on github and run your fork's [build workflow](.gi those that have a like count more than 100 (or the like field is not available) and also has a description that contains the - phrase "cats & dogs" (ignoring case) + phrase "cats & dogs" (ignoring case). Use + "--match-filter -" to interactively ask + whether to download each video --no-match-filter Do not use generic video filter (default) --no-playlist Download only the video, if the URL refers to a video and a playlist diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 4351699b6..78345f87a 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -413,6 +413,8 @@ class YoutubeDL: every video. If it returns a message, the video is ignored. If it returns None, the video is downloaded. + If it returns utils.NO_DEFAULT, the user is interactively + asked whether to download the video. match_filter_func in utils.py is one example for this. no_color: Do not emit color codes in output. geo_bypass: Bypass geographic restriction via faking X-Forwarded-For @@ -878,6 +880,7 @@ class YoutubeDL: Styles = Namespace( HEADERS='yellow', EMPHASIS='light blue', + FILENAME='green', ID='green', DELIM='blue', ERROR='red', @@ -1303,7 +1306,17 @@ class YoutubeDL: except TypeError: # For backward compatibility ret = None if incomplete else match_filter(info_dict) - if ret is not None: + if ret is NO_DEFAULT: + while True: + filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME) + reply = input(self._format_screen( + f'Download "{filename}"? 
(Y/n): ', self.Styles.EMPHASIS)).lower().strip() + if reply in {'y', ''}: + return None + elif reply == 'n': + return f'Skipping {video_title}' + return True + elif ret is not None: return ret return None diff --git a/yt_dlp/minicurses.py b/yt_dlp/minicurses.py index 9fd679a48..a867fd289 100644 --- a/yt_dlp/minicurses.py +++ b/yt_dlp/minicurses.py @@ -69,6 +69,7 @@ def format_text(text, f): raise SyntaxError(f'Invalid format {" ".join(tokens)!r} in {f!r}') if fg_color or bg_color: + text = text.replace(CONTROL_SEQUENCES['RESET'], f'{fg_color}{bg_color}') return f'{fg_color}{bg_color}{text}{CONTROL_SEQUENCES["RESET"]}' else: return text diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 73bc88b89..725ab89db 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -471,7 +471,8 @@ def create_parser(): '!is_live --match-filter "like_count>?100 & description~=\'(?i)\\bcats \\& dogs\\b\'" ' 'matches only videos that are not live OR those that have a like count more than 100 ' '(or the like field is not available) and also has a description ' - 'that contains the phrase "cats & dogs" (ignoring case)')) + 'that contains the phrase "cats & dogs" (ignoring case). ' + 'Use "--match-filter -" to interactively ask whether to download each video')) selection.add_option( '--no-match-filter', metavar='FILTER', dest='match_filter', action='store_const', const=None, diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 7faee62ac..0612139e0 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -3407,11 +3407,15 @@ def match_str(filter_str, dct, incomplete=False): def match_filter_func(filters): if not filters: return None - filters = variadic(filters) + filters = set(variadic(filters)) - def _match_func(info_dict, *args, **kwargs): - if any(match_str(f, info_dict, *args, **kwargs) for f in filters): - return None + interactive = '-' in filters + if interactive: + filters.remove('-') + + def _match_func(info_dict, incomplete=False): + if not filters or any(match_str(f, info_dict, incomplete) for f in filters): + return NO_DEFAULT if interactive and not incomplete else None else: video_title = info_dict.get('title') or info_dict.get('id') or 'video' filter_str = ') | ('.join(map(str.strip, filters)) From e1e1ea54ae8c92b9a796ee103eb20a6b949e437f Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 28 Apr 2022 22:16:23 +0530 Subject: [PATCH 0997/2552] [build] Fix `--onedir` on macOS Closes #3584 --- pyinst.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pyinst.py b/pyinst.py index bc3c58ff8..af80c1812 100644 --- a/pyinst.py +++ b/pyinst.py @@ -33,9 +33,9 @@ def main(): if not onedir and '-F' not in opts and '--onefile' not in opts: opts.append('--onefile') - suffix = '_macos' if OS_NAME == 'Darwin' else '_x86' if ARCH == '32' else '' - final_file = 'dist/%syt-dlp%s%s' % ( - 'yt-dlp/' if onedir else '', suffix, '.exe' if OS_NAME == 'Windows' else '') + name = 'yt-dlp%s' % ('_macos' if OS_NAME == 'Darwin' else '_x86' if ARCH == '32' else '') + final_file = ''.join(( + 'dist/', f'{name}/' if onedir else '', name, '.exe' if OS_NAME == 'Windows' else '')) print(f'Building yt-dlp v{version} {ARCH}bit for {OS_NAME} with options {opts}') print('Remember to update the version using "devscripts/update-version.py"') @@ -45,7 +45,7 @@ def main(): print(f'Destination: {final_file}\n') opts = [ - f'--name=yt-dlp{suffix}', + f'--name={name}', '--icon=devscripts/logo.ico', '--upx-exclude=vcruntime140.dll', '--noconfirm', From 0a41f331cc3e06007b8d1abe104da196c565b505 Mon Sep 17 00:00:00 2001 
From: pukkandan Date: Fri, 29 Apr 2022 06:49:57 +0530 Subject: [PATCH 0998/2552] [doc] Minor improvements Closes #3518, Closes #3560 --- .github/PULL_REQUEST_TEMPLATE.md | 19 +++++++++-------- README.md | 36 +++++++++++++++++++++++++------- setup.py | 2 +- yt_dlp/YoutubeDL.py | 14 +++++++------ yt_dlp/extractor/youtube.py | 2 +- yt_dlp/options.py | 4 ++-- 6 files changed, 51 insertions(+), 26 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 684bf59e9..14d4da52e 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,28 +1,29 @@ -## Please follow the guide below + ### Before submitting a *pull request* make sure you have: - [ ] At least skimmed through [contributing guidelines](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions) including [yt-dlp coding conventions](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#yt-dlp-coding-conventions) - [ ] [Searched](https://github.com/yt-dlp/yt-dlp/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests -- [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8) +- [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8) and [ran relevant tests](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions) ### In order to be accepted and merged into yt-dlp each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check one of the following options: - [ ] I am the original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/) - [ ] I am not the original author of this code but it is in public domain or released under [Unlicense](http://unlicense.org/) (provide reliable evidence) ### What is the purpose of your *pull request*? -- [ ] Bug fix -- [ ] Improvement -- [ ] New extractor -- [ ] New feature +- [ ] Fix or improvement to an extractor (Make sure to add/update tests) +- [ ] New extractor ([Piracy websites will not be accepted](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-website-primarily-used-for-piracy)) +- [ ] Core bug fix/improvement +- [ ] New feature (It is strongly [recommended to open an issue first](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#adding-new-feature-or-making-overarching-changes)) --- ### Description of your *pull request* and other information -Explanation of your *pull request* in arbitrary form goes here. Please make sure the description explains the purpose and effect of your *pull request* and is worded well enough to be understood. Provide as much context and examples as possible. +Explanation of your *pull request* in arbitrary form goes here. Please **make sure the description explains the purpose and effect** of your *pull request* and is worded well enough to be understood. Provide as much **context and examples** as possible. diff --git a/README.md b/README.md index ca931aba3..556977dfa 100644 --- a/README.md +++ b/README.md @@ -270,7 +270,7 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly * [**mutagen**](https://github.com/quodlibet/mutagen)\* - For embedding thumbnail in certain formats. Licensed under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING) * [**pycryptodomex**](https://github.com/Legrandin/pycryptodome)\* - For decrypting AES-128 HLS streams and various other data. 
Licensed under [BSD-2-Clause](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst) * [**websockets**](https://github.com/aaugustin/websockets)\* - For downloading over websocket. Licensed under [BSD-3-Clause](https://github.com/aaugustin/websockets/blob/main/LICENSE) -* [**secretstorage**](https://github.com/mitya57/secretstorage)\* - For accessing the Gnome keyring while decrypting cookies of Chromium-based browsers on Linux. Licensed under [BSD-3-Clause](https://github.com/mitya57/secretstorage/blob/master/LICENSE) +* [**secretstorage**](https://github.com/mitya57/secretstorage) - For accessing the Gnome keyring while decrypting cookies of Chromium-based browsers on Linux. Licensed under [BSD-3-Clause](https://github.com/mitya57/secretstorage/blob/master/LICENSE) * [**brotli**](https://github.com/google/brotli)\* or [**brotlicffi**](https://github.com/python-hyper/brotlicffi) - [Brotli](https://en.wikipedia.org/wiki/Brotli) content encoding support. Both licensed under MIT [1](https://github.com/google/brotli/blob/master/LICENSE) [2](https://github.com/python-hyper/brotlicffi/blob/master/LICENSE) * [**certifi**](https://github.com/certifi/python-certifi)\* - Provides Mozilla's root certificate bundle. Licensed under [MPLv2](https://github.com/certifi/python-certifi/blob/master/LICENSE) * [**AtomicParsley**](https://github.com/wez/atomicparsley) - For embedding thumbnail in mp4/m4a if mutagen/ffmpeg cannot. Licensed under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING) @@ -282,7 +282,7 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly To use or redistribute the dependencies, you must agree to their respective licensing terms. -The Windows and MacOS standalone release binaries are already built with the python interpreter and all optional python packages (marked with \*) included. +The Windows and MacOS standalone release binaries are built with the Python interpreter and the packages marked with \* included. **Note**: There are some regressions in newer ffmpeg versions that causes various issues when used alongside yt-dlp. Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds#ffmpeg-static-auto-builds) with patches for these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds @@ -533,10 +533,10 @@ You can also fork the project on github and run your fork's [build workflow](.gi (http, ftp, m3u8, dash, rstp, rtmp, mms) to use it for. Currently supports native, aria2c, avconv, axel, curl, ffmpeg, httpie, - wget (Recommended: aria2c). You can use - this option multiple times to set different - downloaders for different protocols. For - example, --downloader aria2c --downloader + wget. You can use this option multiple + times to set different downloaders for + different protocols. 
For example, + --downloader aria2c --downloader "dash,m3u8:native" will use aria2c for http/ftp downloads, and the native downloader for dash/m3u8 downloads (Alias: @@ -1801,7 +1801,7 @@ import yt_dlp URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc'] ydl_opts = { - 'format': 'm4a/bestaudio/best' + 'format': 'm4a/bestaudio/best', # ℹ️ See help(yt_dlp.postprocessor) for a list of available Postprocessors and their arguments 'postprocessors': [{ # Extract audio using ffmpeg 'key': 'FFmpegExtractAudio', @@ -1812,6 +1812,28 @@ ydl_opts = { with yt_dlp.YoutubeDL(ydl_opts) as ydl: error_code = ydl.download(URLS) ``` + +#### Filter videos + +```python +import yt_dlp + +URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc'] + +def longer_than_a_minute(info, *, incomplete): + """Download only videos longer than a minute (or with unknown duration)""" + duration = info.get('duration') + if duration and duration < 60: + return 'The video is too short' + +ydl_opts = { + 'match_filter': longer_than_a_minute, +} + +with yt_dlp.YoutubeDL(ydl_opts) as ydl: + error_code = ydl.download(URLS) +``` + #### Adding logger and progress hook ```python diff --git a/setup.py b/setup.py index 89b819b1a..adcc42a1c 100644 --- a/setup.py +++ b/setup.py @@ -127,7 +127,7 @@ setup( packages=packages, install_requires=REQUIREMENTS, project_urls={ - 'Documentation': 'https://yt-dlp.readthedocs.io', + 'Documentation': 'https://github.com/yt-dlp/yt-dlp#readme', 'Source': 'https://github.com/yt-dlp/yt-dlp', 'Tracker': 'https://github.com/yt-dlp/yt-dlp/issues', 'Funding': 'https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators', diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 78345f87a..2857e9106 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -409,12 +409,14 @@ class YoutubeDL: sleep_interval_subtitles: Number of seconds to sleep before each subtitle download listformats: Print an overview of available video formats and exit. list_thumbnails: Print a table of all thumbnails and exit. - match_filter: A function that gets called with the info_dict of - every video. - If it returns a message, the video is ignored. - If it returns None, the video is downloaded. - If it returns utils.NO_DEFAULT, the user is interactively - asked whether to download the video. + match_filter: A function that gets called for every video with the signature + (info_dict, *, incomplete: bool) -> Optional[str] + For backward compatibility with youtube-dl, the signature + (info_dict) -> Optional[str] is also allowed. + - If it returns a message, the video is ignored. + - If it returns None, the video is downloaded. + - If it returns utils.NO_DEFAULT, the user is interactively + asked whether to download the video. match_filter_func in utils.py is one example for this. no_color: Do not emit color codes in output. 
geo_bypass: Bypass geographic restriction via faking X-Forwarded-For diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 7da54e088..210e5b36c 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -287,7 +287,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): # invidious-redirect websites r'(?:www\.)?redirect\.invidious\.io', r'(?:(?:www|dev)\.)?invidio\.us', - # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md + # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md r'(?:www\.)?invidious\.pussthecat\.org', r'(?:www\.)?invidious\.zee\.li', r'(?:www\.)?invidious\.ethibox\.fr', diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 725ab89db..a62681cbc 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -814,11 +814,11 @@ def create_parser(): }, help=( 'Name or path of the external downloader to use (optionally) prefixed by ' 'the protocols (http, ftp, m3u8, dash, rstp, rtmp, mms) to use it for. ' - 'Currently supports native, %s (Recommended: aria2c). ' + f'Currently supports native, {", ".join(list_external_downloaders())}. ' 'You can use this option multiple times to set different downloaders for different protocols. ' 'For example, --downloader aria2c --downloader "dash,m3u8:native" will use ' 'aria2c for http/ftp downloads, and the native downloader for dash/m3u8 downloads ' - '(Alias: --external-downloader)' % ', '.join(list_external_downloaders()))) + '(Alias: --external-downloader)')) downloader.add_option( '--downloader-args', '--external-downloader-args', metavar='NAME:ARGS', dest='external_downloader_args', default={}, type='str', From 1d485a1a799bbeeb2faea0595676ca7d4c0f3716 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 29 Apr 2022 07:18:36 +0530 Subject: [PATCH 0999/2552] [cleanup] Misc fixes Closes #3565, https://github.com/yt-dlp/yt-dlp/issues/3514#issuecomment-1105944364 --- devscripts/lazy_load_template.py | 2 +- yt_dlp/YoutubeDL.py | 18 ++++++++++-------- yt_dlp/compat/__init__.py | 4 ---- yt_dlp/compat/_deprecated.py | 5 +++++ yt_dlp/compat/asyncio.py | 1 - yt_dlp/compat/compat_utils.py | 22 +++++++++++++++++++--- yt_dlp/compat/re.py | 1 - yt_dlp/dependencies.py | 11 +++++++++++ yt_dlp/downloader/common.py | 6 ++++-- yt_dlp/downloader/external.py | 2 +- yt_dlp/downloader/fragment.py | 12 ++++-------- yt_dlp/downloader/mhtml.py | 2 +- yt_dlp/extractor/common.py | 3 +-- yt_dlp/extractor/fujitv.py | 6 +++--- yt_dlp/extractor/funimation.py | 3 +++ yt_dlp/extractor/youtube.py | 2 +- yt_dlp/postprocessor/embedthumbnail.py | 4 ++-- yt_dlp/postprocessor/xattrpp.py | 9 ++++++--- yt_dlp/utils.py | 4 +++- 19 files changed, 75 insertions(+), 42 deletions(-) diff --git a/devscripts/lazy_load_template.py b/devscripts/lazy_load_template.py index 0058915ae..e4b4f5825 100644 --- a/devscripts/lazy_load_template.py +++ b/devscripts/lazy_load_template.py @@ -7,7 +7,7 @@ class LazyLoadMetaClass(type): def __getattr__(cls, name): if '_real_class' not in cls.__dict__: write_string( - f'WARNING: Falling back to normal extractor since lazy extractor ' + 'WARNING: Falling back to normal extractor since lazy extractor ' f'{cls.__name__} does not have attribute {name}{bug_reports_message()}') return getattr(cls._get_real_class(), name) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 2857e9106..1e61be733 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -62,6 +62,7 @@ from .utils import ( DEFAULT_OUTTMPL, 
LINK_TEMPLATES,
     NO_DEFAULT,
+    NUMBER_RE,
     OUTTMPL_TYPES,
     POSTPROCESS_WHEN,
     STR_FORMAT_RE_TMPL,
@@ -1049,7 +1050,7 @@ class YoutubeDL:
             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
             if info_dict.get('duration', None) is not None
             else None)
-        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
+        info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
         info_dict['video_autonumber'] = self._num_videos
         if info_dict.get('resolution') is None:
             info_dict['resolution'] = self.format_resolution(info_dict, default=None)
@@ -1071,18 +1072,18 @@ class YoutubeDL:
         # Field is of the form key1.key2...
         # where keys (except first) can be string, int or slice
         FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
-        MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
+        MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
         MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
-        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
+        INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
             (?P<negate>-)?
-            (?P<fields>{field})
-            (?P<maths>(?:{math_op}{math_field})*)
+            (?P<fields>{FIELD_RE})
+            (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
             (?:>(?P<strf_format>.+?))?
             (?P<remaining>
                 (?P<alternate>(?<!\\),[^)]*)?
                 (?:&(?P<replacement>.*?))?
                 (?:\|(?P<default>.*?))?
-            )$'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
+            )$''')
 
         def _traverse_infodict(k):
             k = k.split('.')
@@ -2336,7 +2337,7 @@ class YoutubeDL:
                     video_id=info_dict['id'], ie=info_dict['extractor'])
             elif not info_dict.get('title'):
                 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
-                info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'
+                info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
 
         if info_dict.get('duration') is not None:
             info_dict['duration_string'] = formatSeconds(info_dict['duration'])
@@ -3669,10 +3670,11 @@ class YoutubeDL:
         ) or 'none'
         write_debug('exe versions: %s' % exe_str)
 
+        from .compat.compat_utils import get_package_info
         from .dependencies import available_dependencies
 
         write_debug('Optional libraries: %s' % (', '.join(sorted({
-            module.__name__.split('.')[0] for module in available_dependencies.values()
+            join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
         })) or 'none'))
         self._setup_opener()
 
diff --git a/yt_dlp/compat/__init__.py b/yt_dlp/compat/__init__.py
index 3c395f6d9..a0cd62110 100644
--- a/yt_dlp/compat/__init__.py
+++ b/yt_dlp/compat/__init__.py
@@ -46,10 +46,6 @@ def compat_ord(c):
     return c if isinstance(c, int) else ord(c)
 
 
-def compat_setenv(key, value, env=os.environ):
-    env[key] = value
-
-
 if compat_os_name == 'nt' and sys.version_info < (3, 8):
     # os.path.realpath on Windows does not follow symbolic links
     # prior to Python 3.8 (see https://bugs.python.org/issue9949)
diff --git a/yt_dlp/compat/_deprecated.py b/yt_dlp/compat/_deprecated.py
index f84439825..390f76577 100644
--- a/yt_dlp/compat/_deprecated.py
+++ b/yt_dlp/compat/_deprecated.py
@@ -44,4 +44,9 @@ compat_urllib_parse_urlparse = urllib.parse.urlparse
 compat_urllib_request = urllib.request
 compat_urlparse = compat_urllib_parse = urllib.parse
 
+
+def compat_setenv(key, value, env=os.environ):
+    env[key] = value
+
+
 __all__ = [x for x in globals() if x.startswith('compat_')]
diff --git a/yt_dlp/compat/asyncio.py b/yt_dlp/compat/asyncio.py
index f80dc192d..c61e5c8fd 100644
--- a/yt_dlp/compat/asyncio.py
+++ b/yt_dlp/compat/asyncio.py
@@ -1,5 +1,4 @@
 # flake8: noqa: F405
-
from asyncio import * # noqa: F403 from .compat_utils import passthrough_module diff --git a/yt_dlp/compat/compat_utils.py b/yt_dlp/compat/compat_utils.py index 938daf926..b1d58f5b9 100644 --- a/yt_dlp/compat/compat_utils.py +++ b/yt_dlp/compat/compat_utils.py @@ -1,9 +1,28 @@ +import collections import contextlib import importlib import sys import types +_NO_ATTRIBUTE = object() + +_Package = collections.namedtuple('Package', ('name', 'version')) + + +def get_package_info(module): + parent = module.__name__.split('.')[0] + parent_module = None + with contextlib.suppress(ImportError): + parent_module = importlib.import_module(parent) + + for attr in ('__version__', 'version_string', 'version'): + version = getattr(parent_module, attr, None) + if version is not None: + break + return _Package(getattr(module, '_yt_dlp__identifier', parent), str(version)) + + def _is_package(module): try: module.__getattribute__('__path__') @@ -12,9 +31,6 @@ def _is_package(module): return True -_NO_ATTRIBUTE = object() - - def passthrough_module(parent, child, *, callback=lambda _: None): parent_module = importlib.import_module(parent) child_module = importlib.import_module(child, parent) diff --git a/yt_dlp/compat/re.py b/yt_dlp/compat/re.py index d4532950a..e1d3a2645 100644 --- a/yt_dlp/compat/re.py +++ b/yt_dlp/compat/re.py @@ -1,5 +1,4 @@ # flake8: noqa: F405 - from re import * # F403 from .compat_utils import passthrough_module diff --git a/yt_dlp/dependencies.py b/yt_dlp/dependencies.py index 99cc6e29c..a4c2e5f06 100644 --- a/yt_dlp/dependencies.py +++ b/yt_dlp/dependencies.py @@ -1,4 +1,6 @@ # flake8: noqa: F401 +"""Imports all optional dependencies for the project. +An attribute "_yt_dlp__identifier" may be inserted into the module if it uses an ambigious namespace""" try: import brotlicffi as brotli @@ -28,6 +30,15 @@ except ImportError: from Crypto.Cipher import AES as Cryptodome_AES except ImportError: Cryptodome_AES = None + else: + try: + # In pycrypto, mode defaults to ECB. See: + # https://www.pycryptodome.org/en/latest/src/vs_pycrypto.html#:~:text=not%20have%20ECB%20as%20default%20mode + Cryptodome_AES.new(b'abcdefghijklmnop') + except TypeError: + pass + else: + Cryptodome_AES._yt_dlp__identifier = 'pycrypto' try: diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index 022a9cd17..d79863300 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -12,6 +12,7 @@ from ..minicurses import ( QuietMultilinePrinter, ) from ..utils import ( + NUMBER_RE, LockingUnsupportedError, Namespace, decodeArgument, @@ -91,7 +92,8 @@ class FileDownloader: 'trouble', 'write_debug', ): - setattr(self, func, getattr(ydl, func)) + if not hasattr(self, func): + setattr(self, func, getattr(ydl, func)) def to_screen(self, *args, **kargs): self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs) @@ -170,7 +172,7 @@ class FileDownloader: @staticmethod def parse_bytes(bytestr): """Parse a string indicating a byte quantity into an integer.""" - matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr) + matchobj = re.match(rf'(?i)^({NUMBER_RE})([kMGTPEZY]?)$', bytestr) if matchobj is None: return None number = float(matchobj.group(1)) diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 4fe56bb95..4f9f8f6e5 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -368,7 +368,7 @@ class FFmpegFD(ExternalFD): # These exists only for compatibility. 
Extractors should use # info_dict['downloader_options']['ffmpeg_args'] instead - args += info_dict.get('_ffmpeg_args') + args += info_dict.get('_ffmpeg_args') or [] seekable = info_dict.get('_seekable') if seekable is not None: # setting -seekable prevents ffmpeg from guessing if the server diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index 390c840bb..451e3cc2f 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -1,3 +1,4 @@ +import concurrent.futures import contextlib import http.client import json @@ -5,12 +6,6 @@ import math import os import time -try: - import concurrent.futures - can_threaded_download = True -except ImportError: - can_threaded_download = False - from .common import FileDownloader from .http import HttpFD from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 @@ -28,6 +23,8 @@ class HttpQuietDownloader(HttpFD): def to_screen(self, *args, **kargs): pass + console_title = to_screen + def report_retry(self, err, count, retries): super().to_screen( f'[download] Got server HTTP error: {err}. Retrying (attempt {count} of {self.format_retries(retries)}) ...') @@ -501,8 +498,7 @@ class FragmentFD(FileDownloader): max_workers = math.ceil( self.params.get('concurrent_fragment_downloads', 1) / ctx.get('max_progress', 1)) - if can_threaded_download and max_workers > 1: - + if max_workers > 1: def _download_fragment(fragment): ctx_copy = ctx.copy() download_fragment(fragment, ctx_copy) diff --git a/yt_dlp/downloader/mhtml.py b/yt_dlp/downloader/mhtml.py index 7bc3ab049..8a6619960 100644 --- a/yt_dlp/downloader/mhtml.py +++ b/yt_dlp/downloader/mhtml.py @@ -173,7 +173,7 @@ body > figure > img { mime_type = b'image/png' if frag_content.startswith((b'GIF87a', b'GIF89a')): mime_type = b'image/gif' - if frag_content.startswith(b'RIFF') and frag_content[8:12] == 'WEBP': + if frag_content.startswith(b'RIFF') and frag_content[8:12] == b'WEBP': mime_type = b'image/webp' frag_header = io.BytesIO() diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 63f7b5d4a..441d8a136 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1922,8 +1922,7 @@ class InfoExtractor: def _sort_formats(self, formats, field_preference=[]): if not formats: return - format_sort = self.FormatSort(self, field_preference) - formats.sort(key=lambda f: format_sort.calculate_preference(f)) + formats.sort(key=self.FormatSort(self, field_preference).calculate_preference) def _check_formats(self, formats, video_id): if formats: diff --git a/yt_dlp/extractor/fujitv.py b/yt_dlp/extractor/fujitv.py index 15d75a972..f66149d2c 100644 --- a/yt_dlp/extractor/fujitv.py +++ b/yt_dlp/extractor/fujitv.py @@ -17,7 +17,7 @@ class FujiTVFODPlus7IE(InfoExtractor): 'url': 'https://fod.fujitv.co.jp/title/5d40/5d40110076', 'info_dict': { 'id': '5d40110076', - 'ext': 'mp4', + 'ext': 'ts', 'title': '#1318 『まる子、まぼろしの洋館を見る』の巻', 'series': 'ちびまる子ちゃん', 'series_id': '5d40', @@ -28,7 +28,7 @@ class FujiTVFODPlus7IE(InfoExtractor): 'url': 'https://fod.fujitv.co.jp/title/5d40/5d40810083', 'info_dict': { 'id': '5d40810083', - 'ext': 'mp4', + 'ext': 'ts', 'title': '#1324 『まる子とオニの子』の巻/『結成!2月をムダにしない会』の巻', 'description': 'md5:3972d900b896adc8ab1849e310507efa', 'series': 'ちびまる子ちゃん', @@ -51,7 +51,7 @@ class FujiTVFODPlus7IE(InfoExtractor): for src in src_json['video_selector']: if not src.get('url'): continue - fmt, subs = self._extract_m3u8_formats_and_subtitles(src['url'], video_id, 'mp4') + fmt, subs = self._extract_m3u8_formats_and_subtitles(src['url'], 
video_id, 'ts') for f in fmt: f.update(dict(zip(('height', 'width'), self._BITRATE_MAP.get(f.get('tbr'), ())))) diff --git a/yt_dlp/extractor/funimation.py b/yt_dlp/extractor/funimation.py index 1e3309605..12cacd3b4 100644 --- a/yt_dlp/extractor/funimation.py +++ b/yt_dlp/extractor/funimation.py @@ -242,6 +242,9 @@ class FunimationIE(FunimationBaseIE): 'language_preference': language_preference(lang.lower()), }) formats.extend(current_formats) + if not formats and (requested_languages or requested_versions): + self.raise_no_formats( + 'There are no video formats matching the requested languages/versions', expected=True, video_id=display_id) self._remove_duplicate_formats(formats) self._sort_formats(formats, ('lang', 'source')) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 210e5b36c..078f49696 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3107,7 +3107,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'n': self._decrypt_nsig(query['n'][0], video_id, player_url)}) except ExtractorError as e: self.report_warning( - f'nsig extraction failed: You may experience throttling for some formats\n' + 'nsig extraction failed: You may experience throttling for some formats\n' f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True) throttled = True diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py index caa841b2e..207be776e 100644 --- a/yt_dlp/postprocessor/embedthumbnail.py +++ b/yt_dlp/postprocessor/embedthumbnail.py @@ -79,9 +79,9 @@ class EmbedThumbnailPP(FFmpegPostProcessor): original_thumbnail = thumbnail_filename = info['thumbnails'][idx]['filepath'] - thumbnail_ext = os.path.splitext(thumbnail_filename)[1][1:] # Convert unsupported thumbnail formats (see #25687, #25717) # PNG is preferred since JPEG is lossy + thumbnail_ext = os.path.splitext(thumbnail_filename)[1][1:] if info['ext'] not in ('mkv', 'mka') and thumbnail_ext not in ('jpg', 'jpeg', 'png'): thumbnail_filename = convertor.convert_thumbnail(thumbnail_filename, 'png') thumbnail_ext = 'png' @@ -100,7 +100,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor): elif info['ext'] in ['mkv', 'mka']: options = list(self.stream_copy_opts()) - mimetype = 'image/%s' % ('jpeg' if thumbnail_ext in ('jpg', 'jpeg') else thumbnail_ext) + mimetype = f'image/{thumbnail_ext.replace("jpg", "jpeg")}' old_stream, new_stream = self.get_stream_number( filename, ('tags', 'mimetype'), mimetype) if old_stream is not None: diff --git a/yt_dlp/postprocessor/xattrpp.py b/yt_dlp/postprocessor/xattrpp.py index 3c431941b..d6ac9b876 100644 --- a/yt_dlp/postprocessor/xattrpp.py +++ b/yt_dlp/postprocessor/xattrpp.py @@ -1,3 +1,5 @@ +import os + from .common import PostProcessor from ..compat import compat_os_name from ..utils import ( @@ -28,6 +30,7 @@ class XAttrMetadataPP(PostProcessor): self.to_screen('Writing metadata to file\'s xattrs') filename = info['filepath'] + mtime = os.stat(filename).st_mtime try: xattr_mapping = { @@ -53,8 +56,6 @@ class XAttrMetadataPP(PostProcessor): write_xattr(filename, xattrname, byte_value) num_written += 1 - return [], info - except XAttrUnavailableError as e: raise PostProcessingError(str(e)) @@ -73,4 +74,6 @@ class XAttrMetadataPP(PostProcessor): else: msg += '(You may have to enable them in your /etc/fstab)' raise PostProcessingError(str(e)) - return [], info + + self.try_utime(filename, mtime, mtime) + return [], info diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 0612139e0..35426568b 100644 --- a/yt_dlp/utils.py 
+++ b/yt_dlp/utils.py @@ -245,6 +245,8 @@ DATE_FORMATS_MONTH_FIRST.extend([ PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)" JSON_LD_RE = r'(?is)]+type=(["\']?)application/ld\+json\1[^>]*>(?P.+?)' +NUMBER_RE = r'\d+(?:\.\d+)?' + def preferredencoding(): """Get preferred encoding. @@ -3427,7 +3429,7 @@ def parse_dfxp_time_expr(time_expr): if not time_expr: return - mobj = re.match(r'^(?P\d+(?:\.\d+)?)s?$', time_expr) + mobj = re.match(rf'^(?P{NUMBER_RE})s?$', time_expr) if mobj: return float(mobj.group('time_offset')) From bfec31bec8bff7d5ca0625a52359b48517089430 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 29 Apr 2022 00:31:34 +0530 Subject: [PATCH 1000/2552] [youtube] De-prioritize auto-generated thumbnails Closes #3112 --- yt_dlp/extractor/youtube.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 078f49696..037d1d967 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3402,13 +3402,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): original_thumbnails = thumbnails.copy() # The best resolution thumbnails sometimes does not appear in the webpage - # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340 + # See: https://github.com/yt-dlp/yt-dlp/issues/340 # List of possible thumbnails - Ref: thumbnail_names = [ - 'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3', - 'hqdefault', 'hq1', 'hq2', 'hq3', '0', - 'mqdefault', 'mq1', 'mq2', 'mq3', - 'default', '1', '2', '3' + # While the *1,*2,*3 thumbnails are just below their correspnding "*default" variants + # in resolution, these are not the custom thumbnail. So de-prioritize them + 'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default', + 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3' ] n_thumbnail_names = len(thumbnail_names) thumbnails.extend({ From 07689fc149698d74368eeccfe583824a963b973d Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 29 Apr 2022 05:57:50 +0530 Subject: [PATCH 1001/2552] [reddit] Prevent infinite loop Closes #3588 --- yt_dlp/extractor/reddit.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py index a042a59cc..aabc8dba9 100644 --- a/yt_dlp/extractor/reddit.py +++ b/yt_dlp/extractor/reddit.py @@ -1,4 +1,5 @@ import random +from urllib.parse import urlparse from .common import InfoExtractor from ..utils import ( @@ -19,6 +20,7 @@ class RedditIE(InfoExtractor): 'info_dict': { 'id': 'zv89llsvexdz', 'ext': 'mp4', + 'display_id': '6rrwyj', 'title': 'That small heart attack.', 'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'thumbnails': 'count:4', @@ -158,6 +160,15 @@ class RedditIE(InfoExtractor): 'duration': int_or_none(reddit_video.get('duration')), } + parsed_url = urlparse(video_url) + if parsed_url.netloc == 'v.redd.it': + self.raise_no_formats('This video is processing', expected=True, video_id=video_id) + return { + **info, + 'id': parsed_url.path.split('/')[1], + 'display_id': video_id, + } + # Not hosted on reddit, must continue extraction return { **info, From 94aa064497122084c68f5f366c4c0ad5ea082485 Mon Sep 17 00:00:00 2001 From: Lesmiscore Date: Sun, 1 May 2022 00:38:30 +0900 Subject: [PATCH 1002/2552] [utils] YoutubeDLCookieJar: Detect and reject JSON file (#3599) Authored by: Lesmiscore --- yt_dlp/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py 
index 35426568b..3b75ab6b3 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -1507,6 +1507,10 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar): try: cf.write(prepare_line(line)) except compat_cookiejar.LoadError as e: + if f'{line.strip()} '[0] in '[{"': + raise compat_cookiejar.LoadError( + 'Cookies file must be Netscape formatted, not JSON. See ' + 'https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl') write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n') continue cf.seek(0) From 43d7f5a5d0c77556156a3f8caa6976d3908a1e38 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 1 May 2022 04:58:26 +0530 Subject: [PATCH 1003/2552] [EmbedThumbnail] Do not obey `-k` --- yt_dlp/YoutubeDL.py | 21 +++++++++++++-------- yt_dlp/postprocessor/common.py | 6 ++++++ yt_dlp/postprocessor/embedthumbnail.py | 14 ++++++-------- yt_dlp/postprocessor/ffmpeg.py | 7 +++---- yt_dlp/postprocessor/modify_chapters.py | 2 +- 5 files changed, 29 insertions(+), 21 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 1e61be733..cc36e2c9c 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3307,6 +3307,17 @@ class YoutubeDL: ''' Alias of sanitize_info for backward compatibility ''' return YoutubeDL.sanitize_info(info_dict, actually_filter) + def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None): + for filename in set(filter(None, files_to_delete)): + if msg: + self.to_screen(msg % filename) + try: + os.remove(filename) + except OSError: + self.report_warning(f'Unable to delete file {filename}') + if filename in info.get('__files_to_move', []): # NB: Delete even if None + del info['__files_to_move'][filename] + @staticmethod def post_extract(info_dict): def actual_post_extract(info_dict): @@ -3339,14 +3350,8 @@ class YoutubeDL: for f in files_to_delete: infodict['__files_to_move'].setdefault(f, '') else: - for old_filename in set(files_to_delete): - self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename) - try: - os.remove(encodeFilename(old_filename)) - except OSError: - self.report_warning('Unable to remove downloaded original file') - if old_filename in infodict['__files_to_move']: - del infodict['__files_to_move'][old_filename] + self._delete_downloaded_files( + *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)') return infodict def run_all_pps(self, key, info, *, additional_pps=None): diff --git a/yt_dlp/postprocessor/common.py b/yt_dlp/postprocessor/common.py index 519d06138..1d11e82a2 100644 --- a/yt_dlp/postprocessor/common.py +++ b/yt_dlp/postprocessor/common.py @@ -92,6 +92,12 @@ class PostProcessor(metaclass=PostProcessorMetaClass): if self._downloader: return self._downloader.write_debug(text, *args, **kwargs) + def _delete_downloaded_files(self, *files_to_delete, **kwargs): + if not self._downloader: + for filename in set(filter(None, files_to_delete)): + os.remove(filename) + return self._downloader._delete_downloaded_files(*files_to_delete, **kwargs) + def get_param(self, name, default=None, *args, **kwargs): if self._downloader: return self._downloader.params.get(name, default, *args, **kwargs) diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py index 207be776e..d36e0008e 100644 --- a/yt_dlp/postprocessor/embedthumbnail.py +++ b/yt_dlp/postprocessor/embedthumbnail.py @@ -220,11 +220,9 @@ class EmbedThumbnailPP(FFmpegPostProcessor): os.replace(temp_filename, filename) self.try_utime(filename, mtime, mtime) 
- - files_to_delete = [thumbnail_filename] - if self._already_have_thumbnail: - if original_thumbnail == thumbnail_filename: - files_to_delete = [] - elif original_thumbnail != thumbnail_filename: - files_to_delete.append(original_thumbnail) - return files_to_delete, info + converted = original_thumbnail != thumbnail_filename + self._delete_downloaded_files( + thumbnail_filename if converted or not self._already_have_thumbnail else None, + original_thumbnail if converted and not self._already_have_thumbnail else None, + info=info) + return [], info diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index bb7a630c6..d1d8e1687 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -374,7 +374,7 @@ class FFmpegPostProcessor(PostProcessor): self.real_run_ffmpeg( [(concat_file, ['-hide_banner', '-nostdin', '-f', 'concat', '-safe', '0'])], [(out_file, out_flags)]) - os.remove(concat_file) + self._delete_downloaded_files(concat_file) @classmethod def _concat_spec(cls, in_files, concat_opts=None): @@ -701,8 +701,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor): self.run_ffmpeg_multiple_files( (filename, metadata_filename), temp_filename, itertools.chain(self._options(info['ext']), *options)) - for file in filter(None, files_to_delete): - os.remove(file) # Don't obey --keep-files + self._delete_downloaded_files(*files_to_delete) os.replace(temp_filename, filename) return [], info @@ -1049,7 +1048,7 @@ class FFmpegSplitChaptersPP(FFmpegPostProcessor): destination, opts = self._ffmpeg_args_for_chapter(idx + 1, chapter, info) self.real_run_ffmpeg([(in_file, opts)], [(destination, self.stream_copy_opts())]) if in_file != info['filepath']: - os.remove(in_file) + self._delete_downloaded_files(in_file, msg=None) return [], info diff --git a/yt_dlp/postprocessor/modify_chapters.py b/yt_dlp/postprocessor/modify_chapters.py index 7e2c23288..8a2ef9065 100644 --- a/yt_dlp/postprocessor/modify_chapters.py +++ b/yt_dlp/postprocessor/modify_chapters.py @@ -314,7 +314,7 @@ class ModifyChaptersPP(FFmpegPostProcessor): self.to_screen(f'Removing chapters from {filename}') self.concat_files([in_file] * len(concat_opts), out_file, concat_opts) if in_file != filename: - os.remove(in_file) + self._delete_downloaded_files(in_file, msg=None) return out_file @staticmethod From 6f7563beb7509aba2f8b1f03fd37e52427dcfecb Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 1 May 2022 04:46:05 +0530 Subject: [PATCH 1004/2552] [XAttrMetadata] Refactor and document dependencies --- README.md | 1 + yt_dlp/dependencies.py | 9 +++ yt_dlp/options.py | 2 +- yt_dlp/postprocessor/xattrpp.py | 76 +++++++++------------ yt_dlp/utils.py | 113 ++++++++++++-------------------- 5 files changed, 82 insertions(+), 119 deletions(-) diff --git a/README.md b/README.md index 556977dfa..dc1fad5b3 100644 --- a/README.md +++ b/README.md @@ -273,6 +273,7 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly * [**secretstorage**](https://github.com/mitya57/secretstorage) - For accessing the Gnome keyring while decrypting cookies of Chromium-based browsers on Linux. Licensed under [BSD-3-Clause](https://github.com/mitya57/secretstorage/blob/master/LICENSE) * [**brotli**](https://github.com/google/brotli)\* or [**brotlicffi**](https://github.com/python-hyper/brotlicffi) - [Brotli](https://en.wikipedia.org/wiki/Brotli) content encoding support. 
Both licensed under MIT [1](https://github.com/google/brotli/blob/master/LICENSE) [2](https://github.com/python-hyper/brotlicffi/blob/master/LICENSE) * [**certifi**](https://github.com/certifi/python-certifi)\* - Provides Mozilla's root certificate bundle. Licensed under [MPLv2](https://github.com/certifi/python-certifi/blob/master/LICENSE) +* [**xattr**](https://github.com/xattr/xattr), [**pyxattr**](https://github.com/iustin/pyxattr) or [**setfattr**](http://savannah.nongnu.org/projects/attr) - For writing xattr metadata on Linux. Licensed under [MIT](https://github.com/xattr/xattr/blob/master/LICENSE.txt), [LGPL2.1](https://github.com/iustin/pyxattr/blob/master/COPYING) and [GPLv2+](http://git.savannah.nongnu.org/cgit/attr.git/tree/doc/COPYING) respectively * [**AtomicParsley**](https://github.com/wez/atomicparsley) - For embedding thumbnail in mp4/m4a if mutagen/ffmpeg cannot. Licensed under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING) * [**rtmpdump**](http://rtmpdump.mplayerhq.hu) - For downloading `rtmp` streams. ffmpeg will be used as a fallback. Licensed under [GPLv2+](http://rtmpdump.mplayerhq.hu) * [**mplayer**](http://mplayerhq.hu/design7/info.html) or [**mpv**](https://mpv.io) - For downloading `rstp` streams. ffmpeg will be used as a fallback. Licensed under [GPLv2+](https://github.com/mpv-player/mpv/blob/master/Copyright) diff --git a/yt_dlp/dependencies.py b/yt_dlp/dependencies.py index a4c2e5f06..772cfb576 100644 --- a/yt_dlp/dependencies.py +++ b/yt_dlp/dependencies.py @@ -75,6 +75,15 @@ except (ImportError, SyntaxError): websockets = None +try: + import xattr # xattr or pyxattr +except ImportError: + xattr = None +else: + if hasattr(xattr, 'set'): # pyxattr + xattr._yt_dlp__identifier = 'pyxattr' + + all_dependencies = {k: v for k, v in globals().items() if not k.startswith('_')} diff --git a/yt_dlp/options.py b/yt_dlp/options.py index a62681cbc..c03f69319 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1422,7 +1422,7 @@ def create_parser(): dest='parse_metadata', metavar='FIELDS REGEX REPLACE', action='append', nargs=3, help='Replace text in a metadata field using the given regex. 
This option can be used multiple times') postproc.add_option( - '--xattrs', + '--xattrs', '--xattr', action='store_true', dest='xattrs', default=False, help='Write metadata to the video file\'s xattrs (using dublin core and xdg standards)') postproc.add_option( diff --git a/yt_dlp/postprocessor/xattrpp.py b/yt_dlp/postprocessor/xattrpp.py index d6ac9b876..065ddf963 100644 --- a/yt_dlp/postprocessor/xattrpp.py +++ b/yt_dlp/postprocessor/xattrpp.py @@ -12,68 +12,52 @@ from ..utils import ( class XAttrMetadataPP(PostProcessor): - # - # More info about extended attributes for media: - # http://freedesktop.org/wiki/CommonExtendedAttributes/ - # http://www.freedesktop.org/wiki/PhreedomDraft/ - # http://dublincore.org/documents/usageguide/elements.shtml - # - # TODO: - # * capture youtube keywords and put them in 'user.dublincore.subject' (comma-separated) - # * figure out which xattrs can be used for 'duration', 'thumbnail', 'resolution' - # + """Set extended attributes on downloaded file (if xattr support is found) + + More info about extended attributes for media: + http://freedesktop.org/wiki/CommonExtendedAttributes/ + http://www.freedesktop.org/wiki/PhreedomDraft/ + http://dublincore.org/documents/usageguide/elements.shtml + + TODO: + * capture youtube keywords and put them in 'user.dublincore.subject' (comma-separated) + * figure out which xattrs can be used for 'duration', 'thumbnail', 'resolution' + """ + + XATTR_MAPPING = { + 'user.xdg.referrer.url': 'webpage_url', + # 'user.xdg.comment': 'description', + 'user.dublincore.title': 'title', + 'user.dublincore.date': 'upload_date', + 'user.dublincore.description': 'description', + 'user.dublincore.contributor': 'uploader', + 'user.dublincore.format': 'format', + } def run(self, info): - """ Set extended attributes on downloaded file (if xattr support is found). """ - - # Write the metadata to the file's xattrs + mtime = os.stat(info['filepath']).st_mtime self.to_screen('Writing metadata to file\'s xattrs') - - filename = info['filepath'] - mtime = os.stat(filename).st_mtime - try: - xattr_mapping = { - 'user.xdg.referrer.url': 'webpage_url', - # 'user.xdg.comment': 'description', - 'user.dublincore.title': 'title', - 'user.dublincore.date': 'upload_date', - 'user.dublincore.description': 'description', - 'user.dublincore.contributor': 'uploader', - 'user.dublincore.format': 'format', - } - - num_written = 0 - for xattrname, infoname in xattr_mapping.items(): - + for xattrname, infoname in self.XATTR_MAPPING.items(): value = info.get(infoname) - if value: if infoname == 'upload_date': value = hyphenate_date(value) - - byte_value = value.encode('utf-8') - write_xattr(filename, xattrname, byte_value) - num_written += 1 + write_xattr(info['filepath'], xattrname, value.encode('utf-8')) except XAttrUnavailableError as e: raise PostProcessingError(str(e)) - except XAttrMetadataError as e: if e.reason == 'NO_SPACE': self.report_warning( 'There\'s no disk space left, disk quota exceeded or filesystem xattr limit exceeded. ' - + (('Some ' if num_written else '') + 'extended attributes are not written.').capitalize()) + 'Some extended attributes are not written') elif e.reason == 'VALUE_TOO_LONG': - self.report_warning( - 'Unable to write extended attributes due to too long values.') + self.report_warning('Unable to write extended attributes due to too long values.') else: - msg = 'This filesystem doesn\'t support extended attributes. ' - if compat_os_name == 'nt': - msg += 'You need to use NTFS.' 
- else: - msg += '(You may have to enable them in your /etc/fstab)' - raise PostProcessingError(str(e)) + tip = ('You need to use NTFS' if compat_os_name == 'nt' + else 'You may have to enable them in your "/etc/fstab"') + raise PostProcessingError(f'This filesystem doesn\'t support extended attributes. {tip}') - self.try_utime(filename, mtime, mtime) + self.try_utime(info['filepath'], mtime, mtime) return [], info diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 3b75ab6b3..fc9eb253b 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4673,87 +4673,56 @@ def decode_png(png_data): def write_xattr(path, key, value): - # This mess below finds the best xattr tool for the job - try: - # try the pyxattr module... - import xattr - - if hasattr(xattr, 'set'): # pyxattr - # Unicode arguments are not supported in python-pyxattr until - # version 0.5.0 - # See https://github.com/ytdl-org/youtube-dl/issues/5498 - pyxattr_required_version = '0.5.0' - if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version): - # TODO: fallback to CLI tools - raise XAttrUnavailableError( - 'python-pyxattr is detected but is too old. ' - 'yt-dlp requires %s or above while your version is %s. ' - 'Falling back to other xattr implementations' % ( - pyxattr_required_version, xattr.__version__)) - - setxattr = xattr.set - else: # xattr - setxattr = xattr.setxattr + # Windows: Write xattrs to NTFS Alternate Data Streams: + # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29 + if compat_os_name == 'nt': + assert ':' not in key + assert os.path.exists(path) try: - setxattr(path, key, value) + with open(f'{path}:{key}', 'wb') as f: + f.write(value) except OSError as e: raise XAttrMetadataError(e.errno, e.strerror) + return - except ImportError: - if compat_os_name == 'nt': - # Write xattrs to NTFS Alternate Data Streams: - # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29 - assert ':' not in key - assert os.path.exists(path) - - ads_fn = path + ':' + key - try: - with open(ads_fn, 'wb') as f: - f.write(value) - except OSError as e: - raise XAttrMetadataError(e.errno, e.strerror) - else: - user_has_setfattr = check_executable('setfattr', ['--version']) - user_has_xattr = check_executable('xattr', ['-h']) - - if user_has_setfattr or user_has_xattr: + # UNIX Method 1. Use xattrs/pyxattrs modules + from .dependencies import xattr - value = value.decode('utf-8') - if user_has_setfattr: - executable = 'setfattr' - opts = ['-n', key, '-v', value] - elif user_has_xattr: - executable = 'xattr' - opts = ['-w', key, value] + setxattr = None + if getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr': + # Unicode arguments are not supported in pyxattr until version 0.5.0 + # See https://github.com/ytdl-org/youtube-dl/issues/5498 + if version_tuple(xattr.__version__) >= (0, 5, 0): + setxattr = xattr.set + elif xattr: + setxattr = xattr.setxattr - cmd = ([encodeFilename(executable, True)] - + [encodeArgument(o) for o in opts] - + [encodeFilename(path, True)]) + if setxattr: + try: + setxattr(path, key, value) + except OSError as e: + raise XAttrMetadataError(e.errno, e.strerror) + return - try: - p = Popen( - cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) - except OSError as e: - raise XAttrMetadataError(e.errno, e.strerror) - stdout, stderr = p.communicate_or_kill() - stderr = stderr.decode('utf-8', 'replace') - if p.returncode != 0: - raise XAttrMetadataError(p.returncode, stderr) + # UNIX Method 2. 
Use setfattr/xattr executables + exe = ('setfattr' if check_executable('setfattr', ['--version']) + else 'xattr' if check_executable('xattr', ['-h']) else None) + if not exe: + raise XAttrUnavailableError( + 'Couldn\'t find a tool to set the xattrs. Install either the python "xattr" or "pyxattr" modules or the ' + + ('"xattr" binary' if sys.platform != 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)')) - else: - # On Unix, and can't find pyxattr, setfattr, or xattr. - if sys.platform.startswith('linux'): - raise XAttrUnavailableError( - "Couldn't find a tool to set the xattrs. " - "Install either the python 'pyxattr' or 'xattr' " - "modules, or the GNU 'attr' package " - "(which contains the 'setfattr' tool).") - else: - raise XAttrUnavailableError( - "Couldn't find a tool to set the xattrs. " - "Install either the python 'xattr' module, " - "or the 'xattr' binary.") + value = value.decode('utf-8') + try: + p = Popen( + [exe, '-w', key, value, path] if exe == 'xattr' else [exe, '-n', key, '-v', value, path], + stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) + except OSError as e: + raise XAttrMetadataError(e.errno, e.strerror) + stderr = p.communicate_or_kill()[1].decode('utf-8', 'replace') + if p.returncode: + raise XAttrMetadataError(p.returncode, stderr) def random_birthday(year_field, month_field, day_field): From 3fe75fdc803d50820ddf643dc5184c01162451c4 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 29 Apr 2022 21:32:31 +0530 Subject: [PATCH 1005/2552] [cleanup] Misc fixes (see desc) * Do not warn when fixup is skipped for existing file * [fragment] Fix `--skip-unavailable-fragments` for HTTP Errors * [utils] write_string: Fix bug in 59f943cd5097e9bdbc3cb3e6b5675e43d369341a * [utils] parse_codecs: Subtitle codec is generally referred to as `scodec`. https://github.com/yt-dlp/yt-dlp/pull/2174#discussion_r790156048 * [docs] Remove note about permissions. Closes #3597 --- README.md | 4 +--- yt_dlp/YoutubeDL.py | 6 +++--- yt_dlp/downloader/fragment.py | 2 +- yt_dlp/extractor/common.py | 2 +- yt_dlp/options.py | 2 +- yt_dlp/utils.py | 12 ++++++------ 6 files changed, 13 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index dc1fad5b3..ed87a3273 100644 --- a/README.md +++ b/README.md @@ -320,9 +320,7 @@ You can also fork the project on github and run your fork's [build workflow](.gi ## General Options: -h, --help Print this help text and exit --version Print program version and exit - -U, --update Update this program to latest version. Make - sure that you have sufficient permissions - (run with sudo if needed) + -U, --update Update this program to latest version -i, --ignore-errors Ignore download and postprocessing errors. 
The download will be considered successful even if the postprocessing fails diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index cc36e2c9c..50342c2ca 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3151,16 +3151,16 @@ class YoutubeDL: if fixup_policy in ('ignore', 'never'): return elif fixup_policy == 'warn': - do_fixup = False + do_fixup = 'warn' elif fixup_policy != 'force': assert fixup_policy in ('detect_or_warn', None) if not info_dict.get('__real_download'): do_fixup = False def ffmpeg_fixup(cndn, msg, cls): - if not cndn: + if not (do_fixup and cndn): return - if not do_fixup: + elif do_fixup == 'warn': self.report_warning(f'{vid}: {msg}') return pp = cls(self) diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index 451e3cc2f..4655f067f 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -123,7 +123,7 @@ class FragmentFD(FileDownloader): 'request_data': request_data, 'ctx_id': ctx.get('ctx_id'), } - success = ctx['dl'].download(fragment_filename, fragment_info_dict) + success, _ = ctx['dl'].download(fragment_filename, fragment_info_dict) if not success: return False if fragment_info_dict.get('filetime'): diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 441d8a136..97cd524bc 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -2808,7 +2808,7 @@ class InfoExtractor: content_type = 'video' elif codecs['acodec'] != 'none': content_type = 'audio' - elif codecs.get('tcodec', 'none') != 'none': + elif codecs.get('scodec', 'none') != 'none': content_type = 'text' elif mimetype2ext(mime_type) in ('tt', 'dfxp', 'ttml', 'xml', 'json'): content_type = 'text' diff --git a/yt_dlp/options.py b/yt_dlp/options.py index c03f69319..944147871 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -236,7 +236,7 @@ def create_parser(): general.add_option( '-U', '--update', action='store_true', dest='update_self', - help='Update this program to latest version. 
Make sure that you have sufficient permissions (run with sudo if needed)') + help='Update this program to latest version') general.add_option( '-i', '--ignore-errors', action='store_true', dest='ignoreerrors', diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index fc9eb253b..0b28b0926 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -1859,7 +1859,7 @@ def write_string(s, out=None, encoding=None): from .compat import WINDOWS_VT_MODE # Must be imported locally if WINDOWS_VT_MODE: - s = s.replace('\n', ' \n') + s = re.sub(r'([\r\n]+)', r' \1', s) if 'b' in getattr(out, 'mode', ''): byt = s.encode(encoding or preferredencoding(), 'ignore') @@ -3177,7 +3177,7 @@ def parse_codecs(codecs_str): return {} split_codecs = list(filter(None, map( str.strip, codecs_str.strip().strip(',').split(',')))) - vcodec, acodec, tcodec, hdr = None, None, None, None + vcodec, acodec, scodec, hdr = None, None, None, None for full_codec in split_codecs: parts = full_codec.split('.') codec = parts[0].replace('0', '') @@ -3195,16 +3195,16 @@ def parse_codecs(codecs_str): if not acodec: acodec = full_codec elif codec in ('stpp', 'wvtt',): - if not tcodec: - tcodec = full_codec + if not scodec: + scodec = full_codec else: write_string(f'WARNING: Unknown codec {full_codec}\n') - if vcodec or acodec or tcodec: + if vcodec or acodec or scodec: return { 'vcodec': vcodec or 'none', 'acodec': acodec or 'none', 'dynamic_range': hdr, - **({'tcodec': tcodec} if tcodec is not None else {}), + **({'scodec': scodec} if scodec is not None else {}), } elif len(split_codecs) == 2: return { From 6e634cbe4236591661f3a7f13b62994fff13c73c Mon Sep 17 00:00:00 2001 From: coletdev Date: Sun, 1 May 2022 18:46:28 +1200 Subject: [PATCH 1006/2552] [youtube] Add YoutubeStoriesIE (#3362) Get channel stories with `ytstories:` Authored-by: coletdjnz --- README.md | 1 + yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/youtube.py | 95 ++++++++++++++++++++++++++++------ 3 files changed, 82 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index ed87a3273..448b5c884 100644 --- a/README.md +++ b/README.md @@ -89,6 +89,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t * `255kbps` audio is extracted (if available) from youtube music when premium cookies are given * Youtube music Albums, channels etc can be downloaded ([except self-uploaded music](https://github.com/yt-dlp/yt-dlp/issues/723)) * Download livestreams from the start using `--live-from-start` (experimental) + * Support for downloading stories (`ytstories:`) * **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[+KEYRING][:PROFILE]` diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 070d5cc65..57bb6ef48 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -2115,6 +2115,7 @@ from .youtube import ( YoutubeSearchURLIE, YoutubeMusicSearchURLIE, YoutubeSubscriptionsIE, + YoutubeStoriesIE, YoutubeTruncatedIDIE, YoutubeTruncatedURLIE, YoutubeYtBeIE, diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 037d1d967..4178a2f14 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -1,3 +1,4 @@ +import base64 import calendar import copy import datetime @@ -2199,7 +2200,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 'md5:2ef1d002cad520f65825346e2084e49d', }, 'params': {'skip_download': True} - }, + }, { + # Story. 
Requires specific player params to work. + # Note: stories get removed after some period of time + 'url': 'https://www.youtube.com/watch?v=yN3x1t3sieA', + 'info_dict': { + 'id': 'yN3x1t3sieA', + 'ext': 'mp4', + 'uploader': 'Linus Tech Tips', + 'duration': 13, + 'channel': 'Linus Tech Tips', + 'playable_in_embed': True, + 'tags': [], + 'age_limit': 0, + 'uploader_url': 'http://www.youtube.com/user/LinusTechTips', + 'upload_date': '20220402', + 'thumbnail': 'https://i.ytimg.com/vi_webp/yN3x1t3sieA/maxresdefault.webp', + 'title': 'Story', + 'live_status': 'not_live', + 'uploader_id': 'LinusTechTips', + 'view_count': int, + 'description': '', + 'channel_id': 'UCXuqSBlHAE6Xw-yeJA0Tunw', + 'categories': ['Science & Technology'], + 'channel_url': 'https://www.youtube.com/channel/UCXuqSBlHAE6Xw-yeJA0Tunw', + 'availability': 'unlisted', + } + } ] @classmethod @@ -2831,12 +2858,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor): lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4) continuation = self._extract_continuation(root_continuation_data) - message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1) - if message and not parent: - self.report_warning(message, video_id=video_id) response = None + is_forced_continuation = False is_first_continuation = parent is None + if is_first_continuation and not continuation: + # Sometimes you can get comments by generating the continuation yourself, + # even if YouTube initially reports them being disabled - e.g. stories comments. + # Note: if the comment section is actually disabled, YouTube may return a response with + # required check_get_keys missing. So we will disable that check initially in this case. + continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id)) + is_forced_continuation = True for page_num in itertools.count(0): if not continuation: @@ -2857,8 +2889,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): response = self._extract_response( item_id=None, query=continuation, ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix, - check_get_keys='onResponseReceivedEndpoints') - + check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None) + is_forced_continuation = False continuation_contents = traverse_obj( response, 'onResponseReceivedEndpoints', expected_type=list, default=[]) @@ -2883,6 +2915,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if continuation: break + message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1) + if message and not parent and tracker['running_total'] == 0: + self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True) + + @staticmethod + def _generate_comment_continuation(video_id): + """ + Generates initial comment section continuation token from given video id + """ + token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section' + return base64.b64encode(token.encode()).decode() + def _get_comments(self, ytcfg, video_id, contents, webpage): """Entry for comment extraction""" def _real_comment_extract(contents): @@ -2936,7 +2980,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): headers = self.generate_api_headers( ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client) - yt_query = {'videoId': video_id} + yt_query = { + 'videoId': video_id, + 'params': '8AEB' # enable stories + } 
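For reference, a small runnable check (illustrative only, not part of the patch) that the continuation token built by _generate_comment_continuation is plain base64 over a protobuf-like byte string; the video id is taken from the test case above.

import base64

video_id = 'yN3x1t3sieA'  # sample id from the story test above
token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
continuation = base64.b64encode(token.encode()).decode()
assert base64.b64decode(continuation).decode() == token  # round-trips losslessly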
         yt_query.update(self._generate_player_context(sts))
         return self._extract_response(
             item_id=video_id, ep='player', query=yt_query,
@@ -3251,7 +3298,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         webpage = None
         if 'webpage' not in self._configuration_arg('player_skip'):
             webpage = self._download_webpage(
-                webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
+                webpage_url + '&bpctr=9999999999&has_verified=1&pp=8AEB', video_id, fatal=False)

         master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

@@ -3696,7 +3743,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 unified_strdate(get_first(microformats, 'uploadDate'))
                 or unified_strdate(search_meta('uploadDate')))
             if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):
-                upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d')
+                upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date
             info['upload_date'] = upload_date

         for to, frm in fallbacks.items():
@@ -4211,7 +4258,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
                     self._extract_visitor_data(data, ytcfg)),
             **metadata)

-    def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
+    def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
         first_id = last_id = response = None
         for page_num in itertools.count(1):
             videos = list(self._playlist_entries(playlist))
@@ -4221,9 +4268,6 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
             if start >= len(videos):
                 return
             for video in videos[start:]:
-                if video['id'] == first_id:
-                    self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
-                    return
                 yield video
             first_id = first_id or videos[0]['id']
             last_id = videos[-1]['id']
@@ -4255,13 +4299,18 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
             playlist_url = urljoin(url, try_get(
                 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
                 compat_str))
-            if playlist_url and playlist_url != url:
+
+            # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
+            # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
+            is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)
+
+            if playlist_url and playlist_url != url and not is_known_unviewable:
                 return self.url_result(
                     playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                     video_title=title)

         return self.playlist_result(
-            self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
+            self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
             playlist_id=playlist_id, playlist_title=title)

     def _extract_availability(self, data):
@@ -5798,6 +5847,22 @@ class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
     }]


+class YoutubeStoriesIE(InfoExtractor):
+    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
+    IE_NAME = 'youtube:stories'
+    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
+    _TESTS = [{
+        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        playlist_id = f'RLTD{self._match_id(url)}'
+        return self.url_result(
+            f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1',
+            ie=YoutubeTabIE, video_id=playlist_id)
+
+
 class YoutubeTruncatedURLIE(InfoExtractor):
     IE_NAME = 'youtube:truncated_url'
     IE_DESC = False  # Do not list

From 131e14dc6650feea26ec814e6964e9d3e94ac881 Mon Sep 17 00:00:00 2001
From: Justin Keogh
Date: Sun, 1 May 2022 20:31:06 +0000
Subject: [PATCH 1007/2552] [utils] `locked_file`: Ignore illegal seek on `truncate` (#3610)

Closes #3557
Authored by: jakeogh
---
 yt_dlp/utils.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 0b28b0926..e25a112d3 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -2011,7 +2011,11 @@ class locked_file:
                 self.f.close()
                 raise
         if 'w' in self.mode:
-            self.f.truncate()
+            try:
+                self.f.truncate()
+            except OSError as e:
+                if e.errno != 29:  # Illegal seek, expected when self.f is a FIFO
+                    raise e
         return self

     def unlock(self):

From 1a7cd9c4873edb24b5291da14b3105b8933d4316 Mon Sep 17 00:00:00 2001
From: Marwen Dallel <71770363+MarwenDallel@users.noreply.github.com>
Date: Mon, 2 May 2022 01:59:48 +0100
Subject: [PATCH 1008/2552] [LCI] Fix extractor (#3534)

Authored by: MarwenDallel
---
 yt_dlp/extractor/lci.py | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/yt_dlp/extractor/lci.py b/yt_dlp/extractor/lci.py
index 81cf88b6c..e7d2f8a24 100644
--- a/yt_dlp/extractor/lci.py
+++ b/yt_dlp/extractor/lci.py
@@ -2,22 +2,27 @@ from .common import InfoExtractor


 class LCIIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?lci\.fr/[^/]+/[\w-]+-(?P<id>\d+)\.html'
-    _TEST = {
-        'url': 'http://www.lci.fr/international/etats-unis-a-j-62-hillary-clinton-reste-sans-voix-2001679.html',
-        'md5': '2fdb2538b884d4d695f9bd2bde137e6c',
+    _VALID_URL = r'https?://(?:www\.)?(?:lci|tf1info)\.fr/[^/]+/[\w-]+-(?P<id>\d+)\.html'
+    _TESTS = [{
+        'url': 'https://www.tf1info.fr/politique/election-presidentielle-2022-second-tour-j-2-marine-le-pen-et-emmanuel-macron-en-interview-de-lci-vendredi-soir-2217486.html',
         'info_dict': {
-            'id': '13244802',
+            'id': '13875948',
             'ext': 'mp4',
-            'title': 'Hillary Clinton et sa quinte de toux, en plein meeting',
-            'description': 'md5:a4363e3a960860132f8124b62f4a01c9',
-        }
-    }
+            'title': 'md5:660df5481fd418bc3bbb0d070e6fdb5a',
+            'thumbnail': 'https://photos.tf1.fr/1280/720/presidentielle-2022-marine-le-pen-et-emmanuel-macron-invites-de-lci-ce-vendredi-9c0e73-e1a036-0@1x.jpg',
+            'upload_date': '20220422',
+            'duration': 33,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://www.lci.fr/politique/election-presidentielle-2022-second-tour-j-2-marine-le-pen-et-emmanuel-macron-en-interview-de-lci-vendredi-soir-2217486.html',
+        'only_matching': True,
+    }]

     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-        wat_id = self._search_regex(
-            (r'data-watid=[\'"](\d+)', r'idwat["\']?\s*:\s*["\']?(\d+)'),
-            webpage, 'wat id')
+        wat_id = self._search_regex(r'watId["\']?\s*:\s*["\']?(\d+)', webpage, 'wat id')
         return self.url_result('wat:' + wat_id, 'Wat', wat_id)

From e4fa34a13e9f94f27f0fccae6bcadc8dd1ea1415 Mon Sep 17 00:00:00 2001
From: felix
Date: Thu, 21 Apr 2022 18:22:03 +0200
Subject: [PATCH 1009/2552] [hls] Fix unapplied byte_range for EXT-X-MAP fragment

Cherry-picked from #3302
Authored by: fstirlitz
---
 yt_dlp/downloader/hls.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py
index 694c843f3..f65f91f4f 100644
--- a/yt_dlp/downloader/hls.py
+++ b/yt_dlp/downloader/hls.py
@@ -191,6 +191,14 @@ class HlsFD(FragmentFD):
                     if extra_query:
                         frag_url = update_url_query(frag_url, extra_query)

+                if map_info.get('BYTERANGE'):
+                    splitted_byte_range = map_info.get('BYTERANGE').split('@')
+                    sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
+                    byte_range = {
+                        'start': sub_range_start,
+                        'end': sub_range_start + int(splitted_byte_range[0]),
+                    }
+
                 fragments.append({
                     'frag_index': frag_index,
                     'url': frag_url,
@@ -200,14 +208,6 @@ class HlsFD(FragmentFD):
                 })
                 media_sequence += 1

-                if map_info.get('BYTERANGE'):
-                    splitted_byte_range = map_info.get('BYTERANGE').split('@')
-                    sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
-                    byte_range = {
-                        'start': sub_range_start,
-                        'end': sub_range_start + int(splitted_byte_range[0]),
-                    }
-
             elif line.startswith('#EXT-X-KEY'):
                 decrypt_url = decrypt_info.get('URI')
                 decrypt_info = parse_m3u8_attributes(line[11:])

From b4f536626aa0e9279869b0ed3506fcf5ab7ed6d2 Mon Sep 17 00:00:00 2001
From: HE7086
Date: Mon, 2 May 2022 04:09:11 +0200
Subject: [PATCH 1010/2552] [BilibiliLive] Add extractor (#3406)

Authored by: HE7086, pukkandan
---
 yt_dlp/extractor/bilibili.py   | 86 ++++++++++++++++++++++++++++++++++
 yt_dlp/extractor/extractors.py |  1 +
 2 files changed, 87 insertions(+)

diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index eb2dcb024..ead0dd88b 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -18,6 +18,7 @@ from ..utils import (
     float_or_none,
     mimetype2ext,
     parse_iso8601,
+    qualities,
     traverse_obj,
     parse_count,
     smuggle_url,
@@ -996,3 +997,88 @@ class BiliIntlSeriesIE(BiliIntlBaseIE):
             self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
             categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
             thumbnail=url_or_none(series_info.get('horizontal_cover')), view_count=parse_count(series_info.get('view')))
+
+
+class BiliLiveIE(InfoExtractor):
+    _VALID_URL = r'https?://live.bilibili.com/(?P<id>\d+)'
+
+    _TESTS = [{
+        'url': 'https://live.bilibili.com/196',
+        'info_dict': {
+            'id': '33989',
+            'description': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)",
+            'ext': 'flv',
+            'title': "太空狼人杀联动,不被爆杀就算赢",
+            'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg",
+            'timestamp': 1650802769,
+        },
+        'skip': 'not live'
+    }, {
+        'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
+        'only_matching': True
+    }]
+
+    _FORMATS = {
+        80: {'format_id': 'low', 'format_note': '流畅'},
+        150: {'format_id': 'high_res', 'format_note': '高清'},
+        250: {'format_id': 'ultra_high_res', 'format_note': '超清'},
+        400: {'format_id': 'blue_ray', 'format_note': '蓝光'},
+        10000: {'format_id': 'source', 'format_note': '原画'},
+        20000: {'format_id': '4K', 'format_note': '4K'},
+        30000: {'format_id': 'dolby', 'format_note': '杜比'},
+    }
+
+    _quality = staticmethod(qualities(list(_FORMATS)))
+
+    def _call_api(self, path, room_id, query):
+        api_result = self._download_json(f'https://api.live.bilibili.com/{path}', room_id, query=query)
+        if api_result.get('code') != 0:
+            raise ExtractorError(api_result.get('message') or 'Unable to download JSON metadata')
+        return api_result.get('data') or {}
+
+    def _parse_formats(self, qn, fmt):
+        for codec in fmt.get('codec') or []:
+            if codec.get('current_qn') != qn:
+                continue
+            for url_info in codec['url_info']:
+                yield {
+                    'url': f'{url_info["host"]}{codec["base_url"]}{url_info["extra"]}',
+                    'ext': fmt.get('format_name'),
+                    'vcodec': codec.get('codec_name'),
+                    'quality': self._quality(qn),
+                    **self._FORMATS[qn],
+                }
+
+
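For context on the 'quality' values yielded above, a minimal sketch (assuming the yt-dlp package is importable) of how the qualities() helper ranks the qn keys: list position decides rank, and unknown ids sort lowest.

from yt_dlp.utils import qualities

quality = qualities([80, 150, 250, 400, 10000, 20000, 30000])
assert quality(10000) > quality(80)  # '原画' (source) outranks '流畅' (low)
assert quality(99999) == -1          # unknown qn sorts below all known ones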
def _real_extract(self, url): + room_id = self._match_id(url) + room_data = self._call_api('room/v1/Room/get_info', room_id, {'id': room_id}) + if room_data.get('live_status') == 0: + raise ExtractorError('Streamer is not live', expected=True) + + formats = [] + for qn in self._FORMATS.keys(): + stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, { + 'room_id': room_id, + 'qn': qn, + 'codec': '0,1', + 'format': '0,2', + 'mask': '0', + 'no_playurl': '0', + 'platform': 'web', + 'protocol': '0,1', + }) + for fmt in traverse_obj(stream_data, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []: + formats.extend(self._parse_formats(qn, fmt)) + self._sort_formats(formats) + + return { + 'id': room_id, + 'title': room_data.get('title'), + 'description': room_data.get('description'), + 'thumbnail': room_data.get('user_cover'), + 'timestamp': stream_data.get('live_time'), + 'formats': formats, + 'http_headers': { + 'Referer': url, + }, + } diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 57bb6ef48..0523b99df 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -168,6 +168,7 @@ from .bilibili import ( BilibiliChannelIE, BiliIntlIE, BiliIntlSeriesIE, + BiliLiveIE, ) from .biobiochiletv import BioBioChileTVIE from .bitchute import ( From afac4caa7db30804bebac33e53c3cb0237958224 Mon Sep 17 00:00:00 2001 From: coletdev Date: Mon, 2 May 2022 15:40:26 +1200 Subject: [PATCH 1011/2552] Fix redirect HTTP method handling (#3577) Authored by: coletdjnz --- yt_dlp/utils.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index e25a112d3..5c83b92b4 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -1587,9 +1587,21 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler): CONTENT_HEADERS = ("content-length", "content-type") # NB: don't use dict comprehension for python 2.6 compatibility newheaders = {k: v for k, v in req.headers.items() if k.lower() not in CONTENT_HEADERS} + + # A 303 must either use GET or HEAD for subsequent request + # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4 + if code == 303 and m != 'HEAD': + m = 'GET' + # 301 and 302 redirects are commonly turned into a GET from a POST + # for subsequent requests by browsers, so we'll do the same. 
+ # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2 + # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3 + if code in (301, 302) and m == 'POST': + m = 'GET' + return compat_urllib_request.Request( newurl, headers=newheaders, origin_req_host=req.origin_req_host, - unverifiable=True) + unverifiable=True, method=m) def extract_timezone(date_str): From bb58c9ed5c3121bf55edcac9af8d62f5143b89d8 Mon Sep 17 00:00:00 2001 From: coletdev Date: Mon, 2 May 2022 19:59:45 +1200 Subject: [PATCH 1012/2552] Add support for SSL client certificate authentication (#3435) Adds `--client-certificate`, `--client-certificate-key`, `--client-certificate-password` Authored-by: coletdjnz Co-authored-by: df Co-authored-by: pukkandan --- README.md | 9 ++++ test/test_http.py | 44 +++++++++++++++++++ test/testdata/certificate/ca.crt | 10 +++++ test/testdata/certificate/ca.key | 5 +++ test/testdata/certificate/ca.srl | 1 + test/testdata/certificate/client.crt | 9 ++++ test/testdata/certificate/client.csr | 7 +++ test/testdata/certificate/client.key | 5 +++ test/testdata/certificate/clientencrypted.key | 8 ++++ .../certificate/clientwithencryptedkey.crt | 17 +++++++ test/testdata/certificate/clientwithkey.crt | 14 ++++++ test/testdata/certificate/instructions.md | 19 ++++++++ yt_dlp/YoutubeDL.py | 4 ++ yt_dlp/__init__.py | 3 ++ yt_dlp/options.py | 13 ++++++ yt_dlp/utils.py | 8 ++++ 16 files changed, 176 insertions(+) create mode 100644 test/testdata/certificate/ca.crt create mode 100644 test/testdata/certificate/ca.key create mode 100644 test/testdata/certificate/ca.srl create mode 100644 test/testdata/certificate/client.crt create mode 100644 test/testdata/certificate/client.csr create mode 100644 test/testdata/certificate/client.key create mode 100644 test/testdata/certificate/clientencrypted.key create mode 100644 test/testdata/certificate/clientwithencryptedkey.crt create mode 100644 test/testdata/certificate/clientwithkey.crt create mode 100644 test/testdata/certificate/instructions.md diff --git a/README.md b/README.md index 448b5c884..f8813cbb6 100644 --- a/README.md +++ b/README.md @@ -840,6 +840,15 @@ You can also fork the project on github and run your fork's [build workflow](.gi interactively --ap-list-mso List all supported multiple-system operators + --client-certificate CERTFILE Path to client certificate file in PEM + format. May include the private key + --client-certificate-key KEYFILE Path to private key file for client + certificate + --client-certificate-password PASSWORD + Password for client certificate private + key, if encrypted. 
If not provided and the + key is encrypted, yt-dlp will ask + interactively ## Post-Processing Options: -x, --extract-audio Convert video files to audio-only files diff --git a/test/test_http.py b/test/test_http.py index d99be8be4..fb8c9f4e9 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -85,6 +85,50 @@ class TestHTTPS(unittest.TestCase): self.assertEqual(r['entries'][0]['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port) +class TestClientCert(unittest.TestCase): + def setUp(self): + certfn = os.path.join(TEST_DIR, 'testcert.pem') + self.certdir = os.path.join(TEST_DIR, 'testdata', 'certificate') + cacertfn = os.path.join(self.certdir, 'ca.crt') + self.httpd = compat_http_server.HTTPServer(('127.0.0.1', 0), HTTPTestRequestHandler) + sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) + sslctx.verify_mode = ssl.CERT_REQUIRED + sslctx.load_verify_locations(cafile=cacertfn) + sslctx.load_cert_chain(certfn, None) + self.httpd.socket = sslctx.wrap_socket(self.httpd.socket, server_side=True) + self.port = http_server_port(self.httpd) + self.server_thread = threading.Thread(target=self.httpd.serve_forever) + self.server_thread.daemon = True + self.server_thread.start() + + def _run_test(self, **params): + ydl = YoutubeDL({ + 'logger': FakeLogger(), + # Disable client-side validation of unacceptable self-signed testcert.pem + # The test is of a check on the server side, so unaffected + 'nocheckcertificate': True, + **params, + }) + r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port) + self.assertEqual(r['entries'][0]['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port) + + def test_certificate_combined_nopass(self): + self._run_test(client_certificate=os.path.join(self.certdir, 'clientwithkey.crt')) + + def test_certificate_nocombined_nopass(self): + self._run_test(client_certificate=os.path.join(self.certdir, 'client.crt'), + client_certificate_key=os.path.join(self.certdir, 'client.key')) + + def test_certificate_combined_pass(self): + self._run_test(client_certificate=os.path.join(self.certdir, 'clientwithencryptedkey.crt'), + client_certificate_password='foobar') + + def test_certificate_nocombined_pass(self): + self._run_test(client_certificate=os.path.join(self.certdir, 'client.crt'), + client_certificate_key=os.path.join(self.certdir, 'clientencrypted.key'), + client_certificate_password='foobar') + + def _build_proxy_handler(name): class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): proxy_name = name diff --git a/test/testdata/certificate/ca.crt b/test/testdata/certificate/ca.crt new file mode 100644 index 000000000..ddf7be7ad --- /dev/null +++ b/test/testdata/certificate/ca.crt @@ -0,0 +1,10 @@ +-----BEGIN CERTIFICATE----- +MIIBfDCCASOgAwIBAgIUUgngoxFpuWft8gjj3uEFoqJyoJowCgYIKoZIzj0EAwIw +FDESMBAGA1UEAwwJeXRkbHB0ZXN0MB4XDTIyMDQxNTAzMDEwMVoXDTM4MTAxNTAz +MDEwMVowFDESMBAGA1UEAwwJeXRkbHB0ZXN0MFkwEwYHKoZIzj0CAQYIKoZIzj0D +AQcDQgAEcTaKMtIn2/1kgid1zXFpLm87FMT5PP3/bltKVVH3DLO//0kUslCHYxFU +KpcCfVt9aueRyUFi1TNkkkEZ9D6fbqNTMFEwHQYDVR0OBBYEFBdY2rVNLFGM6r1F +iuamNDaiq0QoMB8GA1UdIwQYMBaAFBdY2rVNLFGM6r1FiuamNDaiq0QoMA8GA1Ud +EwEB/wQFMAMBAf8wCgYIKoZIzj0EAwIDRwAwRAIgXJg2jio1kow2g/iP54Qq+iI2 +m4EAvZiY0Im/Ni3PHawCIC6KCl6QcHANbeq8ckOXNGusjl6OWhvEM3uPBPhqskq1 +-----END CERTIFICATE----- diff --git a/test/testdata/certificate/ca.key b/test/testdata/certificate/ca.key new file mode 100644 index 000000000..38920d571 --- /dev/null +++ b/test/testdata/certificate/ca.key @@ -0,0 +1,5 @@ +-----BEGIN EC PRIVATE KEY----- 
+MHcCAQEEIG2L1bHdl3PnaLiJ7Zm8aAGCj4GiVbSbXQcrJAdL+yqOoAoGCCqGSM49 +AwEHoUQDQgAEcTaKMtIn2/1kgid1zXFpLm87FMT5PP3/bltKVVH3DLO//0kUslCH +YxFUKpcCfVt9aueRyUFi1TNkkkEZ9D6fbg== +-----END EC PRIVATE KEY----- diff --git a/test/testdata/certificate/ca.srl b/test/testdata/certificate/ca.srl new file mode 100644 index 000000000..de2d1eab3 --- /dev/null +++ b/test/testdata/certificate/ca.srl @@ -0,0 +1 @@ +4A260C33C4D34612646E6321E1E767DF1A95EF0B diff --git a/test/testdata/certificate/client.crt b/test/testdata/certificate/client.crt new file mode 100644 index 000000000..874622fae --- /dev/null +++ b/test/testdata/certificate/client.crt @@ -0,0 +1,9 @@ +-----BEGIN CERTIFICATE----- +MIIBIzCBygIUSiYMM8TTRhJkbmMh4edn3xqV7wswCgYIKoZIzj0EAwIwFDESMBAG +A1UEAwwJeXRkbHB0ZXN0MB4XDTIyMDQxNTAzMDEyN1oXDTM4MTAxNTAzMDEyN1ow +FTETMBEGA1UEAwwKeXRkbHB0ZXN0MjBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IA +BKREKVDWfLKZknzYg+BUkmTn43f2pl/LNSyKPtXo/UV7hhp6JXIq3ZuZ7rubyuMS +XNuH+2Cl9msSpJB2LhJs5kcwCgYIKoZIzj0EAwIDSAAwRQIhAMRr46vO25/5nUhD +aHp4L67AeSvrjvSFHfubyD3Kr5dwAiA8EfOgVxc8Qh6ozTcbXO/WnBfS48ZFRSQY +D0dB8M1kJw== +-----END CERTIFICATE----- diff --git a/test/testdata/certificate/client.csr b/test/testdata/certificate/client.csr new file mode 100644 index 000000000..2d5d7a5c1 --- /dev/null +++ b/test/testdata/certificate/client.csr @@ -0,0 +1,7 @@ +-----BEGIN CERTIFICATE REQUEST----- +MIHQMHcCAQAwFTETMBEGA1UEAwwKeXRkbHB0ZXN0MjBZMBMGByqGSM49AgEGCCqG +SM49AwEHA0IABKREKVDWfLKZknzYg+BUkmTn43f2pl/LNSyKPtXo/UV7hhp6JXIq +3ZuZ7rubyuMSXNuH+2Cl9msSpJB2LhJs5kegADAKBggqhkjOPQQDAgNJADBGAiEA +1LZ72mtPmVxhGtdMvpZ0fyA68H2RC5IMHpLq18T55UcCIQDKpkXXVTvAzS0JioCq +6kiYq8Oxx6ZMoI+11k75/Kip1g== +-----END CERTIFICATE REQUEST----- diff --git a/test/testdata/certificate/client.key b/test/testdata/certificate/client.key new file mode 100644 index 000000000..e47389b51 --- /dev/null +++ b/test/testdata/certificate/client.key @@ -0,0 +1,5 @@ +-----BEGIN EC PRIVATE KEY----- +MHcCAQEEIAW6h9hwT0Aha+JBukgmHnrKRPoqPNWYA86ic0UaKHs8oAoGCCqGSM49 +AwEHoUQDQgAEpEQpUNZ8spmSfNiD4FSSZOfjd/amX8s1LIo+1ej9RXuGGnolcird +m5nuu5vK4xJc24f7YKX2axKkkHYuEmzmRw== +-----END EC PRIVATE KEY----- diff --git a/test/testdata/certificate/clientencrypted.key b/test/testdata/certificate/clientencrypted.key new file mode 100644 index 000000000..0baee37e9 --- /dev/null +++ b/test/testdata/certificate/clientencrypted.key @@ -0,0 +1,8 @@ +-----BEGIN EC PRIVATE KEY----- +Proc-Type: 4,ENCRYPTED +DEK-Info: AES-256-CBC,4B39160146F15544922E553E08299A35 + +96A7/iBkIfTVb8r2812ued2pS49FfVY4Ppz/45OGF0uFayMtMl8/GuEBCamuhFXS +rnOOpco96TTeeKZHqR45wnf4tgHM8IjoQ6H0EX3lVF19OHnArAgrGYtohWUGSyGn +IgLJFdUewIjdI7XApTJprQFE5E2tETXFA95mCz88u1c= +-----END EC PRIVATE KEY----- diff --git a/test/testdata/certificate/clientwithencryptedkey.crt b/test/testdata/certificate/clientwithencryptedkey.crt new file mode 100644 index 000000000..f357e4c95 --- /dev/null +++ b/test/testdata/certificate/clientwithencryptedkey.crt @@ -0,0 +1,17 @@ +-----BEGIN CERTIFICATE----- +MIIBIzCBygIUSiYMM8TTRhJkbmMh4edn3xqV7wswCgYIKoZIzj0EAwIwFDESMBAG +A1UEAwwJeXRkbHB0ZXN0MB4XDTIyMDQxNTAzMDEyN1oXDTM4MTAxNTAzMDEyN1ow +FTETMBEGA1UEAwwKeXRkbHB0ZXN0MjBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IA +BKREKVDWfLKZknzYg+BUkmTn43f2pl/LNSyKPtXo/UV7hhp6JXIq3ZuZ7rubyuMS +XNuH+2Cl9msSpJB2LhJs5kcwCgYIKoZIzj0EAwIDSAAwRQIhAMRr46vO25/5nUhD +aHp4L67AeSvrjvSFHfubyD3Kr5dwAiA8EfOgVxc8Qh6ozTcbXO/WnBfS48ZFRSQY +D0dB8M1kJw== +-----END CERTIFICATE----- +-----BEGIN EC PRIVATE KEY----- +Proc-Type: 4,ENCRYPTED +DEK-Info: AES-256-CBC,4B39160146F15544922E553E08299A35 + 
+96A7/iBkIfTVb8r2812ued2pS49FfVY4Ppz/45OGF0uFayMtMl8/GuEBCamuhFXS +rnOOpco96TTeeKZHqR45wnf4tgHM8IjoQ6H0EX3lVF19OHnArAgrGYtohWUGSyGn +IgLJFdUewIjdI7XApTJprQFE5E2tETXFA95mCz88u1c= +-----END EC PRIVATE KEY----- diff --git a/test/testdata/certificate/clientwithkey.crt b/test/testdata/certificate/clientwithkey.crt new file mode 100644 index 000000000..942f6e2a4 --- /dev/null +++ b/test/testdata/certificate/clientwithkey.crt @@ -0,0 +1,14 @@ +-----BEGIN CERTIFICATE----- +MIIBIzCBygIUSiYMM8TTRhJkbmMh4edn3xqV7wswCgYIKoZIzj0EAwIwFDESMBAG +A1UEAwwJeXRkbHB0ZXN0MB4XDTIyMDQxNTAzMDEyN1oXDTM4MTAxNTAzMDEyN1ow +FTETMBEGA1UEAwwKeXRkbHB0ZXN0MjBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IA +BKREKVDWfLKZknzYg+BUkmTn43f2pl/LNSyKPtXo/UV7hhp6JXIq3ZuZ7rubyuMS +XNuH+2Cl9msSpJB2LhJs5kcwCgYIKoZIzj0EAwIDSAAwRQIhAMRr46vO25/5nUhD +aHp4L67AeSvrjvSFHfubyD3Kr5dwAiA8EfOgVxc8Qh6ozTcbXO/WnBfS48ZFRSQY +D0dB8M1kJw== +-----END CERTIFICATE----- +-----BEGIN EC PRIVATE KEY----- +MHcCAQEEIAW6h9hwT0Aha+JBukgmHnrKRPoqPNWYA86ic0UaKHs8oAoGCCqGSM49 +AwEHoUQDQgAEpEQpUNZ8spmSfNiD4FSSZOfjd/amX8s1LIo+1ej9RXuGGnolcird +m5nuu5vK4xJc24f7YKX2axKkkHYuEmzmRw== +-----END EC PRIVATE KEY----- diff --git a/test/testdata/certificate/instructions.md b/test/testdata/certificate/instructions.md new file mode 100644 index 000000000..b0e3fbd48 --- /dev/null +++ b/test/testdata/certificate/instructions.md @@ -0,0 +1,19 @@ +# Generate certificates for client cert tests + +## CA +```sh +openssl ecparam -name prime256v1 -genkey -noout -out ca.key +openssl req -new -x509 -sha256 -days 6027 -key ca.key -out ca.crt -subj "/CN=ytdlptest" +``` + +## Client +```sh +openssl ecparam -name prime256v1 -genkey -noout -out client.key +openssl ec -in client.key -out clientencrypted.key -passout pass:foobar -aes256 +openssl req -new -sha256 -key client.key -out client.csr -subj "/CN=ytdlptest2" +openssl x509 -req -in client.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out client.crt -days 6027 -sha256 +cp client.crt clientwithkey.crt +cp client.crt clientwithencryptedkey.crt +cat client.key >> clientwithkey.crt +cat clientencrypted.key >> clientwithencryptedkey.crt +``` \ No newline at end of file diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 50342c2ca..1766ff379 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -319,6 +319,10 @@ class YoutubeDL: legacyserverconnect: Explicitly allow HTTPS connection to servers that do not support RFC 5746 secure renegotiation nocheckcertificate: Do not verify SSL certificates + client_certificate: Path to client certificate file in PEM format. May include the private key + client_certificate_key: Path to private key file for client certificate + client_certificate_password: Password for client certificate private key, if encrypted. + If not provided and the key is encrypted, yt-dlp will ask interactively prefer_insecure: Use HTTP instead of HTTPS to retrieve information. At the moment, this is only supported by YouTube. 
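A hedged usage sketch for the three parameters documented above (file paths and password are placeholders, not real values):

import yt_dlp

ydl_opts = {
    'client_certificate': '/path/to/client.crt',      # PEM certificate; may bundle the key
    'client_certificate_key': '/path/to/client.key',  # separate private key, if not bundled
    'client_certificate_password': 'foobar',          # only needed for encrypted keys
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://example.com/video'])  # placeholder URL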
http_headers: A dictionary of custom headers to be used for all requests diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index dc2f905c7..2e9da4c98 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -641,6 +641,9 @@ def parse_options(argv=None): 'ap_mso': opts.ap_mso, 'ap_username': opts.ap_username, 'ap_password': opts.ap_password, + 'client_certificate': opts.client_certificate, + 'client_certificate_key': opts.client_certificate_key, + 'client_certificate_password': opts.client_certificate_password, 'quiet': opts.quiet or any_getting or opts.print_json or bool(opts.forceprint), 'no_warnings': opts.no_warnings, 'forceurl': opts.geturl, diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 944147871..60f866570 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -571,6 +571,19 @@ def create_parser(): '--ap-list-mso', action='store_true', dest='ap_list_mso', default=False, help='List all supported multiple-system operators') + authentication.add_option( + '--client-certificate', + dest='client_certificate', metavar='CERTFILE', + help='Path to client certificate file in PEM format. May include the private key') + authentication.add_option( + '--client-certificate-key', + dest='client_certificate_key', metavar='KEYFILE', + help='Path to private key file for client certificate') + authentication.add_option( + '--client-certificate-password', + dest='client_certificate_password', metavar='PASSWORD', + help='Password for client certificate private key, if encrypted. ' + 'If not provided and the key is encrypted, yt-dlp will ask interactively') video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option( diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 5c83b92b4..3f22eaf75 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -936,6 +936,14 @@ def make_HTTPS_handler(params, **kwargs): for storename in ('CA', 'ROOT'): _ssl_load_windows_store_certs(context, storename) context.set_default_verify_paths() + client_certfile = params.get('client_certificate') + if client_certfile: + try: + context.load_cert_chain( + client_certfile, keyfile=params.get('client_certificate_key'), + password=params.get('client_certificate_password')) + except ssl.SSLError: + raise YoutubeDLError('Unable to load client certificate') return YoutubeDLHTTPSHandler(params, context=context, **kwargs) From 6ef5ad9e29ab3648e87af32a2a1abc6063237c3f Mon Sep 17 00:00:00 2001 From: nyuszika7h Date: Mon, 2 May 2022 17:13:18 +0200 Subject: [PATCH 1013/2552] [trovo] Update to new API (#3509) Closes #3457 Authored by: nyuszika7h --- yt_dlp/extractor/trovo.py | 234 +++++++++++++++++++++++--------------- 1 file changed, 145 insertions(+), 89 deletions(-) diff --git a/yt_dlp/extractor/trovo.py b/yt_dlp/extractor/trovo.py index 3487f3acc..c049025a3 100644 --- a/yt_dlp/extractor/trovo.py +++ b/yt_dlp/extractor/trovo.py @@ -1,5 +1,7 @@ import itertools import json +import random +import string from .common import InfoExtractor from ..utils import ( @@ -15,10 +17,20 @@ class TrovoBaseIE(InfoExtractor): _VALID_URL_BASE = r'https?://(?:www\.)?trovo\.live/' _HEADERS = {'Origin': 'https://trovo.live'} - def _call_api(self, video_id, query=None, data=None): - return self._download_json( - 'https://gql.trovo.live/', video_id, query=query, data=data, - headers={'Accept': 'application/json'}) + def _call_api(self, video_id, data): + if 'persistedQuery' in data.get('extensions', {}): + url = 'https://gql.trovo.live' + else: + url = 'https://api-web.trovo.live/graphql' + + resp = 
self._download_json(
+            url, video_id, data=json.dumps([data]).encode(), headers={'Accept': 'application/json'},
+            query={
+                'qid': ''.join(random.choices(string.ascii_uppercase + string.digits, k=10)),
+            })[0]
+        if 'errors' in resp:
+            raise ExtractorError(f'Trovo said: {resp["errors"][0]["message"]}')
+        return resp['data'][data['operationName']]

     def _extract_streamer_info(self, data):
         streamer_info = data.get('streamerInfo') or {}
@@ -35,27 +47,14 @@ class TrovoIE(TrovoBaseIE):

     def _real_extract(self, url):
         username = self._match_id(url)
-        live_info = self._call_api(username, query={
-            'query': '''{
-  getLiveInfo(params: {userName: "%s"}) {
-    isLive
-    programInfo {
-      coverUrl
-      id
-      streamInfo {
-        desc
-        playUrl
-      }
-      title
-    }
-    streamerInfo {
-      nickName
-      uid
-      userName
-    }
-  }
-}''' % username,
-        })['data']['getLiveInfo']
+        live_info = self._call_api(username, data={
+            'operationName': 'live_LiveReaderService_GetLiveInfo',
+            'variables': {
+                'params': {
+                    'userName': username,
+                },
+            },
+        })
         if live_info.get('isLive') == 0:
             raise ExtractorError('%s is offline' % username, expected=True)
         program_info = live_info['programInfo']
@@ -90,56 +89,61 @@ class TrovoIE(TrovoBaseIE):

 class TrovoVodIE(TrovoBaseIE):
     _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:clip|video)/(?P<id>[^/?&#]+)'
     _TESTS = [{
-        'url': 'https://trovo.live/video/ltv-100095501_100095501_1609596043',
+        'url': 'https://trovo.live/clip/lc-5285890818705062210?ltab=videos',
+        'params': {'getcomments': True},
         'info_dict': {
-            'id': 'ltv-100095501_100095501_1609596043',
+            'id': 'lc-5285890818705062210',
             'ext': 'mp4',
-            'title': 'Spontaner 12 Stunden Stream! - Ok Boomer!',
-            'uploader': 'Exsl',
-            'timestamp': 1609640305,
-            'upload_date': '20210103',
-            'uploader_id': '100095501',
-            'duration': 43977,
+            'title': 'fatal moaning for a super good🤣🤣',
+            'uploader': 'OneTappedYou',
+            'timestamp': 1621628019,
+            'upload_date': '20210521',
+            'uploader_id': '100719456',
+            'duration': 31,
             'view_count': int,
             'like_count': int,
             'comment_count': int,
-            'comments': 'mincount:8',
-            'categories': ['Grand Theft Auto V'],
+            'comments': 'mincount:1',
+            'categories': ['Call of Duty: Mobile'],
+            'uploader_url': 'https://trovo.live/OneTappedYou',
+            'thumbnail': r're:^https?://.*\.jpg',
         },
-        'skip': '404'
     }, {
-        'url': 'https://trovo.live/clip/lc-5285890810184026005',
+        'url': 'https://trovo.live/video/ltv-100095501_100095501_1609596043',
         'only_matching': True,
     }]

     def _real_extract(self, url):
         vid = self._match_id(url)
-        resp = self._call_api(vid, data=json.dumps([{
-            'query': '''{
-  batchGetVodDetailInfo(params: {vids: ["%s"]}) {
-    VodDetailInfos
-  }
-}''' % vid,
-        }, {
-            'query': '''{
-  getCommentList(params: {appInfo: {postID: "%s"}, pageSize: 1000000000, preview: {}}) {
-    commentList {
-      author {
-        nickName
-        uid
-      }
-      commentID
-      content
-      createdAt
-      parentID
-    }
-  }
-}''' % vid,
-        }]).encode())
-        vod_detail_info = resp[0]['data']['batchGetVodDetailInfo']['VodDetailInfos'][vid]
+
+        # NOTE: It is also possible to extract this info from the Nuxt data on the website,
+        # however that seems unreliable - sometimes it randomly doesn't return the data,
+        # at least when using a non-residential IP.
+ resp = self._call_api(vid, data={ + 'operationName': 'batchGetVodDetailInfo', + 'variables': { + 'params': { + 'vids': [vid], + }, + }, + 'extensions': { + 'persistedQuery': { + 'version': 1, + 'sha256Hash': 'ceae0355d66476e21a1dd8e8af9f68de95b4019da2cda8b177c9a2255dad31d0', + }, + }, + }) + vod_detail_info = resp['VodDetailInfos'][vid] vod_info = vod_detail_info['vodInfo'] title = vod_info['title'] + if try_get(vod_info, lambda x: x['playbackRights']['playbackRights'] != 'Normal'): + playback_rights_setting = vod_info['playbackRights']['playbackRightsSetting'] + if playback_rights_setting == 'SubscriberOnly': + raise ExtractorError('This video is only available for subscribers', expected=True) + else: + raise ExtractorError(f'This video is not available ({playback_rights_setting})', expected=True) + language = vod_info.get('languageName') formats = [] for play_info in (vod_info.get('playInfos') or []): @@ -163,23 +167,6 @@ class TrovoVodIE(TrovoBaseIE): category = vod_info.get('categoryName') get_count = lambda x: int_or_none(vod_info.get(x + 'Num')) - comment_list = try_get(resp, lambda x: x[1]['data']['getCommentList']['commentList'], list) or [] - comments = [] - for comment in comment_list: - content = comment.get('content') - if not content: - continue - author = comment.get('author') or {} - parent = comment.get('parentID') - comments.append({ - 'author': author.get('nickName'), - 'author_id': str_or_none(author.get('uid')), - 'id': str_or_none(comment.get('commentID')), - 'text': content, - 'timestamp': int_or_none(comment.get('createdAt')), - 'parent': 'root' if parent == 0 else str_or_none(parent), - }) - info = { 'id': vid, 'title': title, @@ -190,12 +177,51 @@ class TrovoVodIE(TrovoBaseIE): 'view_count': get_count('watch'), 'like_count': get_count('like'), 'comment_count': get_count('comment'), - 'comments': comments, 'categories': [category] if category else None, + '__post_extractor': self.extract_comments(vid), } info.update(self._extract_streamer_info(vod_detail_info)) return info + def _get_comments(self, vid): + for page in itertools.count(1): + comments_json = self._call_api(vid, data={ + 'operationName': 'getCommentList', + 'variables': { + 'params': { + 'appInfo': { + 'postID': vid, + }, + 'preview': {}, + 'pageSize': 99, + 'page': page, + }, + }, + 'extensions': { + 'persistedQuery': { + 'version': 1, + 'sha256Hash': 'be8e5f9522ddac7f7c604c0d284fd22481813263580849926c4c66fb767eed25', + }, + }, + }) + for comment in comments_json['commentList']: + content = comment.get('content') + if not content: + continue + author = comment.get('author') or {} + parent = comment.get('parentID') + yield { + 'author': author.get('nickName'), + 'author_id': str_or_none(author.get('uid')), + 'id': str_or_none(comment.get('commentID')), + 'text': content, + 'timestamp': int_or_none(comment.get('createdAt')), + 'parent': 'root' if parent == 0 else str_or_none(parent), + } + + if comments_json['lastPage']: + break + class TrovoChannelBaseIE(TrovoBaseIE): def _get_vod_json(self, page, uid): @@ -215,9 +241,15 @@ class TrovoChannelBaseIE(TrovoBaseIE): def _real_extract(self, url): id = self._match_id(url) - uid = str(self._call_api(id, query={ - 'query': '{getLiveInfo(params:{userName:"%s"}){streamerInfo{uid}}}' % id - })['data']['getLiveInfo']['streamerInfo']['uid']) + live_info = self._call_api(id, data={ + 'operationName': 'live_LiveReaderService_GetLiveInfo', + 'variables': { + 'params': { + 'userName': id, + }, + }, + }) + uid = str(live_info['streamerInfo']['uid']) return 
self.playlist_result(self._entries(uid), playlist_id=uid)
@@ -233,13 +265,25 @@ class TrovoChannelVodIE(TrovoChannelBaseIE):
         },
     }]

-    _QUERY = '{getChannelLtvVideoInfos(params:{pageSize:99,currPage:%d,channelID:%s}){hasMore,vodInfos{vid}}}'
     _TYPE = 'video'

     def _get_vod_json(self, page, uid):
-        return self._call_api(uid, query={
-            'query': self._QUERY % (page, uid)
-        })['data']['getChannelLtvVideoInfos']
+        return self._call_api(uid, data={
+            'operationName': 'getChannelLtvVideoInfos',
+            'variables': {
+                'params': {
+                    'channelID': int(uid),
+                    'pageSize': 99,
+                    'currPage': page,
+                },
+            },
+            'extensions': {
+                'persistedQuery': {
+                    'version': 1,
+                    'sha256Hash': '78fe32792005eab7e922cafcdad9c56bed8bbc5f5df3c7cd24fcb84a744f5f78',
+                },
+            },
+        })


 class TrovoChannelClipIE(TrovoChannelBaseIE):
@@ -254,10 +298,22 @@ class TrovoChannelClipIE(TrovoChannelBaseIE):
         },
     }]

-    _QUERY = '{getChannelClipVideoInfos(params:{pageSize:99,currPage:%d,channelID:%s,albumType:VOD_CLIP_ALBUM_TYPE_LATEST}){hasMore,vodInfos{vid}}}'
     _TYPE = 'clip'

     def _get_vod_json(self, page, uid):
-        return self._call_api(uid, query={
-            'query': self._QUERY % (page, uid)
-        })['data']['getChannelClipVideoInfos']
+        return self._call_api(uid, data={
+            'operationName': 'getChannelClipVideoInfos',
+            'variables': {
+                'params': {
+                    'channelID': int(uid),
+                    'pageSize': 99,
+                    'currPage': page,
+                },
+            },
+            'extensions': {
+                'persistedQuery': {
+                    'version': 1,
+                    'sha256Hash': 'e7924bfe20059b5c75fc8ff9e7929f43635681a7bdf3befa01072ed22c8eff31',
+                },
+            },
+        })

From cbc6ee10da1c4a41273839fcd10f1d3ea34caea7 Mon Sep 17 00:00:00 2001
From: Bricio <216170+Bricio@users.noreply.github.com>
Date: Mon, 2 May 2022 18:26:28 -0300
Subject: [PATCH 1014/2552] [Fifa] Add Extractor (#3414)

Closes #3408
Authored by: Bricio
---
 yt_dlp/extractor/extractors.py |   1 +
 yt_dlp/extractor/fifa.py       | 108 +++++++++++++++++++++++++++++++++
 2 files changed, 109 insertions(+)
 create mode 100644 yt_dlp/extractor/fifa.py

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 0523b99df..1d4962bbe 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -501,6 +501,7 @@ from .fc2 import (
     FC2LiveIE,
 )
 from .fczenit import FczenitIE
+from .fifa import FifaIE
 from .filmmodu import FilmmoduIE
 from .filmon import (
     FilmOnIE,
diff --git a/yt_dlp/extractor/fifa.py b/yt_dlp/extractor/fifa.py
new file mode 100644
index 000000000..92e81a4a9
--- /dev/null
+++ b/yt_dlp/extractor/fifa.py
@@ -0,0 +1,108 @@
+from .common import InfoExtractor
+
+from ..utils import (
+    int_or_none,
+    traverse_obj,
+    unified_timestamp,
+)
+
+
+class FifaIE(InfoExtractor):
+    _VALID_URL = r'https?://www.fifa.com/fifaplus/(?P<locale>\w{2})/watch/(?P<id>\w+)/?'
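A quick illustrative check (not part of the commit) that the named groups in _VALID_URL line up with the .group('id', 'locale') call used in _real_extract below:

import re

_VALID_URL = r'https?://www.fifa.com/fifaplus/(?P<locale>\w{2})/watch/(?P<id>\w+)/?'
m = re.match(_VALID_URL, 'https://www.fifa.com/fifaplus/en/watch/7on10qPcnyLajDDU3ntg6y')
assert m.group('id', 'locale') == ('7on10qPcnyLajDDU3ntg6y', 'en')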
+ _TESTS = [{ + 'url': 'https://www.fifa.com/fifaplus/en/watch/7on10qPcnyLajDDU3ntg6y', + 'info_dict': { + 'id': '7on10qPcnyLajDDU3ntg6y', + 'title': 'Italy v France | Final | 2006 FIFA World Cup Germany™ | Full Match Replay', + 'description': 'md5:f4520d0ee80529c8ba4134a7d692ff8b', + 'ext': 'mp4', + 'categories': ['FIFA Tournaments', 'Replay'], + 'thumbnail': 'https://digitalhub.fifa.com/transform/fa6f0b3e-a2e9-4cf7-9f32-53c57bcb7360/2006_Final_ITA_FRA', + 'duration': 8164, + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.fifa.com/fifaplus/pt/watch/1cg5r5Qt6Qt12ilkDgb1sV', + 'info_dict': { + 'id': '1cg5r5Qt6Qt12ilkDgb1sV', + 'title': 'Brasil x Alemanha | Semifinais | Copa do Mundo FIFA Brasil 2014 | Compacto', + 'description': 'md5:ba4ffcc084802b062beffc3b4c4b19d6', + 'ext': 'mp4', + 'categories': ['FIFA Tournaments', 'Highlights'], + 'thumbnail': 'https://digitalhub.fifa.com/transform/d8fe6f61-276d-4a73-a7fe-6878a35fd082/FIFAPLS_100EXTHL_2014BRAvGER_TMB', + 'duration': 901, + 'release_timestamp': 1404777600, + 'release_date': '20140708', + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.fifa.com/fifaplus/fr/watch/3C6gQH9C2DLwzNx7BMRQdp', + 'info_dict': { + 'id': '3C6gQH9C2DLwzNx7BMRQdp', + 'title': 'Le but de Josimar contre le Irlande du Nord | Buts classiques', + 'description': 'md5:16f9f789f09960bfe7220fe67af31f34', + 'ext': 'mp4', + 'categories': ['FIFA Tournaments', 'Goal'], + 'duration': 28, + 'thumbnail': 'https://digitalhub.fifa.com/transform/f9301391-f8d9-48b5-823e-c093ac5e3e11/CG_MEN_1986_JOSIMAR', + }, + 'params': {'skip_download': 'm3u8'}, + }] + + def _real_extract(self, url): + video_id, locale = self._match_valid_url(url).group('id', 'locale') + webpage = self._download_webpage(url, video_id) + + preconnect_link = self._search_regex( + r']+rel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"', webpage, 'Preconnect Link') + + json_data = self._download_json( + f'{preconnect_link}/video/GetVideoPlayerData/{video_id}', video_id, + 'Downloading Video Player Data', query={'includeIdents': True, 'locale': locale}) + + video_details = self._download_json( + f'{preconnect_link}/sections/videoDetails/{video_id}', video_id, 'Downloading Video Details', fatal=False) + + preplay_parameters = self._download_json( + f'{preconnect_link}/video/GetVerizonPreplayParameters', video_id, 'Downloading Preplay Parameters', query={ + 'entryId': video_id, + 'assetId': json_data['verizonAssetId'], + 'useExternalId': False, + 'requiresToken': json_data['requiresToken'], + 'adConfig': 'fifaplusvideo', + 'prerollAds': True, + 'adVideoId': json_data['externalVerizonAssetId'], + 'preIdentId': json_data['preIdentId'], + 'postIdentId': json_data['postIdentId'], + }) + + cid = f'{json_data["preIdentId"]},{json_data["verizonAssetId"]},{json_data["postIdentId"]}' + content_data = self._download_json( + f'https://content.uplynk.com/preplay/{cid}/multiple.json', video_id, 'Downloading Content Data', query={ + 'v': preplay_parameters['preplayAPIVersion'], + 'tc': preplay_parameters['tokenCheckAlgorithmVersion'], + 'rn': preplay_parameters['randomNumber'], + 'exp': preplay_parameters['tokenExpirationDate'], + 'ct': preplay_parameters['contentType'], + 'cid': cid, + 'mbtracks': preplay_parameters['tracksAssetNumber'], + 'ad': preplay_parameters['adConfiguration'], + 'ad.preroll': int(preplay_parameters['adPreroll']), + 'ad.cmsid': preplay_parameters['adCMSSourceId'], + 'ad.vid': preplay_parameters['adSourceVideoID'], + 'sig': preplay_parameters['signature'], + }) + + 
formats = self._extract_m3u8_formats(
+            content_data['playURL'], video_id, note='Downloading m3u8 Information')
+
+        return {
+            'id': video_id,
+            'title': json_data.get('title'),
+            'description': json_data.get('description'),
+            'duration': int_or_none(json_data.get('duration')),
+            'release_timestamp': unified_timestamp(video_details.get('dateOfRelease')),
+            'categories': traverse_obj(video_details, (('videoCategory', 'videoSubcategory'),)),
+            'thumbnail': traverse_obj(video_details, ('backgroundImage', 'src')),
+            'formats': formats,
+        }

From 468f104ce7d8da25ba34a1cc860b57de09aea651 Mon Sep 17 00:00:00 2001
From: m4tu4g <71326926+m4tu4g@users.noreply.github.com>
Date: Tue, 3 May 2022 03:06:37 +0530
Subject: [PATCH 1015/2552] [masters] Add extractor (#3358)

Closes #3240
Authored by: m4tu4g
---
 yt_dlp/extractor/extractors.py |  1 +
 yt_dlp/extractor/masters.py    | 39 ++++++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)
 create mode 100644 yt_dlp/extractor/masters.py

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 1d4962bbe..a3da85a0f 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -849,6 +849,7 @@ from .markiza import (
     MarkizaPageIE,
 )
 from .massengeschmacktv import MassengeschmackTVIE
+from .masters import MastersIE
 from .matchtv import MatchTVIE
 from .mdr import MDRIE
 from .medaltv import MedalTVIE
diff --git a/yt_dlp/extractor/masters.py b/yt_dlp/extractor/masters.py
new file mode 100644
index 000000000..d1ce07f10
--- /dev/null
+++ b/yt_dlp/extractor/masters.py
@@ -0,0 +1,39 @@
+from __future__ import unicode_literals
+from .common import InfoExtractor
+from ..utils import (
+    traverse_obj,
+    unified_strdate,
+)
+
+
+class MastersIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?masters\.com/en_US/watch/(?P<date>\d{4}-\d{2}-\d{2})/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.masters.com/en_US/watch/2022-04-07/16493755593805191/sungjae_im_thursday_interview_2022.html',
+        'info_dict': {
+            'id': '16493755593805191',
+            'ext': 'mp4',
+            'title': 'Sungjae Im: Thursday Interview 2022',
+            'upload_date': '20220407',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        }
+    }]
+
+    def _real_extract(self, url):
+        video_id, upload_date = self._match_valid_url(url).group('id', 'date')
+        content_resp = self._download_json(
+            f'https://www.masters.com/relatedcontent/rest/v2/masters_v1/en/content/masters_v1_{video_id}_en',
+            video_id)
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(traverse_obj(content_resp, ('media', 'm3u8')), video_id, 'mp4')
+        self._sort_formats(formats)
+
+        thumbnails = [{'id': name, 'url': url} for name, url in traverse_obj(content_resp, ('images', 0), default={}).items()]
+
+        return {
+            'id': video_id,
+            'title': content_resp.get('title'),
+            'formats': formats,
+            'subtitles': subtitles,
+            'upload_date': unified_strdate(upload_date),
+            'thumbnails': thumbnails,
+        }

From 86925f63344267fca38fe67b3918990081aba0b4 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Wed, 4 May 2022 19:07:34 +0530
Subject: [PATCH 1016/2552] [Fifa] Sort formats

Closes #3632
---
 yt_dlp/extractor/fifa.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/yt_dlp/extractor/fifa.py b/yt_dlp/extractor/fifa.py
index 92e81a4a9..bdc8d7fbf 100644
--- a/yt_dlp/extractor/fifa.py
+++ b/yt_dlp/extractor/fifa.py
@@ -8,7 +8,7 @@ from ..utils import (


 class FifaIE(InfoExtractor):
-    _VALID_URL = r'https?://www.fifa.com/fifaplus/(?P<locale>\w{2})/watch/(?P<id>\w+)/?'
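+    # The optional ([^#?]+/) group below lets the pattern also match URLs with an
+    # intermediate path segment, e.g. .../watch/<some-section>/<id> (illustrative
+    # path); only the trailing id group is captured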
+    _VALID_URL = r'https?://www.fifa.com/fifaplus/(?P<locale>\w{2})/watch/([^#?]+/)?(?P<id>\w+)'
     _TESTS = [{
         'url': 'https://www.fifa.com/fifaplus/en/watch/7on10qPcnyLajDDU3ntg6y',
         'info_dict': {
@@ -93,8 +93,8 @@ class FifaIE(InfoExtractor):
             'sig': preplay_parameters['signature'],
         })

-        formats = self._extract_m3u8_formats(
-            content_data['playURL'], video_id, note='Downloading m3u8 Information')
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(content_data['playURL'], video_id)
+        self._sort_formats(formats)

         return {
             'id': video_id,
@@ -105,4 +105,5 @@ class FifaIE(InfoExtractor):
             'categories': traverse_obj(video_details, (('videoCategory', 'videoSubcategory'),)),
             'thumbnail': traverse_obj(video_details, ('backgroundImage', 'src')),
             'formats': formats,
+            'subtitles': subtitles,
         }

From f963b7ab189790ae516a04579d301f1cd79cf26f Mon Sep 17 00:00:00 2001
From: Ha Tien Loi
Date: Wed, 4 May 2022 21:13:52 +0700
Subject: [PATCH 1017/2552] [Likee] Add extractor (#3625)

Closes #3603
Authored by: hatienl0i261299
---
 yt_dlp/extractor/extractors.py |   4 +
 yt_dlp/extractor/likee.py      | 193 +++++++++++++++++++++++++++++++++
 2 files changed, 197 insertions(+)
 create mode 100644 yt_dlp/extractor/likee.py

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index a3da85a0f..c29a78deb 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -788,6 +788,10 @@ from .lifenews import (
     LifeNewsIE,
     LifeEmbedIE,
 )
+from .likee import (
+    LikeeIE,
+    LikeeUserIE
+)
 from .limelight import (
     LimelightMediaIE,
     LimelightChannelIE,
diff --git a/yt_dlp/extractor/likee.py b/yt_dlp/extractor/likee.py
new file mode 100644
index 000000000..b53e7a5ca
--- /dev/null
+++ b/yt_dlp/extractor/likee.py
@@ -0,0 +1,193 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    js_to_json,
+    parse_iso8601,
+    str_or_none,
+    traverse_obj,
+)
+
+
+class LikeeIE(InfoExtractor):
+    IE_NAME = 'likee'
+    _VALID_URL = r'(?x)https?://(www\.)?likee\.video/(?:(?P<channel_name>[^/]+)/video/|v/)(?P<id>\w+)'
+    _TESTS = [{
+        'url': 'https://likee.video/@huynh_hong_quan_/video/7093444807096327263',
+        'info_dict': {
+            'id': '7093444807096327263',
+            'ext': 'mp4',
+            'title': '🤴🤴🤴',
+            'description': 'md5:9a7ebe816f0e78722ee5ed76f75983b4',
+            'thumbnail': r're:^https?://.+\.jpg',
+            'uploader': 'Huỳnh Hồng Quân ',
+            'play_count': int,
+            'download_count': int,
+            'artist': 'Huỳnh Hồng Quân ',
+            'timestamp': 1651571320,
+            'upload_date': '20220503',
+            'view_count': int,
+            'uploader_id': 'huynh_hong_quan_',
+            'duration': 12374,
+            'comment_count': int,
+            'like_count': int,
+        },
+    }, {
+        'url': 'https://likee.video/@649222262/video/7093167848050058862',
+        'info_dict': {
+            'id': '7093167848050058862',
+            'ext': 'mp4',
+            'title': 'likee video #7093167848050058862',
+            'description': 'md5:3f971c8c6ee8a216f2b1a9094c5de99f',
+            'thumbnail': r're:^https?://.+\.jpg',
+            'comment_count': int,
+            'like_count': int,
+            'uploader': 'Vương Phước Nhi',
+            'download_count': int,
+            'timestamp': 1651506835,
+            'upload_date': '20220502',
+            'duration': 60024,
+            'play_count': int,
+            'artist': 'Vương Phước Nhi',
+            'uploader_id': '649222262',
+            'view_count': int,
+        },
+    }, {
+        'url': 'https://likee.video/@fernanda_rivasg/video/6932224568407629502',
+        'info_dict': {
+            'id': '6932224568407629502',
+            'ext': 'mp4',
+            'title': 'Un trend viejito🔥 #LIKEE #Ferlovers #trend ',
+            'description': 'md5:c42b903a72a99d6d8b73e3d1126fbcef',
+            'thumbnail': r're:^https?://.+\.jpg',
+            'comment_count': int,
+            'duration': 9684,
+            'uploader_id':
'fernanda_rivasg', + 'view_count': int, + 'play_count': int, + 'artist': 'La Cami La✨', + 'download_count': int, + 'like_count': int, + 'uploader': 'Fernanda Rivas🎶', + 'timestamp': 1614034308, + 'upload_date': '20210222', + }, + }, { + 'url': 'https://likee.video/v/k6QcOp', + 'info_dict': { + 'id': 'k6QcOp', + 'ext': 'mp4', + 'title': '#AguaChallenge tú ya lo intentaste?😱🤩', + 'description': 'md5:b0cc462689d4ff2b624daa4dba7640d9', + 'thumbnail': r're:^https?://.+\.jpg', + 'comment_count': int, + 'duration': 18014, + 'play_count': int, + 'view_count': int, + 'timestamp': 1611694774, + 'like_count': int, + 'uploader': 'Fernanda Rivas🎶', + 'uploader_id': 'fernanda_rivasg', + 'download_count': int, + 'artist': 'ʟᴇʀɪᴋ_ᴜɴɪᴄᴏʀɴ♡︎', + 'upload_date': '20210126', + }, + }, { + 'url': 'https://www.likee.video/@649222262/video/7093167848050058862', + 'only_matching': True, + }, { + 'url': 'https://www.likee.video/v/k6QcOp', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + info = self._parse_json( + self._search_regex(r'window\.data\s=\s({.+?});', webpage, 'video info'), + video_id, transform_source=js_to_json) + video_url = traverse_obj(info, 'video_url', ('originVideoInfo', 'video_url')) + if not video_url: + self.raise_no_formats('Video was deleted', expected=True) + formats = [{ + 'format_id': 'mp4-with-watermark', + 'url': video_url, + 'height': info.get('video_height'), + 'width': info.get('video_width'), + }, { + 'format_id': 'mp4-without-watermark', + 'url': video_url.replace('_4', ''), + 'height': info.get('video_height'), + 'width': info.get('video_width'), + 'quality': 1, + }] + self._sort_formats(formats) + return { + 'id': video_id, + 'title': info.get('msgText'), + 'description': info.get('share_desc'), + 'view_count': int_or_none(info.get('video_count')), + 'like_count': int_or_none(info.get('likeCount')), + 'play_count': int_or_none(info.get('play_count')), + 'download_count': int_or_none(info.get('download_count')), + 'comment_count': int_or_none(info.get('comment_count')), + 'uploader': str_or_none(info.get('nick_name')), + 'uploader_id': str_or_none(info.get('likeeId')), + 'artist': str_or_none(traverse_obj(info, ('sound', 'owner_name'))), + 'timestamp': parse_iso8601(info.get('uploadDate')), + 'thumbnail': info.get('coverUrl'), + 'duration': int_or_none(traverse_obj(info, ('option_data', 'dur'))), + 'formats': formats, + } + + +class LikeeUserIE(InfoExtractor): + IE_NAME = 'likee:user' + _VALID_URL = r'https?://(www\.)?likee\.video/(?P[^/]+)/?$' + _TESTS = [{ + 'url': 'https://likee.video/@fernanda_rivasg', + 'info_dict': { + 'id': '925638334', + 'title': 'fernanda_rivasg', + }, + 'playlist_mincount': 500, + }, { + 'url': 'https://likee.video/@may_hmoob', + 'info_dict': { + 'id': '2943949041', + 'title': 'may_hmoob', + }, + 'playlist_mincount': 80, + }] + _PAGE_SIZE = 50 + _API_GET_USER_VIDEO = 'https://api.like-video.com/likee-activity-flow-micro/videoApi/getUserVideo' + + def _entries(self, user_name, user_id): + last_post_id = '' + while True: + user_videos = self._download_json( + self._API_GET_USER_VIDEO, user_name, + data=json.dumps({ + 'uid': user_id, + 'count': self._PAGE_SIZE, + 'lastPostId': last_post_id, + 'tabType': 0, + }).encode('utf-8'), + headers={'content-type': 'application/json'}, + note=f'Get user info with lastPostId #{last_post_id}') + items = traverse_obj(user_videos, ('data', 'videoList')) + if not items: + break + for item in items: + last_post_id = 
item['postId'] + yield self.url_result(f'https://likee.video/{user_name}/video/{last_post_id}') + + def _real_extract(self, url): + user_name = self._match_id(url) + webpage = self._download_webpage(url, user_name) + info = self._parse_json( + self._search_regex(r'window\.data\s*=\s*({.+?});', webpage, 'user info'), + user_name, transform_source=js_to_json) + user_id = traverse_obj(info, ('userinfo', 'uid')) + return self.playlist_result(self._entries(user_name, user_id), user_id, traverse_obj(info, ('userinfo', 'user_name'))) From b58f8d8f2c6389ad07fa31a81a6489cae7d205c9 Mon Sep 17 00:00:00 2001 From: Lesmiscore Date: Wed, 4 May 2022 23:16:56 +0900 Subject: [PATCH 1018/2552] [TVer] Improve extraction (#3634) Authored by: Lesmiscore --- yt_dlp/extractor/tver.py | 43 +++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/yt_dlp/extractor/tver.py b/yt_dlp/extractor/tver.py index 19236f8e8..b04575bd5 100644 --- a/yt_dlp/extractor/tver.py +++ b/yt_dlp/extractor/tver.py @@ -1,8 +1,10 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, + join_nonempty, smuggle_url, str_or_none, + strip_or_none, traverse_obj, ) @@ -11,19 +13,16 @@ class TVerIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?Plp|corner|series|episodes?|feature|tokyo2020/video)/)+(?P[a-zA-Z0-9]+)' _TESTS = [{ 'skip': 'videos are only available for 7 days', - 'url': 'https://tver.jp/episodes/ephss8yveb', + 'url': 'https://tver.jp/episodes/ep83nf3w4p', 'info_dict': { - 'title': '#44 料理と値段と店主にびっくり オモてなしすぎウマい店 2時間SP', - 'description': 'md5:66985373a66fed8ad3cd595a3cfebb13', - }, - 'add_ie': ['BrightcoveNew'], - }, { - 'skip': 'videos are only available for 7 days', - 'url': 'https://tver.jp/lp/episodes/ep6f16g26p', - 'info_dict': { - # sorry but this is "correct" - 'title': '4月11日(月)23時06分 ~ 放送予定', - 'description': 'md5:4029cc5f4b1e8090dfc5b7bd2bc5cd0b', + 'title': '家事ヤロウ!!! 売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着!', + 'description': 'md5:dc2c06b6acc23f1e7c730c513737719b', + 'series': '家事ヤロウ!!!', + 'episode': '売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着!', + 'alt_title': '売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着!', + 'channel': 'テレビ朝日', + 'onair_label': '5月3日(火)放送分', + 'ext_title': '家事ヤロウ!!! 売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着! 
テレビ朝日 5月3日(火)放送分', }, 'add_ie': ['BrightcoveNew'], }, { @@ -78,14 +77,26 @@ class TVerIE(InfoExtractor): 'x-tver-platform-type': 'web' }) + additional_content_info = traverse_obj( + additional_info, ('result', 'episode', 'content'), get_all=False) or {} + episode = strip_or_none(additional_content_info.get('title')) + series = str_or_none(additional_content_info.get('seriesTitle')) + title = ( + join_nonempty(series, episode, delim=' ') + or str_or_none(video_info.get('title'))) + provider = str_or_none(additional_content_info.get('productionProviderName')) + onair_label = str_or_none(additional_content_info.get('broadcastDateLabel')) + return { '_type': 'url_transparent', - 'title': str_or_none(video_info.get('title')), + 'title': title, + 'series': series, + 'episode': episode, + # an another title which is considered "full title" for some viewers + 'alt_title': join_nonempty(title, provider, onair_label, delim=' '), + 'channel': provider, 'description': str_or_none(video_info.get('description')), 'url': smuggle_url( self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id), {'geo_countries': ['JP']}), - 'series': traverse_obj( - additional_info, ('result', ('episode', 'series'), 'content', ('seriesTitle', 'title')), - get_all=False), 'ie_key': 'BrightcoveNew', } From 4f7a98c565873ea7a758efcd86e4296b6a06e817 Mon Sep 17 00:00:00 2001 From: rand-net <34341872+rand-net@users.noreply.github.com> Date: Wed, 4 May 2022 14:26:45 +0000 Subject: [PATCH 1019/2552] [KhanAcademy] Fix extractor (#3462) Authored by: rand-net --- yt_dlp/extractor/khanacademy.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/khanacademy.py b/yt_dlp/extractor/khanacademy.py index 83cfeadba..5333036a8 100644 --- a/yt_dlp/extractor/khanacademy.py +++ b/yt_dlp/extractor/khanacademy.py @@ -25,16 +25,21 @@ class KhanAcademyBaseIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) - component_props = self._parse_json(self._download_json( - 'https://www.khanacademy.org/api/internal/graphql', + content = self._download_json( + 'https://www.khanacademy.org/api/internal/graphql/FetchContentData', display_id, query={ - 'hash': 1604303425, + 'fastly_cacheable': 'persist_until_publish', + 'hash': '4134764944', + 'lang': 'en', 'variables': json.dumps({ 'path': display_id, - 'queryParams': '', + 'queryParams': 'lang=en', + 'isModal': False, + 'followRedirects': True, + 'countryCode': 'US', }), - })['data']['contentJson'], display_id)['componentProps'] - return self._parse_component_props(component_props) + })['data']['contentJson'] + return self._parse_component_props(self._parse_json(content, display_id)['componentProps']) class KhanAcademyIE(KhanAcademyBaseIE): From ff4d7860d50407f8a1daa1094f65300e8455ec92 Mon Sep 17 00:00:00 2001 From: i6t <62123048+i6t@users.noreply.github.com> Date: Thu, 5 May 2022 00:49:46 +0900 Subject: [PATCH 1020/2552] [iwara] Add playlist extractors (#3639) Authored by: i6t --- yt_dlp/extractor/extractors.py | 6 ++- yt_dlp/extractor/iwara.py | 95 ++++++++++++++++++++++++++++++++-- 2 files changed, 96 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index c29a78deb..2c09a161e 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -702,7 +702,11 @@ from .ivi import ( IviCompilationIE ) from .ivideon import IvideonIE -from .iwara import IwaraIE +from .iwara import ( + IwaraIE, + IwaraPlaylistIE, + IwaraUserIE, +) from .izlesene import IzleseneIE from .jable 
import (
     JableIE,
diff --git a/yt_dlp/extractor/iwara.py b/yt_dlp/extractor/iwara.py
index 974b4be7d..4b88da35f 100644
--- a/yt_dlp/extractor/iwara.py
+++ b/yt_dlp/extractor/iwara.py
@@ -1,19 +1,28 @@
 import re
+import urllib

 from .common import InfoExtractor
-from ..compat import compat_urllib_parse_urlparse
 from ..utils import (
     int_or_none,
     mimetype2ext,
     remove_end,
     url_or_none,
+    urljoin,
     unified_strdate,
     strip_or_none,
 )


-class IwaraIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos/(?P<id>[a-zA-Z0-9]+)'
+class IwaraBaseIE(InfoExtractor):
+    _BASE_REGEX = r'(?P<base_url>https?://(?:www\.|ecchi\.)?iwara\.tv)'
+
+    def _extract_playlist(self, base_url, webpage):
+        for path in re.findall(r'class="title">\s*<a[^<]+href="([^"]+)', webpage):
+            yield self.url_result(urljoin(base_url, path))
+
+
+class IwaraIE(IwaraBaseIE):
+    _VALID_URL = fr'{IwaraBaseIE._BASE_REGEX}/videos/(?P<id>[a-zA-Z0-9]+)'
     _TESTS = [{
         'url': 'http://iwara.tv/videos/amVwUl1EHpAD9RD',
         # md5 is unstable
@@ -58,7 +67,7 @@ class IwaraIE(InfoExtractor):

         webpage, urlh = self._download_webpage_handle(url, video_id)

-        hostname = compat_urllib_parse_urlparse(urlh.geturl()).hostname
+        hostname = urllib.parse.urlparse(urlh.geturl()).hostname
         # ecchi is 'sexy' in Japanese
         age_limit = 18 if hostname.split('.')[0] == 'ecchi' else 0

@@ -118,3 +127,81 @@ class IwaraIE(InfoExtractor):
             'upload_date': upload_date,
             'description': description,
         }
+
+
+class IwaraPlaylistIE(IwaraBaseIE):
+    _VALID_URL = fr'{IwaraBaseIE._BASE_REGEX}/playlist/(?P<id>[^/?#&]+)'
+    IE_NAME = 'iwara:playlist'
+
+    _TESTS = [{
+        'url': 'https://ecchi.iwara.tv/playlist/best-enf',
+        'info_dict': {
+            'title': 'Best enf',
+            'uploader': 'Jared98112',
+            'id': 'best-enf',
+        },
+        'playlist_mincount': 1097,
+    }, {
+        # urlencoded
+        'url': 'https://ecchi.iwara.tv/playlist/%E3%83%97%E3%83%AC%E3%82%A4%E3%83%AA%E3%82%B9%E3%83%88-2',
+        'info_dict': {
+            'id': 'プレイリスト-2',
+            'title': 'プレイリスト',
+            'uploader': 'mainyu',
+        },
+        'playlist_mincount': 91,
+    }]
+
+    def _real_extract(self, url):
+        playlist_id, base_url = self._match_valid_url(url).group('id', 'base_url')
+        playlist_id = urllib.parse.unquote(playlist_id)
+        webpage = self._download_webpage(url, playlist_id)
+
+        return {
+            '_type': 'playlist',
+            'id': playlist_id,
+            'title': self._html_search_regex(r'class="title"[^>]*>([^<]+)', webpage, 'title', fatal=False),
+            'uploader': self._html_search_regex(r'<h2>([^<]+)', webpage, 'uploader', fatal=False),
+            'entries': self._extract_playlist(base_url, webpage),
+        }
+
+
+class IwaraUserIE(IwaraBaseIE):
+    _VALID_URL = fr'{IwaraBaseIE._BASE_REGEX}/users/(?P<id>[^/?#&]+)'
+    IE_NAME = 'iwara:user'
+
+    _TESTS = [{
+        'url': 'https://ecchi.iwara.tv/users/CuteMMD',
+        'info_dict': {
+            'id': 'CuteMMD',
+        },
+        'playlist_mincount': 198,
+    }, {
+        # urlencoded
+        'url': 'https://ecchi.iwara.tv/users/%E5%92%95%E5%98%BF%E5%98%BF',
+        'info_dict': {
+            'id': '咕嘿嘿',
+        },
+        'playlist_mincount': 141,
+    }]
+
+    def _entries(self, playlist_id, base_url, webpage):
+        yield from self._extract_playlist(base_url, webpage)
+
+        page_urls = re.findall(
+            r'class="pager-item"[^>]*>\s*<a[^<]+href="([^"]+)', webpage)
+
+        for n, path in enumerate(page_urls, 2):
+            yield from self._extract_playlist(
+                base_url, self._download_webpage(
+                    urljoin(base_url, path), playlist_id, note=f'Downloading playlist page {n}'))
+
+    def _real_extract(self, url):
+        playlist_id, base_url = self._match_valid_url(url).group('id', 'base_url')
+        playlist_id = urllib.parse.unquote(playlist_id)
+
+        return self.playlist_result(
+            self._entries(playlist_id, base_url, self._download_webpage(url, playlist_id)), playlist_id)

From: FestplattenSchnitzel
Date: Thu, 5 May 2022 19:31:54 +0200
Subject: [PATCH 1021/2552] [VideocampusSachsen] Improve extractor (#3604)

Authored by: FestplattenSchnitzel
---
 yt_dlp/extractor/extractors.py          |   5 +-
 yt_dlp/extractor/videocampus_sachsen.py | 159 +++++++++++++++++-------
 2 files changed, 117 insertions(+), 47 deletions(-)

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 2c09a161e..6f6862915 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -1899,10 +1899,7 @@ from .vice import (
 from .vidbit import VidbitIE
 from .viddler import ViddlerIE
 from .videa import VideaIE
-from .videocampus_sachsen import (
-    VideocampusSachsenIE,
-    VideocampusSachsenEmbedIE,
-)
+from .videocampus_sachsen import VideocampusSachsenIE
 from .videodetective import VideoDetectiveIE
 from .videofyme import VideofyMeIE
 from .videomore import (
diff --git a/yt_dlp/extractor/videocampus_sachsen.py b/yt_dlp/extractor/videocampus_sachsen.py
index fe9e061ae..906412f08 100644
--- a/yt_dlp/extractor/videocampus_sachsen.py
+++ b/yt_dlp/extractor/videocampus_sachsen.py
@@ -1,11 +1,70 @@
+import re
+
 from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import ExtractorError


 class VideocampusSachsenIE(InfoExtractor):
-    _VALID_URL = r'''(?x)https?://videocampus\.sachsen\.de/(?:
+    IE_NAME = 'Vimp'
+    _INSTANCES = (
+        'campus.demo.vimp.com',
+        'corporate.demo.vimp.com',
+        'dancehalldatabase.com',
+        'educhannel.hs-gesundheit.de',
+        'emedia.ls.haw-hamburg.de',
+        'globale-evolution.net',
+        'k210039.vimp.mivitec.net',
+        'media.cmslegal.com',
+        'media.hs-furtwangen.de',
+        'media.hwr-berlin.de',
+        'mediathek.dkfz.de',
+        'mediathek.htw-berlin.de',
+        'mediathek.polizei-bw.de',
+        'medien.hs-merseburg.de',
+        'mportal.europa-uni.de',
+        'pacific.demo.vimp.com',
+        'slctv.com',
+        'tube.isbonline.cn',
+        'univideo.uni-kassel.de',
+        'ursula2.genetics.emory.edu',
+        'ursulablicklevideoarchiv.com',
+        'v.agrarumweltpaedagogik.at',
+        'video.eplay-tv.de',
+        'video.fh-dortmund.de',
+        'video.hs-offenburg.de',
+        'video.hs-pforzheim.de',
+        'video.hspv.nrw.de',
+        'video.irtshdf.fr',
+        'video.pareygo.de',
+        'video.tu-freiberg.de',
+        'videocampus.sachsen.de',
+        'videoportal.uni-freiburg.de',
+        'videoportal.vm.uni-freiburg.de',
+        'videos.duoc.cl',
+        'videos.uni-paderborn.de',
+        'vimp-bemus.udk-berlin.de',
+        'vimp.aekwl.de',
+        'vimp.hs-mittweida.de',
+        'vimp.oth-regensburg.de',
+        'vimp.ph-heidelberg.de',
+        'vimp.sma-events.com',
+        'vimp.weka-fachmedien.de',
+        'webtv.univ-montp3.fr',
+        'www.b-tu.de/media',
+        'www.bigcitytv.de',
+        'www.cad-videos.de',
+        'www.fh-bielefeld.de/medienportal',
+        'www.orvovideo.com',
+        'www.rwe.tv',
+        'www.wenglor-media.com',
+        'www2.univ-sba.dz',
+    )
+    _VALID_URL = r'''(?x)https?://(?P<host>%s)/(?:
         m/(?P<tmp_id>[0-9a-f]+)|
-        (?:category/)?video/(?P<display_id>[\w-]+)/(?P<id>[0-9a-f]{32})
-    )'''
+        (?:category/)?video/(?P<display_id>[\w-]+)/(?P<id>[0-9a-f]{32})|
+        media/embed.*(?:\?|&)key=(?P<embed_id>[0-9a-f]{32}&?)
+    )''' % ('|'.join(map(re.escape, _INSTANCES)))

     _TESTS = [
         {
@@ -13,6 +72,7 @@ class VideocampusSachsenIE(InfoExtractor):
             'info_dict': {
                 'id': 'e6b9349905c1628631f175712250f2a1',
                 'title': 'Konstruktiver Entwicklungsprozess Vorlesung 7',
+                'description': 'Konstruktiver Entwicklungsprozess Vorlesung 7',
                 'ext': 'mp4',
             },
         },
@@ -21,6 +81,7 @@
             'info_dict': {
                 'id': 'fc99c527e4205b121cb7c74433469262',
                 'title': 'Was ist selbstgesteuertes Lernen?',
+                'description': 'md5:196aa3b0509a526db62f84679522a2f5',
                 'display_id': 'Was-ist-selbstgesteuertes-Lernen',
                 'ext': 'mp4',
             },
@@ -30,43 +91,32 @@
             'info_dict': {
                 'id': '09d4ed029002eb1bdda610f1103dd54c',
                 'title': 'Tutorial zur Nutzung von Adobe Connect aus Veranstalter-Sicht',
+                'description': 'md5:3d379ca3cc17b9da6784d7f58cca4d58',
                 'display_id': 'Tutorial-zur-Nutzung-von-Adobe-Connect-aus-Veranstalter-Sicht',
                 'ext': 'mp4',
             },
         },
-    ]
-
-    def _real_extract(self, url):
-        video_id, tmp_id, display_id = self._match_valid_url(url).group('id', 'tmp_id', 'display_id')
-        webpage = self._download_webpage(url, video_id or tmp_id, fatal=False) or ''
-
-        if not tmp_id:
-            video_id = self._html_search_regex(
-                r'src="https?://videocampus\.sachsen\.de/media/embed\?key=([0-9a-f]+)&',
-                webpage, 'video_id')
-
-        title = self._html_search_regex(
-            (r'<h1[^>]*>(?P<content>[^<]+)</h1>
    ', *self._meta_regex('title')), - webpage, 'title', group='content', fatal=False) - - formats, subtitles = self._extract_m3u8_formats_and_subtitles( - f'https://videocampus.sachsen.de/media/hlsMedium/key/{video_id}/format/auto/ext/mp4/learning/0/path/m3u8', - video_id, 'mp4', 'm3u8_native', m3u8_id='hls') - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'display_id': display_id, - 'formats': formats, - 'subtitles': subtitles - } - - -class VideocampusSachsenEmbedIE(InfoExtractor): - _VALID_URL = r'https?://videocampus.sachsen.de/media/embed\?key=(?P[0-9a-f]+)' - - _TESTS = [ + { + 'url': 'https://www2.univ-sba.dz/video/Presentation-de-la-Faculte-de-droit-et-des-sciences-politiques-Journee-portes-ouvertes-202122/0183356e41af7bfb83d7667b20d9b6a3', + 'info_dict': { + 'url': 'https://www2.univ-sba.dz/getMedium/0183356e41af7bfb83d7667b20d9b6a3.mp4', + 'id': '0183356e41af7bfb83d7667b20d9b6a3', + 'title': 'Présentation de la Faculté de droit et des sciences politiques - Journée portes ouvertes 2021/22', + 'description': 'md5:508958bd93e0ca002ac731d94182a54f', + 'display_id': 'Presentation-de-la-Faculte-de-droit-et-des-sciences-politiques-Journee-portes-ouvertes-202122', + 'ext': 'mp4', + } + }, + { + 'url': 'https://vimp.weka-fachmedien.de/video/Preisverleihung-Produkte-des-Jahres-2022/c8816f1cc942c12b6cce57c835cffd7c', + 'info_dict': { + 'id': 'c8816f1cc942c12b6cce57c835cffd7c', + 'title': 'Preisverleihung »Produkte des Jahres 2022«', + 'description': 'md5:60c347568ca89aa25b772c4ea564ebd3', + 'display_id': 'Preisverleihung-Produkte-des-Jahres-2022', + 'ext': 'mp4', + }, + }, { 'url': 'https://videocampus.sachsen.de/media/embed?key=fc99c527e4205b121cb7c74433469262', 'info_dict': { @@ -78,18 +128,41 @@ class VideocampusSachsenEmbedIE(InfoExtractor): ] def _real_extract(self, url): - video_id = self._match_id(url) + host, video_id, tmp_id, display_id, embed_id = self._match_valid_url(url).group( + 'host', 'id', 'tmp_id', 'display_id', 'embed_id') + webpage = self._download_webpage(url, video_id or tmp_id, fatal=False) or '' + + if not video_id: + video_id = embed_id or self._html_search_regex( + rf'src="https?://{host}/media/embed.*(?:\?|&)key=([0-9a-f]+)&?', + webpage, 'video_id') - webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r']*title="([^"<]+)"', webpage, 'title', fatal=False) - formats, subtitles = self._extract_m3u8_formats_and_subtitles( - f'https://videocampus.sachsen.de/media/hlsMedium/key/{video_id}/format/auto/ext/mp4/learning/0/path/m3u8', - video_id, 'mp4', 'm3u8_native', m3u8_id='hls') + if not (display_id or tmp_id): + # Title, description from embedded page's meta wouldn't be correct + title = self._html_search_regex(r']* title="([^"<]+)"', webpage, 'title', fatal=False) + description = None + else: + title = self._html_search_meta(('og:title', 'twitter:title', 'title'), webpage, fatal=False) + description = self._html_search_meta( + ('og:description', 'twitter:description', 'description'), webpage, default=None) + + formats, subtitles = [], {} + try: + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + f'https://{host}/media/hlsMedium/key/{video_id}/format/auto/ext/mp4/learning/0/path/m3u8', + video_id, 'mp4', m3u8_id='hls', fatal=True) + except ExtractorError as e: + if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (404, 500): + raise + + formats.append({'url': f'https://{host}/getMedium/{video_id}.mp4'}) self._sort_formats(formats) return { 'id': video_id, 'title': title, + 
'description': description, + 'display_id': display_id, 'formats': formats, - 'subtitles': subtitles, + 'subtitles': subtitles } From 91e5e839d3017577dabba7e9b142910ec32a495a Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 7 May 2022 08:03:41 +0530 Subject: [PATCH 1022/2552] [youtube] Deprioritize format 22 Reduces chance of encountering #3372 --- yt_dlp/extractor/youtube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 4178a2f14..1c6e20510 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3183,7 +3183,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): ' (default)' if language_preference > 0 else ''), fmt.get('qualityLabel') or quality.replace('audio_quality_', ''), throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '), - 'source_preference': -10 if throttled else -1, + # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372 + 'source_preference': -10 if throttled else -5 if itag == '22' else -1, 'fps': int_or_none(fmt.get('fps')) or None, 'height': height, 'quality': q(quality), From 89f383c4ee7b0b7674acc5a584fc754df6e5f118 Mon Sep 17 00:00:00 2001 From: Ha Tien Loi Date: Sat, 7 May 2022 15:44:41 +0700 Subject: [PATCH 1023/2552] [gronkh] Add playlist extractors (#3337) Closes #3300 Authored by: hatienl0i261299 --- yt_dlp/extractor/extractors.py | 6 +++- yt_dlp/extractor/gronkh.py | 59 +++++++++++++++++++++++++++++++++- 2 files changed, 63 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 6f6862915..0ba129f96 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -601,7 +601,11 @@ from .gopro import GoProIE from .goshgay import GoshgayIE from .gotostage import GoToStageIE from .gputechconf import GPUTechConfIE -from .gronkh import GronkhIE +from .gronkh import ( + GronkhIE, + GronkhFeedIE, + GronkhVodsIE +) from .groupon import GrouponIE from .hbo import HBOIE from .hearthisat import HearThisAtIE diff --git a/yt_dlp/extractor/gronkh.py b/yt_dlp/extractor/gronkh.py index 52bbf3bc7..c112c7857 100644 --- a/yt_dlp/extractor/gronkh.py +++ b/yt_dlp/extractor/gronkh.py @@ -1,5 +1,11 @@ +import functools + from .common import InfoExtractor -from ..utils import unified_strdate +from ..utils import ( + OnDemandPagedList, + traverse_obj, + unified_strdate, +) class GronkhIE(InfoExtractor): @@ -41,3 +47,54 @@ class GronkhIE(InfoExtractor): 'formats': formats, 'subtitles': subtitles, } + + +class GronkhFeedIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?gronkh\.tv(?:/feed)?/?(?:#|$)' + IE_NAME = 'gronkh:feed' + + _TESTS = [{ + 'url': 'https://gronkh.tv/feed', + 'info_dict': { + 'id': 'feed', + }, + 'playlist_count': 16, + }, { + 'url': 'https://gronkh.tv', + 'only_matching': True, + }] + + def _entries(self): + for type_ in ('recent', 'views'): + info = self._download_json( + f'https://api.gronkh.tv/v1/video/discovery/{type_}', 'feed', note=f'Downloading {type_} API JSON') + for item in traverse_obj(info, ('discovery', ...)) or []: + yield self.url_result(f'https://gronkh.tv/watch/stream/{item["episode"]}', GronkhIE, item.get('title')) + + def _real_extract(self, url): + return self.playlist_result(self._entries(), 'feed') + + +class GronkhVodsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?gronkh\.tv/vods/streams/?(?:#|$)' + IE_NAME = 'gronkh:vods' + + _TESTS = [{ + 'url': 'https://gronkh.tv/vods/streams', + 'info_dict': { + 'id': 'vods', + }, 
+ 'playlist_mincount': 150, + }] + _PER_PAGE = 25 + + def _fetch_page(self, page): + items = traverse_obj(self._download_json( + 'https://api.gronkh.tv/v1/search', 'vods', query={'offset': self._PER_PAGE * page, 'first': self._PER_PAGE}, + note=f'Downloading stream video page {page + 1}'), ('results', 'videos', ...)) + for item in items or []: + yield self.url_result(f'https://gronkh.tv/watch/stream/{item["episode"]}', GronkhIE, item['episode'], item.get('title')) + + def _real_extract(self, url): + entries = OnDemandPagedList(functools.partial(self._fetch_page), self._PER_PAGE) + return self.playlist_result(entries, 'vods') From 54044decd0d8ffecaa9dee0ec82574c4890dcd8f Mon Sep 17 00:00:00 2001 From: Ha Tien Loi Date: Sat, 7 May 2022 17:25:58 +0700 Subject: [PATCH 1024/2552] [ZingMp3] Add chart and user extractors (#3423) Authored by: hatienl0i261299 --- yt_dlp/extractor/extractors.py | 4 + yt_dlp/extractor/zingmp3.py | 241 ++++++++++++++++++++++++++++++--- 2 files changed, 229 insertions(+), 16 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 0ba129f96..ee5ced11a 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -2162,6 +2162,10 @@ from .zhihu import ZhihuIE from .zingmp3 import ( ZingMp3IE, ZingMp3AlbumIE, + ZingMp3ChartHomeIE, + ZingMp3WeekChartIE, + ZingMp3ChartMusicVideoIE, + ZingMp3UserIE, ) from .zoom import ZoomIE from .zype import ZypeIE diff --git a/yt_dlp/extractor/zingmp3.py b/yt_dlp/extractor/zingmp3.py index 42a8ac056..7238bf2fd 100644 --- a/yt_dlp/extractor/zingmp3.py +++ b/yt_dlp/extractor/zingmp3.py @@ -1,11 +1,15 @@ +import functools import hashlib import hmac +import json import urllib.parse from .common import InfoExtractor from ..utils import ( + OnDemandPagedList, int_or_none, traverse_obj, + urljoin, ) @@ -14,15 +18,26 @@ class ZingMp3BaseIE(InfoExtractor): _GEO_COUNTRIES = ['VN'] _DOMAIN = 'https://zingmp3.vn' _SLUG_API = { + # For audio/video 'bai-hat': '/api/v2/page/get/song', 'embed': '/api/v2/page/get/song', 'video-clip': '/api/v2/page/get/video', - 'playlist': '/api/v2/page/get/playlist', - 'album': '/api/v2/page/get/playlist', 'lyric': '/api/v2/lyric/get/lyric', 'song_streaming': '/api/v2/song/get/streaming', + # For playlist + 'playlist': '/api/v2/page/get/playlist', + 'album': '/api/v2/page/get/playlist', + # For chart + 'zing-chart': '/api/v2/page/get/chart-home', + 'zing-chart-tuan': '/api/v2/page/get/week-chart', + 'moi-phat-hanh': '/api/v2/page/get/newrelease-chart', + 'the-loai-video': '/api/v2/video/get/list', + # For user + 'info-artist': '/api/v2/page/get/artist', + 'user-list-song': '/api/v2/song/get/list', + 'user-list-video': '/api/v2/video/get/list', } - + _PER_PAGE = 50 _API_KEY = '88265e23d4284f25963e6eedac8fbfa3' _SECRET_KEY = b'2aa2d1c561e809b267f3638c4a307aab' @@ -31,7 +46,12 @@ class ZingMp3BaseIE(InfoExtractor): title = item.get('title') or item.get('alias') if type_url == 'video-clip': + info = self._download_json( + 'http://api.mp3.zing.vn/api/mobile/video/getvideoinfo', item_id, + query={'requestdata': json.dumps({'id': item_id})}) source = item.get('streaming') + if info.get('source'): + source['mp4'] = info.get('source') else: api = self.get_api_with_signature(name_api=self._SLUG_API.get('song_streaming'), param={'id': item_id}) source = self._download_json(api, video_id=item_id).get('data') @@ -52,8 +72,7 @@ class ZingMp3BaseIE(InfoExtractor): formats.append({ 'format_id': 'mp4-' + res, 'url': video_url, - 'height': int_or_none(self._search_regex( - 
r'^(\d+)p', res, 'resolution', default=None)), + 'height': int_or_none(res), }) continue elif v == 'VIP': @@ -120,6 +139,11 @@ class ZingMp3BaseIE(InfoExtractor): } return f'{self._DOMAIN}{name_api}?{urllib.parse.urlencode(data)}' + def _entries(self, items): + for item in items or []: + if item and item.get('link'): + yield self.url_result(urljoin(self._DOMAIN, item['link'])) + class ZingMp3IE(ZingMp3BaseIE): _VALID_URL = ZingMp3BaseIE._VALID_URL_TMPL % 'bai-hat|video-clip|embed' @@ -189,19 +213,17 @@ class ZingMp3AlbumIE(ZingMp3BaseIE): _TESTS = [{ 'url': 'http://mp3.zing.vn/album/Lau-Dai-Tinh-Ai-Bang-Kieu-Minh-Tuyet/ZWZBWDAF.html', 'info_dict': { - '_type': 'playlist', 'id': 'ZWZBWDAF', 'title': 'Lâu Đài Tình Ái', }, - 'playlist_count': 9, + 'playlist_mincount': 9, }, { 'url': 'https://zingmp3.vn/album/Nhung-Bai-Hat-Hay-Nhat-Cua-Mr-Siro-Mr-Siro/ZWZAEZZD.html', 'info_dict': { - '_type': 'playlist', 'id': 'ZWZAEZZD', 'title': 'Những Bài Hát Hay Nhất Của Mr. Siro', }, - 'playlist_count': 49, + 'playlist_mincount': 49, }, { 'url': 'http://mp3.zing.vn/playlist/Duong-Hong-Loan-apollobee/IWCAACCB.html', 'only_matching': True, @@ -212,11 +234,198 @@ class ZingMp3AlbumIE(ZingMp3BaseIE): IE_NAME = 'zingmp3:album' def _process_data(self, data, song_id, type_url): - def entries(): - for item in traverse_obj(data, ('song', 'items')) or []: - entry = self._extract_item(item, song_id, type_url, False) - if entry: - yield entry - - return self.playlist_result(entries(), traverse_obj(data, 'id', 'encodeId'), + items = traverse_obj(data, ('song', 'items')) or [] + return self.playlist_result(self._entries(items), traverse_obj(data, 'id', 'encodeId'), traverse_obj(data, 'name', 'title')) + + +class ZingMp3ChartHomeIE(ZingMp3BaseIE): + _VALID_URL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P(?:zing-chart|moi-phat-hanh))/?(?:[#?]|$)' + _TESTS = [{ + 'url': 'https://zingmp3.vn/zing-chart', + 'info_dict': { + 'id': 'zing-chart', + 'title': 'zing-chart', + }, + 'playlist_mincount': 100, + }, { + 'url': 'https://zingmp3.vn/moi-phat-hanh', + 'info_dict': { + 'id': 'moi-phat-hanh', + 'title': 'moi-phat-hanh', + }, + 'playlist_mincount': 100, + }] + IE_NAME = 'zingmp3:chart-home' + + def _real_extract(self, url): + type_url = self._match_id(url) + api = self.get_api_with_signature(name_api=self._SLUG_API[type_url], param={'id': type_url}) + return self._process_data(self._download_json(api, type_url)['data'], type_url, type_url) + + def _process_data(self, data, chart_id, type_url): + if type_url == 'zing-chart': + items = traverse_obj(data, ('RTChart', 'items'), default=[]) + else: + items = data.get('items') + return self.playlist_result(self._entries(items), type_url, type_url) + + +class ZingMp3WeekChartIE(ZingMp3BaseIE): + _VALID_URL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?Pzing-chart-tuan)/[^/?#]+/(?P\w+)' + IE_NAME = 'zingmp3:week-chart' + _TESTS = [{ + 'url': 'https://zingmp3.vn/zing-chart-tuan/Bai-hat-Viet-Nam/IWZ9Z08I.html', + 'info_dict': { + 'id': 'IWZ9Z08I', + 'title': 'zing-chart-vn', + }, + 'playlist_mincount': 10, + }, { + 'url': 'https://zingmp3.vn/zing-chart-tuan/Bai-hat-US-UK/IWZ9Z0BW.html', + 'info_dict': { + 'id': 'IWZ9Z0BW', + 'title': 'zing-chart-us', + }, + 'playlist_mincount': 10, + }, { + 'url': 'https://zingmp3.vn/zing-chart-tuan/Bai-hat-KPop/IWZ9Z0BO.html', + 'info_dict': { + 'id': 'IWZ9Z0BO', + 'title': 'zing-chart-korea', + }, + 'playlist_mincount': 10, + }] + + def _process_data(self, data, chart_id, type_url): + return self.playlist_result(self._entries(data['items']), chart_id, 
f'zing-chart-{data.get("country", "")}')
+
+
+class ZingMp3ChartMusicVideoIE(ZingMp3BaseIE):
+    _VALID_URL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<type>the-loai-video)/(?P<regions>[^/]+)/(?P<id>[^\.]+)'
+    IE_NAME = 'zingmp3:chart-music-video'
+    _TESTS = [{
+        'url': 'https://zingmp3.vn/the-loai-video/Viet-Nam/IWZ9Z08I.html',
+        'info_dict': {
+            'id': 'IWZ9Z08I',
+            'title': 'the-loai-video_Viet-Nam',
+        },
+        'playlist_mincount': 400,
+    }, {
+        'url': 'https://zingmp3.vn/the-loai-video/Au-My/IWZ9Z08O.html',
+        'info_dict': {
+            'id': 'IWZ9Z08O',
+            'title': 'the-loai-video_Au-My',
+        },
+        'playlist_mincount': 40,
+    }, {
+        'url': 'https://zingmp3.vn/the-loai-video/Han-Quoc/IWZ9Z08W.html',
+        'info_dict': {
+            'id': 'IWZ9Z08W',
+            'title': 'the-loai-video_Han-Quoc',
+        },
+        'playlist_mincount': 30,
+    }, {
+        'url': 'https://zingmp3.vn/the-loai-video/Khong-Loi/IWZ9Z086.html',
+        'info_dict': {
+            'id': 'IWZ9Z086',
+            'title': 'the-loai-video_Khong-Loi',
+        },
+        'playlist_mincount': 10,
+    }]
+
+    def _fetch_page(self, song_id, type_url, page):
+        page += 1
+        api = self.get_api_with_signature(name_api=self._SLUG_API[type_url], param={
+            'id': song_id,
+            'type': 'genre',
+            'page': page,
+            'count': self._PER_PAGE
+        })
+        data = self._download_json(api, song_id)['data']
+        return self._entries(data.get('items'))
+
+    def _real_extract(self, url):
+        song_id, regions, type_url = self._match_valid_url(url).group('id', 'regions', 'type')
+        entries = OnDemandPagedList(functools.partial(self._fetch_page, song_id, type_url), self._PER_PAGE)
+        return self.playlist_result(entries, song_id, f'{type_url}_{regions}')
+
+
+class ZingMp3UserIE(ZingMp3BaseIE):
+    _VALID_URL = r'''(?x)
+            https?://
+                (?:mp3\.zing|zingmp3)\.vn/
+                (?P<user>[^/]+)
+                (?:
+                    /(?P<type>bai-hat|single|album|video)
+                )
+                /?(?:[?#]|$)
+            '''
+    IE_NAME = 'zingmp3:user'
+    _TESTS = [{
+        'url': 'https://zingmp3.vn/Mr-Siro/bai-hat',
+        'info_dict': {
+            'id': 'IWZ98609',
+            'title': 'Mr. Siro - bai-hat',
+            'description': 'md5:85ab29bd7b21725c12bf76fd1d6922e5',
+        },
+        'playlist_mincount': 91,
+    }, {
+        'url': 'https://zingmp3.vn/Mr-Siro/album',
+        'info_dict': {
+            'id': 'IWZ98609',
+            'title': 'Mr. Siro - album',
+            'description': 'md5:85ab29bd7b21725c12bf76fd1d6922e5',
+        },
+        'playlist_mincount': 3,
+    }, {
+        'url': 'https://zingmp3.vn/Mr-Siro/single',
+        'info_dict': {
+            'id': 'IWZ98609',
+            'title': 'Mr. Siro - single',
+            'description': 'md5:85ab29bd7b21725c12bf76fd1d6922e5',
+        },
+        'playlist_mincount': 20,
+    }, {
+        'url': 'https://zingmp3.vn/Mr-Siro/video',
+        'info_dict': {
+            'id': 'IWZ98609',
+            'title': 'Mr.
Siro - video', + 'description': 'md5:85ab29bd7b21725c12bf76fd1d6922e5', + }, + 'playlist_mincount': 15, + }] + + def _fetch_page(self, user_id, type_url, page): + page += 1 + name_api = self._SLUG_API['user-list-song'] if type_url == 'bai-hat' else self._SLUG_API['user-list-video'] + api = self.get_api_with_signature(name_api=name_api, param={ + 'id': user_id, + 'type': 'artist', + 'page': page, + 'count': self._PER_PAGE + }) + data = self._download_json(api, user_id, query={'sort': 'new', 'sectionId': 'aSong'})['data'] + return self._entries(data.get('items')) + + def _real_extract(self, url): + user_alias, type_url = self._match_valid_url(url).group('user', 'type') + if not type_url: + type_url = 'bai-hat' + user_info = self._download_json( + self.get_api_with_signature(name_api=self._SLUG_API['info-artist'], param={}), + video_id=user_alias, query={'alias': user_alias})['data'] + user_id = user_info.get('id') + biography = user_info.get('biography') + if type_url == 'bai-hat' or type_url == 'video': + entries = OnDemandPagedList(functools.partial(self._fetch_page, user_id, type_url), self._PER_PAGE) + return self.playlist_result(entries, user_id, f'{user_info.get("name")} - {type_url}', biography) + else: + entries = [] + for section in user_info.get('sections', {}): + if section.get('link') == f'/{user_alias}/{type_url}': + items = section.get('items') + for item in items: + entries.append(self.url_result(urljoin(self._DOMAIN, item.get('link')))) + break + return self.playlist_result(entries, user_id, f'{user_info.get("name")} - {type_url}', biography) From bd18c5d1709533f352534a3fc3cd8445c569666d Mon Sep 17 00:00:00 2001 From: diegorodriguezv Date: Sat, 7 May 2022 06:21:55 -0500 Subject: [PATCH 1025/2552] [cleanup, tmz] Update tests (#3654) Authored by: diegorodriguezv --- yt_dlp/extractor/tmz.py | 59 +++++++++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 11 deletions(-) diff --git a/yt_dlp/extractor/tmz.py b/yt_dlp/extractor/tmz.py index a8c91f617..ffb30c6b8 100644 --- a/yt_dlp/extractor/tmz.py +++ b/yt_dlp/extractor/tmz.py @@ -18,8 +18,10 @@ class TMZIE(InfoExtractor): "title": "No Charges Against Hillary Clinton? Harvey Says It Ain't Over Yet", "description": "Harvey talks about Director Comey’s decision not to prosecute Hillary Clinton.", "timestamp": 1467831837, - "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}", + "uploader": "TMZ Staff", "upload_date": "20160706", + "thumbnail": "https://imagez.tmz.com/image/5e/4by3/2016/07/06/5eea7dc01baa5c2e83eb06930c170e46_xl.jpg", + "duration": 772.0, }, }, { @@ -30,8 +32,10 @@ class TMZIE(InfoExtractor): "title": "Angry Bagel Shop Guy Says He Doesn't Trust Women", "description": "The enraged man who went viral for ranting about women on dating sites before getting ragdolled in a bagel shop is defending his misogyny ... he says it's women's fault in the first place.", "timestamp": 1562889485, - "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}", + "uploader": "TMZ Staff", "upload_date": "20190711", + "thumbnail": "https://imagez.tmz.com/image/a8/4by3/2019/07/12/a85480d27b2f50a7bfea2322151d67a5_xl.jpg", + "duration": 123.0, }, }, { @@ -43,8 +47,10 @@ class TMZIE(InfoExtractor): "title": "Bobby Brown Tells Crowd ... Bobbi Kristina is Awake", "description": 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. 
She\'s watching me."', "timestamp": 1429467813, - "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}", + "uploader": "TMZ Staff", "upload_date": "20150419", + "duration": 29.0, + "thumbnail": "https://imagez.tmz.com/image/15/4by3/2015/04/20/1539c7ae136359fc979236fa6a9449dd_xl.jpg", }, }, { @@ -56,8 +62,10 @@ class TMZIE(InfoExtractor): "description": "Patti LaBelle made it known loud and clear last night ... NO " "ONE gets on her stage and strips down.", "timestamp": 1442683746, - "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}", + "uploader": "TMZ Staff", "upload_date": "20150919", + "duration": 104.0, + "thumbnail": "https://imagez.tmz.com/image/5e/4by3/2015/09/20/5e57d7575062528082994e18ac3f0f48_xl.jpg", }, }, { @@ -68,8 +76,10 @@ class TMZIE(InfoExtractor): "title": "NBA's Adam Silver -- Blake Griffin's a Great Guy ... He'll Learn from This", "description": "Two pretty parts of this video with NBA Commish Adam Silver.", "timestamp": 1454010989, - "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}", + "uploader": "TMZ Staff", "upload_date": "20160128", + "duration": 59.0, + "thumbnail": "https://imagez.tmz.com/image/38/4by3/2016/01/29/3856e83e0beb57059ec412122b842fb1_xl.jpg", }, }, { @@ -80,8 +90,10 @@ class TMZIE(InfoExtractor): "title": "Trump Star Vandal -- I'm Not Afraid of Donald or the Cops!", "description": "James Otis is the the guy who took a pickaxe to Donald Trump's star on the Walk of Fame, and he tells TMZ .. he's ready and willing to go to jail for the crime.", "timestamp": 1477500095, - "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}", + "uploader": "TMZ Staff", "upload_date": "20161026", + "thumbnail": "https://imagez.tmz.com/image/0d/4by3/2016/10/27/0d904814d4a75dcf9cc3b8cfd1edc1a3_xl.jpg", + "duration": 128.0, }, }, { @@ -96,8 +108,10 @@ class TMZIE(InfoExtractor): "swinging their billy clubs at both Anti-Fascist and Pro-Trump " "demonstrators.", "timestamp": 1604182772, - "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}", + "uploader": "TMZ Staff", "upload_date": "20201031", + "duration": 96.0, + "thumbnail": "https://imagez.tmz.com/image/f3/4by3/2020/10/31/f37bd5a8aef84497866f425130c58be3_xl.jpg", }, }, { @@ -108,8 +122,23 @@ class TMZIE(InfoExtractor): "title": "SICK LAMBO GERVONTA DAVIS IN HIS NEW RIDE RIGHT AFTER KO AFTER LEO EsNews Boxing", "uploader": "ESNEWS", "description": "md5:49675bc58883ccf80474b8aa701e1064", - "upload_date": "20201101", + "upload_date": "20201102", "uploader_id": "ESNEWS", + "uploader_url": "http://www.youtube.com/user/ESNEWS", + "like_count": int, + "channel_id": "UCI-Oq7oFGakzSzHFlTtsUsQ", + "channel": "ESNEWS", + "view_count": int, + "duration": 225, + "live_status": "not_live", + "thumbnail": "https://i.ytimg.com/vi_webp/Dddb6IGe-ws/maxresdefault.webp", + "channel_url": "https://www.youtube.com/channel/UCI-Oq7oFGakzSzHFlTtsUsQ", + "channel_follower_count": int, + "playable_in_embed": True, + "categories": ["Sports"], + "age_limit": 0, + "tags": "count:10", + "availability": "public", }, }, { @@ -117,12 +146,20 @@ class TMZIE(InfoExtractor): "info_dict": { "id": "1329450007125225473", "ext": "mp4", - "title": "TheMacLife - BREAKING: Conor McGregor (@thenotoriousmma) has signed his bout agreement for his rematch with Dustin Poirier for January 23.", - "uploader": "TheMacLife", + "title": "The Mac Life - BREAKING: Conor McGregor (@thenotoriousmma) has signed his bout agreement for his rematch with Dustin Poirier for January 23.", + "uploader": "The Mac Life", "description": "md5:56e6009bbc3d12498e10d08a8e1f1c69", 
"upload_date": "20201119", - "uploader_id": "Maclifeofficial", + "uploader_id": "TheMacLife", "timestamp": 1605800556, + "thumbnail": "https://pbs.twimg.com/media/EnMmfT8XYAExgxJ.jpg?name=small", + "like_count": int, + "duration": 11.812, + "uploader_url": "https://twitter.com/TheMacLife", + "age_limit": 0, + "repost_count": int, + "tags": [], + "comment_count": int, }, }, ] From a0fe51d5623a18eb7c2c460a3d35f916e1752504 Mon Sep 17 00:00:00 2001 From: Teemu Ikonen Date: Sat, 7 May 2022 14:24:41 +0300 Subject: [PATCH 1026/2552] [ruutu] Support hs.fi embeds (#3547) Authored by: tpikonen, pukkandan --- yt_dlp/extractor/generic.py | 29 +++++++++++++++++++++--- yt_dlp/extractor/ruutu.py | 45 ++++++++++++++++++++++++++++--------- 2 files changed, 61 insertions(+), 13 deletions(-) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 8192fbb86..340161a42 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -2517,6 +2517,29 @@ class GenericIE(InfoExtractor): 'upload_date': '20220308', }, }, + { + # Multiple Ruutu embeds + 'url': 'https://www.hs.fi/kotimaa/art-2000008762560.html', + 'info_dict': { + 'title': 'Koronavirus | Epidemiahuippu voi olla Suomessa ohi, mutta koronaviruksen poistamista yleisvaarallisten tautien joukosta harkitaan vasta syksyllä', + 'id': 'art-2000008762560' + }, + 'playlist_count': 3 + }, + { + # Ruutu embed in hs.fi with a single video + 'url': 'https://www.hs.fi/kotimaa/art-2000008793421.html', + 'md5': 'f8964e65d8fada6e8a562389bf366bb4', + 'info_dict': { + 'id': '4081841', + 'ext': 'mp4', + 'title': 'Puolustusvoimat siirsi panssariajoneuvoja harjoituksiin Niinisaloon 2.5.2022', + 'thumbnail': r're:^https?://.+\.jpg$', + 'duration': 138, + 'age_limit': 0, + 'upload_date': '20220504', + }, + }, ] def report_following_redirect(self, new_url): @@ -3749,9 +3772,9 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches(panopto_urls, video_id, video_title) # Look for Ruutu embeds - ruutu_url = RuutuIE._extract_url(webpage) - if ruutu_url: - return self.url_result(ruutu_url, RuutuIE) + ruutu_urls = RuutuIE._extract_urls(webpage) + if ruutu_urls: + return self.playlist_from_matches(ruutu_urls, video_id, video_title) # Look for HTML5 media entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') diff --git a/yt_dlp/extractor/ruutu.py b/yt_dlp/extractor/ruutu.py index f5dadf278..c6d94c100 100644 --- a/yt_dlp/extractor/ruutu.py +++ b/yt_dlp/extractor/ruutu.py @@ -38,6 +38,7 @@ class RuutuIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 114, 'age_limit': 0, + 'upload_date': '20150508', }, }, { @@ -51,6 +52,9 @@ class RuutuIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 40, 'age_limit': 0, + 'upload_date': '20150507', + 'series': 'Superpesis', + 'categories': ['Urheilu'], }, }, { @@ -63,6 +67,8 @@ class RuutuIE(InfoExtractor): 'description': 'md5:7d90f358c47542e3072ff65d7b1bcffe', 'thumbnail': r're:^https?://.*\.jpg$', 'age_limit': 0, + 'upload_date': '20151012', + 'series': 'Läpivalaisu', }, }, # Episode where is "NOT-USED", but has other @@ -82,6 +88,9 @@ class RuutuIE(InfoExtractor): 'description': 'md5:bbb6963df17dfd0ecd9eb9a61bf14b52', 'thumbnail': r're:^https?://.*\.jpg$', 'age_limit': 0, + 'upload_date': '20190320', + 'series': 'Mysteeritarinat', + 'duration': 1324, }, 'expected_warnings': [ 'HTTP Error 502: Bad Gateway', @@ -126,14 +135,30 @@ class RuutuIE(InfoExtractor): _API_BASE = 'https://gatling.nelonenmedia.fi' @classmethod - def _extract_url(cls, 
webpage): + def _extract_urls(cls, webpage): + # nelonen.fi settings = try_call( lambda: json.loads(re.search( r'jQuery\.extend\(Drupal\.settings, ({.+?})\);', webpage).group(1), strict=False)) - video_id = traverse_obj(settings, ( - 'mediaCrossbowSettings', 'file', 'field_crossbow_video_id', 'und', 0, 'value')) - if video_id: - return f'http://www.ruutu.fi/video/{video_id}' + if settings: + video_id = traverse_obj(settings, ( + 'mediaCrossbowSettings', 'file', 'field_crossbow_video_id', 'und', 0, 'value')) + if video_id: + return [f'http://www.ruutu.fi/video/{video_id}'] + # hs.fi and is.fi + settings = try_call( + lambda: json.loads(re.search( + '(?s)]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)', + webpage).group(1), strict=False)) + if settings: + video_ids = set(traverse_obj(settings, ( + 'props', 'pageProps', 'page', 'assetData', 'splitBody', ..., 'video', 'sourceId')) or []) + if video_ids: + return [f'http://www.ruutu.fi/video/{v}' for v in video_ids] + video_id = traverse_obj(settings, ( + 'props', 'pageProps', 'page', 'assetData', 'mainVideo', 'sourceId')) + if video_id: + return [f'http://www.ruutu.fi/video/{video_id}'] def _real_extract(self, url): video_id = self._match_id(url) @@ -206,10 +231,10 @@ class RuutuIE(InfoExtractor): extract_formats(video_xml.find('./Clip')) def pv(name): - node = find_xpath_attr( - video_xml, './Clip/PassthroughVariables/variable', 'name', name) - if node is not None: - return node.get('value') + value = try_call(lambda: find_xpath_attr( + video_xml, './Clip/PassthroughVariables/variable', 'name', name).get('value')) + if value != 'NA': + return value or None if not formats: if (not self.get_param('allow_unplayable_formats') @@ -234,6 +259,6 @@ class RuutuIE(InfoExtractor): 'series': pv('series_name'), 'season_number': int_or_none(pv('season_number')), 'episode_number': int_or_none(pv('episode_number')), - 'categories': themes.split(',') if themes else [], + 'categories': themes.split(',') if themes else None, 'formats': formats, } From 6b70527f9d522ed0bcf5ccb20822f0d3901253ea Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 7 May 2022 15:59:55 +0530 Subject: [PATCH 1027/2552] [cleanup, zingmp3] Refactor extractors --- yt_dlp/extractor/zingmp3.py | 309 +++++++++++++++--------------------- 1 file changed, 132 insertions(+), 177 deletions(-) diff --git a/yt_dlp/extractor/zingmp3.py b/yt_dlp/extractor/zingmp3.py index 7238bf2fd..26eddb06a 100644 --- a/yt_dlp/extractor/zingmp3.py +++ b/yt_dlp/extractor/zingmp3.py @@ -14,139 +14,64 @@ from ..utils import ( class ZingMp3BaseIE(InfoExtractor): - _VALID_URL_TMPL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P(?:%s))/[^/]+/(?P\w+)(?:\.html|\?)' + _VALID_URL_TMPL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P(?:%s))/[^/?#]+/(?P\w+)(?:\.html|\?)' _GEO_COUNTRIES = ['VN'] _DOMAIN = 'https://zingmp3.vn' - _SLUG_API = { - # For audio/video + _PER_PAGE = 50 + _API_SLUGS = { + # Audio/video 'bai-hat': '/api/v2/page/get/song', 'embed': '/api/v2/page/get/song', 'video-clip': '/api/v2/page/get/video', 'lyric': '/api/v2/lyric/get/lyric', - 'song_streaming': '/api/v2/song/get/streaming', - # For playlist + 'song-streaming': '/api/v2/song/get/streaming', + # Playlist 'playlist': '/api/v2/page/get/playlist', 'album': '/api/v2/page/get/playlist', - # For chart + # Chart 'zing-chart': '/api/v2/page/get/chart-home', 'zing-chart-tuan': '/api/v2/page/get/week-chart', 'moi-phat-hanh': '/api/v2/page/get/newrelease-chart', 'the-loai-video': '/api/v2/video/get/list', - # For user + # User 'info-artist': '/api/v2/page/get/artist', 
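+        # The two user list endpoints below are paged; judging by _fetch_page in
+        # the user extractor, they are requested _PER_PAGE (50) items at a time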
'user-list-song': '/api/v2/song/get/list', 'user-list-video': '/api/v2/video/get/list', } - _PER_PAGE = 50 - _API_KEY = '88265e23d4284f25963e6eedac8fbfa3' - _SECRET_KEY = b'2aa2d1c561e809b267f3638c4a307aab' - - def _extract_item(self, item, song_id, type_url, fatal): - item_id = item.get('encodeId') or song_id - title = item.get('title') or item.get('alias') - - if type_url == 'video-clip': - info = self._download_json( - 'http://api.mp3.zing.vn/api/mobile/video/getvideoinfo', item_id, - query={'requestdata': json.dumps({'id': item_id})}) - source = item.get('streaming') - if info.get('source'): - source['mp4'] = info.get('source') - else: - api = self.get_api_with_signature(name_api=self._SLUG_API.get('song_streaming'), param={'id': item_id}) - source = self._download_json(api, video_id=item_id).get('data') - - formats = [] - for k, v in (source or {}).items(): - if not v: - continue - if k in ('mp4', 'hls'): - for res, video_url in v.items(): - if not video_url: - continue - if k == 'hls': - formats.extend(self._extract_m3u8_formats( - video_url, item_id, 'mp4', - 'm3u8_native', m3u8_id=k, fatal=False)) - elif k == 'mp4': - formats.append({ - 'format_id': 'mp4-' + res, - 'url': video_url, - 'height': int_or_none(res), - }) - continue - elif v == 'VIP': - continue - formats.append({ - 'ext': 'mp3', - 'format_id': k, - 'tbr': int_or_none(k), - 'url': self._proto_relative_url(v), - 'vcodec': 'none', - }) - if not formats: - if not fatal: - return - msg = item.get('msg') - if msg == 'Sorry, this content is not available in your country.': - self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True) - self.raise_no_formats(msg, expected=True) - self._sort_formats(formats) - lyric = item.get('lyric') - if not lyric: - api = self.get_api_with_signature(name_api=self._SLUG_API.get("lyric"), param={'id': item_id}) - info_lyric = self._download_json(api, video_id=item_id) - lyric = traverse_obj(info_lyric, ('data', 'file')) - subtitles = { - 'origin': [{ - 'url': lyric, - }], - } if lyric else None - - album = item.get('album') or {} - - return { - 'id': item_id, - 'title': title, - 'formats': formats, - 'thumbnail': traverse_obj(item, 'thumbnail', 'thumbnailM'), - 'subtitles': subtitles, - 'duration': int_or_none(item.get('duration')), - 'track': title, - 'artist': traverse_obj(item, 'artistsNames', 'artists_names'), - 'album': traverse_obj(album, 'name', 'title'), - 'album_artist': traverse_obj(album, 'artistsNames', 'artists_names'), + def _api_url(self, url_type, params): + api_slug = self._API_SLUGS[url_type] + params.update({'ctime': '1'}) + sha256 = hashlib.sha256( + ''.join(f'{k}={v}' for k, v in sorted(params.items())).encode()).hexdigest() + data = { + **params, + 'apiKey': '88265e23d4284f25963e6eedac8fbfa3', + 'sig': hmac.new( + b'2aa2d1c561e809b267f3638c4a307aab', f'{api_slug}{sha256}'.encode(), hashlib.sha512).hexdigest(), } + return f'{self._DOMAIN}{api_slug}?{urllib.parse.urlencode(data)}' + + def _call_api(self, url_type, params, display_id=None, **kwargs): + resp = self._download_json( + self._api_url(url_type, params), display_id or params.get('id'), + note=f'Downloading {url_type} JSON metadata', **kwargs) + return (resp or {}).get('data') or {} def _real_initialize(self): if not self.get_param('cookiefile') and not self.get_param('cookiesfrombrowser'): - self._request_webpage(self.get_api_with_signature(name_api=self._SLUG_API['bai-hat'], param={'id': ''}), - None, note='Updating cookies') - - def _real_extract(self, url): - song_id, type_url = 
self._match_valid_url(url).group('id', 'type') - api = self.get_api_with_signature(name_api=self._SLUG_API[type_url], param={'id': song_id}) - return self._process_data(self._download_json(api, song_id)['data'], song_id, type_url) - - def get_api_with_signature(self, name_api, param): - param.update({'ctime': '1'}) - sha256 = hashlib.sha256(''.join(f'{i}={param[i]}' for i in sorted(param)).encode('utf-8')).hexdigest() - data = { - 'apiKey': self._API_KEY, - 'sig': hmac.new(self._SECRET_KEY, f'{name_api}{sha256}'.encode('utf-8'), hashlib.sha512).hexdigest(), - **param, - } - return f'{self._DOMAIN}{name_api}?{urllib.parse.urlencode(data)}' + self._request_webpage( + self._api_url('bai-hat', {'id': ''}), None, note='Updating cookies') - def _entries(self, items): - for item in items or []: - if item and item.get('link'): - yield self.url_result(urljoin(self._DOMAIN, item['link'])) + def _parse_items(self, items): + for url in traverse_obj(items, (..., 'link')) or []: + yield self.url_result(urljoin(self._DOMAIN, url)) class ZingMp3IE(ZingMp3BaseIE): _VALID_URL = ZingMp3BaseIE._VALID_URL_TMPL % 'bai-hat|video-clip|embed' + IE_NAME = 'zingmp3' + IE_DESC = 'zingmp3.vn' _TESTS = [{ 'url': 'https://mp3.zing.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html', 'md5': 'ead7ae13693b3205cbc89536a077daed', @@ -168,7 +93,7 @@ class ZingMp3IE(ZingMp3BaseIE): }, }, { 'url': 'https://zingmp3.vn/video-clip/Suong-Hoa-Dua-Loi-K-ICM-RYO/ZO8ZF7C7.html', - 'md5': 'c7f23d971ac1a4f675456ed13c9b9612', + 'md5': '3c2081e79471a2f4a3edd90b70b185ea', 'info_dict': { 'id': 'ZO8ZF7C7', 'title': 'Sương Hoa Đưa Lối', @@ -201,11 +126,64 @@ class ZingMp3IE(ZingMp3BaseIE): 'url': 'https://zingmp3.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html', 'only_matching': True, }] - IE_NAME = 'zingmp3' - IE_DESC = 'zingmp3.vn' - def _process_data(self, data, song_id, type_url): - return self._extract_item(data, song_id, type_url, True) + def _real_extract(self, url): + song_id, url_type = self._match_valid_url(url).group('id', 'type') + item = self._call_api(url_type, {'id': song_id}) + + item_id = item.get('encodeId') or song_id + if url_type == 'video-clip': + source = item.get('streaming') + source['mp4'] = self._download_json( + 'http://api.mp3.zing.vn/api/mobile/video/getvideoinfo', item_id, + query={'requestdata': json.dumps({'id': item_id})}, + note='Downloading mp4 JSON metadata').get('source') + else: + source = self._call_api('song-streaming', {'id': item_id}) + + formats = [] + for k, v in (source or {}).items(): + if not v or v == 'VIP': + continue + if k not in ('mp4', 'hls'): + formats.append({ + 'ext': 'mp3', + 'format_id': k, + 'tbr': int_or_none(k), + 'url': self._proto_relative_url(v), + 'vcodec': 'none', + }) + continue + for res, video_url in v.items(): + if not video_url: + continue + if k == 'hls': + formats.extend(self._extract_m3u8_formats(video_url, item_id, 'mp4', m3u8_id=k, fatal=False)) + continue + formats.append({ + 'format_id': f'mp4-{res}', + 'url': video_url, + 'height': int_or_none(res), + }) + + if not formats and item.get('msg') == 'Sorry, this content is not available in your country.': + self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True) + self._sort_formats(formats) + + lyric = item.get('lyric') or self._call_api('lyric', {'id': item_id}, fatal=False).get('file') + + return { + 'id': item_id, + 'title': traverse_obj(item, 'title', 'alias'), + 'thumbnail': traverse_obj(item, 'thumbnail', 'thumbnailM'), + 'duration': int_or_none(item.get('duration')), + 'track': traverse_obj(item, 
'title', 'alias'),
+            'artist': traverse_obj(item, 'artistsNames', 'artists_names'),
+            'album': traverse_obj(item, ('album', ('name', 'title')), get_all=False),
+            'album_artist': traverse_obj(item, ('album', ('artistsNames', 'artists_names')), get_all=False),
+            'formats': formats,
+            'subtitles': {'origin': [{'url': lyric}]} if lyric else None,
+        }


 class ZingMp3AlbumIE(ZingMp3BaseIE):
@@ -233,10 +211,12 @@ class ZingMp3AlbumIE(ZingMp3BaseIE):
     }]
     IE_NAME = 'zingmp3:album'

-    def _process_data(self, data, song_id, type_url):
-        items = traverse_obj(data, ('song', 'items')) or []
-        return self.playlist_result(self._entries(items), traverse_obj(data, 'id', 'encodeId'),
-                                    traverse_obj(data, 'name', 'title'))
+    def _real_extract(self, url):
+        song_id, url_type = self._match_valid_url(url).group('id', 'type')
+        data = self._call_api(url_type, {'id': song_id})
+        return self.playlist_result(
+            self._parse_items(traverse_obj(data, ('song', 'items'))),
+            traverse_obj(data, 'id', 'encodeId'), traverse_obj(data, 'name', 'title'))


 class ZingMp3ChartHomeIE(ZingMp3BaseIE):
@@ -245,34 +225,26 @@ class ZingMp3ChartHomeIE(ZingMp3BaseIE):
         'url': 'https://zingmp3.vn/zing-chart',
         'info_dict': {
             'id': 'zing-chart',
-            'title': 'zing-chart',
         },
         'playlist_mincount': 100,
     }, {
         'url': 'https://zingmp3.vn/moi-phat-hanh',
         'info_dict': {
             'id': 'moi-phat-hanh',
-            'title': 'moi-phat-hanh',
         },
         'playlist_mincount': 100,
     }]
     IE_NAME = 'zingmp3:chart-home'

     def _real_extract(self, url):
-        type_url = self._match_id(url)
-        api = self.get_api_with_signature(name_api=self._SLUG_API[type_url], param={'id': type_url})
-        return self._process_data(self._download_json(api, type_url)['data'], type_url, type_url)
-
-    def _process_data(self, data, chart_id, type_url):
-        if type_url == 'zing-chart':
-            items = traverse_obj(data, ('RTChart', 'items'), default=[])
-        else:
-            items = data.get('items')
-        return self.playlist_result(self._entries(items), type_url, type_url)
+        url_type = self._match_id(url)
+        data = self._call_api(url_type, {'id': url_type})
+        items = traverse_obj(data, ('RTChart', 'items') if url_type == 'zing-chart' else 'items')
+        return self.playlist_result(self._parse_items(items), url_type)


 class ZingMp3WeekChartIE(ZingMp3BaseIE):
-    _VALID_URL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<type>zing-chart-tuan)/[^/?#]+/(?P<id>\w+)'
+    _VALID_URL = ZingMp3BaseIE._VALID_URL_TMPL % 'zing-chart-tuan'
     IE_NAME = 'zingmp3:week-chart'
     _TESTS = [{
         'url': 'https://zingmp3.vn/zing-chart-tuan/Bai-hat-Viet-Nam/IWZ9Z08I.html',
@@ -297,8 +269,11 @@ class ZingMp3WeekChartIE(ZingMp3BaseIE):
         'playlist_mincount': 10,
     }]

-    def _process_data(self, data, chart_id, type_url):
-        return self.playlist_result(self._entries(data['items']), chart_id, f'zing-chart-{data.get("country", "")}')
+    def _real_extract(self, url):
+        song_id, url_type = self._match_valid_url(url).group('id', 'type')
+        data = self._call_api(url_type, {'id': song_id})
+        return self.playlist_result(
+            self._parse_items(data['items']), song_id, f'zing-chart-{data.get("country", "")}')


 class ZingMp3ChartMusicVideoIE(ZingMp3BaseIE):
@@ -334,33 +309,23 @@ class ZingMp3ChartMusicVideoIE(ZingMp3BaseIE):
         'playlist_mincount': 10,
     }]

-    def _fetch_page(self, song_id, type_url, page):
-        page += 1
-        api = self.get_api_with_signature(name_api=self._SLUG_API[type_url], param={
+    def _fetch_page(self, song_id, url_type, page):
+        return self._parse_items(self._call_api(url_type, {
             'id': song_id,
             'type': 'genre',
-            'page': page,
+            'page': page + 1,
             'count': self._PER_PAGE
-        })
-        data = self._download_json(api, song_id)['data']
-        return self._entries(data.get('items'))
+        }).get('items'))

     def _real_extract(self, url):
-        song_id, regions, type_url = self._match_valid_url(url).group('id', 'regions', 'type')
-        entries = OnDemandPagedList(functools.partial(self._fetch_page, song_id, type_url), self._PER_PAGE)
-        return self.playlist_result(entries, song_id, f'{type_url}_{regions}')
+        song_id, regions, url_type = self._match_valid_url(url).group('id', 'regions', 'type')
+        return self.playlist_result(
+            OnDemandPagedList(functools.partial(self._fetch_page, song_id, url_type), self._PER_PAGE),
+            song_id, f'{url_type}_{regions}')


 class ZingMp3UserIE(ZingMp3BaseIE):
-    _VALID_URL = r'''(?x)
-                    https?://
-                        (?:mp3\.zing|zingmp3)\.vn/
-                        (?P<user>[^/]+)
-                        (?:
-                            /(?P<type>bai-hat|single|album|video)
-                        )
-                        /?(?:[?#]|$)
-                '''
+    _VALID_URL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<user>[^/]+)/(?P<type>bai-hat|single|album|video)/?(?:[?#]|$)'
     IE_NAME = 'zingmp3:user'
     _TESTS = [{
         'url': 'https://zingmp3.vn/Mr-Siro/bai-hat',
@@ -396,36 +361,26 @@ class ZingMp3UserIE(ZingMp3BaseIE):
         'playlist_mincount': 15,
     }]

-    def _fetch_page(self, user_id, type_url, page):
-        page += 1
-        name_api = self._SLUG_API['user-list-song'] if type_url == 'bai-hat' else self._SLUG_API['user-list-video']
-        api = self.get_api_with_signature(name_api=name_api, param={
+    def _fetch_page(self, user_id, url_type, page):
+        url_type = 'user-list-song' if url_type == 'bai-hat' else 'user-list-video'
+        return self._parse_items(self._call_api(url_type, {
             'id': user_id,
             'type': 'artist',
-            'page': page,
+            'page': page + 1,
             'count': self._PER_PAGE
-        })
-        data = self._download_json(api, user_id, query={'sort': 'new', 'sectionId': 'aSong'})['data']
-        return self._entries(data.get('items'))
+        }, query={'sort': 'new', 'sectionId': 'aSong'}).get('items'))

     def _real_extract(self, url):
-        user_alias, type_url = self._match_valid_url(url).group('user', 'type')
-        if not type_url:
-            type_url = 'bai-hat'
-        user_info = self._download_json(
-            self.get_api_with_signature(name_api=self._SLUG_API['info-artist'], param={}),
-            video_id=user_alias, query={'alias': user_alias})['data']
-        user_id = user_info.get('id')
-        biography = user_info.get('biography')
-        if type_url == 'bai-hat' or type_url == 'video':
-            entries = OnDemandPagedList(functools.partial(self._fetch_page, user_id, type_url), self._PER_PAGE)
-            return self.playlist_result(entries, user_id, f'{user_info.get("name")} - {type_url}', biography)
+        user_alias, url_type = self._match_valid_url(url).group('user', 'type')
+        if not url_type:
+            url_type = 'bai-hat'
+
+        user_info = self._call_api('info-artist', {}, user_alias, query={'alias': user_alias})
+        if url_type in ('bai-hat', 'video'):
+            entries = OnDemandPagedList(
+                functools.partial(self._fetch_page, user_info['id'], url_type), self._PER_PAGE)
         else:
-            entries = []
-            for section in user_info.get('sections', {}):
-                if section.get('link') == f'/{user_alias}/{type_url}':
-                    items = section.get('items')
-                    for item in items:
-                        entries.append(self.url_result(urljoin(self._DOMAIN, item.get('link'))))
-                    break
-            return self.playlist_result(entries, user_id, f'{user_info.get("name")} - {type_url}', biography)
+            entries = self._parse_items(traverse_obj(user_info, (
+                'sections', lambda _, v: v['link'] == f'/{user_alias}/{url_type}', 'items', ...)))
+        return self.playlist_result(
+            entries, user_info['id'], f'{user_info.get("name")} - {url_type}', user_info.get('biography'))
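
(Aside, not part of the patch series: the request signing that the new `_api_url` helper performs can be reproduced standalone. The sketch below uses the apiKey/secret constants visible in the diff above; the slug and the `id` value in the demo call are taken from the tests and are purely illustrative.)

    # Sketch of zingmp3's request signing as implemented by _api_url():
    # sha256 over the sorted query params, then HMAC-SHA512 over slug + digest.
    import hashlib
    import hmac
    import urllib.parse

    API_KEY = '88265e23d4284f25963e6eedac8fbfa3'
    SECRET = b'2aa2d1c561e809b267f3638c4a307aab'

    def sign_url(api_slug, params):
        params = {**params, 'ctime': '1'}
        sha256 = hashlib.sha256(
            ''.join(f'{k}={v}' for k, v in sorted(params.items())).encode()).hexdigest()
        data = {
            **params,
            'apiKey': API_KEY,
            'sig': hmac.new(SECRET, f'{api_slug}{sha256}'.encode(), hashlib.sha512).hexdigest(),
        }
        return f'https://zingmp3.vn{api_slug}?{urllib.parse.urlencode(data)}'

    # e.g. the song endpoint with the id from the tests above
    print(sign_url('/api/v2/page/get/song', {'id': 'ZWZB9WAB'}))
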
From 4f28b537d9bba625a0097ee506c49b063291dba6 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Sat, 7 May 2022 18:08:47 +0530
Subject: [PATCH 1028/2552] Allow use of weaker ciphers with
 `--legacy-server-connect`

Closes #2043
---
 yt_dlp/utils.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 3f22eaf75..8b2c1c75a 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -917,6 +917,8 @@ def make_HTTPS_handler(params, **kwargs):
     context.check_hostname = opts_check_certificate
     if params.get('legacyserverconnect'):
         context.options |= 4  # SSL_OP_LEGACY_SERVER_CONNECT
+        # Allow use of weaker ciphers in Python 3.10+. See https://bugs.python.org/issue43998
+        context.set_ciphers('DEFAULT')
     context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
     if opts_check_certificate:
         if has_certifi and 'no-certifi' not in params.get('compat_opts', []):
@@ -930,9 +932,6 @@ def make_HTTPS_handler(params, **kwargs):
             except ssl.SSLError:
                 # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
                 if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
-                    # Create a new context to discard any certificates that were already loaded
-                    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
-                    context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED
                     for storename in ('CA', 'ROOT'):
                         _ssl_load_windows_store_certs(context, storename)
                     context.set_default_verify_paths()
@@ -1414,9 +1413,14 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
             conn_class = make_socks_conn_class(conn_class, socks_proxy)
             del req.headers['Ytdl-socks-proxy']

-        return self.do_open(functools.partial(
-            _create_http_connection, self, conn_class, True),
-            req, **kwargs)
+        try:
+            return self.do_open(
+                functools.partial(_create_http_connection, self, conn_class, True), req, **kwargs)
+        except urllib.error.URLError as e:
+            if (isinstance(e.reason, ssl.SSLError)
+                    and getattr(e.reason, 'reason', None) == 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
+                raise YoutubeDLError('SSLV3_ALERT_HANDSHAKE_FAILURE: Try using --legacy-server-connect')
+            raise


 class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
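
(Aside, not part of the patch series: a minimal sketch of what `--legacy-server-connect` now does to the TLS context, per the hunks above. The literal 4 is OpenSSL's SSL_OP_LEGACY_SERVER_CONNECT flag, spelled out because the named constant is not available on all supported Pythons; 'DEFAULT' re-enables the weaker ciphers that Python 3.10+ drops from its default cipher list.)

    import ssl

    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    # Permit handshakes with servers that lack RFC 5746 secure renegotiation
    context.options |= 4  # SSL_OP_LEGACY_SERVER_CONNECT
    # Re-allow weak ciphers removed in Python 3.10+. See https://bugs.python.org/issue43998
    context.set_ciphers('DEFAULT')
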
From 895aeb71d794227a24c93b39449a0f6bab068c21 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Sat, 7 May 2022 19:20:23 +0530
Subject: [PATCH 1029/2552] [toggo] Fix `_VALID_URL`

Closes #2610
---
 yt_dlp/extractor/toggo.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/toggo.py b/yt_dlp/extractor/toggo.py
index da5f0c4d1..4c03d1dc0 100644
--- a/yt_dlp/extractor/toggo.py
+++ b/yt_dlp/extractor/toggo.py
@@ -4,7 +4,7 @@ from ..utils import int_or_none, parse_qs

 class ToggoIE(InfoExtractor):
     IE_NAME = 'toggo'
-    _VALID_URL = r'https?://(?:www\.)?toggo\.de/[\w-]+/folge/(?P<id>[\w-]+)'
+    _VALID_URL = r'https?://(?:www\.)?toggo\.de/[^/?#]+/folge/(?P<id>[^/?#]+)'
     _TESTS = [{
         'url': 'https://www.toggo.de/weihnachtsmann--co-kg/folge/ein-geschenk-fuer-zwei',
         'info_dict': {
@@ -27,6 +27,9 @@ class ToggoIE(InfoExtractor):
             'upload_date': '20200217',
         },
         'params': {'skip_download': True},
+    }, {
+        'url': 'https://www.toggo.de/grizzy--die-lemminge/folge/ab-durch-die-wand-vogelfrei-rock\'n\'lemming',
+        'only_matching': True,
     }]

     def _real_extract(self, url):

From d4736fdb43be5f0e3050e831b8d8d73e815ba98d Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Sat, 7 May 2022 19:45:00 +0530
Subject: [PATCH 1030/2552] Remove warning for videos with an empty title

---
 yt_dlp/YoutubeDL.py        | 12 ++++++++----
 yt_dlp/extractor/common.py |  4 +++-
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 1766ff379..3946311cd 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -2335,12 +2335,16 @@ class YoutubeDL:
         # TODO: move sanitization here
         if is_video:
             # playlists are allowed to lack "title"
-            info_dict['fulltitle'] = info_dict.get('title')
-            if 'title' not in info_dict:
+            title = info_dict.get('title', NO_DEFAULT)
+            if title is NO_DEFAULT:
                 raise ExtractorError('Missing "title" field in extractor result',
                                      video_id=info_dict['id'], ie=info_dict['extractor'])
-            elif not info_dict.get('title'):
-                self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
+            info_dict['fulltitle'] = title
+            if not title:
+                if title == '':
+                    self.write_debug('Extractor gave empty title. Creating a generic title')
+                else:
+                    self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
                 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'

         if info_dict.get('duration') is not None:
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 97cd524bc..e5a44e296 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -103,7 +103,9 @@ class InfoExtractor:
     For a video, the dictionaries must include the following fields:

     id:             Video identifier.
-    title:          Video title, unescaped.
+    title:          Video title, unescaped. Set to an empty string if video has
+                    no title as opposed to "None" which signifies that the
+                    extractor failed to obtain a title

     Additionally, it must contain either a formats entry or a url one:

From 1f8b4ab7335e684b3f2a6938dac941103d026105 Mon Sep 17 00:00:00 2001
From: Lesmiscore
Date: Sun, 8 May 2022 00:47:51 +0900
Subject: [PATCH 1031/2552] [radiko] Fix extractor (#3655)

Authored by: Lesmiscore
---
 yt_dlp/extractor/radiko.py | 68 +++++++++++++++++---------------------
 1 file changed, 31 insertions(+), 37 deletions(-)

diff --git a/yt_dlp/extractor/radiko.py b/yt_dlp/extractor/radiko.py
index 651cfe63b..dbb748715 100644
--- a/yt_dlp/extractor/radiko.py
+++ b/yt_dlp/extractor/radiko.py
@@ -1,26 +1,22 @@
-import re
 import base64
-import calendar
-import datetime
+import re
+import urllib.parse

 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
-    update_url_query,
     clean_html,
+    time_seconds,
+    try_call,
     unified_timestamp,
+    update_url_query,
 )
-from ..compat import compat_urllib_parse


 class RadikoBaseIE(InfoExtractor):
     _FULL_KEY = None

     def _auth_client(self):
-        auth_cache = self._downloader.cache.load('radiko', 'auth_data')
-        if auth_cache:
-            return auth_cache
-
         _, auth1_handle = self._download_webpage_handle(
             'https://radiko.jp/v2/api/auth1', None, 'Downloading authentication page',
             headers={
@@ -89,8 +85,8 @@ class RadikoBaseIE(InfoExtractor):

     def _extract_formats(self, video_id, station, is_onair, ft, cursor, auth_token, area_id, query):
         m3u8_playlist_data = self._download_xml(
-            'https://radiko.jp/v3/station/stream/pc_html5/%s.xml' % station, video_id,
-            note='Downloading m3u8 information')
+            f'https://radiko.jp/v3/station/stream/pc_html5/{station}.xml', video_id,
+            note='Downloading stream information')
         m3u8_urls = m3u8_playlist_data.findall('.//url')

         formats = []
@@ -102,7 +98,7 @@ class RadikoBaseIE(InfoExtractor):
                 'station_id': station,
                 **query,
                 'l': '15',
-                'lsid': '77d0678df93a1034659c14d6fc89f018',
+                'lsid': '88ecea37e968c1f17d5413312d9f8003',
                 'type': 'b',
             })
             if playlist_url in found:
@@ -112,16 +108,17 @@ class RadikoBaseIE(InfoExtractor):

             time_to_skip = None if is_onair else cursor - ft

+            domain = 
urllib.parse.urlparse(playlist_url).netloc subformats = self._extract_m3u8_formats( playlist_url, video_id, ext='m4a', - live=True, fatal=False, m3u8_id=None, + live=True, fatal=False, m3u8_id=domain, + note=f'Downloading m3u8 information from {domain}', headers={ 'X-Radiko-AreaId': area_id, 'X-Radiko-AuthToken': auth_token, }) for sf in subformats: - domain = sf['format_id'] = compat_urllib_parse.urlparse(sf['url']).netloc - if re.match(r'^[cf]-radiko\.smartstream\.ne\.jp$', domain): + if re.fullmatch(r'[cf]-radiko\.smartstream\.ne\.jp', domain): # Prioritize live radio vs playback based on extractor sf['preference'] = 100 if is_onair else -100 if not is_onair and url_attrib['timefree'] == '1' and time_to_skip: @@ -151,31 +148,29 @@ class RadikoIE(RadikoBaseIE): def _real_extract(self, url): station, video_id = self._match_valid_url(url).groups() vid_int = unified_timestamp(video_id, False) - - auth_token, area_id = self._auth_client() - prog, station_program, ft, radio_begin, radio_end = self._find_program(video_id, station, vid_int) - title = prog.find('title').text - description = clean_html(prog.find('info').text) - station_name = station_program.find('.//name').text - - formats = self._extract_formats( - video_id=video_id, station=station, is_onair=False, - ft=ft, cursor=vid_int, auth_token=auth_token, area_id=area_id, - query={ - 'start_at': radio_begin, - 'ft': radio_begin, - 'end_at': radio_end, - 'to': radio_end, - 'seek': video_id, - }) + auth_cache = self._downloader.cache.load('radiko', 'auth_data') + for attempt in range(2): + auth_token, area_id = (not attempt and auth_cache) or self._auth_client() + formats = self._extract_formats( + video_id=video_id, station=station, is_onair=False, + ft=ft, cursor=vid_int, auth_token=auth_token, area_id=area_id, + query={ + 'start_at': radio_begin, + 'ft': radio_begin, + 'end_at': radio_end, + 'to': radio_end, + 'seek': video_id, + }) + if formats: + break return { 'id': video_id, - 'title': title, - 'description': description, - 'uploader': station_name, + 'title': try_call(lambda: prog.find('title').text), + 'description': clean_html(try_call(lambda: prog.find('info').text)), + 'uploader': try_call(lambda: station_program.find('.//name').text), 'uploader_id': station, 'timestamp': vid_int, 'formats': formats, @@ -205,8 +200,7 @@ class RadikoRadioIE(RadikoBaseIE): auth_token, area_id = self._auth_client() # get current time in JST (GMT+9:00 w/o DST) - vid_now = datetime.datetime.now(datetime.timezone(datetime.timedelta(hours=9))) - vid_now = calendar.timegm(vid_now.timetuple()) + vid_now = time_seconds(hours=9) prog, station_program, ft, _, _ = self._find_program(station, station, vid_now) From 5747d4f4e864348c28eb6de4159bcfd7b8e6ddec Mon Sep 17 00:00:00 2001 From: MMM Date: Sat, 7 May 2022 18:06:05 +0200 Subject: [PATCH 1032/2552] [kaltura] Update API calls (#3657) Authored by: flashdagger --- yt_dlp/extractor/kaltura.py | 47 ++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/yt_dlp/extractor/kaltura.py b/yt_dlp/extractor/kaltura.py index f9b9c5c78..afad279bd 100644 --- a/yt_dlp/extractor/kaltura.py +++ b/yt_dlp/extractor/kaltura.py @@ -1,5 +1,6 @@ -import re import base64 +import json +import re from .common import InfoExtractor from ..compat import ( @@ -13,6 +14,7 @@ from ..utils import ( int_or_none, unsmuggle_url, smuggle_url, + traverse_obj, ) @@ -33,7 +35,7 @@ class KalturaIE(InfoExtractor): ) ''' _SERVICE_URL = 'http://cdnapi.kaltura.com' - _SERVICE_BASE = '/api_v3/index.php' + 
_SERVICE_BASE = '/api_v3/service/multirequest' # See https://github.com/kaltura/server/blob/master/plugins/content/caption/base/lib/model/enums/CaptionType.php _CAPTION_TYPES = { 1: 'srt', @@ -169,30 +171,35 @@ class KalturaIE(InfoExtractor): def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs): params = actions[0] - if len(actions) > 1: - for i, a in enumerate(actions[1:], start=1): - for k, v in a.items(): - params['%d:%s' % (i, k)] = v + params.update({i: a for i, a in enumerate(actions[1:], start=1)}) data = self._download_json( (service_url or self._SERVICE_URL) + self._SERVICE_BASE, - video_id, query=params, *args, **kwargs) + video_id, data=json.dumps(params).encode('utf-8'), + headers={ + 'Content-Type': 'application/json', + 'Accept-Encoding': 'gzip, deflate, br', + }, *args, **kwargs) + + for idx, status in enumerate(data): + if not isinstance(status, dict): + continue + if status.get('objectType') == 'KalturaAPIException': + raise ExtractorError( + '%s said: %s (%d)' % (self.IE_NAME, status['message'], idx)) - status = data if len(actions) == 1 else data[0] - if status.get('objectType') == 'KalturaAPIException': - raise ExtractorError( - '%s said: %s' % (self.IE_NAME, status['message'])) + data[1] = traverse_obj(data, (1, 'objects', 0)) return data def _get_video_info(self, video_id, partner_id, service_url=None): actions = [ { - 'action': 'null', - 'apiVersion': '3.1.5', - 'clientTag': 'kdp:v3.8.5', + 'apiVersion': '3.3.0', + 'clientTag': 'html5:v3.1.0', 'format': 1, # JSON, 2 = XML, 3 = PHP - 'service': 'multirequest', + 'ks': '', + 'partnerId': partner_id, }, { 'expiry': 86400, @@ -201,12 +208,14 @@ class KalturaIE(InfoExtractor): 'widgetId': '_%s' % partner_id, }, { - 'action': 'get', - 'entryId': video_id, + 'action': 'list', + 'filter': {'redirectFromEntryId': video_id}, 'service': 'baseentry', 'ks': '{1:result:ks}', - 'responseProfile:fields': 'createdAt,dataUrl,duration,name,plays,thumbnailUrl,userId', - 'responseProfile:type': 1, + 'responseProfile': { + 'type': 1, + 'fields': 'createdAt,dataUrl,duration,name,plays,thumbnailUrl,userId', + }, }, { 'action': 'getbyentryid', From d7a1aa00c65dd516c70c10bd070113b87b96d1c8 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 7 May 2022 22:36:18 +0530 Subject: [PATCH 1033/2552] Run `FFmpegFixupM3u8PP` for live-streams if needed Closes #3669 --- yt_dlp/YoutubeDL.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 3946311cd..de34b8bd7 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3194,7 +3194,8 @@ class YoutubeDL: downloader = downloader.__name__ if downloader else None if info_dict.get('requested_formats') is None: # Not necessary if doing merger - ffmpeg_fixup(downloader == 'HlsFD', + live_fixup = info_dict.get('is_live') and not self.params.get('hls_use_mpegts') + ffmpeg_fixup(downloader == 'HlsFD' or live_fixup, 'Possible MPEG-TS in MP4 container or malformed AAC timestamps', FFmpegFixupM3u8PP) ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD', From 5f8ea7e0d83d9096d30e0c6554a51cb4cb678522 Mon Sep 17 00:00:00 2001 From: Evan Spensley <94762716+evansp@users.noreply.github.com> Date: Sat, 7 May 2022 18:48:34 -0400 Subject: [PATCH 1034/2552] [Jamendo] Extract more metadata (#3672) Authored by: evansp --- yt_dlp/extractor/jamendo.py | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/jamendo.py b/yt_dlp/extractor/jamendo.py index 
5dc2c25e6..d960ee51c 100644 --- a/yt_dlp/extractor/jamendo.py +++ b/yt_dlp/extractor/jamendo.py @@ -28,10 +28,11 @@ class JamendoIE(InfoExtractor): 'ext': 'flac', # 'title': 'Maya Filipič - Stories from Emona I', 'title': 'Stories from Emona I', - # 'artist': 'Maya Filipič', + 'artist': 'Maya Filipič', + 'album': 'Between two worlds', 'track': 'Stories from Emona I', 'duration': 210, - 'thumbnail': r're:^https?://.*\.jpg', + 'thumbnail': 'https://usercontent.jamendo.com?type=album&id=29279&width=300&trackid=196219', 'timestamp': 1217438117, 'upload_date': '20080730', 'license': 'by-nc-nd', @@ -45,11 +46,11 @@ class JamendoIE(InfoExtractor): 'only_matching': True, }] - def _call_api(self, resource, resource_id): + def _call_api(self, resource, resource_id, fatal=True): path = '/api/%ss' % resource rand = compat_str(random.random()) return self._download_json( - 'https://www.jamendo.com' + path, resource_id, query={ + 'https://www.jamendo.com' + path, resource_id, fatal=fatal, query={ 'id[]': resource_id, }, headers={ 'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand) @@ -71,6 +72,8 @@ class JamendoIE(InfoExtractor): # if artist_name: # title = '%s - %s' % (artist_name, title) # album = get_model('album') + artist = self._call_api("artist", track.get('artistId'), fatal=False) + album = self._call_api("album", track.get('albumId'), fatal=False) formats = [{ 'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294' @@ -118,9 +121,9 @@ class JamendoIE(InfoExtractor): 'title': title, 'description': track.get('description'), 'duration': int_or_none(track.get('duration')), - # 'artist': artist_name, + 'artist': artist.get('name'), 'track': track_name, - # 'album': album.get('name'), + 'album': album.get('name'), 'formats': formats, 'license': '-'.join(license) if license else None, 'timestamp': int_or_none(track.get('dateCreated')), @@ -145,22 +148,38 @@ class JamendoAlbumIE(JamendoIE): 'info_dict': { 'id': '1032333', 'ext': 'flac', - 'title': 'Shearer - Warmachine', + 'title': 'Warmachine', 'artist': 'Shearer', 'track': 'Warmachine', 'timestamp': 1368089771, 'upload_date': '20130509', + 'view_count': int, + 'thumbnail': 'https://usercontent.jamendo.com?type=album&id=121486&width=300&trackid=1032333', + 'duration': 190, + 'license': 'by', + 'album': 'Duck On Cover', + 'average_rating': 4, + 'tags': ['rock', 'drums', 'bass', 'world', 'punk', 'neutral'], + 'like_count': int, } }, { 'md5': '1f358d7b2f98edfe90fd55dac0799d50', 'info_dict': { 'id': '1032330', 'ext': 'flac', - 'title': 'Shearer - Without Your Ghost', + 'title': 'Without Your Ghost', 'artist': 'Shearer', 'track': 'Without Your Ghost', 'timestamp': 1368089771, 'upload_date': '20130509', + 'duration': 192, + 'tags': ['rock', 'drums', 'bass', 'world', 'punk'], + 'album': 'Duck On Cover', + 'thumbnail': 'https://usercontent.jamendo.com?type=album&id=121486&width=300&trackid=1032330', + 'view_count': int, + 'average_rating': 4, + 'license': 'by', + 'like_count': int, } }], 'params': { From 385ffb467b2285e85a2a5495b90314ba1f8e0700 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 8 May 2022 20:40:06 +0530 Subject: [PATCH 1035/2552] [wistia] Fix `_VALID_URL` Closes #2866 Authored by: dirkf --- yt_dlp/extractor/wistia.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/wistia.py b/yt_dlp/extractor/wistia.py index 8f0e7949b..3cbcb4aa0 100644 --- a/yt_dlp/extractor/wistia.py +++ b/yt_dlp/extractor/wistia.py @@ -12,7 +12,7 @@ from ..utils import ( class 
WistiaBaseIE(InfoExtractor):
     _VALID_ID_REGEX = r'(?P<id>[a-z0-9]{10})'
-    _VALID_URL_BASE = r'https?://(?:fast\.)?wistia\.(?:net|com)/embed/'
+    _VALID_URL_BASE = r'https?://(?:\w+\.)?wistia\.(?:net|com)/(?:embed/)?'
     _EMBED_BASE_URL = 'http://fast.wistia.com/embed/'

     def _download_embed_config(self, config_type, config_id, referer):
@@ -173,7 +173,7 @@ class WistiaIE(WistiaBaseIE):


 class WistiaPlaylistIE(WistiaBaseIE):
-    _VALID_URL = r'%splaylists/%s' % (WistiaIE._VALID_URL_BASE, WistiaIE._VALID_ID_REGEX)
+    _VALID_URL = r'%splaylists/%s' % (WistiaBaseIE._VALID_URL_BASE, WistiaBaseIE._VALID_ID_REGEX)

     _TEST = {
         'url': 'https://fast.wistia.net/embed/playlists/aodt9etokc',

From d239db030671b9445c77c7d8cb190ba5fee76b96 Mon Sep 17 00:00:00 2001
From: ca-za
Date: Mon, 9 May 2022 13:42:22 +0200
Subject: [PATCH 1036/2552] [toggo] Improve `_VALID_URL` (#3689)

Authored by: ca-za
---
 yt_dlp/extractor/toggo.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/toggo.py b/yt_dlp/extractor/toggo.py
index 4c03d1dc0..9f98cfaf0 100644
--- a/yt_dlp/extractor/toggo.py
+++ b/yt_dlp/extractor/toggo.py
@@ -4,7 +4,7 @@ from ..utils import int_or_none, parse_qs

 class ToggoIE(InfoExtractor):
     IE_NAME = 'toggo'
-    _VALID_URL = r'https?://(?:www\.)?toggo\.de/[^/?#]+/folge/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?toggo\.de/(?:toggolino/)?[^/?#]+/folge/(?P<id>[^/?#]+)'
     _TESTS = [{
         'url': 'https://www.toggo.de/weihnachtsmann--co-kg/folge/ein-geschenk-fuer-zwei',
         'info_dict': {
@@ -30,6 +30,9 @@ class ToggoIE(InfoExtractor):
     }, {
         'url': 'https://www.toggo.de/grizzy--die-lemminge/folge/ab-durch-die-wand-vogelfrei-rock\'n\'lemming',
         'only_matching': True,
+    }, {
+        'url': 'https://www.toggo.de/toggolino/paw-patrol/folge/der-wetter-zeppelin-der-chili-kochwettbewerb',
+        'only_matching': True,
     }]

     def _real_extract(self, url):
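
(Aside, not part of the patch series: a quick, illustrative check that the reworked pattern accepts both URL shapes exercised by the tests above.)

    import re

    pattern = r'https?://(?:www\.)?toggo\.de/(?:toggolino/)?[^/?#]+/folge/(?P<id>[^/?#]+)'
    for url in (
        "https://www.toggo.de/grizzy--die-lemminge/folge/ab-durch-die-wand-vogelfrei-rock'n'lemming",
        'https://www.toggo.de/toggolino/paw-patrol/folge/der-wetter-zeppelin-der-chili-kochwettbewerb',
    ):
        print(re.match(pattern, url).group('id'))

From 0f06bcd7591332937fdec497d6cbb4914358bc79 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Mon, 9 May 2022 17:24:28 +0530
Subject: [PATCH 1037/2552] [cleanup] Minor fixes (See desc)

* [youtube] Fix `--youtube-skip-dash-manifest`
* [build] Use `$()` in `Makefile`.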
Closes #3684 * Fix bug in 385ffb467b2285e85a2a5495b90314ba1f8e0700 * Fix bug in 43d7f5a5d0c77556156a3f8caa6976d3908a1e38 * [cleanup] Remove unnecessary `utf-8` from `str.encode`/`bytes.decode` * [utils] LazyList: Expose unnecessarily "protected" attributes and other minor cleanup --- Makefile | 2 +- devscripts/make_readme.py | 2 +- devscripts/make_supportedsites.py | 5 +- devscripts/update-formulae.py | 2 +- test/helper.py | 2 +- test/test_InfoExtractor.py | 8 +-- test/test_YoutubeDLCookieJar.py | 2 +- test/test_aes.py | 8 +-- test/test_compat.py | 2 +- test/test_http.py | 8 +-- test/test_socks.py | 10 +-- test/test_subtitles.py | 2 +- test/test_update.py.disabled | 2 +- test/test_utils.py | 2 +- yt_dlp/YoutubeDL.py | 16 ++--- yt_dlp/aes.py | 2 +- yt_dlp/cookies.py | 20 +++--- yt_dlp/downloader/external.py | 2 +- yt_dlp/downloader/f4m.py | 2 +- yt_dlp/downloader/hls.py | 4 +- yt_dlp/downloader/http.py | 6 +- yt_dlp/downloader/ism.py | 2 +- yt_dlp/downloader/mhtml.py | 4 +- yt_dlp/downloader/niconico.py | 2 +- yt_dlp/downloader/websocket.py | 2 +- yt_dlp/downloader/youtube_live_chat.py | 6 +- yt_dlp/extractor/dplay.py | 3 +- yt_dlp/extractor/generic.py | 14 ----- yt_dlp/extractor/youtube.py | 5 +- yt_dlp/postprocessor/common.py | 8 +-- yt_dlp/postprocessor/xattrpp.py | 2 +- yt_dlp/socks.py | 10 +-- yt_dlp/update.py | 4 +- yt_dlp/utils.py | 87 +++++++++++++------------- yt_dlp/webvtt.py | 2 +- 35 files changed, 124 insertions(+), 136 deletions(-) diff --git a/Makefile b/Makefile index 179aaff57..7fa4a6d46 100644 --- a/Makefile +++ b/Makefile @@ -42,7 +42,7 @@ PYTHON ?= /usr/bin/env python3 SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi) # set markdown input format to "markdown-smart" for pandoc version 2 and to "markdown" for pandoc prior to version 2 -MARKDOWN = $(shell if [ `pandoc -v | head -n1 | cut -d" " -f2 | head -c1` = "2" ]; then echo markdown-smart; else echo markdown; fi) +MARKDOWN = $(shell if [ "$(pandoc -v | head -n1 | cut -d" " -f2 | head -c1)" = "2" ]; then echo markdown-smart; else echo markdown; fi) install: lazy-extractors yt-dlp yt-dlp.1 completions mkdir -p $(DESTDIR)$(BINDIR) diff --git a/devscripts/make_readme.py b/devscripts/make_readme.py index 1401c2e5a..fd234bf58 100755 --- a/devscripts/make_readme.py +++ b/devscripts/make_readme.py @@ -14,7 +14,7 @@ EPILOG_START = 'See full documentation' helptext = sys.stdin.read() if isinstance(helptext, bytes): - helptext = helptext.decode('utf-8') + helptext = helptext.decode() start, end = helptext.index(f'\n {OPTIONS_START}'), helptext.index(f'\n{EPILOG_START}') options = re.sub(r'(?m)^ (\w.+)$', r'## \1', helptext[start + 1: end + 1]) diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py index 0a0d08f56..0403c1ae6 100644 --- a/devscripts/make_supportedsites.py +++ b/devscripts/make_supportedsites.py @@ -3,9 +3,8 @@ import optparse import os import sys -# Import yt_dlp -ROOT_DIR = os.path.join(os.path.dirname(__file__), '..') -sys.path.insert(0, ROOT_DIR) +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + import yt_dlp diff --git a/devscripts/update-formulae.py b/devscripts/update-formulae.py index 6424f5d9b..a89872c7b 100644 --- a/devscripts/update-formulae.py +++ b/devscripts/update-formulae.py @@ -17,7 +17,7 @@ normalized_version = '.'.join(str(int(x)) for x in version.split('.')) pypi_release = json.loads(compat_urllib_request.urlopen( 'https://pypi.org/pypi/yt-dlp/%s/json' % 
normalized_version -).read().decode('utf-8')) +).read().decode()) tarball_file = next(x for x in pypi_release['urls'] if x['filename'].endswith('.tar.gz')) diff --git a/test/helper.py b/test/helper.py index 81e53ed74..2333ace98 100644 --- a/test/helper.py +++ b/test/helper.py @@ -92,7 +92,7 @@ def gettestcases(include_onlymatching=False): yield from ie.get_testcases(include_onlymatching) -md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() +md5 = lambda s: hashlib.md5(s.encode()).hexdigest() def expect_value(self, got, expected, field): diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 173b62920..257ea7dd3 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -1360,7 +1360,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ for mpd_file, mpd_url, mpd_base_url, expected_formats, expected_subtitles in _TEST_CASES: with open('./test/testdata/mpd/%s.mpd' % mpd_file, encoding='utf-8') as f: formats, subtitles = self.ie._parse_mpd_formats_and_subtitles( - compat_etree_fromstring(f.read().encode('utf-8')), + compat_etree_fromstring(f.read().encode()), mpd_base_url=mpd_base_url, mpd_url=mpd_url) self.ie._sort_formats(formats) expect_value(self, formats, expected_formats, None) @@ -1551,7 +1551,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ for ism_file, ism_url, expected_formats, expected_subtitles in _TEST_CASES: with open('./test/testdata/ism/%s.Manifest' % ism_file, encoding='utf-8') as f: formats, subtitles = self.ie._parse_ism_formats_and_subtitles( - compat_etree_fromstring(f.read().encode('utf-8')), ism_url=ism_url) + compat_etree_fromstring(f.read().encode()), ism_url=ism_url) self.ie._sort_formats(formats) expect_value(self, formats, expected_formats, None) expect_value(self, subtitles, expected_subtitles, None) @@ -1577,7 +1577,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ for f4m_file, f4m_url, expected_formats in _TEST_CASES: with open('./test/testdata/f4m/%s.f4m' % f4m_file, encoding='utf-8') as f: formats = self.ie._parse_f4m_formats( - compat_etree_fromstring(f.read().encode('utf-8')), + compat_etree_fromstring(f.read().encode()), f4m_url, None) self.ie._sort_formats(formats) expect_value(self, formats, expected_formats, None) @@ -1624,7 +1624,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ for xspf_file, xspf_url, expected_entries in _TEST_CASES: with open('./test/testdata/xspf/%s.xspf' % xspf_file, encoding='utf-8') as f: entries = self.ie._parse_xspf( - compat_etree_fromstring(f.read().encode('utf-8')), + compat_etree_fromstring(f.read().encode()), xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url) expect_value(self, entries, expected_entries, None) for i in range(len(entries)): diff --git a/test/test_YoutubeDLCookieJar.py b/test/test_YoutubeDLCookieJar.py index 13a4569b2..6280e1f2c 100644 --- a/test/test_YoutubeDLCookieJar.py +++ b/test/test_YoutubeDLCookieJar.py @@ -17,7 +17,7 @@ class TestYoutubeDLCookieJar(unittest.TestCase): tf = tempfile.NamedTemporaryFile(delete=False) try: cookiejar.save(filename=tf.name, ignore_discard=True, ignore_expires=True) - temp = tf.read().decode('utf-8') + temp = tf.read().decode() self.assertTrue(re.search( r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpiresEmpty\s+YoutubeDLExpiresEmptyValue', temp)) self.assertTrue(re.search( diff --git a/test/test_aes.py b/test/test_aes.py index c934104e3..2b7b7cf54 100644 --- 
a/test/test_aes.py +++ b/test/test_aes.py @@ -81,19 +81,19 @@ class TestAES(unittest.TestCase): self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) def test_decrypt_text(self): - password = intlist_to_bytes(self.key).decode('utf-8') + password = intlist_to_bytes(self.key).decode() encrypted = base64.b64encode( intlist_to_bytes(self.iv[:8]) + b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae' - ).decode('utf-8') + ).decode() decrypted = (aes_decrypt_text(encrypted, password, 16)) self.assertEqual(decrypted, self.secret_msg) - password = intlist_to_bytes(self.key).decode('utf-8') + password = intlist_to_bytes(self.key).decode() encrypted = base64.b64encode( intlist_to_bytes(self.iv[:8]) + b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83' - ).decode('utf-8') + ).decode() decrypted = (aes_decrypt_text(encrypted, password, 32)) self.assertEqual(decrypted, self.secret_msg) diff --git a/test/test_compat.py b/test/test_compat.py index 9b185853d..224175c65 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -90,7 +90,7 @@ class TestCompat(unittest.TestCase): spam ''' - doc = compat_etree_fromstring(xml.encode('utf-8')) + doc = compat_etree_fromstring(xml.encode()) self.assertTrue(isinstance(doc.attrib['foo'], compat_str)) self.assertTrue(isinstance(doc.attrib['spam'], compat_str)) self.assertTrue(isinstance(doc.find('normal').text, compat_str)) diff --git a/test/test_http.py b/test/test_http.py index fb8c9f4e9..664e09ace 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -140,7 +140,7 @@ def _build_proxy_handler(name): self.send_response(200) self.send_header('Content-Type', 'text/plain; charset=utf-8') self.end_headers() - self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode('utf-8')) + self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode()) return HTTPTestRequestHandler @@ -167,12 +167,12 @@ class TestProxy(unittest.TestCase): 'geo_verification_proxy': geo_proxy, }) url = 'http://foo.com/bar' - response = ydl.urlopen(url).read().decode('utf-8') + response = ydl.urlopen(url).read().decode() self.assertEqual(response, f'normal: {url}') req = compat_urllib_request.Request(url) req.add_header('Ytdl-request-proxy', geo_proxy) - response = ydl.urlopen(req).read().decode('utf-8') + response = ydl.urlopen(req).read().decode() self.assertEqual(response, f'geo: {url}') def test_proxy_with_idn(self): @@ -180,7 +180,7 @@ class TestProxy(unittest.TestCase): 'proxy': f'127.0.0.1:{self.port}', }) url = 'http://中文.tw/' - response = ydl.urlopen(url).read().decode('utf-8') + response = ydl.urlopen(url).read().decode() # b'xn--fiq228c' is '中文'.encode('idna') self.assertEqual(response, 'normal: http://xn--fiq228c.tw/') diff --git a/test/test_socks.py b/test/test_socks.py index 546f0d73d..a8b068cdd 100644 --- a/test/test_socks.py +++ b/test/test_socks.py @@ -32,7 +32,7 @@ class TestMultipleSocks(unittest.TestCase): 'proxy': params['primary_proxy'] }) self.assertEqual( - ydl.urlopen('http://yt-dl.org/ip').read().decode('utf-8'), + ydl.urlopen('http://yt-dl.org/ip').read().decode(), params['primary_server_ip']) def test_proxy_https(self): @@ -43,7 +43,7 @@ class TestMultipleSocks(unittest.TestCase): 'proxy': params['primary_proxy'] }) self.assertEqual( - ydl.urlopen('https://yt-dl.org/ip').read().decode('utf-8'), + ydl.urlopen('https://yt-dl.org/ip').read().decode(), params['primary_server_ip']) def test_secondary_proxy_http(self): @@ -54,7 +54,7 @@ class TestMultipleSocks(unittest.TestCase): 
req = compat_urllib_request.Request('http://yt-dl.org/ip') req.add_header('Ytdl-request-proxy', params['secondary_proxy']) self.assertEqual( - ydl.urlopen(req).read().decode('utf-8'), + ydl.urlopen(req).read().decode(), params['secondary_server_ip']) def test_secondary_proxy_https(self): @@ -65,7 +65,7 @@ class TestMultipleSocks(unittest.TestCase): req = compat_urllib_request.Request('https://yt-dl.org/ip') req.add_header('Ytdl-request-proxy', params['secondary_proxy']) self.assertEqual( - ydl.urlopen(req).read().decode('utf-8'), + ydl.urlopen(req).read().decode(), params['secondary_server_ip']) @@ -96,7 +96,7 @@ class TestSocks(unittest.TestCase): ydl = FakeYDL({ 'proxy': '%s://127.0.0.1:%d' % (protocol, self.port), }) - return ydl.urlopen('http://yt-dl.org/ip').read().decode('utf-8') + return ydl.urlopen('http://yt-dl.org/ip').read().decode() def test_socks4(self): self.assertTrue(isinstance(self._get_ip('socks4'), compat_str)) diff --git a/test/test_subtitles.py b/test/test_subtitles.py index 362b67cef..182bd7a4b 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -51,7 +51,7 @@ class BaseTestSubtitles(unittest.TestCase): for sub_info in subtitles.values(): if sub_info.get('data') is None: uf = self.DL.urlopen(sub_info['url']) - sub_info['data'] = uf.read().decode('utf-8') + sub_info['data'] = uf.read().decode() return {l: sub_info['data'] for l, sub_info in subtitles.items()} diff --git a/test/test_update.py.disabled b/test/test_update.py.disabled index 389b8ffe5..73b55cdac 100644 --- a/test/test_update.py.disabled +++ b/test/test_update.py.disabled @@ -21,7 +21,7 @@ class TestUpdate(unittest.TestCase): signature = versions_info['signature'] del versions_info['signature'] self.assertTrue(rsa_verify( - json.dumps(versions_info, sort_keys=True).encode('utf-8'), + json.dumps(versions_info, sort_keys=True).encode(), signature, UPDATES_RSA_KEY)) diff --git a/test/test_utils.py b/test/test_utils.py index 5e220087b..184c39cff 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1759,7 +1759,7 @@ Line 1 def test(ll, idx, val, cache): self.assertEqual(ll[idx], val) - self.assertEqual(getattr(ll, '_LazyList__cache'), list(cache)) + self.assertEqual(ll._cache, list(cache)) ll = LazyList(range(10)) test(ll, 0, 0, range(1)) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index de34b8bd7..f9670429a 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -773,9 +773,9 @@ class YoutubeDL: assert hasattr(self, '_output_process') assert isinstance(message, compat_str) line_count = message.count('\n') + 1 - self._output_process.stdin.write((message + '\n').encode('utf-8')) + self._output_process.stdin.write((message + '\n').encode()) self._output_process.stdin.flush() - res = ''.join(self._output_channel.readline().decode('utf-8') + res = ''.join(self._output_channel.readline().decode() for _ in range(line_count)) return res[:-len('\n')] @@ -1181,7 +1181,7 @@ class YoutubeDL: value = map(str, variadic(value) if '#' in flags else [value]) value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt elif fmt[-1] == 'B': # bytes - value = f'%{str_fmt}'.encode() % str(value).encode('utf-8') + value = f'%{str_fmt}'.encode() % str(value).encode() value, fmt = value.decode('utf-8', 'ignore'), 's' elif fmt[-1] == 'U': # unicode normalized value, fmt = unicodedata.normalize( @@ -2243,7 +2243,7 @@ class YoutubeDL: return selector_function(ctx_copy) return final_selector - stream = io.BytesIO(format_spec.encode('utf-8')) + stream = io.BytesIO(format_spec.encode()) try: tokens 
= list(_remove_unused_ops(tokenize.tokenize(stream.readline))) except tokenize.TokenError: @@ -3194,8 +3194,8 @@ class YoutubeDL: downloader = downloader.__name__ if downloader else None if info_dict.get('requested_formats') is None: # Not necessary if doing merger - live_fixup = info_dict.get('is_live') and not self.params.get('hls_use_mpegts') - ffmpeg_fixup(downloader == 'HlsFD' or live_fixup, + fixup_live = info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None + ffmpeg_fixup(downloader == 'HlsFD' or fixup_live, 'Possible MPEG-TS in MP4 container or malformed AAC timestamps', FFmpegFixupM3u8PP) ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD', @@ -3700,10 +3700,10 @@ class YoutubeDL: # Not implemented if False and self.params.get('call_home'): - ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8') + ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode() write_debug('Public IP address: %s' % ipaddr) latest_version = self.urlopen( - 'https://yt-dl.org/latest/version').read().decode('utf-8') + 'https://yt-dl.org/latest/version').read().decode() if version_tuple(latest_version) > version_tuple(__version__): self.report_warning( 'You are using an outdated version (newest version: %s)! ' diff --git a/yt_dlp/aes.py b/yt_dlp/aes.py index ba3baf3de..d0e6d7549 100644 --- a/yt_dlp/aes.py +++ b/yt_dlp/aes.py @@ -265,7 +265,7 @@ def aes_decrypt_text(data, password, key_size_bytes): NONCE_LENGTH_BYTES = 8 data = bytes_to_intlist(compat_b64decode(data)) - password = bytes_to_intlist(password.encode('utf-8')) + password = bytes_to_intlist(password.encode()) key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password)) key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES) diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index 621c91e86..b06edfc5d 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -283,10 +283,10 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger): def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure): - host_key = host_key.decode('utf-8') - name = name.decode('utf-8') - value = value.decode('utf-8') - path = path.decode('utf-8') + host_key = host_key.decode() + name = name.decode() + value = value.decode() + path = path.decode() is_encrypted = not value and encrypted_value if is_encrypted: @@ -458,7 +458,7 @@ class WindowsChromeCookieDecryptor(ChromeCookieDecryptor): self._cookie_counts['other'] += 1 # any other prefix means the data is DPAPI encrypted # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc - return _decrypt_windows_dpapi(encrypted_value, self._logger).decode('utf-8') + return _decrypt_windows_dpapi(encrypted_value, self._logger).decode() def _extract_safari_cookies(profile, logger): @@ -521,7 +521,7 @@ class DataParser: while True: c = self.read_bytes(1) if c == b'\x00': - return b''.join(buffer).decode('utf-8') + return b''.join(buffer).decode() else: buffer.append(c) @@ -735,7 +735,7 @@ def _get_kwallet_network_wallet(logger): logger.warning('failed to read NetworkWallet') return default_wallet else: - network_wallet = stdout.decode('utf-8').strip() + network_wallet = stdout.decode().strip() logger.debug(f'NetworkWallet = "{network_wallet}"') return network_wallet except Exception as e: @@ -873,7 +873,7 @@ def pbkdf2_sha1(password, salt, iterations, key_length): def _decrypt_aes_cbc(ciphertext, key, logger, 
initialization_vector=b' ' * 16): plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)) try: - return plaintext.decode('utf-8') + return plaintext.decode() except UnicodeDecodeError: logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True) return None @@ -887,7 +887,7 @@ def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger): return None try: - return plaintext.decode('utf-8') + return plaintext.decode() except UnicodeDecodeError: logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True) return None @@ -939,7 +939,7 @@ def _open_database_copy(database_path, tmpdir): def _get_column_names(cursor, table_name): table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall() - return [row[1].decode('utf-8') for row in table_info] + return [row[1].decode() for row in table_info] def _find_most_recently_used_file(root, filename, logger): diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 4f9f8f6e5..85c6a6977 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -299,7 +299,7 @@ class Aria2cFD(ExternalFD): fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index) url_list.append('%s\n\tout=%s' % (fragment['url'], fragment_filename)) stream, _ = self.sanitize_open(url_list_file, 'wb') - stream.write('\n'.join(url_list).encode('utf-8')) + stream.write('\n'.join(url_list).encode()) stream.close() cmd += ['-i', url_list_file] else: diff --git a/yt_dlp/downloader/f4m.py b/yt_dlp/downloader/f4m.py index 12ecec008..7b6665167 100644 --- a/yt_dlp/downloader/f4m.py +++ b/yt_dlp/downloader/f4m.py @@ -412,7 +412,7 @@ class F4mFD(FragmentFD): if box_type == b'mdat': self._append_fragment(ctx, box_data) break - except (compat_urllib_error.HTTPError, ) as err: + except compat_urllib_error.HTTPError as err: if live and (err.code == 404 or err.code == 410): # We didn't keep up with the live window. Continue # with the next available fragment. 
diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index f65f91f4f..2e01c7bac 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -329,7 +329,7 @@ class HlsFD(FragmentFD): continue block.write_into(output) - return output.getvalue().encode('utf-8') + return output.getvalue().encode() def fin_fragments(): dedup_window = extra_state.get('webvtt_dedup_window') @@ -340,7 +340,7 @@ class HlsFD(FragmentFD): for cue in dedup_window: webvtt.CueBlock.from_json(cue).write_into(output) - return output.getvalue().encode('utf-8') + return output.getvalue().encode() self.download_and_append_fragments( ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments) diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index d590dbfbd..9b7598b1c 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -150,7 +150,7 @@ class HttpFD(FileDownloader): ctx.resume_len = 0 ctx.open_mode = 'wb' ctx.data_len = ctx.content_len = int_or_none(ctx.data.info().get('Content-length', None)) - except (compat_urllib_error.HTTPError, ) as err: + except compat_urllib_error.HTTPError as err: if err.code == 416: # Unable to resume (requested range not satisfiable) try: @@ -158,7 +158,7 @@ class HttpFD(FileDownloader): ctx.data = self.ydl.urlopen( sanitized_Request(url, request_data, headers)) content_length = ctx.data.info()['Content-Length'] - except (compat_urllib_error.HTTPError, ) as err: + except compat_urllib_error.HTTPError as err: if err.code < 500 or err.code >= 600: raise else: @@ -268,7 +268,7 @@ class HttpFD(FileDownloader): if self.params.get('xattr_set_filesize', False) and data_len is not None: try: - write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8')) + write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode()) except (XAttrUnavailableError, XAttrMetadataError) as err: self.report_error('unable to set filesize xattr: %s' % str(err)) diff --git a/yt_dlp/downloader/ism.py b/yt_dlp/downloader/ism.py index 82ed51e88..0aaba8c15 100644 --- a/yt_dlp/downloader/ism.py +++ b/yt_dlp/downloader/ism.py @@ -151,7 +151,7 @@ def write_piff_header(stream, params): sample_entry_payload += u16.pack(0x18) # depth sample_entry_payload += s16.pack(-1) # pre defined - codec_private_data = binascii.unhexlify(params['codec_private_data'].encode('utf-8')) + codec_private_data = binascii.unhexlify(params['codec_private_data'].encode()) if fourcc in ('H264', 'AVC1'): sps, pps = codec_private_data.split(u32.pack(1))[1:] avcc_payload = u8.pack(1) # configuration version diff --git a/yt_dlp/downloader/mhtml.py b/yt_dlp/downloader/mhtml.py index 8a6619960..f999fca78 100644 --- a/yt_dlp/downloader/mhtml.py +++ b/yt_dlp/downloader/mhtml.py @@ -54,7 +54,7 @@ body > figure > img { def _escape_mime(s): return '=?utf-8?Q?' 
+ (b''.join( bytes((b,)) if b >= 0x20 else b'=%02X' % b - for b in quopri.encodestring(s.encode('utf-8'), header=True) + for b in quopri.encodestring(s.encode(), header=True) )).decode('us-ascii') + '?=' def _gen_cid(self, i, fragment, frag_boundary): @@ -151,7 +151,7 @@ body > figure > img { length=len(stub), title=self._escape_mime(title), stub=stub - ).encode('utf-8')) + ).encode()) extra_state['header_written'] = True for i, fragment in enumerate(fragments): diff --git a/yt_dlp/downloader/niconico.py b/yt_dlp/downloader/niconico.py index 0e6c177b7..5947446b1 100644 --- a/yt_dlp/downloader/niconico.py +++ b/yt_dlp/downloader/niconico.py @@ -51,4 +51,4 @@ class NiconicoDmcFD(FileDownloader): with heartbeat_lock: timer[0].cancel() download_complete = True - return success + return success diff --git a/yt_dlp/downloader/websocket.py b/yt_dlp/downloader/websocket.py index eb1b99b45..727a15828 100644 --- a/yt_dlp/downloader/websocket.py +++ b/yt_dlp/downloader/websocket.py @@ -19,7 +19,7 @@ class FFmpegSinkFD(FileDownloader): async def call_conn(proc, stdin): try: await self.real_connection(stdin, info_dict) - except (BrokenPipeError, OSError): + except OSError: pass finally: with contextlib.suppress(OSError): diff --git a/yt_dlp/downloader/youtube_live_chat.py b/yt_dlp/downloader/youtube_live_chat.py index 7f06dfb48..448660725 100644 --- a/yt_dlp/downloader/youtube_live_chat.py +++ b/yt_dlp/downloader/youtube_live_chat.py @@ -47,7 +47,7 @@ class YoutubeLiveChatFD(FragmentFD): replay_chat_item_action = action['replayChatItemAction'] offset = int(replay_chat_item_action['videoOffsetTimeMsec']) processed_fragment.extend( - json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n') + json.dumps(action, ensure_ascii=False).encode() + b'\n') if offset is not None: continuation = try_get( live_chat_continuation, @@ -89,7 +89,7 @@ class YoutubeLiveChatFD(FragmentFD): 'isLive': True, } processed_fragment.extend( - json.dumps(pseudo_action, ensure_ascii=False).encode('utf-8') + b'\n') + json.dumps(pseudo_action, ensure_ascii=False).encode() + b'\n') continuation_data_getters = [ lambda x: x['continuations'][0]['invalidationContinuationData'], lambda x: x['continuations'][0]['timedContinuationData'], @@ -183,7 +183,7 @@ class YoutubeLiveChatFD(FragmentFD): request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params} headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data) headers.update({'content-type': 'application/json'}) - fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n' + fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode() + b'\n' success, continuation_id, offset, click_tracking_params = download_and_parse_fragment( url, frag_index, fragment_request_data, headers) else: diff --git a/yt_dlp/extractor/dplay.py b/yt_dlp/extractor/dplay.py index 54f95a44a..5c4f3c892 100644 --- a/yt_dlp/extractor/dplay.py +++ b/yt_dlp/extractor/dplay.py @@ -8,6 +8,7 @@ from ..utils import ( ExtractorError, float_or_none, int_or_none, + remove_start, strip_or_none, try_get, unified_timestamp, @@ -311,7 +312,7 @@ class DPlayIE(DPlayBaseIE): def _real_extract(self, url): mobj = self._match_valid_url(url) display_id = mobj.group('id') - domain = mobj.group('domain').lstrip('www.') + domain = remove_start(mobj.group('domain'), 'www.') country = mobj.group('country') or mobj.group('subdomain_country') or mobj.group('plus_country') host = 'disco-api.' 
+ domain if domain[0] == 'd' else 'eu2-prod.disco-api.com' return self._get_disco_api_info( diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 340161a42..0d0e002e5 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -1030,20 +1030,6 @@ class GenericIE(InfoExtractor): 'filesize': 24687186, }, }, - { - 'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz', - 'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4', - 'info_dict': { - 'id': 'uxjb0lwrcz', - 'ext': 'mp4', - 'title': 'Conversation about Hexagonal Rails Part 1', - 'description': 'a Martin Fowler video from ThoughtWorks', - 'duration': 1715.0, - 'uploader': 'thoughtworks.wistia.com', - 'timestamp': 1401832161, - 'upload_date': '20140603', - }, - }, # Wistia standard embed (async) { 'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/', diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 1c6e20510..907b079ec 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3173,7 +3173,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Eg: __2ABJjxzNo, ySuUZEjARPY is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500) if is_damaged: - self.report_warning(f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True) + self.report_warning( + f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True) dct = { 'asr': int_or_none(fmt.get('audioSampleRate')), 'filesize': int_or_none(fmt.get('contentLength')), @@ -3222,6 +3223,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): skip_manifests = self._configuration_arg('skip') if not self.get_param('youtube_include_hls_manifest', True): skip_manifests.append('hls') + if not self.get_param('youtube_include_dash_manifest', True): + skip_manifests.append('dash') get_dash = 'dash' not in skip_manifests and ( not is_live or live_from_start or self._configuration_arg('include_live_dash')) get_hls = not live_from_start and 'hls' not in skip_manifests diff --git a/yt_dlp/postprocessor/common.py b/yt_dlp/postprocessor/common.py index 1d11e82a2..addc46e5b 100644 --- a/yt_dlp/postprocessor/common.py +++ b/yt_dlp/postprocessor/common.py @@ -93,10 +93,10 @@ class PostProcessor(metaclass=PostProcessorMetaClass): return self._downloader.write_debug(text, *args, **kwargs) def _delete_downloaded_files(self, *files_to_delete, **kwargs): - if not self._downloader: - for filename in set(filter(None, files_to_delete)): - os.remove(filename) - return self._downloader._delete_downloaded_files(*files_to_delete, **kwargs) + if self._downloader: + return self._downloader._delete_downloaded_files(*files_to_delete, **kwargs) + for filename in set(filter(None, files_to_delete)): + os.remove(filename) def get_param(self, name, default=None, *args, **kwargs): if self._downloader: diff --git a/yt_dlp/postprocessor/xattrpp.py b/yt_dlp/postprocessor/xattrpp.py index 065ddf963..f822eff41 100644 --- a/yt_dlp/postprocessor/xattrpp.py +++ b/yt_dlp/postprocessor/xattrpp.py @@ -43,7 +43,7 @@ class XAttrMetadataPP(PostProcessor): if value: if infoname == 'upload_date': value = hyphenate_date(value) - write_xattr(info['filepath'], xattrname, value.encode('utf-8')) + write_xattr(info['filepath'], xattrname, value.encode()) except XAttrUnavailableError as e: raise PostProcessingError(str(e)) diff --git a/yt_dlp/socks.py b/yt_dlp/socks.py index 56fab08ab..34ba1394a 100644 --- a/yt_dlp/socks.py +++ b/yt_dlp/socks.py @@ -149,11 +149,11 @@ class 
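The SOCKS4 hunk that follows builds the CONNECT request by hand; the wire layout is easy to sanity-check with nothing but `struct` (hypothetical helper name, not the module's code):

    import struct

    SOCKS4_VERSION, CMD_CONNECT = 4, 1

    def socks4_connect(port, ipaddr, username=''):
        # VER | CMD | DSTPORT (big-endian) | DSTIP (4 bytes) | USERID | NUL
        return (struct.pack('!BBH', SOCKS4_VERSION, CMD_CONNECT, port)
                + ipaddr + username.encode() + b'\x00')

    assert socks4_connect(8080, bytes([127, 0, 0, 1])) == b'\x04\x01\x1f\x90\x7f\x00\x00\x01\x00'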
sockssocket(socket.socket): packet = compat_struct_pack('!BBH', SOCKS4_VERSION, Socks4Command.CMD_CONNECT, port) + ipaddr - username = (self._proxy.username or '').encode('utf-8') + username = (self._proxy.username or '').encode() packet += username + b'\x00' if is_4a and self._proxy.remote_dns: - packet += destaddr.encode('utf-8') + b'\x00' + packet += destaddr.encode() + b'\x00' self.sendall(packet) @@ -192,8 +192,8 @@ class sockssocket(socket.socket): raise Socks5Error(Socks5Auth.AUTH_NO_ACCEPTABLE) if method == Socks5Auth.AUTH_USER_PASS: - username = self._proxy.username.encode('utf-8') - password = self._proxy.password.encode('utf-8') + username = self._proxy.username.encode() + password = self._proxy.password.encode() packet = compat_struct_pack('!B', SOCKS5_USER_AUTH_VERSION) packet += self._len_and_data(username) + self._len_and_data(password) self.sendall(packet) @@ -216,7 +216,7 @@ class sockssocket(socket.socket): reserved = 0 packet = compat_struct_pack('!BBB', SOCKS5_VERSION, Socks5Command.CMD_CONNECT, reserved) if ipaddr is None: - destaddr = destaddr.encode('utf-8') + destaddr = destaddr.encode() packet += compat_struct_pack('!B', Socks5AddressType.ATYP_DOMAINNAME) packet += self._len_and_data(destaddr) else: diff --git a/yt_dlp/update.py b/yt_dlp/update.py index eea08ce43..8dcf260f5 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -74,7 +74,7 @@ def run_update(ydl): # Download and check versions info try: - version_info = ydl._opener.open(JSON_URL).read().decode('utf-8') + version_info = ydl._opener.open(JSON_URL).read().decode() version_info = json.loads(version_info) except Exception: return report_network_error('obtain version info', delim='; Please try again later or') @@ -118,7 +118,7 @@ def run_update(ydl): {}).get('browser_download_url') if not urlh: return None - hash_data = ydl._opener.open(urlh).read().decode('utf-8') + hash_data = ydl._opener.open(urlh).read().decode() return dict(ln.split()[::-1] for ln in hash_data.splitlines()).get(filename) if not os.access(filename, os.W_OK): diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 8b2c1c75a..62dc412a8 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -737,8 +737,8 @@ def extract_basic_auth(url): parts.hostname if parts.port is None else '%s:%d' % (parts.hostname, parts.port)))) auth_payload = base64.b64encode( - ('%s:%s' % (parts.username, parts.password or '')).encode('utf-8')) - return url, 'Basic ' + auth_payload.decode('utf-8') + ('%s:%s' % (parts.username, parts.password or '')).encode()) + return url, f'Basic {auth_payload.decode()}' def sanitized_Request(url, *args, **kwargs): @@ -1339,7 +1339,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): location = resp.headers.get('Location') if location: # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3 - location = location.encode('iso-8859-1').decode('utf-8') + location = location.encode('iso-8859-1').decode() location_escaped = escape_url(location) if location != location_escaped: del resp.headers['Location'] @@ -2309,7 +2309,7 @@ def setproctitle(title): # a bytestring, but since unicode_literals turns # every string into a unicode string, it fails. 
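The `extract_basic_auth` hunk above is equally mechanical about its encoding; the transformation it performs can be sketched standalone (hypothetical function name, URL rewriting omitted):

    import base64
    import urllib.parse

    def basic_auth_header(url):
        # turn https://user:pass@host/... credentials into an Authorization value
        parts = urllib.parse.urlsplit(url)
        if parts.username is None:
            return None
        payload = base64.b64encode(f'{parts.username}:{parts.password or ""}'.encode())
        return f'Basic {payload.decode()}'

    assert basic_auth_header('https://alice:s3cret@example.com/feed') == 'Basic YWxpY2U6czNjcmV0'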
return - title_bytes = title.encode('utf-8') + title_bytes = title.encode() buf = ctypes.create_string_buffer(len(title_bytes)) buf.value = title_bytes try: @@ -2351,13 +2351,13 @@ def base_url(url): def urljoin(base, path): if isinstance(path, bytes): - path = path.decode('utf-8') + path = path.decode() if not isinstance(path, compat_str) or not path: return None if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path): return path if isinstance(base, bytes): - base = base.decode('utf-8') + base = base.decode() if not isinstance(base, compat_str) or not re.match( r'^(?:https?:)?//', base): return None @@ -2557,49 +2557,48 @@ def get_exe_version(exe, args=['--version'], class LazyList(collections.abc.Sequence): - ''' Lazy immutable list from an iterable - Note that slices of a LazyList are lists and not LazyList''' + """Lazy immutable list from an iterable + Note that slices of a LazyList are lists and not LazyList""" class IndexError(IndexError): pass def __init__(self, iterable, *, reverse=False, _cache=None): - self.__iterable = iter(iterable) - self.__cache = [] if _cache is None else _cache - self.__reversed = reverse + self._iterable = iter(iterable) + self._cache = [] if _cache is None else _cache + self._reversed = reverse def __iter__(self): - if self.__reversed: + if self._reversed: # We need to consume the entire iterable to iterate in reverse yield from self.exhaust() return - yield from self.__cache - for item in self.__iterable: - self.__cache.append(item) + yield from self._cache + for item in self._iterable: + self._cache.append(item) yield item - def __exhaust(self): - self.__cache.extend(self.__iterable) - # Discard the emptied iterable to make it pickle-able - self.__iterable = [] - return self.__cache + def _exhaust(self): + self._cache.extend(self._iterable) + self._iterable = [] # Discard the emptied iterable to make it pickle-able + return self._cache def exhaust(self): - ''' Evaluate the entire iterable ''' - return self.__exhaust()[::-1 if self.__reversed else 1] + """Evaluate the entire iterable""" + return self._exhaust()[::-1 if self._reversed else 1] @staticmethod - def __reverse_index(x): + def _reverse_index(x): return None if x is None else -(x + 1) def __getitem__(self, idx): if isinstance(idx, slice): - if self.__reversed: - idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1)) + if self._reversed: + idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1)) start, stop, step = idx.start, idx.stop, idx.step or 1 elif isinstance(idx, int): - if self.__reversed: - idx = self.__reverse_index(idx) + if self._reversed: + idx = self._reverse_index(idx) start, stop, step = idx, idx, 0 else: raise TypeError('indices must be integers or slices') @@ -2608,35 +2607,35 @@ class LazyList(collections.abc.Sequence): or (stop is None and step > 0)): # We need to consume the entire iterable to be able to slice from the end # Obviously, never use this with infinite iterables - self.__exhaust() + self._exhaust() try: - return self.__cache[idx] + return self._cache[idx] except IndexError as e: raise self.IndexError(e) from e - n = max(start or 0, stop or 0) - len(self.__cache) + 1 + n = max(start or 0, stop or 0) - len(self._cache) + 1 if n > 0: - self.__cache.extend(itertools.islice(self.__iterable, n)) + self._cache.extend(itertools.islice(self._iterable, n)) try: - return self.__cache[idx] + return self._cache[idx] except IndexError as e: raise self.IndexError(e) from e def __bool__(self): try: - 
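The `__cache`/`__reversed` to `_cache`/`_reversed` renames above are not purely cosmetic: two leading underscores trigger name mangling, which hard-binds the attribute to the defining class and surprises subclasses. A minimal demonstration:

    class Base:
        def __init__(self):
            self.__cache = []      # actually stored as _Base__cache

    class Child(Base):
        def peek(self):
            return self.__cache    # compiled to self._Child__cache

    try:
        Child().peek()
    except AttributeError as err:
        print(err)                 # 'Child' object has no attribute '_Child__cache'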
self[-1] if self.__reversed else self[0] + self[-1] if self._reversed else self[0] except self.IndexError: return False return True def __len__(self): - self.__exhaust() - return len(self.__cache) + self._exhaust() + return len(self._cache) def __reversed__(self): - return type(self)(self.__iterable, reverse=not self.__reversed, _cache=self.__cache) + return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache) def __copy__(self): - return type(self)(self.__iterable, reverse=self.__reversed, _cache=self.__cache) + return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache) def __repr__(self): # repr and str should mimic a list. So we exhaust the iterable @@ -2850,9 +2849,9 @@ def _multipart_encode_impl(data, boundary): for k, v in data.items(): out += b'--' + boundary.encode('ascii') + b'\r\n' if isinstance(k, compat_str): - k = k.encode('utf-8') + k = k.encode() if isinstance(v, compat_str): - v = v.encode('utf-8') + v = v.encode() # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578 # suggests sending UTF-8 directly. Firefox sends UTF-8, too content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n' @@ -4741,7 +4740,7 @@ def write_xattr(path, key, value): 'Couldn\'t find a tool to set the xattrs. Install either the python "xattr" or "pyxattr" modules or the ' + ('"xattr" binary' if sys.platform != 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)')) - value = value.decode('utf-8') + value = value.decode() try: p = Popen( [exe, '-w', key, value, path] if exe == 'xattr' else [exe, '-n', key, '-v', value, path], @@ -4820,7 +4819,7 @@ def iri_to_uri(iri): net_location += ':' + urllib.parse.quote(iri_parts.password, safe=r"!$%&'()*+,~") net_location += '@' - net_location += iri_parts.hostname.encode('idna').decode('utf-8') # Punycode for Unicode hostnames. + net_location += iri_parts.hostname.encode('idna').decode() # Punycode for Unicode hostnames. # The 'idna' encoding produces ASCII text. if iri_parts.port is not None and iri_parts.port != 80: net_location += ':' + str(iri_parts.port) @@ -5063,9 +5062,9 @@ def jwt_encode_hs256(payload_data, key, headers={}): } if headers: header_data.update(headers) - header_b64 = base64.b64encode(json.dumps(header_data).encode('utf-8')) - payload_b64 = base64.b64encode(json.dumps(payload_data).encode('utf-8')) - h = hmac.new(key.encode('utf-8'), header_b64 + b'.' + payload_b64, hashlib.sha256) + header_b64 = base64.b64encode(json.dumps(header_data).encode()) + payload_b64 = base64.b64encode(json.dumps(payload_data).encode()) + h = hmac.new(key.encode(), header_b64 + b'.' + payload_b64, hashlib.sha256) signature_b64 = base64.b64encode(h.digest()) token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64 return token diff --git a/yt_dlp/webvtt.py b/yt_dlp/webvtt.py index 4c222ba8e..b8974f883 100644 --- a/yt_dlp/webvtt.py +++ b/yt_dlp/webvtt.py @@ -346,7 +346,7 @@ def parse_fragment(frag_content): a bytes object containing the raw contents of a WebVTT file. 
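`jwt_encode_hs256` above only had its encoding calls touched, but the signing flow is compact enough to spell out from scratch. Note that RFC 7515 prescribes the unpadded URL-safe alphabet, whereas the helper keeps plain `b64encode`; this sketch uses the former:

    import base64
    import hashlib
    import hmac
    import json

    def b64url(data):
        return base64.urlsafe_b64encode(data).rstrip(b'=')

    header = b64url(json.dumps({'alg': 'HS256', 'typ': 'JWT'}).encode())
    payload = b64url(json.dumps({'sub': 'demo'}).encode())
    signature = b64url(hmac.new(b'secret-key', header + b'.' + payload, hashlib.sha256).digest())
    token = b'.'.join((header, payload, signature))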
""" - parser = _MatchParser(frag_content.decode('utf-8')) + parser = _MatchParser(frag_content.decode()) yield Magic.parse(parser) From fe1daad3cb224904cc72462204da5f6427be6f44 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 10 May 2022 11:08:19 +0530 Subject: [PATCH 1038/2552] Bugfix for 59f943cd5097e9bdbc3cb3e6b5675e43d369341a Fixes: https://github.com/yt-dlp/yt-dlp/commit/59f943cd5097e9bdbc3cb3e6b5675e43d369341a#commitcomment-73251597 --- yt_dlp/utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 62dc412a8..c9589537f 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -1881,8 +1881,7 @@ def write_string(s, out=None, encoding=None): assert isinstance(s, str) out = out or sys.stderr - from .compat import WINDOWS_VT_MODE # Must be imported locally - if WINDOWS_VT_MODE: + if compat_os_name == 'nt' and supports_terminal_sequences(out): s = re.sub(r'([\r\n]+)', r' \1', s) if 'b' in getattr(out, 'mode', ''): From d76fa1f3d4f559e82a4c54e6f8feb0727ffc4b58 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 10 May 2022 11:44:45 +0530 Subject: [PATCH 1039/2552] [cookies] Allow `cookiefile` to be a text stream Closes #3674 --- yt_dlp/YoutubeDL.py | 2 +- yt_dlp/cookies.py | 7 +++++-- yt_dlp/utils.py | 24 ++++++++++++++++++++++-- 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index f9670429a..38ecd276f 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -312,7 +312,7 @@ class YoutubeDL: has been filtered out. break_per_url: Whether break_on_reject and break_on_existing should act on each input URL as opposed to for the entire queue - cookiefile: File name where cookies should be read from and dumped to + cookiefile: File name or text stream from where cookies should be read and dumped to cookiesfrombrowser: A tuple containing the name of the browser, the profile name/pathfrom where cookies are loaded, and the name of the keyring. 
Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT') diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index b06edfc5d..c6edaebe4 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -83,9 +83,12 @@ def load_cookies(cookie_file, browser_specification, ydl): cookie_jars.append(extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring)) if cookie_file is not None: - cookie_file = expand_path(cookie_file) + is_filename = YoutubeDLCookieJar.is_path(cookie_file) + if is_filename: + cookie_file = expand_path(cookie_file) + jar = YoutubeDLCookieJar(cookie_file) - if os.access(cookie_file, os.R_OK): + if not is_filename or os.access(cookie_file, os.R_OK): jar.load(ignore_discard=True, ignore_expires=True) cookie_jars.append(jar) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index c9589537f..e683eaaf1 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -1439,6 +1439,26 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar): 'CookieFileEntry', ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value')) + def __init__(self, filename=None, *args, **kwargs): + super().__init__(None, *args, **kwargs) + if self.is_path(filename): + filename = os.fspath(filename) + self.filename = filename + + @staticmethod + def is_path(file): + return isinstance(file, (str, bytes, os.PathLike)) + + @contextlib.contextmanager + def open(self, file, *, write=False): + if self.is_path(file): + with open(file, 'w' if write else 'r', encoding='utf-8') as f: + yield f + else: + if write: + file.truncate(0) + yield file + def save(self, filename=None, ignore_discard=False, ignore_expires=False): """ Save cookies to a file. @@ -1458,7 +1478,7 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar): if cookie.expires is None: cookie.expires = 0 - with open(filename, 'w', encoding='utf-8') as f: + with self.open(filename, write=True) as f: f.write(self._HEADER) now = time.time() for cookie in self: @@ -1514,7 +1534,7 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar): return line cf = io.StringIO() - with open(filename, encoding='utf-8') as f: + with self.open(filename) as f: for line in f: try: cf.write(prepare_line(line)) From 3a408f9d199127ca2626359e21a866a09ab236b3 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 11 May 2022 06:36:29 +0530 Subject: [PATCH 1040/2552] Show name of downloader in verbose log Closes #3703 --- yt_dlp/YoutubeDL.py | 2 +- yt_dlp/downloader/common.py | 4 ++++ yt_dlp/downloader/f4m.py | 2 -- yt_dlp/downloader/ism.py | 2 -- yt_dlp/downloader/mhtml.py | 2 -- yt_dlp/downloader/niconico.py | 2 -- yt_dlp/downloader/youtube_live_chat.py | 2 -- 7 files changed, 5 insertions(+), 11 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 38ecd276f..83210f6c8 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2833,7 +2833,7 @@ class YoutubeDL: urls = '", "'.join( (f['url'].split(',')[0] + ',' if f['url'].startswith('data:') else f['url']) for f in info.get('requested_formats', []) or [info]) - self.write_debug('Invoking downloader on "%s"' % urls) + self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"') # Note: Ideally info should be a deep-copied so that hooks cannot modify it. 
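The `open()` context manager above is what makes that dual path-or-stream contract work; the dispatch pattern stands on its own (hypothetical name, same logic as the hunk):

    import contextlib
    import io
    import os

    @contextlib.contextmanager
    def open_or_passthrough(file, *, write=False):
        if isinstance(file, (str, bytes, os.PathLike)):   # a path: manage the handle
            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
                yield f
        else:                                             # a stream: the caller owns it
            if write:
                file.truncate(0)
            yield file

    with open_or_passthrough(io.StringIO('k=v')) as f:
        assert f.read() == 'k=v'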
# But it may contain objects that are not deep-copyable diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index d79863300..1f14ebb3a 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -98,6 +98,10 @@ class FileDownloader: def to_screen(self, *args, **kargs): self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs) + @property + def FD_NAME(self): + return re.sub(r'(? Date: Wed, 11 May 2022 05:52:31 +0530 Subject: [PATCH 1041/2552] Fix `--date today` Closes #3704 --- README.md | 3 ++- yt_dlp/options.py | 5 ++--- yt_dlp/utils.py | 32 +++++++++++++++----------------- 3 files changed, 19 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index f8813cbb6..a9a9a4c63 100644 --- a/README.md +++ b/README.md @@ -427,7 +427,8 @@ You can also fork the project on github and run your fork's [build workflow](.gi (e.g. 50k or 44.6m) --date DATE Download only videos uploaded on this date. The date can be "YYYYMMDD" or in the format - "(now|today)[+-][0-9](day|week|month|year)(s)?" + [now|today|yesterday][-N[day|week|month|year]]. + Eg: --date today-2weeks --datebefore DATE Download only videos uploaded on or before this date. The date formats accepted is the same as --date diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 60f866570..8a9195217 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -435,9 +435,8 @@ def create_parser(): '--date', metavar='DATE', dest='date', default=None, help=( - 'Download only videos uploaded on this date. ' - 'The date can be "YYYYMMDD" or in the format ' - '"(now|today)[+-][0-9](day|week|month|year)(s)?"')) + 'Download only videos uploaded on this date. The date can be "YYYYMMDD" or in the format ' + '[now|today|yesterday][-N[day|week|month|year]]. Eg: --date today-2weeks')) selection.add_option( '--datebefore', metavar='DATE', dest='datebefore', default=None, diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index e683eaaf1..ba73c2191 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -1756,14 +1756,14 @@ def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None): def datetime_from_str(date_str, precision='auto', format='%Y%m%d'): - """ - Return a datetime object from a string in the format YYYYMMDD or - (now|today|yesterday|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)? - - format: string date format used to return datetime object from - precision: round the time portion of a datetime object. - auto|microsecond|second|minute|hour|day. - auto: round to the unit provided in date_str (if applicable). + R""" + Return a datetime object from a string. + Supported format: + (now|today|yesterday|DATE)([+-]\d+(microsecond|second|minute|hour|day|week|month|year)s?)? + + @param format strftime format of DATE + @param precision Round the datetime object: auto|microsecond|second|minute|hour|day + auto: round to the unit provided in date_str (if applicable). """ auto_precision = False if precision == 'auto': @@ -1775,7 +1775,7 @@ def datetime_from_str(date_str, precision='auto', format='%Y%m%d'): if date_str == 'yesterday': return today - datetime.timedelta(days=1) match = re.match( - r'(?P.+)(?P[+-])(?P
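The `[now|today|yesterday][-N[day|week|month|year]]` grammar documented above reduces to one regex plus `timedelta` arithmetic. A simplified sketch (months and years approximated as 30/365 days, unlike the real helper):

    import datetime
    import re

    def parse_relative_date(date_str):
        today = datetime.date.today()
        if date_str in ('now', 'today'):
            return today
        if date_str == 'yesterday':
            return today - datetime.timedelta(days=1)
        mobj = re.fullmatch(r'(now|today|yesterday)?([+-])(\d+)(day|week|month|year)s?', date_str)
        if not mobj:
            return datetime.datetime.strptime(date_str, '%Y%m%d').date()
        base, sign, count, unit = mobj.groups()
        days = int(count) * {'day': 1, 'week': 7, 'month': 30, 'year': 365}[unit]
        start = today - datetime.timedelta(days=1) if base == 'yesterday' else today
        return start + datetime.timedelta(days=days if sign == '+' else -days)

    assert parse_relative_date('today-2weeks') == datetime.date.today() - datetime.timedelta(days=14)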

    ', - self._meta_regex('title') - ), webpage, 'title', group='content', fatal=False) - - # Get part title for anthologies - if page_id is not None: - # TODO: The json is already downloaded by _extract_anthology_entries. Don't redownload for each video. - part_info = traverse_obj(self._download_json( - f'https://api.bilibili.com/x/player/pagelist?bvid={bv_id}&jsonp=jsonp', - video_id, note='Extracting videos in anthology'), 'data', expected_type=list) - title = title if len(part_info) == 1 else traverse_obj(part_info, (int(page_id) - 1, 'part')) or title - - description = self._html_search_meta('description', webpage) - timestamp = unified_timestamp(self._html_search_regex( - r']+datetime="([^"]+)"', webpage, 'upload time', - default=None) or self._html_search_meta( - 'uploadDate', webpage, 'timestamp', default=None)) - thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage) - - # TODO 'view_count' requires deobfuscating Javascript - info.update({ - 'id': f'{video_id}_part{page_id or 1}', - 'cid': cid, + return { + 'id': f'{video_id}{format_field(part_id, None, "_p%d")}', + 'formats': self.extract_formats(play_info), + '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None, + 'http_headers': {'Referer': url}, + **self.extract_common_info(video_id, initial_state, play_info, aid, cid=( + traverse_obj(video_data, ('pages', part_id - 1, 'cid')) + if part_id else video_data.get('cid'))), 'title': title, - 'description': description, - 'timestamp': timestamp, - 'thumbnail': thumbnail, - 'duration': float_or_none(video_info.get('timelength'), scale=1000), - }) - - uploader_mobj = re.search( - r']+href="(?:https?:)?//space\.bilibili\.com/(?P\d+)"[^>]*>\s*(?P[^<]+?)\s*<', - webpage) - if uploader_mobj: - info.update({ - 'uploader': uploader_mobj.group('name').strip(), - 'uploader_id': uploader_mobj.group('id'), - }) - - if not info.get('uploader'): - info['uploader'] = self._html_search_meta( - 'author', webpage, 'uploader', default=None) - - top_level_info = { - 'tags': traverse_obj(self._download_json( - f'https://api.bilibili.com/x/tag/archive/tags?aid={video_id}', - video_id, fatal=False, note='Downloading tags'), ('data', ..., 'tag_name')), } - info['subtitles'] = { - 'danmaku': [{ - 'ext': 'xml', - 'url': f'https://comment.bilibili.com/{cid}.xml', - }] - } - r''' - # Requires https://github.com/m13253/danmaku2ass which is licenced under GPL3 - # See https://github.com/animelover1984/youtube-dl +class BiliBiliBangumiIE(BilibiliBaseIE): + _VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/(?P(?:ss|ep)\d+)' - raw_danmaku = self._download_webpage( - f'https://comment.bilibili.com/{cid}.xml', video_id, fatal=False, note='Downloading danmaku comments') - danmaku = NiconicoIE.CreateDanmaku(raw_danmaku, commentType='Bilibili', x=1024, y=576) - entries[0]['subtitles'] = { - 'danmaku': [{ - 'ext': 'ass', - 'data': danmaku - }] - } - ''' + _TESTS = [{ + 'url': 'https://www.bilibili.com/bangumi/play/ss897', + 'info_dict': { + 'id': 'ss897', + 'ext': 'mp4', + 'series': '神的记事本', + 'season': '神的记事本', + 'season_id': 897, + 'season_number': 1, + 'episode': '你与旅行包', + 'episode_number': 2, + 'title': '神的记事本:第2话 你与旅行包', + 'duration': 1428.487, + 'timestamp': 1310809380, + 'upload_date': '20110716', + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + }, + }, { + 'url': 'https://www.bilibili.com/bangumi/play/ep508406', + 'only_matching': True, + }] - top_level_info['__post_extractor'] = self.extract_comments(video_id) + def _real_extract(self, url): + 
video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) - for entry in entries: - entry.update(info) + if '您所在的地区无法观看本片' in webpage: + raise GeoRestrictedError('This video is restricted') + elif ('开通大会员观看' in webpage and '__playinfo__' not in webpage + or '正在观看预览,大会员免费看全片' in webpage): + self.raise_login_required('This video is for premium members only') - if len(entries) == 1: - entries[0].update(top_level_info) - return entries[0] + play_info = self._search_json(r'window.__playinfo__\s*=\s*', webpage, 'play info', video_id)['data'] + formats = self.extract_formats(play_info) + if (not formats and '成为大会员抢先看' in webpage + and play_info.get('durl') and not play_info.get('dash')): + self.raise_login_required('This video is for premium members only') - for idx, entry in enumerate(entries): - entry['id'] = '%s_part%d' % (video_id, (idx + 1)) + initial_state = self._search_json(r'window.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id) return { - 'id': str(video_id), - 'bv_id': bv_id, - 'title': title, - 'description': description, - **info, **top_level_info - } - - def _extract_anthology_entries(self, bv_id, video_id, webpage): - title = self._html_search_regex( - (r']+\btitle=(["\'])(?P(?:(?!\1).)+)\1', - r'(?s)<h1[^>]*>(?P<title>.+?)</h1>', - r'<title>(?P<title>.+?)'), webpage, 'title', - group='title') - json_data = self._download_json( - f'https://api.bilibili.com/x/player/pagelist?bvid={bv_id}&jsonp=jsonp', - video_id, note='Extracting videos in anthology') - - if json_data['data']: - return self.playlist_from_matches( - json_data['data'], bv_id, title, ie=BiliBiliIE.ie_key(), - getter=lambda entry: 'https://www.bilibili.com/video/%s?p=%d' % (bv_id, entry['page'])) - - def _get_video_id_set(self, id, is_bv): - query = {'bvid': id} if is_bv else {'aid': id} - response = self._download_json( - "http://api.bilibili.cn/x/web-interface/view", - id, query=query, - note='Grabbing original ID via API') - - if response['code'] == -400: - raise ExtractorError('Video ID does not exist', expected=True, video_id=id) - elif response['code'] != 0: - raise ExtractorError(f'Unknown error occurred during API check (code {response["code"]})', - expected=True, video_id=id) - return response['data']['aid'], response['data']['bvid'] - - def _get_comments(self, video_id, commentPageNumber=0): - for idx in itertools.count(1): - replies = traverse_obj( - self._download_json( - f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={video_id}&type=1&jsonp=jsonp&sort=2&_=1567227301685', - video_id, note=f'Extracting comments from page {idx}', fatal=False), - ('data', 'replies')) - if not replies: - return - for children in map(self._get_all_children, replies): - yield from children - - def _get_all_children(self, reply): - yield { - 'author': traverse_obj(reply, ('member', 'uname')), - 'author_id': traverse_obj(reply, ('member', 'mid')), - 'id': reply.get('rpid'), - 'text': traverse_obj(reply, ('content', 'message')), - 'timestamp': reply.get('ctime'), - 'parent': reply.get('parent') or 'root', + 'id': video_id, + 'formats': formats, + 'http_headers': {'Referer': url, **self.geo_verification_headers()}, + **self.extract_common_info( + video_id, initial_state, play_info, + aid=traverse_obj(initial_state, ('epInfo', 'aid')), + cid=traverse_obj(initial_state, ('epInfo', 'cid'))) } - for children in map(self._get_all_children, reply.get('replies') or []): - yield from children - -class BiliBiliBangumiIE(InfoExtractor): - _VALID_URL = r'https?://bangumi\.bilibili\.com/anime/(?P\d+)' - - 
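The idea behind the `_search_json` calls above is that a strict JSON decoder can locate the end of the object literal by itself, so the pattern only needs to find where the assignment starts. `json.JSONDecoder.raw_decode` demonstrates this in plain Python:

    import json
    import re

    webpage = '<script>window.__playinfo__={"data":{"dash":{}}};if(a)b();</script>'
    start = re.search(r'window\.__playinfo__\s*=\s*', webpage).end()
    play_info, end = json.JSONDecoder().raw_decode(webpage, start)
    assert play_info == {'data': {'dash': {}}} and webpage[end] == ';'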
IE_NAME = 'bangumi.bilibili.com' - IE_DESC = 'BiliBili番剧' +class BiliBiliBangumiMediaIE(InfoExtractor): + _VALID_URL = r'https?://www\.bilibili\.com/bangumi/media/md(?P\d+)' _TESTS = [{ - 'url': 'http://bangumi.bilibili.com/anime/1869', + 'url': 'https://www.bilibili.com/bangumi/media/md24097891', 'info_dict': { - 'id': '1869', - 'title': '混沌武士', - 'description': 'md5:6a9622b911565794c11f25f81d6a97d2', - }, - 'playlist_count': 26, - }, { - 'url': 'http://bangumi.bilibili.com/anime/1869', - 'info_dict': { - 'id': '1869', - 'title': '混沌武士', - 'description': 'md5:6a9622b911565794c11f25f81d6a97d2', - }, - 'playlist': [{ - 'md5': '91da8621454dd58316851c27c68b0c13', - 'info_dict': { - 'id': '40062', - 'ext': 'mp4', - 'title': '混沌武士', - 'description': '故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子...', - 'timestamp': 1414538739, - 'upload_date': '20141028', - 'episode': '疾风怒涛 Tempestuous Temperaments', - 'episode_number': 1, - }, - }], - 'params': { - 'playlist_items': '1', + 'id': '24097891', }, + 'playlist_mincount': 25, }] - @classmethod - def suitable(cls, url): - return False if BiliBiliIE.suitable(url) else super(BiliBiliBangumiIE, cls).suitable(url) - def _real_extract(self, url): - bangumi_id = self._match_id(url) - - # Sometimes this API returns a JSONP response - season_info = self._download_json( - 'http://bangumi.bilibili.com/jsonp/seasoninfo/%s.ver' % bangumi_id, - bangumi_id, transform_source=strip_jsonp)['result'] + media_id = self._match_id(url) + webpage = self._download_webpage(url, media_id) - entries = [{ - '_type': 'url_transparent', - 'url': smuggle_url(episode['webplay_url'], {'no_bangumi_tip': 1}), - 'ie_key': BiliBiliIE.ie_key(), - 'timestamp': parse_iso8601(episode.get('update_time'), delimiter=' '), - 'episode': episode.get('index_title'), - 'episode_number': int_or_none(episode.get('index')), - } for episode in season_info['episodes']] + initial_state = self._search_json(r'window.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id) + episode_list = self._download_json( + 'https://api.bilibili.com/pgc/web/season/section', media_id, + query={'season_id': initial_state['mediaInfo']['season_id']}, + note='Downloading season info')['result']['main_section']['episodes'] - entries = sorted(entries, key=lambda entry: entry.get('episode_number')) - - return self.playlist_result( - entries, bangumi_id, - season_info.get('bangumi_title'), season_info.get('evaluate')) + return self.playlist_result(( + self.url_result(entry['share_url'], BiliBiliBangumiIE, entry['aid']) + for entry in episode_list), media_id) class BilibiliSpaceBaseIE(InfoExtractor): @@ -700,8 +596,7 @@ class BilibiliCategoryIE(InfoExtractor): self._fetch_page, api_url, num_pages, query), size) def _real_extract(self, url): - u = compat_urllib_parse_urlparse(url) - category, subcategory = u.path.split('/')[2:4] + category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4] query = '%s: %s' % (category, subcategory) return self.playlist_result(self._entries(category, subcategory, query), query, query) From c90c5b9bddfaa36afd07db676e351571fce102e8 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 25 Oct 2022 20:09:27 +0530 Subject: [PATCH 1692/2552] [extractor/bilibili] Add chapters and misc cleanup (#4221) Authored by: lockmatrix, pukkandan --- yt_dlp/extractor/bilibili.py | 125 +++++++++++++++++++++-------------- 1 file changed, 75 insertions(+), 50 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 
5aa4e4b58..a237343c6 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -93,6 +93,16 @@ class BilibiliBaseIE(InfoExtractor): }) return subtitles + def _get_chapters(self, aid, cid): + chapters = aid and cid and self._download_json( + 'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid}, + note='Extracting chapters', fatal=False) + return traverse_obj(chapters, ('data', 'view_points', ..., { + 'title': 'content', + 'start_time': 'from', + 'end_time': 'to', + })) or None + def _get_comments(self, aid): for idx in itertools.count(1): replies = traverse_obj( @@ -117,38 +127,6 @@ class BilibiliBaseIE(InfoExtractor): for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))): yield from children - def extract_common_info(self, video_id, initial_state, play_info, aid, cid): - season_id = traverse_obj(initial_state, ('mediaInfo', 'season_id')) - season_number = season_id and next(( - idx + 1 for idx, e in enumerate( - traverse_obj(initial_state, ('mediaInfo', 'seasons', ...))) - if e.get('season_id') == season_id - ), None) - - return { - 'title': traverse_obj(initial_state, 'h1Title'), - 'description': traverse_obj(initial_state, ('videoData', 'desc')), - 'duration': float_or_none(play_info.get('timelength'), scale=1000), - 'view_count': traverse_obj(initial_state, ('videoData', 'stat', 'view')), - 'uploader': traverse_obj(initial_state, ('upData', 'name')), - 'uploader_id': traverse_obj(initial_state, ('upData', 'mid')), - 'like_count': traverse_obj(initial_state, ('videoData', 'stat', 'like')), - 'comment_count': traverse_obj(initial_state, ('videoData', 'stat', 'reply')), - 'tags': traverse_obj(initial_state, ('tags', ..., 'tag_name')) or None, - 'thumbnail': traverse_obj( - initial_state, ('videoData', 'pic'), ('epInfo', 'cover')), - 'timestamp': traverse_obj( - initial_state, ('videoData', 'pubdate'), ('epInfo', 'pub_time')), - 'episode': traverse_obj(initial_state, ('epInfo', 'long_title')), - 'episode_number': int_or_none(traverse_obj(initial_state, ('epInfo', 'title'))), - 'series': traverse_obj(initial_state, ('mediaInfo', 'series')), - 'season': traverse_obj(initial_state, ('mediaInfo', 'season_title')), - 'season_id': season_id, - 'season_number': season_number, - 'subtitles': self.extract_subtitles(video_id, initial_state, cid), - '__post_extractor': self.extract_comments(aid), - } - class BiliBiliIE(BilibiliBaseIE): _VALID_URL = r'https?://www\.bilibili\.com/video/[aAbB][vV](?P[^/?#&]+)' @@ -190,9 +168,7 @@ class BiliBiliIE(BilibiliBaseIE): 'view_count': int, 'tags': list, }, - 'params': { - 'skip_download': True, - }, + 'params': {'skip_download': True}, }, { 'note': 'Anthology', 'url': 'https://www.bilibili.com/video/BV1bK411W797', @@ -244,7 +220,7 @@ class BiliBiliIE(BilibiliBaseIE): 'info_dict': { 'id': 'BV12N4y1M7rh', 'ext': 'mp4', - 'title': '游戏帧数增加40%?下代联发科天玑芯片或将支持光线追踪!从Immortalis-G715看下代联发科SoC的GPU表现 | Arm: 可以不用咬打火机了!', + 'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1', 'tags': list, 'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4', 'duration': 313.557, @@ -266,7 +242,7 @@ class BiliBiliIE(BilibiliBaseIE): 'ext': 'mp4', 'title': '阿滴英文|英文歌分享#6 "Closer', 'upload_date': '20170301', - 'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 
微博@阿滴英文', + 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a', 'timestamp': 1488353834, 'uploader_id': '65880958', 'uploader': '阿滴英文', @@ -280,13 +256,34 @@ class BiliBiliIE(BilibiliBaseIE): 'params': { 'skip_download': True, }, + }, { + 'note': 'video has chapter', + 'url': 'https://www.bilibili.com/video/BV1vL411G7N7/', + 'info_dict': { + 'id': 'BV1vL411G7N7', + 'ext': 'mp4', + 'title': '如何为你的B站视频添加进度条分段', + 'timestamp': 1634554558, + 'upload_date': '20211018', + 'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d', + 'tags': list, + 'uploader': '爱喝咖啡的当麻', + 'duration': 669.482, + 'uploader_id': '1680903', + 'chapters': 'count:6', + 'comment_count': int, + 'view_count': int, + 'like_count': int, + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + }, + 'params': {'skip_download': True}, }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - initial_state = self._search_json(r'window.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id) - play_info = self._search_json(r'window.__playinfo__\s*=', webpage, 'play info', video_id)['data'] + initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id) + play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data'] video_data = initial_state['videoData'] video_id, title = video_data['bvid'], video_data.get('title') @@ -312,15 +309,27 @@ class BiliBiliIE(BilibiliBaseIE): aid = video_data.get('aid') old_video_id = format_field(aid, None, f'%s_part{part_id or 1}') + cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid') + return { 'id': f'{video_id}{format_field(part_id, None, "_p%d")}', 'formats': self.extract_formats(play_info), '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None, - 'http_headers': {'Referer': url}, - **self.extract_common_info(video_id, initial_state, play_info, aid, cid=( - traverse_obj(video_data, ('pages', part_id - 1, 'cid')) - if part_id else video_data.get('cid'))), 'title': title, + 'description': traverse_obj(initial_state, ('videoData', 'desc')), + 'view_count': traverse_obj(initial_state, ('videoData', 'stat', 'view')), + 'uploader': traverse_obj(initial_state, ('upData', 'name')), + 'uploader_id': traverse_obj(initial_state, ('upData', 'mid')), + 'like_count': traverse_obj(initial_state, ('videoData', 'stat', 'like')), + 'comment_count': traverse_obj(initial_state, ('videoData', 'stat', 'reply')), + 'tags': traverse_obj(initial_state, ('tags', ..., 'tag_name')), + 'thumbnail': traverse_obj(initial_state, ('videoData', 'pic')), + 'timestamp': traverse_obj(initial_state, ('videoData', 'pubdate')), + 'duration': float_or_none(play_info.get('timelength'), scale=1000), + 'chapters': self._get_chapters(aid, cid), + 'subtitles': self.extract_subtitles(video_id, initial_state, cid), + '__post_extractor': self.extract_comments(aid), + 'http_headers': {'Referer': url}, } @@ -359,22 +368,38 @@ class BiliBiliBangumiIE(BilibiliBaseIE): or '正在观看预览,大会员免费看全片' in webpage): self.raise_login_required('This video is for premium members only') - play_info = self._search_json(r'window.__playinfo__\s*=\s*', webpage, 'play info', video_id)['data'] + play_info = self._search_json(r'window\.__playinfo__\s*=\s*', webpage, 'play info', video_id)['data'] formats = self.extract_formats(play_info) if (not formats and '成为大会员抢先看' in webpage and play_info.get('durl') and not play_info.get('dash')): 
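For reference, the dict-style `traverse_obj` mapping inside `_get_chapters` above is shorthand for a plain comprehension over the player API response (field names taken from the hunk, sample values invented):

    api_response = {'data': {'view_points': [
        {'content': 'Intro', 'from': 0, 'to': 15},
        {'content': 'Main part', 'from': 15, 'to': 620},
    ]}}

    chapters = [{
        'title': point.get('content'),
        'start_time': point.get('from'),
        'end_time': point.get('to'),
    } for point in api_response.get('data', {}).get('view_points', [])] or None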
self.raise_login_required('This video is for premium members only') - initial_state = self._search_json(r'window.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id) + initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id) + + season_id = traverse_obj(initial_state, ('mediaInfo', 'season_id')) + season_number = season_id and next(( + idx + 1 for idx, e in enumerate( + traverse_obj(initial_state, ('mediaInfo', 'seasons', ...))) + if e.get('season_id') == season_id + ), None) return { 'id': video_id, 'formats': formats, + 'title': traverse_obj(initial_state, 'h1Title'), + 'episode': traverse_obj(initial_state, ('epInfo', 'long_title')), + 'episode_number': int_or_none(traverse_obj(initial_state, ('epInfo', 'title'))), + 'series': traverse_obj(initial_state, ('mediaInfo', 'series')), + 'season': traverse_obj(initial_state, ('mediaInfo', 'season_title')), + 'season_id': season_id, + 'season_number': season_number, + 'thumbnail': traverse_obj(initial_state, ('epInfo', 'cover')), + 'timestamp': traverse_obj(initial_state, ('epInfo', 'pub_time')), + 'duration': float_or_none(play_info.get('timelength'), scale=1000), + 'subtitles': self.extract_subtitles( + video_id, initial_state, traverse_obj(initial_state, ('epInfo', 'cid'))), + '__post_extractor': self.extract_comments(traverse_obj(initial_state, ('epInfo', 'aid'))), 'http_headers': {'Referer': url, **self.geo_verification_headers()}, - **self.extract_common_info( - video_id, initial_state, play_info, - aid=traverse_obj(initial_state, ('epInfo', 'aid')), - cid=traverse_obj(initial_state, ('epInfo', 'cid'))) } @@ -392,7 +417,7 @@ class BiliBiliBangumiMediaIE(InfoExtractor): media_id = self._match_id(url) webpage = self._download_webpage(url, media_id) - initial_state = self._search_json(r'window.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id) + initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id) episode_list = self._download_json( 'https://api.bilibili.com/pgc/web/season/section', media_id, query={'season_id': initial_state['mediaInfo']['season_id']}, From 497074f044b4641289527f6c960b88705d256568 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 25 Oct 2022 15:55:42 +0530 Subject: [PATCH 1693/2552] Write API params in debug head --- yt_dlp/YoutubeDL.py | 51 +++++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 42780e794..92b802da6 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -616,6 +616,30 @@ class YoutubeDL: ' If you experience any issues while using this option, ' f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report') + if self.params.get('bidi_workaround', False): + try: + import pty + master, slave = pty.openpty() + width = shutil.get_terminal_size().columns + width_args = [] if width is None else ['-w', str(width)] + sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error} + try: + self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs) + except OSError: + self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs) + self._output_channel = os.fdopen(master, 'rb') + except OSError as ose: + if ose.errno == errno.ENOENT: + self.report_warning( + 'Could not find fribidi executable, ignoring --bidi-workaround. 
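The `next()` over `enumerate()` idiom that derives `season_number` above is worth a standalone check, since it must return `None` (not raise) when the id is absent:

    # minimal data shaped like mediaInfo['seasons'] in the hunk above
    seasons = [{'season_id': 897}, {'season_id': 1172}]
    season_id = 1172
    season_number = next(
        (idx + 1 for idx, season in enumerate(seasons)
         if season.get('season_id') == season_id),
        None)
    assert season_number == 2
    assert next((i + 1 for i, s in enumerate(seasons)
                 if s.get('season_id') == 999), None) is None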
' + 'Make sure that fribidi is an executable file in one of the directories in your $PATH.') + else: + raise + + self.params['compat_opts'] = set(self.params.get('compat_opts', ())) + if auto_init and auto_init != 'no_verbose_header': + self.print_debug_header() + def check_deprecated(param, option, suggestion): if self.params.get(param) is not None: self.report_warning(f'{option} is deprecated. Use {suggestion} instead') @@ -635,7 +659,6 @@ class YoutubeDL: for msg in self.params.get('_deprecation_warnings', []): self.deprecated_feature(msg) - self.params['compat_opts'] = set(self.params.get('compat_opts', ())) if 'list-formats' in self.params['compat_opts']: self.params['listformats_table'] = False @@ -656,29 +679,7 @@ class YoutubeDL: if not isinstance(params['forceprint'], dict): self.params['forceprint'] = {'video': params['forceprint']} - if self.params.get('bidi_workaround', False): - try: - import pty - master, slave = pty.openpty() - width = shutil.get_terminal_size().columns - width_args = [] if width is None else ['-w', str(width)] - sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error} - try: - self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs) - except OSError: - self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs) - self._output_channel = os.fdopen(master, 'rb') - except OSError as ose: - if ose.errno == errno.ENOENT: - self.report_warning( - 'Could not find fribidi executable, ignoring --bidi-workaround. ' - 'Make sure that fribidi is an executable file in one of the directories in your $PATH.') - else: - raise - if auto_init: - if auto_init != 'no_verbose_header': - self.print_debug_header() self.add_default_info_extractors() if (sys.platform != 'win32' @@ -3728,6 +3729,10 @@ class YoutubeDL: '' if source == 'unknown' else f'({source})', '' if _IN_CLI else 'API', delim=' ')) + + if not _IN_CLI: + write_debug(f'params: {self.params}') + if not _LAZY_LOADER: if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'): write_debug('Lazy loading extractors is forcibly disabled') From e63faa101cf7b9bf9f899cabb74ce03c7f893572 Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Thu, 27 Oct 2022 17:33:35 +1300 Subject: [PATCH 1694/2552] [extractor/youtube] Fix `live_status` extraction for playlist videos Regression in https://github.com/yt-dlp/yt-dlp/commit/867c66ff97b0639485a2b6ebc28f2e0df0bf8187 Authored by: coletdjnz --- yt_dlp/extractor/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index e894f74cd..719a151c4 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -960,6 +960,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None, is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None), 'concurrent_view_count' if live_status in ('is_live', 'is_upcoming') else 'view_count': view_count, + 'live_status': live_status } From 9da6612b0fc3a86b3aa207dd9f9d9379c6a62b92 Mon Sep 17 00:00:00 2001 From: nosoop Date: Fri, 28 Oct 2022 11:30:33 -0700 Subject: [PATCH 1695/2552] [extractor/youtube] Fix `duration` for premieres (#5382) Closes #5378 Authored by: nosoop --- yt_dlp/extractor/youtube.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 719a151c4..77a8b93f3 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3787,10 
+3787,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return self.playlist_result( entries, video_id, video_title, video_description) - duration = int_or_none( - get_first(video_details, 'lengthSeconds') - or get_first(microformats, 'lengthSeconds') - or parse_duration(search_meta('duration'))) or None + duration = (int_or_none(get_first(video_details, 'lengthSeconds')) + or int_or_none(get_first(microformats, 'lengthSeconds')) + or parse_duration(search_meta('duration')) or None) live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \ self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration) From 682b4524bfb2ce18eada6fbddd2d5541d3cb5e88 Mon Sep 17 00:00:00 2001 From: Lesmiscore Date: Mon, 31 Oct 2022 15:51:53 +0900 Subject: [PATCH 1696/2552] [extractor/japandiet] Add extractors (#5368) Authored by: Lesmiscore --- yt_dlp/extractor/_extractors.py | 7 + yt_dlp/extractor/japandiet.py | 277 ++++++++++++++++++++++++++++++++ 2 files changed, 284 insertions(+) create mode 100644 yt_dlp/extractor/japandiet.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 1776029d0..d7362df3a 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -792,6 +792,13 @@ from .jamendo import ( JamendoIE, JamendoAlbumIE, ) +from .japandiet import ( + ShugiinItvLiveIE, + ShugiinItvLiveRoomIE, + ShugiinItvVodIE, + SangiinInstructionIE, + SangiinIE, +) from .jeuxvideo import JeuxVideoIE from .jove import JoveIE from .joj import JojIE diff --git a/yt_dlp/extractor/japandiet.py b/yt_dlp/extractor/japandiet.py new file mode 100644 index 000000000..f2f50db7a --- /dev/null +++ b/yt_dlp/extractor/japandiet.py @@ -0,0 +1,277 @@ +import re + +from ..utils import ( + ExtractorError, + clean_html, + int_or_none, + join_nonempty, + parse_qs, + smuggle_url, + traverse_obj, + try_call, + unsmuggle_url +) +from .common import InfoExtractor + + +def _parse_japanese_date(text): + if not text: + return None + ERA_TABLE = { + '明治': 1868, + '大正': 1912, + '昭和': 1926, + '平成': 1989, + '令和': 2019, + } + ERA_RE = '|'.join(map(re.escape, ERA_TABLE.keys())) + mobj = re.search(rf'({ERA_RE})?(\d+)年(\d+)月(\d+)日', re.sub(r'[\s\u3000]+', '', text)) + if not mobj: + return None + era, year, month, day = mobj.groups() + year, month, day = map(int, (year, month, day)) + if era: + # example input: 令和5年3月34日 + # even though each era have their end, don't check here + year += ERA_TABLE[era] + return '%04d%02d%02d' % (year, month, day) + + +def _parse_japanese_duration(text): + mobj = re.search(r'(?:(\d+)日間?)?(?:(\d+)時間?)?(?:(\d+)分)?(?:(\d+)秒)?', re.sub(r'[\s\u3000]+', '', text or '')) + if not mobj: + return + days, hours, mins, secs = [int_or_none(x, default=0) for x in mobj.groups()] + return secs + mins * 60 + hours * 60 * 60 + days * 24 * 60 * 60 + + +class ShugiinItvBaseIE(InfoExtractor): + _INDEX_ROOMS = None + + @classmethod + def _find_rooms(cls, webpage): + return [{ + '_type': 'url', + 'id': x.group(1), + 'title': clean_html(x.group(2)).strip(), + 'url': smuggle_url(f'https://www.shugiintv.go.jp/jp/index.php?room_id={x.group(1)}', {'g': x.groups()}), + 'ie_key': ShugiinItvLiveIE.ie_key(), + } for x in re.finditer(r'(?s)(.+?)', webpage)] + + def _fetch_rooms(self): + if not self._INDEX_ROOMS: + webpage = self._download_webpage( + 'https://www.shugiintv.go.jp/jp/index.php', None, + encoding='euc-jp', note='Downloading proceedings info') + ShugiinItvBaseIE._INDEX_ROOMS = self._find_rooms(webpage) + return 
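The era table in `_parse_japanese_date` above is worth pinning down with a worked check: era year 1 coincides with the table's Gregorian start year, so the conversion needs a minus-one offset when adding it (a sketch of just the arithmetic, regex handling omitted):

    ERA_TABLE = {'明治': 1868, '大正': 1912, '昭和': 1926, '平成': 1989, '令和': 2019}

    def era_to_gregorian(era, year):
        # 令和1年 is 2019 itself, hence the -1
        return year + ERA_TABLE[era] - 1

    assert era_to_gregorian('令和', 5) == 2023
    assert era_to_gregorian('平成', 31) == 2019   # final year of Heisei
    assert era_to_gregorian('昭和', 64) == 1989   # eras overlap at the handover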
self._INDEX_ROOMS + + +class ShugiinItvLiveIE(ShugiinItvBaseIE): + _VALID_URL = r'https?://(?:www\.)?shugiintv\.go\.jp/(?:jp|en)(?:/index\.php)?$' + IE_DESC = '衆議院インターネット審議中継' + + _TESTS = [{ + 'url': 'https://www.shugiintv.go.jp/jp/index.php', + 'info_dict': { + '_type': 'playlist', + 'title': 'All proceedings for today', + }, + # expect at least one proceedings is running + 'playlist_mincount': 1, + }] + + @classmethod + def suitable(cls, url): + return super().suitable(url) and not any(x.suitable(url) for x in (ShugiinItvLiveRoomIE, ShugiinItvVodIE)) + + def _real_extract(self, url): + self.to_screen( + 'Downloading all running proceedings. To specify one proceeding, use direct link from the website') + return self.playlist_result(self._fetch_rooms(), playlist_title='All proceedings for today') + + +class ShugiinItvLiveRoomIE(ShugiinItvBaseIE): + _VALID_URL = r'https?://(?:www\.)?shugiintv\.go\.jp/(?:jp|en)/index\.php\?room_id=(?Proom\d+)' + IE_DESC = '衆議院インターネット審議中継 (中継)' + + _TESTS = [{ + 'url': 'https://www.shugiintv.go.jp/jp/index.php?room_id=room01', + 'info_dict': { + 'id': 'room01', + 'title': '内閣委員会', + }, + 'skip': 'this runs for a time and not every day', + }, { + 'url': 'https://www.shugiintv.go.jp/jp/index.php?room_id=room11', + 'info_dict': { + 'id': 'room11', + 'title': '外務委員会', + }, + 'skip': 'this runs for a time and not every day', + }] + + def _real_extract(self, url): + url, smug = unsmuggle_url(url, default={}) + if smug.get('g'): + room_id, title = smug['g'] + else: + room_id = self._match_id(url) + title = traverse_obj(self._fetch_rooms(), (lambda k, v: v['id'] == room_id, 'title'), get_all=False) + + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + f'https://hlslive.shugiintv.go.jp/{room_id}/amlst:{room_id}/playlist.m3u8', + room_id, ext='mp4') + self._sort_formats(formats) + + return { + 'id': room_id, + 'title': title, + 'formats': formats, + 'subtitles': subtitles, + 'is_live': True, + } + + +class ShugiinItvVodIE(ShugiinItvBaseIE): + _VALID_URL = r'https?://(?:www\.)?shugiintv\.go\.jp/(?:jp|en)/index\.php\?ex=VL(?:\&[^=]+=[^&]*)*\&deli_id=(?P\d+)' + IE_DESC = '衆議院インターネット審議中継 (ビデオライブラリ)' + _TESTS = [{ + 'url': 'https://www.shugiintv.go.jp/jp/index.php?ex=VL&media_type=&deli_id=53846', + 'info_dict': { + 'id': '53846', + 'title': 'ウクライナ大統領国会演説(オンライン)', + 'release_date': '20220323', + 'chapters': 'count:4', + } + }, { + 'url': 'https://www.shugiintv.go.jp/en/index.php?ex=VL&media_type=&deli_id=53846', + 'only_matching': True + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage( + f'https://www.shugiintv.go.jp/jp/index.php?ex=VL&media_type=&deli_id={video_id}', video_id, + encoding='euc-jp') + + m3u8_url = self._search_regex( + r'id="vtag_src_base_vod"\s*value="(http.+?\.m3u8)"', webpage, 'm3u8 url') + m3u8_url = re.sub(r'^http://', 'https://', m3u8_url) + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + m3u8_url, video_id, ext='mp4') + self._sort_formats(formats) + + title = self._html_search_regex( + (r'(.+)\s*\(\d+分\)', + r'(.+?)\s*\s*(.+?)', + webpage, 'title', fatal=False)) + + chapters = [] + for chp in re.finditer(r'(?i)(?!', webpage): + chapters.append({ + 'title': clean_html(chp.group(2)).strip(), + 'start_time': try_call(lambda: float(parse_qs(chp.group(1))['time'][0].strip())), + }) + # NOTE: there are blanks at the first and the end of the videos, + # so getting/providing the video duration is not possible + # also, the exact end_time for the last chapter is unknown 
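`_parse_japanese_duration`, used just below to synthesize the last chapter's `end_time`, decomposes strings like `1時間30分` positionally. The arithmetic checks out in isolation (same regex, reduced to the happy path):

    import re

    def jp_duration(text):
        mobj = re.search(
            r'(?:(\d+)日間?)?(?:(\d+)時間?)?(?:(\d+)分)?(?:(\d+)秒)?',
            re.sub(r'[\s\u3000]+', '', text or ''))
        days, hours, mins, secs = (int(g) if g else 0 for g in mobj.groups())
        return secs + mins * 60 + hours * 3600 + days * 86400

    assert jp_duration('1時間30分') == 5400
    assert jp_duration('2日 3時間') == 2 * 86400 + 3 * 3600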
(we can get at most minutes of granularity) + last_tr = re.findall(r'(?s)(.+?)', webpage)[-1] + if last_tr and chapters: + last_td = re.findall(r'', last_tr)[-1] + if last_td: + chapters[-1]['end_time'] = chapters[-1]['start_time'] + _parse_japanese_duration(clean_html(last_td)) + + return { + 'id': video_id, + 'title': title, + 'release_date': release_date, + 'chapters': chapters, + 'formats': formats, + 'subtitles': subtitles, + } + + +class SangiinInstructionIE(InfoExtractor): + _VALID_URL = r'^https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php' + IE_DESC = False # this shouldn't be listed as a supported site + + def _real_extract(self, url): + raise ExtractorError('Copy the link from the botton below the video description or player, and use the link to download. If there are no button in the frame, get the URL of the frame showing the video.', expected=True) + + +class SangiinIE(InfoExtractor): + _VALID_URL = r'https?://www\.webtv\.sangiin\.go\.jp/webtv/detail\.php\?sid=(?P\d+)' + IE_DESC = '参議院インターネット審議中継 (archive)' + + _TESTS = [{ + 'url': 'https://www.webtv.sangiin.go.jp/webtv/detail.php?sid=7052', + 'info_dict': { + 'id': '7052', + 'title': '2022年10月7日 本会議', + 'description': 'md5:0a5fed523f95c88105a0b0bf1dd71489', + 'upload_date': '20221007', + 'ext': 'mp4', + }, + }, { + 'url': 'https://www.webtv.sangiin.go.jp/webtv/detail.php?sid=7037', + 'info_dict': { + 'id': '7037', + 'title': '2022年10月3日 開会式', + 'upload_date': '20221003', + 'ext': 'mp4', + }, + }, { + 'url': 'https://www.webtv.sangiin.go.jp/webtv/detail.php?sid=7076', + 'info_dict': { + 'id': '7076', + 'title': '2022年10月27日 法務委員会', + 'upload_date': '20221027', + 'ext': 'mp4', + 'is_live': True, + }, + 'skip': 'this live is turned into archive after it ends', + }, ] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + date = self._html_search_regex( + r']*>\s*開会日\s*\s*]*>\s*(.+?)\s*
  • ', webpage, + 'date', fatal=False) + upload_date = _parse_japanese_date(date) + + title = self._html_search_regex( + r']*>\s*会議名\s*\s*]*>\s*(.+?)\s*', webpage, + 'date', fatal=False) + + # some videos don't have the elements, so assume it's missing + description = self._html_search_regex( + r'会議の経過\s*\s*]*>(.+?)', webpage, + 'description', default=None) + + # this row appears only when it's livestream + is_live = bool(self._html_search_regex( + r']*>\s*公報掲載時刻\s*\s*]*>\s*(.+?)\s*', webpage, + 'is_live', default=None)) + + m3u8_url = self._search_regex( + r'var\s+videopath\s*=\s*(["\'])([^"\']+)\1', webpage, + 'm3u8 url', group=2) + + formats, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4') + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': join_nonempty(date, title, delim=' '), + 'description': description, + 'upload_date': upload_date, + 'formats': formats, + 'subtitles': subs, + 'is_live': is_live, + } From 62b8dac4908bdb340e173bb70048f0f22e825007 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 31 Oct 2022 17:35:20 +0530 Subject: [PATCH 1697/2552] [extractor] Improve `_generic_title` --- yt_dlp/extractor/arte.py | 4 +--- yt_dlp/extractor/bbc.py | 8 ++------ yt_dlp/extractor/breitbart.py | 3 +-- yt_dlp/extractor/callin.py | 4 +--- yt_dlp/extractor/common.py | 8 +++++--- yt_dlp/extractor/cspan.py | 3 +-- yt_dlp/extractor/fivetv.py | 2 +- yt_dlp/extractor/generic.py | 3 +-- yt_dlp/extractor/genericembeds.py | 2 +- yt_dlp/extractor/glide.py | 2 +- yt_dlp/extractor/meipai.py | 4 +--- yt_dlp/extractor/nhk.py | 3 +-- yt_dlp/extractor/onenewsnz.py | 3 +-- yt_dlp/extractor/steam.py | 2 +- yt_dlp/extractor/tennistv.py | 2 +- yt_dlp/extractor/tv24ua.py | 2 +- 16 files changed, 21 insertions(+), 34 deletions(-) diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index d3ec4a66c..b60fa0233 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -303,9 +303,7 @@ class ArteTVCategoryIE(ArteTVBaseIE): if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )): items.append(video) - title = (self._og_search_title(webpage, default=None) - or self._html_search_regex(r']*>([^<]+)', default=None)) - title = strip_or_none(title.rsplit('|', 1)[0]) or self._generic_title(url) + title = strip_or_none(self._generic_title('', webpage, default='').rsplit('|', 1)[0]) or None return self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=title, description=self._og_search_description(webpage, default=None)) diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py index 9a0a4414e..89fce8d5a 100644 --- a/yt_dlp/extractor/bbc.py +++ b/yt_dlp/extractor/bbc.py @@ -898,12 +898,8 @@ class BBCIE(BBCCoUkIE): json_ld_info = self._search_json_ld(webpage, playlist_id, default={}) timestamp = json_ld_info.get('timestamp') - playlist_title = json_ld_info.get('title') - if not playlist_title: - playlist_title = (self._og_search_title(webpage, default=None) - or self._html_extract_title(webpage, 'playlist title', default=None)) - if playlist_title: - playlist_title = re.sub(r'(.+)\s*-\s*BBC.*?$', r'\1', playlist_title).strip() + playlist_title = json_ld_info.get('title') or re.sub( + r'(.+)\s*-\s*BBC.*?$', r'\1', self._generic_title('', webpage, default='')).strip() or None playlist_description = json_ld_info.get( 'description') or self._og_search_description(webpage, default=None) diff --git a/yt_dlp/extractor/breitbart.py b/yt_dlp/extractor/breitbart.py index a2b04fcce..ca5757374 100644 --- 
a/yt_dlp/extractor/breitbart.py +++ b/yt_dlp/extractor/breitbart.py @@ -27,8 +27,7 @@ class BreitBartIE(InfoExtractor): self._sort_formats(formats) return { 'id': video_id, - 'title': (self._og_search_title(webpage, default=None) - or self._html_extract_title(webpage, 'video title')), + 'title': self._generic_title('', webpage), 'description': self._og_search_description(webpage), 'thumbnail': self._og_search_thumbnail(webpage), 'age_limit': self._rta_search(webpage), diff --git a/yt_dlp/extractor/callin.py b/yt_dlp/extractor/callin.py index fc5da7028..6c8129f06 100644 --- a/yt_dlp/extractor/callin.py +++ b/yt_dlp/extractor/callin.py @@ -51,9 +51,7 @@ class CallinIE(InfoExtractor): episode = next_data['props']['pageProps']['episode'] id = episode['id'] - title = (episode.get('title') - or self._og_search_title(webpage, fatal=False) - or self._html_extract_title(webpage)) + title = episode.get('title') or self._generic_title('', webpage) url = episode['m3u8'] formats = self._extract_m3u8_formats(url, display_id, ext='ts') self._sort_formats(formats) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index fb787a722..84a2b95af 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -3820,9 +3820,11 @@ class InfoExtractor: def _generic_id(url): return urllib.parse.unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0]) - @staticmethod - def _generic_title(url): - return urllib.parse.unquote(os.path.splitext(url_basename(url))[0]) + def _generic_title(self, url='', webpage='', *, default=None): + return (self._og_search_title(webpage, default=None) + or self._html_extract_title(webpage, default=None) + or urllib.parse.unquote(os.path.splitext(url_basename(url))[0]) + or default) @staticmethod def _availability(is_private=None, needs_premium=None, needs_subscription=None, needs_auth=None, is_unlisted=None): diff --git a/yt_dlp/extractor/cspan.py b/yt_dlp/extractor/cspan.py index 84393627a..1184633f5 100644 --- a/yt_dlp/extractor/cspan.py +++ b/yt_dlp/extractor/cspan.py @@ -275,8 +275,7 @@ class CSpanCongressIE(InfoExtractor): self._search_regex(r'jwsetup\s*=\s*({(?:.|\n)[^;]+});', webpage, 'player config'), video_id, transform_source=js_to_json) - title = (self._og_search_title(webpage, default=None) - or self._html_extract_title(webpage, 'video title')) + title = self._generic_title('', webpage) description = (self._og_search_description(webpage, default=None) or self._html_search_meta('description', webpage, 'description', default=None)) diff --git a/yt_dlp/extractor/fivetv.py b/yt_dlp/extractor/fivetv.py index 448c332b3..1f48cfd36 100644 --- a/yt_dlp/extractor/fivetv.py +++ b/yt_dlp/extractor/fivetv.py @@ -71,7 +71,7 @@ class FiveTVIE(InfoExtractor): r']+?href="([^"]+)"[^>]+?class="videoplayer"'], webpage, 'video url') - title = self._og_search_title(webpage, default=None) or self._html_extract_title(webpage) + title = self._generic_title('', webpage) duration = int_or_none(self._og_search_property( 'video:duration', webpage, 'duration', default=None)) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 5abde33a9..b0b26b61a 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -2740,8 +2740,7 @@ class GenericIE(InfoExtractor): # Site Name | Video Title # Video Title - Tagline | Site Name # and so on and so forth; it's just not practical - 'title': (self._og_search_title(webpage, default=None) - or self._html_extract_title(webpage, 'video title', default='video')), + 'title': self._generic_title('', 
webpage, default='video'), 'description': self._og_search_description(webpage, default=None), 'thumbnail': self._og_search_thumbnail(webpage, default=None), 'age_limit': self._rta_search(webpage), diff --git a/yt_dlp/extractor/genericembeds.py b/yt_dlp/extractor/genericembeds.py index 1bffe275a..45e1618ba 100644 --- a/yt_dlp/extractor/genericembeds.py +++ b/yt_dlp/extractor/genericembeds.py @@ -20,7 +20,7 @@ class HTML5MediaEmbedIE(InfoExtractor): ] def _extract_from_webpage(self, url, webpage): - video_id, title = self._generic_id(url), self._generic_title(url) + video_id, title = self._generic_id(url), self._generic_title(url, webpage) entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') or [] for num, entry in enumerate(entries, start=1): entry.update({ diff --git a/yt_dlp/extractor/glide.py b/yt_dlp/extractor/glide.py index 2bffb26dc..d114f3494 100644 --- a/yt_dlp/extractor/glide.py +++ b/yt_dlp/extractor/glide.py @@ -20,7 +20,7 @@ class GlideIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - title = self._html_extract_title(webpage, default=None) or self._og_search_title(webpage) + title = self._generic_title('', webpage) video_url = self._proto_relative_url(self._search_regex( r']+src=(["\'])(?P.+?)\1', webpage, 'video URL', default=None, diff --git a/yt_dlp/extractor/meipai.py b/yt_dlp/extractor/meipai.py index 95b6dfe52..1a6f3cd74 100644 --- a/yt_dlp/extractor/meipai.py +++ b/yt_dlp/extractor/meipai.py @@ -48,9 +48,7 @@ class MeipaiIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._og_search_title( - webpage, default=None) or self._html_search_regex( - r']*>([^<]+)', webpage, 'title') + title = self._generic_title('', webpage) formats = [] diff --git a/yt_dlp/extractor/nhk.py b/yt_dlp/extractor/nhk.py index 60d76d1b1..517660ef1 100644 --- a/yt_dlp/extractor/nhk.py +++ b/yt_dlp/extractor/nhk.py @@ -321,8 +321,7 @@ class NhkForSchoolProgramListIE(InfoExtractor): webpage = self._download_webpage(f'https://www.nhk.or.jp/school/{program_id}/', program_id) - title = (self._og_search_title(webpage) - or self._html_extract_title(webpage) + title = (self._generic_title('', webpage) or self._html_search_regex(r'

    ([^<]+?)とは?\s*

    ', webpage, 'title', fatal=False)) title = re.sub(r'\s*\|\s*NHK\s+for\s+School\s*$', '', title) if title else None description = self._html_search_regex( diff --git a/yt_dlp/extractor/onenewsnz.py b/yt_dlp/extractor/onenewsnz.py index 59d4490d0..a46211e77 100644 --- a/yt_dlp/extractor/onenewsnz.py +++ b/yt_dlp/extractor/onenewsnz.py @@ -106,7 +106,6 @@ class OneNewsNZIE(InfoExtractor): playlist_title = ( traverse_obj(fusion_metadata, ('headlines', 'basic')) - or self._og_search_title(webpage) - or self._html_extract_title(webpage) + or self._generic_title('', webpage) ) return self.playlist_result(entries, display_id, playlist_title) diff --git a/yt_dlp/extractor/steam.py b/yt_dlp/extractor/steam.py index e15c22f2a..eea20ff85 100644 --- a/yt_dlp/extractor/steam.py +++ b/yt_dlp/extractor/steam.py @@ -166,7 +166,7 @@ class SteamCommunityBroadcastIE(InfoExtractor): self._sort_formats(formats) return { 'id': video_id, - 'title': self._html_extract_title(webpage) or self._og_search_title(webpage), + 'title': self._generic_title('', webpage), 'formats': formats, 'live_status': 'is_live', 'view_count': json_data.get('num_view'), diff --git a/yt_dlp/extractor/tennistv.py b/yt_dlp/extractor/tennistv.py index 5baa21d52..47cb0965e 100644 --- a/yt_dlp/extractor/tennistv.py +++ b/yt_dlp/extractor/tennistv.py @@ -142,7 +142,7 @@ class TennisTVIE(InfoExtractor): return { 'id': video_id, - 'title': self._html_extract_title(webpage) or self._og_search_title(webpage), + 'title': self._generic_title('', webpage), 'description': self._html_search_regex( (r'', *self._og_regexes('description')), webpage, 'description', fatal=False), diff --git a/yt_dlp/extractor/tv24ua.py b/yt_dlp/extractor/tv24ua.py index 2f2571df7..8d2475296 100644 --- a/yt_dlp/extractor/tv24ua.py +++ b/yt_dlp/extractor/tv24ua.py @@ -74,6 +74,6 @@ class TV24UAVideoIE(InfoExtractor): 'formats': formats, 'subtitles': subtitles, 'thumbnail': thumbnail or self._og_search_thumbnail(webpage), - 'title': self._html_extract_title(webpage) or self._og_search_title(webpage), + 'title': self._generic_title('', webpage), 'description': self._og_search_description(webpage, default=None), } From 58fb927ebd162daae2787ab8664a0991a70b0e85 Mon Sep 17 00:00:00 2001 From: James Woglom Date: Fri, 4 Nov 2022 07:45:47 -0400 Subject: [PATCH 1698/2552] [kaltura] Support playlists (#4986) Authored by: jwoglom, pukkandan --- yt_dlp/extractor/kaltura.py | 196 +++++++++++++++++++++++++++++++++--- 1 file changed, 181 insertions(+), 15 deletions(-) diff --git a/yt_dlp/extractor/kaltura.py b/yt_dlp/extractor/kaltura.py index f62c9791c..677f989a7 100644 --- a/yt_dlp/extractor/kaltura.py +++ b/yt_dlp/extractor/kaltura.py @@ -15,13 +15,14 @@ from ..utils import ( unsmuggle_url, smuggle_url, traverse_obj, + remove_start ) class KalturaIE(InfoExtractor): _VALID_URL = r'''(?x) (?: - kaltura:(?P\d+):(?P[0-9a-z_]+)| + kaltura:(?P\w+):(?P\w+)(?::(?P\w+))?| https?:// (:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/ (?: @@ -56,6 +57,7 @@ class KalturaIE(InfoExtractor): 'thumbnail': 're:^https?://.*/thumbnail/.*', 'timestamp': int, }, + 'skip': 'The access to this service is forbidden since the specified partner is blocked' }, { 'url': 'http://www.kaltura.com/index.php/kwidget/cache_st/1300318621/wid/_269692/uiconf_id/3873291/entry_id/1_1jc2y3e4', @@ -108,6 +110,80 @@ class KalturaIE(InfoExtractor): # unavailable source format 'url': 'kaltura:513551:1_66x4rg7o', 'only_matching': True, + }, + { + # html5lib URL using kwidget player + 'url': 
'https://cdnapisec.kaltura.com/html5/html5lib/v2.46/mwEmbedFrame.php/p/691292/uiconf_id/20499062/entry_id/0_c076mna6?wid=_691292&iframeembed=true&playerId=kaltura_player_1420508608&entry_id=0_c076mna6&flashvars%5BakamaiHD.loadingPolicy%5D=preInitialize&flashvars%5BakamaiHD.asyncInit%5D=true&flashvars%5BstreamerType%5D=hdnetwork', + 'info_dict': { + 'id': '0_c076mna6', + 'ext': 'mp4', + 'title': 'md5:4883e7acbcbf42583a2dddc97dee4855', + 'duration': 3608, + 'uploader_id': 'commons@swinburne.edu.au', + 'timestamp': 1408086874, + 'view_count': int, + 'upload_date': '20140815', + 'thumbnail': 'http://cfvod.kaltura.com/p/691292/sp/69129200/thumbnail/entry_id/0_c076mna6/version/100022', + } + }, + { + # html5lib playlist URL using kwidget player + 'url': 'https://cdnapisec.kaltura.com/html5/html5lib/v2.89/mwEmbedFrame.php/p/2019031/uiconf_id/40436601?wid=1_4j3m32cv&iframeembed=true&playerId=kaltura_player_&flashvars[playlistAPI.kpl0Id]=1_jovey5nu&flashvars[ks]=&&flashvars[imageDefaultDuration]=30&flashvars[localizationCode]=en&flashvars[leadWithHTML5]=true&flashvars[forceMobileHTML5]=true&flashvars[nextPrevBtn.plugin]=true&flashvars[hotspots.plugin]=true&flashvars[sideBarContainer.plugin]=true&flashvars[sideBarContainer.position]=left&flashvars[sideBarContainer.clickToClose]=true&flashvars[chapters.plugin]=true&flashvars[chapters.layout]=vertical&flashvars[chapters.thumbnailRotator]=false&flashvars[streamSelector.plugin]=true&flashvars[EmbedPlayer.SpinnerTarget]=videoHolder&flashvars[dualScreen.plugin]=true&flashvars[playlistAPI.playlistUrl]=https://canvasgatechtest.kaf.kaltura.com/playlist/details/{playlistAPI.kpl0Id}/categoryid/126428551', + 'info_dict': { + 'id': '1_jovey5nu', + 'title': '00-00 Introduction' + }, + 'playlist': [ + { + 'info_dict': { + 'id': '1_b1y5hlvx', + 'ext': 'mp4', + 'title': 'CS7646_00-00 Introductio_Introduction', + 'duration': 91, + 'thumbnail': 'http://cfvod.kaltura.com/p/2019031/sp/201903100/thumbnail/entry_id/1_b1y5hlvx/version/100001', + 'view_count': int, + 'timestamp': 1533154447, + 'upload_date': '20180801', + 'uploader_id': 'djoyner3', + } + }, { + 'info_dict': { + 'id': '1_jfb7mdpn', + 'ext': 'mp4', + 'title': 'CS7646_00-00 Introductio_Three parts to the course', + 'duration': 63, + 'thumbnail': 'http://cfvod.kaltura.com/p/2019031/sp/201903100/thumbnail/entry_id/1_jfb7mdpn/version/100001', + 'view_count': int, + 'timestamp': 1533154489, + 'upload_date': '20180801', + 'uploader_id': 'djoyner3', + } + }, { + 'info_dict': { + 'id': '1_8xflxdp7', + 'ext': 'mp4', + 'title': 'CS7646_00-00 Introductio_Textbooks', + 'duration': 37, + 'thumbnail': 'http://cfvod.kaltura.com/p/2019031/sp/201903100/thumbnail/entry_id/1_8xflxdp7/version/100001', + 'view_count': int, + 'timestamp': 1533154512, + 'upload_date': '20180801', + 'uploader_id': 'djoyner3', + } + }, { + 'info_dict': { + 'id': '1_3hqew8kn', + 'ext': 'mp4', + 'title': 'CS7646_00-00 Introductio_Prerequisites', + 'duration': 49, + 'thumbnail': 'http://cfvod.kaltura.com/p/2019031/sp/201903100/thumbnail/entry_id/1_3hqew8kn/version/100001', + 'view_count': int, + 'timestamp': 1533154536, + 'upload_date': '20180801', + 'uploader_id': 'djoyner3', + } + } + ] } ] @@ -187,7 +263,14 @@ class KalturaIE(InfoExtractor): return data - def _get_video_info(self, video_id, partner_id, service_url=None): + def _get_video_info(self, video_id, partner_id, service_url=None, player_type='html5'): + assert player_type in ('html5', 'kwidget') + if player_type == 'kwidget': + return self._get_video_info_kwidget(video_id, partner_id, 
service_url) + + return self._get_video_info_html5(video_id, partner_id, service_url) + + def _get_video_info_html5(self, video_id, partner_id, service_url=None): actions = [ { 'apiVersion': '3.3.0', @@ -200,8 +283,9 @@ class KalturaIE(InfoExtractor): 'expiry': 86400, 'service': 'session', 'action': 'startWidgetSession', - 'widgetId': '_%s' % partner_id, + 'widgetId': self._build_widget_id(partner_id), }, + # info { 'action': 'list', 'filter': {'redirectFromEntryId': video_id}, @@ -212,12 +296,14 @@ class KalturaIE(InfoExtractor): 'fields': 'createdAt,dataUrl,duration,name,plays,thumbnailUrl,userId', }, }, + # flavor_assets { 'action': 'getbyentryid', 'entryId': video_id, 'service': 'flavorAsset', 'ks': '{1:result:ks}', }, + # captions { 'action': 'list', 'filter:entryIdEqual': video_id, @@ -226,17 +312,85 @@ class KalturaIE(InfoExtractor): }, ] return self._kaltura_api_call( - video_id, actions, service_url, note='Downloading video info JSON') + video_id, actions, service_url, note='Downloading video info JSON (Kaltura html5 player)') + + def _get_video_info_kwidget(self, video_id, partner_id, service_url=None): + actions = [ + { + 'service': 'multirequest', + 'apiVersion': '3.1', + 'expiry': 86400, + 'clientTag': 'kwidget:v2.89', + 'format': 1, # JSON, 2 = XML, 3 = PHP + 'ignoreNull': 1, + 'action': 'null', + }, + # header + { + 'expiry': 86400, + 'service': 'session', + 'action': 'startWidgetSession', + 'widgetId': self._build_widget_id(partner_id), + }, + # (empty) + { + 'expiry': 86400, + 'service': 'session', + 'action': 'startwidgetsession', + 'widgetId': self._build_widget_id(partner_id), + 'format': 9, + 'apiVersion': '3.1', + 'clientTag': 'kwidget:v2.89', + 'ignoreNull': 1, + 'ks': '{1:result:ks}' + }, + # info + { + 'action': 'list', + 'filter': {'redirectFromEntryId': video_id}, + 'service': 'baseentry', + 'ks': '{1:result:ks}', + 'responseProfile': { + 'type': 1, + 'fields': 'createdAt,dataUrl,duration,name,plays,thumbnailUrl,userId', + }, + }, + # flavor_assets + { + 'action': 'getbyentryid', + 'entryId': video_id, + 'service': 'flavorAsset', + 'ks': '{1:result:ks}', + }, + # captions + { + 'action': 'list', + 'filter:entryIdEqual': video_id, + 'service': 'caption_captionasset', + 'ks': '{1:result:ks}', + }, + ] + # second object (representing the second start widget session) is None + header, _, _info, flavor_assets, captions = self._kaltura_api_call( + video_id, actions, service_url, note='Downloading video info JSON (Kaltura kwidget player)') + info = _info['objects'][0] + return header, info, flavor_assets, captions + + def _build_widget_id(self, partner_id): + return partner_id if '_' in partner_id else f'_{partner_id}' + + IFRAME_PACKAGE_DATA_REGEX = r'window\.kalturaIframePackageData\s*=' def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) mobj = self._match_valid_url(url) - partner_id, entry_id = mobj.group('partner_id', 'id') - ks = None - captions = None + partner_id, entry_id, player_type = mobj.group('partner_id', 'id', 'player_type') + ks, captions = None, None + if not player_type: + player_type = 'kwidget' if 'html5lib/v2' in url else 'html5' if partner_id and entry_id: - _, info, flavor_assets, captions = self._get_video_info(entry_id, partner_id, smuggled_data.get('service_url')) + _, info, flavor_assets, captions = self._get_video_info(entry_id, partner_id, smuggled_data.get('service_url'), player_type=player_type) else: path, query = mobj.group('path', 'query') if not path and not query: @@ -248,7 +402,7 @@ class 
KalturaIE(InfoExtractor): splitted_path = path.split('/') params.update(dict((zip(splitted_path[::2], [[v] for v in splitted_path[1::2]])))) if 'wid' in params: - partner_id = params['wid'][0][1:] + partner_id = remove_start(params['wid'][0], '_') elif 'p' in params: partner_id = params['p'][0] elif 'partner_id' in params: @@ -257,14 +411,13 @@ class KalturaIE(InfoExtractor): raise ExtractorError('Invalid URL', expected=True) if 'entry_id' in params: entry_id = params['entry_id'][0] - _, info, flavor_assets, captions = self._get_video_info(entry_id, partner_id) + _, info, flavor_assets, captions = self._get_video_info(entry_id, partner_id, player_type=player_type) elif 'uiconf_id' in params and 'flashvars[referenceId]' in params: reference_id = params['flashvars[referenceId]'][0] webpage = self._download_webpage(url, reference_id) - entry_data = self._parse_json(self._search_regex( - r'window\.kalturaIframePackageData\s*=\s*({.*});', - webpage, 'kalturaIframePackageData'), - reference_id)['entryResult'] + entry_data = self._search_json( + self.IFRAME_PACKAGE_DATA_REGEX, webpage, + 'kalturaIframePackageData', reference_id)['entryResult'] info, flavor_assets = entry_data['meta'], entry_data['contextData']['flavorAssets'] entry_id = info['id'] # Unfortunately, data returned in kalturaIframePackageData lacks # regular approach since we now know the entry_id try: _, info, flavor_assets, captions = self._get_video_info( - entry_id, partner_id) + entry_id, partner_id, player_type=player_type) except ExtractorError: # Regular scenario failed but we already have everything # extracted apart from captions and can process at least # with this pass + elif 'uiconf_id' in params and 'flashvars[playlistAPI.kpl0Id]' in params: + playlist_id = params['flashvars[playlistAPI.kpl0Id]'][0] + webpage = self._download_webpage(url, playlist_id) + playlist_data = self._search_json( + self.IFRAME_PACKAGE_DATA_REGEX, webpage, + 'kalturaIframePackageData', playlist_id)['playlistResult'] + return self.playlist_from_matches( + traverse_obj(playlist_data, (playlist_id, 'items', ..., 'id')), + playlist_id, traverse_obj(playlist_data, (playlist_id, 'name')), + ie=KalturaIE, getter=lambda x: f'kaltura:{partner_id}:{x}:{player_type}') else: raise ExtractorError('Invalid URL', expected=True) ks = params.get('flashvars[ks]', [None])[0] + return self._per_video_extract(smuggled_data, entry_id, info, ks, flavor_assets, captions) + + def _per_video_extract(self, smuggled_data, entry_id, info, ks, flavor_assets, captions): source_url = smuggled_data.get('source_url') if source_url: referrer = base64.b64encode( From f72218c1992d1eed446b3236a91e7613cec6039a Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 4 Nov 2022 19:38:38 +0530 Subject: [PATCH 1699/2552] [extractor/bitchute] Simplify extractor (#5066) * Check alternate domains when a URL does not work * Obey `--no-check-formats` * Remove webseeds (they don't seem to exist anymore) Authored by: flashdagger, pukkandan Co-authored-by: Marcel --- yt_dlp/extractor/bitchute.py | 113 +++++++++++++++++++---------------- 1 file changed, 61 insertions(+), 52 deletions(-) diff --git a/yt_dlp/extractor/bitchute.py b/yt_dlp/extractor/bitchute.py index c9cbb6d1d..87d04468a 100644 --- a/yt_dlp/extractor/bitchute.py +++ b/yt_dlp/extractor/bitchute.py @@ -4,8 +4,12 @@ import re from .common import InfoExtractor from ..utils import ( ExtractorError, - GeoRestrictedError, + HEADRequest, + clean_html, + get_element_by_class, + int_or_none,
orderedSet, + traverse_obj, unified_strdate, urlencode_postdata, ) @@ -18,7 +22,7 @@ class BitChuteIE(InfoExtractor): 'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/', 'md5': '7e427d7ed7af5a75b5855705ec750e2b', 'info_dict': { - 'id': 'szoMrox2JEI', + 'id': 'UGlrF9o9b-Q', 'ext': 'mp4', 'title': 'This is the first video on #BitChute !', 'description': 'md5:a0337e7b1fe39e32336974af8173a034', @@ -26,6 +30,21 @@ class BitChuteIE(InfoExtractor): 'uploader': 'BitChute', 'upload_date': '20170103', }, + }, { + # video not downloadable in browser, but we can recover it + 'url': 'https://www.bitchute.com/video/2s6B3nZjAk7R/', + 'md5': '05c12397d5354bf24494885b08d24ed1', + 'info_dict': { + 'id': '2s6B3nZjAk7R', + 'ext': 'mp4', + 'filesize': 71537926, + 'title': 'STYXHEXENHAMMER666 - Election Fraud, Clinton 2020, EU Armies, and Gun Control', + 'description': 'md5:228ee93bd840a24938f536aeac9cf749', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'BitChute', + 'upload_date': '20181113', + }, + 'params': {'check_formats': None}, }, { 'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/', 'only_matching': True, @@ -34,67 +53,57 @@ class BitChuteIE(InfoExtractor): 'only_matching': True, }] + _HEADERS = { + 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36', + 'Referer': 'https://www.bitchute.com/', + } + + def _check_format(self, video_url, video_id): + urls = orderedSet( + re.sub(r'(^https?://)(seed\d+)(?=\.bitchute\.com)', fr'\g<1>{host}', video_url) + for host in (r'\g<2>', 'seed150', 'seed151', 'seed152', 'seed153')) + for url in urls: + try: + response = self._request_webpage( + HEADRequest(url), video_id=video_id, note=f'Checking {url}', headers=self._HEADERS) + except ExtractorError as e: + self.to_screen(f'{video_id}: URL is invalid, skipping: {e.cause}') + continue + return { + 'url': url, + 'filesize': int_or_none(response.headers.get('Content-Length')) + } + def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage( - 'https://www.bitchute.com/video/%s' % video_id, video_id, headers={ - 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36', - }) + f'https://www.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS) - title = self._html_search_regex( - (r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'([^<]+)'), - webpage, 'title', default=None) or self._html_search_meta( - 'description', webpage, 'title', - default=None) or self._og_search_description(webpage) + publish_date = clean_html(get_element_by_class('video-publish-date', webpage)) + entries = self._parse_html5_media_entries(url, webpage, video_id) - format_urls = [] - for mobj in re.finditer( - r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage): - format_urls.append(mobj.group('url')) - format_urls.extend(re.findall(r'as=(https?://[^&"\']+)', webpage)) - - formats = [ - {'url': format_url} - for format_url in orderedSet(format_urls)] + formats = [] + for format_ in traverse_obj(entries, (0, 'formats', ...)): + if self.get_param('check_formats') is not False: + format_.update(self._check_format(format_.pop('url'), video_id) or {}) + if 'url' not in format_: + continue + formats.append(format_) if not formats: - entries = self._parse_html5_media_entries( - url, webpage, video_id) - if not entries: - error = self._html_search_regex(r'<h1 class="page-title">([^<]+)</h1>', webpage, 'error', default='Cannot find 
video') - if error == 'Video Unavailable': - raise GeoRestrictedError(error) - raise ExtractorError(error, expected=True) - formats = entries[0]['formats'] - - self._check_formats(formats, video_id) - if not formats: - raise self.raise_no_formats('Video is unavailable', expected=True, video_id=video_id) + self.raise_no_formats( + 'Video is unavailable. Please make sure this video is playable in the browser ' + 'before reporting this issue.', expected=True, video_id=video_id) self._sort_formats(formats) - description = self._html_search_regex( - r'(?s)<div\b[^>]+\bclass=["\']full hidden[^>]+>(.+?)</div>', - webpage, 'description', fatal=False) - thumbnail = self._og_search_thumbnail( - webpage, default=None) or self._html_search_meta( - 'twitter:image:src', webpage, 'thumbnail') - uploader = self._html_search_regex( - (r'(?s)<div class=["\']channel-banner.*?<p\b[^>]+\bclass=["\']name[^>]+>(.+?)</p>', - r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>'), - webpage, 'uploader', fatal=False) - - upload_date = unified_strdate(self._search_regex( - r'class=["\']video-publish-date[^>]+>[^<]+ at \d+:\d+ UTC on (.+?)\.', - webpage, 'upload date', fatal=False)) - return { 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'uploader': uploader, - 'upload_date': upload_date, + 'title': self._html_extract_title(webpage) or self._og_search_title(webpage), + 'description': self._og_search_description(webpage, default=None), + 'thumbnail': self._og_search_thumbnail(webpage), + 'uploader': clean_html(get_element_by_class('owner', webpage)), + 'upload_date': unified_strdate(self._search_regex( + r'at \d+:\d+ UTC on (.+?)\.', publish_date, 'upload date', fatal=False)), 'formats': formats, } From 78545664bf80086a011494b2010f949b2f182b04 Mon Sep 17 00:00:00 2001 From: lauren <lauren@selfisekai.rocks> Date: Fri, 4 Nov 2022 15:54:05 +0100 Subject: [PATCH 1700/2552] [extractor/agora] Add extractors (#5101) Authored by: selfisekai --- yt_dlp/extractor/_extractors.py | 6 + yt_dlp/extractor/agora.py | 253 ++++++++++++++++++++++++++++++++ yt_dlp/utils.py | 5 + 3 files changed, 264 insertions(+) create mode 100644 yt_dlp/extractor/agora.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index d7362df3a..0bcb6e185 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -71,6 +71,12 @@ from .afreecatv import ( AfreecaTVLiveIE, AfreecaTVUserIE, ) +from .agora import ( + TokFMAuditionIE, + TokFMPodcastIE, + WyborczaPodcastIE, + WyborczaVideoIE, +) from .airmozilla import AirMozillaIE from .aljazeera import AlJazeeraIE from .alphaporno import AlphaPornoIE diff --git a/yt_dlp/extractor/agora.py b/yt_dlp/extractor/agora.py new file mode 100644 index 000000000..714414bd4 --- /dev/null +++ b/yt_dlp/extractor/agora.py @@ -0,0 +1,253 @@ +import functools +import uuid + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + OnDemandPagedList, + int_or_none, + month_by_name, + parse_duration, + try_call, +) + + +class WyborczaVideoIE(InfoExtractor): + # this id is not an article id, it has to be extracted from the article + _VALID_URL = r'(?:wyborcza:video:|https?://wyborcza\.pl/(?:api-)?video/)(?P<id>\d+)' + IE_NAME = 'wyborcza:video' + _TESTS = [{ + 'url': 'wyborcza:video:26207634', + 'info_dict': { + 'id': '26207634', + 'ext': 'mp4', + 'title': '- Polska w 2020 r. jest innym państwem niż w 2015 r. 
Nie zmieniła się konstytucja, ale jest to już inny ustrój - mówi Adam Bodnar', + 'description': ' ', + 'uploader': 'Dorota Roman', + 'duration': 2474, + 'thumbnail': r're:https://.+\.jpg', + }, + }, { + 'url': 'https://wyborcza.pl/video/26207634', + 'only_matching': True, + }, { + 'url': 'https://wyborcza.pl/api-video/26207634', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + meta = self._download_json(f'https://wyborcza.pl/api-video/{video_id}', video_id) + + formats = [] + base_url = meta['redirector'].replace('http://', 'https://') + meta['basePath'] + for quality in ('standard', 'high'): + if not meta['files'].get(quality): + continue + formats.append({ + 'url': base_url + meta['files'][quality], + 'height': int_or_none( + self._search_regex( + r'p(\d+)[a-z]+\.mp4$', meta['files'][quality], + 'mp4 video height', default=None)), + 'format_id': quality, + }) + if meta['files'].get('dash'): + formats.extend(self._extract_mpd_formats(base_url + meta['files']['dash'], video_id)) + + self._sort_formats(formats) + return { + 'id': video_id, + 'formats': formats, + 'title': meta.get('title'), + 'description': meta.get('lead'), + 'uploader': meta.get('signature'), + 'thumbnail': meta.get('imageUrl'), + 'duration': meta.get('duration'), + } + + +class WyborczaPodcastIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?://(?:www\.)?(?: + wyborcza\.pl/podcast(?:/0,172673\.html)?| + wysokieobcasy\.pl/wysokie-obcasy/0,176631\.html + )(?:\?(?:[^&#]+?&)*podcast=(?P<id>\d+))? + ''' + _TESTS = [{ + 'url': 'https://wyborcza.pl/podcast/0,172673.html?podcast=100720#S.main_topic-K.C-B.6-L.1.podcast', + 'info_dict': { + 'id': '100720', + 'ext': 'mp3', + 'title': 'Cyfrodziewczyny. Kim były pionierki polskiej informatyki ', + 'uploader': 'Michał Nogaś ', + 'upload_date': '20210117', + 'description': 'md5:49f0a06ffc4c1931210d3ab1416a651d', + 'duration': 3684.0, + 'thumbnail': r're:https://.+\.jpg', + }, + }, { + 'url': 'https://www.wysokieobcasy.pl/wysokie-obcasy/0,176631.html?podcast=100673', + 'info_dict': { + 'id': '100673', + 'ext': 'mp3', + 'title': 'Czym jest ubóstwo menstruacyjne i dlaczego dotyczy każdej i każdego z nas?', + 'uploader': 'Agnieszka Urazińska ', + 'upload_date': '20210115', + 'description': 'md5:c161dc035f8dbb60077011fc41274899', + 'duration': 1803.0, + 'thumbnail': r're:https://.+\.jpg', + }, + }, { + 'url': 'https://wyborcza.pl/podcast', + 'info_dict': { + 'id': '334', + 'title': 'Gościnnie: Wyborcza, 8:10', + 'series': 'Gościnnie: Wyborcza, 8:10', + }, + 'playlist_mincount': 370, + }, { + 'url': 'https://www.wysokieobcasy.pl/wysokie-obcasy/0,176631.html', + 'info_dict': { + 'id': '395', + 'title': 'Gościnnie: Wysokie Obcasy', + 'series': 'Gościnnie: Wysokie Obcasy', + }, + 'playlist_mincount': 12, + }] + + def _real_extract(self, url): + podcast_id = self._match_id(url) + + if not podcast_id: # playlist + podcast_id = '395' if 'wysokieobcasy.pl/' in url else '334' + return self.url_result(TokFMAuditionIE._create_url(podcast_id), TokFMAuditionIE, podcast_id) + + meta = self._download_json('https://wyborcza.pl/api/podcast', podcast_id, + query={'guid': podcast_id, 'type': 'wo' if 'wysokieobcasy.pl/' in url else None}) + + day, month, year = self._search_regex(r'^(\d\d?) 
(\w+) (\d{4})$', meta.get('publishedDate'), + 'upload date', group=(1, 2, 3), default=(None, None, None)) + return { + 'id': podcast_id, + 'url': meta['url'], + 'title': meta.get('title'), + 'description': meta.get('description'), + 'thumbnail': meta.get('imageUrl'), + 'duration': parse_duration(meta.get('duration')), + 'uploader': meta.get('author'), + 'upload_date': try_call(lambda: f'{year}{month_by_name(month, lang="pl"):0>2}{day:0>2}'), + } + + +class TokFMPodcastIE(InfoExtractor): + _VALID_URL = r'(?:https?://audycje\.tokfm\.pl/podcast/|tokfm:podcast:)(?P<id>\d+),?' + IE_NAME = 'tokfm:podcast' + _TESTS = [{ + 'url': 'https://audycje.tokfm.pl/podcast/91275,-Systemowy-rasizm-Czy-zamieszki-w-USA-po-morderstwie-w-Minneapolis-doprowadza-do-zmian-w-sluzbach-panstwowych', + 'info_dict': { + 'id': '91275', + 'ext': 'aac', + 'title': 'md5:a9b15488009065556900169fb8061cce', + 'episode': 'md5:a9b15488009065556900169fb8061cce', + 'series': 'Analizy', + }, + }] + + def _real_extract(self, url): + media_id = self._match_id(url) + + # in case it breaks see this but it returns a lot of useless data + # https://api.podcast.radioagora.pl/api4/getPodcasts?podcast_id=100091&with_guests=true&with_leaders_for_mobile=true + metadata = self._download_json( + f'https://audycje.tokfm.pl/getp/3{media_id}', media_id, 'Downloading podcast metadata') + if not metadata: + raise ExtractorError('No such podcast', expected=True) + metadata = metadata[0] + + formats = [] + for ext in ('aac', 'mp3'): + url_data = self._download_json( + f'https://api.podcast.radioagora.pl/api4/getSongUrl?podcast_id={media_id}&device_id={uuid.uuid4()}&ppre=false&audio={ext}', + media_id, 'Downloading podcast %s URL' % ext) + # prevents inserting the mp3 (default) multiple times + if 'link_ssl' in url_data and f'.{ext}' in url_data['link_ssl']: + formats.append({ + 'url': url_data['link_ssl'], + 'ext': ext, + 'vcodec': 'none', + 'acodec': ext, + }) + + self._sort_formats(formats) + return { + 'id': media_id, + 'formats': formats, + 'title': metadata.get('podcast_name'), + 'series': metadata.get('series_name'), + 'episode': metadata.get('podcast_name'), + } + + +class TokFMAuditionIE(InfoExtractor): + _VALID_URL = r'(?:https?://audycje\.tokfm\.pl/audycja/|tokfm:audition:)(?P<id>\d+),?' 
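# A rough, self-contained sketch of the lazy-paging pattern this class relies on
# below. The stub data and the fetch_page helper are hypothetical; only the
# OnDemandPagedList usage mirrors the real code:
#
#   import functools
#   from yt_dlp.utils import OnDemandPagedList
#
#   FAKE_PAGES = {0: ['ep1', 'ep2'], 1: ['ep3']}  # pretend two-page API result
#
#   def fetch_page(series_id, page):  # OnDemandPagedList passes a 0-based page index
#       yield from FAKE_PAGES.get(page, [])  # real code would download one page here
#
#   entries = OnDemandPagedList(functools.partial(fetch_page, '218'), 2)
#   # pages are fetched only when the corresponding entries are requested,
#   # so --playlist-items selections avoid downloading the whole series list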
+ IE_NAME = 'tokfm:audition' + _TESTS = [{ + 'url': 'https://audycje.tokfm.pl/audycja/218,Analizy', + 'info_dict': { + 'id': '218', + 'title': 'Analizy', + 'series': 'Analizy', + }, + 'playlist_count': 1635, + }] + + _PAGE_SIZE = 30 + _HEADERS = { + 'User-Agent': 'Mozilla/5.0 (Linux; Android 9; Redmi 3S Build/PQ3A.190801.002; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/87.0.4280.101 Mobile Safari/537.36', + } + + @staticmethod + def _create_url(id): + return f'https://audycje.tokfm.pl/audycja/{id}' + + def _real_extract(self, url): + audition_id = self._match_id(url) + + data = self._download_json( + f'https://api.podcast.radioagora.pl/api4/getSeries?series_id={audition_id}', + audition_id, 'Downloading audition metadata', headers=self._HEADERS) + if not data: + raise ExtractorError('No such audition', expected=True) + data = data[0] + + entries = OnDemandPagedList(functools.partial( + self._fetch_page, audition_id, data), self._PAGE_SIZE) + + return { + '_type': 'playlist', + 'id': audition_id, + 'title': data.get('series_name'), + 'series': data.get('series_name'), + 'entries': entries, + } + + def _fetch_page(self, audition_id, data, page): + for retry in self.RetryManager(): + podcast_page = self._download_json( + f'https://api.podcast.radioagora.pl/api4/getPodcasts?series_id={audition_id}&limit=30&offset={page}&with_guests=true&with_leaders_for_mobile=true', + audition_id, f'Downloading podcast list page {page + 1}', headers=self._HEADERS) + if not podcast_page: + retry.error = ExtractorError('Agora returned empty page', expected=True) + + for podcast in podcast_page: + yield { + '_type': 'url_transparent', + 'url': podcast['podcast_sharing_url'], + 'ie_key': TokFMPodcastIE.ie_key(), + 'title': podcast.get('podcast_name'), + 'episode': podcast.get('podcast_name'), + 'description': podcast.get('podcast_description'), + 'timestamp': int_or_none(podcast.get('podcast_timestamp')), + 'series': data.get('series_name'), + } diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 1e2342f3e..7eef2c9cd 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -149,6 +149,11 @@ MONTH_NAMES = { 'fr': [ 'janvier', 'février', 'mars', 'avril', 'mai', 'juin', 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'], + # these follow the genitive grammatical case (dopełniacz) + # some websites might be using nominative, which will require another month list + # https://en.wikibooks.org/wiki/Polish/Noun_cases + 'pl': ['stycznia', 'lutego', 'marca', 'kwietnia', 'maja', 'czerwca', + 'lipca', 'sierpnia', 'września', 'października', 'listopada', 'grudnia'], } # From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42 From ed13a772d717c0df4f41fad6010369ad5d545005 Mon Sep 17 00:00:00 2001 From: sam <mail@samueljenks.me> Date: Sat, 5 Nov 2022 04:25:17 +1300 Subject: [PATCH 1701/2552] [extractor/bbc] Support onion domains (#5211) Authored by: DoubleCouponDay --- yt_dlp/extractor/bbc.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py index 89fce8d5a..fe122af85 100644 --- a/yt_dlp/extractor/bbc.py +++ b/yt_dlp/extractor/bbc.py @@ -591,7 +591,12 @@ class BBCCoUkIE(InfoExtractor): class BBCIE(BBCCoUkIE): IE_NAME = 'bbc' IE_DESC = 'BBC' - _VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)' + _VALID_URL = r'''(?x) + https?://(?:www\.)?(?: + bbc\.(?:com|co\.uk)| + bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd\.onion| + 
bbcweb3hytmzhn5d532owbu6oqadra5z3ar726vq5kgwwn6aucdccrad\.onion + )/(?:[^/]+/)+(?P<id>[^/#?]+)''' _MEDIA_SETS = [ 'pc', @@ -841,6 +846,12 @@ class BBCIE(BBCCoUkIE): 'upload_date': '20190604', 'categories': ['Psychology'], }, + }, { # onion routes + 'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576', + 'only_matching': True, + }, { + 'url': 'https://www.bbcweb3hytmzhn5d532owbu6oqadra5z3ar726vq5kgwwn6aucdccrad.onion/sport/av/football/63195681', + 'only_matching': True, }] @classmethod From 68a9a450d432f67dc8c2531f053a5fd41b5f341a Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 4 Nov 2022 15:37:45 +0000 Subject: [PATCH 1702/2552] [extractor/genius] Add extractors (#5221) Closes #5209 Authored by: bashonly --- yt_dlp/extractor/_extractors.py | 4 + yt_dlp/extractor/genius.py | 127 ++++++++++++++++++++++++++++++++ 2 files changed, 131 insertions(+) create mode 100644 yt_dlp/extractor/genius.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 0bcb6e185..020f3b454 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -636,6 +636,10 @@ from .gazeta import GazetaIE from .gdcvault import GDCVaultIE from .gedidigital import GediDigitalIE from .generic import GenericIE +from .genius import ( + GeniusIE, + GeniusLyricsIE, +) from .gettr import ( GettrIE, GettrStreamingIE, diff --git a/yt_dlp/extractor/genius.py b/yt_dlp/extractor/genius.py new file mode 100644 index 000000000..62f5a28ff --- /dev/null +++ b/yt_dlp/extractor/genius.py @@ -0,0 +1,127 @@ +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + js_to_json, + smuggle_url, + str_or_none, + traverse_obj, + unescapeHTML, +) + + +class GeniusIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?genius\.com/videos/(?P<id>[^?/#]+)' + _TESTS = [{ + 'url': 'https://genius.com/videos/Vince-staples-breaks-down-the-meaning-of-when-sparks-fly', + 'md5': '64c2ad98cfafcfda23bfa0ad0c512f4c', + 'info_dict': { + 'id': '6313303597112', + 'ext': 'mp4', + 'title': 'Vince Staples Breaks Down The Meaning Of “When Sparks Fly”', + 'description': 'md5:bc15e00342c537c0039d414423ae5752', + 'tags': 'count:1', + 'uploader_id': '4863540648001', + 'duration': 388.416, + 'upload_date': '20221005', + 'timestamp': 1664982341, + 'thumbnail': r're:^https?://.*\.jpg$', + }, + }, { + 'url': 'https://genius.com/videos/Breaking-down-drakes-certified-lover-boy-kanye-beef-way-2-sexy-cudi', + 'md5': 'b8ed87a5efd1473bd027c20a969d4060', + 'info_dict': { + 'id': '6271792014001', + 'ext': 'mp4', + 'title': 'md5:c6355f7fa8a70bc86492a3963919fc15', + 'description': 'md5:1774638c31548b31b037c09e9b821393', + 'tags': 'count:3', + 'uploader_id': '4863540648001', + 'duration': 2685.099, + 'upload_date': '20210909', + 'timestamp': 1631209167, + 'thumbnail': r're:^https?://.*\.jpg$', + }, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + metadata = self._search_json( + r'<meta content="', webpage, 'metadata', display_id, transform_source=unescapeHTML) + video_id = traverse_obj( + metadata, ('video', 'provider_id'), + ('dfp_kv', lambda _, x: x['name'] == 'brightcove_video_id', 'values', 0), get_all=False) + if not video_id: + raise ExtractorError('Brightcove video id not found in webpage') + + config = self._search_json(r'var\s*APP_CONFIG\s*=', webpage, 'config', video_id, default={}) + account_id = 
config.get('brightcove_account_id', '4863540648001') + player_id = traverse_obj( + config, 'brightcove_standard_web_player_id', 'brightcove_standard_no_autoplay_web_player_id', + 'brightcove_modal_web_player_id', 'brightcove_song_story_web_player_id', default='S1ZcmcOC1x') + + return self.url_result( + smuggle_url( + f'https://players.brightcove.net/{account_id}/{player_id}_default/index.html?videoId={video_id}', + {'referrer': url}), 'BrightcoveNew', video_id) + + +class GeniusLyricsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?genius\.com/(?P<id>[^?/#]+)-lyrics[?/#]?' + _TESTS = [{ + 'url': 'https://genius.com/Lil-baby-heyy-lyrics', + 'playlist_mincount': 2, + 'info_dict': { + 'id': '8454545', + 'title': 'Heyy', + 'description': 'Heyy by Lil Baby', + }, + }, { + 'url': 'https://genius.com/Outkast-two-dope-boyz-in-a-cadillac-lyrics', + 'playlist_mincount': 1, + 'info_dict': { + 'id': '36239', + 'title': 'Two Dope Boyz (In a Cadillac)', + 'description': 'Two Dope Boyz (In a Cadillac) by OutKast', + }, + }, { + 'url': 'https://genius.com/Playboi-carti-rip-lyrics', + 'playlist_mincount': 1, + 'info_dict': { + 'id': '3710582', + 'title': 'R.I.P.', + 'description': 'R.I.P. by Playboi Carti', + }, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + json_string = self._search_json( + r'window\.__PRELOADED_STATE__\s*=\s*JSON\.parse\(', webpage, 'json string', + display_id, transform_source=js_to_json, contains_pattern=r'\'{(?s:.+)}\'') + song_info = self._parse_json(json_string, display_id) + song_id = str_or_none(traverse_obj(song_info, ('songPage', 'song'))) + if not song_id: + raise ExtractorError('Song id not found in webpage') + + title = traverse_obj( + song_info, ('songPage', 'trackingData', lambda _, x: x['key'] == 'Title', 'value'), + get_all=False, default='untitled') + artist = traverse_obj( + song_info, ('songPage', 'trackingData', lambda _, x: x['key'] == 'Primary Artist', 'value'), + get_all=False, default='unknown artist') + media = traverse_obj( + song_info, ('entities', 'songs', song_id, 'media'), expected_type=list, default=[]) + + entries = [] + for m in media: + if m.get('type') in ('video', 'audio') and m.get('url'): + if m.get('provider') == 'spotify': + self.to_screen(f'{song_id}: Skipping Spotify audio embed') + else: + entries.append(self.url_result(m['url'])) + + return self.playlist_result(entries, song_id, title, f'{title} by {artist}') From 2e30b46fe4a04e82d1ec1a21f8d387e5f96405be Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Sat, 5 Nov 2022 15:34:53 +0530 Subject: [PATCH 1703/2552] [extractor/youtube] Improve chapter parsing from description Closes #5448 --- yt_dlp/extractor/youtube.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 77a8b93f3..555c94f97 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3027,9 +3027,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): for contents in content_list)), []) def _extract_chapters_from_description(self, description, duration): + duration_re = r'(?:\d+:)?\d{1,2}:\d{2}' + sep_re = r'(?m)^\s*(%s)\b\W*\s(%s)\s*$' return self._extract_chapters( - re.findall(r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or ''), + re.findall(sep_re % (duration_re, r'.+?'), description or ''), chapter_time=lambda x: parse_duration(x[0]), chapter_title=lambda x: x[1], + duration=duration, strict=False) or 
self._extract_chapters( + re.findall(sep_re % (r'.+?', duration_re), description or ''), + chapter_time=lambda x: parse_duration(x[1]), chapter_title=lambda x: x[0], duration=duration, strict=False) def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True): From 0d113603ac2ccc869eb1d1b7419caed77f5f5d8a Mon Sep 17 00:00:00 2001 From: sam <mail@samueljenks.me> Date: Sat, 5 Nov 2022 23:13:05 +1300 Subject: [PATCH 1704/2552] [extractor/oftv] Add extractors (#5134) Closes #5017 Authored by: DoubleCouponDay --- yt_dlp/extractor/_extractors.py | 4 +++ yt_dlp/extractor/oftv.py | 54 +++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 yt_dlp/extractor/oftv.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 020f3b454..0a9b1bce9 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1254,6 +1254,10 @@ from .nzherald import NZHeraldIE from .nzz import NZZIE from .odatv import OdaTVIE from .odnoklassniki import OdnoklassnikiIE +from .oftv import ( + OfTVIE, + OfTVPlaylistIE +) from .oktoberfesttv import OktoberfestTVIE from .olympics import OlympicsReplayIE from .on24 import On24IE diff --git a/yt_dlp/extractor/oftv.py b/yt_dlp/extractor/oftv.py new file mode 100644 index 000000000..3ae7278fb --- /dev/null +++ b/yt_dlp/extractor/oftv.py @@ -0,0 +1,54 @@ +from .common import InfoExtractor +from .zype import ZypeIE +from ..utils import traverse_obj + + +class OfTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?of.tv/video/(?P<id>\w+)' + _TESTS = [{ + 'url': 'https://of.tv/video/627d7d95b353db0001dadd1a', + 'md5': 'cb9cd5db3bb9ee0d32bfd7e373d6ef0a', + 'info_dict': { + 'id': '627d7d95b353db0001dadd1a', + 'ext': 'mp4', + 'title': 'E1: Jacky vs Eric', + 'thumbnail': r're:^https?://.*\.jpg', + 'average_rating': 0, + 'description': 'md5:dd16e3e2a8d27d922e7a989f85986853', + 'display_id': '', + 'duration': 1423, + 'timestamp': 1652391300, + 'upload_date': '20220512', + 'view_count': 0, + 'creator': 'This is Fire' + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + info = next(ZypeIE.extract_from_webpage(self._downloader, url, webpage)) + info['_type'] = 'url_transparent' + info['creator'] = self._search_regex(r'<a[^>]+class=\"creator-name\"[^>]+>([^<]+)', webpage, 'creator') + return info + + +class OfTVPlaylistIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?of.tv/creators/(?P<id>[a-zA-Z0-9-]+)/.?' 
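# A minimal sketch of the traverse_obj branching that _real_extract below uses to
# collect playlist entries; the sample list is made up, not real of.tv data:
#
#   from yt_dlp.utils import traverse_obj
#
#   sample = [{'discovery_url': 'https://example.com/a'},
#             {'unrelated': 1},
#             {'discovery_url': 'https://example.com/b'}]
#   # `...` branches over every list item; items missing the key are skipped
#   assert traverse_obj(sample, (..., 'discovery_url')) == [
#       'https://example.com/a', 'https://example.com/b']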
+ _TESTS = [{ + 'url': 'https://of.tv/creators/this-is-fire/', + 'playlist_count': 8, + 'info_dict': { + 'id': 'this-is-fire' + } + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) + + json_match = self._search_json( + r'var\s*remaining_videos\s*=', webpage, 'oftv playlists', playlist_id, contains_pattern=r'\[.+\]') + + return self.playlist_from_matches( + traverse_obj(json_match, (..., 'discovery_url')), playlist_id) From da9a60ca0d9ed085ba3d60bf46e48bd2b53f1ecb Mon Sep 17 00:00:00 2001 From: Lesmiscore <nao20010128@gmail.com> Date: Sat, 5 Nov 2022 19:18:15 +0900 Subject: [PATCH 1705/2552] [extractor/twitcasting] Fix `data-movie-playlist` extraction (#5453) Authored by: Lesmiscore --- yt_dlp/extractor/twitcasting.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/twitcasting.py b/yt_dlp/extractor/twitcasting.py index 0dbb97a36..9046f994d 100644 --- a/yt_dlp/extractor/twitcasting.py +++ b/yt_dlp/extractor/twitcasting.py @@ -1,3 +1,4 @@ +import base64 import itertools import re @@ -74,6 +75,16 @@ class TwitCastingIE(InfoExtractor): 'playlist_mincount': 2, }] + def _parse_data_movie_playlist(self, dmp, video_id): + # attempt 1: parse as JSON directly + try: + return self._parse_json(dmp, video_id) + except ExtractorError: + pass + # attempt 2: decode reversed base64 + decoded = base64.b64decode(dmp[::-1]) + return self._parse_json(decoded, video_id) + def _real_extract(self, url): uploader_id, video_id = self._match_valid_url(url).groups() @@ -100,7 +111,7 @@ class TwitCastingIE(InfoExtractor): video_js_data = try_get( webpage, - lambda x: self._parse_json(self._search_regex( + lambda x: self._parse_data_movie_playlist(self._search_regex( r'data-movie-playlist=\'([^\']+?)\'', x, 'movie playlist', default=None), video_id)['2'], list) From 59a0c35865124fa2e85d6ed0e01b61a53a6b1446 Mon Sep 17 00:00:00 2001 From: MMM <flashdagger@googlemail.com> Date: Sat, 5 Nov 2022 11:39:58 +0100 Subject: [PATCH 1706/2552] [extractor/lbry] Authenticate with cookies (#5435) Closes #5431 Authored by: flashdagger --- yt_dlp/extractor/lbry.py | 39 ++++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/lbry.py b/yt_dlp/extractor/lbry.py index 0e0ddbed8..b2b61abac 100644 --- a/yt_dlp/extractor/lbry.py +++ b/yt_dlp/extractor/lbry.py @@ -24,10 +24,14 @@ class LBRYBaseIE(InfoExtractor): _SUPPORTED_STREAM_TYPES = ['video', 'audio'] def _call_api_proxy(self, method, display_id, params, resource): + headers = {'Content-Type': 'application/json-rpc'} + token = try_get(self._get_cookies('https://odysee.com'), lambda x: x['auth_token'].value) + if token: + headers['x-lbry-auth-token'] = token response = self._download_json( 'https://api.lbry.tv/api/v1/proxy', display_id, 'Downloading %s JSON metadata' % resource, - headers={'Content-Type': 'application/json-rpc'}, + headers=headers, data=json.dumps({ 'method': method, 'params': params, @@ -159,6 +163,29 @@ class LBRYIE(LBRYBaseIE): 'thumbnail': 'https://thumbnails.lbry.com/AgHSc_HzrrE', 'license': 'Copyrighted (contact publisher)', } + }, { + # HLS live stream (might expire) + 'url': 'https://odysee.com/@RT:fd/livestream_RT:d', + 'info_dict': { + 'id': 'fdd11cb3ab75f95efb7b3bc2d726aa13ac915b66', + 'ext': 'mp4', + 'live_status': 'is_live', + 'title': 'startswith:RT News | Livestream 24/7', + 'description': 'md5:fe68d0056dfe79c1a6b8ce8c34d5f6fa', + 'timestamp': int, + 'upload_date': str, + 
'release_timestamp': int, + 'release_date': str, + 'tags': list, + 'duration': None, + 'channel': 'RT', + 'channel_id': 'fdd11cb3ab75f95efb7b3bc2d726aa13ac915b66', + 'channel_url': 'https://odysee.com/@RT:fdd11cb3ab75f95efb7b3bc2d726aa13ac915b66', + 'formats': 'mincount:1', + 'thumbnail': 'startswith:https://thumb', + 'license': 'None', + }, + 'params': {'skip_download': True} }, { 'url': 'https://odysee.com/@BrodieRobertson:5/apple-is-tracking-everything-you-do-on:e', 'only_matching': True, @@ -197,22 +224,24 @@ class LBRYIE(LBRYBaseIE): display_id = compat_urllib_parse_unquote(display_id) uri = 'lbry://' + display_id result = self._resolve_url(uri, display_id, 'stream') + headers = {'Referer': 'https://odysee.com/'} if result['value'].get('stream_type') in self._SUPPORTED_STREAM_TYPES: - claim_id, is_live, headers = result['claim_id'], False, {} + claim_id, is_live = result['claim_id'], False streaming_url = self._call_api_proxy( 'get', claim_id, {'uri': uri}, 'streaming url')['streaming_url'] final_url = self._request_webpage( - HEADRequest(streaming_url), display_id, + HEADRequest(streaming_url), display_id, headers=headers, note='Downloading streaming redirect url info').geturl() elif result.get('value_type') == 'stream': claim_id, is_live = result['signing_channel']['claim_id'], True - headers = {'referer': 'https://player.odysee.live/'} live_data = self._download_json( 'https://api.odysee.live/livestream/is_live', claim_id, query={'channel_claim_id': claim_id}, note='Downloading livestream JSON metadata')['data'] streaming_url = final_url = live_data.get('VideoURL') - if not final_url and not live_data.get('Live'): + # Upcoming videos may still give VideoURL + if not live_data.get('Live'): + streaming_url = final_url = None self.raise_no_formats('This stream is not live', True, claim_id) else: raise UnsupportedError(url) From 6141346d18f45412f751a7c8ae21836eb61b5eb2 Mon Sep 17 00:00:00 2001 From: Matthew <coletdjnz@protonmail.com> Date: Sun, 6 Nov 2022 18:25:31 +1300 Subject: [PATCH 1707/2552] [extractor/youtube] Update playlist metadata extraction for new layout (#5376) Fixes https://github.com/yt-dlp/yt-dlp/issues/5373 Authored by: coletdjnz --- yt_dlp/extractor/youtube.py | 153 +++++++++++++++++++----------------- 1 file changed, 82 insertions(+), 71 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 555c94f97..c387481cd 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -904,20 +904,24 @@ class YoutubeBaseInfoExtractor(InfoExtractor): video_id = renderer.get('videoId') title = self._get_text(renderer, 'title') description = self._get_text(renderer, 'descriptionSnippet') - duration = parse_duration(self._get_text( - renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))) + + duration = int_or_none(renderer.get('lengthSeconds')) + if duration is None: + duration = parse_duration(self._get_text( + renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))) if duration is None: duration = parse_duration(self._search_regex( r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$', traverse_obj(renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str), video_id, default=None, group='duration')) - view_count = self._get_count(renderer, 'viewCountText', 'shortViewCountText') + # videoInfo is a string like '50K views • 10 years ago'. 
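# For example (an illustration, assuming parse_count keeps its behaviour of
# reading the leading count-like token and ignoring the rest of the string):
#
#   from yt_dlp.utils import parse_count
#   parse_count('50K views • 10 years ago')  # -> 50000; the age part is dropped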
+ view_count = self._get_count(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') uploader = self._get_text(renderer, 'ownerText', 'shortBylineText') channel_id = traverse_obj( renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'), expected_type=str, get_all=False) - time_text = self._get_text(renderer, 'publishedTimeText') or '' + time_text = self._get_text(renderer, 'publishedTimeText', 'videoInfo') or '' scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False)) overlay_style = traverse_obj( renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'), @@ -4583,50 +4587,36 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): if fatal: raise ExtractorError('Unable to find selected tab') - def _extract_uploader(self, data): - uploader = {} - renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {} - owner = try_get( - renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict) - if owner: - owner_text = owner.get('text') - uploader['uploader'] = self._search_regex( - r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text) - uploader['uploader_id'] = try_get( - owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], str) - uploader['uploader_url'] = urljoin( - 'https://www.youtube.com/', - try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], str)) - return filter_dict(uploader) - def _extract_from_tabs(self, item_id, ytcfg, data, tabs): playlist_id = title = description = channel_url = channel_name = channel_id = None tags = [] selected_tab = self._extract_selected_tab(tabs) + # Deprecated - remove when layout discontinued primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') - renderer = try_get( + playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer'), expected_type=dict) + metadata_renderer = try_get( data, lambda x: x['metadata']['channelMetadataRenderer'], dict) - if renderer: - channel_name = renderer.get('title') - channel_url = renderer.get('channelUrl') - channel_id = renderer.get('externalId') + if metadata_renderer: + channel_name = metadata_renderer.get('title') + channel_url = metadata_renderer.get('channelUrl') + channel_id = metadata_renderer.get('externalId') else: - renderer = try_get( + metadata_renderer = try_get( data, lambda x: x['metadata']['playlistMetadataRenderer'], dict) - if renderer: - title = renderer.get('title') - description = renderer.get('description', '') + if metadata_renderer: + title = metadata_renderer.get('title') + description = metadata_renderer.get('description', '') playlist_id = channel_id - tags = renderer.get('keywords', '').split() + tags = metadata_renderer.get('keywords', '').split() # We can get the uncropped banner/avatar by replacing the crop params with '=s0' # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714 def _get_uncropped(url): return url_or_none((url or '').split('=')[0] + '=s0') - avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar') + avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar') if avatar_thumbnails: uncropped_avatar = _get_uncropped(avatar_thumbnails[0]['url']) if uncropped_avatar: @@ -4650,14 +4640,33 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): 'preference': -5 }) + # Deprecated - remove when old 
layout is discontinued primary_thumbnails = self._extract_thumbnails( primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail')) + playlist_thumbnails = self._extract_thumbnails( + playlist_header_renderer, ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail')) + if playlist_id is None: playlist_id = item_id - playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats') - last_updated_unix = self._parse_time_text(self._get_text(playlist_stats, 2)) + # Deprecated - remove primary_sidebar_renderer when old layout discontinued + # Playlist stats is a text runs array containing [video count, view count, last updated]. + # last updated or (view count and last updated) may be missing. + playlist_stats = get_first( + (primary_sidebar_renderer, playlist_header_renderer), (('stats', 'briefStats', 'numVideosText'),)) + last_updated_unix = self._parse_time_text( + self._get_text(playlist_stats, 2) # deprecated, remove when old layout discontinued + or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text'))) + + view_count = self._get_count(playlist_stats, 1) + if view_count is None: + view_count = self._get_count(playlist_header_renderer, 'viewCountText') + + playlist_count = self._get_count(playlist_stats, 0) + if playlist_count is None: + playlist_count = self._get_count(playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text')) + if title is None: title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id title += format_field(selected_tab, 'title', ' - %s') @@ -4670,16 +4679,29 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): 'uploader': channel_name, 'uploader_id': channel_id, 'uploader_url': channel_url, - 'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners, + 'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners, 'tags': tags, - 'view_count': self._get_count(playlist_stats, 1), + 'view_count': view_count, 'availability': self._extract_availability(data), 'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'), - 'playlist_count': self._get_count(playlist_stats, 0), + 'playlist_count': playlist_count, 'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')), } if not channel_id: - metadata.update(self._extract_uploader(data)) + owner = traverse_obj(playlist_header_renderer, 'ownerText') + if not owner: + # Deprecated + owner = traverse_obj( + self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer'), + ('videoOwner', 'videoOwnerRenderer', 'title')) + owner_text = self._get_text(owner) + browse_ep = traverse_obj(owner, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {} + metadata.update(filter_dict({ + 'uploader': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text), + 'uploader_id': browse_ep.get('browseId'), + 'uploader_url': urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl')) + })) + metadata.update({ 'channel': metadata['uploader'], 'channel_id': metadata['uploader_id'], @@ -4751,19 +4773,21 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): Note: Unless YouTube tells us explicitly, we do not assume it is public @param data: response """ - renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {} - - player_header_privacy = traverse_obj( - data, ('header', 
'playlistHeaderRenderer', 'privacy'), expected_type=str)
+ sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
+ playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
+ player_header_privacy = playlist_header_renderer.get('privacy')
 
- badges = self._extract_badges(renderer)
+ badges = self._extract_badges(sidebar_renderer)
 
 # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
- privacy_setting_icon = traverse_obj(
- renderer, (
- 'privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
- lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
- get_all=False, expected_type=str)
+ privacy_setting_icon = get_first(
+ (playlist_header_renderer, sidebar_renderer),
+ ('privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
+ lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
+ expected_type=str)
+
+ microformats_is_unlisted = traverse_obj(
+ data, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type=bool)
 
 return (
 'public' if (
@@ -4778,7 +4802,8 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
 is_unlisted=(
 self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
 or player_header_privacy == 'UNLISTED' if player_header_privacy is not None
- else privacy_setting_icon == 'PRIVACY_UNLISTED' if privacy_setting_icon is not None else None),
+ else privacy_setting_icon == 'PRIVACY_UNLISTED' if privacy_setting_icon is not None
+ else microformats_is_unlisted if microformats_is_unlisted is not None else None),
 needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
 needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
 needs_auth=False))
@@ -4794,39 +4819,23 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
 
 def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
 """
- Get playlist with unavailable videos if the 'show unavailable videos' button exists.
+ Reload playlists with unavailable videos (e.g. private videos, region blocked, etc.)
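
# Note: the reload below is a single innertube browse call; at the HTTP level it is
# roughly the following sketch (client context and headers omitted, so treat this as
# an assumption-laden outline rather than the exact request the extractor sends):
#
#   import requests
#   requests.post('https://www.youtube.com/youtubei/v1/browse',
#                 json={'browseId': f'VL{item_id}',  # 'VL' + playlist id
#                       'params': 'wgYCCAA='})       # flag: include unavailable videos
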
""" - browse_id = params = None - renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') - if not renderer: + is_playlist = bool(traverse_obj( + data, ('metadata', 'playlistMetadataRenderer'), ('header', 'playlistHeaderRenderer'))) + if not is_playlist: return - menu_renderer = try_get( - renderer, lambda x: x['menu']['menuRenderer']['items'], list) or [] - for menu_item in menu_renderer: - if not isinstance(menu_item, dict): - continue - nav_item_renderer = menu_item.get('menuNavigationItemRenderer') - text = try_get( - nav_item_renderer, lambda x: x['text']['simpleText'], str) - if not text or text.lower() != 'show unavailable videos': - continue - browse_endpoint = try_get( - nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {} - browse_id = browse_endpoint.get('browseId') - params = browse_endpoint.get('params') - break - headers = self.generate_api_headers( ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data), visitor_data=self._extract_visitor_data(data, ytcfg)) query = { - 'params': params or 'wgYCCAA=', - 'browseId': browse_id or 'VL%s' % item_id + 'params': 'wgYCCAA=', + 'browseId': f'VL{item_id}' } return self._extract_response( item_id=item_id, headers=headers, query=query, check_get_keys='contents', fatal=False, ytcfg=ytcfg, - note='Downloading API JSON with unavailable videos') + note='Redownloading playlist API JSON with unavailable videos') @functools.cached_property def skip_webpage(self): @@ -5324,6 +5333,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel_url': 'https://www.youtube.com/user/Computerphile', 'channel': 'Computerphile', 'availability': 'public', + 'modified_date': '20190712', }, 'playlist_mincount': 11, }, { @@ -5659,6 +5669,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'uploader': 'cole-dlp-test-acc', 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA', 'channel': 'cole-dlp-test-acc', + 'channel_follower_count': int, }, 'playlist_mincount': 1, 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}}, From d715b0e4135fca75b417ee876a4360c58fa3ef6d Mon Sep 17 00:00:00 2001 From: nixxo <nixxo@protonmail.com> Date: Sun, 6 Nov 2022 17:21:12 +0100 Subject: [PATCH 1708/2552] [extractor/skyit] Fix extractors (#5442) Closes #5392 Authored by: nixxo --- yt_dlp/extractor/_extractors.py | 1 - yt_dlp/extractor/skyit.py | 83 ++++++++++++++++----------------- 2 files changed, 39 insertions(+), 45 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 0a9b1bce9..846c81f54 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1640,7 +1640,6 @@ from .skyit import ( SkyItVideoIE, SkyItVideoLiveIE, SkyItIE, - SkyItAcademyIE, SkyItArteIE, CieloTVItIE, TV8ItIE, diff --git a/yt_dlp/extractor/skyit.py b/yt_dlp/extractor/skyit.py index 438fb60e3..2daaaf75c 100644 --- a/yt_dlp/extractor/skyit.py +++ b/yt_dlp/extractor/skyit.py @@ -25,7 +25,6 @@ class SkyItPlayerIE(InfoExtractor): 'salesforce': 'C6D585FD1615272C98DE38235F38BD86', 'sitocommerciale': 'VJwfFuSGnLKnd9Phe9y96WkXgYDCguPMJ2dLhGMb2RE', 'sky': 'F96WlOd8yoFmLQgiqv6fNQRvHZcsWk5jDaYnDvhbiJk', - 'skyacademy': 'A6LAn7EkO2Q26FRy0IAMBekX6jzDXYL3', 'skyarte': 'LWk29hfiU39NNdq87ePeRach3nzTSV20o0lTv2001Cd', 'theupfront': 'PRSGmDMsg6QMGc04Obpoy7Vsbn7i2Whp', } @@ -42,11 +41,7 @@ class SkyItPlayerIE(InfoExtractor): if not hls_url and video.get('geoblock' if is_live else 'geob'): self.raise_geo_restricted(countries=['IT']) - if is_live: - formats = 
self._extract_m3u8_formats(hls_url, video_id, 'mp4') - else: - formats = self._extract_akamai_formats( - hls_url, video_id, {'http': 'videoplatform.sky.it'}) + formats = self._extract_m3u8_formats(hls_url, video_id, 'mp4') self._sort_formats(formats) return { @@ -80,14 +75,17 @@ class SkyItVideoIE(SkyItPlayerIE): _VALID_URL = r'https?://(?:masterchef|video|xfactor)\.sky\.it(?:/[^/]+)*/video/[0-9a-z-]+-(?P<id>\d+)' _TESTS = [{ 'url': 'https://video.sky.it/news/mondo/video/uomo-ucciso-da-uno-squalo-in-australia-631227', - 'md5': 'fe5c91e59a84a3437eaa0bca6e134ccd', + 'md5': '5b858a62d9ffe2ab77b397553024184a', 'info_dict': { 'id': '631227', 'ext': 'mp4', 'title': 'Uomo ucciso da uno squalo in Australia', 'timestamp': 1606036192, 'upload_date': '20201122', - } + 'duration': 26, + 'thumbnail': 'https://video.sky.it/captures/thumbs/631227/631227_thumb_880x494.jpg', + }, + 'params': {'skip_download': 'm3u8'}, }, { 'url': 'https://xfactor.sky.it/video/x-factor-2020-replay-audizioni-1-615820', 'only_matching': True, @@ -110,7 +108,8 @@ class SkyItVideoLiveIE(SkyItPlayerIE): 'id': '1', 'ext': 'mp4', 'title': r're:Diretta TG24 \d{4}-\d{2}-\d{2} \d{2}:\d{2}', - 'description': 'Guarda la diretta streaming di SkyTg24, segui con Sky tutti gli appuntamenti e gli speciali di Tg24.', + 'description': r're:(?:Clicca play e )?[Gg]uarda la diretta streaming di SkyTg24, segui con Sky tutti gli appuntamenti e gli speciali di Tg24\.', + 'live_status': 'is_live', }, 'params': { # m3u8 download @@ -132,15 +131,17 @@ class SkyItIE(SkyItPlayerIE): IE_NAME = 'sky.it' _VALID_URL = r'https?://(?:sport|tg24)\.sky\.it(?:/[^/]+)*/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)' _TESTS = [{ - 'url': 'https://sport.sky.it/calcio/serie-a/2020/11/21/juventus-cagliari-risultato-gol', + 'url': 'https://sport.sky.it/calcio/serie-a/2022/11/03/brozovic-inter-news', 'info_dict': { - 'id': '631201', + 'id': '789222', 'ext': 'mp4', - 'title': 'Un rosso alla violenza: in campo per i diritti delle donne', - 'upload_date': '20201121', - 'timestamp': 1605995753, + 'title': 'Brozovic con il gruppo: verso convocazione per Juve-Inter', + 'upload_date': '20221103', + 'timestamp': 1667484130, + 'duration': 22, + 'thumbnail': 'https://videoplatform.sky.it/still/2022/11/03/1667480526353_brozovic_videostill_1.jpg', }, - 'expected_warnings': ['Unable to download f4m manifest'], + 'params': {'skip_download': 'm3u8'}, }, { 'url': 'https://tg24.sky.it/mondo/2020/11/22/australia-squalo-uccide-uomo', 'md5': 'fe5c91e59a84a3437eaa0bca6e134ccd', @@ -150,7 +151,10 @@ class SkyItIE(SkyItPlayerIE): 'title': 'Uomo ucciso da uno squalo in Australia', 'timestamp': 1606036192, 'upload_date': '20201122', + 'duration': 26, + 'thumbnail': 'https://video.sky.it/captures/thumbs/631227/631227_thumb_880x494.jpg', }, + 'params': {'skip_download': 'm3u8'}, }] _VIDEO_ID_REGEX = r'data-videoid="(\d+)"' @@ -162,40 +166,25 @@ class SkyItIE(SkyItPlayerIE): return self._player_url_result(video_id) -class SkyItAcademyIE(SkyItIE): - IE_NAME = 'skyacademy.it' - _VALID_URL = r'https?://(?:www\.)?skyacademy\.it(?:/[^/]+)*/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)' - _TESTS = [{ - 'url': 'https://www.skyacademy.it/eventi-speciali/2019/07/05/a-lezione-di-cinema-con-sky-academy-/', - 'md5': 'ced5c26638b7863190cbc44dd6f6ba08', - 'info_dict': { - 'id': '523458', - 'ext': 'mp4', - 'title': 'Sky Academy "The Best CineCamp 2019"', - 'timestamp': 1562843784, - 'upload_date': '20190711', - } - }] - _DOMAIN = 'skyacademy' - _VIDEO_ID_REGEX = r'id="news-videoId_(\d+)"' - - class SkyItArteIE(SkyItIE): IE_NAME = 
'arte.sky.it' _VALID_URL = r'https?://arte\.sky\.it/video/(?P<id>[^/?&#]+)' _TESTS = [{ - 'url': 'https://arte.sky.it/video/serie-musei-venezia-collezionismo-12-novembre/', + 'url': 'https://arte.sky.it/video/oliviero-toscani-torino-galleria-mazzoleni-788962', 'md5': '515aee97b87d7a018b6c80727d3e7e17', 'info_dict': { - 'id': '627926', + 'id': '788962', 'ext': 'mp4', - 'title': "Musei Galleria Franchetti alla Ca' d'Oro Palazzo Grimani", - 'upload_date': '20201106', - 'timestamp': 1604664493, - } + 'title': 'La fotografia di Oliviero Toscani conquista Torino', + 'upload_date': '20221102', + 'timestamp': 1667399996, + 'duration': 12, + 'thumbnail': 'https://videoplatform.sky.it/still/2022/11/02/1667396388552_oliviero-toscani-torino-galleria-mazzoleni_videostill_1.jpg', + }, + 'params': {'skip_download': 'm3u8'}, }] _DOMAIN = 'skyarte' - _VIDEO_ID_REGEX = r'(?s)<iframe[^>]+src="(?:https:)?//player\.sky\.it/player/external\.html\?[^"]*\bid=(\d+)' + _VIDEO_ID_REGEX = r'"embedUrl"\s*:\s*"(?:https:)?//player\.sky\.it/player/external\.html\?[^"]*\bid=(\d+)' class CieloTVItIE(SkyItIE): @@ -210,7 +199,10 @@ class CieloTVItIE(SkyItIE): 'title': 'Il lunedì è sempre un dramma', 'upload_date': '20190329', 'timestamp': 1553862178, - } + 'duration': 30, + 'thumbnail': 'https://videoplatform.sky.it/still/2019/03/29/1553858575610_lunedi_dramma_mant_videostill_1.jpg', + }, + 'params': {'skip_download': 'm3u8'}, }] _DOMAIN = 'cielo' _VIDEO_ID_REGEX = r'videoId\s*=\s*"(\d+)"' @@ -218,9 +210,9 @@ class CieloTVItIE(SkyItIE): class TV8ItIE(SkyItVideoIE): IE_NAME = 'tv8.it' - _VALID_URL = r'https?://tv8\.it/showvideo/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?tv8\.it/(?:show)?video/[0-9a-z-]+-(?P<id>\d+)' _TESTS = [{ - 'url': 'https://tv8.it/showvideo/630529/ogni-mattina-ucciso-asino-di-andrea-lo-cicero/18-11-2020/', + 'url': 'https://www.tv8.it/video/ogni-mattina-ucciso-asino-di-andrea-lo-cicero-630529', 'md5': '9ab906a3f75ea342ed928442f9dabd21', 'info_dict': { 'id': '630529', @@ -228,6 +220,9 @@ class TV8ItIE(SkyItVideoIE): 'title': 'Ogni mattina - Ucciso asino di Andrea Lo Cicero', 'timestamp': 1605721374, 'upload_date': '20201118', - } + 'duration': 114, + 'thumbnail': 'https://videoplatform.sky.it/still/2020/11/18/1605717753954_ogni-mattina-ucciso-asino-di-andrea-lo-cicero_videostill_1.jpg', + }, + 'params': {'skip_download': 'm3u8'}, }] _DOMAIN = 'mtv8' From 5b9f253fa0aee996cf1ed30185d4b502e00609c4 Mon Sep 17 00:00:00 2001 From: Matthew <coletdjnz@protonmail.com> Date: Mon, 7 Nov 2022 05:37:23 +1300 Subject: [PATCH 1709/2552] Backport SSL configuration from Python 3.10 (#5437) Partial fix for https://github.com/yt-dlp/yt-dlp/pull/5294#issuecomment-1289363572, https://github.com/yt-dlp/yt-dlp/issues/4627 Authored by: coletdjnz --- yt_dlp/utils.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 7eef2c9cd..ef4cc904c 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -985,6 +985,18 @@ def make_HTTPS_handler(params, **kwargs): context.options |= 4 # SSL_OP_LEGACY_SERVER_CONNECT # Allow use of weaker ciphers in Python 3.10+. See https://bugs.python.org/issue43998 context.set_ciphers('DEFAULT') + elif sys.version_info < (3, 10) and ssl.OPENSSL_VERSION_INFO >= (1, 1, 1): + # Backport the default SSL ciphers and minimum TLS version settings from Python 3.10 [1]. + # This is to ensure consistent behavior across Python versions, and help avoid fingerprinting + # in some situations [2][3]. 
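
# Reduced to standalone form, the defaults being backported in this hunk are two
# settings on an SSLContext; a sketch using the exact values applied by the
# set_ciphers/minimum_version lines that follow (requires OpenSSL 1.1.1+, per the
# version guard above):
import ssl

ctx = ssl.create_default_context()
ctx.set_ciphers('@SECLEVEL=2:ECDH+AESGCM:ECDH+CHACHA20:ECDH+AES:DHE+AES:!aNULL:!eNULL:!aDSS:!SHA1:!AESCCM')
ctx.minimum_version = ssl.TLSVersion.TLSv1_2
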
+ # Python 3.10 only supports OpenSSL 1.1.1+ [4]. Because this change is likely + # untested on older versions, we only apply this to OpenSSL 1.1.1+ to be safe. + # 1. https://github.com/python/cpython/commit/e983252b516edb15d4338b0a47631b59ef1e2536 + # 2. https://github.com/yt-dlp/yt-dlp/issues/4627 + # 3. https://github.com/yt-dlp/yt-dlp/pull/5294 + # 4. https://peps.python.org/pep-0644/ + context.set_ciphers('@SECLEVEL=2:ECDH+AESGCM:ECDH+CHACHA20:ECDH+AES:DHE+AES:!aNULL:!eNULL:!aDSS:!SHA1:!AESCCM') + context.minimum_version = ssl.TLSVersion.TLSv1_2 context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE if opts_check_certificate: @@ -1982,12 +1994,13 @@ def system_identifier(): with contextlib.suppress(OSError): # We may not have access to the executable libc_ver = platform.libc_ver() - return 'Python %s (%s %s) - %s %s' % ( + return 'Python %s (%s %s) - %s (%s%s)' % ( platform.python_version(), python_implementation, platform.architecture()[0], platform.platform(), - format_field(join_nonempty(*libc_ver, delim=' '), None, '(%s)'), + ssl.OPENSSL_VERSION, + format_field(join_nonempty(*libc_ver, delim=' '), None, ', %s'), ) From cc1d3bf96b23855e76267a08479a065a0a95bdf3 Mon Sep 17 00:00:00 2001 From: CrankDatSouljaBoy <75489748+CrankDatSouljaBoy@users.noreply.github.com> Date: Sun, 6 Nov 2022 17:51:15 +0100 Subject: [PATCH 1710/2552] [extractor/deuxm] Add extractors (#5388) Authored by: CrankDatSouljaBoy --- yt_dlp/extractor/_extractors.py | 4 ++ yt_dlp/extractor/deuxm.py | 76 +++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 yt_dlp/extractor/deuxm.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 846c81f54..0508458f3 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -470,6 +470,10 @@ from .duboku import ( ) from .dumpert import DumpertIE from .defense import DefenseGouvFrIE +from .deuxm import ( + DeuxMIE, + DeuxMNewsIE +) from .digitalconcerthall import DigitalConcertHallIE from .discovery import DiscoveryIE from .disney import DisneyIE diff --git a/yt_dlp/extractor/deuxm.py b/yt_dlp/extractor/deuxm.py new file mode 100644 index 000000000..74a6da6c6 --- /dev/null +++ b/yt_dlp/extractor/deuxm.py @@ -0,0 +1,76 @@ +from .common import InfoExtractor +from ..utils import url_or_none + + +class DeuxMIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?2m\.ma/[^/]+/replay/single/(?P<id>([\w.]{1,24})+)' + + _TESTS = [{ + 'url': 'https://2m.ma/fr/replay/single/6351d439b15e1a613b3debe8', + 'md5': '5f761f04c9d686e553b685134dca5d32', + 'info_dict': { + 'id': '6351d439b15e1a613b3debe8', + 'ext': 'mp4', + 'title': 'Grand Angle : Jeudi 20 Octobre 2022', + 'thumbnail': r're:^https?://2msoread-ww.amagi.tv/mediasfiles/videos/images/.*\.png$' + } + }, { + 'url': 'https://2m.ma/fr/replay/single/635c0aeab4eec832622356da', + 'md5': 'ad6af2f5e4d5b2ad2194a84b6e890b4c', + 'info_dict': { + 'id': '635c0aeab4eec832622356da', + 'ext': 'mp4', + 'title': 'Journal Amazigh : Vendredi 28 Octobre 2022', + 'thumbnail': r're:^https?://2msoread-ww.amagi.tv/mediasfiles/videos/images/.*\.png$' + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + video = self._download_json( + f'https://2m.ma/api/watchDetail/{video_id}', video_id)['response']['News'] + return { + 'id': video_id, + 'title': video.get('titre'), + 'url': video['url'], + 'description': video.get('description'), + 'thumbnail': url_or_none(video.get('image')), + } + + +class DeuxMNewsIE(InfoExtractor): 
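
# Both extractors in this patch are thin wrappers over public JSON endpoints: DeuxMIE
# (above) and DeuxMNewsIE (below). A requests-level sketch of the two calls, using
# only the URLs and response paths shown in this diff (error handling omitted):
import requests

def deuxm_video(video_id):
    return requests.get(f'https://2m.ma/api/watchDetail/{video_id}').json()['response']['News']

def deuxm_article(lang, article_name):
    return requests.get(
        f'https://2m.ma/api/articlesByUrl?lang={lang}&url=/news/{article_name}').json()['response']['article'][0]
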
+ _VALID_URL = r'https?://(?:www\.)?2m\.ma/(?P<lang>\w+)/news/(?P<id>[^/#?]+)' + + _TESTS = [{ + 'url': 'https://2m.ma/fr/news/Kan-Ya-Mkan-d%C3%A9poussi%C3%A8re-l-histoire-du-phare-du-Cap-Beddouza-20221028', + 'md5': '43d5e693a53fa0b71e8a5204c7d4542a', + 'info_dict': { + 'id': '635c5d1233b83834e35b282e', + 'ext': 'mp4', + 'title': 'Kan Ya Mkan d\u00e9poussi\u00e8re l\u2019histoire du phare du Cap Beddouza', + 'description': 'md5:99dcf29b82f1d7f2a4acafed1d487527', + 'thumbnail': r're:^https?://2msoread-ww.amagi.tv/mediasfiles/videos/images/.*\.png$' + } + }, { + 'url': 'https://2m.ma/fr/news/Interview-Casablanca-hors-des-sentiers-battus-avec-Abderrahim-KASSOU-Replay--20221017', + 'md5': '7aca29f02230945ef635eb8290283c0c', + 'info_dict': { + 'id': '634d9e108b70d40bc51a844b', + 'ext': 'mp4', + 'title': 'Interview: Casablanca hors des sentiers battus avec Abderrahim KASSOU (Replay) ', + 'description': 'md5:3b8e78111de9fcc6ef7f7dd6cff2430c', + 'thumbnail': r're:^https?://2msoread-ww.amagi.tv/mediasfiles/videos/images/.*\.png$' + } + }] + + def _real_extract(self, url): + article_name, lang = self._match_valid_url(url).group('id', 'lang') + video = self._download_json( + f'https://2m.ma/api/articlesByUrl?lang={lang}&url=/news/{article_name}', article_name)['response']['article'][0] + return { + 'id': video['id'], + 'title': video.get('title'), + 'url': video['image'][0], + 'description': video.get('content'), + 'thumbnail': url_or_none(video.get('cover')), + } From 049565df2e24d9611a9ffdd033c80a6dafdabbe0 Mon Sep 17 00:00:00 2001 From: HobbyistDev <105957301+HobbyistDev@users.noreply.github.com> Date: Mon, 7 Nov 2022 02:11:33 +0900 Subject: [PATCH 1711/2552] [extractor/swearnet] Add extractor (#5371) Authored by: HobbyistDev --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/swearnet.py | 73 +++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 yt_dlp/extractor/swearnet.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 0508458f3..ec8ceb948 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1763,6 +1763,7 @@ from .svt import ( SVTPlayIE, SVTSeriesIE, ) +from .swearnet import SwearnetEpisodeIE from .swrmediathek import SWRMediathekIE from .syvdk import SYVDKIE from .syfy import SyfyIE diff --git a/yt_dlp/extractor/swearnet.py b/yt_dlp/extractor/swearnet.py new file mode 100644 index 000000000..86a303ec7 --- /dev/null +++ b/yt_dlp/extractor/swearnet.py @@ -0,0 +1,73 @@ +from .common import InfoExtractor +from ..utils import int_or_none, traverse_obj + + +class SwearnetEpisodeIE(InfoExtractor): + _VALID_URL = r'https?://www\.swearnet\.com/shows/(?P<id>[\w-]+)/seasons/(?P<season_num>\d+)/episodes/(?P<episode_num>\d+)' + _TESTS = [{ + 'url': 'https://www.swearnet.com/shows/gettin-learnt-with-ricky/seasons/1/episodes/1', + 'info_dict': { + 'id': '232819', + 'ext': 'mp4', + 'episode_number': 1, + 'episode': 'Episode 1', + 'duration': 719, + 'description': 'md5:c48ef71440ce466284c07085cd7bd761', + 'season': 'Season 1', + 'title': 'Episode 1 - Grilled Cheese Sammich', + 'season_number': 1, + 'thumbnail': 'https://cdn.vidyard.com/thumbnails/232819/_RX04IKIq60a2V6rIRqq_Q_small.jpg', + } + }] + + def _get_formats_and_subtitle(self, video_source, video_id): + video_source = video_source or {} + formats, subtitles = [], {} + for key, value in video_source.items(): + if key == 'hls': + for video_hls in value: + fmts, subs = self._extract_m3u8_formats_and_subtitles(video_hls.get('url'), video_id) + 
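
# The sources mapping walked above looks like {'hls': [{'url': ...}], 'mp4': [{'url': ...}]};
# HLS entries are expanded through the m3u8 helper while any other key is taken as a
# direct file. A reduced sketch of that dispatch (the sample shape is inferred from
# this patch, not from upstream documentation):
def flatten_direct_sources(video_source):
    formats = []
    for key, entries in (video_source or {}).items():
        if key == 'hls':
            continue  # needs manifest expansion, as done via _extract_m3u8_formats_and_subtitles
        formats.extend({'url': entry.get('url'), 'ext': 'mp4'} for entry in entries)
    return formats
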
formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + else: + formats.extend({ + 'url': video_mp4.get('url'), + 'ext': 'mp4' + } for video_mp4 in value) + + return formats, subtitles + + def _get_direct_subtitle(self, caption_json): + subs = {} + for caption in caption_json: + subs.setdefault(caption.get('language') or 'und', []).append({ + 'url': caption.get('vttUrl'), + 'name': caption.get('name') + }) + + return subs + + def _real_extract(self, url): + display_id, season_number, episode_number = self._match_valid_url(url).group('id', 'season_num', 'episode_num') + webpage = self._download_webpage(url, display_id) + + external_id = self._search_regex(r'externalid\s*=\s*"([^"]+)', webpage, 'externalid') + json_data = self._download_json( + f'https://play.vidyard.com/player/{external_id}.json', display_id)['payload']['chapters'][0] + + formats, subtitles = self._get_formats_and_subtitle(json_data['sources'], display_id) + self._merge_subtitles(self._get_direct_subtitle(json_data.get('captions')), target=subtitles) + + return { + 'id': str(json_data['videoId']), + 'title': json_data.get('name') or self._html_search_meta(['og:title', 'twitter:title'], webpage), + 'description': (json_data.get('description') + or self._html_search_meta(['og:description', 'twitter:description'])), + 'duration': int_or_none(json_data.get('seconds')), + 'formats': formats, + 'subtitles': subtitles, + 'season_number': int_or_none(season_number), + 'episode_number': int_or_none(episode_number), + 'thumbnails': [{'url': thumbnail_url} + for thumbnail_url in traverse_obj(json_data, ('thumbnailUrls', ...))] + } From 7053aa3a48dbdfe8f11b12fa0f442a9bf8b136b1 Mon Sep 17 00:00:00 2001 From: Richard Gibson <richard.gibson@gmail.com> Date: Sun, 6 Nov 2022 12:23:16 -0500 Subject: [PATCH 1712/2552] [extractor/epoch] Support videos without data-trailer (#5387) Closes #5359 Authored by: gibson042, pukkandan --- yt_dlp/extractor/epoch.py | 11 ++++++++++- yt_dlp/utils.py | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/epoch.py b/yt_dlp/extractor/epoch.py index 13eeabe3e..110e78c5b 100644 --- a/yt_dlp/extractor/epoch.py +++ b/yt_dlp/extractor/epoch.py @@ -1,4 +1,5 @@ from .common import InfoExtractor +from ..utils import extract_attributes, get_element_html_by_id class EpochIE(InfoExtractor): @@ -28,13 +29,21 @@ class EpochIE(InfoExtractor): 'title': 'Kash Patel: A ‘6-Year-Saga’ of Government Corruption, From Russiagate to Mar-a-Lago', } }, + { + 'url': 'https://www.theepochtimes.com/dick-morris-discusses-his-book-the-return-trumps-big-2024-comeback_4819205.html', + 'info_dict': { + 'id': '9489f994-2a20-4812-b233-ac0e5c345632', + 'ext': 'mp4', + 'title': 'Dick Morris Discusses His Book ‘The Return: Trump’s Big 2024 Comeback’', + } + }, ] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - youmaker_video_id = self._search_regex(r'data-trailer="[\w-]+" data-id="([\w-]+)"', webpage, 'url') + youmaker_video_id = extract_attributes(get_element_html_by_id('videobox', webpage))['data-id'] formats, subtitles = self._extract_m3u8_formats_and_subtitles( f'http://vs1.youmaker.com/assets/{youmaker_video_id}/playlist.m3u8', video_id, 'mp4', m3u8_id='hls') diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index ef4cc904c..cfc7ba63a 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -524,6 +524,7 @@ class HTMLAttributeParser(html.parser.HTMLParser): def handle_starttag(self, tag, attrs): self.attrs = dict(attrs) + raise 
compat_HTMLParseError('done') class HTMLListAttrsParser(html.parser.HTMLParser): From e14ea7fbd92cc15ad0dccedc163f8c26f843c389 Mon Sep 17 00:00:00 2001 From: Bruno Guerreiro <Generator@users.noreply.github.com> Date: Sun, 6 Nov 2022 17:42:23 +0000 Subject: [PATCH 1713/2552] [extractor/youtube] Update piped instances (#5441) Closes #5286 Authored by: Generator --- yt_dlp/extractor/youtube.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index c387481cd..804d0ea34 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -369,14 +369,24 @@ class YoutubeBaseInfoExtractor(InfoExtractor): r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion', # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances r'(?:www\.)?piped\.kavin\.rocks', - r'(?:www\.)?piped\.silkky\.cloud', r'(?:www\.)?piped\.tokhmi\.xyz', - r'(?:www\.)?piped\.moomoo\.me', - r'(?:www\.)?il\.ax', - r'(?:www\.)?piped\.syncpundit\.com', + r'(?:www\.)?piped\.syncpundit\.io', r'(?:www\.)?piped\.mha\.fi', + r'(?:www\.)?watch\.whatever\.social', + r'(?:www\.)?piped\.garudalinux\.org', + r'(?:www\.)?piped\.rivo\.lol', + r'(?:www\.)?piped-libre\.kavin\.rocks', + r'(?:www\.)?yt\.jae\.fi', r'(?:www\.)?piped\.mint\.lgbt', - r'(?:www\.)?piped\.privacy\.com\.de', + r'(?:www\.)?il\.ax', + r'(?:www\.)?piped\.esmailelbob\.xyz', + r'(?:www\.)?piped\.projectsegfau\.lt', + r'(?:www\.)?piped\.privacydev\.net', + r'(?:www\.)?piped\.palveluntarjoaja\.eu', + r'(?:www\.)?piped\.smnz\.de', + r'(?:www\.)?piped\.adminforge\.de', + r'(?:www\.)?watch\.whatevertinfoil\.de', + r'(?:www\.)?piped\.qdi\.fi', ) # extracted from account/account_menu ep From 8c188d5d09177ed213a05c900d3523867c5897fd Mon Sep 17 00:00:00 2001 From: Kevin Wood <endotronic@gmail.com> Date: Sun, 6 Nov 2022 09:45:45 -0800 Subject: [PATCH 1714/2552] [extractor/redgifs] Refresh auth token for 401 (#5352) Closes #5351 Authored by: endotronic, pukkandan --- yt_dlp/extractor/redgifs.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/redgifs.py b/yt_dlp/extractor/redgifs.py index 24ac9420e..92d996ca6 100644 --- a/yt_dlp/extractor/redgifs.py +++ b/yt_dlp/extractor/redgifs.py @@ -1,4 +1,5 @@ import functools +import urllib from .common import InfoExtractor from ..compat import compat_parse_qs @@ -72,14 +73,20 @@ class RedGifsBaseInfoExtractor(InfoExtractor): self._API_HEADERS['authorization'] = f'Bearer {auth["token"]}' def _call_api(self, ep, video_id, *args, **kwargs): - if 'authorization' not in self._API_HEADERS: - self._fetch_oauth_token(video_id) - assert 'authorization' in self._API_HEADERS - - headers = dict(self._API_HEADERS) - headers['x-customheader'] = f'https://www.redgifs.com/watch/{video_id}' - data = self._download_json( - f'https://api.redgifs.com/v2/{ep}', video_id, headers=headers, *args, **kwargs) + for attempt in range(2): + if 'authorization' not in self._API_HEADERS: + self._fetch_oauth_token(video_id) + try: + headers = dict(self._API_HEADERS) + headers['x-customheader'] = f'https://www.redgifs.com/watch/{video_id}' + data = self._download_json( + f'https://api.redgifs.com/v2/{ep}', video_id, headers=headers, *args, **kwargs) + break + except ExtractorError as e: + if not attempt and isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 401: + del self._API_HEADERS['authorization'] # refresh the token + raise + if 'error' in data: raise 
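
# The two-pass loop above is a generic refresh-once-on-401 pattern: drop the cached
# token, fetch a fresh one, retry a single time, and re-raise anything else.
# Distilled into a sketch (fetch_token, do_request and the PermissionError stand-in
# for an HTTP 401 are all hypothetical):
def call_with_refresh(do_request, fetch_token, cache):
    for attempt in range(2):
        if 'token' not in cache:
            cache['token'] = fetch_token()
        try:
            return do_request(cache['token'])
        except PermissionError:  # stand-in for an HTTP 401 response
            if attempt:
                raise
            del cache['token']  # refresh the token on the second pass
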
ExtractorError(f'RedGifs said: {data["error"]}', expected=True, video_id=video_id) return data From 728f4b5c2ef914f3b45d160883469502366d8eac Mon Sep 17 00:00:00 2001 From: lauren <lauren@selfisekai.rocks> Date: Sun, 6 Nov 2022 19:10:06 +0100 Subject: [PATCH 1715/2552] [extractor/tvp] Update extractors (#5346) Closes #5328 Authored by: selfisekai --- yt_dlp/extractor/_extractors.py | 3 +- yt_dlp/extractor/tvp.py | 224 ++++++++++++++++++++++---------- 2 files changed, 156 insertions(+), 71 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index ec8ceb948..d434a5460 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1964,7 +1964,8 @@ from .tvp import ( TVPEmbedIE, TVPIE, TVPStreamIE, - TVPWebsiteIE, + TVPVODSeriesIE, + TVPVODVideoIE, ) from .tvplay import ( TVPlayIE, diff --git a/yt_dlp/extractor/tvp.py b/yt_dlp/extractor/tvp.py index f1bc0fbba..c83b99762 100644 --- a/yt_dlp/extractor/tvp.py +++ b/yt_dlp/extractor/tvp.py @@ -4,40 +4,51 @@ import re from .common import InfoExtractor from ..utils import ( + clean_html, determine_ext, dict_get, ExtractorError, int_or_none, js_to_json, - orderedSet, str_or_none, + strip_or_none, + traverse_obj, try_get, + url_or_none, ) class TVPIE(InfoExtractor): IE_NAME = 'tvp' IE_DESC = 'Telewizja Polska' - _VALID_URL = r'https?://(?:[^/]+\.)?(?:tvp(?:parlament)?\.(?:pl|info)|polandin\.com)/(?:video/(?:[^,\s]*,)*|(?:(?!\d+/)[^/]+/)*)(?P<id>\d+)' + _VALID_URL = r'https?://(?:[^/]+\.)?(?:tvp(?:parlament)?\.(?:pl|info)|tvpworld\.com|swipeto\.pl)/(?:(?!\d+/)[^/]+/)*(?P<id>\d+)' _TESTS = [{ # TVPlayer 2 in js wrapper - 'url': 'https://vod.tvp.pl/video/czas-honoru,i-seria-odc-13,194536', + 'url': 'https://swipeto.pl/64095316/uliczny-foxtrot-wypozyczalnia-kaset-kto-pamieta-dvdvideo', 'info_dict': { - 'id': '194536', + 'id': '64095316', 'ext': 'mp4', - 'title': 'Czas honoru, odc. 13 – Władek', - 'description': 'md5:437f48b93558370b031740546b696e24', - 'age_limit': 12, + 'title': 'Uliczny Foxtrot — Wypożyczalnia kaset. 
Kto pamięta DVD-Video?', + 'age_limit': 0, + 'duration': 374, + 'thumbnail': r're:https://.+', }, + 'expected_warnings': [ + 'Failed to download ISM manifest: HTTP Error 404: Not Found', + 'Failed to download m3u8 information: HTTP Error 404: Not Found', + ], }, { # TVPlayer legacy - 'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176', + 'url': 'https://www.tvp.pl/polska-press-video-uploader/wideo/62042351', 'info_dict': { - 'id': '17916176', + 'id': '62042351', 'ext': 'mp4', - 'title': 'TVP Gorzów pokaże filmy studentów z podroży dookoła świata', - 'description': 'TVP Gorzów pokaże filmy studentów z podroży dookoła świata', + 'title': 'Wideo', + 'description': 'Wideo Kamera', + 'duration': 24, + 'age_limit': 0, + 'thumbnail': r're:https://.+', }, }, { # TVPlayer 2 in iframe @@ -48,6 +59,8 @@ class TVPIE(InfoExtractor): 'title': 'Dzieci na sprzedaż dla homoseksualistów', 'description': 'md5:7d318eef04e55ddd9f87a8488ac7d590', 'age_limit': 12, + 'duration': 259, + 'thumbnail': r're:https://.+', }, }, { # TVPlayer 2 in client-side rendered website (regional; window.__newsData) @@ -58,7 +71,11 @@ class TVPIE(InfoExtractor): 'title': 'Studio Yayo', 'upload_date': '20160616', 'timestamp': 1466075700, - } + 'age_limit': 0, + 'duration': 20, + 'thumbnail': r're:https://.+', + }, + 'skip': 'Geo-blocked outside PL', }, { # TVPlayer 2 in client-side rendered website (tvp.info; window.__videoData) 'url': 'https://www.tvp.info/52880236/09042021-0800', @@ -66,7 +83,10 @@ class TVPIE(InfoExtractor): 'id': '52880236', 'ext': 'mp4', 'title': '09.04.2021, 08:00', + 'age_limit': 0, + 'thumbnail': r're:https://.+', }, + 'skip': 'Geo-blocked outside PL', }, { # client-side rendered (regional) program (playlist) page 'url': 'https://opole.tvp.pl/9660819/rozmowa-dnia', @@ -122,7 +142,7 @@ class TVPIE(InfoExtractor): 'url': 'https://www.tvpparlament.pl/retransmisje-vod/inne/wizyta-premiera-mateusza-morawieckiego-w-firmie-berotu-sp-z-oo/48857277', 'only_matching': True, }, { - 'url': 'https://polandin.com/47942651/pln-10-billion-in-subsidies-transferred-to-companies-pm', + 'url': 'https://tvpworld.com/48583640/tescos-polish-business-bought-by-danish-chain-netto', 'only_matching': True, }] @@ -151,16 +171,13 @@ class TVPIE(InfoExtractor): is_website = video_data.get('type') == 'website' if is_website: url = video_data['url'] - fucked_up_url_parts = re.match(r'https?://vod\.tvp\.pl/(\d+)/([^/?#]+)', url) - if fucked_up_url_parts: - url = f'https://vod.tvp.pl/website/{fucked_up_url_parts.group(2)},{fucked_up_url_parts.group(1)}' else: url = 'tvp:' + str_or_none(video_data.get('_id') or page_id) return { '_type': 'url_transparent', 'id': str_or_none(video_data.get('_id') or page_id), 'url': url, - 'ie_key': 'TVPEmbed' if not is_website else 'TVPWebsite', + 'ie_key': (TVPIE if is_website else TVPEmbedIE).ie_key(), 'title': str_or_none(video_data.get('title')), 'description': str_or_none(video_data.get('lead')), 'timestamp': int_or_none(video_data.get('release_date_long')), @@ -217,8 +234,9 @@ class TVPIE(InfoExtractor): # The URL may redirect to a VOD # example: https://vod.tvp.pl/48463890/wadowickie-spotkania-z-janem-pawlem-ii - if TVPWebsiteIE.suitable(urlh.url): - return self.url_result(urlh.url, ie=TVPWebsiteIE.ie_key(), video_id=page_id) + for ie_cls in (TVPVODSeriesIE, TVPVODVideoIE): + if ie_cls.suitable(urlh.url): + return self.url_result(urlh.url, ie=ie_cls.ie_key(), video_id=page_id) if re.search( r'window\.__(?:video|news|website|directory)Data\s*=', @@ -297,12 +315,13 @@ class 
TVPStreamIE(InfoExtractor): class TVPEmbedIE(InfoExtractor): IE_NAME = 'tvp:embed' IE_DESC = 'Telewizja Polska' + _GEO_BYPASS = False _VALID_URL = r'''(?x) (?: tvp: |https?:// (?:[^/]+\.)? - (?:tvp(?:parlament)?\.pl|tvp\.info|polandin\.com)/ + (?:tvp(?:parlament)?\.pl|tvp\.info|tvpworld\.com|swipeto\.pl)/ (?:sess/ (?:tvplayer\.php\?.*?object_id |TVPlayer2/(?:embed|api)\.php\?.*[Ii][Dd]) @@ -320,6 +339,12 @@ class TVPEmbedIE(InfoExtractor): 'title': 'Czas honoru, odc. 13 – Władek', 'description': 'md5:76649d2014f65c99477be17f23a4dead', 'age_limit': 12, + 'duration': 2652, + 'series': 'Czas honoru', + 'episode': 'Episode 13', + 'episode_number': 13, + 'season': 'sezon 1', + 'thumbnail': r're:https://.+', }, }, { 'url': 'https://www.tvp.pl/sess/tvplayer.php?object_id=51247504&autoplay=false', @@ -327,6 +352,9 @@ class TVPEmbedIE(InfoExtractor): 'id': '51247504', 'ext': 'mp4', 'title': 'Razmova 091220', + 'duration': 876, + 'age_limit': 0, + 'thumbnail': r're:https://.+', }, }, { # TVPlayer2 embed URL @@ -361,40 +389,48 @@ class TVPEmbedIE(InfoExtractor): # stripping JSONP padding datastr = webpage[15 + len(callback):-3] if datastr.startswith('null,'): - error = self._parse_json(datastr[5:], video_id) - raise ExtractorError(error[0]['desc']) + error = self._parse_json(datastr[5:], video_id, fatal=False) + error_desc = traverse_obj(error, (0, 'desc')) + + if error_desc == 'Obiekt wymaga płatności': + raise ExtractorError('Video requires payment and log-in, but log-in is not implemented') + + raise ExtractorError(error_desc or 'unexpected JSON error') content = self._parse_json(datastr, video_id)['content'] info = content['info'] is_live = try_get(info, lambda x: x['isLive'], bool) + if info.get('isGeoBlocked'): + # actual country list is not provided, we just assume it's always available in PL + self.raise_geo_restricted(countries=['PL']) + formats = [] for file in content['files']: - video_url = file.get('url') + video_url = url_or_none(file.get('url')) if not video_url: continue - if video_url.endswith('.m3u8'): + ext = determine_ext(video_url, None) + if ext == 'm3u8': formats.extend(self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', fatal=False, live=is_live)) - elif video_url.endswith('.mpd'): + elif ext == 'mpd': if is_live: # doesn't work with either ffmpeg or native downloader continue formats.extend(self._extract_mpd_formats(video_url, video_id, mpd_id='dash', fatal=False)) - elif video_url.endswith('.f4m'): + elif ext == 'f4m': formats.extend(self._extract_f4m_formats(video_url, video_id, f4m_id='hds', fatal=False)) elif video_url.endswith('.ism/manifest'): formats.extend(self._extract_ism_formats(video_url, video_id, ism_id='mss', fatal=False)) else: - # mp4, wmv or something - quality = file.get('quality', {}) formats.append({ 'format_id': 'direct', 'url': video_url, - 'ext': determine_ext(video_url, file['type']), - 'fps': int_or_none(quality.get('fps')), - 'tbr': int_or_none(quality.get('bitrate')), - 'width': int_or_none(quality.get('width')), - 'height': int_or_none(quality.get('height')), + 'ext': ext or file.get('type'), + 'fps': int_or_none(traverse_obj(file, ('quality', 'fps'))), + 'tbr': int_or_none(traverse_obj(file, ('quality', 'bitrate')), scale=1000), + 'width': int_or_none(traverse_obj(file, ('quality', 'width'))), + 'height': int_or_none(traverse_obj(file, ('quality', 'height'))), }) self._sort_formats(formats) @@ -449,57 +485,105 @@ class TVPEmbedIE(InfoExtractor): return info_dict -class TVPWebsiteIE(InfoExtractor): - IE_NAME = 'tvp:series' - _VALID_URL 
= r'https?://vod\.tvp\.pl/website/(?P<display_id>[^,]+),(?P<id>\d+)' +class TVPVODBaseIE(InfoExtractor): + _API_BASE_URL = 'https://vod.tvp.pl/api/products' + + def _call_api(self, resource, video_id, **kwargs): + return self._download_json( + f'{self._API_BASE_URL}/{resource}', video_id, + query={'lang': 'pl', 'platform': 'BROWSER'}, **kwargs) + + def _parse_video(self, video): + return { + '_type': 'url', + 'url': 'tvp:' + video['externalUid'], + 'ie_key': TVPEmbedIE.ie_key(), + 'title': video.get('title'), + 'description': traverse_obj(video, ('lead', 'description')), + 'age_limit': int_or_none(video.get('rating')), + 'duration': int_or_none(video.get('duration')), + } + + +class TVPVODVideoIE(TVPVODBaseIE): + IE_NAME = 'tvp:vod' + _VALID_URL = r'https?://vod\.tvp\.pl/[a-z\d-]+,\d+/[a-z\d-]+(?<!-odcinki)(?:-odcinki,\d+/odcinek-\d+,S\d+E\d+)?,(?P<id>\d+)(?:\?[^#]+)?(?:#.+)?$' _TESTS = [{ - # series - 'url': 'https://vod.tvp.pl/website/wspaniale-stulecie,17069012/video', + 'url': 'https://vod.tvp.pl/dla-dzieci,24/laboratorium-alchemika-odcinki,309338/odcinek-24,S01E24,311357', 'info_dict': { - 'id': '17069012', + 'id': '60468609', + 'ext': 'mp4', + 'title': 'Laboratorium alchemika, Tusze termiczne. Jak zobaczyć niewidoczne. Odcinek 24', + 'description': 'md5:1d4098d3e537092ccbac1abf49b7cd4c', + 'duration': 300, + 'episode_number': 24, + 'episode': 'Episode 24', + 'age_limit': 0, + 'series': 'Laboratorium alchemika', + 'thumbnail': 're:https://.+', }, - 'playlist_count': 312, }, { - # film - 'url': 'https://vod.tvp.pl/website/krzysztof-krawczyk-cale-moje-zycie,51374466', + 'url': 'https://vod.tvp.pl/filmy-dokumentalne,163/ukrainski-sluga-narodu,339667', 'info_dict': { - 'id': '51374509', + 'id': '51640077', 'ext': 'mp4', - 'title': 'Krzysztof Krawczyk – całe moje życie, Krzysztof Krawczyk – całe moje życie', - 'description': 'md5:2e80823f00f5fc263555482f76f8fa42', + 'title': 'Ukraiński sługa narodu, Ukraiński sługa narodu', + 'series': 'Ukraiński sługa narodu', + 'description': 'md5:b7940c0a8e439b0c81653a986f544ef3', 'age_limit': 12, + 'episode': 'Episode 0', + 'episode_number': 0, + 'duration': 3051, + 'thumbnail': 're:https://.+', }, - 'params': { - 'skip_download': True, - }, - 'add_ie': ['TVPEmbed'], - }, { - 'url': 'https://vod.tvp.pl/website/lzy-cennet,38678312', - 'only_matching': True, }] - def _entries(self, display_id, playlist_id): - url = 'https://vod.tvp.pl/website/%s,%s/video' % (display_id, playlist_id) - for page_num in itertools.count(1): - page = self._download_webpage( - url, display_id, 'Downloading page %d' % page_num, - query={'page': page_num}) + def _real_extract(self, url): + video_id = self._match_id(url) + + return self._parse_video(self._call_api(f'vods/{video_id}', video_id)) - video_ids = orderedSet(re.findall( - r'<a[^>]+\bhref=["\']/video/%s,[^,]+,(\d+)' % display_id, - page)) - if not video_ids: - break +class TVPVODSeriesIE(TVPVODBaseIE): + IE_NAME = 'tvp:vod:series' + _VALID_URL = r'https?://vod\.tvp\.pl/[a-z\d-]+,\d+/[a-z\d-]+-odcinki,(?P<id>\d+)(?:\?[^#]+)?(?:#.+)?$' + + _TESTS = [{ + 'url': 'https://vod.tvp.pl/seriale,18/ranczo-odcinki,316445', + 'info_dict': { + 'id': '316445', + 'title': 'Ranczo', + 'age_limit': 12, + 'categories': ['seriale'], + }, + 'playlist_count': 129, + }, { + 'url': 'https://vod.tvp.pl/programy,88/rolnik-szuka-zony-odcinki,284514', + 'only_matching': True, + }, { + 'url': 'https://vod.tvp.pl/dla-dzieci,24/laboratorium-alchemika-odcinki,309338', + 'only_matching': True, + }] - for video_id in video_ids: - yield self.url_result( 
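
# The new VOD extractors above sit on a small JSON API; _call_api boils down to this
# requests sketch, using only the base URL and query parameters visible in the patch
# (no auth or error handling, so likely incomplete):
import requests

def tvp_vod_api(resource):
    return requests.get(f'https://vod.tvp.pl/api/products/{resource}',
                        params={'lang': 'pl', 'platform': 'BROWSER'}).json()

# e.g. tvp_vod_api(f'vods/{video_id}') or tvp_vod_api(f'vods/serials/{serial_id}/seasons')
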
- 'tvp:%s' % video_id, ie=TVPEmbedIE.ie_key(),
- video_id=video_id)
+ def _entries(self, seasons, playlist_id):
+ for season in seasons:
+ episodes = self._call_api(
+ f'vods/serials/{playlist_id}/seasons/{season["id"]}/episodes', playlist_id,
+ note=f'Downloading episode list for {season["title"]}')
+ yield from map(self._parse_video, episodes)
 
     def _real_extract(self, url):
-        mobj = self._match_valid_url(url)
-        display_id, playlist_id = mobj.group('display_id', 'id')
+        playlist_id = self._match_id(url)
+        metadata = self._call_api(
+            f'vods/serials/{playlist_id}', playlist_id,
+            note='Downloading serial metadata')
+        seasons = self._call_api(
+            f'vods/serials/{playlist_id}/seasons', playlist_id,
+            note='Downloading season list')
         return self.playlist_result(
-            self._entries(display_id, playlist_id), playlist_id)
+            self._entries(seasons, playlist_id), playlist_id, strip_or_none(metadata.get('title')),
+            clean_html(traverse_obj(metadata, ('description', 'lead'), expected_type=strip_or_none)),
+            categories=[traverse_obj(metadata, ('mainCategory', 'name'))],
+            age_limit=int_or_none(metadata.get('rating')),
+        )

From c94df4d19d3af4120c9b674556acb1f1905c366f Mon Sep 17 00:00:00 2001
From: changren-wcr <105254603+changren-wcr@users.noreply.github.com>
Date: Mon, 7 Nov 2022 02:11:53 +0800
Subject: [PATCH 1716/2552] [extractor/qingting] Add extractor (#5329)

Closes #5323
Authored by: changren-wcr, bashonly
---
 yt_dlp/extractor/_extractors.py |  1 +
 yt_dlp/extractor/qingting.py    | 47 +++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+)
 create mode 100644 yt_dlp/extractor/qingting.py

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index d434a5460..1960692ef 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1431,6 +1431,7 @@ from .prx import (
 )
 from .puls4 import Puls4IE
 from .pyvideo import PyvideoIE
+from .qingting import QingTingIE
 from .qqmusic import (
     QQMusicIE,
     QQMusicSingerIE,
diff --git a/yt_dlp/extractor/qingting.py b/yt_dlp/extractor/qingting.py
new file mode 100644
index 000000000..aa690d492
--- /dev/null
+++ b/yt_dlp/extractor/qingting.py
@@ -0,0 +1,47 @@
+from .common import InfoExtractor
+
+from ..utils import traverse_obj
+
+
+class QingTingIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.|m\.)?(?:qingting\.fm|qtfm\.cn)/v?channels/(?P<channel>\d+)/programs/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.qingting.fm/channels/378005/programs/22257411/',
+        'md5': '47e6a94f4e621ed832c316fd1888fb3c',
+        'info_dict': {
+            'id': '22257411',
+            'title': '用了十年才修改,谁在乎教科书?',
+            'channel_id': '378005',
+            'channel': '睡前消息',
+            'uploader': '马督工',
+            'ext': 'm4a',
+        }
+    }, {
+        'url': 'https://m.qtfm.cn/vchannels/378005/programs/23023573/',
+        'md5': '2703120b6abe63b5fa90b975a58f4c0e',
+        'info_dict': {
+            'id': '23023573',
+            'title': '【睡前消息488】重庆山火之后,有图≠真相',
+            'channel_id': '378005',
+            'channel': '睡前消息',
+            'uploader': '马督工',
+            'ext': 'm4a',
+        }
+    }]
+
+    def _real_extract(self, url):
+        channel_id, pid = self._match_valid_url(url).group('channel', 'id')
+        webpage = self._download_webpage(
+            f'https://m.qtfm.cn/vchannels/{channel_id}/programs/{pid}/', pid)
+        info = self._search_json(r'window\.__initStores\s*=', webpage, 'program info', pid)
+        return {
+            'id': pid,
+            'title': traverse_obj(info, ('ProgramStore', 'programInfo', 'title')),
+            'channel_id': channel_id,
+            'channel': traverse_obj(info, ('ProgramStore', 'channelInfo', 'title')),
+            'uploader': traverse_obj(info, ('ProgramStore', 'podcasterInfo', 'podcaster', 'nickname')),
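
# The nested lookups above use yt_dlp.utils.traverse_obj, which walks a key path and
# returns None instead of raising when any level is missing, which is what makes
# optional fields like the podcaster nickname safe to read. Sample data invented:
from yt_dlp.utils import traverse_obj

info = {'ProgramStore': {'programInfo': {'title': 'demo'}}}
assert traverse_obj(info, ('ProgramStore', 'programInfo', 'title')) == 'demo'
assert traverse_obj(info, ('ProgramStore', 'podcasterInfo', 'podcaster', 'nickname')) is None
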
+ 'url': traverse_obj(info, ('ProgramStore', 'programInfo', 'audioUrl')), + 'vcodec': 'none', + 'acodec': 'm4a', + 'ext': 'm4a', + } From 0d2a0ecac3d721b4b01ebc2f00f922740961e515 Mon Sep 17 00:00:00 2001 From: Alex Karabanov <lksj@yandex.ru> Date: Sun, 6 Nov 2022 22:30:59 +0400 Subject: [PATCH 1717/2552] [extractor/listennotes] Add extractor (#5310) Closes #5262 Authored by: lksj, pukkandan --- yt_dlp/compat/__init__.py | 2 +- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/listennotes.py | 86 +++++++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 yt_dlp/extractor/listennotes.py diff --git a/yt_dlp/compat/__init__.py b/yt_dlp/compat/__init__.py index 6d85a6a1f..5d3db4b4c 100644 --- a/yt_dlp/compat/__init__.py +++ b/yt_dlp/compat/__init__.py @@ -14,7 +14,7 @@ passthrough_module(__name__, '._legacy', callback=lambda attr: warnings.warn( # HTMLParseError has been deprecated in Python 3.3 and removed in # Python 3.5. Introducing dummy exception for Python >3.5 for compatible # and uniform cross-version exception handling -class compat_HTMLParseError(Exception): +class compat_HTMLParseError(ValueError): pass diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 1960692ef..8c70d1585 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -912,6 +912,7 @@ from .linkedin import ( ) from .linuxacademy import LinuxAcademyIE from .liputan6 import Liputan6IE +from .listennotes import ListenNotesIE from .litv import LiTVIE from .livejournal import LiveJournalIE from .livestream import ( diff --git a/yt_dlp/extractor/listennotes.py b/yt_dlp/extractor/listennotes.py new file mode 100644 index 000000000..4ebc9be4d --- /dev/null +++ b/yt_dlp/extractor/listennotes.py @@ -0,0 +1,86 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + clean_html, + extract_attributes, + get_element_by_class, + get_element_html_by_id, + get_element_text_and_html_by_tag, + parse_duration, + strip_or_none, + traverse_obj, + try_call, +) + + +class ListenNotesIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?listennotes\.com/podcasts/[^/]+/[^/]+-(?P<id>.+)/' + _TESTS = [{ + 'url': 'https://www.listennotes.com/podcasts/thriving-on-overload/tim-oreilly-on-noticing-KrDgvNb_u1n/', + 'md5': '5b91a32f841e5788fb82b72a1a8af7f7', + 'info_dict': { + 'id': 'KrDgvNb_u1n', + 'ext': 'mp3', + 'title': 'md5:32236591a921adf17bbdbf0441b6c0e9', + 'description': 'md5:c581ed197eeddcee55a67cdb547c8cbd', + 'duration': 2148.0, + 'channel': 'Thriving on Overload', + 'channel_id': 'ed84wITivxF', + 'episode_id': 'e1312583fa7b4e24acfbb5131050be00', + 'thumbnail': 'https://production.listennotes.com/podcasts/thriving-on-overload-ross-dawson-1wb_KospA3P-ed84wITivxF.300x300.jpg', + 'channel_url': 'https://www.listennotes.com/podcasts/thriving-on-overload-ross-dawson-ed84wITivxF/', + 'cast': ['Tim O’Reilly', 'Cookie Monster', 'Lao Tzu', 'Wallace Steven', 'Eric Raymond', 'Christine Peterson', 'John Maynard Keyne', 'Ross Dawson'], + } + }, { + 'url': 'https://www.listennotes.com/podcasts/ask-noah-show/episode-177-wireguard-with-lwEA3154JzG/', + 'md5': '62fb4ffe7fc525632a1138bf72a5ce53', + 'info_dict': { + 'id': 'lwEA3154JzG', + 'ext': 'mp3', + 'title': 'Episode 177: WireGuard with Jason Donenfeld', + 'description': 'md5:24744f36456a3e95f83c1193a3458594', + 'duration': 3861.0, + 'channel': 'Ask Noah Show', + 'channel_id': '4DQTzdS5-j7', + 'episode_id': '8c8954b95e0b4859ad1eecec8bf6d3a4', + 'channel_url': 
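
# Further down, ListenNotesIE splits a combined meta string into duration and
# description with a single regex; in isolation it behaves like this (the sample
# string is invented for illustration):
import re

m = re.fullmatch(r'(?P<duration>[\d:]+)\s*-\s*(?P<description>.+)', '35:27 - What the episode covers')
assert m['duration'] == '35:27' and m['description'] == 'What the episode covers'
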
'https://www.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-4DQTzdS5-j7/', + 'thumbnail': 'https://production.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-cfbRUw9Gs3F-4DQTzdS5-j7.300x300.jpg', + 'cast': ['noah showlink', 'noah show', 'noah dashboard', 'jason donenfeld'], + } + }] + + def _clean_description(self, description): + return clean_html(re.sub(r'(</?(div|p)>\s*)+', '<br/><br/>', description or '')) + + def _real_extract(self, url): + audio_id = self._match_id(url) + webpage = self._download_webpage(url, audio_id) + data = self._search_json( + r'<script id="original-content"[^>]+\btype="application/json">', webpage, 'content', audio_id) + data.update(extract_attributes(get_element_html_by_id( + r'episode-play-button-toolbar|episode-no-play-button-toolbar', webpage, escape_value=False))) + + duration, description = self._search_regex( + r'(?P<duration>[\d:]+)\s*-\s*(?P<description>.+)', + self._html_search_meta(['og:description', 'description', 'twitter:description'], webpage), + 'description', fatal=False, group=('duration', 'description')) or (None, None) + + return { + 'id': audio_id, + 'url': data['audio'], + 'title': (data.get('data-title') + or try_call(lambda: get_element_text_and_html_by_tag('h1', webpage)[0]) + or self._html_search_meta(('og:title', 'title', 'twitter:title'), webpage, 'title')), + 'description': (self._clean_description(get_element_by_class('ln-text-p', webpage)) + or strip_or_none(description)), + 'duration': parse_duration(traverse_obj(data, 'audio_length', 'data-duration') or duration), + 'episode_id': traverse_obj(data, 'uuid', 'data-episode-uuid'), + **traverse_obj(data, { + 'thumbnail': 'data-image', + 'channel': 'data-channel-title', + 'cast': ('nlp_entities', ..., 'name'), + 'channel_url': 'channel_url', + 'channel_id': 'channel_short_uuid', + }) + } From cb1553e96601e92765dd8d70d549b8d551191e70 Mon Sep 17 00:00:00 2001 From: Jeff Huffman <tejing@tejing.com> Date: Sun, 6 Nov 2022 10:48:55 -0800 Subject: [PATCH 1718/2552] [extractor/crunchyroll] Beta is now the only layout (#5294) Closes #5292 Authored by: tejing1 --- README.md | 6 +- yt_dlp/extractor/_extractors.py | 2 - yt_dlp/extractor/crunchyroll.py | 712 ++------------------------------ 3 files changed, 26 insertions(+), 694 deletions(-) diff --git a/README.md b/README.md index 260d67e7f..962543738 100644 --- a/README.md +++ b/README.md @@ -1733,11 +1733,7 @@ The following extractors use this feature: * `language`: Audio languages to extract, e.g. `funimation:language=english,japanese` * `version`: The video version to extract - `uncut` or `simulcast` -#### crunchyroll -* `language`: Audio languages to extract, e.g. `crunchyroll:language=jaJp` -* `hardsub`: Which hard-sub versions to extract, e.g. `crunchyroll:hardsub=None,enUS` - -#### crunchyrollbeta +#### crunchyrollbeta (Crunchyroll) * `format`: Which stream type(s) to extract (default: `adaptive_hls`). Potentially useful values include `adaptive_hls`, `adaptive_dash`, `vo_adaptive_hls`, `vo_adaptive_dash`, `download_hls`, `download_dash`, `multitrack_adaptive_hls_v2` * `hardsub`: Preference order for which hardsub versions to extract, or `all` (default: `None` = no hardsubs), e.g. 
`crunchyrollbeta:hardsub=en-US,None` diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 8c70d1585..7612d291d 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -372,8 +372,6 @@ from .crowdbunker import ( CrowdBunkerChannelIE, ) from .crunchyroll import ( - CrunchyrollIE, - CrunchyrollShowPlaylistIE, CrunchyrollBetaIE, CrunchyrollBetaShowIE, ) diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py index 4f209e670..35752f1bd 100644 --- a/yt_dlp/extractor/crunchyroll.py +++ b/yt_dlp/extractor/crunchyroll.py @@ -1,40 +1,16 @@ import base64 -import json -import re -import urllib.request -import xml.etree.ElementTree -import zlib -from hashlib import sha1 -from math import floor, pow, sqrt +import urllib.parse from .common import InfoExtractor -from .vrv import VRVBaseIE -from ..aes import aes_cbc_decrypt -from ..compat import ( - compat_b64decode, - compat_etree_fromstring, - compat_str, - compat_urllib_parse_urlencode, - compat_urlparse, -) from ..utils import ( ExtractorError, - bytes_to_intlist, - extract_attributes, float_or_none, format_field, - int_or_none, - intlist_to_bytes, join_nonempty, - lowercase_escape, - merge_dicts, parse_iso8601, qualities, - remove_end, - sanitized_Request, traverse_obj, try_get, - xpath_text, ) @@ -42,16 +18,7 @@ class CrunchyrollBaseIE(InfoExtractor): _LOGIN_URL = 'https://www.crunchyroll.com/welcome/login' _API_BASE = 'https://api.crunchyroll.com' _NETRC_MACHINE = 'crunchyroll' - - def _call_rpc_api(self, method, video_id, note=None, data=None): - data = data or {} - data['req'] = 'RpcApi' + method - data = compat_urllib_parse_urlencode(data).encode('utf-8') - return self._download_xml( - 'https://www.crunchyroll.com/xml/', - video_id, note, fatal=False, data=data, headers={ - 'Content-Type': 'application/x-www-form-urlencoded', - }) + params = None def _perform_login(self, username, password): if self._get_cookies(self._LOGIN_URL).get('etp_rt'): @@ -72,7 +39,7 @@ class CrunchyrollBaseIE(InfoExtractor): login_response = self._download_json( f'{self._API_BASE}/login.1.json', None, 'Logging in', - data=compat_urllib_parse_urlencode({ + data=urllib.parse.urlencode({ 'account': username, 'password': password, 'session_id': session_id @@ -82,652 +49,23 @@ class CrunchyrollBaseIE(InfoExtractor): if not self._get_cookies(self._LOGIN_URL).get('etp_rt'): raise ExtractorError('Login succeeded but did not set etp_rt cookie') - # Beta-specific, but needed for redirects - def _get_beta_embedded_json(self, webpage, display_id): + def _get_embedded_json(self, webpage, display_id): initial_state = self._parse_json(self._search_regex( r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'initial state'), display_id) app_config = self._parse_json(self._search_regex( r'__APP_CONFIG__\s*=\s*({.+?})\s*;', webpage, 'app config'), display_id) return initial_state, app_config - def _redirect_to_beta(self, webpage, iekey, video_id): - if not self._get_cookies(self._LOGIN_URL).get('etp_rt'): - raise ExtractorError('Received a beta page from non-beta url when not logged in.') - initial_state, app_config = self._get_beta_embedded_json(webpage, video_id) - url = app_config['baseSiteUrl'] + initial_state['router']['locations']['current']['pathname'] - self.to_screen(f'{video_id}: Redirected to beta site - {url}') - return self.url_result(f'{url}', iekey, video_id) - - @staticmethod - def _add_skip_wall(url): - parsed_url = compat_urlparse.urlparse(url) - qs = 
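
# With the legacy extractors removed, stream and hardsub selection goes through the
# extractor args documented in the README hunk above, e.g. (the URL is a placeholder):
#
#   yt-dlp --extractor-args "crunchyrollbeta:hardsub=en-US" https://www.crunchyroll.com/watch/...
#
# where the hardsub value is a preference order and omitting it keeps the default of
# no hardsubs, per that README description.
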
compat_urlparse.parse_qs(parsed_url.query) - # Always force skip_wall to bypass maturity wall, namely 18+ confirmation message: - # > This content may be inappropriate for some people. - # > Are you sure you want to continue? - # since it's not disabled by default in crunchyroll account's settings. - # See https://github.com/ytdl-org/youtube-dl/issues/7202. - qs['skip_wall'] = ['1'] - return compat_urlparse.urlunparse( - parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True))) - - -class CrunchyrollIE(CrunchyrollBaseIE, VRVBaseIE): - IE_NAME = 'crunchyroll' - _VALID_URL = r'''(?x) - https?://(?:(?P<prefix>www|m)\.)?(?P<url> - crunchyroll\.(?:com|fr)/(?: - media(?:-|/\?id=)| - (?!series/|watch/)(?:[^/]+/){1,2}[^/?&#]*? - )(?P<id>[0-9]+) - )(?:[/?&#]|$)''' - - _TESTS = [{ - 'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', - 'info_dict': { - 'id': '645513', - 'ext': 'mp4', - 'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!', - 'description': 'md5:2d17137920c64f2f49981a7797d275ef', - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'Yomiuri Telecasting Corporation (YTV)', - 'upload_date': '20131013', - 'url': 're:(?!.*&)', - }, - 'params': { - # rtmp - 'skip_download': True, - }, - 'skip': 'Video gone', - }, { - 'url': 'http://www.crunchyroll.com/media-589804/culture-japan-1', - 'info_dict': { - 'id': '589804', - 'ext': 'flv', - 'title': 'Culture Japan Episode 1 – Rebuilding Japan after the 3.11', - 'description': 'md5:2fbc01f90b87e8e9137296f37b461c12', - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'Danny Choo Network', - 'upload_date': '20120213', - }, - 'params': { - # rtmp - 'skip_download': True, - }, - 'skip': 'Video gone', - }, { - 'url': 'http://www.crunchyroll.com/rezero-starting-life-in-another-world-/episode-5-the-morning-of-our-promise-is-still-distant-702409', - 'info_dict': { - 'id': '702409', - 'ext': 'mp4', - 'title': compat_str, - 'description': compat_str, - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'Re:Zero Partners', - 'timestamp': 1462098900, - 'upload_date': '20160501', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - 'url': 'http://www.crunchyroll.com/konosuba-gods-blessing-on-this-wonderful-world/episode-1-give-me-deliverance-from-this-judicial-injustice-727589', - 'info_dict': { - 'id': '727589', - 'ext': 'mp4', - 'title': compat_str, - 'description': compat_str, - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'Kadokawa Pictures Inc.', - 'timestamp': 1484130900, - 'upload_date': '20170111', - 'series': compat_str, - 'season': "KONOSUBA -God's blessing on this wonderful world! 
2", - 'season_number': 2, - 'episode': 'Give Me Deliverance From This Judicial Injustice!', - 'episode_number': 1, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - 'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697', - 'only_matching': True, - }, { - # geo-restricted (US), 18+ maturity wall, non-premium available - 'url': 'http://www.crunchyroll.com/cosplay-complex-ova/episode-1-the-birth-of-the-cosplay-club-565617', - 'only_matching': True, - }, { - # A description with double quotes - 'url': 'http://www.crunchyroll.com/11eyes/episode-1-piros-jszaka-red-night-535080', - 'info_dict': { - 'id': '535080', - 'ext': 'mp4', - 'title': compat_str, - 'description': compat_str, - 'uploader': 'Marvelous AQL Inc.', - 'timestamp': 1255512600, - 'upload_date': '20091014', - }, - 'params': { - # Just test metadata extraction - 'skip_download': True, - }, - }, { - # make sure we can extract an uploader name that's not a link - 'url': 'http://www.crunchyroll.com/hakuoki-reimeiroku/episode-1-dawn-of-the-divine-warriors-606899', - 'info_dict': { - 'id': '606899', - 'ext': 'mp4', - 'title': 'Hakuoki Reimeiroku Episode 1 – Dawn of the Divine Warriors', - 'description': 'Ryunosuke was left to die, but Serizawa-san asked him a simple question "Do you want to live?"', - 'uploader': 'Geneon Entertainment', - 'upload_date': '20120717', - }, - 'params': { - # just test metadata extraction - 'skip_download': True, - }, - 'skip': 'Video gone', - }, { - # A video with a vastly different season name compared to the series name - 'url': 'http://www.crunchyroll.com/nyarko-san-another-crawling-chaos/episode-1-test-590532', - 'info_dict': { - 'id': '590532', - 'ext': 'mp4', - 'title': compat_str, - 'description': compat_str, - 'uploader': 'TV TOKYO', - 'timestamp': 1330956000, - 'upload_date': '20120305', - 'series': 'Nyarko-san: Another Crawling Chaos', - 'season': 'Haiyoru! Nyaruani (ONA)', - }, - 'params': { - # Just test metadata extraction - 'skip_download': True, - }, - }, { - 'url': 'http://www.crunchyroll.com/media-723735', - 'only_matching': True, - }, { - 'url': 'https://www.crunchyroll.com/en-gb/mob-psycho-100/episode-2-urban-legends-encountering-rumors-780921', - 'only_matching': True, - }] - - _FORMAT_IDS = { - '360': ('60', '106'), - '480': ('61', '106'), - '720': ('62', '106'), - '1080': ('80', '108'), - } - - def _download_webpage(self, url_or_request, *args, **kwargs): - request = (url_or_request if isinstance(url_or_request, urllib.request.Request) - else sanitized_Request(url_or_request)) - # Accept-Language must be set explicitly to accept any language to avoid issues - # similar to https://github.com/ytdl-org/youtube-dl/issues/6797. - # Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction - # should be imposed or not (from what I can see it just takes the first language - # ignoring the priority and requires it to correspond the IP). By the way this causes - # Crunchyroll to not work in georestriction cases in some browsers that don't place - # the locale lang first in header. However allowing any language seems to workaround the issue. 
- request.add_header('Accept-Language', '*') - return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs) - - def _decrypt_subtitles(self, data, iv, id): - data = bytes_to_intlist(compat_b64decode(data)) - iv = bytes_to_intlist(compat_b64decode(iv)) - id = int(id) - - def obfuscate_key_aux(count, modulo, start): - output = list(start) - for _ in range(count): - output.append(output[-1] + output[-2]) - # cut off start values - output = output[2:] - output = list(map(lambda x: x % modulo + 33, output)) - return output - - def obfuscate_key(key): - num1 = int(floor(pow(2, 25) * sqrt(6.9))) - num2 = (num1 ^ key) << 5 - num3 = key ^ num1 - num4 = num3 ^ (num3 >> 3) ^ num2 - prefix = intlist_to_bytes(obfuscate_key_aux(20, 97, (1, 2))) - shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode('ascii')).digest()) - # Extend 160 Bit hash to 256 Bit - return shaHash + [0] * 12 - - key = obfuscate_key(id) - - decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv)) - return zlib.decompress(decrypted_data) - - def _convert_subtitles_to_srt(self, sub_root): - output = '' - - for i, event in enumerate(sub_root.findall('./events/event'), 1): - start = event.attrib['start'].replace('.', ',') - end = event.attrib['end'].replace('.', ',') - text = event.attrib['text'].replace('\\N', '\n') - output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text) - return output - - def _convert_subtitles_to_ass(self, sub_root): - output = '' - - def ass_bool(strvalue): - assvalue = '0' - if strvalue == '1': - assvalue = '-1' - return assvalue - - output = '[Script Info]\n' - output += 'Title: %s\n' % sub_root.attrib['title'] - output += 'ScriptType: v4.00+\n' - output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style'] - output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x'] - output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y'] - output += """ -[V4+ Styles] -Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding -""" - for style in sub_root.findall('./styles/style'): - output += 'Style: ' + style.attrib['name'] - output += ',' + style.attrib['font_name'] - output += ',' + style.attrib['font_size'] - output += ',' + style.attrib['primary_colour'] - output += ',' + style.attrib['secondary_colour'] - output += ',' + style.attrib['outline_colour'] - output += ',' + style.attrib['back_colour'] - output += ',' + ass_bool(style.attrib['bold']) - output += ',' + ass_bool(style.attrib['italic']) - output += ',' + ass_bool(style.attrib['underline']) - output += ',' + ass_bool(style.attrib['strikeout']) - output += ',' + style.attrib['scale_x'] - output += ',' + style.attrib['scale_y'] - output += ',' + style.attrib['spacing'] - output += ',' + style.attrib['angle'] - output += ',' + style.attrib['border_style'] - output += ',' + style.attrib['outline'] - output += ',' + style.attrib['shadow'] - output += ',' + style.attrib['alignment'] - output += ',' + style.attrib['margin_l'] - output += ',' + style.attrib['margin_r'] - output += ',' + style.attrib['margin_v'] - output += ',' + style.attrib['encoding'] - output += '\n' - - output += """ -[Events] -Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text -""" - for event in sub_root.findall('./events/event'): - output += 'Dialogue: 0' - output += ',' + event.attrib['start'] - output += ',' + event.attrib['end'] - output += ',' + 
event.attrib['style'] - output += ',' + event.attrib['name'] - output += ',' + event.attrib['margin_l'] - output += ',' + event.attrib['margin_r'] - output += ',' + event.attrib['margin_v'] - output += ',' + event.attrib['effect'] - output += ',' + event.attrib['text'] - output += '\n' - - return output - - def _extract_subtitles(self, subtitle): - sub_root = compat_etree_fromstring(subtitle) - return [{ - 'ext': 'srt', - 'data': self._convert_subtitles_to_srt(sub_root), - }, { - 'ext': 'ass', - 'data': self._convert_subtitles_to_ass(sub_root), - }] - - def _get_subtitles(self, video_id, webpage): - subtitles = {} - for sub_id, sub_name in re.findall(r'\bssid=([0-9]+)"[^>]+?\btitle="([^"]+)', webpage): - sub_doc = self._call_rpc_api( - 'Subtitle_GetXml', video_id, - 'Downloading subtitles for ' + sub_name, data={ - 'subtitle_script_id': sub_id, - }) - if not isinstance(sub_doc, xml.etree.ElementTree.Element): - continue - sid = sub_doc.get('id') - iv = xpath_text(sub_doc, 'iv', 'subtitle iv') - data = xpath_text(sub_doc, 'data', 'subtitle data') - if not sid or not iv or not data: - continue - subtitle = self._decrypt_subtitles(data, iv, sid).decode('utf-8') - lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False) - if not lang_code: - continue - subtitles[lang_code] = self._extract_subtitles(subtitle) - return subtitles - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - - if mobj.group('prefix') == 'm': - mobile_webpage = self._download_webpage(url, video_id, 'Downloading mobile webpage') - webpage_url = self._search_regex(r'<link rel="canonical" href="([^"]+)" />', mobile_webpage, 'webpage_url') - else: - webpage_url = 'http://www.' + mobj.group('url') - - webpage = self._download_webpage( - self._add_skip_wall(webpage_url), video_id, - headers=self.geo_verification_headers()) - if re.search(r'<div id="preload-data">', webpage): - return self._redirect_to_beta(webpage, CrunchyrollBetaIE.ie_key(), video_id) - note_m = self._html_search_regex( - r'<div class="showmedia-trailer-notice">(.+?)</div>', - webpage, 'trailer-notice', default='') - if note_m: - raise ExtractorError(note_m, expected=True) - - mobj = re.search(r'Page\.messaging_box_controller\.addItems\(\[(?P<msg>{.+?})\]\)', webpage) - if mobj: - msg = json.loads(mobj.group('msg')) - if msg.get('type') == 'error': - raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True) - - if 'To view this, please log in to verify you are 18 or older.' in webpage: - self.raise_login_required() - - media = self._parse_json(self._search_regex( - r'vilos\.config\.media\s*=\s*({.+?});', - webpage, 'vilos media', default='{}'), video_id) - media_metadata = media.get('metadata') or {} - - language = self._search_regex( - r'(?:vilos\.config\.player\.language|LOCALE)\s*=\s*(["\'])(?P<lang>(?:(?!\1).)+)\1', - webpage, 'language', default=None, group='lang') - - video_title = self._html_search_regex( - (r'(?s)<h1[^>]*>((?:(?!<h1).)*?<(?:span[^>]+itemprop=["\']title["\']|meta[^>]+itemprop=["\']position["\'])[^>]*>(?:(?!<h1).)+?)</h1>', - r'<title>(.+?),\s+-\s+.+? 
Crunchyroll'), - webpage, 'video_title', default=None) - if not video_title: - video_title = re.sub(r'^Watch\s+', '', self._og_search_description(webpage)) - video_title = re.sub(r' {2,}', ' ', video_title) - video_description = (self._parse_json(self._html_search_regex( - r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id, - webpage, 'description', default='{}'), video_id) or media_metadata).get('description') - - thumbnails = [] - thumbnail_url = (self._parse_json(self._html_search_regex( - r'<script type="application\/ld\+json">\n\s*(.+?)<\/script>', - webpage, 'thumbnail_url', default='{}'), video_id)).get('image') - if thumbnail_url: - thumbnails.append({ - 'url': thumbnail_url, - 'width': 1920, - 'height': 1080 - }) - - if video_description: - video_description = lowercase_escape(video_description.replace(r'\r\n', '\n')) - video_uploader = self._html_search_regex( - # try looking for both an uploader that's a link and one that's not - [r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'], - webpage, 'video_uploader', default=False) - - requested_languages = self._configuration_arg('language') - requested_hardsubs = [('' if val == 'none' else val) for val in self._configuration_arg('hardsub')] - language_preference = qualities((requested_languages or [language or ''])[::-1]) - hardsub_preference = qualities((requested_hardsubs or ['', language or ''])[::-1]) - - formats = [] - for stream in media.get('streams', []): - audio_lang = stream.get('audio_lang') or '' - hardsub_lang = stream.get('hardsub_lang') or '' - if (requested_languages and audio_lang.lower() not in requested_languages - or requested_hardsubs and hardsub_lang.lower() not in requested_hardsubs): - continue - vrv_formats = self._extract_vrv_formats( - stream.get('url'), video_id, stream.get('format'), - audio_lang, hardsub_lang) - for f in vrv_formats: - f['language_preference'] = language_preference(audio_lang) - f['quality'] = hardsub_preference(hardsub_lang) - formats.extend(vrv_formats) - if not formats: - available_fmts = [] - for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage): - attrs = extract_attributes(a) - href = attrs.get('href') - if href and '/freetrial' in href: - continue - available_fmts.append(fmt) - if not available_fmts: - for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'): - available_fmts = re.findall(p, webpage) - if available_fmts: - break - if not available_fmts: - available_fmts = self._FORMAT_IDS.keys() - video_encode_ids = [] - - for fmt in available_fmts: - stream_quality, stream_format = self._FORMAT_IDS[fmt] - video_format = fmt + 'p' - stream_infos = [] - streamdata = self._call_rpc_api( - 'VideoPlayer_GetStandardConfig', video_id, - 'Downloading media info for %s' % video_format, data={ - 'media_id': video_id, - 'video_format': stream_format, - 'video_quality': stream_quality, - 'current_page': url, - }) - if isinstance(streamdata, xml.etree.ElementTree.Element): - stream_info = streamdata.find('./{default}preload/stream_info') - if stream_info is not None: - stream_infos.append(stream_info) - stream_info = self._call_rpc_api( - 'VideoEncode_GetStreamInfo', video_id, - 'Downloading stream info for %s' % video_format, data={ - 'media_id': video_id, - 'video_format': stream_format, - 'video_encode_quality': stream_quality, - }) - if isinstance(stream_info, xml.etree.ElementTree.Element): - stream_infos.append(stream_info) - for 
stream_info in stream_infos: - video_encode_id = xpath_text(stream_info, './video_encode_id') - if video_encode_id in video_encode_ids: - continue - video_encode_ids.append(video_encode_id) - - video_file = xpath_text(stream_info, './file') - if not video_file: - continue - if video_file.startswith('http'): - formats.extend(self._extract_m3u8_formats( - video_file, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - continue - - video_url = xpath_text(stream_info, './host') - if not video_url: - continue - metadata = stream_info.find('./metadata') - format_info = { - 'format': video_format, - 'height': int_or_none(xpath_text(metadata, './height')), - 'width': int_or_none(xpath_text(metadata, './width')), - } - - if '.fplive.net/' in video_url: - video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip()) - parsed_video_url = compat_urlparse.urlparse(video_url) - direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace( - netloc='v.lvlt.crcdn.net', - path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1]))) - if self._is_valid_url(direct_video_url, video_id, video_format): - format_info.update({ - 'format_id': 'http-' + video_format, - 'url': direct_video_url, - }) - formats.append(format_info) - continue - - format_info.update({ - 'format_id': 'rtmp-' + video_format, - 'url': video_url, - 'play_path': video_file, - 'ext': 'flv', - }) - formats.append(format_info) - self._sort_formats(formats) - - metadata = self._call_rpc_api( - 'VideoPlayer_GetMediaMetadata', video_id, - note='Downloading media info', data={ - 'media_id': video_id, - }) - - subtitles = {} - for subtitle in media.get('subtitles', []): - subtitle_url = subtitle.get('url') - if not subtitle_url: - continue - subtitles.setdefault(subtitle.get('language', 'enUS'), []).append({ - 'url': subtitle_url, - 'ext': subtitle.get('format', 'ass'), - }) - if not subtitles: - subtitles = self.extract_subtitles(video_id, webpage) - - # webpage provide more accurate data than series_title from XML - series = self._html_search_regex( - r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d', - webpage, 'series', fatal=False) - - season = episode = episode_number = duration = None - - if isinstance(metadata, xml.etree.ElementTree.Element): - season = xpath_text(metadata, 'series_title') - episode = xpath_text(metadata, 'episode_title') - episode_number = int_or_none(xpath_text(metadata, 'episode_number')) - duration = float_or_none(media_metadata.get('duration'), 1000) - - if not episode: - episode = media_metadata.get('title') - if not episode_number: - episode_number = int_or_none(media_metadata.get('episode_number')) - thumbnail_url = try_get(media, lambda x: x['thumbnail']['url']) - if thumbnail_url: - thumbnails.append({ - 'url': thumbnail_url, - 'width': 640, - 'height': 360 - }) - - season_number = int_or_none(self._search_regex( - r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)', - webpage, 'season number', default=None)) - - info = self._search_json_ld(webpage, video_id, default={}) - - return merge_dicts({ - 'id': video_id, - 'title': video_title, - 'description': video_description, - 'duration': duration, - 'thumbnails': thumbnails, - 'uploader': video_uploader, - 'series': series, - 'season': season, - 'season_number': season_number, - 'episode': episode, - 'episode_number': episode_number, - 'subtitles': subtitles, - 'formats': formats, - }, info) - - -class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE): 
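# A small sketch (not part of this patch) of how yt_dlp.utils.qualities
# drives the language_preference/hardsub_preference callables assigned in
# _real_extract above; the language codes are made up for illustration.
from yt_dlp.utils import qualities

rank = qualities(['jaJP', 'enUS'])  # later entries outrank earlier ones,
                                    # hence the [::-1] on the user's lists above
assert rank('enUS') > rank('jaJP')  # enUS is preferred here
assert rank('deDE') == -1           # unknown values sort below all known ones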
- IE_NAME = 'crunchyroll:playlist' - _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:\w{2}(?:-\w{2})?/)?(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)' - - _TESTS = [{ - 'url': 'https://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi', - 'info_dict': { - 'id': 'a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi', - 'title': 'A Bridge to the Starry Skies - Hoshizora e Kakaru Hashi' - }, - 'playlist_count': 13, - }, { - # geo-restricted (US), 18+ maturity wall, non-premium available - 'url': 'http://www.crunchyroll.com/cosplay-complex-ova', - 'info_dict': { - 'id': 'cosplay-complex-ova', - 'title': 'Cosplay Complex OVA' - }, - 'playlist_count': 3, - 'skip': 'Georestricted', - }, { - # geo-restricted (US), 18+ maturity wall, non-premium will be available since 2015.11.14 - 'url': 'http://www.crunchyroll.com/ladies-versus-butlers?skip_wall=1', - 'only_matching': True, - }, { - 'url': 'http://www.crunchyroll.com/fr/ladies-versus-butlers', - 'only_matching': True, - }] - - def _real_extract(self, url): - show_id = self._match_id(url) - - webpage = self._download_webpage( - # https:// gives a 403, but http:// does not - self._add_skip_wall(url).replace('https://', 'http://'), show_id, - headers=self.geo_verification_headers()) - if re.search(r'<div id="preload-data">', webpage): - return self._redirect_to_beta(webpage, CrunchyrollBetaShowIE.ie_key(), show_id) - title = self._html_search_meta('name', webpage, default=None) - - episode_re = r'<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"' - season_re = r'<a [^>]+season-dropdown[^>]+>([^<]+)' - paths = re.findall(f'(?s){episode_re}|{season_re}', webpage) - - entries, current_season = [], None - for ep_id, ep, season in paths: - if season: - current_season = season - continue - entries.append(self.url_result( - f'http://www.crunchyroll.com{ep}', CrunchyrollIE.ie_key(), ep_id, season=current_season)) - - return { - '_type': 'playlist', - 'id': show_id, - 'title': title, - 'entries': reversed(entries), - } - - -class CrunchyrollBetaBaseIE(CrunchyrollBaseIE): - params = None - def _get_params(self, lang): - if not CrunchyrollBetaBaseIE.params: - if self._get_cookies(f'https://beta.crunchyroll.com/{lang}').get('etp_rt'): + if not CrunchyrollBaseIE.params: + if self._get_cookies(f'https://www.crunchyroll.com/{lang}').get('etp_rt'): grant_type, key = 'etp_rt_cookie', 'accountAuthClientId' else: grant_type, key = 'client_id', 'anonClientId' - initial_state, app_config = self._get_beta_embedded_json(self._download_webpage( - f'https://beta.crunchyroll.com/{lang}', None, note='Retrieving main page'), None) - api_domain = app_config['cxApiParams']['apiDomain'] + initial_state, app_config = self._get_embedded_json(self._download_webpage( + f'https://www.crunchyroll.com/{lang}', None, note='Retrieving main page'), None) + api_domain = app_config['cxApiParams']['apiDomain'].replace('beta.crunchyroll.com', 'www.crunchyroll.com') auth_response = self._download_json( f'{api_domain}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}', @@ -739,7 +77,7 @@ class CrunchyrollBetaBaseIE(CrunchyrollBaseIE): headers={ 'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token'] }) - cms = traverse_obj(policy_response, 'cms_beta', 'cms') + cms = policy_response.get('cms_web') bucket = cms['bucket'] params = { 'Policy': cms['policy'], @@ -749,19 +87,19 @@ class 
CrunchyrollBetaBaseIE(CrunchyrollBaseIE): locale = traverse_obj(initial_state, ('localization', 'locale')) if locale: params['locale'] = locale - CrunchyrollBetaBaseIE.params = (api_domain, bucket, params) - return CrunchyrollBetaBaseIE.params + CrunchyrollBaseIE.params = (api_domain, bucket, params) + return CrunchyrollBaseIE.params -class CrunchyrollBetaIE(CrunchyrollBetaBaseIE): - IE_NAME = 'crunchyroll:beta' +class CrunchyrollBetaIE(CrunchyrollBaseIE): + IE_NAME = 'crunchyroll' _VALID_URL = r'''(?x) - https?://beta\.crunchyroll\.com/ + https?://(?:beta|www)\.crunchyroll\.com/ (?P<lang>(?:\w{2}(?:-\w{2})?/)?) watch/(?P<id>\w+) (?:/(?P<display_id>[\w-]+))?/?(?:[?#]|$)''' _TESTS = [{ - 'url': 'https://beta.crunchyroll.com/watch/GY2P1Q98Y/to-the-future', + 'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y/to-the-future', 'info_dict': { 'id': 'GY2P1Q98Y', 'ext': 'mp4', @@ -777,11 +115,11 @@ class CrunchyrollBetaIE(CrunchyrollBetaBaseIE): 'season_number': 1, 'episode': 'To the Future', 'episode_number': 73, - 'thumbnail': r're:^https://beta.crunchyroll.com/imgsrv/.*\.jpeg$', + 'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg$', }, 'params': {'skip_download': 'm3u8', 'format': 'all[format_id~=hardsub]'}, }, { - 'url': 'https://beta.crunchyroll.com/watch/GYE5WKQGR', + 'url': 'https://www.crunchyroll.com/watch/GYE5WKQGR', 'info_dict': { 'id': 'GYE5WKQGR', 'ext': 'mp4', @@ -797,12 +135,12 @@ class CrunchyrollBetaIE(CrunchyrollBetaBaseIE): 'season_number': 1, 'episode': 'Porter Robinson presents Shelter the Animation', 'episode_number': 0, - 'thumbnail': r're:^https://beta.crunchyroll.com/imgsrv/.*\.jpeg$', + 'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg$', }, 'params': {'skip_download': True}, 'skip': 'Video is Premium only', }, { - 'url': 'https://beta.crunchyroll.com/watch/GY2P1Q98Y', + 'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y', 'only_matching': True, }, { 'url': 'https://beta.crunchyroll.com/pt-br/watch/G8WUN8VKP/the-ruler-of-conspiracy', @@ -901,15 +239,15 @@ class CrunchyrollBetaIE(CrunchyrollBetaBaseIE): } -class CrunchyrollBetaShowIE(CrunchyrollBetaBaseIE): - IE_NAME = 'crunchyroll:playlist:beta' +class CrunchyrollBetaShowIE(CrunchyrollBaseIE): + IE_NAME = 'crunchyroll:playlist' _VALID_URL = r'''(?x) - https?://beta\.crunchyroll\.com/ + https?://(?:beta|www)\.crunchyroll\.com/ (?P<lang>(?:\w{2}(?:-\w{2})?/)?) 
series/(?P<id>\w+) (?:/(?P<display_id>[\w-]+))?/?(?:[?#]|$)''' _TESTS = [{ - 'url': 'https://beta.crunchyroll.com/series/GY19NQ2QR/Girl-Friend-BETA', + 'url': 'https://www.crunchyroll.com/series/GY19NQ2QR/Girl-Friend-BETA', 'info_dict': { 'id': 'GY19NQ2QR', 'title': 'Girl Friend BETA', @@ -942,7 +280,7 @@ class CrunchyrollBetaShowIE(CrunchyrollBetaBaseIE): episode_display_id = episode['slug_title'] yield { '_type': 'url', - 'url': f'https://beta.crunchyroll.com/{lang}watch/{episode_id}/{episode_display_id}', + 'url': f'https://www.crunchyroll.com/{lang}watch/{episode_id}/{episode_display_id}', 'ie_key': CrunchyrollBetaIE.ie_key(), 'id': episode_id, 'title': '%s Episode %s – %s' % (episode.get('season_title'), episode.get('episode'), episode.get('title')), From 96b9e9cf62c81b005242da418f092e45709a5123 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 6 Nov 2022 19:05:09 +0000 Subject: [PATCH 1719/2552] [extractor/telegram] Add playlist support and more metadata (#5358) Authored by: bashonly, bsun0000 --- yt_dlp/extractor/telegram.py | 146 +++++++++++++++++++++++++++++------ yt_dlp/utils.py | 4 +- 2 files changed, 123 insertions(+), 27 deletions(-) diff --git a/yt_dlp/extractor/telegram.py b/yt_dlp/extractor/telegram.py index bb9ca8c45..39f1a628a 100644 --- a/yt_dlp/extractor/telegram.py +++ b/yt_dlp/extractor/telegram.py @@ -1,41 +1,137 @@ +import re + from .common import InfoExtractor -from ..utils import clean_html, get_element_by_class +from ..utils import ( + clean_html, + format_field, + get_element_by_class, + parse_duration, + parse_qs, + traverse_obj, + unified_timestamp, + update_url_query, + url_basename, +) class TelegramEmbedIE(InfoExtractor): IE_NAME = 'telegram:embed' - _VALID_URL = r'https?://t\.me/(?P<channel_name>[^/]+)/(?P<id>\d+)' + _VALID_URL = r'https?://t\.me/(?P<channel_id>[^/]+)/(?P<id>\d+)' _TESTS = [{ 'url': 'https://t.me/europa_press/613', + 'md5': 'dd707708aea958c11a590e8068825f22', 'info_dict': { 'id': '613', 'ext': 'mp4', - 'title': 'Europa Press', - 'description': '6ce2d7e8d56eda16d80607b23db7b252', - 'thumbnail': r're:^https?:\/\/cdn.*?telesco\.pe\/file\/\w+', + 'title': 'md5:6ce2d7e8d56eda16d80607b23db7b252', + 'description': 'md5:6ce2d7e8d56eda16d80607b23db7b252', + 'channel_id': 'europa_press', + 'channel': 'Europa Press ✔', + 'thumbnail': r're:^https?://.+', + 'timestamp': 1635631203, + 'upload_date': '20211030', + 'duration': 61, + }, + }, { + # 2-video post + 'url': 'https://t.me/vorposte/29342', + 'info_dict': { + 'id': 'vorposte-29342', + 'title': 'Форпост 29342', + 'description': 'md5:9d92e22169a3e136d5d69df25f82c3dc', + }, + 'playlist_count': 2, + 'params': { + 'skip_download': True, + }, + }, { + # 2-video post with --no-playlist + 'url': 'https://t.me/vorposte/29343', + 'md5': '1724e96053c18e788c8464038876e245', + 'info_dict': { + 'id': '29343', + 'ext': 'mp4', + 'title': 'md5:9d92e22169a3e136d5d69df25f82c3dc', + 'description': 'md5:9d92e22169a3e136d5d69df25f82c3dc', + 'channel_id': 'vorposte', + 'channel': 'Форпост', + 'thumbnail': r're:^https?://.+', + 'timestamp': 1666384480, + 'upload_date': '20221021', + 'duration': 35, + }, + 'params': { + 'noplaylist': True, + } + }, { + # 2-video post with 'single' query param + 'url': 'https://t.me/vorposte/29342?single', + 'md5': 'd20b202f1e41400a9f43201428add18f', + 'info_dict': { + 'id': '29342', + 'ext': 'mp4', + 'title': 'md5:9d92e22169a3e136d5d69df25f82c3dc', + 'description': 'md5:9d92e22169a3e136d5d69df25f82c3dc', + 'channel_id': 'vorposte', + 
'channel': 'Форпост', + 'thumbnail': r're:^https?://.+', + 'timestamp': 1666384480, + 'upload_date': '20221021', + 'duration': 33, }, }] def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id, query={'embed': 0}) - webpage_embed = self._download_webpage(url, video_id, query={'embed': 1}, note='Downloading ermbed page') + channel_id, msg_id = self._match_valid_url(url).group('channel_id', 'id') + embed = self._download_webpage( + url, msg_id, query={'embed': '1', 'single': []}, note='Downloading embed frame') - formats = [{ - 'url': self._proto_relative_url(self._search_regex( - '<video[^>]+src="([^"]+)"', webpage_embed, 'source')), - 'ext': 'mp4', - }] - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None), - 'description': self._html_search_meta( - ['og:description', 'twitter:description'], webpage, - default=clean_html(get_element_by_class('tgme_widget_message_text', webpage_embed))), - 'thumbnail': self._search_regex( - r'tgme_widget_message_video_thumb"[^>]+background-image:url\(\'([^\']+)\'\)', - webpage_embed, 'thumbnail'), - 'formats': formats, + def clean_text(html_class, html): + text = clean_html(get_element_by_class(html_class, html)) + return text.replace('\n', ' ') if text else None + + description = clean_text('tgme_widget_message_text', embed) + message = { + 'title': description or '', + 'description': description, + 'channel': clean_text('tgme_widget_message_author', embed), + 'channel_id': channel_id, + 'timestamp': unified_timestamp(self._search_regex( + r'<time[^>]*datetime="([^"]*)"', embed, 'timestamp', fatal=False)), } + + videos = [] + for video in re.findall(r'<a class="tgme_widget_message_video_player(?s:.+?)</time>', embed): + video_url = self._search_regex( + r'<video[^>]+src="([^"]+)"', video, 'video URL', fatal=False) + webpage_url = self._search_regex( + r'<a class="tgme_widget_message_video_player[^>]+href="([^"]+)"', + video, 'webpage URL', fatal=False) + if not video_url or not webpage_url: + continue + formats = [{ + 'url': video_url, + 'ext': 'mp4', + }] + self._sort_formats(formats) + videos.append({ + 'id': url_basename(webpage_url), + 'webpage_url': update_url_query(webpage_url, {'single': True}), + 'duration': parse_duration(self._search_regex( + r'<time[^>]+duration[^>]*>([\d:]+)</time>', video, 'duration', fatal=False)), + 'thumbnail': self._search_regex( + r'tgme_widget_message_video_thumb"[^>]+background-image:url\(\'([^\']+)\'\)', + video, 'thumbnail', fatal=False), + 'formats': formats, + **message, + }) + + playlist_id = None + if len(videos) > 1 and 'single' not in parse_qs(url, keep_blank_values=True): + playlist_id = f'{channel_id}-{msg_id}' + + if self._yes_playlist(playlist_id, msg_id): + return self.playlist_result( + videos, playlist_id, format_field(message, 'channel', f'%s {msg_id}'), description) + else: + return traverse_obj(videos, lambda _, x: x['id'] == msg_id, get_all=False) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index cfc7ba63a..84a8ecd6e 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -3092,8 +3092,8 @@ def escape_url(url): ).geturl() -def parse_qs(url): - return urllib.parse.parse_qs(urllib.parse.urlparse(url).query) +def parse_qs(url, **kwargs): + return urllib.parse.parse_qs(urllib.parse.urlparse(url).query, **kwargs) def read_batch_urls(batch_fd): From ac8e69dd3238c03eb40c267a090173abaac99a3a Mon Sep 17 00:00:00 2001 From: Matthew <coletdjnz@protonmail.com> 
Date: Mon, 7 Nov 2022 09:30:55 +1300 Subject: [PATCH 1720/2552] Do not backport Python 3.10 SSL configuration for LibreSSL (#5464) Until further investigation. Fixes regression in https://github.com/yt-dlp/yt-dlp/commit/5b9f253fa0aee996cf1ed30185d4b502e00609c4 Authored by: coletdjnz --- yt_dlp/utils.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 84a8ecd6e..1532d22ac 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -986,16 +986,23 @@ def make_HTTPS_handler(params, **kwargs): context.options |= 4 # SSL_OP_LEGACY_SERVER_CONNECT # Allow use of weaker ciphers in Python 3.10+. See https://bugs.python.org/issue43998 context.set_ciphers('DEFAULT') - elif sys.version_info < (3, 10) and ssl.OPENSSL_VERSION_INFO >= (1, 1, 1): + elif ( + sys.version_info < (3, 10) + and ssl.OPENSSL_VERSION_INFO >= (1, 1, 1) + and not ssl.OPENSSL_VERSION.startswith('LibreSSL') + ): # Backport the default SSL ciphers and minimum TLS version settings from Python 3.10 [1]. # This is to ensure consistent behavior across Python versions, and help avoid fingerprinting # in some situations [2][3]. # Python 3.10 only supports OpenSSL 1.1.1+ [4]. Because this change is likely # untested on older versions, we only apply this to OpenSSL 1.1.1+ to be safe. + # LibreSSL is excluded until further investigation due to cipher support issues [5][6]. # 1. https://github.com/python/cpython/commit/e983252b516edb15d4338b0a47631b59ef1e2536 # 2. https://github.com/yt-dlp/yt-dlp/issues/4627 # 3. https://github.com/yt-dlp/yt-dlp/pull/5294 # 4. https://peps.python.org/pep-0644/ + # 5. https://peps.python.org/pep-0644/#libressl-support + # 6. https://github.com/yt-dlp/yt-dlp/commit/5b9f253fa0aee996cf1ed30185d4b502e00609c4#commitcomment-89054368 context.set_ciphers('@SECLEVEL=2:ECDH+AESGCM:ECDH+CHACHA20:ECDH+AES:DHE+AES:!aNULL:!eNULL:!aDSS:!SHA1:!AESCCM') context.minimum_version = ssl.TLSVersion.TLSv1_2 From a349d4d6415e9aa0fb11c674e405d57fa13cc7fd Mon Sep 17 00:00:00 2001 From: zulaport <70630440+zulaport@users.noreply.github.com> Date: Sun, 6 Nov 2022 12:39:09 -0800 Subject: [PATCH 1721/2552] [extractor/stripchat] Fix hostname for HLS stream (#5445) Closes #5227 Authored by: zulaport --- yt_dlp/extractor/stripchat.py | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/yt_dlp/extractor/stripchat.py b/yt_dlp/extractor/stripchat.py index 2e84729bd..8cd8388aa 100644 --- a/yt_dlp/extractor/stripchat.py +++ b/yt_dlp/extractor/stripchat.py @@ -1,22 +1,15 @@ from .common import InfoExtractor -from ..compat import ( - compat_str, -) -from ..utils import ( - ExtractorError, - lowercase_escape, - try_get, -) +from ..utils import ExtractorError, lowercase_escape, traverse_obj class StripchatIE(InfoExtractor): _VALID_URL = r'https?://stripchat\.com/(?P<id>[^/?#]+)' _TESTS = [{ - 'url': 'https://stripchat.com/feel_me', + 'url': 'https://stripchat.com/Joselin_Flower', 'info_dict': { - 'id': 'feel_me', + 'id': 'Joselin_Flower', 'ext': 'mp4', - 'title': 're:^feel_me [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'title': 're:^Joselin_Flower [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'description': str, 'is_live': True, 'age_limit': 18, @@ -39,18 +32,22 @@ class StripchatIE(InfoExtractor): if not data: raise ExtractorError('Unable to find configuration for stream.') - if try_get(data, lambda x: x['viewCam']['show'], dict): + if traverse_obj(data, ('viewCam', 'show'), expected_type=dict): raise ExtractorError('Model is in private 
show', expected=True) - elif not try_get(data, lambda x: x['viewCam']['model']['isLive'], bool): + elif not traverse_obj(data, ('viewCam', 'model', 'isLive'), expected_type=bool): raise ExtractorError('Model is offline', expected=True) - server = try_get(data, lambda x: x['viewCam']['viewServers']['flashphoner-hls'], compat_str) - host = try_get(data, lambda x: x['config']['data']['hlsStreamHost'], compat_str) - model_id = try_get(data, lambda x: x['viewCam']['model']['id'], int) + server = traverse_obj(data, ('viewCam', 'viewServers', 'flashphoner-hls'), expected_type=str) + model_id = traverse_obj(data, ('viewCam', 'model', 'id'), expected_type=int) + + for host in traverse_obj(data, ( + 'config', 'data', (('featuresV2', 'hlsFallback', 'fallbackDomains', ...), 'hlsStreamHost'))): + formats = self._extract_m3u8_formats( + f'https://b-{server}.{host}/hls/{model_id}/{model_id}.m3u8', + video_id, ext='mp4', m3u8_id='hls', fatal=False, live=True) + if formats: + break - formats = self._extract_m3u8_formats( - 'https://b-%s.%s/hls/%d/%d.m3u8' % (server, host, model_id, model_id), - video_id, ext='mp4', m3u8_id='hls', fatal=False, live=True) self._sort_formats(formats) return { From db4678e448d6e7da9743f4028c94b540fcafc528 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Mon, 7 Nov 2022 01:16:33 +0530 Subject: [PATCH 1722/2552] Update to ytdl-commit-de39d128 [extractor/ceskatelevize] Back-port extractor from yt-dlp https://github.com/ytdl-org/youtube-dl/commit/de39d1281cea499cb1adfce5ff7e0a56f1bad5fe Closes #5361, Closes #4634, Closes #5210 --- test/test_aes.py | 4 +- yt_dlp/aes.py | 20 +++- yt_dlp/compat/_legacy.py | 1 + yt_dlp/extractor/adn.py | 44 ++++---- yt_dlp/extractor/ceskatelevize.py | 71 +++++++++---- yt_dlp/extractor/manyvids.py | 123 +++++++++++++++++----- yt_dlp/extractor/motherless.py | 29 +++--- yt_dlp/extractor/neteasemusic.py | 164 ++++++++++++++++++++++-------- yt_dlp/extractor/nrk.py | 3 +- yt_dlp/extractor/vimeo.py | 2 +- yt_dlp/extractor/zdf.py | 130 +++++++++++------------ yt_dlp/utils.py | 3 +- 12 files changed, 389 insertions(+), 205 deletions(-) diff --git a/test/test_aes.py b/test/test_aes.py index b26af5605..8e8fc0b3e 100644 --- a/test/test_aes.py +++ b/test/test_aes.py @@ -11,7 +11,6 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import base64 from yt_dlp.aes import ( - BLOCK_SIZE_BYTES, aes_cbc_decrypt, aes_cbc_decrypt_bytes, aes_cbc_encrypt, @@ -103,8 +102,7 @@ class TestAES(unittest.TestCase): def test_ecb_encrypt(self): data = bytes_to_intlist(self.secret_msg) - data += [0x08] * (BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES) - encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key, self.iv)) + encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key)) self.assertEqual( encrypted, b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:') diff --git a/yt_dlp/aes.py b/yt_dlp/aes.py index b428c682b..60ce99cb1 100644 --- a/yt_dlp/aes.py +++ b/yt_dlp/aes.py @@ -28,11 +28,23 @@ def aes_cbc_encrypt_bytes(data, key, iv, **kwargs): return intlist_to_bytes(aes_cbc_encrypt(*map(bytes_to_intlist, (data, key, iv)), **kwargs)) +BLOCK_SIZE_BYTES = 16 + + def unpad_pkcs7(data): return data[:-compat_ord(data[-1])] -BLOCK_SIZE_BYTES = 16 +def pkcs7_padding(data): + """ + PKCS#7 padding + + @param {int[]} data cleartext + @returns {int[]} padding data + """ + + remaining_length = BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES + return data + [remaining_length] * 
remaining_length def pad_block(block, padding_mode): @@ -64,7 +76,7 @@ def pad_block(block, padding_mode): def aes_ecb_encrypt(data, key, iv=None): """ - Encrypt with aes in ECB mode + Encrypt with aes in ECB mode. Using PKCS#7 padding @param {int[]} data cleartext @param {int[]} key 16/24/32-Byte cipher key @@ -77,8 +89,7 @@ def aes_ecb_encrypt(data, key, iv=None): encrypted_data = [] for i in range(block_count): block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] - encrypted_data += aes_encrypt(block, expanded_key) - encrypted_data = encrypted_data[:len(data)] + encrypted_data += aes_encrypt(pkcs7_padding(block), expanded_key) return encrypted_data @@ -551,5 +562,6 @@ __all__ = [ 'key_expansion', 'pad_block', + 'pkcs7_padding', 'unpad_pkcs7', ] diff --git a/yt_dlp/compat/_legacy.py b/yt_dlp/compat/_legacy.py index 09259c988..d19333d31 100644 --- a/yt_dlp/compat/_legacy.py +++ b/yt_dlp/compat/_legacy.py @@ -48,6 +48,7 @@ def compat_setenv(key, value, env=os.environ): compat_basestring = str +compat_casefold = str.casefold compat_chr = chr compat_collections_abc = collections.abc compat_cookiejar = http.cookiejar diff --git a/yt_dlp/extractor/adn.py b/yt_dlp/extractor/adn.py index 18ddc5729..16f648de3 100644 --- a/yt_dlp/extractor/adn.py +++ b/yt_dlp/extractor/adn.py @@ -28,30 +28,34 @@ from ..utils import ( class ADNIE(InfoExtractor): - IE_DESC = 'Anime Digital Network' - _VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)' - _TEST = { - 'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites', - 'md5': '0319c99885ff5547565cacb4f3f9348d', + IE_DESC = 'Animation Digital Network' + _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.fr/video/[^/]+/(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir', + 'md5': '1c9ef066ceb302c86f80c2b371615261', 'info_dict': { - 'id': '7778', + 'id': '9841', 'ext': 'mp4', - 'title': 'Blue Exorcist - Kyôto Saga - Episode 1', - 'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5', - 'series': 'Blue Exorcist - Kyôto Saga', - 'duration': 1467, - 'release_date': '20170106', + 'title': 'Fruits Basket - Episode 1', + 'description': 'md5:14be2f72c3c96809b0ca424b0097d336', + 'series': 'Fruits Basket', + 'duration': 1437, + 'release_date': '20190405', 'comment_count': int, 'average_rating': float, - 'season_number': 2, - 'episode': 'Début des hostilités', + 'season_number': 1, + 'episode': 'À ce soir !', 'episode_number': 1, - } - } + }, + 'skip': 'Only available in region (FR, ...)', + }, { + 'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites', + 'only_matching': True, + }] - _NETRC_MACHINE = 'animedigitalnetwork' - _BASE_URL = 'http://animedigitalnetwork.fr' - _API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/' + _NETRC_MACHINE = 'animationdigitalnetwork' + _BASE = 'animationdigitalnetwork.fr' + _API_BASE_URL = 'https://gw.api.' 
+ _BASE + '/' _PLAYER_BASE_URL = _API_BASE_URL + 'player/' _HEADERS = {} _LOGIN_ERR_MESSAGE = 'Unable to log in' @@ -75,11 +79,11 @@ class ADNIE(InfoExtractor): if subtitle_location: enc_subtitles = self._download_webpage( subtitle_location, video_id, 'Downloading subtitles data', - fatal=False, headers={'Origin': 'https://animedigitalnetwork.fr'}) + fatal=False, headers={'Origin': 'https://' + self._BASE}) if not enc_subtitles: return None - # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js + # http://animationdigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js dec_subtitles = unpad_pkcs7(aes_cbc_decrypt_bytes( compat_b64decode(enc_subtitles[24:]), binascii.unhexlify(self._K + '7fac1178830cfe0c'), diff --git a/yt_dlp/extractor/ceskatelevize.py b/yt_dlp/extractor/ceskatelevize.py index 331b350f1..5f4c447f2 100644 --- a/yt_dlp/extractor/ceskatelevize.py +++ b/yt_dlp/extractor/ceskatelevize.py @@ -9,6 +9,7 @@ from ..utils import ( ExtractorError, float_or_none, sanitized_Request, + str_or_none, traverse_obj, urlencode_postdata, USER_AGENTS, @@ -16,13 +17,13 @@ from ..utils import ( class CeskaTelevizeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)' + _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)' _TESTS = [{ 'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en', 'info_dict': { 'id': '61924494877028507', 'ext': 'mp4', - 'title': 'Hyde Park Civilizace: Bonus 01 - En', + 'title': 'Bonus 01 - En - Hyde Park Civilizace', 'description': 'English Subtittles', 'thumbnail': r're:^https?://.*\.jpg', 'duration': 81.3, @@ -33,18 +34,29 @@ class CeskaTelevizeIE(InfoExtractor): }, }, { # live stream - 'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/', + 'url': 'http://www.ceskatelevize.cz/zive/ct1/', 'info_dict': { - 'id': 402, + 'id': '102', 'ext': 'mp4', - 'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', + 'title': r'ČT1 - živé vysílání online', + 'description': 'Sledujte živé vysílání kanálu ČT1 online. Vybírat si můžete i z dalších kanálů České televize na kterémkoli z vašich zařízení.', 'is_live': True, }, 'params': { # m3u8 download 'skip_download': True, }, - 'skip': 'Georestricted to Czech Republic', + }, { + # another + 'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/', + 'only_matching': True, + 'info_dict': { + 'id': 402, + 'ext': 'mp4', + 'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', + 'is_live': True, + }, + # 'skip': 'Georestricted to Czech Republic', }, { 'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25', 'only_matching': True, @@ -53,21 +65,21 @@ class CeskaTelevizeIE(InfoExtractor): 'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/', 'info_dict': { 'id': '215562210900007-bogotart', - 'title': 'Queer: Bogotart', - 'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti. Připravil Peter Serge Butko', + 'title': 'Bogotart - Queer', + 'description': 'Hlavní město Kolumbie v doprovodu queer umělců. 
Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti', }, 'playlist': [{ 'info_dict': { 'id': '61924494877311053', 'ext': 'mp4', - 'title': 'Queer: Bogotart (Varování 18+)', + 'title': 'Bogotart - Queer (Varování 18+)', 'duration': 11.9, }, }, { 'info_dict': { 'id': '61924494877068022', 'ext': 'mp4', - 'title': 'Queer: Bogotart (Queer)', + 'title': 'Bogotart - Queer (Queer)', 'thumbnail': r're:^https?://.*\.jpg', 'duration': 1558.3, }, @@ -84,28 +96,42 @@ class CeskaTelevizeIE(InfoExtractor): def _real_extract(self, url): playlist_id = self._match_id(url) - parsed_url = compat_urllib_parse_urlparse(url) - webpage = self._download_webpage(url, playlist_id) - site_name = self._og_search_property('site_name', webpage, fatal=False, default=None) + webpage, urlh = self._download_webpage_handle(url, playlist_id) + parsed_url = compat_urllib_parse_urlparse(urlh.geturl()) + site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize') playlist_title = self._og_search_title(webpage, default=None) if site_name and playlist_title: - playlist_title = playlist_title.replace(f' — {site_name}', '', 1) + playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, 1)[0] playlist_description = self._og_search_description(webpage, default=None) if playlist_description: playlist_description = playlist_description.replace('\xa0', ' ') - if parsed_url.path.startswith('/porady/'): + type_ = 'IDEC' + if re.search(r'(^/porady|/zive)/', parsed_url.path): next_data = self._search_nextjs_data(webpage, playlist_id) - idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False) + if '/zive/' in parsed_url.path: + idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'liveBroadcast', 'current', 'idec'), get_all=False) + else: + idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False) + if not idec: + idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'videobonusDetail', 'bonusId'), get_all=False) + if idec: + type_ = 'bonus' if not idec: raise ExtractorError('Failed to find IDEC id') - iframe_hash = self._download_webpage('https://www.ceskatelevize.cz/v-api/iframe-hash/', playlist_id) - webpage = self._download_webpage('https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php', playlist_id, - query={'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', 'IDEC': idec}) + iframe_hash = self._download_webpage( + 'https://www.ceskatelevize.cz/v-api/iframe-hash/', + playlist_id, note='Getting IFRAME hash') + query = {'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', type_: idec, } + webpage = self._download_webpage( + 'https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php', + playlist_id, note='Downloading player', query=query) NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.' 
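# A short sketch (not part of this patch) of the branched traverse_obj
# lookup used for the IDEC above; the nested dict is a made-up stand-in
# for the real Next.js payload.
from yt_dlp.utils import traverse_obj

next_data = {'props': {'pageProps': {'data': {'mediaMeta': {'idec': '217 562 22150/0004'}}}}}
# ('show', 'mediaMeta') tries both keys at that level; get_all=False
# returns the first branch that yields a value instead of a list of hits.
idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False)
assert idec == '217 562 22150/0004'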
if '%s</p>' % NOT_AVAILABLE_STRING in webpage: - raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) + self.raise_geo_restricted(NOT_AVAILABLE_STRING) + if any(not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač', 'IDEC nebyl nalezen', )): + raise ExtractorError('no video with IDEC available', video_id=idec, expected=True) type_ = None episode_id = None @@ -174,7 +200,6 @@ class CeskaTelevizeIE(InfoExtractor): is_live = item.get('type') == 'LIVE' formats = [] for format_id, stream_url in item.get('streamUrls', {}).items(): - stream_url = stream_url.replace('https://', 'http://') if 'playerType=flash' in stream_url: stream_formats = self._extract_m3u8_formats( stream_url, playlist_id, 'mp4', 'm3u8_native', @@ -196,7 +221,7 @@ class CeskaTelevizeIE(InfoExtractor): entries[num]['formats'].extend(formats) continue - item_id = item.get('id') or item['assetId'] + item_id = str_or_none(item.get('id') or item['assetId']) title = item['title'] duration = float_or_none(item.get('duration')) @@ -227,6 +252,8 @@ class CeskaTelevizeIE(InfoExtractor): for e in entries: self._sort_formats(e['formats']) + if len(entries) == 1: + return entries[0] return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) def _get_subtitles(self, episode_id, subs): diff --git a/yt_dlp/extractor/manyvids.py b/yt_dlp/extractor/manyvids.py index 1f537d267..c713805c5 100644 --- a/yt_dlp/extractor/manyvids.py +++ b/yt_dlp/extractor/manyvids.py @@ -1,8 +1,12 @@ +import re + from .common import InfoExtractor from ..utils import ( determine_ext, + extract_attributes, int_or_none, str_to_int, + url_or_none, urlencode_postdata, ) @@ -17,17 +21,20 @@ class ManyVidsIE(InfoExtractor): 'id': '133957', 'ext': 'mp4', 'title': 'everthing about me (Preview)', + 'uploader': 'ellyxxix', 'view_count': int, 'like_count': int, }, }, { # full video 'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/', - 'md5': 'f3e8f7086409e9b470e2643edb96bdcc', + 'md5': 'bb47bab0e0802c2a60c24ef079dfe60f', 'info_dict': { 'id': '935718', 'ext': 'mp4', 'title': 'MY FACE REVEAL', + 'description': 'md5:ec5901d41808b3746fed90face161612', + 'uploader': 'Sarah Calanthe', 'view_count': int, 'like_count': int, }, @@ -36,17 +43,50 @@ class ManyVidsIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + real_url = 'https://www.manyvids.com/video/%s/gtm.js' % (video_id, ) + try: + webpage = self._download_webpage(real_url, video_id) + except Exception: + # probably useless fallback + webpage = self._download_webpage(url, video_id) + + info = self._search_regex( + r'''(<div\b[^>]*\bid\s*=\s*(['"])pageMetaDetails\2[^>]*>)''', + webpage, 'meta details', default='') + info = extract_attributes(info) + + player = self._search_regex( + r'''(<div\b[^>]*\bid\s*=\s*(['"])rmpPlayerStream\2[^>]*>)''', + webpage, 'player details', default='') + player = extract_attributes(player) + + video_urls_and_ids = ( + (info.get('data-meta-video'), 'video'), + (player.get('data-video-transcoded'), 'transcoded'), + (player.get('data-video-filepath'), 'filepath'), + (self._og_search_video_url(webpage, secure=False, default=None), 'og_video'), + ) + + def txt_or_none(s, default=None): + return (s.strip() or default) if isinstance(s, compat_str) else default + + uploader = txt_or_none(info.get('data-meta-author')) - video_url = self._search_regex( - r'data-(?:video-filepath|meta-video)\s*=s*(["\'])(?P<url>(?:(?!\1).)+)\1', - webpage, 'video URL', 
group='url') + def mung_title(s): + if uploader: + s = re.sub(r'^\s*%s\s+[|-]' % (re.escape(uploader), ), '', s) + return txt_or_none(s) - title = self._html_search_regex( - (r'<span[^>]+class=["\']item-title[^>]+>([^<]+)', - r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'), - webpage, 'title', default=None) or self._html_search_meta( - 'twitter:title', webpage, 'title', fatal=True) + title = ( + mung_title(info.get('data-meta-title')) + or self._html_search_regex( + (r'<span[^>]+class=["\']item-title[^>]+>([^<]+)', + r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'), + webpage, 'title', default=None) + or self._html_search_meta( + 'twitter:title', webpage, 'title', fatal=True)) + + title = re.sub(r'\s*[|-]\s+ManyVids\s*$', '', title) or title if any(p in webpage for p in ('preview_videos', '_preview.mp4')): title += ' (Preview)' @@ -59,7 +99,8 @@ class ManyVidsIE(InfoExtractor): # Sets some cookies self._download_webpage( 'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php', - video_id, fatal=False, data=urlencode_postdata({ + video_id, note='Setting format cookies', fatal=False, + data=urlencode_postdata({ 'mvtoken': mv_token, 'vid': video_id, }), headers={ @@ -67,24 +108,56 @@ class ManyVidsIE(InfoExtractor): 'X-Requested-With': 'XMLHttpRequest' }) - if determine_ext(video_url) == 'm3u8': - formats = self._extract_m3u8_formats( - video_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls') - else: - formats = [{'url': video_url}] + formats = [] + for v_url, fmt in video_urls_and_ids: + v_url = url_or_none(v_url) + if not v_url: + continue + if determine_ext(v_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + v_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls')) + else: + formats.append({ + 'url': v_url, + 'format_id': fmt, + }) + + self._remove_duplicate_formats(formats) + + for f in formats: + if f.get('height') is None: + f['height'] = int_or_none( + self._search_regex(r'_(\d{2,3}[02468])_', f['url'], 'video height', default=None)) + if '/preview/' in f['url']: + f['format_id'] = '_'.join(filter(None, (f.get('format_id'), 'preview'))) + f['preference'] = -10 + if 'transcoded' in f['format_id']: + f['preference'] = f.get('preference', -1) - 1 + + self._sort_formats(formats) + + def get_likes(): + likes = self._search_regex( + r'''(<a\b[^>]*\bdata-id\s*=\s*(['"])%s\2[^>]*>)''' % (video_id, ), + webpage, 'likes', default='') + likes = extract_attributes(likes) + return int_or_none(likes.get('data-likes')) - like_count = int_or_none(self._search_regex( - r'data-likes=["\'](\d+)', webpage, 'like count', default=None)) - view_count = str_to_int(self._html_search_regex( - r'(?s)<span[^>]+class="views-wrapper"[^>]*>(.+?)</span', webpage, - 'view count', default=None)) + def get_views(): + return str_to_int(self._html_search_regex( + r'''(?s)<span\b[^>]*\bclass\s*=["']views-wrapper\b[^>]+>.+?<span\b[^>]+>\s*(\d[\d,.]*)\s*</span>''', + webpage, 'view count', default=None)) return { 'id': video_id, 'title': title, - 'view_count': view_count, - 'like_count': like_count, 'formats': formats, - 'uploader': self._html_search_regex(r'<meta[^>]+name="author"[^>]*>([^<]+)', webpage, 'uploader'), + 'description': txt_or_none(info.get('data-meta-description')), + 'uploader': txt_or_none(info.get('data-meta-author')), + 'thumbnail': ( + url_or_none(info.get('data-meta-image')) + or url_or_none(player.get('data-video-screenshot'))), + 'view_count': get_views(), + 'like_count': get_likes(), } diff --git a/yt_dlp/extractor/motherless.py 
b/yt_dlp/extractor/motherless.py index 9e53a8a97..c24ef9b0d 100644 --- a/yt_dlp/extractor/motherless.py +++ b/yt_dlp/extractor/motherless.py @@ -69,7 +69,7 @@ class MotherlessIE(InfoExtractor): 'title': 'a/ Hot Teens', 'categories': list, 'upload_date': '20210104', - 'uploader_id': 'yonbiw', + 'uploader_id': 'anonymous', 'thumbnail': r're:https?://.*\.jpg', 'age_limit': 18, }, @@ -123,11 +123,12 @@ class MotherlessIE(InfoExtractor): kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta} upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d') - comment_count = webpage.count('class="media-comment-contents"') + comment_count = len(re.findall(r'''class\s*=\s*['"]media-comment-contents\b''', webpage)) uploader_id = self._html_search_regex( - (r'"media-meta-member">\s+<a href="/m/([^"]+)"', - r'<span\b[^>]+\bclass="username">([^<]+)</span>'), + (r'''<span\b[^>]+\bclass\s*=\s*["']username\b[^>]*>([^<]+)</span>''', + r'''(?s)['"](?:media-meta-member|thumb-member-username)\b[^>]+>\s*<a\b[^>]+\bhref\s*=\s*['"]/m/([^"']+)'''), webpage, 'uploader_id', fatal=False) + categories = self._html_search_meta('keywords', webpage, default=None) if categories: categories = [cat.strip() for cat in categories.split(',')] @@ -217,23 +218,23 @@ class MotherlessGroupIE(InfoExtractor): r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False) description = self._html_search_meta( 'description', webpage, fatal=False) - page_count = self._int(self._search_regex( - r'(\d+)</(?:a|span)><(?:a|span)[^>]+rel="next">', - webpage, 'page_count', default=0), 'page_count') + page_count = str_to_int(self._search_regex( + r'(\d+)\s*</(?:a|span)>\s*<(?:a|span)[^>]+(?:>\s*NEXT|\brel\s*=\s*["\']?next)\b', + webpage, 'page_count', default=0)) if not page_count: message = self._search_regex( - r'class="error-page"[^>]*>\s*<p[^>]*>\s*(?P<error_msg>[^<]+)(?<=\S)\s*', + r'''class\s*=\s*['"]error-page\b[^>]*>\s*<p[^>]*>\s*(?P<error_msg>[^<]+)(?<=\S)\s*''', webpage, 'error_msg', default=None) or 'This group has no videos.' 
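# A compact sketch (not part of this patch) of the paging pattern _get_page
# below moves to: page 1 is already in hand from the initial request, so
# only later pages trigger a download. All names here are illustrative.
def iter_pages(first_page, page_count, fetch_page):
    # fetch_page(n) downloads 1-indexed page n; page 1 is reused rather
    # than re-downloaded.
    for idx in range(page_count):
        yield first_page if idx == 0 else fetch_page(idx + 1)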
self.report_warning(message, group_id) + page_count = 1 PAGE_SIZE = 80 def _get_page(idx): - if not page_count: - return - webpage = self._download_webpage( - page_url, group_id, query={'page': idx + 1}, - note='Downloading page %d/%d' % (idx + 1, page_count) - ) + if idx > 0: + webpage = self._download_webpage( + page_url, group_id, query={'page': idx + 1}, + note='Downloading page %d/%d' % (idx + 1, page_count) + ) for entry in self._extract_entries(webpage, url): yield entry diff --git a/yt_dlp/extractor/neteasemusic.py b/yt_dlp/extractor/neteasemusic.py index f9a67876a..44fa60ce9 100644 --- a/yt_dlp/extractor/neteasemusic.py +++ b/yt_dlp/extractor/neteasemusic.py @@ -1,12 +1,25 @@ -import itertools +import json import re +import time from base64 import b64encode +from binascii import hexlify from datetime import datetime from hashlib import md5 +from random import randint from .common import InfoExtractor -from ..compat import compat_str, compat_urllib_parse_urlencode -from ..utils import float_or_none, sanitized_Request +from ..aes import aes_ecb_encrypt, pkcs7_padding +from ..compat import compat_urllib_parse_urlencode +from ..utils import ( + ExtractorError, + bytes_to_intlist, + error_to_compat_str, + float_or_none, + int_or_none, + intlist_to_bytes, + sanitized_Request, + try_get, +) class NetEaseMusicBaseIE(InfoExtractor): @@ -17,7 +30,7 @@ class NetEaseMusicBaseIE(InfoExtractor): @classmethod def _encrypt(cls, dfsid): salt_bytes = bytearray(cls._NETEASE_SALT.encode('utf-8')) - string_bytes = bytearray(compat_str(dfsid).encode('ascii')) + string_bytes = bytearray(str(dfsid).encode('ascii')) salt_len = len(salt_bytes) for i in range(len(string_bytes)): string_bytes[i] = string_bytes[i] ^ salt_bytes[i % salt_len] @@ -26,32 +39,106 @@ class NetEaseMusicBaseIE(InfoExtractor): result = b64encode(m.digest()).decode('ascii') return result.replace('/', '_').replace('+', '-') + @classmethod + def make_player_api_request_data_and_headers(cls, song_id, bitrate): + KEY = b'e82ckenh8dichen8' + URL = '/api/song/enhance/player/url' + now = int(time.time() * 1000) + rand = randint(0, 1000) + cookie = { + 'osver': None, + 'deviceId': None, + 'appver': '8.0.0', + 'versioncode': '140', + 'mobilename': None, + 'buildver': '1623435496', + 'resolution': '1920x1080', + '__csrf': '', + 'os': 'pc', + 'channel': None, + 'requestId': '{0}_{1:04}'.format(now, rand), + } + request_text = json.dumps( + {'ids': '[{0}]'.format(song_id), 'br': bitrate, 'header': cookie}, + separators=(',', ':')) + message = 'nobody{0}use{1}md5forencrypt'.format( + URL, request_text).encode('latin1') + msg_digest = md5(message).hexdigest() + + data = '{0}-36cd479b6b5-{1}-36cd479b6b5-{2}'.format( + URL, request_text, msg_digest) + data = pkcs7_padding(bytes_to_intlist(data)) + encrypted = intlist_to_bytes(aes_ecb_encrypt(data, bytes_to_intlist(KEY))) + encrypted_params = hexlify(encrypted).decode('ascii').upper() + + cookie = '; '.join( + ['{0}={1}'.format(k, v if v is not None else 'undefined') + for [k, v] in cookie.items()]) + + headers = { + 'User-Agent': self.extractor.get_param('http_headers')['User-Agent'], + 'Content-Type': 'application/x-www-form-urlencoded', + 'Referer': 'https://music.163.com', + 'Cookie': cookie, + } + return ('params={0}'.format(encrypted_params), headers) + + def _call_player_api(self, song_id, bitrate): + url = 'https://interface3.music.163.com/eapi/song/enhance/player/url' + data, headers = self.make_player_api_request_data_and_headers(song_id, bitrate) + try: + msg = 'empty result' + result = 
self._download_json( + url, song_id, data=data.encode('ascii'), headers=headers) + if result: + return result + except ExtractorError as e: + if type(e.cause) in (ValueError, TypeError): + # JSON load failure + raise + except Exception as e: + msg = error_to_compat_str(e) + self.report_warning('%s API call (%s) failed: %s' % ( + song_id, bitrate, msg)) + return {} + def extract_formats(self, info): + err = 0 formats = [] + song_id = info['id'] for song_format in self._FORMATS: details = info.get(song_format) if not details: continue - song_file_path = '/%s/%s.%s' % ( - self._encrypt(details['dfsId']), details['dfsId'], details['extension']) - - # 203.130.59.9, 124.40.233.182, 115.231.74.139, etc is a reverse proxy-like feature - # from NetEase's CDN provider that can be used if m5.music.126.net does not - # work, especially for users outside of Mainland China - # via: https://github.com/JixunMoe/unblock-163/issues/3#issuecomment-163115880 - for host in ('http://m5.music.126.net', 'http://115.231.74.139/m1.music.126.net', - 'http://124.40.233.182/m1.music.126.net', 'http://203.130.59.9/m1.music.126.net'): - song_url = host + song_file_path + + bitrate = int_or_none(details.get('bitrate')) or 999000 + data = self._call_player_api(song_id, bitrate) + for song in try_get(data, lambda x: x['data'], list) or []: + song_url = try_get(song, lambda x: x['url']) + if not song_url: + continue if self._is_valid_url(song_url, info['id'], 'song'): formats.append({ 'url': song_url, 'ext': details.get('extension'), - 'abr': float_or_none(details.get('bitrate'), scale=1000), + 'abr': float_or_none(song.get('br'), scale=1000), 'format_id': song_format, - 'filesize': details.get('size'), - 'asr': details.get('sr') + 'filesize': int_or_none(song.get('size')), + 'asr': int_or_none(details.get('sr')), }) - break + elif err == 0: + err = try_get(song, lambda x: x['code'], int) + + if not formats: + msg = 'No media links found' + if err != 0 and (err < 200 or err >= 400): + raise ExtractorError( + '%s (site code %d)' % (msg, err, ), expected=True) + else: + self.raise_geo_restricted( + msg + ': probably this video is not available from your location due to geo restriction.', + countries=['CN']) + return formats @classmethod @@ -67,33 +154,19 @@ class NetEaseMusicBaseIE(InfoExtractor): class NetEaseMusicIE(NetEaseMusicBaseIE): IE_NAME = 'netease:song' IE_DESC = '网易云音乐' - _VALID_URL = r'https?://music\.163\.com/(#/)?song\?id=(?P<id>[0-9]+)' + _VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)' _TESTS = [{ 'url': 'http://music.163.com/#/song?id=32102397', - 'md5': 'f2e97280e6345c74ba9d5677dd5dcb45', + 'md5': '3e909614ce09b1ccef4a3eb205441190', 'info_dict': { 'id': '32102397', 'ext': 'mp3', - 'title': 'Bad Blood (feat. 
Kendrick Lamar)', + 'title': 'Bad Blood', 'creator': 'Taylor Swift / Kendrick Lamar', - 'upload_date': '20150517', - 'timestamp': 1431878400, - 'description': 'md5:a10a54589c2860300d02e1de821eb2ef', - }, - 'skip': 'Blocked outside Mainland China', - }, { - 'note': 'No lyrics translation.', - 'url': 'http://music.163.com/#/song?id=29822014', - 'info_dict': { - 'id': '29822014', - 'ext': 'mp3', - 'title': '听见下雨的声音', - 'creator': '周杰伦', - 'upload_date': '20141225', - 'timestamp': 1419523200, - 'description': 'md5:a4d8d89f44656af206b7b2555c0bce6c', + 'upload_date': '20150516', + 'timestamp': 1431792000, + 'description': 'md5:25fc5f27e47aad975aa6d36382c7833c', }, - 'skip': 'Blocked outside Mainland China', }, { 'note': 'No lyrics.', 'url': 'http://music.163.com/song?id=17241424', @@ -103,9 +176,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE): 'title': 'Opus 28', 'creator': 'Dustin O\'Halloran', 'upload_date': '20080211', + 'description': 'md5:f12945b0f6e0365e3b73c5032e1b0ff4', 'timestamp': 1202745600, }, - 'skip': 'Blocked outside Mainland China', }, { 'note': 'Has translated name.', 'url': 'http://music.163.com/#/song?id=22735043', @@ -119,7 +192,18 @@ class NetEaseMusicIE(NetEaseMusicBaseIE): 'timestamp': 1264608000, 'alt_title': '说出愿望吧(Genie)', }, - 'skip': 'Blocked outside Mainland China', + }, { + 'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846', + 'md5': '95826c73ea50b1c288b22180ec9e754d', + 'info_dict': { + 'id': '95670', + 'ext': 'mp3', + 'title': '国际歌', + 'creator': '马备', + 'upload_date': '19911130', + 'timestamp': 691516800, + 'description': 'md5:1ba2f911a2b0aa398479f595224f2141', + }, }] def _process_lyrics(self, lyrics_info): diff --git a/yt_dlp/extractor/nrk.py b/yt_dlp/extractor/nrk.py index fcbafe418..7eb5b21cb 100644 --- a/yt_dlp/extractor/nrk.py +++ b/yt_dlp/extractor/nrk.py @@ -58,8 +58,7 @@ class NRKBaseIE(InfoExtractor): return self._download_json( urljoin('https://psapi.nrk.no/', path), video_id, note or 'Downloading %s JSON' % item, - fatal=fatal, query=query, - headers={'Accept-Encoding': 'gzip, deflate, br'}) + fatal=fatal, query=query) class NRKIE(NRKBaseIE): diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index 25d2f200f..2e36b8861 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -870,7 +870,7 @@ class VimeoIE(VimeoBaseInfoExtractor): if '://player.vimeo.com/video/' in url: config = self._parse_json(self._search_regex( - r'\bconfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id) + r'\b(?:playerC|c)onfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id) if config.get('view') == 4: config = self._verify_player_video_password( redirect_url, video_id, headers) diff --git a/yt_dlp/extractor/zdf.py b/yt_dlp/extractor/zdf.py index 3a7f01f7a..1eab384b9 100644 --- a/yt_dlp/extractor/zdf.py +++ b/yt_dlp/extractor/zdf.py @@ -3,13 +3,14 @@ import re from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + NO_DEFAULT, + ExtractorError, determine_ext, + extract_attributes, float_or_none, int_or_none, join_nonempty, merge_dicts, - NO_DEFAULT, - orderedSet, parse_codecs, qualities, traverse_obj, @@ -188,7 +189,7 @@ class ZDFIE(ZDFBaseIE): }, }, { 'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html', - 'md5': '57af4423db0455a3975d2dc4578536bc', + 'md5': '1b93bdec7d02fc0b703c5e7687461628', 'info_dict': { 'ext': 'mp4', 'id': 'video_funk_1770473', @@ -250,17 +251,15 @@ class ZDFIE(ZDFBaseIE): title = 
content.get('title') or content['teaserHeadline'] t = content['mainVideoContent']['http://zdf.de/rels/target'] - - ptmd_path = t.get('http://zdf.de/rels/streams/ptmd') - + ptmd_path = traverse_obj(t, ( + (('streams', 'default'), None), + ('http://zdf.de/rels/streams/ptmd', 'http://zdf.de/rels/streams/ptmd-template') + ), get_all=False) if not ptmd_path: - ptmd_path = traverse_obj( - t, ('streams', 'default', 'http://zdf.de/rels/streams/ptmd-template'), - 'http://zdf.de/rels/streams/ptmd-template').replace( - '{playerId}', 'ngplayer_2_4') + raise ExtractorError('Could not extract ptmd_path') info = self._extract_ptmd( - urljoin(url, ptmd_path), video_id, player['apiToken'], url) + urljoin(url, ptmd_path.replace('{playerId}', 'ngplayer_2_4')), video_id, player['apiToken'], url) thumbnails = [] layouts = try_get( @@ -309,15 +308,16 @@ class ZDFIE(ZDFBaseIE): 'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id, video_id) - document = video['document'] - - title = document['titel'] - content_id = document['basename'] - formats = [] - format_urls = set() - for f in document['formitaeten']: - self._extract_format(content_id, formats, format_urls, f) + formitaeten = try_get(video, lambda x: x['document']['formitaeten'], list) + document = formitaeten and video['document'] + if formitaeten: + title = document['titel'] + content_id = document['basename'] + + format_urls = set() + for f in formitaeten or []: + self._extract_format(content_id, formats, format_urls, f) self._sort_formats(formats) thumbnails = [] @@ -364,9 +364,9 @@ class ZDFChannelIE(ZDFBaseIE): 'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio', 'info_dict': { 'id': 'das-aktuelle-sportstudio', - 'title': 'das aktuelle sportstudio | ZDF', + 'title': 'das aktuelle sportstudio', }, - 'playlist_mincount': 23, + 'playlist_mincount': 18, }, { 'url': 'https://www.zdf.de/dokumentation/planet-e', 'info_dict': { @@ -374,6 +374,14 @@ class ZDFChannelIE(ZDFBaseIE): 'title': 'planet e.', }, 'playlist_mincount': 50, + }, { + 'url': 'https://www.zdf.de/gesellschaft/aktenzeichen-xy-ungeloest', + 'info_dict': { + 'id': 'aktenzeichen-xy-ungeloest', + 'title': 'Aktenzeichen XY... 
ungelöst', + 'entries': "lambda x: not any('xy580-fall1-kindermoerder-gesucht-100' in e['url'] for e in x)", + }, + 'playlist_mincount': 2, }, { 'url': 'https://www.zdf.de/filme/taunuskrimi/', 'only_matching': True, @@ -383,60 +391,36 @@ class ZDFChannelIE(ZDFBaseIE): def suitable(cls, url): return False if ZDFIE.suitable(url) else super(ZDFChannelIE, cls).suitable(url) + def _og_search_title(self, webpage, fatal=False): + title = super(ZDFChannelIE, self)._og_search_title(webpage, fatal=fatal) + return re.split(r'\s+[-|]\s+ZDF(?:mediathek)?$', title or '')[0] or None + def _real_extract(self, url): channel_id = self._match_id(url) webpage = self._download_webpage(url, channel_id) - entries = [ - self.url_result(item_url, ie=ZDFIE.ie_key()) - for item_url in orderedSet(re.findall( - r'data-plusbar-url=["\'](http.+?\.html)', webpage))] - - return self.playlist_result( - entries, channel_id, self._og_search_title(webpage, fatal=False)) - - r""" - player = self._extract_player(webpage, channel_id) - - channel_id = self._search_regex( - r'docId\s*:\s*(["\'])(?P<id>(?!\1).+?)\1', webpage, - 'channel id', group='id') - - channel = self._call_api( - 'https://api.zdf.de/content/documents/%s.json' % channel_id, - player, url, channel_id) - - items = [] - for module in channel['module']: - for teaser in try_get(module, lambda x: x['teaser'], list) or []: - t = try_get( - teaser, lambda x: x['http://zdf.de/rels/target'], dict) - if not t: - continue - items.extend(try_get( - t, - lambda x: x['resultsWithVideo']['http://zdf.de/rels/search/results'], - list) or []) - items.extend(try_get( - module, - lambda x: x['filterRef']['resultsWithVideo']['http://zdf.de/rels/search/results'], - list) or []) - - entries = [] - entry_urls = set() - for item in items: - t = try_get(item, lambda x: x['http://zdf.de/rels/target'], dict) - if not t: - continue - sharing_url = t.get('http://zdf.de/rels/sharing-url') - if not sharing_url or not isinstance(sharing_url, compat_str): - continue - if sharing_url in entry_urls: - continue - entry_urls.add(sharing_url) - entries.append(self.url_result( - sharing_url, ie=ZDFIE.ie_key(), video_id=t.get('id'))) - - return self.playlist_result(entries, channel_id, channel.get('title')) - """ + matches = re.finditer( + r'''<div\b[^>]*?\sdata-plusbar-id\s*=\s*(["'])(?P<p_id>[\w-]+)\1[^>]*?\sdata-plusbar-url=\1(?P<url>%s)\1''' % ZDFIE._VALID_URL, + webpage) + + if self._downloader.params.get('noplaylist', False): + entry = next( + (self.url_result(m.group('url'), ie=ZDFIE.ie_key()) for m in matches), + None) + self.to_screen('Downloading just the main video because of --no-playlist') + if entry: + return entry + else: + self.to_screen('Downloading playlist %s - add --no-playlist to download just the main video' % (channel_id, )) + + def check_video(m): + v_ref = self._search_regex( + r'''(<a\b[^>]*?\shref\s*=[^>]+?\sdata-target-id\s*=\s*(["'])%s\2[^>]*>)''' % (m.group('p_id'), ), + webpage, 'check id', default='') + v_ref = extract_attributes(v_ref) + return v_ref.get('data-target-video-type') != 'novideo' + + return self.playlist_from_matches( + (m.group('url') for m in matches if check_video(m)), + channel_id, self._og_search_title(webpage, fatal=False)) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 1532d22ac..4d1247eea 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -685,7 +685,8 @@ def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT): return '\0_' return char - if restricted and is_id is NO_DEFAULT: + # Replace look-alike Unicode glyphs + if restricted and 
(is_id is NO_DEFAULT or not is_id): s = unicodedata.normalize('NFKC', s) s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) # Handle timestamps result = ''.join(map(replace_insane, s)) From 46d09f87072e112c363f4a573966d8e48a788562 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Mon, 7 Nov 2022 02:29:58 +0530 Subject: [PATCH 1723/2552] [cleanup] Lint and misc cleanup --- README.md | 6 ++-- supportedsites.md | 52 ++++++++++++++++---------------- test/helper.py | 4 +-- yt_dlp/extractor/acfun.py | 2 +- yt_dlp/extractor/bilibili.py | 2 +- yt_dlp/extractor/common.py | 3 +- yt_dlp/extractor/manyvids.py | 2 +- yt_dlp/extractor/neteasemusic.py | 4 +-- yt_dlp/extractor/yandexvideo.py | 4 +-- yt_dlp/options.py | 9 +++--- yt_dlp/utils.py | 1 + 11 files changed, 46 insertions(+), 43 deletions(-) diff --git a/README.md b/README.md index 962543738..e094ccba7 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ [![License: Unlicense](https://img.shields.io/badge/-Unlicense-blue.svg?style=for-the-badge)](LICENSE "License") [![CI Status](https://img.shields.io/github/workflow/status/yt-dlp/yt-dlp/Core%20Tests/master?label=Tests&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/actions "CI Status") [![Commits](https://img.shields.io/github/commit-activity/m/yt-dlp/yt-dlp?label=commits&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/commits "Commit History") -[![Last Commit](https://img.shields.io/github/last-commit/yt-dlp/yt-dlp/master?label=&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/commits "Commit History") +[![Last Commit](https://img.shields.io/github/last-commit/yt-dlp/yt-dlp/master?label=&style=for-the-badge&display_timestamp=committer)](https://github.com/yt-dlp/yt-dlp/commits "Commit History") </div> <!-- MANPAGE: END EXCLUDED SECTION --> @@ -1642,9 +1642,9 @@ The metadata obtained by the extractors can be modified by using `--parse-metada `--replace-in-metadata FIELDS REGEX REPLACE` is used to replace text in any metadata field using [python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax). [Backreferences](https://docs.python.org/3/library/re.html?highlight=backreferences#re.sub) can be used in the replace string for advanced use. -The general syntax of `--parse-metadata FROM:TO` is to give the name of a field or an [output template](#output-template) to extract data from, and the format to interpret it as, separated by a colon `:`. Either a [python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax) with named capture groups or a similar syntax to the [output template](#output-template) (only `%(field)s` formatting is supported) can be used for `TO`. The option can be used multiple times to parse and modify various fields. +The general syntax of `--parse-metadata FROM:TO` is to give the name of a field or an [output template](#output-template) to extract data from, and the format to interpret it as, separated by a colon `:`. Either a [python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax) with named capture groups, a single field name, or a similar syntax to the [output template](#output-template) (only `%(field)s` formatting is supported) can be used for `TO`. The option can be used multiple times to parse and modify various fields. 
 
-Note that any field created by this can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--embed-metadata`.
+Note that these options preserve their relative order, allowing replacements to be made in parsed fields and vice versa. Also, any field thus created can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--embed-metadata`.
 
 This option also has a few special uses:
diff --git a/supportedsites.md b/supportedsites.md
index 48888f61f..44fc1d484 100644
--- a/supportedsites.md
+++ b/supportedsites.md
@@ -23,7 +23,7 @@
 - **9now.com.au**
 - **abc.net.au**
 - **abc.net.au:iview**
- - **abc.net.au:iview:showseries**
+ - **abc.net.au:​iview:showseries**
 - **abcnews**
 - **abcnews:video**
 - **abcotvs**: ABC Owned Television Stations
@@ -124,8 +124,8 @@
 - **bbc**: [<abbr title="netrc machine"><em>bbc</em></abbr>] BBC
 - **bbc.co.uk**: [<abbr title="netrc machine"><em>bbc</em></abbr>] BBC iPlayer
 - **bbc.co.uk:article**: BBC articles
- - **bbc.co.uk:iplayer:episodes**
- - **bbc.co.uk:iplayer:group**
+ - **bbc.co.uk:​iplayer:episodes**
+ - **bbc.co.uk:​iplayer:group**
 - **bbc.co.uk:playlist**
 - **BBVTV**: [<abbr title="netrc machine"><em>bbvtv</em></abbr>]
 - **BBVTVLive**: [<abbr title="netrc machine"><em>bbvtv</em></abbr>]
@@ -274,7 +274,7 @@
 - **crunchyroll**: [<abbr title="netrc machine"><em>crunchyroll</em></abbr>]
 - **crunchyroll:beta**: [<abbr title="netrc machine"><em>crunchyroll</em></abbr>]
 - **crunchyroll:playlist**: [<abbr title="netrc machine"><em>crunchyroll</em></abbr>]
- - **crunchyroll:playlist:beta**: [<abbr title="netrc machine"><em>crunchyroll</em></abbr>]
+ - **crunchyroll:​playlist:beta**: [<abbr title="netrc machine"><em>crunchyroll</em></abbr>]
 - **CSpan**: C-SPAN
 - **CSpanCongress**
 - **CtsNews**: 華視新聞
@@ -483,7 +483,7 @@
 - **Golem**
 - **goodgame:stream**
 - **google:podcasts**
- - **google:podcasts:feed**
+ - **google:​podcasts:feed**
 - **GoogleDrive**
 - **GoogleDrive:Folder**
 - **GoPlay**: [<abbr title="netrc machine"><em>goplay</em></abbr>]
@@ -618,7 +618,7 @@
 - **kuwo:singer**: 酷我音乐 - 歌手
 - **kuwo:song**: 酷我音乐
 - **la7.it**
- - **la7.it:pod:episode**
+ - **la7.it:​pod:episode**
 - **la7.it:podcast**
 - **laola1tv**
 - **laola1tv:embed**
@@ -652,7 +652,7 @@
 - **LineLiveChannel**
 - **LinkedIn**: [<abbr title="netrc machine"><em>linkedin</em></abbr>]
 - **linkedin:learning**: [<abbr title="netrc machine"><em>linkedin</em></abbr>]
- - **linkedin:learning:course**: [<abbr title="netrc machine"><em>linkedin</em></abbr>]
+ - **linkedin:​learning:course**: [<abbr title="netrc machine"><em>linkedin</em></abbr>]
 - **LinuxAcademy**: [<abbr title="netrc machine"><em>linuxacademy</em></abbr>]
 - **Liputan6**
 - **LiTV**
@@ -673,7 +673,7 @@
 - **MagentaMusik360**
 - **mailru**: Видео@Mail.Ru
 - **mailru:music**: Музыка@Mail.Ru
- - **mailru:music:search**: Музыка@Mail.Ru
+ - **mailru:​music:search**: Музыка@Mail.Ru
 - **MainStreaming**: MainStreaming Player
 - **MallTV**
 - **mangomolo:live**
@@ -718,7 +718,7 @@
 - **microsoftstream**: Microsoft Stream
 - **mildom**: Record ongoing live by specific user in Mildom
 - **mildom:clip**: Clip in Mildom
- - **mildom:user:vod**: Download all VODs from specific user in Mildom
+ - **mildom:​user:vod**: Download all VODs from specific user in Mildom
 - **mildom:vod**: VOD in Mildom
 - **minds**
 - **minds:channel**
@@ -803,7 +803,7 @@
 - **navernow**
 - **NBA**
 - **nba:watch**
- - **nba:watch:collection**
+ - **nba:​watch:collection**
 - 
**NBAChannel** - **NBAEmbed** - **NBAWatchEmbed** @@ -817,7 +817,7 @@ - **NBCStations** - **ndr**: NDR.de - Norddeutscher Rundfunk - **ndr:embed** - - **ndr:embed:base** + - **ndr:​embed:base** - **NDTV** - **Nebula**: [<abbr title="netrc machine"><em>watchnebula</em></abbr>] - **nebula:channel**: [<abbr title="netrc machine"><em>watchnebula</em></abbr>] @@ -869,7 +869,7 @@ - **niconico:tag**: NicoNico video tag URLs - **NiconicoUser** - **nicovideo:search**: Nico video search; "nicosearch:" prefix - - **nicovideo:search:date**: Nico video search, newest first; "nicosearchdate:" prefix + - **nicovideo:​search:date**: Nico video search, newest first; "nicosearchdate:" prefix - **nicovideo:search_url**: Nico video search URLs - **Nintendo** - **Nitter** @@ -892,7 +892,7 @@ - **npo**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **npo.nl:live** - **npo.nl:radio** - - **npo.nl:radio:fragment** + - **npo.nl:​radio:fragment** - **Npr** - **NRK** - **NRKPlaylist** @@ -933,7 +933,7 @@ - **openrec:capture** - **openrec:movie** - **OraTV** - - **orf:fm4:story**: fm4.orf.at stories + - **orf:​fm4:story**: fm4.orf.at stories - **orf:iptv**: iptv.ORF.at - **orf:radio** - **orf:tvthek**: ORF TVthek @@ -981,7 +981,7 @@ - **Pinterest** - **PinterestCollection** - **pixiv:sketch** - - **pixiv:sketch:user** + - **pixiv:​sketch:user** - **Pladform** - **PlanetMarathi** - **Platzi**: [<abbr title="netrc machine"><em>platzi</em></abbr>] @@ -1010,7 +1010,7 @@ - **polskieradio:kierowcow** - **polskieradio:player** - **polskieradio:podcast** - - **polskieradio:podcast:list** + - **polskieradio:​podcast:list** - **PolskieRadioCategory** - **Popcorntimes** - **PopcornTV** @@ -1122,7 +1122,7 @@ - **rtl.nl**: rtl.nl and rtlxl.nl - **rtl2** - **rtl2:you** - - **rtl2:you:series** + - **rtl2:​you:series** - **RTLLuLive** - **RTLLuRadio** - **RTNews** @@ -1198,9 +1198,9 @@ - **Skeb** - **sky.it** - **sky:news** - - **sky:news:story** + - **sky:​news:story** - **sky:sports** - - **sky:sports:news** + - **sky:​sports:news** - **skyacademy.it** - **SkylineWebcams** - **skynewsarabia:article** @@ -1289,7 +1289,7 @@ - **Teachable**: [<abbr title="netrc machine"><em>teachable</em></abbr>] - **TeachableCourse**: [<abbr title="netrc machine"><em>teachable</em></abbr>] - **teachertube**: teachertube.com videos - - **teachertube:user:collection**: teachertube.com user and collection videos + - **teachertube:​user:collection**: teachertube.com user and collection videos - **TeachingChannel** - **Teamcoco** - **TeamTreeHouse**: [<abbr title="netrc machine"><em>teamtreehouse</em></abbr>] @@ -1614,12 +1614,12 @@ - **XXXYMovies** - **Yahoo**: Yahoo screen and movies - **yahoo:gyao** - - **yahoo:gyao:player** + - **yahoo:​gyao:player** - **yahoo:japannews**: Yahoo! Japan News - **YandexDisk** - **yandexmusic:album**: Яндекс.Музыка - Альбом - - **yandexmusic:artist:albums**: Яндекс.Музыка - Артист - Альбомы - - **yandexmusic:artist:tracks**: Яндекс.Музыка - Артист - Треки + - **yandexmusic:​artist:albums**: Яндекс.Музыка - Артист - Альбомы + - **yandexmusic:​artist:tracks**: Яндекс.Музыка - Артист - Треки - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист - **yandexmusic:track**: Яндекс.Музыка - Трек - **YandexVideo** @@ -1641,14 +1641,14 @@ - **youtube:clip** - **youtube:favorites**: YouTube liked videos; ":ytfav" keyword (requires cookies) - **youtube:history**: Youtube watch history; ":ythis" keyword (requires cookies) - - **youtube:music:search_url**: YouTube music search URLs with selectable sections, e.g. 
#songs + - **youtube:​music:search_url**: YouTube music search URLs with selectable sections, e.g. #songs - **youtube:notif**: YouTube notifications; ":ytnotif" keyword (requires cookies) - **youtube:playlist**: YouTube playlists - **youtube:recommended**: YouTube recommended videos; ":ytrec" keyword - **youtube:search**: YouTube search; "ytsearch:" prefix - - **youtube:search:date**: YouTube search, newest videos first; "ytsearchdate:" prefix + - **youtube:​search:date**: YouTube search, newest videos first; "ytsearchdate:" prefix - **youtube:search_url**: YouTube search URLs with sorting and filter support - - **youtube:shorts:pivot:audio**: YouTube Shorts audio pivot (Shorts using audio of a given video) + - **youtube:​shorts:pivot:audio**: YouTube Shorts audio pivot (Shorts using audio of a given video) - **youtube:stories**: YouTube channel stories; "ytstories:" prefix - **youtube:subscriptions**: YouTube subscriptions feed; ":ytsubs" keyword (requires cookies) - **youtube:tab**: YouTube Tabs diff --git a/test/helper.py b/test/helper.py index e918d8c46..3b3b44580 100644 --- a/test/helper.py +++ b/test/helper.py @@ -260,8 +260,8 @@ def expect_info_dict(self, got_dict, expected_dict): info_dict_str += ''.join( f' {_repr(k)}: {_repr(test_info_dict[k])},\n' for k in missing_keys) - write_string( - '\n\'info_dict\': {\n' + info_dict_str + '},\n', out=sys.stderr) + info_dict_str = '\n\'info_dict\': {\n' + info_dict_str + '},\n' + write_string(info_dict_str.replace('\n', '\n '), out=sys.stderr) self.assertFalse( missing_keys, 'Missing keys in test definition: %s' % ( diff --git a/yt_dlp/extractor/acfun.py b/yt_dlp/extractor/acfun.py index 92b905fa7..9ec259a75 100644 --- a/yt_dlp/extractor/acfun.py +++ b/yt_dlp/extractor/acfun.py @@ -161,7 +161,7 @@ class AcFunBangumiIE(AcFunVideoBaseIE): def _real_extract(self, url): video_id = self._match_id(url) ac_idx = parse_qs(url).get('ac', [None])[-1] - video_id = f'{video_id}{format_field(ac_idx, template="__%s")}' + video_id = f'{video_id}{format_field(ac_idx, None, "__%s")}' webpage = self._download_webpage(url, video_id) json_bangumi_data = self._search_json(r'window.bangumiData\s*=', webpage, 'bangumiData', video_id) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index a237343c6..de28aa4b7 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -368,7 +368,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE): or '正在观看预览,大会员免费看全片' in webpage): self.raise_login_required('This video is for premium members only') - play_info = self._search_json(r'window\.__playinfo__\s*=\s*', webpage, 'play info', video_id)['data'] + play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data'] formats = self.extract_formats(play_info) if (not formats and '成为大会员抢先看' in webpage and play_info.get('durl') and not play_info.get('dash')): diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 84a2b95af..20ed52216 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -3725,7 +3725,8 @@ class InfoExtractor: if not cls.working(): desc += ' (**Currently broken**)' if markdown else ' (Currently broken)' - name = f' - **{cls.IE_NAME}**' if markdown else cls.IE_NAME + # Escape emojis. 
Ref: https://github.com/github/markup/issues/1153 + name = (' - **%s**' % re.sub(r':(\w+:)', ':\u200B\\g<1>', cls.IE_NAME)) if markdown else cls.IE_NAME return f'{name}:{desc}' if desc else name def extract_subtitles(self, *args, **kwargs): diff --git a/yt_dlp/extractor/manyvids.py b/yt_dlp/extractor/manyvids.py index c713805c5..63ff5f054 100644 --- a/yt_dlp/extractor/manyvids.py +++ b/yt_dlp/extractor/manyvids.py @@ -68,7 +68,7 @@ class ManyVidsIE(InfoExtractor): ) def txt_or_none(s, default=None): - return (s.strip() or default) if isinstance(s, compat_str) else default + return (s.strip() or default) if isinstance(s, str) else default uploader = txt_or_none(info.get('data-meta-author')) diff --git a/yt_dlp/extractor/neteasemusic.py b/yt_dlp/extractor/neteasemusic.py index 44fa60ce9..5cf96ad7e 100644 --- a/yt_dlp/extractor/neteasemusic.py +++ b/yt_dlp/extractor/neteasemusic.py @@ -1,3 +1,4 @@ +import itertools import json import re import time @@ -39,8 +40,7 @@ class NetEaseMusicBaseIE(InfoExtractor): result = b64encode(m.digest()).decode('ascii') return result.replace('/', '_').replace('+', '-') - @classmethod - def make_player_api_request_data_and_headers(cls, song_id, bitrate): + def make_player_api_request_data_and_headers(self, song_id, bitrate): KEY = b'e82ckenh8dichen8' URL = '/api/song/enhance/player/url' now = int(time.time() * 1000) diff --git a/yt_dlp/extractor/yandexvideo.py b/yt_dlp/extractor/yandexvideo.py index 7932edf33..5e6cf6edd 100644 --- a/yt_dlp/extractor/yandexvideo.py +++ b/yt_dlp/extractor/yandexvideo.py @@ -255,7 +255,7 @@ class ZenYandexIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - redirect = self._search_json(r'var it\s*=\s*', webpage, 'redirect', id, default={}).get('retpath') + redirect = self._search_json(r'var it\s*=', webpage, 'redirect', id, default={}).get('retpath') if redirect: video_id = self._match_id(redirect) webpage = self._download_webpage(redirect, video_id, note='Redirecting') @@ -373,7 +373,7 @@ class ZenYandexChannelIE(InfoExtractor): item_id = self._match_id(url) webpage = self._download_webpage(url, item_id) redirect = self._search_json( - r'var it\s*=\s*', webpage, 'redirect', item_id, default={}).get('retpath') + r'var it\s*=', webpage, 'redirect', item_id, default={}).get('retpath') if redirect: item_id = self._match_id(redirect) webpage = self._download_webpage(redirect, item_id, note='Redirecting') diff --git a/yt_dlp/options.py b/yt_dlp/options.py index d3dfee820..bee867aa9 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -294,9 +294,10 @@ def create_parser(): aliases = (x if x.startswith('-') else f'--{x}' for x in map(str.strip, aliases.split(','))) try: + args = [f'ARG{i}' for i in range(nargs)] alias_group.add_option( - *aliases, help=opts, nargs=nargs, dest=parser.ALIAS_DEST, type='str' if nargs else None, - metavar=' '.join(f'ARG{i}' for i in range(nargs)), action='callback', + *aliases, nargs=nargs, dest=parser.ALIAS_DEST, type='str' if nargs else None, + metavar=' '.join(args), help=opts.format(*args), action='callback', callback=_alias_callback, callback_kwargs={'opts': opts, 'nargs': nargs}) except Exception as err: raise optparse.OptionValueError(f'wrong {opt_str} formatting; {err}') @@ -549,11 +550,11 @@ def create_parser(): selection.add_option( '--min-filesize', metavar='SIZE', dest='min_filesize', default=None, - help='Do not download any videos smaller than SIZE, e.g. 
50k or 44.6M')
+        help='Abort download if filesize is smaller than SIZE, e.g. 50k or 44.6M')
     selection.add_option(
         '--max-filesize',
         metavar='SIZE', dest='max_filesize', default=None,
-        help='Do not download any videos larger than SIZE, e.g. 50k or 44.6M')
+        help='Abort download if filesize is larger than SIZE, e.g. 50k or 44.6M')
     selection.add_option(
         '--date',
         metavar='DATE', dest='date', default=None,
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 4d1247eea..d0513496e 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -480,6 +480,7 @@ class HTMLBreakOnClosingTagParser(html.parser.HTMLParser):
         raise self.HTMLBreakOnClosingTagException()
 
 
+# XXX: This should be far less strict
 def get_element_text_and_html_by_tag(tag, html):
     """
     For the first element with the specified tag in the passed HTML document
From ff48fc04d0001b98a7dcbd30cce67aa1135ef355 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Mon, 7 Nov 2022 02:37:23 +0530
Subject: [PATCH 1724/2552] [update] Use error code 100 for update errors

This error code was previously used for "Exiting to finish update", but
is no longer used

Closes #5198
---
 yt_dlp/update.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/yt_dlp/update.py b/yt_dlp/update.py
index 026bc12aa..70a1d6f7f 100644
--- a/yt_dlp/update.py
+++ b/yt_dlp/update.py
@@ -174,6 +174,7 @@ class Updater:
 
     def _report_error(self, msg, expected=False):
         self.ydl.report_error(msg, tb=False if expected else None)
+        self.ydl._download_retcode = 100
 
     def _report_permission_error(self, file):
         self._report_error(f'Unable to write to {file}; Try running as administrator', True)
From 5da08bde9e073987d1aae2683235721e4813f9c6 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Mon, 7 Nov 2022 02:38:38 +0530
Subject: [PATCH 1725/2552] [extractor/vlive] Extract `release_timestamp`

Closes #5424
---
 yt_dlp/extractor/vlive.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/yt_dlp/extractor/vlive.py b/yt_dlp/extractor/vlive.py
index c60801417..f4bb079b2 100644
--- a/yt_dlp/extractor/vlive.py
+++ b/yt_dlp/extractor/vlive.py
@@ -13,6 +13,7 @@ from ..utils import (
     merge_dicts,
     str_or_none,
     strip_or_none,
+    traverse_obj,
     try_get,
     urlencode_postdata,
     url_or_none,
@@ -81,6 +82,13 @@ class VLiveIE(VLiveBaseIE):
             'upload_date': '20150817',
             'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
             'timestamp': 1439816449,
+            'like_count': int,
+            'channel': 'Girl\'s Day',
+            'channel_id': 'FDF27',
+            'comment_count': int,
+            'release_timestamp': 1439818140,
+            'release_date': '20150817',
+            'duration': 1014,
         },
         'params': {
             'skip_download': True,
@@ -98,6 +106,13 @@ class VLiveIE(VLiveBaseIE):
             'upload_date': '20161112',
             'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
             'timestamp': 1478923074,
+            'like_count': int,
+            'channel': 'EXO',
+            'channel_id': 'F94BD',
+            'comment_count': int,
+            'release_timestamp': 1478924280,
+            'release_date': '20161112',
+            'duration': 906,
         },
         'params': {
             'skip_download': True,
@@ -169,6 +184,7 @@ class VLiveIE(VLiveBaseIE):
             'like_count': int_or_none(video.get('likeCount')),
             'comment_count': int_or_none(video.get('commentCount')),
             'timestamp': int_or_none(video.get('createdAt'), scale=1000),
+            'release_timestamp': int_or_none(traverse_obj(video, 'onAirStartAt', 'willStartAt'), scale=1000),
             'thumbnail': video.get('thumb'),
         }
 
From e9ce4e92501fbe8cc0761ec94f16346d8ba65434 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Mon, 7 Nov 2022 02:59:53 +0530
Subject: [PATCH 1726/2552] [extractor/foxnews] Add 
`FoxNewsVideo` extractor Closes #5133 --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/foxnews.py | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 7612d291d..acec580d5 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -588,6 +588,7 @@ from .foxgay import FoxgayIE from .foxnews import ( FoxNewsIE, FoxNewsArticleIE, + FoxNewsVideoIE, ) from .foxsports import FoxSportsIE from .fptplay import FptplayIE diff --git a/yt_dlp/extractor/foxnews.py b/yt_dlp/extractor/foxnews.py index a0b116608..52172aace 100644 --- a/yt_dlp/extractor/foxnews.py +++ b/yt_dlp/extractor/foxnews.py @@ -75,6 +75,29 @@ class FoxNewsIE(AMPIE): return info +class FoxNewsVideoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?foxnews\.com/video/(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://www.foxnews.com/video/6313058664112', + 'info_dict': { + 'id': '6313058664112', + 'ext': 'mp4', + 'thumbnail': r're:https://.+/1280x720/match/image\.jpg', + 'upload_date': '20220930', + 'description': 'New York City, Kids Therapy, Biden', + 'duration': 2415, + 'title': 'Gutfeld! - Thursday, September 29', + 'timestamp': 1664527538, + }, + 'expected_warnings': ['Ignoring subtitle tracks'], + 'params': {'skip_download': 'm3u8'}, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + return self.url_result(f'https://video.foxnews.com/v/{video_id}', FoxNewsIE, video_id) + + class FoxNewsArticleIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?(?:insider\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)' IE_NAME = 'foxnews:article' From c7e4ab278a19e0d4e0eb9626660a4634df964364 Mon Sep 17 00:00:00 2001 From: Lesmiscore <nao20010128@gmail.com> Date: Mon, 7 Nov 2022 14:56:28 +0900 Subject: [PATCH 1727/2552] [extractor/niconico] Always use HTTPS for requests This prevents MITM attacks from malicious parties like insane ISPs Closes #5469 --- yt_dlp/extractor/niconico.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py index 82b60b476..e131b044a 100644 --- a/yt_dlp/extractor/niconico.py +++ b/yt_dlp/extractor/niconico.py @@ -231,7 +231,7 @@ class NiconicoIE(InfoExtractor): or self._parse_json( self._html_search_regex( 'data-api-data="([^"]+)"', - self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id), + self._download_webpage('https://www.nicovideo.jp/watch/' + video_id, video_id), 'API data', default='{}'), video_id)) @@ -390,7 +390,7 @@ class NiconicoIE(InfoExtractor): try: webpage, handle = self._download_webpage_handle( - 'http://www.nicovideo.jp/watch/' + video_id, video_id) + 'https://www.nicovideo.jp/watch/' + video_id, video_id) if video_id.startswith('so'): video_id = self._match_id(handle.geturl()) @@ -728,7 +728,7 @@ class NicovideoSearchBaseIE(InfoExtractor): webpage = self._download_webpage(url, item_id, query=query, note=note % {'page': page_num}) results = re.findall(r'(?<=data-video-id=)["\']?(?P<videoid>.*?)(?=["\'])', webpage) for item in results: - yield self.url_result(f'http://www.nicovideo.jp/watch/{item}', 'Niconico', item) + yield self.url_result(f'https://www.nicovideo.jp/watch/{item}', 'Niconico', item) if not results: break From fbb0ee7747b8e3657c9c50d26b728eb4c75d1899 Mon Sep 17 00:00:00 2001 From: ClosedPort22 <44864697+ClosedPort22@users.noreply.github.com> Date: Mon, 7 Nov 2022 23:24:30 +0800 Subject: [PATCH 1728/2552] [compat] Fix `shutils.move` in 
restricted ACL mode on BSD (#5309) Authored by: ClosedPort22, pukkandan --- yt_dlp/compat/shutil.py | 30 +++++++++++++++++++ .../postprocessor/movefilesafterdownload.py | 2 +- 2 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 yt_dlp/compat/shutil.py diff --git a/yt_dlp/compat/shutil.py b/yt_dlp/compat/shutil.py new file mode 100644 index 000000000..23239d5ce --- /dev/null +++ b/yt_dlp/compat/shutil.py @@ -0,0 +1,30 @@ +# flake8: noqa: F405 +from shutil import * # noqa: F403 + +from .compat_utils import passthrough_module + +passthrough_module(__name__, 'shutil') +del passthrough_module + + +import sys + +if sys.platform.startswith('freebsd'): + import errno + import os + import shutil + + # Workaround for PermissionError when using restricted ACL mode on FreeBSD + def copy2(src, dst, *args, **kwargs): + if os.path.isdir(dst): + dst = os.path.join(dst, os.path.basename(src)) + shutil.copyfile(src, dst, *args, **kwargs) + try: + shutil.copystat(src, dst, *args, **kwargs) + except PermissionError as e: + if e.errno != getattr(errno, 'EPERM', None): + raise + return dst + + def move(*args, copy_function=copy2, **kwargs): + return shutil.move(*args, copy_function=copy_function, **kwargs) diff --git a/yt_dlp/postprocessor/movefilesafterdownload.py b/yt_dlp/postprocessor/movefilesafterdownload.py index 436d13227..23b09248c 100644 --- a/yt_dlp/postprocessor/movefilesafterdownload.py +++ b/yt_dlp/postprocessor/movefilesafterdownload.py @@ -1,7 +1,7 @@ import os -import shutil from .common import PostProcessor +from ..compat import shutil from ..utils import ( PostProcessingError, decodeFilename, From 9b383177c99185d66efb5dd1c1bee2eb025a6386 Mon Sep 17 00:00:00 2001 From: m4tu4g <71326926+m4tu4g@users.noreply.github.com> Date: Mon, 7 Nov 2022 21:29:53 +0530 Subject: [PATCH 1729/2552] [extractor/mxplayer] Improve extractor (#5303) Closes #5276 Authored by: m4tu4g --- yt_dlp/extractor/mxplayer.py | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/yt_dlp/extractor/mxplayer.py b/yt_dlp/extractor/mxplayer.py index affdba10c..1fdb08edf 100644 --- a/yt_dlp/extractor/mxplayer.py +++ b/yt_dlp/extractor/mxplayer.py @@ -4,6 +4,7 @@ from ..utils import ( int_or_none, traverse_obj, try_get, + urljoin, ) @@ -147,6 +148,17 @@ class MxplayerIE(InfoExtractor): 'format': 'bv', 'skip_download': True, }, + }, { + 'url': 'https://www.mxplayer.in/movie/watch-deewane-huye-paagal-movie-online-4f9175c40a11c3994182a65afdd37ec6?watch=true', + 'info_dict': { + 'id': '4f9175c40a11c3994182a65afdd37ec6', + 'display_id': 'watch-deewane-huye-paagal-movie-online', + 'title': 'Deewane Huye Paagal', + 'duration': 9037, + 'ext': 'mp4', + 'description': 'md5:d17bd5c651016c4ed2e6f8a4ace15534', + }, + 'params': {'skip_download': 'm3u8'}, }] def _real_extract(self, url): @@ -157,21 +169,24 @@ class MxplayerIE(InfoExtractor): data_json = self._download_json( f'https://api.mxplay.com/v1/web/detail/video?type={video_type}&id={video_id}', display_id) - streams = traverse_obj(data_json, ('stream', {'m3u8': ('hls', 'high'), 'mpd': ('dash', 'high')})) - formats, dash_subs = self._extract_mpd_formats_and_subtitles( - f'https://llvod.mxplay.com/{streams["mpd"]}', display_id, fatal=False) - hls_frmts, hls_subs = self._extract_m3u8_formats_and_subtitles( - f'https://llvod.mxplay.com/{streams["m3u8"]}', display_id, fatal=False) - - formats.extend(hls_frmts) - self._sort_formats(formats) + formats, subtitles = [], {} + m3u8_url = urljoin('https://llvod.mxplay.com/', traverse_obj( + 
data_json, ('stream', (('thirdParty', 'hlsUrl'), ('hls', 'high'))), get_all=False)) + if m3u8_url: + formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, display_id, 'mp4', fatal=False) + mpd_url = urljoin('https://llvod.mxplay.com/', traverse_obj( + data_json, ('stream', (('thirdParty', 'dashUrl'), ('dash', 'high'))), get_all=False)) + if mpd_url: + fmts, subs = self._extract_mpd_formats_and_subtitles(mpd_url, display_id, fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) season = traverse_obj(data_json, ('container', 'title')) return { 'id': video_id, 'title': data_json.get('title'), 'formats': formats, - 'subtitles': self._merge_subtitles(dash_subs, hls_subs), + 'subtitles': subtitles, 'display_id': display_id, 'duration': data_json.get('duration'), 'series': traverse_obj(data_json, ('container', 'container', 'title')), From 8196182a12ca2358c09903a9c4abd9c06e3f8e95 Mon Sep 17 00:00:00 2001 From: megapro17 <megapro17@gmail.com> Date: Mon, 7 Nov 2022 19:02:42 +0300 Subject: [PATCH 1730/2552] [extractor/odnoklassniki] Support boosty.to embeds (#5105) Closes #4212 Authored by: megapro17, Lesmiscore, pukkandan --- yt_dlp/extractor/odnoklassniki.py | 94 ++++++++++++++++++++++++------- 1 file changed, 75 insertions(+), 19 deletions(-) diff --git a/yt_dlp/extractor/odnoklassniki.py b/yt_dlp/extractor/odnoklassniki.py index 4faec914e..195563bbb 100644 --- a/yt_dlp/extractor/odnoklassniki.py +++ b/yt_dlp/extractor/odnoklassniki.py @@ -8,10 +8,12 @@ from ..compat import ( from ..utils import ( ExtractorError, float_or_none, - unified_strdate, int_or_none, qualities, + smuggle_url, unescapeHTML, + unified_strdate, + unsmuggle_url, urlencode_postdata, ) @@ -22,7 +24,7 @@ class OdnoklassnikiIE(InfoExtractor): (?:(?:www|m|mobile)\.)? 
(?:odnoklassniki|ok)\.ru/ (?: - video(?:embed)?/| + video(?P<embed>embed)?/| web-api/video/moviePlayer/| live/| dk\?.*?st\.mvId= @@ -38,7 +40,7 @@ class OdnoklassnikiIE(InfoExtractor): 'ext': 'mp4', 'timestamp': 1545580896, 'view_count': int, - 'thumbnail': 'https://coub-anubis-a.akamaized.net/coub_storage/coub/simple/cw_image/c5ac87553bd/608e806a1239c210ab692/1545580913_00026.jpg', + 'thumbnail': 'https://coub-attachments.akamaized.net/coub_storage/coub/simple/cw_image/c5ac87553bd/608e806a1239c210ab692/1545580913_00026.jpg', 'title': 'Народная забава', 'uploader': 'Nevata', 'upload_date': '20181223', @@ -65,11 +67,12 @@ class OdnoklassnikiIE(InfoExtractor): }, { # metadata in JSON 'url': 'http://ok.ru/video/20079905452', - 'md5': '0b62089b479e06681abaaca9d204f152', + 'md5': '5d2b64756e2af296e3b383a0bc02a6aa', 'info_dict': { 'id': '20079905452', 'ext': 'mp4', 'title': 'Культура меняет нас (прекрасный ролик!))', + 'thumbnail': str, 'duration': 100, 'upload_date': '20141207', 'uploader_id': '330537914540', @@ -80,11 +83,12 @@ class OdnoklassnikiIE(InfoExtractor): }, { # metadataUrl 'url': 'http://ok.ru/video/63567059965189-0?fromTime=5', - 'md5': '6ff470ea2dd51d5d18c295a355b0b6bc', + 'md5': 'f8c951122516af72e6e6ffdd3c41103b', 'info_dict': { 'id': '63567059965189-0', 'ext': 'mp4', 'title': 'Девушка без комплексов ...', + 'thumbnail': str, 'duration': 191, 'upload_date': '20150518', 'uploader_id': '534380003155', @@ -95,18 +99,32 @@ class OdnoklassnikiIE(InfoExtractor): }, }, { # YouTube embed (metadataUrl, provider == USER_YOUTUBE) - 'url': 'http://ok.ru/video/64211978996595-1', - 'md5': '2f206894ffb5dbfcce2c5a14b909eea5', + 'url': 'https://ok.ru/video/3952212382174', + 'md5': '91749d0bd20763a28d083fa335bbd37a', 'info_dict': { - 'id': 'V_VztHT5BzY', + 'id': '5axVgHHDBvU', 'ext': 'mp4', - 'title': 'Космическая среда от 26 августа 2015', - 'description': 'md5:848eb8b85e5e3471a3a803dae1343ed0', - 'duration': 440, - 'upload_date': '20150826', - 'uploader_id': 'tvroscosmos', - 'uploader': 'Телестудия Роскосмоса', + 'title': 'Youtube-dl 101: What is it and HOW to use it! 
Full Download Walkthrough and Guide', + 'description': 'md5:b57209eeb9d5c2f20c984dfb58862097', + 'uploader': 'Lod Mer', + 'uploader_id': '575186401502', + 'duration': 1529, 'age_limit': 0, + 'upload_date': '20210405', + 'comment_count': int, + 'live_status': 'not_live', + 'view_count': int, + 'thumbnail': 'https://i.mycdn.me/i?r=AEHujHvw2RjEbemUCNEorZbxYpb_p_9AcN2FmGik64Krkcmz37YtlY093oAM5-HIEAt7Zi9s0CiBOSDmbngC-I-k&fn=external_8', + 'uploader_url': 'http://www.youtube.com/user/MrKewlkid94', + 'channel_follower_count': int, + 'tags': ['youtube-dl', 'youtube playlists', 'download videos', 'download audio'], + 'channel_id': 'UCVGtvURtEURYHtJFUegdSug', + 'like_count': int, + 'availability': 'public', + 'channel_url': 'https://www.youtube.com/channel/UCVGtvURtEURYHtJFUegdSug', + 'categories': ['Education'], + 'playable_in_embed': True, + 'channel': 'BornToReact', }, }, { # YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field) @@ -126,10 +144,12 @@ class OdnoklassnikiIE(InfoExtractor): }, 'skip': 'Video has not been found', }, { + # TODO: HTTP Error 400: Bad Request, it only works if there's no cookies when downloading 'note': 'Only available in mobile webpage', 'url': 'https://m.ok.ru/video/2361249957145', 'info_dict': { 'id': '2361249957145', + 'ext': 'mp4', 'title': 'Быковское крещение', 'duration': 3038.181, }, @@ -158,8 +178,37 @@ class OdnoklassnikiIE(InfoExtractor): # Paid video 'url': 'https://ok.ru/video/954886983203', 'only_matching': True, + }, { + 'url': 'https://ok.ru/videoembed/2932705602075', + 'info_dict': { + 'id': '2932705602075', + 'ext': 'mp4', + 'thumbnail': 'https://i.mycdn.me/videoPreview?id=1369902483995&type=37&idx=2&tkn=fqlnoQD_xwq5ovIlKfgNyU08qmM&fn=external_8', + 'title': 'Boosty для тебя!', + 'uploader_id': '597811038747', + 'like_count': 0, + 'duration': 35, + }, }] + _WEBPAGE_TESTS = [{ + 'url': 'https://boosty.to/ikakprosto/posts/56cedaca-b56a-4dfd-b3ed-98c79cfa0167', + 'info_dict': { + 'id': '3950343629563', + 'ext': 'mp4', + 'thumbnail': 'https://i.mycdn.me/videoPreview?id=2776238394107&type=37&idx=11&tkn=F3ejkUFcpuI4DnMRxrDGcH5YcmM&fn=external_8', + 'title': 'Заяц Бусти.mp4', + 'uploader_id': '571368965883', + 'like_count': 0, + 'duration': 10444, + }, + }] + + @classmethod + def _extract_embed_urls(cls, url, webpage): + for x in super()._extract_embed_urls(url, webpage): + yield smuggle_url(x, {'referrer': url}) + def _real_extract(self, url): try: return self._extract_desktop(url) @@ -174,16 +223,23 @@ class OdnoklassnikiIE(InfoExtractor): start_time = int_or_none(compat_parse_qs( compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0]) - video_id = self._match_id(url) + url, smuggled = unsmuggle_url(url, {}) + video_id, is_embed = self._match_valid_url(url).group('id', 'embed') + mode = 'videoembed' if is_embed else 'video' webpage = self._download_webpage( - 'http://ok.ru/video/%s' % video_id, video_id, - note='Downloading desktop webpage') + f'https://ok.ru/{mode}/{video_id}', video_id, + note='Downloading desktop webpage', + headers={'Referer': smuggled['referrer']} if smuggled.get('referrer') else {}) error = self._search_regex( r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<', webpage, 'error', default=None) - if error: + # Direct link from boosty + if (error == 'The author of this video has not been found or is blocked' + and not smuggled.get('referrer') and mode == 'videoembed'): + return self._extract_desktop(smuggle_url(url, {'referrer': 'https://boosty.to'})) + elif error: raise ExtractorError(error, 
expected=True) player = self._parse_json( @@ -270,7 +326,7 @@ class OdnoklassnikiIE(InfoExtractor): if provider == 'LIVE_TV_APP': info['title'] = title - quality = qualities(('4', '0', '1', '2', '3', '5')) + quality = qualities(('4', '0', '1', '2', '3', '5', '6', '7')) formats = [{ 'url': f['url'], From 581e86b512bbe39c1252bd696d0db8a906bce355 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Mon, 7 Nov 2022 21:46:47 +0530 Subject: [PATCH 1731/2552] [extractor/uktvplay] Fix `_VALID_URL` Closes #5472 --- yt_dlp/extractor/uktvplay.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/uktvplay.py b/yt_dlp/extractor/uktvplay.py index 819ac5a35..ab22a8e2d 100644 --- a/yt_dlp/extractor/uktvplay.py +++ b/yt_dlp/extractor/uktvplay.py @@ -2,7 +2,7 @@ from .common import InfoExtractor class UKTVPlayIE(InfoExtractor): - _VALID_URL = r'https?://uktvplay\.(?:uktv\.)?co\.uk/(?:.+?\?.*?\bvideo=|([^/]+/)*watch-online/)(?P<id>\d+)' + _VALID_URL = r'https?://uktvplay\.(?:uktv\.)?co\.uk/(?:.+?\?.*?\bvideo=|([^/]+/)*)(?P<id>\d+)' _TESTS = [{ 'url': 'https://uktvplay.uktv.co.uk/shows/world-at-war/c/200/watch-online/?video=2117008346001', 'info_dict': { @@ -22,6 +22,9 @@ class UKTVPlayIE(InfoExtractor): }, { 'url': 'https://uktvplay.uktv.co.uk/shows/africa/watch-online/5983349675001', 'only_matching': True, + }, { + 'url': 'https://uktvplay.co.uk/shows/hornby-a-model-world/series-1/episode-1/6276739790001?autoplaying=true', + 'only_matching': True, }] # BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/1242911124001/OrCyvJ2gyL_default/index.html?videoId=%s' BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1242911124001/H1xnMOqP_default/index.html?videoId=%s' From 3b87f4d9439e28cf568113409eafb304a519b2e1 Mon Sep 17 00:00:00 2001 From: Anant Murmu <freezboltz@gmail.com> Date: Tue, 8 Nov 2022 12:14:47 +0530 Subject: [PATCH 1732/2552] [extractor/stripchat] Improve error message (#5475) Authored by: freezboltz --- yt_dlp/extractor/stripchat.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/yt_dlp/extractor/stripchat.py b/yt_dlp/extractor/stripchat.py index 8cd8388aa..d04aa1db0 100644 --- a/yt_dlp/extractor/stripchat.py +++ b/yt_dlp/extractor/stripchat.py @@ -40,6 +40,7 @@ class StripchatIE(InfoExtractor): server = traverse_obj(data, ('viewCam', 'viewServers', 'flashphoner-hls'), expected_type=str) model_id = traverse_obj(data, ('viewCam', 'model', 'id'), expected_type=int) + formats = [] for host in traverse_obj(data, ( 'config', 'data', (('featuresV2', 'hlsFallback', 'fallbackDomains', ...), 'hlsStreamHost'))): formats = self._extract_m3u8_formats( @@ -47,6 +48,8 @@ class StripchatIE(InfoExtractor): video_id, ext='mp4', m3u8_id='hls', fatal=False, live=True) if formats: break + if not formats: + self.raise_no_formats('No active streams found', expected=True) self._sort_formats(formats) From db6fa6960caa1ac3c85f5e77ef9eb95f8eda8cb3 Mon Sep 17 00:00:00 2001 From: m4tu4g <71326926+m4tu4g@users.noreply.github.com> Date: Wed, 9 Nov 2022 08:33:10 +0530 Subject: [PATCH 1733/2552] [extractor/hotstar] Add season support (#5479) Closes #5473 Authored by: m4tu4g --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/hotstar.py | 63 +++++++++++++++++++++++++++------ 2 files changed, 53 insertions(+), 11 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index acec580d5..6d5fc033e 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -699,6 +699,7 @@ from .hotstar import 
( HotStarIE, HotStarPrefixIE, HotStarPlaylistIE, + HotStarSeasonIE, HotStarSeriesIE, ) from .howcast import HowcastIE diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index d9223a416..a2901de49 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -258,16 +258,16 @@ class HotStarPrefixIE(InfoExtractor): 'url': 'hotstar:1000076273', 'only_matching': True, }, { - 'url': 'hotstar:movies:1000057157', + 'url': 'hotstar:movies:1260009879', 'info_dict': { - 'id': '1000057157', + 'id': '1260009879', 'ext': 'mp4', - 'title': 'Radha Gopalam', - 'description': 'md5:be3bc342cc120bbc95b3b0960e2b0d22', - 'timestamp': 1140805800, - 'upload_date': '20060224', - 'duration': 9182, - 'episode': 'Radha Gopalam', + 'title': 'Nuvvu Naaku Nachav', + 'description': 'md5:d43701b1314e6f8233ce33523c043b7d', + 'timestamp': 1567525674, + 'upload_date': '20190903', + 'duration': 10787, + 'episode': 'Nuvvu Naaku Nachav', }, }, { 'url': 'hotstar:episode:1000234847', @@ -289,7 +289,7 @@ class HotStarPrefixIE(InfoExtractor): class HotStarPlaylistIE(HotStarBaseIE): IE_NAME = 'hotstar:playlist' - _VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)' + _VALID_URL = r'https?://(?:www\.)?hotstar\.com(?:/in)?/tv(?:/[^/]+){2}/list/[^/]+/t-(?P<id>\w+)' _TESTS = [{ 'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26', 'info_dict': { @@ -299,6 +299,9 @@ class HotStarPlaylistIE(HotStarBaseIE): }, { 'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480', 'only_matching': True, + }, { + 'url': 'https://www.hotstar.com/in/tv/karthika-deepam/15457/list/popular-clips/t-3_2_1272', + 'only_matching': True, }] def _real_extract(self, url): @@ -312,9 +315,47 @@ class HotStarPlaylistIE(HotStarBaseIE): return self.playlist_result(entries, playlist_id) +class HotStarSeasonIE(HotStarBaseIE): + IE_NAME = 'hotstar:season' + _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/\w+)/seasons/[^/]+/ss-(?P<id>\w+)' + _TESTS = [{ + 'url': 'https://www.hotstar.com/tv/radhakrishn/1260000646/seasons/season-2/ss-8028', + 'info_dict': { + 'id': '8028', + }, + 'playlist_mincount': 35, + }, { + 'url': 'https://www.hotstar.com/in/tv/ishqbaaz/9567/seasons/season-2/ss-4357', + 'info_dict': { + 'id': '4357', + }, + 'playlist_mincount': 30, + }, { + 'url': 'https://www.hotstar.com/in/tv/bigg-boss/14714/seasons/season-4/ss-8208/', + 'info_dict': { + 'id': '8208', + }, + 'playlist_mincount': 19, + }] + + def _real_extract(self, url): + url, season_id = self._match_valid_url(url).groups() + headers = { + 'x-country-code': 'IN', + 'x-platform-code': 'PCTV', + } + item_json = self._download_json( + f'{self._API_URL}/o/v1/season/asset?tao=0&tas=0&size=10000&id={season_id}', season_id, headers=headers)['body']['results'] + entries = [ + self.url_result(HotStarIE._video_url(video['contentId'], root=url), HotStarIE, video['contentId']) + for video in item_json['items'] if video.get('contentId')] + + return self.playlist_result(entries, season_id) + + class HotStarSeriesIE(HotStarBaseIE): IE_NAME = 'hotstar:series' - _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/(?P<id>\d+))' + _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/(?P<id>\d+))/?(?:[#?]|$)' _TESTS = [{ 'url': 'https://www.hotstar.com/in/tv/radhakrishn/1260000646', 'info_dict': { @@ -332,7 +373,7 @@ class HotStarSeriesIE(HotStarBaseIE): 'info_dict': { 'id': '435', }, - 'playlist_mincount': 269, + 'playlist_mincount': 
267, }] def _real_extract(self, url): From fad689c7b61b8afd1a18de167ab0a74105b98c47 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Wed, 9 Nov 2022 08:35:34 +0530 Subject: [PATCH 1734/2552] [extractor/hotstar] Refactor v1 API calls --- yt_dlp/extractor/hotstar.py | 81 +++++++++++++------------------------ 1 file changed, 29 insertions(+), 52 deletions(-) diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index a2901de49..48aa6e94a 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -1,22 +1,19 @@ import hashlib import hmac +import json import re import time import uuid -import json from .common import InfoExtractor -from ..compat import ( - compat_HTTPError, - compat_str -) +from ..compat import compat_HTTPError, compat_str from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, join_nonempty, str_or_none, - try_get, + traverse_obj, url_or_none, ) @@ -26,6 +23,11 @@ class HotStarBaseIE(InfoExtractor): _API_URL = 'https://api.hotstar.com' _AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee' + def _call_api_v1(self, path, *args, **kwargs): + return self._download_json( + f'{self._API_URL}/o/v1/{path}', *args, **kwargs, + headers={'x-country-code': 'IN', 'x-platform-code': 'PCTV'}) + def _call_api_impl(self, path, video_id, query, st=None, cookies=None): st = int_or_none(st) or int(time.time()) exp = st + 6000 @@ -59,17 +61,6 @@ class HotStarBaseIE(InfoExtractor): response['message'], expected=True) return response['data'] - def _call_api(self, path, video_id, query_name='contentId'): - return self._download_json( - f'{self._API_URL}/{path}', video_id=video_id, - query={ - query_name: video_id, - 'tas': 10000, - }, headers={ - 'x-country-code': 'IN', - 'x-platform-code': 'PCTV', - }) - def _call_api_v2(self, path, video_id, st=None, cookies=None): return self._call_api_impl( f'{path}/content/{video_id}', video_id, st=st, cookies=cookies, query={ @@ -79,6 +70,13 @@ class HotStarBaseIE(InfoExtractor): 'os-version': '10', }) + def _playlist_entries(self, path, item_id, root=None, **kwargs): + results = self._call_api_v1(path, item_id, **kwargs)['body']['results'] + for video in traverse_obj(results, (('assets', None), 'items', ...)): + if video.get('contentId'): + yield self.url_result( + HotStarIE._video_url(video['contentId'], root=root), HotStarIE, video['contentId']) + class HotStarIE(HotStarBaseIE): IE_NAME = 'hotstar' @@ -104,6 +102,7 @@ class HotStarIE(HotStarBaseIE): 'duration': 381, 'episode': 'Can You Not Spread Rumours?', }, + 'params': {'skip_download': 'm3u8'}, }, { 'url': 'https://www.hotstar.com/tv/ek-bhram-sarvagun-sampanna/s-2116/janhvi-targets-suman/1000234847', 'info_dict': { @@ -161,7 +160,8 @@ class HotStarIE(HotStarBaseIE): video_type = self._TYPE.get(video_type, video_type) cookies = self._get_cookies(url) # Cookies before any request - video_data = self._call_api(f'o/v1/{video_type}/detail', video_id)['body']['results']['item'] + video_data = self._call_api_v1(f'{video_type}/detail', video_id, + query={'tas': 10000, 'contentId': video_id})['body']['results']['item'] if not self.get_param('allow_unplayable_formats') and video_data.get('drmProtected'): self.report_drm(video_id) @@ -305,14 +305,9 @@ class HotStarPlaylistIE(HotStarBaseIE): }] def _real_extract(self, url): - playlist_id = self._match_id(url) - - collection = self._call_api('o/v1/tray/find', playlist_id, 'uqId')['body']['results'] - entries = [ - 
self.url_result(HotStarIE._video_url(video['contentId']), HotStarIE, video['contentId']) - for video in collection['assets']['items'] if video.get('contentId')] - - return self.playlist_result(entries, playlist_id) + id_ = self._match_id(url) + return self.playlist_result( + self._playlist_entries('tray/find', id_, query={'tas': 10000, 'uqId': id_}), id_) class HotStarSeasonIE(HotStarBaseIE): @@ -340,17 +335,8 @@ class HotStarSeasonIE(HotStarBaseIE): def _real_extract(self, url): url, season_id = self._match_valid_url(url).groups() - headers = { - 'x-country-code': 'IN', - 'x-platform-code': 'PCTV', - } - item_json = self._download_json( - f'{self._API_URL}/o/v1/season/asset?tao=0&tas=0&size=10000&id={season_id}', season_id, headers=headers)['body']['results'] - entries = [ - self.url_result(HotStarIE._video_url(video['contentId'], root=url), HotStarIE, video['contentId']) - for video in item_json['items'] if video.get('contentId')] - - return self.playlist_result(entries, season_id) + return self.playlist_result(self._playlist_entries( + 'season/asset', season_id, url, query={'tao': 0, 'tas': 0, 'size': 10000, 'id': season_id}), season_id) class HotStarSeriesIE(HotStarBaseIE): @@ -378,17 +364,8 @@ class HotStarSeriesIE(HotStarBaseIE): def _real_extract(self, url): url, series_id = self._match_valid_url(url).groups() - headers = { - 'x-country-code': 'IN', - 'x-platform-code': 'PCTV', - } - detail_json = self._download_json( - f'{self._API_URL}/o/v1/show/detail?contentId={series_id}', series_id, headers=headers) - id = try_get(detail_json, lambda x: x['body']['results']['item']['id'], int) - item_json = self._download_json( - f'{self._API_URL}/o/v1/tray/g/1/items?etid=0&tao=0&tas=10000&eid={id}', series_id, headers=headers) - - return self.playlist_result([ - self.url_result(HotStarIE._video_url(video['contentId'], root=url), HotStarIE, video['contentId']) - for video in item_json['body']['results']['items'] if video.get('contentId') - ], series_id) + id_ = self._call_api_v1( + 'show/detail', series_id, query={'contentId': series_id})['body']['results']['item']['id'] + + return self.playlist_result(self._playlist_entries( + 'tray/g/1/items', series_id, url, query={'tao': 0, 'tas': 10000, 'etid': 0, 'eid': id_}), series_id) From 8fddc232bfe99eee847a4c4fa57ed7a334ebd62c Mon Sep 17 00:00:00 2001 From: zulaport <70630440+zulaport@users.noreply.github.com> Date: Tue, 8 Nov 2022 19:23:24 -0800 Subject: [PATCH 1735/2552] [extractor/camsoda] Add extractor (#5465) Authored by: zulaport --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/camsoda.py | 59 +++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 yt_dlp/extractor/camsoda.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 6d5fc033e..97da309c5 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -255,6 +255,7 @@ from .camdemy import ( CamdemyFolderIE ) from .cammodels import CamModelsIE +from .camsoda import CamsodaIE from .camtasia import CamtasiaEmbedIE from .camwithher import CamWithHerIE from .canalalpha import CanalAlphaIE diff --git a/yt_dlp/extractor/camsoda.py b/yt_dlp/extractor/camsoda.py new file mode 100644 index 000000000..1b47b0584 --- /dev/null +++ b/yt_dlp/extractor/camsoda.py @@ -0,0 +1,59 @@ +import random + +from .common import InfoExtractor +from ..utils import ExtractorError, traverse_obj + + +class CamsodaIE(InfoExtractor): + _VALID_URL = r'https?://www\.camsoda\.com/(?P<id>[\w-]+)' + _TESTS = [{ + 'url': 
'https://www.camsoda.com/lizzhopf', + 'info_dict': { + 'id': 'lizzhopf', + 'ext': 'mp4', + 'title': 'lizzhopf (lizzhopf) Nude on Cam. Free Live Sex Chat Room - CamSoda', + 'description': str, + 'is_live': True, + 'age_limit': 18, + }, + 'skip': 'Room is offline', + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id, headers=self.geo_verification_headers()) + + data = self._download_json( + f'https://camsoda.com/api/v1/video/vtoken/{video_id}', video_id, + query={'username': f'guest_{random.randrange(10000, 99999)}'}, + headers=self.geo_verification_headers()) + if not data: + raise ExtractorError('Unable to find configuration for stream.') + elif data.get('private_servers'): + raise ExtractorError('Model is in private show.', expected=True) + elif not data.get('stream_name'): + raise ExtractorError('Model is offline.', expected=True) + + stream_name = traverse_obj(data, 'stream_name', expected_type=str) + token = traverse_obj(data, 'token', expected_type=str) + + formats = [] + for server in traverse_obj(data, ('edge_servers', ...)): + formats = self._extract_m3u8_formats( + f'https://{server}/{stream_name}_v1/index.m3u8?token={token}', + video_id, ext='mp4', m3u8_id='hls', fatal=False, live=True) + if formats: + break + if not formats: + self.raise_no_formats('No active streams found', expected=True) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': self._html_extract_title(webpage), + 'description': self._html_search_meta('description', webpage, default=None), + 'is_live': True, + 'formats': formats, + 'age_limit': 18, + } From c61473c1d617a4d5432248815f22dcb46906acaf Mon Sep 17 00:00:00 2001 From: MMM <flashdagger@googlemail.com> Date: Wed, 9 Nov 2022 04:30:15 +0100 Subject: [PATCH 1736/2552] [extractor/bitchute] Improve `BitChuteChannelIE` (#5066) Authored by: flashdagger, pukkandan --- yt_dlp/extractor/bitchute.py | 138 ++++++++++++++++++++++++----------- yt_dlp/utils.py | 2 + 2 files changed, 99 insertions(+), 41 deletions(-) diff --git a/yt_dlp/extractor/bitchute.py b/yt_dlp/extractor/bitchute.py index 87d04468a..f4b6a9a0e 100644 --- a/yt_dlp/extractor/bitchute.py +++ b/yt_dlp/extractor/bitchute.py @@ -1,14 +1,18 @@ -import itertools +import functools import re from .common import InfoExtractor from ..utils import ( ExtractorError, HEADRequest, + OnDemandPagedList, clean_html, get_element_by_class, + get_elements_html_by_class, int_or_none, orderedSet, + parse_count, + parse_duration, traverse_obj, unified_strdate, urlencode_postdata, @@ -109,51 +113,103 @@ class BitChuteIE(InfoExtractor): class BitChuteChannelIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?bitchute\.com/channel/(?P<id>[^/?#&]+)' - _TEST = { - 'url': 'https://www.bitchute.com/channel/victoriaxrave/', - 'playlist_mincount': 185, + _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?P<type>channel|playlist)/(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'https://www.bitchute.com/channel/bitchute/', 'info_dict': { - 'id': 'victoriaxrave', + 'id': 'bitchute', + 'title': 'BitChute', + 'description': 'md5:5329fb3866125afa9446835594a9b138', }, - } + 'playlist': [ + { + 'md5': '7e427d7ed7af5a75b5855705ec750e2b', + 'info_dict': { + 'id': 'UGlrF9o9b-Q', + 'ext': 'mp4', + 'filesize': None, + 'title': 'This is the first video on #BitChute !', + 'description': 'md5:a0337e7b1fe39e32336974af8173a034', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'BitChute', + 'upload_date': '20170103', + 'duration': 16, + 'view_count': int, + }, + 
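The edge-server loop in `CamsodaIE._real_extract` above tries each CDN host in turn and keeps the first one that yields HLS variants. A rough standalone sketch of that fallback, with `fetch_m3u8` as a hypothetical stand-in for `_extract_m3u8_formats`:

    def first_working_edge(edge_servers, stream_name, token, fetch_m3u8):
        # The first edge server that returns a non-empty format list wins
        for server in edge_servers:
            formats = fetch_m3u8(f'https://{server}/{stream_name}_v1/index.m3u8?token={token}')
            if formats:
                return formats
        return []  # the caller raises 'No active streams found' when this stays empty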
} + ], + 'params': { + 'skip_download': True, + 'playlist_items': '-1', + }, + }, { + 'url': 'https://www.bitchute.com/playlist/wV9Imujxasw9/', + 'playlist_mincount': 20, + 'info_dict': { + 'id': 'wV9Imujxasw9', + 'title': 'Bruce MacDonald and "The Light of Darkness"', + 'description': 'md5:04913227d2714af1d36d804aa2ab6b1e', + } + }] _TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7' + PAGE_SIZE = 25 + HTML_CLASS_NAMES = { + 'channel': { + 'container': 'channel-videos-container', + 'title': 'channel-videos-title', + 'description': 'channel-videos-text', + }, + 'playlist': { + 'container': 'playlist-video', + 'title': 'title', + 'description': 'description', + } + + } - def _entries(self, channel_id): - channel_url = 'https://www.bitchute.com/channel/%s/' % channel_id - offset = 0 - for page_num in itertools.count(1): - data = self._download_json( - '%sextend/' % channel_url, channel_id, - 'Downloading channel page %d' % page_num, - data=urlencode_postdata({ - 'csrfmiddlewaretoken': self._TOKEN, - 'name': '', - 'offset': offset, - }), headers={ - 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', - 'Referer': channel_url, - 'X-Requested-With': 'XMLHttpRequest', - 'Cookie': 'csrftoken=%s' % self._TOKEN, - }) - if data.get('success') is False: - break - html = data.get('html') - if not html: - break - video_ids = re.findall( - r'class=["\']channel-videos-image-container[^>]+>\s*<a\b[^>]+\bhref=["\']/video/([^"\'/]+)', - html) - if not video_ids: - break - offset += len(video_ids) - for video_id in video_ids: - yield self.url_result( - 'https://www.bitchute.com/video/%s' % video_id, - ie=BitChuteIE.ie_key(), video_id=video_id) + @staticmethod + def _make_url(playlist_id, playlist_type): + return f'https://www.bitchute.com/{playlist_type}/{playlist_id}/' + + def _fetch_page(self, playlist_id, playlist_type, page_num): + playlist_url = self._make_url(playlist_id, playlist_type) + data = self._download_json( + f'{playlist_url}extend/', playlist_id, f'Downloading page {page_num}', + data=urlencode_postdata({ + 'csrfmiddlewaretoken': self._TOKEN, + 'name': '', + 'offset': page_num * self.PAGE_SIZE, + }), headers={ + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', + 'Referer': playlist_url, + 'X-Requested-With': 'XMLHttpRequest', + 'Cookie': f'csrftoken={self._TOKEN}', + }) + if not data.get('success'): + return + classes = self.HTML_CLASS_NAMES[playlist_type] + for video_html in get_elements_html_by_class(classes['container'], data.get('html')): + video_id = self._search_regex( + r'<a\s[^>]*\bhref=["\']/video/([^"\'/]+)', video_html, 'video id', default=None) + if not video_id: + continue + yield self.url_result( + f'https://www.bitchute.com/video/{video_id}', BitChuteIE, video_id, url_transparent=True, + title=clean_html(get_element_by_class(classes['title'], video_html)), + description=clean_html(get_element_by_class(classes['description'], video_html)), + duration=parse_duration(get_element_by_class('video-duration', video_html)), + view_count=parse_count(clean_html(get_element_by_class('video-views', video_html)))) def _real_extract(self, url): - channel_id = self._match_id(url) + playlist_type, playlist_id = self._match_valid_url(url).group('type', 'id') + webpage = self._download_webpage(self._make_url(playlist_id, playlist_type), playlist_id) + + page_func = functools.partial(self._fetch_page, playlist_id, playlist_type) return self.playlist_result( - self._entries(channel_id), playlist_id=channel_id) + 
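The BitChute channel rewrite above hands pagination to `OnDemandPagedList`, so pages are only fetched when the caller actually slices into the playlist (which keeps `--playlist-items` cheap). A small sketch of that usage, assuming only `OnDemandPagedList` from `yt_dlp.utils` and a per-page fetcher shaped like `_fetch_page`:

    import functools

    from yt_dlp.utils import OnDemandPagedList

    PAGE_SIZE = 25

    def fetch_page(playlist_id, page_num):
        # Stand-in for _fetch_page: the real method issues one POST with
        # offset = page_num * PAGE_SIZE and parses the returned HTML
        return [f'{playlist_id}-video-{page_num * PAGE_SIZE + n}' for n in range(PAGE_SIZE)]

    pager = OnDemandPagedList(functools.partial(fetch_page, 'demo'), PAGE_SIZE)
    print(pager.getslice(0, 3))  # only page 0 is materialized for these items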
OnDemandPagedList(page_func, self.PAGE_SIZE), playlist_id, + title=self._html_extract_title(webpage, default=None), + description=self._html_search_meta( + ('description', 'og:description', 'twitter:description'), webpage, default=None), + playlist_count=int_or_none(self._html_search_regex( + r'<span>(\d+)\s+videos?</span>', webpage, 'playlist count', default=None))) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index d0513496e..b7e7cb7d7 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -418,6 +418,8 @@ def get_elements_text_and_html_by_attribute(attribute, value, html, *, tag=r'[\w Return the text (content) and the html (whole) of the tag with the specified attribute in the passed HTML document """ + if not value: + return quote = '' if re.match(r'''[\s"'`=<>]''', value) else '?' From 86973308cdf670956a61b3ba6d2c124576843954 Mon Sep 17 00:00:00 2001 From: Matthew <coletdjnz@protonmail.com> Date: Wed, 9 Nov 2022 21:58:44 +1300 Subject: [PATCH 1737/2552] [extractor/youtube:tab] Update tab handling for redesign (#5439) Closes #5432, #5430, #5419 Authored by: coletdjnz, pukkandan --- README.md | 4 +- test/test_youtube_lists.py | 13 ++ yt_dlp/extractor/youtube.py | 328 ++++++++++++++++++++++++++---------- 3 files changed, 253 insertions(+), 92 deletions(-) diff --git a/README.md b/README.md index e094ccba7..e9ea99ebf 100644 --- a/README.md +++ b/README.md @@ -88,7 +88,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t * Supports some (but not all) age-gated content without cookies * Download livestreams from the start using `--live-from-start` (*experimental*) * `255kbps` audio is extracted (if available) from YouTube Music when premium cookies are given - * Redirect channel's home URL automatically to `/video` to preserve the old behaviour + * Channel URLs download all uploads of the channel, including shorts and live * **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[+KEYRING][:PROFILE][::CONTAINER]` @@ -142,7 +142,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu * `playlist_index` behaves differently when used with options like `--playlist-reverse` and `--playlist-items`. See [#302](https://github.com/yt-dlp/yt-dlp/issues/302) for details. You can use `--compat-options playlist-index` if you want to keep the earlier behavior * The output of `-F` is listed in a new format. Use `--compat-options list-formats` to revert this * Live chats (if available) are considered as subtitles. Use `--sub-langs all,-live_chat` to download all subtitles except live chat. You can also use `--compat-options no-live-chat` to prevent any live chat/danmaku from downloading -* YouTube channel URLs are automatically redirected to `/video`. Append a `/featured` to the URL to download only the videos in the home page. If the channel does not have a videos tab, we try to download the equivalent `UU` playlist instead. For all other tabs, if the channel does not show the requested tab, an error will be raised. Also, `/live` URLs raise an error if there are no live videos instead of silently downloading the entire channel. You may use `--compat-options no-youtube-channel-redirect` to revert all these redirections +* YouTube channel URLs download all uploads of the channel. To download only the videos in a specific tab, pass the tab's URL. If the channel does not show the requested tab, an error will be raised. 
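As the README change above says, a bare channel URL now expands to all of the channel's uploads, while a tab URL narrows the download to that tab. A hedged sketch of the same behaviour driven through the Python API (the handle and options here are illustrative):

    import yt_dlp

    # List only the Shorts tab of a channel; flat extraction avoids
    # resolving every individual video up front
    with yt_dlp.YoutubeDL({'extract_flat': True, 'quiet': True}) as ydl:
        info = ydl.extract_info('https://www.youtube.com/@NotJustBikes/shorts', download=False)
        print(info['title'], len(list(info['entries'])))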
Also, `/live` URLs raise an error if there are no live videos instead of silently downloading the entire channel. You may use `--compat-options no-youtube-channel-redirect` to revert all these redirections * Unavailable videos are also listed for YouTube playlists. Use `--compat-options no-youtube-unavailable-videos` to remove this * The upload dates extracted from YouTube are in UTC [when available](https://github.com/yt-dlp/yt-dlp/blob/89e4d86171c7b7c997c77d4714542e0383bf0db0/yt_dlp/extractor/youtube.py#L3898-L3900). Use `--compat-options no-youtube-prefer-utc-upload-date` to prefer the non-UTC upload date. * If `ffmpeg` is used as the downloader, the downloading and merging of formats happen in a single step when possible. Use `--compat-options no-direct-merge` to revert this diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index c2dd0ac30..b3f323e21 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -10,6 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import FakeYDL, is_download_test from yt_dlp.extractor import YoutubeIE, YoutubeTabIE +from yt_dlp.utils import ExtractorError @is_download_test @@ -53,6 +54,18 @@ class TestYoutubeLists(unittest.TestCase): self.assertEqual(video['duration'], 10) self.assertEqual(video['uploader'], 'Philipp Hagemeister') + def test_youtube_channel_no_uploads(self): + dl = FakeYDL() + dl.params['extract_flat'] = True + ie = YoutubeTabIE(dl) + # no uploads + with self.assertRaisesRegex(ExtractorError, r'no uploads'): + ie.extract('https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA') + + # no uploads and no UCID given + with self.assertRaisesRegex(ExtractorError, r'no uploads'): + ie.extract('https://www.youtube.com/news') + if __name__ == '__main__': unittest.main() diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 804d0ea34..33419e74a 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -4589,13 +4589,16 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): @staticmethod def _extract_selected_tab(tabs, fatal=True): - for tab in tabs: - renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {} - if renderer.get('selected') is True: - return renderer - else: - if fatal: - raise ExtractorError('Unable to find selected tab') + for tab_renderer in tabs: + if tab_renderer.get('selected'): + return tab_renderer + if fatal: + raise ExtractorError('Unable to find selected tab') + + @staticmethod + def _extract_tab_renderers(response): + return traverse_obj( + response, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., ('tabRenderer', 'expandableTabRenderer')), expected_type=dict) def _extract_from_tabs(self, item_id, ytcfg, data, tabs): playlist_id = title = description = channel_url = channel_name = channel_id = None @@ -4897,8 +4900,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal) ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage) # Reject webpage data if redirected to home page without explicitly requesting - selected_tab = self._extract_selected_tab(traverse_obj( - data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[]), fatal=False) or {} + selected_tab = self._extract_selected_tab(self._extract_tab_renderers(data), fatal=False) or {} if (url != 'https://www.youtube.com/feed/recommended' and selected_tab.get('tabIdentifier') == 
'FEwhat_to_watch' # Home page and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', [])): @@ -5392,18 +5394,19 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'categories': ['News & Politics'], 'tags': list, 'like_count': int, - 'release_timestamp': 1642502819, + 'release_timestamp': int, 'channel': 'Sky News', 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ', 'age_limit': 0, 'view_count': int, - 'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg', + 'thumbnail': r're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg', 'playable_in_embed': True, - 'release_date': '20220118', + 'release_date': r're:\d+', 'availability': 'public', 'live_status': 'is_live', 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ', - 'channel_follower_count': int + 'channel_follower_count': int, + 'concurrent_view_count': int, }, 'params': { 'skip_download': True, @@ -5538,16 +5541,14 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): ], 'playlist_mincount': 101, }, { - 'note': 'Topic without a UU playlist', + # Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg) + # Treat as a general feed 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg', 'info_dict': { 'id': 'UCtFRv9O2AHqOZjjynzrv-xg', 'title': 'UCtFRv9O2AHqOZjjynzrv-xg', 'tags': [], }, - 'expected_warnings': [ - 'the playlist redirect gave error', - ], 'playlist_mincount': 9, }, { 'note': 'Youtube music Album', @@ -5615,6 +5616,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'skip_download': True, 'extractor_args': {'youtubetab': {'skip': ['webpage']}} }, + 'skip': 'Query for sorting no longer works', }, { 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...', 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw', @@ -5633,10 +5635,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw', 'availability': 'public', }, - 'expected_warnings': [ - 'does not have a videos tab', - r'[Uu]navailable videos (are|will be) hidden', - ], 'playlist_mincount': 101, 'params': { 'skip_download': True, @@ -5715,13 +5713,155 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): }, 'playlist_mincount': 50, + }, { + # Channel with a real live tab (not to be mistaken with streams tab) + # Do not treat like it should redirect to live stream + 'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live', + 'info_dict': { + 'id': 'UCEH7P7kyJIkS_gJf93VYbmg', + 'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live', + 'tags': [], + }, + 'playlist_mincount': 20, + }, { + # Tab name is not the same as tab id + 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay', + 'info_dict': { + 'id': 'UCQvWX73GQygcwXOTSf_VDVg', + 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play', + 'tags': [], + }, + 'playlist_mincount': 8, + }, { + # Home tab id is literally home. 
Not to get mistaken with featured
+        'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',
+        'info_dict': {
+            'id': 'UCQvWX73GQygcwXOTSf_VDVg',
+            'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',
+            'tags': [],
+        },
+        'playlist_mincount': 8,
+    }, {
+        # Should get three playlists for videos, shorts and streams tabs
+        'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
+        'info_dict': {
+            'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
+            'title': 'Uploads for UCK9V2B22uJYu3N7eR_BT9QA'
+        },
+        'playlist_count': 3,
+    }, {
+        # Shorts tab with channel with handle
+        'url': 'https://www.youtube.com/@NotJustBikes/shorts',
+        'info_dict': {
+            'id': 'UC0intLFzLaudFG-xAvUEO-A',
+            'title': 'Not Just Bikes - Shorts',
+            'tags': 'count:12',
+            'uploader': 'Not Just Bikes',
+            'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
+            'description': 'md5:7513148b1f02b924783157d84c4ea555',
+            'channel_follower_count': int,
+            'uploader_id': 'UC0intLFzLaudFG-xAvUEO-A',
+            'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
+            'uploader_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
+            'channel': 'Not Just Bikes',
+        },
+        'playlist_mincount': 10,
+    }, {
+        # Streams tab
+        'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',
+        'info_dict': {
+            'id': 'UC3eYAvjCVwNHgkaGbXX3sig',
+            'title': '中村悠一 - Live',
+            'tags': 'count:7',
+            'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
+            'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
+            'uploader_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
+            'channel': '中村悠一',
+            'uploader_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
+            'channel_follower_count': int,
+            'uploader': '中村悠一',
+            'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',
+        },
+        'playlist_mincount': 60,
+    }, {
+        # Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.
+        # See test_youtube_lists
+        'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',
+        'only_matching': True,
+    }, {
+        # No uploads and no UCID given. Should fail with no uploads error
+        # See test_youtube_lists
+        'url': 'https://www.youtube.com/news',
+        'only_matching': True
+    }, {
+        # No videos tab but has a shorts tab
+        'url': 'https://www.youtube.com/c/TKFShorts',
+        'info_dict': {
+            'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
+            'title': 'Shorts Break - Shorts',
+            'tags': 'count:32',
+            'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
+            'channel': 'Shorts Break',
+            'description': 'md5:a6c234cf3d50d878ef8721e34457cd11',
+            'uploader': 'Shorts Break',
+            'channel_follower_count': int,
+            'uploader_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
+            'uploader_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
+            'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
+        },
+        'playlist_mincount': 30,
+    }, {
+        # Trending Now Tab. tab id is empty
+        'url': 'https://www.youtube.com/feed/trending',
+        'info_dict': {
+            'id': 'trending',
+            'title': 'trending - Now',
+            'tags': [],
+        },
+        'playlist_mincount': 30,
+    }, {
+        # Trending Gaming Tab.
tab id is empty + 'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D', + 'info_dict': { + 'id': 'trending', + 'title': 'trending - Gaming', + 'tags': [], + }, + 'playlist_mincount': 30, }] @classmethod def suitable(cls, url): return False if YoutubeIE.suitable(url) else super().suitable(url) - _URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$') + _URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/[^?#/]+))?(?P<post>.*)$') + + def _get_url_mobj(self, url): + mobj = self._URL_RE.match(url).groupdict() + mobj.update((k, '') for k, v in mobj.items() if v is None) + return mobj + + def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'): + tab_name = (tab.get('title') or '').lower() + tab_url = urljoin(base_url, traverse_obj( + tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'))) + + tab_id = (traverse_obj(tab, 'tabIdentifier', expected_type=str) + or tab_url and self._get_url_mobj(tab_url)['tab'][1:]) + if tab_id: + return tab_id, tab_name + + # Fallback to tab name if we cannot get the tab id. + # XXX: should we strip non-ascii letters? e.g. in case of 'let's play' tab example on special gaming channel + # Note that in the case of translated tab name this may result in an empty string, which we don't want. + self.write_debug(f'Falling back to selected tab name: {tab_name}') + return { + 'home': 'featured', + 'live': 'streams', + }.get(tab_name, tab_name), tab_name + + def _has_tab(self, tabs, tab_id): + return any(self._extract_tab_id_and_name(tab)[0] == tab_id for tab in tabs) @YoutubeTabBaseInfoExtractor.passthrough_smuggled_data def _real_extract(self, url, smuggled_data): @@ -5730,14 +5870,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): urllib.parse.urlparse(url)._replace(netloc='www.youtube.com')) compat_opts = self.get_param('compat_opts', []) - def get_mobj(url): - mobj = self._URL_RE.match(url).groupdict() - mobj.update((k, '') for k, v in mobj.items() if v is None) - return mobj - - mobj, redirect_warning = get_mobj(url), None - # Youtube returns incomplete data if tabname is not lower case - pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel'] + mobj = self._get_url_mobj(url) + pre, tab, post, is_channel = mobj['pre'], mobj['tab'], mobj['post'], not mobj['not_channel'] if is_channel: if smuggled_data.get('is_music_url'): if item_id[:2] == 'VL': # Youtube music VL channels have an equivalent playlist @@ -5750,19 +5884,16 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): get_all=False, expected_type=str) if not murl: raise ExtractorError('Failed to resolve album to playlist') - return self.url_result(murl, ie=YoutubeTabIE.ie_key()) + return self.url_result(murl, YoutubeTabIE) elif mobj['channel_type'] == 'browse': # Youtube music /browse/ should be changed to /channel/ pre = f'https://www.youtube.com/channel/{item_id}' - original_tab_name = tab + original_tab_id = tab[1:] if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts: - # Home URLs should redirect to /videos/ - redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. 
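`_extract_tab_id_and_name` above prefers the tab's own identifier (or the id embedded in the tab URL) and only then falls back to the display name, patching the two cases where the name and the id diverge. A condensed sketch of that resolution order:

    def resolve_tab_id(tab_identifier, url_tab, tab_name):
        # Mirrors the fallback chain: explicit tabIdentifier, then the id
        # taken from the tab URL, then the (possibly translated) name with
        # the 'home' -> 'featured' and 'live' -> 'streams' aliases applied
        return (tab_identifier or url_tab
                or {'home': 'featured', 'live': 'streams'}.get(tab_name, tab_name))

    print(resolve_tab_id(None, None, 'live'))      # 'streams'
    print(resolve_tab_id('videos', None, 'live'))  # 'videos'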
' - 'To download only the videos in the home page, add a "/featured" to the URL') tab = '/videos' url = ''.join((pre, tab, post)) - mobj = get_mobj(url) + mobj = self._get_url_mobj(url) # Handle both video/playlist URLs qs = parse_qs(url) @@ -5775,77 +5906,94 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): # Common mistake: https://www.youtube.com/watch?list=playlist_id self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}') url = f'https://www.youtube.com/playlist?list={playlist_id}' - mobj = get_mobj(url) + mobj = self._get_url_mobj(url) - if video_id and playlist_id: - if self.get_param('noplaylist'): - self.to_screen(f'Downloading just video {video_id} because of --no-playlist') - return self.url_result(f'https://www.youtube.com/watch?v={video_id}', - ie=YoutubeIE.ie_key(), video_id=video_id) - self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}') + if not self._yes_playlist(playlist_id, video_id): + return self.url_result( + f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id) data, ytcfg = self._extract_data(url, item_id) # YouTube may provide a non-standard redirect to the regional channel # See: https://github.com/yt-dlp/yt-dlp/issues/2694 + # https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects redirect_url = traverse_obj( data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False) if redirect_url and 'no-youtube-channel-redirect' not in compat_opts: redirect_url = ''.join(( urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post'])) - self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}') - return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key()) + self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}') + return self.url_result(redirect_url, YoutubeTabIE) - tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list) - if tabs: + tab_results = [] + tabs = self._extract_tab_renderers(data) + if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts: selected_tab = self._extract_selected_tab(tabs) - selected_tab_url = urljoin( - url, traverse_obj(selected_tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'))) - translated_tab_name = selected_tab.get('title', '').lower() - - # Prefer tab name from tab url as it is always in en, - # but only when preferred lang is set as it may not extract reliably in all cases. - selected_tab_name = (self._preferred_lang in (None, 'en') and translated_tab_name - or selected_tab_url and get_mobj(selected_tab_url)['tab'][1:] # primary - or translated_tab_name) - - if selected_tab_name == 'home': - selected_tab_name = 'featured' - requested_tab_name = mobj['tab'][1:] - - if 'no-youtube-channel-redirect' not in compat_opts: - if requested_tab_name == 'live': # Live tab should have redirected to the video - raise UserNotLive(video_id=mobj['id']) - if requested_tab_name not in ('', selected_tab_name): - redirect_warning = f'The channel does not have a {requested_tab_name} tab' - if not original_tab_name: - if item_id[:2] == 'UC': - # Topic channels don't have /videos. 
Use the equivalent playlist instead - pl_id = f'UU{item_id[2:]}' - pl_url = f'https://www.youtube.com/playlist?list={pl_id}' - try: - data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True) - except ExtractorError: - redirect_warning += ' and the playlist redirect gave error' - else: - item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name - redirect_warning += f'. Redirecting to playlist {pl_id} instead' - if selected_tab_name and selected_tab_name != requested_tab_name: - redirect_warning += f'. {selected_tab_name} tab is being downloaded instead' + selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url) # NB: Name may be translated + self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}') + + if not original_tab_id and selected_tab_name: + self.to_screen('Channel URLs download all uploads of the channel. ' + 'To download only the videos in a specific tab, pass the tab\'s URL') + if self._has_tab(tabs, 'streams'): + tab_results.append(self.url_result(''.join((pre, '/streams', post)))) + if self._has_tab(tabs, 'shorts'): + tab_results.append(self.url_result(''.join((pre, '/shorts', post)))) + # XXX: Members-only tab should also be extracted + + if not tab_results and selected_tab_id != 'videos': + # Channel does not have streams, shorts or videos tabs + if item_id[:2] != 'UC': + raise ExtractorError('This channel has no uploads', expected=True) + + # Topic channels don't have /videos. Use the equivalent playlist instead + pl_id = f'UU{item_id[2:]}' + pl_url = f'https://www.youtube.com/playlist?list={pl_id}' + try: + data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True) + except ExtractorError: + raise ExtractorError('This channel has no uploads', expected=True) else: - raise ExtractorError(redirect_warning, expected=True) + item_id, url = pl_id, pl_url + self.to_screen( + f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead') + + elif tab_results and selected_tab_id != 'videos': + # When there are shorts/live tabs but not videos tab + url, data = ''.join((pre, post)), None + + elif (original_tab_id or 'videos') != selected_tab_id: + if original_tab_id == 'live': + # Live tab should have redirected to the video + # Except in the case the channel has an actual live tab + # Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live + raise UserNotLive(video_id=mobj['id']) + elif selected_tab_name: + raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True) + + # For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg + url = f'{pre}{post}' - if redirect_warning: - self.to_screen(redirect_warning) self.write_debug(f'Final URL: {url}') # YouTube sometimes provides a button to reload playlist with unavailable videos. 
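The topic-channel fallback above leans on YouTube's fixed id scheme: a channel id of the form `UC<suffix>` always has a companion uploads playlist `UU<suffix>`. The mapping in isolation:

    def uploads_playlist_id(channel_id):
        # 'UCK9V2B22uJYu3N7eR_BT9QA' -> 'UUK9V2B22uJYu3N7eR_BT9QA'
        assert channel_id.startswith('UC'), 'only canonical UC ids have an uploads playlist'
        return 'UU' + channel_id[2:]

    print(uploads_playlist_id('UCK9V2B22uJYu3N7eR_BT9QA'))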
if 'no-youtube-unavailable-videos' not in compat_opts: data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data self._extract_and_report_alerts(data, only_once=True) - tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list) + + tabs = self._extract_tab_renderers(data) if tabs: - return self._extract_from_tabs(item_id, ytcfg, data, tabs) + tab_results[:0] = [self._extract_from_tabs(item_id, ytcfg, data, tabs)] + tab_results[0].update({ + 'extractor_key': YoutubeTabIE.ie_key(), + 'extractor': YoutubeTabIE.IE_NAME, + 'webpage_url': url, + }) + + if len(tab_results) == 1: + return tab_results[0] + elif len(tab_results) > 1: + return self.playlist_result(tab_results, item_id, title=f'Uploads for {item_id}') playlist = traverse_obj( data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict) @@ -5857,8 +6005,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): if video_id: if mobj['tab'] != '/live': # live tab is expected to redirect to video self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}') - return self.url_result(f'https://www.youtube.com/watch?v={video_id}', - ie=YoutubeIE.ie_key(), video_id=video_id) + return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id) raise ExtractorError('Unable to recognize tab page') @@ -5891,12 +6038,13 @@ class YoutubePlaylistIE(InfoExtractor): 'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q', 'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2', 'view_count': int, - 'uploader_url': 'https://www.youtube.com/user/Wickydoo', + 'uploader_url': 'https://www.youtube.com/c/WickmanVT', 'modified_date': r're:\d{8}', 'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q', 'channel': 'Wickman', 'tags': [], - 'channel_url': 'https://www.youtube.com/user/Wickydoo', + 'channel_url': 'https://www.youtube.com/c/WickmanVT', + 'availability': 'public', }, 'playlist_mincount': 29, }, { @@ -5926,7 +6074,7 @@ class YoutubePlaylistIE(InfoExtractor): 'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw', 'availability': 'public', }, - 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'], + 'expected_warnings': [r'[Uu]navailable videos? 
(is|are|will be) hidden'], }, { 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl', 'playlist_mincount': 455, From efdc45a6ea1dad1000d0478928cd4576975b9b3f Mon Sep 17 00:00:00 2001 From: MMM <flashdagger@googlemail.com> Date: Wed, 9 Nov 2022 10:05:08 +0100 Subject: [PATCH 1738/2552] [extractor/bitchute] Better error for geo-restricted videos (#5474) Authored by: flashdagger --- yt_dlp/extractor/bitchute.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/yt_dlp/extractor/bitchute.py b/yt_dlp/extractor/bitchute.py index f4b6a9a0e..9e3d6337a 100644 --- a/yt_dlp/extractor/bitchute.py +++ b/yt_dlp/extractor/bitchute.py @@ -8,6 +8,7 @@ from ..utils import ( OnDemandPagedList, clean_html, get_element_by_class, + get_element_by_id, get_elements_html_by_class, int_or_none, orderedSet, @@ -49,6 +50,16 @@ class BitChuteIE(InfoExtractor): 'upload_date': '20181113', }, 'params': {'check_formats': None}, + }, { + # restricted video + 'url': 'https://www.bitchute.com/video/WEnQU7XGcTdl/', + 'info_dict': { + 'id': 'WEnQU7XGcTdl', + 'ext': 'mp4', + 'title': 'Impartial Truth - Ein Letzter Appell an die Vernunft', + }, + 'params': {'skip_download': True}, + 'skip': 'Georestricted in DE', }, { 'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/', 'only_matching': True, @@ -56,6 +67,7 @@ class BitChuteIE(InfoExtractor): 'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent', 'only_matching': True, }] + _GEO_BYPASS = False _HEADERS = { 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36', @@ -78,11 +90,18 @@ class BitChuteIE(InfoExtractor): 'filesize': int_or_none(response.headers.get('Content-Length')) } + def _raise_if_restricted(self, webpage): + page_title = clean_html(get_element_by_class('page-title', webpage)) or '' + if re.fullmatch(r'(?:Channel|Video) Restricted', page_title): + reason = clean_html(get_element_by_id('page-detail', webpage)) or page_title + self.raise_geo_restricted(reason) + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage( f'https://www.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS) + self._raise_if_restricted(webpage) publish_date = clean_html(get_element_by_class('video-publish-date', webpage)) entries = self._parse_html5_media_entries(url, webpage, video_id) From d9df9b4919e84a3ba7be04acb73e56d67431550c Mon Sep 17 00:00:00 2001 From: Matthew <coletdjnz@protonmail.com> Date: Wed, 9 Nov 2022 22:09:13 +1300 Subject: [PATCH 1739/2552] [extractor/unsupported] Raise error on known DRM-only sites (#5483) Authored by: coletdjnz --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/unsupported.py | 93 +++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+) create mode 100644 yt_dlp/extractor/unsupported.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 97da309c5..0ca8b3e06 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2023,6 +2023,7 @@ from .umg import UMGDeIE from .unistra import UnistraIE from .unity import UnityIE from .unscripted import UnscriptedNewsVideoIE +from .unsupported import KnownDRMIE from .uol import UOLIE from .uplynk import ( UplynkIE, diff --git a/yt_dlp/extractor/unsupported.py b/yt_dlp/extractor/unsupported.py new file mode 100644 index 000000000..87ad87ca2 --- /dev/null +++ b/yt_dlp/extractor/unsupported.py @@ -0,0 +1,93 @@ +from .common import 
InfoExtractor
+from ..utils import classproperty, ExtractorError
+
+
+class KnownDRMIE(InfoExtractor):
+    IE_DESC = False
+    IE_NAME = 'unsupported:drm'
+    UNSUPPORTED_URLS = (
+        r'play\.hbomax\.com',
+        r'channel(?:4|5)\.com',
+        r'peacocktv\.com',
+        r'(?:[\w\.]+\.)?disneyplus\.com',
+        r'open\.spotify\.com/(?:track|playlist|album|artist)',
+        r'tvnz\.co\.nz',
+        r'oneplus\.ch',
+        r'artstation\.com/learning/courses',
+        r'philo\.com',
+        r'(?:[\w\.]+\.)?mech-plus\.com',
+        r'aha\.video',
+        r'mubi\.com',
+        r'vootkids\.com'
+    )
+
+    _TESTS = [{
+        # https://github.com/yt-dlp/yt-dlp/issues/4309
+        'url': 'https://www.peacocktv.com',
+        'only_matching': True,
+    }, {
+        # https://github.com/yt-dlp/yt-dlp/issues/1719,
+        'url': 'https://www.channel4.com',
+        'only_matching': True,
+    }, {
+        # https://github.com/yt-dlp/yt-dlp/issues/1548
+        'url': 'https://www.channel5.com',
+        'only_matching': True,
+    }, {
+        'url': r'https://hsesn.apps.disneyplus.com',
+        'only_matching': True,
+    }, {
+        'url': r'https://www.disneyplus.com',
+        'only_matching': True,
+    }, {
+        'url': 'https://open.spotify.com/artist/',
+        'only_matching': True,
+    }, {
+        'url': 'https://open.spotify.com/track/',
+        'only_matching': True,
+    }, {
+        # TVNZ: https://github.com/yt-dlp/yt-dlp/issues/4122
+        'url': 'https://tvnz.co.nz',
+        'only_matching': True,
+    }, {
+        # https://github.com/yt-dlp/yt-dlp/issues/1922
+        'url': 'https://www.oneplus.ch',
+        'only_matching': True,
+    }, {
+        # https://github.com/yt-dlp/yt-dlp/issues/1140
+        'url': 'https://www.artstation.com/learning/courses/',
+        'only_matching': True,
+    }, {
+        # https://github.com/yt-dlp/yt-dlp/issues/3544
+        'url': 'https://www.philo.com',
+        'only_matching': True,
+    }, {
+        # https://github.com/yt-dlp/yt-dlp/issues/3533
+        'url': 'https://www.mech-plus.com/',
+        'only_matching': True,
+    }, {
+        'url': 'https://watch.mech-plus.com/',
+        'only_matching': True,
+    }, {
+        # https://github.com/yt-dlp/yt-dlp/issues/2934
+        'url': 'https://www.aha.video',
+        'only_matching': True,
+    }, {
+        # https://github.com/yt-dlp/yt-dlp/issues/2743
+        'url': 'https://mubi.com',
+        'only_matching': True,
+    }, {
+        # https://github.com/yt-dlp/yt-dlp/issues/3287
+        'url': 'https://www.vootkids.com',
+        'only_matching': True,
+    }]
+
+    @classproperty
+    def _VALID_URL(cls):
+        return rf'https?://(?:www\.)?(?:{"|".join(cls.UNSUPPORTED_URLS)})'
+
+    def _real_extract(self, url):
+        raise ExtractorError(
+            f'The requested site is known to use DRM protection. It will {self._downloader._format_err("NOT", self._downloader.Styles.EMPHASIS)} be supported by yt-dlp.
' + f'Please {self._downloader._format_err("DO NOT", self._downloader.Styles.ERROR)} open an issue, unless you have evidence that it is not DRM protected.', + expected=True) From 0d8affc17faa540f41cb6fba7675dbf98364250b Mon Sep 17 00:00:00 2001 From: MMM <flashdagger@googlemail.com> Date: Wed, 9 Nov 2022 10:36:11 +0100 Subject: [PATCH 1740/2552] [extractor/rumble] Add HLS formats and extract more metadata (#5280) Closes #5177, #5277 Authored by: flashdagger --- yt_dlp/extractor/generic.py | 34 ------- yt_dlp/extractor/rumble.py | 179 +++++++++++++++++++++++++++++++----- 2 files changed, 154 insertions(+), 59 deletions(-) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index b0b26b61a..0765d38ac 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -2399,40 +2399,6 @@ class GenericIE(InfoExtractor): 'upload_date': '20210111', } }, - { - 'note': 'Rumble embed', - 'url': 'https://rumble.com/vdmum1-moose-the-dog-helps-girls-dig-a-snow-fort.html', - 'md5': '53af34098a7f92c4e51cf0bd1c33f009', - 'info_dict': { - 'id': 'vb0ofn', - 'ext': 'mp4', - 'timestamp': 1612662578, - 'uploader': 'LovingMontana', - 'channel': 'LovingMontana', - 'upload_date': '20210207', - 'title': 'Winter-loving dog helps girls dig a snow fort ', - 'channel_url': 'https://rumble.com/c/c-546523', - 'thumbnail': 'https://sp.rmbl.ws/s8/1/5/f/x/x/5fxxb.OvCc.1-small-Moose-The-Dog-Helps-Girls-D.jpg', - 'duration': 103, - } - }, - { - 'note': 'Rumble JS embed', - 'url': 'https://therightscoop.com/what-does-9-plus-1-plus-1-equal-listen-to-this-audio-of-attempted-kavanaugh-assassins-call-and-youll-get-it', - 'md5': '4701209ac99095592e73dbba21889690', - 'info_dict': { - 'id': 'v15eqxl', - 'ext': 'mp4', - 'channel': 'Mr Producer Media', - 'duration': 92, - 'title': '911 Audio From The Man Who Wanted To Kill Supreme Court Justice Kavanaugh', - 'channel_url': 'https://rumble.com/c/RichSementa', - 'thumbnail': 'https://sp.rmbl.ws/s8/1/P/j/f/A/PjfAe.OvCc-small-911-Audio-From-The-Man-Who-.jpg', - 'timestamp': 1654892716, - 'uploader': 'Mr Producer Media', - 'upload_date': '20220610', - } - }, { 'note': 'JSON LD with multiple @type', 'url': 'https://www.nu.nl/280161/video/hoe-een-bladvlo-dit-verwoestende-japanse-onkruid-moet-vernietigen.html', diff --git a/yt_dlp/extractor/rumble.py b/yt_dlp/extractor/rumble.py index c94ba68ee..27040646b 100644 --- a/yt_dlp/extractor/rumble.py +++ b/yt_dlp/extractor/rumble.py @@ -2,12 +2,11 @@ import itertools import re from .common import InfoExtractor -from ..compat import compat_str, compat_HTTPError +from ..compat import compat_HTTPError from ..utils import ( - determine_ext, int_or_none, parse_iso8601, - try_get, + traverse_obj, unescapeHTML, ExtractorError, ) @@ -30,6 +29,7 @@ class RumbleEmbedIE(InfoExtractor): 'thumbnail': 'https://sp.rmbl.ws/s8/1/5/M/z/1/5Mz1a.OvCc-small-WMAR-2-News-Latest-Headline.jpg', 'duration': 234, 'uploader': 'WMAR', + 'live_status': 'not_live', } }, { 'url': 'https://rumble.com/embed/vslb7v', @@ -45,12 +45,110 @@ class RumbleEmbedIE(InfoExtractor): 'thumbnail': 'https://sp.rmbl.ws/s8/6/7/i/9/h/7i9hd.OvCc.jpg', 'duration': 901, 'uploader': 'CTNews', + 'live_status': 'not_live', } + }, { + 'url': 'https://rumble.com/embed/vunh1h', + 'info_dict': { + 'id': 'vunh1h', + 'ext': 'mp4', + 'title': '‘Gideon, op zoek naar de waarheid’ including ENG SUBS', + 'timestamp': 1647197663, + 'upload_date': '20220313', + 'channel_url': 'https://rumble.com/user/BLCKBX', + 'channel': 'BLCKBX', + 'thumbnail': r're:https://.+\.jpg', + 'duration': 
5069, + 'uploader': 'BLCKBX', + 'live_status': 'not_live', + 'subtitles': { + 'en': [ + { + 'url': r're:https://.+\.vtt', + 'name': 'English', + 'ext': 'vtt' + } + ] + }, + }, + 'params': {'skip_download': True} + }, { + 'url': 'https://rumble.com/embed/v1essrt', + 'info_dict': { + 'id': 'v1essrt', + 'ext': 'mp4', + 'title': 'startswith:lofi hip hop radio - beats to relax/study', + 'timestamp': 1661519399, + 'upload_date': '20220826', + 'channel_url': 'https://rumble.com/c/LofiGirl', + 'channel': 'Lofi Girl', + 'thumbnail': r're:https://.+\.jpg', + 'duration': None, + 'uploader': 'Lofi Girl', + 'live_status': 'is_live', + }, + 'params': {'skip_download': True} + }, { + 'url': 'https://rumble.com/embed/v1amumr', + 'info_dict': { + 'id': 'v1amumr', + 'ext': 'webm', + 'fps': 60, + 'title': 'Turning Point USA 2022 Student Action Summit DAY 1 - Rumble Exclusive Live', + 'timestamp': 1658518457, + 'upload_date': '20220722', + 'channel_url': 'https://rumble.com/c/RumbleEvents', + 'channel': 'Rumble Events', + 'thumbnail': r're:https://.+\.jpg', + 'duration': 16427, + 'uploader': 'Rumble Events', + 'live_status': 'was_live', + }, + 'params': {'skip_download': True} }, { 'url': 'https://rumble.com/embed/ufe9n.v5pv5f', 'only_matching': True, }] + _WEBPAGE_TESTS = [ + { + 'note': 'Rumble embed', + 'url': 'https://rumble.com/vdmum1-moose-the-dog-helps-girls-dig-a-snow-fort.html', + 'md5': '53af34098a7f92c4e51cf0bd1c33f009', + 'info_dict': { + 'id': 'vb0ofn', + 'ext': 'mp4', + 'timestamp': 1612662578, + 'uploader': 'LovingMontana', + 'channel': 'LovingMontana', + 'upload_date': '20210207', + 'title': 'Winter-loving dog helps girls dig a snow fort ', + 'channel_url': 'https://rumble.com/c/c-546523', + 'thumbnail': 'https://sp.rmbl.ws/s8/1/5/f/x/x/5fxxb.OvCc.1-small-Moose-The-Dog-Helps-Girls-D.jpg', + 'duration': 103, + 'live_status': 'not_live', + } + }, + { + 'note': 'Rumble JS embed', + 'url': 'https://therightscoop.com/what-does-9-plus-1-plus-1-equal-listen-to-this-audio-of-attempted-kavanaugh-assassins-call-and-youll-get-it', + 'md5': '4701209ac99095592e73dbba21889690', + 'info_dict': { + 'id': 'v15eqxl', + 'ext': 'mp4', + 'channel': 'Mr Producer Media', + 'duration': 92, + 'title': '911 Audio From The Man Who Wanted To Kill Supreme Court Justice Kavanaugh', + 'channel_url': 'https://rumble.com/c/RichSementa', + 'thumbnail': 'https://sp.rmbl.ws/s8/1/P/j/f/A/PjfAe.OvCc-small-911-Audio-From-The-Man-Who-.jpg', + 'timestamp': 1654892716, + 'uploader': 'Mr Producer Media', + 'upload_date': '20220610', + 'live_status': 'not_live', + } + }, + ] + @classmethod def _extract_embed_urls(cls, url, webpage): embeds = tuple(super()._extract_embed_urls(url, webpage)) @@ -62,26 +160,48 @@ class RumbleEmbedIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) video = self._download_json( - 'https://rumble.com/embedJS/', video_id, - query={'request': 'video', 'v': video_id}) - title = unescapeHTML(video['title']) + 'https://rumble.com/embedJS/u3/', video_id, + query={'request': 'video', 'ver': 2, 'v': video_id}) + + sys_msg = traverse_obj(video, ('sys', 'msg')) + if sys_msg: + self.report_warning(sys_msg, video_id=video_id) + + if video.get('live') == 0: + live_status = 'not_live' if video.get('livestream_has_dvr') is None else 'was_live' + elif video.get('live') == 1: + live_status = 'is_upcoming' if video.get('livestream_has_dvr') else 'was_live' + elif video.get('live') == 2: + live_status = 'is_live' + else: + live_status = None formats = [] - for height, ua in (video.get('ua') or 
{}).items(): - for i in range(2): - f_url = try_get(ua, lambda x: x[i], compat_str) - if f_url: - ext = determine_ext(f_url) - f = { - 'ext': ext, - 'format_id': '%s-%sp' % (ext, height), - 'height': int_or_none(height), - 'url': f_url, - } - bitrate = try_get(ua, lambda x: x[i + 2]['bitrate']) - if bitrate: - f['tbr'] = int_or_none(bitrate) - formats.append(f) + for ext, ext_info in (video.get('ua') or {}).items(): + for height, video_info in (ext_info or {}).items(): + meta = video_info.get('meta') or {} + if not video_info.get('url'): + continue + if ext == 'hls': + if meta.get('live') is True and video.get('live') == 1: + live_status = 'post_live' + formats.extend(self._extract_m3u8_formats( + video_info['url'], video_id, + ext='mp4', m3u8_id='hls', fatal=False, live=live_status == 'is_live')) + continue + formats.append({ + 'ext': ext, + 'url': video_info['url'], + 'format_id': '%s-%sp' % (ext, height), + 'height': int_or_none(height), + 'fps': video.get('fps'), + **traverse_obj(meta, { + 'tbr': 'bitrate', + 'filesize': 'size', + 'width': 'w', + 'height': 'h', + }, default={}) + }) self._sort_formats(formats) subtitles = { @@ -92,18 +212,27 @@ class RumbleEmbedIE(InfoExtractor): } author = video.get('author') or {} + thumbnails = traverse_obj(video, ('t', ..., {'url': 'i', 'width': 'w', 'height': 'h'})) + if not thumbnails and video.get('i'): + thumbnails = [{'url': video['i']}] + + if live_status in {'is_live', 'post_live'}: + duration = None + else: + duration = int_or_none(video.get('duration')) return { 'id': video_id, - 'title': title, + 'title': unescapeHTML(video.get('title')), 'formats': formats, 'subtitles': subtitles, - 'thumbnail': video.get('i'), + 'thumbnails': thumbnails, 'timestamp': parse_iso8601(video.get('pubDate')), 'channel': author.get('name'), 'channel_url': author.get('url'), - 'duration': int_or_none(video.get('duration')), + 'duration': duration, 'uploader': author.get('name'), + 'live_status': live_status, } @@ -118,7 +247,7 @@ class RumbleChannelIE(InfoExtractor): }, }, { 'url': 'https://rumble.com/user/goldenpoodleharleyeuna', - 'playlist_count': 4, + 'playlist_mincount': 4, 'info_dict': { 'id': 'goldenpoodleharleyeuna', }, From ed6bec168dd6af955f4ec0165356ac76b944c537 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Wed, 9 Nov 2022 15:48:25 +0530 Subject: [PATCH 1741/2552] [extractor/doodstream] Remove extractor It was added in youtube-dlc, likely without sufficient scrutiny Closes #3808, Closes #5251, Closes #5403 --- yt_dlp/extractor/_extractors.py | 3 +- yt_dlp/extractor/doodstream.py | 77 --------------------------------- yt_dlp/extractor/unsupported.py | 60 ++++++++++++++++++++----- 3 files changed, 51 insertions(+), 89 deletions(-) delete mode 100644 yt_dlp/extractor/doodstream.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 0ca8b3e06..053ef44ae 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -477,7 +477,6 @@ from .digitalconcerthall import DigitalConcertHallIE from .discovery import DiscoveryIE from .disney import DisneyIE from .dispeak import DigitallySpeakingIE -from .doodstream import DoodStreamIE from .dropbox import DropboxIE from .dropout import ( DropoutSeasonIE, @@ -2023,7 +2022,7 @@ from .umg import UMGDeIE from .unistra import UnistraIE from .unity import UnityIE from .unscripted import UnscriptedNewsVideoIE -from .unsupported import KnownDRMIE +from .unsupported import KnownDRMIE, KnownPiracyIE from .uol import UOLIE from .uplynk import 
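In the Rumble rewrite above, `traverse_obj(meta, {...}, default={})` renames the site's short metadata keys (`bitrate`, `size`, `w`, `h`) to yt-dlp field names and drops whatever is missing. A plain-Python sketch of the same mapping behaviour:

    def remap(meta, mapping):
        # Keep only the source keys that exist, renamed to yt-dlp fields
        return {dst: meta[src] for dst, src in mapping.items() if src in meta}

    print(remap({'bitrate': 2000, 'w': 1920},
                {'tbr': 'bitrate', 'filesize': 'size', 'width': 'w', 'height': 'h'}))
    # {'tbr': 2000, 'width': 1920}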
( UplynkIE, diff --git a/yt_dlp/extractor/doodstream.py b/yt_dlp/extractor/doodstream.py deleted file mode 100644 index b41da32e5..000000000 --- a/yt_dlp/extractor/doodstream.py +++ /dev/null @@ -1,77 +0,0 @@ -import string -import random -import time - -from .common import InfoExtractor - - -class DoodStreamIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dood\.(?:to|watch|so|pm|wf)/[ed]/(?P<id>[a-z0-9]+)' - _TESTS = [{ - 'url': 'http://dood.to/e/5s1wmbdacezb', - 'md5': '4568b83b31e13242b3f1ff96c55f0595', - 'info_dict': { - 'id': '5s1wmbdacezb', - 'ext': 'mp4', - 'title': 'Kat Wonders - Monthly May 2020', - 'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com', - 'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg', - } - }, { - 'url': 'http://dood.watch/d/5s1wmbdacezb', - 'md5': '4568b83b31e13242b3f1ff96c55f0595', - 'info_dict': { - 'id': '5s1wmbdacezb', - 'ext': 'mp4', - 'title': 'Kat Wonders - Monthly May 2020', - 'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com', - 'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg', - } - }, { - 'url': 'https://dood.to/d/jzrxn12t2s7n', - 'md5': '3207e199426eca7c2aa23c2872e6728a', - 'info_dict': { - 'id': 'jzrxn12t2s7n', - 'ext': 'mp4', - 'title': 'Stacy Cruz Cute ALLWAYSWELL', - 'description': 'Stacy Cruz Cute ALLWAYSWELL | DoodStream.com', - 'thumbnail': 'https://img.doodcdn.com/snaps/8edqd5nppkac3x8u.jpg', - } - }, { - 'url': 'https://dood.so/d/jzrxn12t2s7n', - 'only_matching': True - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - url = f'https://dood.to/e/{video_id}' - webpage = self._download_webpage(url, video_id) - - title = self._html_search_meta( - ('og:title', 'twitter:title'), webpage, default=None) or self._html_extract_title(webpage) - thumb = self._html_search_meta(['og:image', 'twitter:image'], webpage, default=None) - token = self._html_search_regex(r'[?&]token=([a-z0-9]+)[&\']', webpage, 'token') - description = self._html_search_meta( - ['og:description', 'description', 'twitter:description'], webpage, default=None) - - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/66.0', - 'referer': url - } - - pass_md5 = self._html_search_regex(r'(/pass_md5.*?)\'', webpage, 'pass_md5') - final_url = ''.join(( - self._download_webpage(f'https://dood.to{pass_md5}', video_id, headers=headers), - *(random.choice(string.ascii_letters + string.digits) for _ in range(10)), - f'?token={token}&expiry={int(time.time() * 1000)}', - )) - - return { - 'id': video_id, - 'title': title, - 'url': final_url, - 'http_headers': headers, - 'ext': 'mp4', - 'description': description, - 'thumbnail': thumb, - } diff --git a/yt_dlp/extractor/unsupported.py b/yt_dlp/extractor/unsupported.py index 87ad87ca2..e40666ec0 100644 --- a/yt_dlp/extractor/unsupported.py +++ b/yt_dlp/extractor/unsupported.py @@ -1,11 +1,32 @@ from .common import InfoExtractor -from ..utils import classproperty, ExtractorError +from ..utils import ExtractorError, classproperty, remove_start -class KnownDRMIE(InfoExtractor): +class UnsupportedInfoExtractor(InfoExtractor): IE_DESC = False - IE_NAME = 'unsupported:drm' - UNSUPPORTED_URLS = ( + URLS = () # Redefine in subclasses + + @classproperty + def IE_NAME(cls): + return remove_start(super().IE_NAME, 'Known') + + @classproperty + def _VALID_URL(cls): + return rf'https?://(?:www\.)?(?:{"|".join(cls.URLS)})' + + +LF = '\n ' + + +class KnownDRMIE(UnsupportedInfoExtractor): + """Sites that are known to use DRM for all 
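The refactor above concentrates the shared machinery in `UnsupportedInfoExtractor`: subclasses only declare `URLS`, while `IE_NAME` and `_VALID_URL` are derived per class. A reduced sketch of the derivation using plain classmethods (the real code uses `classproperty` and `remove_start` from `yt_dlp.utils`):

    import re

    class Unsupported:
        URLS = ()

        @classmethod
        def ie_name(cls):
            # 'KnownDRM' -> 'DRM', like remove_start(..., 'Known'); needs Python 3.9+
            return cls.__name__.removeprefix('Known')

        @classmethod
        def valid_url(cls):
            return r'https?://(?:www\.)?(?:%s)' % '|'.join(cls.URLS)

    class KnownDRM(Unsupported):
        URLS = (r'play\.hbomax\.com', r'mubi\.com')

    print(KnownDRM.ie_name())                                        # DRM
    print(bool(re.match(KnownDRM.valid_url(), 'https://mubi.com')))  # True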
their videos + + Add to this list only if: + * You are reasonably certain that the site uses DRM for ALL their videos + * Multiple users have asked about this site on github/reddit/discord + """ + + URLS = ( r'play\.hbomax\.com', r'channel(?:4|5)\.com', r'peacocktv\.com', @@ -82,12 +103,31 @@ class KnownDRMIE(InfoExtractor): 'only_matching': True, }] - @classproperty - def _VALID_URL(cls): - return rf'https?://(?:www\.)?(?:{"|".join(cls.UNSUPPORTED_URLS)})' + def _real_extract(self, url): + raise ExtractorError( + f'The requested site is known to use DRM protection. ' + f'It will {self._downloader._format_err("NOT", self._downloader.Styles.EMPHASIS)} be supported.{LF}' + f'Please {self._downloader._format_err("DO NOT", self._downloader.Styles.ERROR)} open an issue, ' + 'unless you have evidence that the video is not DRM protected', expected=True) + + +class KnownPiracyIE(UnsupportedInfoExtractor): + """Sites that have been deemed to be piracy + + In order for this to not end up being a catalog of piracy sites, + only sites that were once supported should be added to this list + """ + + URLS = ( + r'dood\.(?:to|watch|so|pm|wf|ru)', + ) + + _TESTS = [{ + 'url': 'http://dood.to/e/5s1wmbdacezb', + 'only_matching': True, + }] def _real_extract(self, url): raise ExtractorError( - f'The requested site is known to use DRM protection. It will {self._downloader._format_err("NOT", self._downloader.Styles.EMPHASIS)} be supported by yt-dlp. ' - f'Please {self._downloader._format_err("DO NOT", self._downloader.Styles.ERROR)} open an issue, unless you have evidence that it is not DRM protected.', - expected=True) + f'This website is no longer supported since it has been determined to be primarily used for piracy.{LF}' + f'{self._downloader._format_err("DO NOT", self._downloader.Styles.ERROR)} open issues for it', expected=True) From c789fb778798d682a1b2d3c74180ba8d20c23552 Mon Sep 17 00:00:00 2001 From: Alex <aleksandrosansan@gmail.com> Date: Thu, 10 Nov 2022 03:41:07 +0200 Subject: [PATCH 1742/2552] [build, test] Harden workflows' security (#5410) Authored by: sashashura --- .github/workflows/build.yml | 9 ++++++--- .github/workflows/core.yml | 3 +++ .github/workflows/download.yml | 3 +++ .github/workflows/quick-test.yml | 3 +++ 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2a1b9a4aa..12e5426b1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,8 +1,12 @@ name: Build on: workflow_dispatch +permissions: + contents: read jobs: prepare: + permissions: + contents: write # for push_release runs-on: ubuntu-latest outputs: version_suffix: ${{ steps.version_suffix.outputs.version_suffix }} @@ -69,9 +73,6 @@ jobs: python pyinst.py --onedir (cd ./dist/yt-dlp_linux && zip -r ../yt-dlp_linux.zip .) 
python pyinst.py - - name: Get SHA2-SUMS - id: get_sha - run: | - name: Upload artifacts uses: actions/upload-artifact@v3 @@ -248,6 +249,8 @@ jobs: publish_release: + permissions: + contents: write # for action-gh-release runs-on: ubuntu-latest needs: [prepare, build_unix, build_windows, build_windows32, build_macos, build_macos_legacy] diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml index d0e890b30..e12918626 100644 --- a/.github/workflows/core.yml +++ b/.github/workflows/core.yml @@ -1,5 +1,8 @@ name: Core Tests on: [push, pull_request] +permissions: + contents: read + jobs: tests: name: Core Tests diff --git a/.github/workflows/download.yml b/.github/workflows/download.yml index cc2da62fa..2b2387d4f 100644 --- a/.github/workflows/download.yml +++ b/.github/workflows/download.yml @@ -1,5 +1,8 @@ name: Download Tests on: [push, pull_request] +permissions: + contents: read + jobs: quick: name: Quick Download Tests diff --git a/.github/workflows/quick-test.yml b/.github/workflows/quick-test.yml index 53b74e2c7..8a0ac98bb 100644 --- a/.github/workflows/quick-test.yml +++ b/.github/workflows/quick-test.yml @@ -1,5 +1,8 @@ name: Quick Test on: [push, pull_request] +permissions: + contents: read + jobs: tests: name: Core Test From 495322b95bbf8befa0f0b354f110a1d4eddac784 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Thu, 10 Nov 2022 07:32:25 +0530 Subject: [PATCH 1743/2552] [test] Allow `extract_flat` in download tests Authored by: coletdjnz, pukkandan --- test/helper.py | 9 +++++++-- test/test_download.py | 4 +++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/test/helper.py b/test/helper.py index 3b3b44580..139bdafc3 100644 --- a/test/helper.py +++ b/test/helper.py @@ -222,6 +222,10 @@ def sanitize_got_info_dict(got_dict): if test_info_dict.get('display_id') == test_info_dict.get('id'): test_info_dict.pop('display_id') + # Check url for flat entries + if got_dict.get('_type', 'video') != 'video' and got_dict.get('url'): + test_info_dict['url'] = got_dict['url'] + return test_info_dict @@ -235,8 +239,9 @@ def expect_info_dict(self, got_dict, expected_dict): for key in mandatory_fields: self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key) # Check for mandatory fields that are automatically set by YoutubeDL - for key in ['webpage_url', 'extractor', 'extractor_key']: - self.assertTrue(got_dict.get(key), 'Missing field: %s' % key) + if got_dict.get('_type', 'video') == 'video': + for key in ['webpage_url', 'extractor', 'extractor_key']: + self.assertTrue(got_dict.get(key), 'Missing field: %s' % key) test_info_dict = sanitize_got_info_dict(got_dict) diff --git a/test/test_download.py b/test/test_download.py index 7ee8c7c43..43b39c36b 100755 --- a/test/test_download.py +++ b/test/test_download.py @@ -106,7 +106,7 @@ def generator(test_case, tname): params = tc.get('params', {}) if not info_dict.get('id'): raise Exception(f'Test {tname} definition incorrect - "id" key is not present') - elif not info_dict.get('ext'): + elif not info_dict.get('ext') and info_dict.get('_type', 'video') == 'video': if params.get('skip_download') and params.get('ignore_no_formats_error'): continue raise Exception(f'Test {tname} definition incorrect - "ext" key must be present to define the output file') @@ -213,6 +213,8 @@ def generator(test_case, tname): tc_res_dict = res_dict['entries'][tc_num] # First, check test cases' data against extracted data alone expect_info_dict(self, tc_res_dict, tc.get('info_dict', {})) + if 
tc_res_dict.get('_type', 'video') != 'video': + continue # Now, check downloaded file consistency tc_filename = get_tc_filename(tc) if not test_case.get('params', {}).get('skip_download', False): From 4dc23a80510d75546f49f8742cf8b704a2efc808 Mon Sep 17 00:00:00 2001 From: Matthew <coletdjnz@protonmail.com> Date: Thu, 10 Nov 2022 15:44:12 +1300 Subject: [PATCH 1744/2552] [extractor/youtube:tab] Fix video metadata from tabs (#5489) Closes #5488 Authored by: coletdjnz --- yt_dlp/extractor/youtube.py | 96 ++++++++++++++++++++++++++++++++----- 1 file changed, 84 insertions(+), 12 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 33419e74a..7e3c17ae0 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -912,7 +912,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor): def _extract_video(self, renderer): video_id = renderer.get('videoId') - title = self._get_text(renderer, 'title') + + reel_header_renderer = traverse_obj(renderer, ( + 'navigationEndpoint', 'reelWatchEndpoint', 'overlay', 'reelPlayerOverlayRenderer', + 'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer')) + + title = self._get_text(renderer, 'title', 'headline') or self._get_text(reel_header_renderer, 'reelTitleText') description = self._get_text(renderer, 'descriptionSnippet') duration = int_or_none(renderer.get('lengthSeconds')) @@ -920,24 +925,23 @@ class YoutubeBaseInfoExtractor(InfoExtractor): duration = parse_duration(self._get_text( renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))) if duration is None: + # XXX: should write a parser to be more general to support more cases (e.g. shorts in shorts tab) duration = parse_duration(self._search_regex( r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$', traverse_obj(renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str), video_id, default=None, group='duration')) - # videoInfo is a string like '50K views • 10 years ago'. 
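        # For context (illustrative, values are hypothetical): traverse_obj walks
        # nested dicts/lists by key path and returns None on any missing step, e.g.
        #   traverse_obj({'a': {'b': 1}}, ('a', 'b')) == 1
        #   traverse_obj({}, ('a', 'b')) is None
        # so the renderer lookups in this hunk need no explicit KeyError handling.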
- view_count = self._get_count(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') - uploader = self._get_text(renderer, 'ownerText', 'shortBylineText') channel_id = traverse_obj( renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'), expected_type=str, get_all=False) - time_text = self._get_text(renderer, 'publishedTimeText', 'videoInfo') or '' - scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False)) + if not channel_id: + channel_id = traverse_obj(reel_header_renderer, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId')) + overlay_style = traverse_obj( renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'), get_all=False, expected_type=str) badges = self._extract_badges(renderer) - thumbnails = self._extract_thumbnails(renderer, 'thumbnail') + navigation_url = urljoin('https://www.youtube.com/', traverse_obj( renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'), expected_type=str)) or '' @@ -945,12 +949,22 @@ class YoutubeBaseInfoExtractor(InfoExtractor): if overlay_style == 'SHORTS' or '/shorts/' in navigation_url: url = f'https://www.youtube.com/shorts/{video_id}' + time_text = (self._get_text(renderer, 'publishedTimeText', 'videoInfo') + or self._get_text(reel_header_renderer, 'timestampText') or '') + scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False)) + live_status = ( 'is_upcoming' if scheduled_timestamp is not None else 'was_live' if 'streamed' in time_text.lower() else 'is_live' if overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW) else None) + # videoInfo is a string like '50K views • 10 years ago'. 
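        # Illustrative inputs the lines below must cope with (hypothetical values):
        #   'No views'        -> view_count 0 (matched case-insensitively)
        #   '1,234 views'     -> view_count 1234
        #   '53 watching now' -> stored as concurrent_view_count for live streams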
+ view_count_text = self._get_text(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') or '' + view_count = (0 if 'no views' in view_count_text.lower() + else self._get_count({'simpleText': view_count_text})) + view_count_field = 'concurrent_view_count' if live_status in ('is_live', 'is_upcoming') else 'view_count' + return { '_type': 'url', 'ie_key': YoutubeIE.ie_key(), @@ -959,9 +973,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'title': title, 'description': description, 'duration': duration, - 'uploader': uploader, 'channel_id': channel_id, - 'thumbnails': thumbnails, + 'channel': (self._get_text(renderer, 'ownerText', 'shortBylineText') + or self._get_text(reel_header_renderer, 'channelTitleText')), + 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None, + 'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'), 'timestamp': (self._parse_time_text(time_text) if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE) else None), @@ -973,7 +989,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None, needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None, is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None), - 'concurrent_view_count' if live_status in ('is_live', 'is_upcoming') else 'view_count': view_count, + view_count_field: view_count, 'live_status': live_status } @@ -5484,7 +5500,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'title': '#cctv9', 'tags': [], }, - 'playlist_mincount': 350, + 'playlist_mincount': 300, # not consistent but should be over 300 }, { 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU', 'only_matching': True, @@ -5671,7 +5687,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'tags': [], 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA', 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA', - 'description': '', + 'description': 'test description', 'title': 'cole-dlp-test-acc - 再生リスト', 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA', 'uploader': 'cole-dlp-test-acc', @@ -5828,6 +5844,62 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'tags': [], }, 'playlist_mincount': 30, + }, { + # Shorts url result in shorts tab + 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts', + 'info_dict': { + 'id': 'UCiu-3thuViMebBjw_5nWYrA', + 'title': 'cole-dlp-test-acc - Shorts', + 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA', + 'channel': 'cole-dlp-test-acc', + 'channel_follower_count': int, + 'description': 'test description', + 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA', + 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA', + 'tags': [], + 'uploader': 'cole-dlp-test-acc', + 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA', + + }, + 'playlist': [{ + 'info_dict': { + '_type': 'url', + 'ie_key': 'Youtube', + 'url': 'https://www.youtube.com/shorts/sSM9J5YH_60', + 'id': 'sSM9J5YH_60', + 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA', + 'title': 'SHORT short', + 'channel': 'cole-dlp-test-acc', + 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA', + 'view_count': int, + 'thumbnails': list, + } + }], + 'params': {'extract_flat': True}, + }, { + # Live video status should be extracted + 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live', + 'info_dict': { + 'id': 
'UCQvWX73GQygcwXOTSf_VDVg', + 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO, should be Minecraft - Live or Minecraft - Topic - Live + 'tags': [] + }, + 'playlist': [{ + 'info_dict': { + '_type': 'url', + 'ie_key': 'Youtube', + 'url': 'startswith:https://www.youtube.com/watch?v=', + 'id': str, + 'title': str, + 'live_status': 'is_live', + 'channel_id': str, + 'channel_url': str, + 'concurrent_view_count': int, + 'channel': str, + } + }], + 'params': {'extract_flat': True}, + 'playlist_mincount': 1 }] @classmethod From dc3028d233b2f7091215dc0d9acc522914b9b59d Mon Sep 17 00:00:00 2001 From: Sergey <SG5@users.noreply.github.com> Date: Wed, 9 Nov 2022 19:24:14 -0800 Subject: [PATCH 1745/2552] [build] `py2exe`: Migrate to freeze API (#5149) Closes #5135 Authored by: SG5, pukkandan --- .github/workflows/build.yml | 2 +- setup.py | 123 ++++++++++++++++++++---------------- 2 files changed, 70 insertions(+), 55 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 12e5426b1..b35c35047 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -194,7 +194,7 @@ jobs: python-version: '3.8' - name: Install Requirements run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds - python -m pip install --upgrade pip setuptools wheel "py2exe<0.12" + python -m pip install --upgrade pip setuptools wheel py2exe pip install "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.3-py3-none-any.whl" -r requirements.txt - name: Prepare diff --git a/setup.py b/setup.py index 3641dfae9..88716152a 100644 --- a/setup.py +++ b/setup.py @@ -36,36 +36,34 @@ def packages(): def py2exe_params(): - import py2exe # noqa: F401 - warnings.warn( 'py2exe builds do not support pycryptodomex and needs VC++14 to run. 
' - 'The recommended way is to use "pyinst.py" to build using pyinstaller') + 'It is recommended to run "pyinst.py" to build using pyinstaller instead') return { 'console': [{ 'script': './yt_dlp/__main__.py', 'dest_base': 'yt-dlp', + 'icon_resources': [(1, 'devscripts/logo.ico')], + }], + 'version_info': { 'version': VERSION, 'description': DESCRIPTION, 'comments': LONG_DESCRIPTION.split('\n')[0], 'product_name': 'yt-dlp', 'product_version': VERSION, - 'icon_resources': [(1, 'devscripts/logo.ico')], - }], + }, 'options': { - 'py2exe': { - 'bundle_files': 0, - 'compressed': 1, - 'optimize': 2, - 'dist_dir': './dist', - 'excludes': ['Crypto', 'Cryptodome'], # py2exe cannot import Crypto - 'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'], - # Modules that are only imported dynamically must be added here - 'includes': ['yt_dlp.compat._legacy'], - } + 'bundle_files': 0, + 'compressed': 1, + 'optimize': 2, + 'dist_dir': './dist', + 'excludes': ['Crypto', 'Cryptodome'], # py2exe cannot import Crypto + 'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'], + # Modules that are only imported dynamically must be added here + 'includes': ['yt_dlp.compat._legacy'], }, - 'zipfile': None + 'zipfile': None, } @@ -113,41 +111,58 @@ class build_lazy_extractors(Command): subprocess.run([sys.executable, 'devscripts/make_lazy_extractors.py']) -params = py2exe_params() if sys.argv[1:2] == ['py2exe'] else build_params() -setup( - name='yt-dlp', - version=VERSION, - maintainer='pukkandan', - maintainer_email='pukkandan.ytdlp@gmail.com', - description=DESCRIPTION, - long_description=LONG_DESCRIPTION, - long_description_content_type='text/markdown', - url='https://github.com/yt-dlp/yt-dlp', - packages=packages(), - install_requires=REQUIREMENTS, - python_requires='>=3.7', - project_urls={ - 'Documentation': 'https://github.com/yt-dlp/yt-dlp#readme', - 'Source': 'https://github.com/yt-dlp/yt-dlp', - 'Tracker': 'https://github.com/yt-dlp/yt-dlp/issues', - 'Funding': 'https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators', - }, - classifiers=[ - 'Topic :: Multimedia :: Video', - 'Development Status :: 5 - Production/Stable', - 'Environment :: Console', - 'Programming Language :: Python', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: 3.10', - 'Programming Language :: Python :: 3.11', - 'Programming Language :: Python :: Implementation', - 'Programming Language :: Python :: Implementation :: CPython', - 'Programming Language :: Python :: Implementation :: PyPy', - 'License :: Public Domain', - 'Operating System :: OS Independent', - ], - cmdclass={'build_lazy_extractors': build_lazy_extractors}, - **params -) +def main(): + if sys.argv[1:2] == ['py2exe']: + params = py2exe_params() + try: + from py2exe import freeze + except ImportError: + import py2exe # noqa: F401 + warnings.warn('You are using an outdated version of py2exe. 
Support for this version will be removed in the future') + params['console'][0].update(params.pop('version_info')) + params['options'] = {'py2exe': params.pop('options')} + else: + return freeze(**params) + else: + params = build_params() + + setup( + name='yt-dlp', + version=VERSION, + maintainer='pukkandan', + maintainer_email='pukkandan.ytdlp@gmail.com', + description=DESCRIPTION, + long_description=LONG_DESCRIPTION, + long_description_content_type='text/markdown', + url='https://github.com/yt-dlp/yt-dlp', + packages=packages(), + install_requires=REQUIREMENTS, + python_requires='>=3.7', + project_urls={ + 'Documentation': 'https://github.com/yt-dlp/yt-dlp#readme', + 'Source': 'https://github.com/yt-dlp/yt-dlp', + 'Tracker': 'https://github.com/yt-dlp/yt-dlp/issues', + 'Funding': 'https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators', + }, + classifiers=[ + 'Topic :: Multimedia :: Video', + 'Development Status :: 5 - Production/Stable', + 'Environment :: Console', + 'Programming Language :: Python', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: Implementation', + 'Programming Language :: Python :: Implementation :: CPython', + 'Programming Language :: Python :: Implementation :: PyPy', + 'License :: Public Domain', + 'Operating System :: OS Independent', + ], + cmdclass={'build_lazy_extractors': build_lazy_extractors}, + **params + ) + + +main() From 0cf643b234ff2f4d017a980dbaefdb14ed6e4db6 Mon Sep 17 00:00:00 2001 From: Matthew <coletdjnz@protonmail.com> Date: Thu, 10 Nov 2022 16:33:03 +1300 Subject: [PATCH 1746/2552] [extractor/youtube] Differentiate between no and disabled comments (#5491) `comments` and `comment_count` will be set to None, as opposed to an empty list and 0, respectively. 
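As a rough sketch of what this enables downstream (illustrative only; the
`getcomments` option and the field names are real, the URL is a placeholder):

    import yt_dlp

    with yt_dlp.YoutubeDL({'getcomments': True}) as ydl:
        info = ydl.extract_info('https://www.youtube.com/watch?v=XXXXXXXXXXX', download=False)

    if info.get('comment_count') is None:
        print('Comments are disabled for this video')  # new behaviour
    elif not info.get('comments'):
        print('Comments are enabled, but none have been posted yet')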
Fixes https://github.com/yt-dlp/yt-dlp/issues/5068 Authored by: coletdjnz, pukkandan --- yt_dlp/extractor/common.py | 5 +++++ yt_dlp/extractor/youtube.py | 1 + 2 files changed, 6 insertions(+) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 20ed52216..34650cf4e 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -3738,6 +3738,9 @@ class InfoExtractor: def _get_subtitles(self, *args, **kwargs): raise NotImplementedError('This method must be implemented by subclasses') + class CommentsDisabled(Exception): + """Raise in _get_comments if comments are disabled for the video""" + def extract_comments(self, *args, **kwargs): if not self.get_param('getcomments'): return None @@ -3753,6 +3756,8 @@ class InfoExtractor: interrupted = False except KeyboardInterrupt: self.to_screen('Interrupted by user') + except self.CommentsDisabled: + return {'comments': None, 'comment_count': None} except Exception as e: if self.get_param('ignoreerrors') is not True: raise diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 7e3c17ae0..5b7c94c4e 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3270,6 +3270,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1) if message and not parent and tracker['running_total'] == 0: self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True) + raise self.CommentsDisabled @staticmethod def _generate_comment_continuation(video_id): From e72e48c53f16771ea7d786deb6b65a40d82a14c4 Mon Sep 17 00:00:00 2001 From: Matthew <coletdjnz@protonmail.com> Date: Thu, 10 Nov 2022 19:35:22 +1300 Subject: [PATCH 1747/2552] [extractor/youtube] Ignore incomplete data error for comment replies (#5490) When --ignore-errors is used. Closes https://github.com/yt-dlp/yt-dlp/issues/4669 Authored by: coletdjnz --- yt_dlp/extractor/youtube.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 5b7c94c4e..5b39f9765 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3237,11 +3237,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor): note_prefix = '%sDownloading comment%s API JSON page %d %s' % ( ' ' if parent else '', ' replies' if parent else '', page_num, comment_prog_str) - - response = self._extract_response( - item_id=None, query=continuation, - ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix, - check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None) + try: + response = self._extract_response( + item_id=None, query=continuation, + ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix, + check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None) + except ExtractorError as e: + # Ignore incomplete data error for replies if retries didn't work. + # This is to allow any other parent comments and comment threads to be downloaded. + # See: https://github.com/yt-dlp/yt-dlp/issues/4669 + if 'incomplete data' in str(e).lower() and parent and self.get_param('ignoreerrors') is True: + self.report_warning( + 'Received incomplete data for a comment reply thread and retrying did not help. 
' + 'Ignoring to let other comments be downloaded.') + else: + raise is_forced_continuation = False continuation_contents = traverse_obj( response, 'onResponseReceivedEndpoints', expected_type=list, default=[]) From 3f5c216969165c4a0583a4795e4d15325dc009d4 Mon Sep 17 00:00:00 2001 From: Matthew <coletdjnz@protonmail.com> Date: Fri, 11 Nov 2022 10:12:10 +1300 Subject: [PATCH 1748/2552] [extractor/nzherald] Support new video embed (#5493) Authored by: coletdjnz --- yt_dlp/extractor/nzherald.py | 48 ++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/yt_dlp/extractor/nzherald.py b/yt_dlp/extractor/nzherald.py index 7c9efd922..062f9a875 100644 --- a/yt_dlp/extractor/nzherald.py +++ b/yt_dlp/extractor/nzherald.py @@ -1,6 +1,7 @@ +import json + from .brightcove import BrightcoveNewIE from .common import InfoExtractor - from ..compat import compat_str from ..utils import ( ExtractorError, @@ -13,17 +14,20 @@ class NZHeraldIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?nzherald\.co\.nz/[\w\/-]+\/(?P<id>[A-Z0-9]+)' _TESTS = [ { - 'url': 'https://www.nzherald.co.nz/nz/weather-heavy-rain-gales-across-nz-most-days-this-week/PTG7QWY4E2225YHZ5NAIRBTYTQ/', + # Video accessible under 'video' key + 'url': 'https://www.nzherald.co.nz/nz/queen-elizabeth-death-nz-public-holiday-announced-for-september-26/CEOPBSXO2JDCLNK3H7E3BIE2FA/', 'info_dict': { - 'id': '6271084466001', + 'id': '6312191736112', 'ext': 'mp4', - 'title': 'MetService severe weather warning: September 6th - 7th', - 'timestamp': 1630891576, - 'upload_date': '20210906', + 'title': 'Focus: PM holds post-Cabinet press conference', + 'duration': 238.08, + 'upload_date': '20220912', 'uploader_id': '1308227299001', - 'description': 'md5:db6ca335a22e2cdf37ab9d2bcda52902' + 'timestamp': 1662957159, + 'tags': [], + 'thumbnail': r're:https?://.*\.jpg$', + 'description': 'md5:2f17713fcbfcfbe38bb9e7dfccbb0f2e', } - }, { # Webpage has brightcove embed player url 'url': 'https://www.nzherald.co.nz/travel/pencarrow-coastal-trail/HDVTPJEPP46HJ2UEMK4EGD2DFI/', @@ -34,9 +38,11 @@ class NZHeraldIE(InfoExtractor): 'timestamp': 1625102897, 'upload_date': '20210701', 'uploader_id': '1308227299001', - 'description': 'md5:d361aaa0c6498f7ac1bc4fc0a0aec1e4' + 'description': 'md5:d361aaa0c6498f7ac1bc4fc0a0aec1e4', + 'thumbnail': r're:https?://.*\.jpg$', + 'tags': ['travel', 'video'], + 'duration': 43.627, } - }, { # two video embeds of the same video 'url': 'https://www.nzherald.co.nz/nz/truck-driver-captured-cutting-off-motorist-on-state-highway-1-in-canterbury/FIHNJB7PLLPHWQPK4S7ZBDUC4I/', @@ -48,6 +54,22 @@ class NZHeraldIE(InfoExtractor): 'upload_date': '20210429', 'uploader_id': '1308227299001', 'description': 'md5:4cae7dfb7613ac4c73b9e73a75c6b5d7' + }, + 'skip': 'video removed', + }, { + # customVideo embed requiring additional API call + 'url': 'https://www.nzherald.co.nz/nz/politics/reserve-bank-rejects-political-criticisms-stands-by-review/2JO5Q4WLZRCBBNWTLACZMOP4RA/', + 'info_dict': { + 'id': '6315123873112', + 'ext': 'mp4', + 'timestamp': 1667862725, + 'title': 'Focus: Luxon on re-appointment of Reserve Bank governor Adrian Orr', + 'upload_date': '20221107', + 'description': 'md5:df2f1f7033a8160c66e28e4743f5d934', + 'uploader_id': '1308227299001', + 'tags': ['video', 'nz herald focus', 'politics', 'politics videos'], + 'thumbnail': r're:https?://.*\.jpg$', + 'duration': 99.584, } }, { 'url': 
'https://www.nzherald.co.nz/kahu/kaupapa-companies-my-taiao-supporting-maori-in-study-and-business/PQBO2J25WCG77VGRX7W7BVYEAI/', @@ -80,6 +102,12 @@ class NZHeraldIE(InfoExtractor): self._search_regex(r'Fusion\.globalContent\s*=\s*({.+?})\s*;', webpage, 'fusion metadata'), article_id) video_metadata = fusion_metadata.get('video') + if not video_metadata: + custom_video_id = traverse_obj(fusion_metadata, ('customVideo', 'embed', 'id'), expected_type=str) + if custom_video_id: + video_metadata = self._download_json( + 'https://www.nzherald.co.nz/pf/api/v3/content/fetch/full-content-by-id', article_id, + query={'query': json.dumps({'id': custom_video_id, 'site': 'nzh'}), '_website': 'nzh'}) bc_video_id = traverse_obj( video_metadata or fusion_metadata, # fusion metadata is the video metadata for video-only pages 'brightcoveId', ('content_elements', ..., 'referent', 'id'), From 17fc3dc48af968e28c23197ed06542fdb47aba2b Mon Sep 17 00:00:00 2001 From: MrOctopus <shock.game@hotmail.com> Date: Fri, 11 Nov 2022 02:49:24 +0100 Subject: [PATCH 1749/2552] [build] Create armv7l and aarch64 releases (#5449) Closes #5436 Authored by: MrOctopus, pukkandan --- .github/workflows/build.yml | 59 +++++++++++++++++++++++++++++++++---- README.md | 2 ++ pyinst.py | 7 ++--- yt_dlp/update.py | 22 +++++++++----- yt_dlp/utils.py | 3 +- 5 files changed, 75 insertions(+), 18 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b35c35047..46a775b4d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -114,6 +114,49 @@ jobs: git -C taps/ push + build_linux_arm: + permissions: + packages: write # for Creating cache + runs-on: ubuntu-latest + needs: prepare + strategy: + matrix: + architecture: + - armv7 + - aarch64 + + steps: + - uses: actions/checkout@v3 + with: + path: ./repo + - name: Virtualized Install, Prepare & Build + uses: yt-dlp/run-on-arch-action@v2 + with: + githubToken: ${{ github.token }} # To cache image + arch: ${{ matrix.architecture }} + distro: ubuntu18.04 # Standalone executable should be built on minimum supported OS + dockerRunArgs: --volume "${PWD}/repo:/repo" + install: | # Installing Python 3.10 from the Deadsnakes repo raises errors + apt update + apt -y install zlib1g-dev python3.8 python3.8-dev python3.8-distutils python3-pip + python3.8 -m pip install -U pip setuptools wheel + # Cannot access requirements.txt from the repo directory at this stage + python3.8 -m pip install -U Pyinstaller mutagen pycryptodomex websockets brotli certifi + + run: | + cd repo + python3.8 -m pip install -U Pyinstaller -r requirements.txt # Cached version may be out of date + python3.8 devscripts/update-version.py ${{ needs.prepare.outputs.version_suffix }} + python3.8 devscripts/make_lazy_extractors.py + python3.8 pyinst.py + + - name: Upload artifacts + uses: actions/upload-artifact@v3 + with: + path: | # run-on-arch-action designates armv7l as armv7 + repo/dist/yt-dlp_linux_${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }} + + build_macos: runs-on: macos-11 needs: prepare @@ -194,8 +237,8 @@ jobs: python-version: '3.8' - name: Install Requirements run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds - python -m pip install --upgrade pip setuptools wheel py2exe - pip install "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.3-py3-none-any.whl" -r requirements.txt + python -m pip install -U pip setuptools wheel py2exe + pip install -U 
"https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.3-py3-none-any.whl" -r requirements.txt - name: Prepare run: | @@ -230,8 +273,8 @@ jobs: architecture: 'x86' - name: Install Requirements run: | - python -m pip install --upgrade pip setuptools wheel - pip install "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-5.3-py3-none-any.whl" -r requirements.txt + python -m pip install -U pip setuptools wheel + pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-5.3-py3-none-any.whl" -r requirements.txt - name: Prepare run: | @@ -252,7 +295,7 @@ jobs: permissions: contents: write # for action-gh-release runs-on: ubuntu-latest - needs: [prepare, build_unix, build_windows, build_windows32, build_macos, build_macos_legacy] + needs: [prepare, build_unix, build_linux_arm, build_windows, build_windows32, build_macos, build_macos_legacy] steps: - uses: actions/checkout@v3 @@ -279,6 +322,8 @@ jobs: sha256sum artifact/yt-dlp_macos | awk '{print $1 " yt-dlp_macos"}' >> SHA2-256SUMS sha256sum artifact/yt-dlp_macos.zip | awk '{print $1 " yt-dlp_macos.zip"}' >> SHA2-256SUMS sha256sum artifact/yt-dlp_macos_legacy | awk '{print $1 " yt-dlp_macos_legacy"}' >> SHA2-256SUMS + sha256sum artifact/yt-dlp_linux_armv7l | awk '{print $1 " yt-dlp_linux_armv7l"}' >> SHA2-256SUMS + sha256sum artifact/yt-dlp_linux_aarch64 | awk '{print $1 " yt-dlp_linux_aarch64"}' >> SHA2-256SUMS sha256sum artifact/dist/yt-dlp_linux | awk '{print $1 " yt-dlp_linux"}' >> SHA2-256SUMS sha256sum artifact/dist/yt-dlp_linux.zip | awk '{print $1 " yt-dlp_linux.zip"}' >> SHA2-256SUMS sha512sum artifact/yt-dlp | awk '{print $1 " yt-dlp"}' >> SHA2-512SUMS @@ -290,6 +335,8 @@ jobs: sha512sum artifact/yt-dlp_macos | awk '{print $1 " yt-dlp_macos"}' >> SHA2-512SUMS sha512sum artifact/yt-dlp_macos.zip | awk '{print $1 " yt-dlp_macos.zip"}' >> SHA2-512SUMS sha512sum artifact/yt-dlp_macos_legacy | awk '{print $1 " yt-dlp_macos_legacy"}' >> SHA2-512SUMS + sha512sum artifact/yt-dlp_linux_armv7l | awk '{print $1 " yt-dlp_linux_armv7l"}' >> SHA2-512SUMS + sha512sum artifact/yt-dlp_linux_aarch64 | awk '{print $1 " yt-dlp_linux_aarch64"}' >> SHA2-512SUMS sha512sum artifact/dist/yt-dlp_linux | awk '{print $1 " yt-dlp_linux"}' >> SHA2-512SUMS sha512sum artifact/dist/yt-dlp_linux.zip | awk '{print $1 " yt-dlp_linux.zip"}' >> SHA2-512SUMS @@ -322,6 +369,8 @@ jobs: artifact/yt-dlp_macos artifact/yt-dlp_macos.zip artifact/yt-dlp_macos_legacy + artifact/yt-dlp_linux_armv7l + artifact/yt-dlp_linux_aarch64 artifact/dist/yt-dlp_linux artifact/dist/yt-dlp_linux.zip _update_spec diff --git a/README.md b/README.md index e9ea99ebf..aac359ab9 100644 --- a/README.md +++ b/README.md @@ -201,6 +201,8 @@ File|Description [yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_min.exe)|Windows (Win7 SP1+) standalone x64 binary built with `py2exe`<br/> ([Not recommended](#standalone-py2exe-builds-windows)) [yt-dlp_linux](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux)|Linux standalone x64 binary [yt-dlp_linux.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux.zip)|Unpackaged Linux executable (no auto-update) +[yt-dlp_linux_armv7l](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_armv7l)|Linux standalone armv7l (32-bit) binary +[yt-dlp_linux_aarch64](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_aarch64)|Linux standalone aarch64 (64-bit) binary 
[yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged Windows executable (no auto-update) [yt-dlp_macos.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos.zip)|Unpackaged MacOS (10.15+) executable (no auto-update) [yt-dlp_macos_legacy](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos_legacy)|MacOS (10.9+) standalone x64 executable diff --git a/pyinst.py b/pyinst.py index 0b7c66a30..17c950563 100644 --- a/pyinst.py +++ b/pyinst.py @@ -12,9 +12,8 @@ from PyInstaller.__main__ import run as run_pyinstaller from devscripts.utils import read_version -OS_NAME, MACHINE, ARCH = sys.platform, platform.machine(), platform.architecture()[0][:2] -if MACHINE in ('x86_64', 'AMD64') or ('i' in MACHINE and '86' in MACHINE): - # NB: Windows x86 has MACHINE = AMD64 irrespective of bitness +OS_NAME, MACHINE, ARCH = sys.platform, platform.machine().lower(), platform.architecture()[0][:2] +if MACHINE in ('x86', 'x86_64', 'amd64', 'i386', 'i686'): MACHINE = 'x86' if ARCH == '32' else '' @@ -63,7 +62,7 @@ def exe(onedir): name = '_'.join(filter(None, ( 'yt-dlp', {'win32': '', 'darwin': 'macos'}.get(OS_NAME, OS_NAME), - MACHINE + MACHINE, ))) return name, ''.join(filter(None, ( 'dist/', diff --git a/yt_dlp/update.py b/yt_dlp/update.py index 70a1d6f7f..6208aad8a 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -15,6 +15,7 @@ from .utils import ( Popen, cached_method, deprecation_warning, + remove_end, shell_quote, system_identifier, traverse_obj, @@ -35,9 +36,14 @@ def _get_variant_and_executable_path(): return 'py2exe', path if sys._MEIPASS == os.path.dirname(path): return f'{sys.platform}_dir', path - if sys.platform == 'darwin' and version_tuple(platform.mac_ver()[0]) < (10, 15): - return 'darwin_legacy_exe', path - return f'{sys.platform}_exe', path + if sys.platform == 'darwin': + machine = '_legacy' if version_tuple(platform.mac_ver()[0]) < (10, 15) else '' + else: + machine = f'_{platform.machine().lower()}' + # Ref: https://en.wikipedia.org/wiki/Uname#Examples + if machine[1:] in ('x86', 'x86_64', 'amd64', 'i386', 'i686'): + machine = '_x86' if platform.architecture()[0][:2] == '32' else '' + return f'{remove_end(sys.platform, "32")}{machine}_exe', path path = os.path.dirname(__file__) if isinstance(__loader__, zipimporter): @@ -68,10 +74,13 @@ def current_git_head(): _FILE_SUFFIXES = { 'zip': '', 'py2exe': '_min.exe', - 'win32_exe': '.exe', + 'win_exe': '.exe', + 'win_x86_exe': '_x86.exe', 'darwin_exe': '_macos', 'darwin_legacy_exe': '_macos_legacy', 'linux_exe': '_linux', + 'linux_aarch64_exe': '_linux_aarch64', + 'linux_armv7l_exe': '_linux_armv7l', } _NON_UPDATEABLE_REASONS = { @@ -161,10 +170,7 @@ class Updater: @functools.cached_property def release_name(self): """The release filename""" - label = _FILE_SUFFIXES[detect_variant()] - if label and platform.architecture()[0][:2] == '32': - label = f'_x86{label}' - return f'yt-dlp{label}' + return f'yt-dlp{_FILE_SUFFIXES[detect_variant()]}' @functools.cached_property def release_hash(self): diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index b7e7cb7d7..4c44f4845 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2006,9 +2006,10 @@ def system_identifier(): with contextlib.suppress(OSError): # We may not have access to the executable libc_ver = platform.libc_ver() - return 'Python %s (%s %s) - %s (%s%s)' % ( + return 'Python %s (%s %s %s) - %s (%s%s)' % ( platform.python_version(), python_implementation, + platform.machine(), platform.architecture()[0], 
platform.platform(), ssl.OPENSSL_VERSION, From a6858cda296b532db3fd7bcfc4f960f9b2fdf30a Mon Sep 17 00:00:00 2001 From: mlampe <mlampe0@googlemail.com> Date: Fri, 11 Nov 2022 02:58:23 +0100 Subject: [PATCH 1750/2552] [build] Make linux binary truly standalone using `conda` (#5423) Authored by: mlampe --- .github/workflows/build.yml | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 46a775b4d..49b9411fd 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -50,26 +50,43 @@ jobs: build_unix: needs: prepare - runs-on: ubuntu-18.04 # Standalone executable should be built on minimum supported OS + runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: python-version: '3.10' + - uses: conda-incubator/setup-miniconda@v2 + with: + miniforge-variant: Mambaforge + use-mamba: true + channels: conda-forge + auto-update-conda: true + activate-environment: '' + auto-activate-base: false - name: Install Requirements run: | - sudo apt-get -y install zip pandoc man - python -m pip install --upgrade pip setuptools wheel twine - python -m pip install Pyinstaller -r requirements.txt + sudo apt-get -y install zip pandoc man sed + python -m pip install -U pip setuptools wheel twine + python -m pip install -U Pyinstaller -r requirements.txt + reqs=$(mktemp) + echo -e 'python=3.10.*\npyinstaller' >$reqs + sed 's/^brotli.*/brotli-python/' <requirements.txt >>$reqs + mamba create -n build --file $reqs - name: Prepare run: | python devscripts/update-version.py ${{ needs.prepare.outputs.version_suffix }} python devscripts/make_lazy_extractors.py - - name: Build Unix executables + - name: Build Unix platform-independent binary run: | make all tar + - name: Build Unix standalone binary + shell: bash -l {0} + run: | + unset LD_LIBRARY_PATH # Harmful; set by setup-python + conda activate build python pyinst.py --onedir (cd ./dist/yt-dlp_linux && zip -r ../yt-dlp_linux.zip .) 
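          # Rationale sketch (an assumption; the diff itself does not state this):
          # building from the mamba-created env links against conda-forge's bundled
          # libraries rather than the runner's newer system ones, which is what
          # makes the resulting one-file binary portable across distros.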
python pyinst.py From f7fc8d39e99d5b0683ac48a876618a5495a9ef5e Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Fri, 11 Nov 2022 03:39:41 +0530 Subject: [PATCH 1751/2552] [extractor] Fix `fatal=False` for `_search_nuxt_data` Closes #5423 --- yt_dlp/extractor/common.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 34650cf4e..570f8195c 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1647,7 +1647,10 @@ class InfoExtractor: FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)' js, arg_keys, arg_vals = self._search_regex( (rf'<script>\s*window\.{rectx}={FUNCTION_RE}\s*\)\s*;?\s*</script>', rf'{rectx}\(.*?{FUNCTION_RE}'), - webpage, context_name, group=('js', 'arg_keys', 'arg_vals'), fatal=fatal) + webpage, context_name, group=('js', 'arg_keys', 'arg_vals'), + default=NO_DEFAULT if fatal else (None, None, None)) + if js is None: + return {} args = dict(zip(arg_keys.split(','), arg_vals.split(','))) From bd7e919a75cd264daabbe50137b2a7c89390c68c Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Fri, 11 Nov 2022 13:52:40 +0530 Subject: [PATCH 1752/2552] [extractor/youtube:tab] Improvements to tab handling (#5487) * Better handling of direct channel URLs - See https://github.com/yt-dlp/yt-dlp/pull/5439#issuecomment-1309322019 * Prioritize tab id from URL slug - Closes #5486 * Add metadata for the wrapping playlist * Simplify redirect for music playlists --- yt_dlp/extractor/youtube.py | 283 +++++++++++++++++++----------------- 1 file changed, 146 insertions(+), 137 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 5b39f9765..d18a16689 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -4263,15 +4263,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor): class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): - @staticmethod def passthrough_smuggled_data(func): - def _smuggle(entries, smuggled_data): - for entry in entries: - # TODO: Convert URL to music.youtube instead. - # Do we need to passthrough any other smuggled_data? 
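        # For context (illustrative): smuggle_url(url, data) appends a
        # '#__youtubedl_smuggle=<urlencoded json>' fragment to the URL and
        # unsmuggle_url() later strips it off again, passing extractor hints
        # through plain URL strings.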
- entry['url'] = smuggle_url(entry['url'], smuggled_data) - yield entry + def _smuggle(info, smuggled_data): + if info.get('_type') not in ('url', 'url_transparent'): + return info + if smuggled_data.get('is_music_url'): + parsed_url = urllib.parse.urlparse(info['url']) + if parsed_url.netloc in ('www.youtube.com', 'music.youtube.com'): + smuggled_data.pop('is_music_url') + info['url'] = urllib.parse.urlunparse(parsed_url._replace(netloc='music.youtube.com')) + if smuggled_data: + info['url'] = smuggle_url(info['url'], smuggled_data) + return info @functools.wraps(func) def wrapper(self, url): @@ -4279,8 +4283,10 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): if self.is_music_url(url): smuggled_data['is_music_url'] = True info_dict = func(self, url, smuggled_data) - if smuggled_data and info_dict.get('entries'): - info_dict['entries'] = _smuggle(info_dict['entries'], smuggled_data) + if smuggled_data: + _smuggle(info_dict, smuggled_data) + if info_dict.get('entries'): + info_dict['entries'] = (_smuggle(i, smuggled_data) for i in info_dict['entries']) return info_dict return wrapper @@ -4628,28 +4634,33 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): response, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., ('tabRenderer', 'expandableTabRenderer')), expected_type=dict) def _extract_from_tabs(self, item_id, ytcfg, data, tabs): - playlist_id = title = description = channel_url = channel_name = channel_id = None - tags = [] + metadata = self._extract_metadata_from_tabs(item_id, data) selected_tab = self._extract_selected_tab(tabs) - # Deprecated - remove when layout discontinued - primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') - playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer'), expected_type=dict) - metadata_renderer = try_get( - data, lambda x: x['metadata']['channelMetadataRenderer'], dict) - if metadata_renderer: - channel_name = metadata_renderer.get('title') - channel_url = metadata_renderer.get('channelUrl') - channel_id = metadata_renderer.get('externalId') - else: - metadata_renderer = try_get( - data, lambda x: x['metadata']['playlistMetadataRenderer'], dict) + metadata['title'] += format_field(selected_tab, 'title', ' - %s') + metadata['title'] += format_field(selected_tab, 'expandedText', ' - %s') + + return self.playlist_result( + self._entries( + selected_tab, metadata['id'], ytcfg, + self._extract_account_syncid(ytcfg, data), + self._extract_visitor_data(data, ytcfg)), + **metadata) + def _extract_metadata_from_tabs(self, item_id, data): + info = {'id': item_id} + + metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict) if metadata_renderer: - title = metadata_renderer.get('title') - description = metadata_renderer.get('description', '') - playlist_id = channel_id - tags = metadata_renderer.get('keywords', '').split() + info.update({ + 'uploader': metadata_renderer.get('title'), + 'uploader_id': metadata_renderer.get('externalId'), + 'uploader_url': metadata_renderer.get('channelUrl'), + }) + if info['uploader_id']: + info['id'] = info['uploader_id'] + else: + metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict) # We can get the uncropped banner/avatar by replacing the crop params with '=s0' # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714 @@ -4667,7 +4678,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): }) 
channel_banners = self._extract_thumbnails( - data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner'])) + data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner'))) for banner in channel_banners: banner['preference'] = -10 @@ -4680,78 +4691,64 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): 'preference': -5 }) - # Deprecated - remove when old layout is discontinued + # Deprecated - remove primary_sidebar_renderer when layout discontinued + primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') + playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer'), expected_type=dict) + primary_thumbnails = self._extract_thumbnails( primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail')) - playlist_thumbnails = self._extract_thumbnails( playlist_header_renderer, ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail')) - if playlist_id is None: - playlist_id = item_id + info.update({ + 'title': (traverse_obj(metadata_renderer, 'title') + or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) + or info['id']), + 'availability': self._extract_availability(data), + 'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')), + 'description': try_get(metadata_renderer, lambda x: x.get('description', '')), + 'tags': try_get(metadata_renderer or {}, lambda x: x.get('keywords', '').split()), + 'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners, + }) - # Deprecated - remove primary_sidebar_renderer when old layout discontinued # Playlist stats is a text runs array containing [video count, view count, last updated]. # last updated or (view count and last updated) may be missing. 
playlist_stats = get_first( - (primary_sidebar_renderer, playlist_header_renderer), (('stats', 'briefStats', 'numVideosText'),)) + (primary_sidebar_renderer, playlist_header_renderer), (('stats', 'briefStats', 'numVideosText'), )) + last_updated_unix = self._parse_time_text( self._get_text(playlist_stats, 2) # deprecated, remove when old layout discontinued or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text'))) + info['modified_date'] = strftime_or_none(last_updated_unix, '%Y%m%d') - view_count = self._get_count(playlist_stats, 1) - if view_count is None: - view_count = self._get_count(playlist_header_renderer, 'viewCountText') - - playlist_count = self._get_count(playlist_stats, 0) - if playlist_count is None: - playlist_count = self._get_count(playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text')) - - if title is None: - title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id - title += format_field(selected_tab, 'title', ' - %s') - title += format_field(selected_tab, 'expandedText', ' - %s') - - metadata = { - 'playlist_id': playlist_id, - 'playlist_title': title, - 'playlist_description': description, - 'uploader': channel_name, - 'uploader_id': channel_id, - 'uploader_url': channel_url, - 'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners, - 'tags': tags, - 'view_count': view_count, - 'availability': self._extract_availability(data), - 'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'), - 'playlist_count': playlist_count, - 'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')), - } - if not channel_id: + info['view_count'] = self._get_count(playlist_stats, 1) + if info['view_count'] is None: # 0 is allowed + info['view_count'] = self._get_count(playlist_header_renderer, 'viewCountText') + + info['playlist_count'] = self._get_count(playlist_stats, 0) + if info['playlist_count'] is None: # 0 is allowed + info['playlist_count'] = self._get_count(playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text')) + + if not info.get('uploader_id'): owner = traverse_obj(playlist_header_renderer, 'ownerText') - if not owner: - # Deprecated + if not owner: # Deprecated owner = traverse_obj( self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer'), ('videoOwner', 'videoOwnerRenderer', 'title')) owner_text = self._get_text(owner) browse_ep = traverse_obj(owner, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {} - metadata.update(filter_dict({ + info.update({ 'uploader': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text), 'uploader_id': browse_ep.get('browseId'), 'uploader_url': urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl')) - })) + }) - metadata.update({ - 'channel': metadata['uploader'], - 'channel_id': metadata['uploader_id'], - 'channel_url': metadata['uploader_url']}) - return self.playlist_result( - self._entries( - selected_tab, playlist_id, ytcfg, - self._extract_account_syncid(ytcfg, data), - self._extract_visitor_data(data, ytcfg)), - **metadata) + info.update({ + 'channel': info['uploader'], + 'channel_id': info['uploader_id'], + 'channel_url': info['uploader_url'] + }) + return info def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg): first_id = last_id = response = None @@ -5562,10 +5559,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'description': '', 'availability': 
'public', }, - 'expected_warnings': [ - 'The URL does not have a videos tab', - r'[Uu]navailable videos (are|will be) hidden', - ], 'playlist_mincount': 101, }, { # Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg) @@ -5773,7 +5766,16 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA', 'info_dict': { 'id': 'UCK9V2B22uJYu3N7eR_BT9QA', - 'title': 'Uploads for UCK9V2B22uJYu3N7eR_BT9QA' + 'title': 'Polka Ch. 尾丸ポルカ', + 'channel_follower_count': int, + 'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA', + 'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA', + 'uploader': 'Polka Ch. 尾丸ポルカ', + 'description': 'md5:3b8df1ac5af337aa206e37ee3d181ec9', + 'channel': 'Polka Ch. 尾丸ポルカ', + 'tags': 'count:35', + 'uploader_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA', + 'uploader_id': 'UCK9V2B22uJYu3N7eR_BT9QA', }, 'playlist_count': 3, }, { @@ -5929,15 +5931,18 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): tab_url = urljoin(base_url, traverse_obj( tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'))) - tab_id = (traverse_obj(tab, 'tabIdentifier', expected_type=str) - or tab_url and self._get_url_mobj(tab_url)['tab'][1:]) + tab_id = (tab_url and self._get_url_mobj(tab_url)['tab'][1:] + or traverse_obj(tab, 'tabIdentifier', expected_type=str)) if tab_id: - return tab_id, tab_name + return { + 'TAB_ID_SPONSORSHIPS': 'membership', + }.get(tab_id, tab_id), tab_name # Fallback to tab name if we cannot get the tab id. # XXX: should we strip non-ascii letters? e.g. in case of 'let's play' tab example on special gaming channel # Note that in the case of translated tab name this may result in an empty string, which we don't want. - self.write_debug(f'Falling back to selected tab name: {tab_name}') + if tab_name: + self.write_debug(f'Falling back to selected tab name: {tab_name}') return { 'home': 'featured', 'live': 'streams', @@ -5955,47 +5960,43 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): mobj = self._get_url_mobj(url) pre, tab, post, is_channel = mobj['pre'], mobj['tab'], mobj['post'], not mobj['not_channel'] - if is_channel: - if smuggled_data.get('is_music_url'): - if item_id[:2] == 'VL': # Youtube music VL channels have an equivalent playlist - item_id = item_id[2:] - pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False - elif item_id[:2] == 'MP': # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist - mdata = self._extract_tab_endpoint( - f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music') - murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'), - get_all=False, expected_type=str) - if not murl: - raise ExtractorError('Failed to resolve album to playlist') - return self.url_result(murl, YoutubeTabIE) - elif mobj['channel_type'] == 'browse': # Youtube music /browse/ should be changed to /channel/ - pre = f'https://www.youtube.com/channel/{item_id}' - - original_tab_id = tab[1:] + if is_channel and smuggled_data.get('is_music_url'): + if item_id[:2] == 'VL': # Youtube music VL channels have an equivalent playlist + return self.url_result( + f'https://music.youtube.com/playlist?list={item_id[2:]}', YoutubeTabIE, item_id[2:]) + elif item_id[:2] == 'MP': # Resolve albums (/[channel/browse]/MP...) 
to their equivalent playlist + mdata = self._extract_tab_endpoint( + f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music') + murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'), + get_all=False, expected_type=str) + if not murl: + raise ExtractorError('Failed to resolve album to playlist') + return self.url_result(murl, YoutubeTabIE) + elif mobj['channel_type'] == 'browse': # Youtube music /browse/ should be changed to /channel/ + return self.url_result( + f'https://music.youtube.com/channel/{item_id}{tab}{post}', YoutubeTabIE, item_id) + + original_tab_id, display_id = tab[1:], f'{item_id}{tab}' if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts: - tab = '/videos' - - url = ''.join((pre, tab, post)) - mobj = self._get_url_mobj(url) + url = f'{pre}/videos{post}' # Handle both video/playlist URLs qs = parse_qs(url) - video_id, playlist_id = (qs.get(key, [None])[0] for key in ('v', 'list')) - + video_id, playlist_id = [traverse_obj(qs, (key, 0)) for key in ('v', 'list')] if not video_id and mobj['not_channel'].startswith('watch'): if not playlist_id: # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable - raise ExtractorError('Unable to recognize tab page') + raise ExtractorError('A video URL was given without video ID', expected=True) # Common mistake: https://www.youtube.com/watch?list=playlist_id self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}') - url = f'https://www.youtube.com/playlist?list={playlist_id}' - mobj = self._get_url_mobj(url) + return self.url_result( + f'https://www.youtube.com/playlist?list={playlist_id}', YoutubeTabIE, playlist_id) if not self._yes_playlist(playlist_id, video_id): return self.url_result( f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id) - data, ytcfg = self._extract_data(url, item_id) + data, ytcfg = self._extract_data(url, display_id) # YouTube may provide a non-standard redirect to the regional channel # See: https://github.com/yt-dlp/yt-dlp/issues/2694 @@ -6003,28 +6004,26 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): redirect_url = traverse_obj( data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False) if redirect_url and 'no-youtube-channel-redirect' not in compat_opts: - redirect_url = ''.join(( - urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post'])) + redirect_url = ''.join((urljoin('https://www.youtube.com', redirect_url), tab, post)) self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}') return self.url_result(redirect_url, YoutubeTabIE) - tab_results = [] - tabs = self._extract_tab_renderers(data) + tabs, extra_tabs = self._extract_tab_renderers(data), [] if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts: selected_tab = self._extract_selected_tab(tabs) selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url) # NB: Name may be translated self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}') if not original_tab_id and selected_tab_name: - self.to_screen('Channel URLs download all uploads of the channel. ' + self.to_screen('Downloading all uploads of the channel. 
'
                    'To download only the videos in a specific tab, pass the tab\'s URL')
            if self._has_tab(tabs, 'streams'):
-                tab_results.append(self.url_result(''.join((pre, '/streams', post))))
+                extra_tabs.append(''.join((pre, '/streams', post)))
            if self._has_tab(tabs, 'shorts'):
-                tab_results.append(self.url_result(''.join((pre, '/shorts', post))))
+                extra_tabs.append(''.join((pre, '/shorts', post)))
            # XXX: Members-only tab should also be extracted
 
-        if not tab_results and selected_tab_id != 'videos':
+        if not extra_tabs and selected_tab_id != 'videos':
             # Channel does not have streams, shorts or videos tabs
             if item_id[:2] != 'UC':
                 raise ExtractorError('This channel has no uploads', expected=True)
@@ -6041,43 +6040,53 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
                 self.to_screen(
                     f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead')
 
-        elif tab_results and selected_tab_id != 'videos':
+        elif extra_tabs and selected_tab_id != 'videos':
             # When there are shorts/live tabs but not videos tab
-            url, data = ''.join((pre, post)), None
+            url, data = f'{pre}{post}', None
 
         elif (original_tab_id or 'videos') != selected_tab_id:
             if original_tab_id == 'live':
                 # Live tab should have redirected to the video
                 # Except in the case the channel has an actual live tab
                 # Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
-                raise UserNotLive(video_id=mobj['id'])
+                raise UserNotLive(video_id=item_id)
             elif selected_tab_name:
                 raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)
 
             # For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
             url = f'{pre}{post}'
 
-        self.write_debug(f'Final URL: {url}')
-
         # YouTube sometimes provides a button to reload playlist with unavailable videos.
         if 'no-youtube-unavailable-videos' not in compat_opts:
-            data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
+            data = self._reload_with_unavailable_videos(display_id, data, ytcfg) or data
         self._extract_and_report_alerts(data, only_once=True)
-        tabs = self._extract_tab_renderers(data)
+
+        tabs, entries = self._extract_tab_renderers(data), []
         if tabs:
-            tab_results[:0] = [self._extract_from_tabs(item_id, ytcfg, data, tabs)]
-            tab_results[0].update({
+            entries = [self._extract_from_tabs(item_id, ytcfg, data, tabs)]
+            entries[0].update({
                 'extractor_key': YoutubeTabIE.ie_key(),
                 'extractor': YoutubeTabIE.IE_NAME,
                 'webpage_url': url,
             })
-
-        if len(tab_results) == 1:
-            return tab_results[0]
-        elif len(tab_results) > 1:
-            return self.playlist_result(tab_results, item_id, title=f'Uploads for {item_id}')
-
+        if self.get_param('playlist_items') == '0':
+            entries.extend(self.url_result(u, YoutubeTabIE) for u in extra_tabs)
+        else:  # Users expect to get all `video_id`s even with `--flat-playlist`. So don't return `url_result`
+            entries.extend(map(self._real_extract, extra_tabs))
+
+        if len(entries) == 1:
+            return entries[0]
+        elif entries:
+            metadata = self._extract_metadata_from_tabs(item_id, data)
+            uploads_url = 'the Uploads (UU) playlist URL'
+            if try_get(metadata, lambda x: x['channel_id'].startswith('UC')):
+                uploads_url = f'https://www.youtube.com/playlist?list=UU{metadata["channel_id"][2:]}'
+            self.to_screen(
+                'Downloading as multiple playlists, separated by tabs. '
+                f'To download as a single playlist instead, pass {uploads_url}')
+            return self.playlist_result(entries, item_id, **metadata)
+
+        # Inline playlist
         playlist = traverse_obj(
             data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
         if playlist:
@@ -6086,7 +6095,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
         video_id = traverse_obj(
             data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
         if video_id:
-            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
+            if tab != '/live':  # live tab is expected to redirect to video
                 self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
             return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

From e4221b700f01acd96fe6a03c20d57c59be6f1f7f Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Fri, 11 Nov 2022 08:54:57 +0000
Subject: [PATCH 1753/2552] Fix `--list` options not implying `-s` in some
 cases (#5296)

Authored by: bashonly, Grub4K
---
 yt_dlp/YoutubeDL.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 92b802da6..1efcfc2e4 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -672,6 +672,13 @@ class YoutubeDL:
         else:
             self.params['nooverwrites'] = not self.params['overwrites']
 
+        if self.params.get('simulate') is None and any((
+            self.params.get('list_thumbnails'),
+            self.params.get('listformats'),
+            self.params.get('listsubtitles'),
+        )):
+            self.params['simulate'] = 'list_only'
+
         self.params.setdefault('forceprint', {})
         self.params.setdefault('print_to_file', {})
@@ -2643,8 +2650,7 @@ class YoutubeDL:
         # The pre-processors may have modified the formats
         formats = self._get_formats(info_dict)
 
-        list_only = self.params.get('simulate') is None and (
-            self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
+        list_only = self.params.get('simulate') == 'list_only'
         interactive_format_selection = not list_only and self.format_selector == '-'
         if self.params.get('list_thumbnails'):
             self.list_thumbnails(info_dict)

From 7c8c63529ec32371a9b8b8cf48ea481ec239761b Mon Sep 17 00:00:00 2001
From: Timendum <timedum@gmail.com>
Date: Fri, 11 Nov 2022 10:03:17 +0100
Subject: [PATCH 1754/2552] [extractor/cinetecamilano] Add extractor (#5279)

Closes #5031
Authored by: timendum
---
 yt_dlp/extractor/_extractors.py    |  1 +
 yt_dlp/extractor/cinetecamilano.py | 61 ++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+)
 create mode 100644 yt_dlp/extractor/cinetecamilano.py

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 053ef44ae..4ec0cf9f9 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -319,6 +319,7 @@ from .chirbit import (
 )
 from .cinchcast import CinchcastIE
 from .cinemax import CinemaxIE
+from .cinetecamilano import CinetecaMilanoIE
 from .ciscolive import (
     CiscoLiveSessionIE,
     CiscoLiveSearchIE,
diff --git a/yt_dlp/extractor/cinetecamilano.py b/yt_dlp/extractor/cinetecamilano.py
new file mode 100644
index 000000000..5e770ebac
--- /dev/null
+++ b/yt_dlp/extractor/cinetecamilano.py
@@ -0,0 +1,61 @@
+import json
+import urllib.error
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    float_or_none,
+    parse_iso8601,
+    strip_or_none,
+    traverse_obj,
+    try_get,
+    urljoin,
+)
+
+
+class CinetecaMilanoIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?cinetecamilano\.it/film/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.cinetecamilano.it/film/1942',
+        'info_dict': {
+            'id': '1942',
+            'ext': 'mp4',
+            'title': 'Il draghetto Gris\u00f9 (4 episodi)',
+            'release_date': '20220129',
+            'thumbnail': r're:.+\.png',
+            'description': 'md5:5328cbe080b93224712b6f17fcaf2c01',
+            'modified_date': '20200520',
+            'duration': 3139,
+            'release_timestamp': 1643446208,
+            'modified_timestamp': int
+        }
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        try:
+            film_json = self._download_json(
+                f'https://www.cinetecamilano.it/api/catalogo/{video_id}/?',
+                video_id, headers={
+                    'Referer': url,
+                    'Authorization': try_get(self._get_cookies('https://www.cinetecamilano.it'), lambda x: f'Bearer {x["cnt-token"].value}') or ''
+                })
+        except ExtractorError as e:
+            if ((isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 500)
+                    or isinstance(e.cause, json.JSONDecodeError)):
+                self.raise_login_required(method='cookies')
+            raise
+        if not film_json.get('success') or not film_json.get('archive'):
+            raise ExtractorError('Video information not found')
+        archive = film_json['archive']
+
+        return {
+            'id': video_id,
+            'title': archive.get('title'),
+            'description': strip_or_none(archive.get('description')),
+            'duration': float_or_none(archive.get('duration'), invscale=60),
+            'release_timestamp': parse_iso8601(archive.get('updated_at'), delimiter=' '),
+            'modified_timestamp': parse_iso8601(archive.get('created_at'), delimiter=' '),
+            'thumbnail': urljoin(url, try_get(archive, lambda x: x['thumb']['src'].replace('/public/', '/storage/'))),
+            'formats': self._extract_m3u8_formats(
+                urljoin(url, traverse_obj(archive, ('drm', 'hls'))), video_id, 'mp4')
+        }

From f4b2c59cfe8368e629f2f4c8c2e66dec9a7f8873 Mon Sep 17 00:00:00 2001
From: Vitaly Khabarov <vitkhab@users.noreply.github.com>
Date: Fri, 11 Nov 2022 12:36:23 +0300
Subject: [PATCH 1755/2552] [extractor/YleAreena] Add extractor (#5270)

Closes #2508
Authored by: vitkhab, pukkandan
---
 yt_dlp/extractor/_extractors.py |  1 +
 yt_dlp/extractor/yle_areena.py  | 71 +++++++++++++++++++++++++++++++++
 2 files changed, 72 insertions(+)
 create mode 100644 yt_dlp/extractor/yle_areena.py

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 4ec0cf9f9..78555c05c 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -2266,6 +2266,7 @@ from .yandexvideo import (
 from .yapfiles import YapFilesIE
 from .yesjapan import YesJapanIE
 from .yinyuetai import YinYueTaiIE
+from .yle_areena import YleAreenaIE
 from .ynet import YnetIE
 from .youjizz import YouJizzIE
 from .youku import (
diff --git a/yt_dlp/extractor/yle_areena.py b/yt_dlp/extractor/yle_areena.py
new file mode 100644
index 000000000..118dc1262
--- /dev/null
+++ b/yt_dlp/extractor/yle_areena.py
@@ -0,0 +1,71 @@
+from .common import InfoExtractor
+from .kaltura import KalturaIE
+from ..utils import int_or_none, traverse_obj, url_or_none
+
+
+class YleAreenaIE(InfoExtractor):
+    _VALID_URL = r'https?://areena\.yle\.fi/(?P<id>[\d-]+)'
+    _TESTS = [{
+        'url': 'https://areena.yle.fi/1-4371942',
+        'md5': '932edda0ecf5dfd6423804182d32f8ac',
+        'info_dict': {
+            'id': '0_a3tjk92c',
+            'ext': 'mp4',
+            'title': 'Pouchit',
+            'description': 'md5:d487309c3abbe5650265bbd1742d2f82',
+            'series': 'Modernit miehet',
+            'season': 'Season 1',
+            'season_number': 1,
+            'episode': 'Episode 2',
+            'episode_number': 2,
+            'thumbnail': 'http://cfvod.kaltura.com/p/1955031/sp/195503100/thumbnail/entry_id/0_a3tjk92c/version/100061',
+            'uploader_id': 'ovp@yle.fi',
+            'duration': 1435,
+            'view_count': int,
+            'upload_date': '20181204',
+            'timestamp': 1543916210,
+            'subtitles': {'fin': [{'url': r're:^https?://', 'ext': 'srt'}]},
+            'age_limit': 7,
+        }
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        info = self._search_json_ld(self._download_webpage(url, video_id), video_id, default={})
+        video_data = self._download_json(
+            f'https://player.api.yle.fi/v1/preview/{video_id}.json?app_id=player_static_prod&app_key=8930d72170e48303cf5f3867780d549b',
+            video_id)
+
+        # Example title: 'K1, J2: Pouchit | Modernit miehet'
+        series, season_number, episode_number, episode = self._search_regex(
+            r'K(?P<season_no>[\d]+),\s*J(?P<episode_no>[\d]+):?\s*\b(?P<episode>[^|]+)\s*|\s*(?P<series>.+)',
+            info.get('title') or '', 'episode metadata', group=('season_no', 'episode_no', 'episode', 'series'),
+            default=(None, None, None, None))
+        description = traverse_obj(video_data, ('data', 'ongoing_ondemand', 'description', 'fin'), expected_type=str)
+
+        subtitles = {}
+        for sub in traverse_obj(video_data, ('data', 'ongoing_ondemand', 'subtitles', ...)):
+            if url_or_none(sub.get('uri')):
+                subtitles.setdefault(sub.get('language') or 'und', []).append({
+                    'url': sub['uri'],
+                    'ext': 'srt',
+                    'name': sub.get('kind'),
+                })
+
+        return {
+            '_type': 'url_transparent',
+            'url': 'kaltura:1955031:%s' % traverse_obj(video_data, ('data', 'ongoing_ondemand', 'kaltura', 'id')),
+            'ie_key': KalturaIE.ie_key(),
+            'title': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'title', 'fin'), expected_type=str)
+                      or episode or info.get('title')),
+            'description': description,
+            'series': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'series', 'title', 'fin'), expected_type=str)
+                       or series),
+            'season_number': (int_or_none(self._search_regex(r'Kausi (\d+)', description, 'season number', default=None))
+                              or int(season_number)),
+            'episode_number': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'episode_number'), expected_type=int_or_none)
+                               or int(episode_number)),
+            'thumbnails': traverse_obj(info, ('thumbnails', ..., {'url': 'url'})),
+            'age_limit': traverse_obj(video_data, ('data', 'ongoing_ondemand', 'content_rating', 'age_restriction'), expected_type=int_or_none),
+            'subtitles': subtitles,
+        }

From 8522226d2fea04d48802a9ef402438ff79227fe4 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Fri, 11 Nov 2022 14:08:12 +0530
Subject: [PATCH 1756/2552] [ThumbnailsConvertor] Fix filename escaping

Closes #4604
Authored by: pukkandan, dirkf
---
 yt_dlp/postprocessor/ffmpeg.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py
index 76f9d29c5..7d55373e1 100644
--- a/yt_dlp/postprocessor/ffmpeg.py
+++ b/yt_dlp/postprocessor/ffmpeg.py
@@ -1081,9 +1081,9 @@ class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor):
 
     @staticmethod
     def _options(target_ext):
+        yield from ('-update', '1')
         if target_ext == 'jpg':
-            return ['-bsf:v', 'mjpeg2jpeg']
-        return []
+            yield from ('-bsf:v', 'mjpeg2jpeg')
 
     def convert_thumbnail(self, thumbnail_filename, target_ext):
         thumbnail_conv_filename = replace_extension(thumbnail_filename, target_ext)
@@ -1092,7 +1092,7 @@ class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor):
         _, source_ext = os.path.splitext(thumbnail_filename)
         self.real_run_ffmpeg(
             [(thumbnail_filename, [] if source_ext == '.gif' else ['-f', 'image2', '-pattern_type', 'none'])],
-            [(thumbnail_conv_filename.replace('%', '%%'), self._options(target_ext))])
+            [(thumbnail_conv_filename, self._options(target_ext))])
         return thumbnail_conv_filename
 
     def run(self, info):

From 7aaf4cd2a8fd8ecf2123b981782c3d12dce80d78 Mon Sep 17 00:00:00 2001
From: Robert Geislinger <mail@crpykng.de>
Date: Fri, 11 Nov 2022 08:43:08 +0530
Subject: [PATCH 1757/2552] [cleanup] Misc

Closes #5471, Closes #5312
Authored by: pukkandan, Alienmaster
---
 README.md                      |  6 +++++-
 test/helper.py                 | 13 +++++--------
 yt_dlp/__main__.py             |  2 +-
 yt_dlp/extractor/slideslive.py |  1 +
 yt_dlp/extractor/testurl.py    |  2 +-
 yt_dlp/postprocessor/ffmpeg.py |  2 +-
 yt_dlp/update.py               |  8 ++++----
 yt_dlp/utils.py                |  4 +---
 8 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/README.md b/README.md
index aac359ab9..159329277 100644
--- a/README.md
+++ b/README.md
@@ -1204,6 +1204,10 @@ To summarize, the general syntax for a field is:
 
 Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation` (deprecated), `infojson`, `link`, `pl_thumbnail`, `pl_description`, `pl_infojson`, `chapter`, `pl_video`. E.g. `-o "%(title)s.%(ext)s" -o "thumbnail:%(title)s\%(title)s.%(ext)s"` will put the thumbnails in a folder with the same name as the video. If any of the templates is empty, that type of file will not be written. E.g. `--write-thumbnail -o "thumbnail:"` will write thumbnails only for playlists and not for video.
 
+<a id="outtmpl-postprocess-note"></a>
+
+Note: Due to post-processing (i.e. merging etc.), the actual output filename might differ. Use `--print after_move:filepath` to get the name after all post-processing is complete.
+
 The available fields are:
 
 - `id` (string): Video identifier
@@ -1304,7 +1308,7 @@ Available only when used in `--print`:
 
  - `urls` (string): The URLs of all requested formats, one in each line
- - `filename` (string): Name of the video file. Note that the actual filename may be different due to post-processing. Use `--exec echo` to get the name after all postprocessing is complete
+ - `filename` (string): Name of the video file.
Note that the [actual filename may differ](#outtmpl-postprocess-note)
  - `formats_table` (table): The video format table as printed by `--list-formats`
  - `thumbnails_table` (table): The thumbnail format table as printed by `--list-thumbnails`
  - `subtitles_table` (table): The subtitle format table as printed by `--list-subs`
diff --git a/test/helper.py b/test/helper.py
index 139bdafc3..0b90660ff 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -254,14 +254,11 @@ def expect_info_dict(self, got_dict, expected_dict):
             return v.__name__
         else:
             return repr(v)
-    info_dict_str = ''
-    if len(missing_keys) != len(expected_dict):
-        info_dict_str += ''.join(
-            f'  {_repr(k)}: {_repr(v)},\n'
-            for k, v in test_info_dict.items() if k not in missing_keys)
-
-        if info_dict_str:
-            info_dict_str += '\n'
+    info_dict_str = ''.join(
+        f'  {_repr(k)}: {_repr(v)},\n'
+        for k, v in test_info_dict.items() if k not in missing_keys)
+    if info_dict_str:
+        info_dict_str += '\n'
     info_dict_str += ''.join(
         f'  {_repr(k)}: {_repr(test_info_dict[k])},\n'
         for k in missing_keys)
diff --git a/yt_dlp/__main__.py b/yt_dlp/__main__.py
index ff5d71d3c..78701df8d 100644
--- a/yt_dlp/__main__.py
+++ b/yt_dlp/__main__.py
@@ -5,7 +5,7 @@
 
 import sys
 
-if __package__ is None and not hasattr(sys, 'frozen'):
+if __package__ is None and not getattr(sys, 'frozen', False):
     # direct call of __main__.py
     import os.path
     path = os.path.realpath(os.path.abspath(__file__))
diff --git a/yt_dlp/extractor/slideslive.py b/yt_dlp/extractor/slideslive.py
index 72ca56057..87d0fec32 100644
--- a/yt_dlp/extractor/slideslive.py
+++ b/yt_dlp/extractor/slideslive.py
@@ -9,6 +9,7 @@ from ..utils import (
 
 class SlidesLiveIE(InfoExtractor):
     _VALID_URL = r'https?://slideslive\.com/(?P<id>[0-9]+)'
+    _WORKING = False
     _TESTS = [{
         # video_service_name = YOUTUBE
         'url': 'https://slideslive.com/38902413/gcc-ia16-backend',
diff --git a/yt_dlp/extractor/testurl.py b/yt_dlp/extractor/testurl.py
index 2bce3b239..dccca1004 100644
--- a/yt_dlp/extractor/testurl.py
+++ b/yt_dlp/extractor/testurl.py
@@ -21,7 +21,7 @@ class TestURLIE(InfoExtractor):
         matching_extractors = [e for e in gen_extractor_classes() if rex.search(e.IE_NAME)]
 
         if len(matching_extractors) == 0:
-            raise ExtractorError('No extractors matching {extractor_id!r} found', expected=True)
+            raise ExtractorError(f'No extractors matching {extractor_id!r} found', expected=True)
         elif len(matching_extractors) > 1:
             try:  # Check for exact match
                 extractor = next(
diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py
index 7d55373e1..67890fc31 100644
--- a/yt_dlp/postprocessor/ffmpeg.py
+++ b/yt_dlp/postprocessor/ffmpeg.py
@@ -137,7 +137,7 @@ class FFmpegPostProcessor(PostProcessor):
         path = self._paths.get(prog)
         if path in self._version_cache:
             return self._version_cache[path], self._features_cache.get(path, {})
-        out = _get_exe_version_output(path, ['-bsfs'], to_screen=self.write_debug)
+        out = _get_exe_version_output(path, ['-bsfs'])
         ver = detect_exe_version(out) if out else False
         if ver:
             regexs = [
diff --git a/yt_dlp/update.py b/yt_dlp/update.py
index 6208aad8a..ac3e28057 100644
--- a/yt_dlp/update.py
+++ b/yt_dlp/update.py
@@ -30,13 +30,13 @@ API_URL = f'https://api.github.com/repos/{REPOSITORY}/releases'
 @functools.cache
 def _get_variant_and_executable_path():
     """@returns (variant, executable_path)"""
-    if hasattr(sys, 'frozen'):
+    if getattr(sys, 'frozen', False):
         path = sys.executable
         if not hasattr(sys, '_MEIPASS'):
             return 'py2exe', path
-        if sys._MEIPASS == os.path.dirname(path):
+        elif sys._MEIPASS == os.path.dirname(path):
             return f'{sys.platform}_dir', path
-        if sys.platform == 'darwin':
+        elif sys.platform == 'darwin':
             machine = '_legacy' if version_tuple(platform.mac_ver()[0]) < (10, 15) else ''
         else:
             machine = f'_{platform.machine().lower()}'
@@ -288,7 +288,7 @@ class Updater:
         # There is no sys.orig_argv in py < 3.10. Also, it can be [] when frozen
         if getattr(sys, 'orig_argv', None):
             return sys.orig_argv
-        elif hasattr(sys, 'frozen'):
+        elif getattr(sys, 'frozen', False):
             return sys.argv
 
     def restart(self):
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 4c44f4845..04a0956c9 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -2698,9 +2698,7 @@ def check_executable(exe, args=[]):
     return exe
 
 
-def _get_exe_version_output(exe, args, *, to_screen=None):
-    if to_screen:
-        to_screen(f'Checking exe version: {shell_quote([exe] + args)}')
+def _get_exe_version_output(exe, args):
     try:
         # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
         # SIGTTOU if yt-dlp is run in the background.

From 8b644025b1de710339fe317661d71691c115e249 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Fri, 11 Nov 2022 16:02:50 +0530
Subject: [PATCH 1758/2552] Release 2022.11.11

---
 CONTRIBUTORS      |  26 ++++++++++
 Changelog.md      | 121 ++++++++++++++++++++++++++++++++++++++++++++++
 README.md         |   2 +-
 supportedsites.md |  49 +++++++++++++++----
 4 files changed, 187 insertions(+), 11 deletions(-)

diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index 264c087c2..f2a1368ed 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -331,3 +331,29 @@ tannertechnology
 Timendum
 tobi1805
 TokyoBlackHole
+ajayyy
+Alienmaster
+bsun0000
+changren-wcr
+ClosedPort22
+CrankDatSouljaBoy
+cruel-efficiency
+endotronic
+Generator
+gibson042
+How-Bout-No
+invertico
+jahway603
+jwoglom
+lksj
+megapro17
+mlampe
+MrOctopus
+nosoop
+puc9
+sashashura
+schnusch
+SG5
+the-marenga
+tkgmomosheep
+vitkhab
diff --git a/Changelog.md b/Changelog.md
index d7600b046..657a0722c 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -11,6 +11,127 @@
 -->
 
+### 2022.11.11
+
+* Merge youtube-dl: Upto [commit/de39d12](https://github.com/ytdl-org/youtube-dl/commit/de39d128)
+* Backport SSL configuration from Python 3.10 by [coletdjnz](https://github.com/coletdjnz)
+* Do more processing in `--flat-playlist`
+* Fix `--list` options not implying `-s` in some cases by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly)
+* Fix end time of clips by [cruel-efficiency](https://github.com/cruel-efficiency)
+* Fix for `formats=None`
+* Write API params in debug head
+* [outtmpl] Ensure ASCII in json and add option for Unicode
+* [SponsorBlock] Add `type` field, obey `--retry-sleep extractor`, relax duration check for large segments
+* [SponsorBlock] **Support `chapter` category** by [ajayyy](https://github.com/ajayyy), [pukkandan](https://github.com/pukkandan)
+* [ThumbnailsConvertor] Fix filename escaping by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan)
+* [ModifyChapters] Handle the entire video being marked for removal
+* [embedthumbnail] Fix thumbnail name in mp3 by [How-Bout-No](https://github.com/How-Bout-No)
+* [downloader/fragment] HLS download can continue without first fragment
+* [cookies] Improve `LenientSimpleCookie` by [Grub4K](https://github.com/Grub4K)
+* [jsinterp] Improve separating regex
+* [extractor/common] Fix `fatal=False` for `_search_nuxt_data`
+* [extractor/common] Improve `_generic_title`
+* [extractor/common] Fix `json_ld` type checks by
[Grub4K](https://github.com/Grub4K)
+* [extractor/generic] Separate embed extraction into own function
+* [extractor/generic:quoted-html] Add extractor by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [extractor/unsupported] Raise error on known DRM-only sites by [coletdjnz](https://github.com/coletdjnz)
+* [utils] `js_to_json`: Improve escape handling by [Grub4K](https://github.com/Grub4K)
+* [utils] `strftime_or_none`: Workaround Python bug on Windows
+* [utils] `traverse_obj`: Always return list when branching, allow `re.Match` objects by [Grub4K](https://github.com/Grub4K)
+* [build, test] Harden workflows' security by [sashashura](https://github.com/sashashura)
+* [build] `py2exe`: Migrate to freeze API by [SG5](https://github.com/SG5), [pukkandan](https://github.com/pukkandan)
+* [build] Create `armv7l` and `aarch64` releases by [MrOctopus](https://github.com/MrOctopus), [pukkandan](https://github.com/pukkandan)
+* [build] Make linux binary truly standalone using `conda` by [mlampe](https://github.com/mlampe)
+* [build] Replace `set-output` with `GITHUB_OUTPUT` by [Lesmiscore](https://github.com/Lesmiscore)
+* [update] Use error code `100` for update errors
+* [compat] Fix `shutils.move` in restricted ACL mode on BSD by [ClosedPort22](https://github.com/ClosedPort22), [pukkandan](https://github.com/pukkandan)
+* [docs, devscripts] Document `pyinst`'s argument passthrough by [jahway603](https://github.com/jahway603)
+* [test] Allow `extract_flat` in download tests by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [cleanup] Misc fixes and cleanup by [pukkandan](https://github.com/pukkandan), [Alienmaster](https://github.com/Alienmaster)
+* [extractor/aeon] Add extractor by [DoubleCouponDay](https://github.com/DoubleCouponDay)
+* [extractor/agora] Add extractors by [selfisekai](https://github.com/selfisekai)
+* [extractor/camsoda] Add extractor by [zulaport](https://github.com/zulaport)
+* [extractor/cinetecamilano] Add extractor by [timendum](https://github.com/timendum)
+* [extractor/deuxm] Add extractors by [CrankDatSouljaBoy](https://github.com/CrankDatSouljaBoy)
+* [extractor/genius] Add extractors by [bashonly](https://github.com/bashonly)
+* [extractor/japandiet] Add extractors by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/listennotes] Add extractor by [lksj](https://github.com/lksj), [pukkandan](https://github.com/pukkandan)
+* [extractor/nos.nl] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/oftv] Add extractors by [DoubleCouponDay](https://github.com/DoubleCouponDay)
+* [extractor/podbayfm] Add extractor by [schnusch](https://github.com/schnusch)
+* [extractor/qingting] Add extractor by [bashonly](https://github.com/bashonly), [changren-wcr](https://github.com/changren-wcr)
+* [extractor/screen9] Add extractor by [tpikonen](https://github.com/tpikonen)
+* [extractor/swearnet] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/YleAreena] Add extractor by [pukkandan](https://github.com/pukkandan), [vitkhab](https://github.com/vitkhab)
+* [extractor/zeenews] Add extractor by [m4tu4g](https://github.com/m4tu4g), [pukkandan](https://github.com/pukkandan)
+* [extractor/youtube:tab] **Update tab handling for redesign** by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+    * Channel URLs download all uploads of the channel as multiple playlists, separated by tab
+* [extractor/youtube] Differentiate
between no comments and disabled comments by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube] Extract `concurrent_view_count` for livestreams by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube] Fix `duration` for premieres by [nosoop](https://github.com/nosoop)
+* [extractor/youtube] Fix `live_status` by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [extractor/youtube] Ignore incomplete data error for comment replies by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube] Improve chapter parsing from description
+* [extractor/youtube] Mark videos as fully watched by [bsun0000](https://github.com/bsun0000)
+* [extractor/youtube] Update piped instances by [Generator](https://github.com/Generator)
+* [extractor/youtube] Update playlist metadata extraction for new layout by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube:tab] Fix video metadata from tabs by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube:tab] Let `approximate_date` return timestamp
+* [extractor/americastestkitchen] Fix extractor by [bashonly](https://github.com/bashonly)
+* [extractor/bbc] Support onion domains by [DoubleCouponDay](https://github.com/DoubleCouponDay)
+* [extractor/bilibili] Add chapters and misc cleanup by [lockmatrix](https://github.com/lockmatrix), [pukkandan](https://github.com/pukkandan)
+* [extractor/bilibili] Fix BilibiliIE and Bangumi extractors by [lockmatrix](https://github.com/lockmatrix), [pukkandan](https://github.com/pukkandan)
+* [extractor/bitchute] Better error for geo-restricted videos by [flashdagger](https://github.com/flashdagger)
+* [extractor/bitchute] Improve `BitChuteChannelIE` by [flashdagger](https://github.com/flashdagger), [pukkandan](https://github.com/pukkandan)
+* [extractor/bitchute] Simplify extractor by [flashdagger](https://github.com/flashdagger), [pukkandan](https://github.com/pukkandan)
+* [extractor/cda] Support login through API by [selfisekai](https://github.com/selfisekai)
+* [extractor/crunchyroll] Beta is now the only layout by [tejing1](https://github.com/tejing1)
+* [extractor/detik] Avoid unnecessary extraction
+* [extractor/doodstream] Remove extractor
+* [extractor/dplay] Add MotorTrendOnDemand extractor by [bashonly](https://github.com/bashonly)
+* [extractor/epoch] Support videos without data-trailer by [gibson042](https://github.com/gibson042), [pukkandan](https://github.com/pukkandan)
+* [extractor/fox] Extract thumbnail by [vitkhab](https://github.com/vitkhab)
+* [extractor/foxnews] Add `FoxNewsVideo` extractor
+* [extractor/hotstar] Add season support by [m4tu4g](https://github.com/m4tu4g)
+* [extractor/hotstar] Refactor v1 API calls
+* [extractor/iprima] Make json+ld non-fatal by [bashonly](https://github.com/bashonly)
+* [extractor/iq] Increase phantomjs timeout
+* [extractor/kaltura] Support playlists by [jwoglom](https://github.com/jwoglom), [pukkandan](https://github.com/pukkandan)
+* [extractor/lbry] Authenticate with cookies by [flashdagger](https://github.com/flashdagger)
+* [extractor/livestreamfails] Support posts by [invertico](https://github.com/invertico)
+* [extractor/mlb] Add `MLBArticle` extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/mxplayer] Improve extractor by [m4tu4g](https://github.com/m4tu4g)
+* [extractor/niconico] Always use HTTPS for requests
+* [extractor/nzherald] Support new video embed by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/odnoklassniki] Support boosty.to embeds
by [Lesmiscore](https://github.com/Lesmiscore), [megapro17](https://github.com/megapro17), [pukkandan](https://github.com/pukkandan)
+* [extractor/paramountplus] Update API token by [bashonly](https://github.com/bashonly)
+* [extractor/reddit] Add fallback format by [bashonly](https://github.com/bashonly)
+* [extractor/redgifs] Fix extractors by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
+* [extractor/redgifs] Refresh auth token for 401 by [endotronic](https://github.com/endotronic), [pukkandan](https://github.com/pukkandan)
+* [extractor/rumble] Add HLS formats and extract more metadata by [flashdagger](https://github.com/flashdagger)
+* [extractor/sbs] Improve `_VALID_URL` by [bashonly](https://github.com/bashonly)
+* [extractor/skyit] Fix extractors by [nixxo](https://github.com/nixxo)
+* [extractor/stripchat] Fix hostname for HLS stream by [zulaport](https://github.com/zulaport)
+* [extractor/stripchat] Improve error message by [freezboltz](https://github.com/freezboltz)
+* [extractor/telegram] Add playlist support and more metadata by [bashonly](https://github.com/bashonly), [bsun0000](https://github.com/bsun0000)
+* [extractor/Tnaflix] Fix for HTTP 500 by [SG5](https://github.com/SG5), [pukkandan](https://github.com/pukkandan)
+* [extractor/tubitv] Better DRM detection by [bashonly](https://github.com/bashonly)
+* [extractor/tvp] Update extractors by [selfisekai](https://github.com/selfisekai)
+* [extractor/twitcasting] Fix `data-movie-playlist` extraction by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/twitter] Add onion site to `_VALID_URL` by [DoubleCouponDay](https://github.com/DoubleCouponDay)
+* [extractor/twitter] Add Spaces extractor and GraphQL API by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly), [nixxo](https://github.com/nixxo), [pukkandan](https://github.com/pukkandan)
+* [extractor/twitter] Support multi-video posts by [Grub4K](https://github.com/Grub4K)
+* [extractor/uktvplay] Fix `_VALID_URL`
+* [extractor/viu] Support subtitles of on-screen text by [tkgmomosheep](https://github.com/tkgmomosheep)
+* [extractor/VK] Fix playlist URLs by [the-marenga](https://github.com/the-marenga)
+* [extractor/vlive] Extract `release_timestamp`
+* [extractor/voot] Improve `_VALID_URL` by [freezboltz](https://github.com/freezboltz)
+* [extractor/wordpress:mb.miniAudioPlayer] Add embed extractor by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/YoutubeWebArchive] Improve metadata extraction by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/zee5] Improve `_VALID_URL` by [m4tu4g](https://github.com/m4tu4g)
+* [extractor/zenyandex] Fix extractors by [lksj](https://github.com/lksj), [puc9](https://github.com/puc9), [pukkandan](https://github.com/pukkandan)
+
+
 ### 2022.10.04
 
 * Allow a `set` to be passed as `download_archive` by [pukkandan](https://github.com/pukkandan), [bashonly](https://github.com/bashonly)
diff --git a/README.md b/README.md
index 159329277..13a2c17c7 100644
--- a/README.md
+++ b/README.md
@@ -74,7 +74,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t
 
 # NEW FEATURES
 
-* Merged with **youtube-dl v2021.12.17+ [commit/ed5c44e](https://github.com/ytdl-org/youtube-dl/commit/ed5c44e7b74ac77f87ca5ed6cb5e964a0c6a0678)**<!--([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))--> and **youtube-dlc v2020.11.11-3+ [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the
features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl) +* Merged with **youtube-dl v2021.12.17+ [commit/de39d12](https://github.com/ytdl-org/youtube-dl/commit/de39d128)** <!--([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))--> and **youtube-dlc v2020.11.11-3+ [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl) * **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API diff --git a/supportedsites.md b/supportedsites.md index 44fc1d484..d7565c139 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -35,7 +35,7 @@ - **acast:channel** - **AcFunBangumi** - **AcFunVideo** - - **ADN**: [<abbr title="netrc machine"><em>animedigitalnetwork</em></abbr>] Anime Digital Network + - **ADN**: [<abbr title="netrc machine"><em>animationdigitalnetwork</em></abbr>] Animation Digital Network - **AdobeConnect** - **adobetv** - **adobetv:channel** @@ -46,6 +46,7 @@ - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault - **aenetworks:collection** - **aenetworks:show** + - **AeonCo** - **afreecatv**: [<abbr title="netrc machine"><em>afreecatv</em></abbr>] afreecatv.com - **afreecatv:live**: [<abbr title="netrc machine"><em>afreecatv</em></abbr>] afreecatv.com - **afreecatv:user** @@ -119,7 +120,6 @@ - **Bandcamp:album** - **Bandcamp:user** - **Bandcamp:weekly** - - **bangumi.bilibili.com**: BiliBili番剧 - **BannedVideo** - **bbc**: [<abbr title="netrc machine"><em>bbc</em></abbr>] BBC - **bbc.co.uk**: [<abbr title="netrc machine"><em>bbc</em></abbr>] BBC iPlayer @@ -149,6 +149,8 @@ - **Bilibili category extractor** - **BilibiliAudio** - **BilibiliAudioAlbum** + - **BiliBiliBangumi** + - **BiliBiliBangumiMedia** - **BiliBiliPlayer** - **BiliBiliSearch**: Bilibili video search; "bilisearch:" prefix - **BilibiliSpaceAudio** @@ -195,6 +197,7 @@ - **Camdemy** - **CamdemyFolder** - **CamModels** + - **Camsoda** - **CamtasiaEmbed** - **CamWithHer** - **CanalAlpha** @@ -218,7 +221,7 @@ - **cbssports:embed** - **CCMA** - **CCTV**: 央视网 - - **CDA** + - **CDA**: [<abbr title="netrc machine"><em>cdapl</em></abbr>] - **Cellebrite** - **CeskaTelevize** - **CGTN** @@ -233,6 +236,7 @@ - **cielotv.it** - **Cinchcast** - **Cinemax** + - **CinetecaMilano** - **CiscoLiveSearch** - **CiscoLiveSession** - **ciscowebex**: Cisco Webex @@ -272,9 +276,7 @@ - **CrowdBunker** - **CrowdBunkerChannel** - **crunchyroll**: [<abbr title="netrc machine"><em>crunchyroll</em></abbr>] - - **crunchyroll:beta**: [<abbr title="netrc machine"><em>crunchyroll</em></abbr>] - **crunchyroll:playlist**: [<abbr title="netrc machine"><em>crunchyroll</em></abbr>] - - **crunchyroll:​playlist:beta**: [<abbr title="netrc machine"><em>crunchyroll</em></abbr>] - **CSpan**: C-SPAN - **CSpanCongress** - **CtsNews**: 華視新聞 @@ -311,6 +313,8 @@ - **democracynow** - **DestinationAmerica** - **DetikEmbed** + - **DeuxM** + - **DeuxMNews** - **DHM**: Filmarchiv - Deutsches Historisches Museum - **Digg** - **DigitalConcertHall**: [<abbr title="netrc machine"><em>digitalconcerthall</em></abbr>] DigitalConcertHall extractor @@ -328,7 +332,6 @@ - **DIYNetwork** - **dlive:stream** - 
**dlive:vod** - - **DoodStream** - **Dotsub** - **Douyin** - **DouyuShow** @@ -422,6 +425,7 @@ - **Foxgay** - **foxnews**: Fox News and Fox Business Video - **foxnews:article** + - **FoxNewsVideo** - **FoxSports** - **fptplay**: fptplay.vn - **FranceCulture** @@ -463,6 +467,8 @@ - **gem.cbc.ca**: [<abbr title="netrc machine"><em>cbcgem</em></abbr>] - **gem.cbc.ca:live** - **gem.cbc.ca:playlist** + - **Genius** + - **GeniusLyrics** - **Gettr** - **GettrStreaming** - **Gfycat** @@ -518,6 +524,7 @@ - **HotNewHipHop** - **hotstar** - **hotstar:playlist** + - **hotstar:season** - **hotstar:series** - **Howcast** - **HowStuffWorks** @@ -655,6 +662,7 @@ - **linkedin:​learning:course**: [<abbr title="netrc machine"><em>linkedin</em></abbr>] - **LinuxAcademy**: [<abbr title="netrc machine"><em>linuxacademy</em></abbr>] - **Liputan6** + - **ListenNotes** - **LiTV** - **LiveJournal** - **livestream** @@ -736,6 +744,7 @@ - **mixcloud:playlist** - **mixcloud:user** - **MLB** + - **MLBArticle** - **MLBTV**: [<abbr title="netrc machine"><em>mlb</em></abbr>] - **MLBVideo** - **MLSSoccer** @@ -753,6 +762,7 @@ - **MotherlessGroup** - **Motorsport**: motorsport.com - **MotorTrend** + - **MotorTrendOnDemand** - **MovieClips** - **MovieFap** - **Moviepilot** @@ -881,6 +891,7 @@ - **NoodleMagazine** - **Noovo** - **Normalboots** + - **NOSNLArticle** - **NosVideo** - **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz - **NovaEmbed** @@ -915,6 +926,8 @@ - **ocw.mit.edu** - **OdaTV** - **Odnoklassniki** + - **OfTV** + - **OfTVPlaylist** - **OktoberfestTV** - **OlympicsReplay** - **on24**: ON24 @@ -999,6 +1012,8 @@ - **pluralsight**: [<abbr title="netrc machine"><em>pluralsight</em></abbr>] - **pluralsight:course** - **PlutoTV** + - **PodbayFM** + - **PodbayFMChannel** - **Podchaser** - **podomatic** - **Pokemon** @@ -1042,6 +1057,7 @@ - **puhutv:serie** - **Puls4** - **Pyvideo** + - **QingTing** - **qqmusic**: QQ音乐 - **qqmusic:album**: QQ音乐 - 专辑 - **qqmusic:playlist**: QQ音乐 - 歌单 @@ -1164,12 +1180,14 @@ - **SaltTVLive**: [<abbr title="netrc machine"><em>salttv</em></abbr>] - **SaltTVRecordings**: [<abbr title="netrc machine"><em>salttv</em></abbr>] - **SampleFocus** + - **Sangiin**: 参議院インターネット審議中継 (archive) - **Sapo**: SAPO Vídeos - **savefrom.net** - **SBS**: sbs.com.au - **schooltv** - **ScienceChannel** - **screen.yahoo:search**: Yahoo screen search; "yvsearch:" prefix + - **Screen9** - **Screencast** - **ScreencastOMatic** - **ScrippsNetworks** @@ -1191,6 +1209,9 @@ - **ShareVideosEmbed** - **ShemarooMe** - **ShowRoomLive** + - **ShugiinItvLive**: 衆議院インターネット審議中継 + - **ShugiinItvLiveRoom**: 衆議院インターネット審議中継 (中継) + - **ShugiinItvVod**: 衆議院インターネット審議中継 (ビデオライブラリ) - **simplecast** - **simplecast:episode** - **simplecast:podcast** @@ -1201,13 +1222,12 @@ - **sky:​news:story** - **sky:sports** - **sky:​sports:news** - - **skyacademy.it** - **SkylineWebcams** - **skynewsarabia:article** - **skynewsarabia:video** - **SkyNewsAU** - **Slideshare** - - **SlidesLive** + - **SlidesLive**: (**Currently broken**) - **Slutload** - **Smotrim** - **Snotr** @@ -1277,6 +1297,7 @@ - **SVTPage** - **SVTPlay**: SVT Play and Öppet arkiv - **SVTSeries** + - **SwearnetEpisode** - **SWRMediathek** - **Syfy** - **SYVDK** @@ -1347,6 +1368,8 @@ - **toggo** - **Tokentube** - **Tokentube:channel** + - **tokfm:audition** + - **tokfm:podcast** - **ToonGoggles** - **tou.tv**: [<abbr title="netrc machine"><em>toutv</em></abbr>] - **Toypics**: Toypics video @@ -1378,7 +1401,6 @@ - **Turbo** - **tv.dfb.de** - **TV2** - - 
**TV24UAGenericPassthrough** - **TV2Article** - **TV2DK** - **TV2DKBornholmPlay** @@ -1411,8 +1433,9 @@ - **tvopengr:watch**: tvopen.gr (and ethnos.gr) videos - **tvp**: Telewizja Polska - **tvp:embed**: Telewizja Polska - - **tvp:series** - **tvp:stream** + - **tvp:vod** + - **tvp:​vod:series** - **TVPlayer** - **TVPlayHome** - **Tweakers** @@ -1431,6 +1454,7 @@ - **twitter:broadcast** - **twitter:card** - **twitter:shortener** + - **twitter:spaces** - **udemy**: [<abbr title="netrc machine"><em>udemy</em></abbr>] - **udemy:course**: [<abbr title="netrc machine"><em>udemy</em></abbr>] - **UDNEmbed**: 聯合影音 @@ -1584,6 +1608,7 @@ - **WistiaChannel** - **WistiaPlaylist** - **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl + - **wordpress:mb.miniAudioPlayer** - **wordpress:playlist** - **WorldStarHipHop** - **wppilot** @@ -1591,6 +1616,8 @@ - **WSJ**: Wall Street Journal - **WSJArticle** - **WWE** + - **wyborcza:video** + - **WyborczaPodcast** - **XBef** - **XboxClips** - **XFileShare**: XFileShare based sites: Aparat, ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, WolfStream, XVideoSharing @@ -1627,6 +1654,7 @@ - **YapFiles** - **YesJapan** - **yinyuetai:video**: 音悦Tai + - **YleAreena** - **Ynet** - **YouJizz** - **youku**: 优酷 @@ -1665,6 +1693,7 @@ - **ZDFChannel** - **Zee5**: [<abbr title="netrc machine"><em>zee5</em></abbr>] - **zee5:series** + - **ZeeNews** - **ZenYandex** - **ZenYandexChannel** - **Zhihu** From 5e39fb982ee98f0bd8f020c878cf6921beae6e2e Mon Sep 17 00:00:00 2001 From: github-actions <github-actions@example.com> Date: Fri, 11 Nov 2022 10:37:46 +0000 Subject: [PATCH 1759/2552] [version] update Created by: pukkandan :ci skip all :ci run dl --- .github/ISSUE_TEMPLATE/1_broken_site.yml | 8 ++++---- .github/ISSUE_TEMPLATE/2_site_support_request.yml | 8 ++++---- .github/ISSUE_TEMPLATE/3_site_feature_request.yml | 8 ++++---- .github/ISSUE_TEMPLATE/4_bug_report.yml | 8 ++++---- .github/ISSUE_TEMPLATE/5_feature_request.yml | 8 ++++---- .github/ISSUE_TEMPLATE/6_question.yml | 8 ++++---- yt_dlp/version.py | 4 ++-- 7 files changed, 26 insertions(+), 26 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml index c4bad101b..3eafd08e5 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml @@ -18,7 +18,7 @@ body: options: - label: I'm reporting a broken site required: true - - label: I've verified that I'm running yt-dlp version **2022.10.04** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2022.11.11** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true @@ -62,7 +62,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2022.10.04 [9d339c4] (win32_exe) + [debug] yt-dlp version 2022.11.11 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -70,8 +70,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, 
mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2022.10.04, Current version: 2022.10.04 - yt-dlp is up to date (2022.10.04) + Latest version: 2022.11.11, Current version: 2022.11.11 + yt-dlp is up to date (2022.11.11) <more lines> render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml index 6cbdc8ee8..295a0f254 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml @@ -18,7 +18,7 @@ body: options: - label: I'm reporting a new site support request required: true - - label: I've verified that I'm running yt-dlp version **2022.10.04** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2022.11.11** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true @@ -74,7 +74,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2022.10.04 [9d339c4] (win32_exe) + [debug] yt-dlp version 2022.11.11 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -82,8 +82,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2022.10.04, Current version: 2022.10.04 - yt-dlp is up to date (2022.10.04) + Latest version: 2022.11.11, Current version: 2022.11.11 + yt-dlp is up to date (2022.11.11) <more lines> render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml index 15101e885..6c4e97080 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml @@ -18,7 +18,7 @@ body: options: - label: I'm requesting a site-specific feature required: true - - label: I've verified that I'm running yt-dlp version **2022.10.04** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2022.11.11** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true @@ -70,7 +70,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2022.10.04 [9d339c4] (win32_exe) + [debug] yt-dlp version 2022.11.11 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -78,8 +78,8 @@ body: [debug] Optional 
libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2022.10.04, Current version: 2022.10.04 - yt-dlp is up to date (2022.10.04) + Latest version: 2022.11.11, Current version: 2022.11.11 + yt-dlp is up to date (2022.11.11) <more lines> render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml index aa03087cf..b224f3d32 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml @@ -18,7 +18,7 @@ body: options: - label: I'm reporting a bug unrelated to a specific site required: true - - label: I've verified that I'm running yt-dlp version **2022.10.04** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2022.11.11** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true @@ -55,7 +55,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2022.10.04 [9d339c4] (win32_exe) + [debug] yt-dlp version 2022.11.11 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -63,8 +63,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2022.10.04, Current version: 2022.10.04 - yt-dlp is up to date (2022.10.04) + Latest version: 2022.11.11, Current version: 2022.11.11 + yt-dlp is up to date (2022.11.11) <more lines> render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml index 47f6644a4..d58dc2e94 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/5_feature_request.yml @@ -20,7 +20,7 @@ body: required: true - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - - label: I've verified that I'm running yt-dlp version **2022.10.04** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2022.11.11** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. 
DO NOT post duplicates required: true @@ -51,7 +51,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2022.10.04 [9d339c4] (win32_exe) + [debug] yt-dlp version 2022.11.11 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -59,7 +59,7 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2022.10.04, Current version: 2022.10.04 - yt-dlp is up to date (2022.10.04) + Latest version: 2022.11.11, Current version: 2022.11.11 + yt-dlp is up to date (2022.11.11) <more lines> render: shell diff --git a/.github/ISSUE_TEMPLATE/6_question.yml b/.github/ISSUE_TEMPLATE/6_question.yml index 996f90679..213bf9156 100644 --- a/.github/ISSUE_TEMPLATE/6_question.yml +++ b/.github/ISSUE_TEMPLATE/6_question.yml @@ -26,7 +26,7 @@ body: required: true - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - - label: I've verified that I'm running yt-dlp version **2022.10.04** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2022.11.11** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. 
DO NOT post duplicates required: true @@ -57,7 +57,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2022.10.04 [9d339c4] (win32_exe) + [debug] yt-dlp version 2022.11.11 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -65,7 +65,7 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2022.10.04, Current version: 2022.10.04 - yt-dlp is up to date (2022.10.04) + Latest version: 2022.11.11, Current version: 2022.11.11 + yt-dlp is up to date (2022.11.11) <more lines> render: shell diff --git a/yt_dlp/version.py b/yt_dlp/version.py index 1123205bd..90b5e40ac 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,8 +1,8 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2022.10.04' +__version__ = '2022.11.11' -RELEASE_GIT_HEAD = '4e0511f27' +RELEASE_GIT_HEAD = '8b644025b' VARIANT = None From 08270da5c3454cec1d26c4e34add58158af19a1d Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Fri, 11 Nov 2022 16:29:29 +0530 Subject: [PATCH 1760/2552] [extractor/youtube] Fix `ytuser:` --- yt_dlp/extractor/youtube.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index d18a16689..1f9feb2d2 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -6293,9 +6293,7 @@ class YoutubeYtUserIE(InfoExtractor): def _real_extract(self, url): user_id = self._match_id(url) - return self.url_result( - 'https://www.youtube.com/user/%s/videos' % user_id, - ie=YoutubeTabIE.ie_key(), video_id=user_id) + return self.url_result(f'https://www.youtube.com/user/{user_id}', YoutubeTabIE, user_id) class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): From d9658562350f6aaf9f6deb037734d1cd691a64ce Mon Sep 17 00:00:00 2001 From: Audrey <45548254+tntmod54321@users.noreply.github.com> Date: Fri, 11 Nov 2022 12:58:54 -0500 Subject: [PATCH 1761/2552] [extractor/Veoh] Add user extractor (#5242) Authored by: tntmod54321 --- yt_dlp/extractor/_extractors.py | 5 ++- yt_dlp/extractor/veoh.py | 66 ++++++++++++++++++++++++++++++++- 2 files changed, 69 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 78555c05c..c1ab5a964 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2043,7 +2043,10 @@ from .varzesh3 import Varzesh3IE from .vbox7 import Vbox7IE from .veehd import VeeHDIE from .veo import VeoIE -from .veoh import VeohIE +from .veoh import ( + VeohIE, + VeohUserIE +) from .vesti import VestiIE from .vevo import ( VevoIE, diff --git a/yt_dlp/extractor/veoh.py b/yt_dlp/extractor/veoh.py index 70280ae85..a32c2fccb 100644 --- a/yt_dlp/extractor/veoh.py +++ b/yt_dlp/extractor/veoh.py @@ -1,9 +1,14 @@ +import functools +import json + from .common import InfoExtractor from ..utils import ( + ExtractorError, + OnDemandPagedList, int_or_none, parse_duration, qualities, - try_get + try_get, ) @@ -123,3 +128,62 @@ class VeohIE(InfoExtractor): 'categories': categories, 'tags': tags.split(', ') if tags 
else None, } + + +class VeohUserIE(VeohIE): + _VALID_URL = r'https?://(?:www\.)?veoh\.com/users/(?P<id>[\w-]+)' + IE_NAME = 'veoh:user' + + _TESTS = [ + { + 'url': 'https://www.veoh.com/users/valentinazoe', + 'info_dict': { + 'id': 'valentinazoe', + 'title': 'valentinazoe (Uploads)' + }, + 'playlist_mincount': 75 + }, + { + 'url': 'https://www.veoh.com/users/PiensaLibre', + 'info_dict': { + 'id': 'PiensaLibre', + 'title': 'PiensaLibre (Uploads)' + }, + 'playlist_mincount': 2 + }] + + _PAGE_SIZE = 16 + + def _fetch_page(self, uploader, page): + response = self._download_json( + 'https://www.veoh.com/users/published/videos', uploader, + note=f'Downloading videos page {page + 1}', + headers={ + 'x-csrf-token': self._TOKEN, + 'content-type': 'application/json;charset=UTF-8' + }, + data=json.dumps({ + 'username': uploader, + 'maxResults': self._PAGE_SIZE, + 'page': page + 1, + 'requestName': 'userPage' + }).encode('utf-8')) + if not response.get('success'): + raise ExtractorError(response['message']) + + for video in response['videos']: + yield self.url_result(f'https://www.veoh.com/watch/{video["permalinkId"]}', VeohIE, + video['permalinkId'], video.get('title')) + + def _real_initialize(self): + webpage = self._download_webpage( + 'https://www.veoh.com', None, note='Downloading authorization token') + self._TOKEN = self._search_regex( + r'csrfToken:\s*(["\'])(?P<token>[0-9a-zA-Z]{40})\1', webpage, + 'request token', group='token') + + def _real_extract(self, url): + uploader = self._match_id(url) + return self.playlist_result(OnDemandPagedList( + functools.partial(self._fetch_page, uploader), + self._PAGE_SIZE), uploader, f'{uploader} (Uploads)') From bc5c2f8a2c84633940956a27bf2125804f73882e Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Fri, 11 Nov 2022 23:03:26 +0530 Subject: [PATCH 1762/2552] Fix bugs in `PlaylistEntries` --- yt_dlp/YoutubeDL.py | 9 ++++++--- yt_dlp/utils.py | 6 +++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 1efcfc2e4..32bd5b3dc 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1816,7 +1816,7 @@ class YoutubeDL: elif self.params.get('playlistrandom'): random.shuffle(entries) - self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} videos' + self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items' f'{format_field(ie_result, "playlist_count", " of %s")}') keep_resolved_entries = self.params.get('extract_flat') != 'discard' @@ -1849,7 +1849,7 @@ class YoutubeDL: resolved_entries[i] = (playlist_index, NO_DEFAULT) continue - self.to_screen('[download] Downloading video %s of %s' % ( + self.to_screen('[download] Downloading item %s of %s' % ( self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS))) extra.update({ @@ -1867,8 +1867,11 @@ class YoutubeDL: resolved_entries[i] = (playlist_index, entry_result) # Update with processed data - ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT] ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT] + ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT] + if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))): + # Do not set for full playlist + ie_result.pop('requested_entries') # Write the updated info to json if _infojson_written is True and self._write_info_json( diff --git 
a/yt_dlp/utils.py b/yt_dlp/utils.py index 04a0956c9..40313f50e 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2950,10 +2950,10 @@ class PlaylistEntries: self.is_exhausted = True requested_entries = info_dict.get('requested_entries') - self.is_incomplete = bool(requested_entries) + self.is_incomplete = requested_entries is not None if self.is_incomplete: assert self.is_exhausted - self._entries = [self.MissingEntry] * max(requested_entries) + self._entries = [self.MissingEntry] * max(requested_entries or [0]) for i, entry in zip(requested_entries, entries): self._entries[i - 1] = entry elif isinstance(entries, (list, PagedList, LazyList)): @@ -3022,7 +3022,7 @@ class PlaylistEntries: if not self.is_incomplete: raise self.IndexError() if entry is self.MissingEntry: - raise EntryNotInPlaylist(f'Entry {i} cannot be found') + raise EntryNotInPlaylist(f'Entry {i + 1} cannot be found') return entry else: def get_entry(i): From a8c754cc00a076f8cba84b477312c35a05cddbc4 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Sat, 12 Nov 2022 00:02:07 +0530 Subject: [PATCH 1763/2552] [extractor/youtube] Fix bug in handling of music URLs Bug in bd7e919a75cd264daabbe50137b2a7c89390c68c Closes #5502 --- yt_dlp/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 1f9feb2d2..c753713c7 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -4286,7 +4286,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): if smuggled_data: _smuggle(info_dict, smuggled_data) if info_dict.get('entries'): - info_dict['entries'] = (_smuggle(i, smuggled_data) for i in info_dict['entries']) + info_dict['entries'] = (_smuggle(i, smuggled_data.copy()) for i in info_dict['entries']) return info_dict return wrapper From 0a4b2f4180b57f8e82b5d9c078c070ddfac7c727 Mon Sep 17 00:00:00 2001 From: Elyse <26639800+elyse0@users.noreply.github.com> Date: Sat, 12 Nov 2022 01:13:13 -0600 Subject: [PATCH 1764/2552] [extractor/tencent] Fix geo-restricted video (#5505) Closes #5230 Authored by: elyse0 --- yt_dlp/extractor/tencent.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/tencent.py b/yt_dlp/extractor/tencent.py index 44cd19600..61f300fa4 100644 --- a/yt_dlp/extractor/tencent.py +++ b/yt_dlp/extractor/tencent.py @@ -67,9 +67,10 @@ class TencentBaseIE(InfoExtractor): formats, subtitles = [], {} for video_format in video_response['ul']['ui']: - if video_format.get('hls'): + if video_format.get('hls') or determine_ext(video_format['url']) == 'm3u8': fmts, subs = self._extract_m3u8_formats_and_subtitles( - video_format['url'] + video_format['hls']['pt'], video_id, 'mp4', fatal=False) + video_format['url'] + traverse_obj(video_format, ('hls', 'pt'), default=''), + video_id, 'mp4', fatal=False) for f in fmts: f.update({'width': video_width, 'height': video_height}) @@ -187,6 +188,10 @@ class VQQVideoIE(VQQBaseIE): 'thumbnail': r're:^https?://[^?#]+s0043cwsgj0', 'series': '青年理工工作者生活研究所', }, + }, { + # Geo-restricted to China + 'url': 'https://v.qq.com/x/cover/mcv8hkc8zk8lnov/x0036x5qqsr.html', + 'only_matching': True, }] def _real_extract(self, url): From 83cc7b8aae1328b0d148b631357f753c61c38a29 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Sun, 13 Nov 2022 08:29:49 +0530 Subject: [PATCH 1765/2552] [utils] `classproperty`: Add cache support --- yt_dlp/utils.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 
From a8c754cc00a076f8cba84b477312c35a05cddbc4 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Sat, 12 Nov 2022 00:02:07 +0530
Subject: [PATCH 1763/2552] [extractor/youtube] Fix bug in handling of music URLs

Bug in bd7e919a75cd264daabbe50137b2a7c89390c68c
Closes #5502
---
 yt_dlp/extractor/youtube.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 1f9feb2d2..c753713c7 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -4286,7 +4286,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
             if smuggled_data:
                 _smuggle(info_dict, smuggled_data)
                 if info_dict.get('entries'):
-                    info_dict['entries'] = (_smuggle(i, smuggled_data) for i in info_dict['entries'])
+                    info_dict['entries'] = (_smuggle(i, smuggled_data.copy()) for i in info_dict['entries'])
             return info_dict
         return wrapper

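The `.copy()` matters because the generator otherwise hands the same `smuggled_data` dict to every entry, so a mutation made while smuggling one entry leaks into all of them. A reduced sketch of the aliasing bug being fixed (illustrative only):

    shared = {'is_music_url': True}
    entries = [{'id': 'a'}, {'id': 'b'}]
    tagged = [dict(e, smuggled=shared) for e in entries]         # all alias one dict
    shared['extra'] = 1                                          # now visible in every entry
    tagged = [dict(e, smuggled=shared.copy()) for e in entries]  # per-entry copies avoid this
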
From 0a4b2f4180b57f8e82b5d9c078c070ddfac7c727 Mon Sep 17 00:00:00 2001
From: Elyse <26639800+elyse0@users.noreply.github.com>
Date: Sat, 12 Nov 2022 01:13:13 -0600
Subject: [PATCH 1764/2552] [extractor/tencent] Fix geo-restricted video (#5505)

Closes #5230
Authored by: elyse0
---
 yt_dlp/extractor/tencent.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/tencent.py b/yt_dlp/extractor/tencent.py
index 44cd19600..61f300fa4 100644
--- a/yt_dlp/extractor/tencent.py
+++ b/yt_dlp/extractor/tencent.py
@@ -67,9 +67,10 @@ class TencentBaseIE(InfoExtractor):

         formats, subtitles = [], {}
         for video_format in video_response['ul']['ui']:
-            if video_format.get('hls'):
+            if video_format.get('hls') or determine_ext(video_format['url']) == 'm3u8':
                 fmts, subs = self._extract_m3u8_formats_and_subtitles(
-                    video_format['url'] + video_format['hls']['pt'], video_id, 'mp4', fatal=False)
+                    video_format['url'] + traverse_obj(video_format, ('hls', 'pt'), default=''),
+                    video_id, 'mp4', fatal=False)
                 for f in fmts:
                     f.update({'width': video_width, 'height': video_height})

@@ -187,6 +188,10 @@ class VQQVideoIE(VQQBaseIE):
             'thumbnail': r're:^https?://[^?#]+s0043cwsgj0',
             'series': '青年理工工作者生活研究所',
         },
+    }, {
+        # Geo-restricted to China
+        'url': 'https://v.qq.com/x/cover/mcv8hkc8zk8lnov/x0036x5qqsr.html',
+        'only_matching': True,
     }]

     def _real_extract(self, url):

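`video_format['hls']['pt']` raises `KeyError` whenever the API response omits the `hls` object, which is what the geo-restricted responses being fixed here do; `traverse_obj` walks the nested keys and degrades to the default instead. Roughly (a sketch, assuming yt-dlp's `traverse_obj` helper):

    from yt_dlp.utils import traverse_obj

    video_format = {'url': 'https://example.invalid/stream'}  # hypothetical: no 'hls' key
    traverse_obj(video_format, ('hls', 'pt'), default='')     # '' instead of a KeyError
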
From 83cc7b8aae1328b0d148b631357f753c61c38a29 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Sun, 13 Nov 2022 08:29:49 +0530
Subject: [PATCH 1765/2552] [utils] `classproperty`: Add cache support

---
 yt_dlp/utils.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 40313f50e..a6bf897dc 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -5847,14 +5847,23 @@ def cached_method(f):


 class classproperty:
-    """property access for class methods"""
+    """property access for class methods with optional caching"""
+    def __new__(cls, func=None, *args, **kwargs):
+        if not func:
+            return functools.partial(cls, *args, **kwargs)
+        return super().__new__(cls)

-    def __init__(self, func):
+    def __init__(self, func, *, cache=False):
         functools.update_wrapper(self, func)
         self.func = func
+        self._cache = {} if cache else None

     def __get__(self, _, cls):
-        return self.func(cls)
+        if self._cache is None:
+            return self.func(cls)
+        elif cls not in self._cache:
+            self._cache[cls] = self.func(cls)
+        return self._cache[cls]


 class Namespace(types.SimpleNamespace):

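With `__new__` returning a `functools.partial` when no function is given, the decorator now works both bare and parameterized, and the cache is keyed per class so each subclass computes its own value. Usage as the following patches rely on it (sketch; `compute()` is a hypothetical helper):

    class Foo:
        @classproperty                # unchanged: evaluated on every access
        def name(cls):
            return cls.__name__

        @classproperty(cache=True)    # evaluated once per class, then memoized
        def expensive(cls):
            return compute(cls)
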
From 171a31dbe8b59b3bab6a9b0712594228ee1b5234 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Sun, 13 Nov 2022 10:56:04 +0530
Subject: [PATCH 1766/2552] [extractor] Add a way to distinguish IEs that returns only videos

---
 yt_dlp/extractor/common.py  | 19 +++++++++++++++++++
 yt_dlp/extractor/youtube.py |  1 +
 2 files changed, 20 insertions(+)

diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 570f8195c..14984fd6f 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -3702,6 +3702,24 @@ class InfoExtractor:
             (*cls.get_testcases(include_onlymatching=False), *cls.get_webpage_testcases()),
             (..., (('playlist', 0), None), 'info_dict', 'age_limit')) or [0])

+    @classproperty(cache=True)
+    def _RETURN_TYPE(cls):
+        """What the extractor returns: "video", "playlist", "any", or None (Unknown)"""
+        tests = tuple(cls.get_testcases(include_onlymatching=False))
+        if not tests:
+            return None
+        elif not any(k.startswith('playlist') for test in tests for k in test):
+            return 'video'
+        elif all(any(k.startswith('playlist') for k in test) for test in tests):
+            return 'playlist'
+        return 'any'
+
+    @classmethod
+    def is_single_video(cls, url):
+        """Returns whether the URL is of a single video, None if unknown"""
+        assert cls.suitable(url), 'The URL must be suitable for the extractor'
+        return {'video': True, 'playlist': False}.get(cls._RETURN_TYPE)
+
     @classmethod
     def is_suitable(cls, age_limit):
         """Test whether the extractor is generally suitable for the given age limit"""
@@ -3953,6 +3971,7 @@ class SearchInfoExtractor(InfoExtractor):
     """

     _MAX_RESULTS = float('inf')
+    _RETURN_TYPE = 'playlist'

     @classproperty
     def _VALID_URL(cls):
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index c753713c7..032972dcf 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -1050,6 +1050,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                   <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
                   \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
     ]
+    _RETURN_TYPE = 'video'  # While there are "multifeed" test cases, they don't seem to actually exist anymore

     _PLAYER_INFO_RE = (
         r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',

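Because `_RETURN_TYPE` is inferred from the test cases, an extractor whose tests never use `playlist*` keys is classified as 'video', one whose tests all do as 'playlist', and mixed ones as 'any'. A caller can then ask definitively where possible (illustrative, with a hypothetical `SomeIE`):

    if SomeIE.suitable(url):
        SomeIE.is_single_video(url)   # True, False, or None when the type is 'any'/unknown
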
From d7b460d0e5fc710950582baed2e3fc616ed98a80 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Sun, 13 Nov 2022 08:24:00 +0530
Subject: [PATCH 1767/2552] Make early reject of `--match-filter` stricter

Closes #5509
---
 yt_dlp/YoutubeDL.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 32bd5b3dc..525d3ab6e 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -1358,10 +1358,18 @@ class YoutubeDL:

     def _match_entry(self, info_dict, incomplete=False, silent=False):
         """ Returns None if the file should be downloaded """
+        _type = info_dict.get('_type', 'video')
+        assert incomplete or _type == 'video', 'Only video result can be considered complete'

         video_title = info_dict.get('title', info_dict.get('id', 'entry'))

         def check_filter():
+            if _type in ('playlist', 'multi_video'):
+                return
+            elif _type in ('url', 'url_transparent') and not try_call(
+                    lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
+                return
+
             if 'title' in info_dict:
                 # This can happen when we're just evaluating the playlist
                 title = info_dict['title']

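The early reject now skips filtering for results that are not known to be single videos: playlist and multi_video results pass through untouched, and url/url_transparent results are only filtered when the target extractor's `is_single_video` says so. In practice, a filter like the following no longer rejects a whole playlist merely because the playlist dict lacks video fields; the check is re-run once full video info is available (illustrative invocation):

    yt-dlp --match-filter "duration > 60" https://example.invalid/playlist
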
From a4894d3e25943c4ecf4f38c0d50ce592d2175f29 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Tue, 15 Nov 2022 05:23:32 +0530
Subject: [PATCH 1768/2552] [extractor/youtube] Consider language in format de-duplication

---
 yt_dlp/extractor/youtube.py | 53 +++++++++++++++++++++++++++++--------
 1 file changed, 42 insertions(+), 11 deletions(-)

diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 032972dcf..9d51f38ba 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -1,5 +1,6 @@
 import base64
 import calendar
+import collections
 import copy
 import datetime
 import enum
@@ -2480,6 +2481,34 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'note': '6 channel audio',
             'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
             'only_matching': True,
+        }, {
+            'note': 'Multiple HLS formats with same itag',
+            'url': 'https://www.youtube.com/watch?v=kX3nB4PpJko',
+            'info_dict': {
+                'id': 'kX3nB4PpJko',
+                'ext': 'mp4',
+                'categories': ['Entertainment'],
+                'description': 'md5:e8031ff6e426cdb6a77670c9b81f6fa6',
+                'uploader_url': 'http://www.youtube.com/user/MrBeast6000',
+                'live_status': 'not_live',
+                'duration': 937,
+                'channel_follower_count': int,
+                'thumbnail': 'https://i.ytimg.com/vi_webp/kX3nB4PpJko/maxresdefault.webp',
+                'title': 'Last To Take Hand Off Jet, Keeps It!',
+                'channel': 'MrBeast',
+                'playable_in_embed': True,
+                'view_count': int,
+                'upload_date': '20221112',
+                'uploader': 'MrBeast',
+                'uploader_id': 'MrBeast6000',
+                'channel_url': 'https://www.youtube.com/channel/UCX6OQ3DkcsbYNE6H8uQQuVA',
+                'age_limit': 0,
+                'availability': 'public',
+                'channel_id': 'UCX6OQ3DkcsbYNE6H8uQQuVA',
+                'like_count': int,
+                'tags': [],
+            },
+            'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
         }
     ]

@@ -3472,7 +3501,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         return live_status

     def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
-        itags, stream_ids = {}, []
+        itags, stream_ids = collections.defaultdict(set), []
         itag_qualities, res_qualities = {}, {0: None}
         q = qualities([
             # Normally tiny is the smallest video-only formats. But
@@ -3554,10 +3583,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                             video_id=video_id, only_once=True)
                         throttled = True

-            if itag:
-                itags[itag] = 'https'
-                stream_ids.append(stream_id)
-
             tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
             language_preference = (
                 10 if audio_track.get('audioIsDefault') and 10
@@ -3616,6 +3641,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             }
             if dct.get('ext'):
                 dct['container'] = dct['ext'] + '_dash'
+
+            if itag:
+                itags[itag].add(('https', dct.get('language')))
+                stream_ids.append(stream_id)
             yield dct

         needs_live_processing = self._needs_live_processing(live_status, duration)
@@ -3636,13 +3665,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 skip_manifests.add('dash')

         def process_manifest_format(f, proto, itag):
-            if itag in itags:
-                if itags[itag] == proto or f'{itag}-{proto}' in itags:
-                    return False
-                itag = f'{itag}-{proto}'
-            if itag:
+            key = (proto, f.get('language'))
+            if key in itags[itag]:
+                return False
+            itags[itag].add(key)
+
+            if any(p != proto for p, _ in itags[itag]):
+                f['format_id'] = f'{itag}-{proto}'
+            elif itag:
                 f['format_id'] = itag
-                itags[itag] = proto

             f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
             if f['quality'] == -1 and f.get('height'):

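Keying the seen-set by `(protocol, language)` lets two formats share an itag as long as they differ in either component; only a protocol collision forces the `-proto` suffix onto the format id. A reduced model of the new de-duplication (sketch only, not the extractor code):

    import collections

    itags = collections.defaultdict(set)

    def accept(itag, proto, language):
        key = (proto, language)
        if key in itags[itag]:        # exact duplicate: reject
            return False
        itags[itag].add(key)
        return True

    accept('233', 'https', 'en')      # True
    accept('233', 'https', 'es')      # True -- same itag, different language
    accept('233', 'https', 'en')      # False -- genuine duplicate
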
From 6368e2e639bca7e66609911d2672b6a9dc65b052 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Wed, 16 Nov 2022 06:27:43 +0530
Subject: [PATCH 1769/2552] [cleanup] Misc

Closes #5541
---
 devscripts/lazy_load_template.py | 2 +-
 devscripts/make_lazy_extractors.py | 11 ++-
 test/parameters.json | 3 +-
 yt_dlp/YoutubeDL.py | 5 +-
 yt_dlp/extractor/adobepass.py | 2 +-
 yt_dlp/extractor/aenetworks.py | 3 +-
 yt_dlp/extractor/afreecatv.py | 2 +-
 yt_dlp/extractor/alura.py | 2 +-
 yt_dlp/extractor/amcnetworks.py | 2 +-
 yt_dlp/extractor/amp.py | 2 +-
 yt_dlp/extractor/aol.py | 2 +-
 yt_dlp/extractor/audius.py | 4 +-
 yt_dlp/extractor/aws.py | 2 +-
 yt_dlp/extractor/bandaichannel.py | 2 +-
 yt_dlp/extractor/bandcamp.py | 4 +-
 yt_dlp/extractor/bbc.py | 2 +-
 yt_dlp/extractor/bfmtv.py | 2 +-
 yt_dlp/extractor/bilibili.py | 2 +-
 yt_dlp/extractor/cbs.py | 2 +-
 yt_dlp/extractor/cbsinteractive.py | 2 +-
 yt_dlp/extractor/cbslocal.py | 4 +-
 yt_dlp/extractor/cbsnews.py | 4 +-
 yt_dlp/extractor/cmt.py | 2 +-
 yt_dlp/extractor/common.py | 9 +-
 yt_dlp/extractor/corus.py | 2 +-
 yt_dlp/extractor/daum.py | 2 +-
 yt_dlp/extractor/dreisat.py | 2 +-
 yt_dlp/extractor/extremetube.py | 2 +-
 yt_dlp/extractor/fancode.py | 2 +-
 yt_dlp/extractor/hitbox.py | 2 +-
 yt_dlp/extractor/imgur.py | 2 +-
 yt_dlp/extractor/jamendo.py | 2 +-
 yt_dlp/extractor/la7.py | 2 +-
 yt_dlp/extractor/laola1tv.py | 2 +-
 yt_dlp/extractor/lcp.py | 2 +-
 yt_dlp/extractor/mediaset.py | 2 +-
 yt_dlp/extractor/mitele.py | 2 +-
 yt_dlp/extractor/mofosex.py | 2 +-
 yt_dlp/extractor/mtv.py | 2 +-
 yt_dlp/extractor/murrtube.py | 2 +-
 yt_dlp/extractor/musicdex.py | 2 +-
 yt_dlp/extractor/nationalgeographic.py | 2 +-
 yt_dlp/extractor/nbc.py | 4 +-
 yt_dlp/extractor/ndr.py | 6 +-
 yt_dlp/extractor/nextmedia.py | 4 +-
 yt_dlp/extractor/nick.py | 2 +-
 yt_dlp/extractor/npo.py | 4 +-
 yt_dlp/extractor/nrk.py | 2 +-
 yt_dlp/extractor/once.py | 2 +-
 yt_dlp/extractor/peekvids.py | 2 +-
 yt_dlp/extractor/radlive.py | 4 +-
 yt_dlp/extractor/rai.py | 6 +-
 yt_dlp/extractor/redbulltv.py | 2 +-
 yt_dlp/extractor/rts.py | 2 +-
 yt_dlp/extractor/rtve.py | 6 +-
 yt_dlp/extractor/rutube.py | 1 -
 yt_dlp/extractor/sevenplus.py | 2 +-
 yt_dlp/extractor/skyit.py | 12 +--
 yt_dlp/extractor/southpark.py | 10 +-
 yt_dlp/extractor/tele5.py | 2 +-
 yt_dlp/extractor/theweatherchannel.py | 2 +-
 yt_dlp/extractor/tiktok.py | 4 +-
 yt_dlp/extractor/toutv.py | 2 +-
 yt_dlp/extractor/tube8.py | 2 +-
 yt_dlp/extractor/tvnow.py | 2 +-
 yt_dlp/extractor/udemy.py | 2 +-
 yt_dlp/extractor/uplynk.py | 3 +-
 yt_dlp/extractor/usanetwork.py | 2 +-
 yt_dlp/extractor/veoh.py | 2 +-
 yt_dlp/extractor/vgtv.py | 2 +-
 yt_dlp/extractor/vimeo.py | 10 +-
 yt_dlp/extractor/vvvvid.py | 2 +-
 yt_dlp/extractor/wdr.py | 2 +-
 yt_dlp/extractor/youtube.py | 125 ++++++++++++++++---
 yt_dlp/utils.py | 2 +-
 75 files changed, 194 insertions(+), 156 deletions(-)

diff --git a/devscripts/lazy_load_template.py b/devscripts/lazy_load_template.py
index 626b85d62..c8815e01b 100644
--- a/devscripts/lazy_load_template.py
+++ b/devscripts/lazy_load_template.py
@@ -10,7 +10,7 @@ from ..utils import (
 )

 # These bloat the lazy_extractors, so allow them to passthrough silently
-ALLOWED_CLASSMETHODS = {'get_testcases', 'extract_from_webpage'}
+ALLOWED_CLASSMETHODS = {'extract_from_webpage', 'get_testcases', 'get_webpage_testcases'}
 _WARNED = False

diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
index 2d4530eb9..c502bdf89 100644
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -14,10 +14,17 @@ from devscripts.utils import get_filename_args, read_file, write_file

 NO_ATTR = object()
 STATIC_CLASS_PROPERTIES = [
-    'IE_NAME', 'IE_DESC', 'SEARCH_KEY', '_VALID_URL', '_WORKING', '_ENABLED', '_NETRC_MACHINE', 'age_limit'
+    'IE_NAME', '_ENABLED', '_VALID_URL',  # Used for URL matching
+    '_WORKING', 'IE_DESC', '_NETRC_MACHINE', 'SEARCH_KEY',  # Used for --extractor-descriptions
+    'age_limit',  # Used for --age-limit (evaluated)
+    '_RETURN_TYPE',  # Accessed in CLI only with instance (evaluated)
 ]
 CLASS_METHODS = [
-    'ie_key', 'working', 'description', 'suitable', '_match_valid_url', '_match_id', 'get_temp_id', 'is_suitable'
+    'ie_key', 'suitable', '_match_valid_url',  # Used for URL matching
+    'working', 'get_temp_id', '_match_id',  # Accessed just before instance creation
+    'description',  # Used for --extractor-descriptions
+    'is_suitable',  # Used for --age-limit
+    'supports_login', 'is_single_video',  # Accessed in CLI only with instance
 ]
 IE_TEMPLATE = '''
 class {name}({bases}):
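The generator copies these names verbatim into the lazy extractor stubs, so anything the CLI touches before instantiating an extractor must be listed here; `_RETURN_TYPE` and `is_single_video` are included because the stricter `--match-filter` reject above reaches them through the lazy classes. The generated stub is roughly of this shape (a sketch, not the real generator output):

    class SomeIE(LazyLoadExtractor):
        _VALID_URL = '...'        # static property copied through
        _RETURN_TYPE = 'video'    # evaluated once at generation time
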
diff --git a/test/parameters.json b/test/parameters.json
index bc4561374..8789ce14b 100644
--- a/test/parameters.json
+++ b/test/parameters.json
@@ -44,5 +44,6 @@
     "writesubtitles": false,
     "allsubtitles": false,
     "listsubtitles": false,
-    "fixup": "never"
+    "fixup": "never",
+    "allow_playlist_files": false
 }
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 525d3ab6e..20940085e 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -1357,7 +1357,7 @@ class YoutubeDL:
         return self.get_output_path(dir_type, filename)

     def _match_entry(self, info_dict, incomplete=False, silent=False):
-        """ Returns None if the file should be downloaded """
+        """Returns None if the file should be downloaded"""
         _type = info_dict.get('_type', 'video')
         assert incomplete or _type == 'video', 'Only video result can be considered complete'

@@ -1381,6 +1381,7 @@ class YoutubeDL:
             if rejecttitle:
                 if re.search(rejecttitle, title, re.IGNORECASE):
                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
+
             date = info_dict.get('upload_date')
             if date is not None:
                 dateRange = self.params.get('daterange', DateRange())
@@ -2953,8 +2954,6 @@ class YoutubeDL:
         if 'format' not in info_dict and 'ext' in info_dict:
             info_dict['format'] = info_dict['ext']

-        # This is mostly just for backward compatibility of process_info
-        # As a side-effect, this allows for format-specific filters
         if self._match_entry(info_dict) is not None:
             info_dict['__write_download_archive'] = 'ignore'
             return
diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py
index ec1be008a..e5944f714 100644
--- a/yt_dlp/extractor/adobepass.py
+++ b/yt_dlp/extractor/adobepass.py
@@ -1352,7 +1352,7 @@ MSO_INFO = {
 }


-class AdobePassIE(InfoExtractor):
+class AdobePassIE(InfoExtractor):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
     _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
     _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
     _MVPD_CACHE = 'ap-mvpd'
diff --git a/yt_dlp/extractor/aenetworks.py b/yt_dlp/extractor/aenetworks.py
index 516cb6302..094c57bf9 100644
--- a/yt_dlp/extractor/aenetworks.py
+++ b/yt_dlp/extractor/aenetworks.py
@@ -8,7 +8,7 @@ from ..utils import (
 )


-class AENetworksBaseIE(ThePlatformIE):
+class AENetworksBaseIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
     _BASE_URL_REGEX = r'''(?x)https?://
         (?:(?:www|play|watch)\.)?
         (?P<domain>
@@ -304,7 +304,6 @@ class HistoryTopicIE(AENetworksBaseIE):
 class HistoryPlayerIE(AENetworksBaseIE):
     IE_NAME = 'history:player'
     _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|biography)\.com)/player/(?P<id>\d+)'
-    _TESTS = []

     def _real_extract(self, url):
         domain, video_id = self._match_valid_url(url).groups()
diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py
index b0fd158f6..bfcc08030 100644
--- a/yt_dlp/extractor/afreecatv.py
+++ b/yt_dlp/extractor/afreecatv.py
@@ -380,7 +380,7 @@ class AfreecaTVIE(InfoExtractor):
         return info


-class AfreecaTVLiveIE(AfreecaTVIE):
+class AfreecaTVLiveIE(AfreecaTVIE):  # XXX: Do not subclass from concrete IE

     IE_NAME = 'afreecatv:live'
     _VALID_URL = r'https?://play\.afreeca(?:tv)?\.com/(?P<id>[^/]+)(?:/(?P<bno>\d+))?'
diff --git a/yt_dlp/extractor/alura.py b/yt_dlp/extractor/alura.py
index b76ccb2a1..ae7115f9f 100644
--- a/yt_dlp/extractor/alura.py
+++ b/yt_dlp/extractor/alura.py
@@ -113,7 +113,7 @@ class AluraIE(InfoExtractor):
                 raise ExtractorError('Unable to log in')


-class AluraCourseIE(AluraIE):
+class AluraCourseIE(AluraIE):  # XXX: Do not subclass from concrete IE

     _VALID_URL = r'https?://(?:cursos\.)?alura\.com\.br/course/(?P<id>[^/]+)'
     _LOGIN_URL = 'https://cursos.alura.com.br/loginForm?urlAfterLogin=/loginForm'
diff --git a/yt_dlp/extractor/amcnetworks.py b/yt_dlp/extractor/amcnetworks.py
index e04ecf65f..9369a66f7 100644
--- a/yt_dlp/extractor/amcnetworks.py
+++ b/yt_dlp/extractor/amcnetworks.py
@@ -9,7 +9,7 @@ from ..utils import (
 )


-class AMCNetworksIE(ThePlatformIE):
+class AMCNetworksIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
     _VALID_URL = r'https?://(?:www\.)?(?P<site>amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?P<id>(?:movies|shows(?:/[^/]+)+)/[^/?#&]+)'
     _TESTS = [{
         'url': 'https://www.bbcamerica.com/shows/the-graham-norton-show/videos/tina-feys-adorable-airline-themed-family-dinner--51631',
diff --git a/yt_dlp/extractor/amp.py b/yt_dlp/extractor/amp.py
index 73b72b085..6015baad5 100644
--- a/yt_dlp/extractor/amp.py
+++ b/yt_dlp/extractor/amp.py
@@ -10,7 +10,7 @@ from ..utils import (
 )


-class AMPIE(InfoExtractor):
+class AMPIE(InfoExtractor):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
     # parse Akamai Adaptive Media Player feed
     def _extract_feed_info(self, url):
         feed = self._download_json(
diff --git a/yt_dlp/extractor/aol.py b/yt_dlp/extractor/aol.py
index b67db2adc..5200f9d9d 100644
--- a/yt_dlp/extractor/aol.py
+++ b/yt_dlp/extractor/aol.py
@@ -9,7 +9,7 @@ from ..utils import (
 )


-class AolIE(YahooIE):
+class AolIE(YahooIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'aol.com'
     _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>\d{9}|[0-9a-f]{24}|[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})'
diff --git a/yt_dlp/extractor/audius.py b/yt_dlp/extractor/audius.py
index 0105d9db8..6448b449b 100644
--- a/yt_dlp/extractor/audius.py
+++ b/yt_dlp/extractor/audius.py
@@ -168,7 +168,7 @@ class AudiusIE(AudiusBaseIE):
         }


-class AudiusTrackIE(AudiusIE):
+class AudiusTrackIE(AudiusIE):  # XXX: Do not subclass from concrete IE
     _VALID_URL = r'''(?x)(?:audius:)(?:https?://(?:www\.)?.+/v1/tracks/)?(?P<track_id>\w+)'''
     IE_NAME = 'audius:track'
     IE_DESC = 'Audius track ID or API link. Prepend with "audius:"'
@@ -243,7 +243,7 @@ class AudiusPlaylistIE(AudiusBaseIE):
             playlist_data.get('description'))


-class AudiusProfileIE(AudiusPlaylistIE):
+class AudiusProfileIE(AudiusPlaylistIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'audius:artist'
     IE_DESC = 'Audius.co profile/artist pages'
     _VALID_URL = r'https?://(?:www)?audius\.co/(?P<id>[^\/]+)/?(?:[?#]|$)'
diff --git a/yt_dlp/extractor/aws.py b/yt_dlp/extractor/aws.py
index c2b22922b..eb831a153 100644
--- a/yt_dlp/extractor/aws.py
+++ b/yt_dlp/extractor/aws.py
@@ -6,7 +6,7 @@ from .common import InfoExtractor
 from ..compat import compat_urllib_parse_urlencode


-class AWSIE(InfoExtractor):
+class AWSIE(InfoExtractor):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
     _AWS_ALGORITHM = 'AWS4-HMAC-SHA256'
     _AWS_REGION = 'us-east-1'

diff --git a/yt_dlp/extractor/bandaichannel.py b/yt_dlp/extractor/bandaichannel.py
index 2e3233376..e438d16ea 100644
--- a/yt_dlp/extractor/bandaichannel.py
+++ b/yt_dlp/extractor/bandaichannel.py
@@ -2,7 +2,7 @@ from .brightcove import BrightcoveNewIE
 from ..utils import extract_attributes


-class BandaiChannelIE(BrightcoveNewIE):
+class BandaiChannelIE(BrightcoveNewIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'bandaichannel'
     _VALID_URL = r'https?://(?:www\.)?b-ch\.com/titles/(?P<id>\d+/\d+)'
     _TESTS = [{
diff --git a/yt_dlp/extractor/bandcamp.py b/yt_dlp/extractor/bandcamp.py
index a864ff9ac..7dcace2c6 100644
--- a/yt_dlp/extractor/bandcamp.py
+++ b/yt_dlp/extractor/bandcamp.py
@@ -211,7 +211,7 @@ class BandcampIE(InfoExtractor):
     }


-class BandcampAlbumIE(BandcampIE):
+class BandcampAlbumIE(BandcampIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'Bandcamp:album'
     _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com/album/(?P<id>[^/?#&]+)'

@@ -314,7 +314,7 @@ class BandcampAlbumIE(BandcampIE):
     }


-class BandcampWeeklyIE(BandcampIE):
+class BandcampWeeklyIE(BandcampIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'Bandcamp:weekly'
     _VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
     _TESTS = [{
diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py
index fe122af85..35a7a165c 100644
--- a/yt_dlp/extractor/bbc.py
+++ b/yt_dlp/extractor/bbc.py
@@ -588,7 +588,7 @@ class BBCCoUkIE(InfoExtractor):
     }


-class BBCIE(BBCCoUkIE):
+class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'bbc'
     IE_DESC = 'BBC'
     _VALID_URL = r'''(?x)
diff --git a/yt_dlp/extractor/bfmtv.py b/yt_dlp/extractor/bfmtv.py
index 48526e38b..d86d283fa 100644
--- a/yt_dlp/extractor/bfmtv.py
+++ b/yt_dlp/extractor/bfmtv.py
@@ -42,7 +42,7 @@ class BFMTVIE(BFMTVBaseIE):
         return self._brightcove_url_result(video_block['videoid'], video_block)


-class BFMTVLiveIE(BFMTVIE):
+class BFMTVLiveIE(BFMTVIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'bfmtv:live'
     _VALID_URL = BFMTVBaseIE._VALID_URL_BASE + '(?P<id>(?:[^/]+/)?en-direct)'
     _TESTS = [{
diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index de28aa4b7..8a0e10da8 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -65,7 +65,7 @@ class BilibiliBaseIE(InfoExtractor):
         missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality')))
         if missing_formats:
             self.to_screen(f'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
-                           'you have to login or become premium member to download them')
+                           f'you have to login or become premium member to download them. {self._login_hint()}')

         self._sort_formats(formats)
         return formats
diff --git a/yt_dlp/extractor/cbs.py b/yt_dlp/extractor/cbs.py
index e32539c9e..9515806ed 100644
--- a/yt_dlp/extractor/cbs.py
+++ b/yt_dlp/extractor/cbs.py
@@ -10,7 +10,7 @@ from ..utils import (
 )


-class CBSBaseIE(ThePlatformFeedIE):
+class CBSBaseIE(ThePlatformFeedIE):  # XXX: Do not subclass from concrete IE
     def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
         subtitles = {}
         for k, ext in [('sMPTE-TTCCURL', 'tt'), ('ClosedCaptionURL', 'ttml'), ('webVTTCaptionURL', 'vtt')]:
diff --git a/yt_dlp/extractor/cbsinteractive.py b/yt_dlp/extractor/cbsinteractive.py
index 7abeecf78..b09e9823e 100644
--- a/yt_dlp/extractor/cbsinteractive.py
+++ b/yt_dlp/extractor/cbsinteractive.py
@@ -2,7 +2,7 @@ from .cbs import CBSIE
 from ..utils import int_or_none


-class CBSInteractiveIE(CBSIE):
+class CBSInteractiveIE(CBSIE):  # XXX: Do not subclass from concrete IE
     _VALID_URL = r'https?://(?:www\.)?(?P<site>cnet|zdnet)\.com/(?:videos|video(?:/share)?)/(?P<id>[^/?]+)'
     _TESTS = [{
         'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
diff --git a/yt_dlp/extractor/cbslocal.py b/yt_dlp/extractor/cbslocal.py
index c6495c95f..3d50b0499 100644
--- a/yt_dlp/extractor/cbslocal.py
+++ b/yt_dlp/extractor/cbslocal.py
@@ -7,7 +7,7 @@ from ..utils import (
 )


-class CBSLocalIE(AnvatoIE):
+class CBSLocalIE(AnvatoIE):  # XXX: Do not subclass from concrete IE
     _VALID_URL_BASE = r'https?://[a-z]+\.cbslocal\.com/'
     _VALID_URL = _VALID_URL_BASE + r'video/(?P<id>\d+)'

@@ -47,7 +47,7 @@ class CBSLocalIE(AnvatoIE):
             'anvato:anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67:' + mcp_id, 'Anvato', mcp_id)


-class CBSLocalArticleIE(AnvatoIE):
+class CBSLocalArticleIE(AnvatoIE):  # XXX: Do not subclass from concrete IE
     _VALID_URL = CBSLocalIE._VALID_URL_BASE + r'\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'
     _TESTS = [{
diff --git a/yt_dlp/extractor/cbsnews.py b/yt_dlp/extractor/cbsnews.py
index 76925b4f9..98ec28df0 100644
--- a/yt_dlp/extractor/cbsnews.py
+++ b/yt_dlp/extractor/cbsnews.py
@@ -12,7 +12,7 @@ from ..utils import (
 )


-class CBSNewsEmbedIE(CBSIE):
+class CBSNewsEmbedIE(CBSIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'cbsnews:embed'
     _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/embed/video[^#]*#(?P<id>.+)'
     _TESTS = [{
@@ -27,7 +27,7 @@ class CBSNewsEmbedIE(CBSIE):
         return self._extract_video_info(item['mpxRefId'], 'cbsnews')


-class CBSNewsIE(CBSIE):
+class CBSNewsIE(CBSIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'cbsnews'
     IE_DESC = 'CBS News'
     _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|video)/(?P<id>[\da-z_-]+)'
diff --git a/yt_dlp/extractor/cmt.py b/yt_dlp/extractor/cmt.py
index 4eec066dd..8aed7708b 100644
--- a/yt_dlp/extractor/cmt.py
+++ b/yt_dlp/extractor/cmt.py
@@ -3,7 +3,7 @@ from .mtv import MTVIE


 # TODO Remove - Reason: Outdated Site
-class CMTIE(MTVIE):
+class CMTIE(MTVIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'cmt.com'
     _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|(?:full-)?episodes|video-clips)/(?P<id>[^/]+)'

diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 14984fd6f..3a1af3290 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -3676,12 +3676,13 @@ class InfoExtractor:

     @classmethod
     def get_testcases(cls, include_onlymatching=False):
-        t = getattr(cls, '_TEST', None)
+        # Do not look in super classes
+        t = vars(cls).get('_TEST')
         if t:
             assert not hasattr(cls, '_TESTS'), f'{cls.ie_key()}IE has _TEST and _TESTS'
             tests = [t]
         else:
-            tests = getattr(cls, '_TESTS', [])
+            tests = vars(cls).get('_TESTS', [])
         for t in tests:
             if not include_onlymatching and t.get('only_matching', False):
                 continue
@@ -3690,12 +3691,12 @@ class InfoExtractor:

     @classmethod
     def get_webpage_testcases(cls):
-        tests = getattr(cls, '_WEBPAGE_TESTS', [])
+        tests = vars(cls).get('_WEBPAGE_TESTS', [])
         for t in tests:
             t['name'] = cls.ie_key()
         return tests

-    @classproperty
+    @classproperty(cache=True)
     def age_limit(cls):
         """Get age limit from the testcases"""
         return max(traverse_obj(
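`vars(cls)` sees only attributes defined on the class itself, while `getattr` searches the whole MRO; without this change a subclass with no tests of its own would inherit its parent's test cases, and with them a wrong `_RETURN_TYPE` and `age_limit`. Compare (plain Python):

    class Base:
        _TESTS = [{'url': '...'}]

    class Derived(Base):
        pass

    getattr(Derived, '_TESTS', [])    # [{'url': '...'}] -- inherited from Base
    vars(Derived).get('_TESTS', [])   # [] -- Derived defines none itself
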
diff --git a/yt_dlp/extractor/corus.py b/yt_dlp/extractor/corus.py
index 7b83c0390..8c920e3ab 100644
--- a/yt_dlp/extractor/corus.py
+++ b/yt_dlp/extractor/corus.py
@@ -7,7 +7,7 @@ from ..utils import (
 )


-class CorusIE(ThePlatformFeedIE):
+class CorusIE(ThePlatformFeedIE):  # XXX: Do not subclass from concrete IE
     _VALID_URL = r'''(?x)
                     https?://
                         (?:www\.)?
diff --git a/yt_dlp/extractor/daum.py b/yt_dlp/extractor/daum.py
index a1f197b0b..3ef514065 100644
--- a/yt_dlp/extractor/daum.py
+++ b/yt_dlp/extractor/daum.py
@@ -125,7 +125,7 @@ class DaumClipIE(DaumBaseIE):
             self._KAKAO_EMBED_BASE + video_id, 'Kakao', video_id)


-class DaumListIE(InfoExtractor):
+class DaumListIE(InfoExtractor):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
     def _get_entries(self, list_id, list_id_type):
         name = None
         entries = []
diff --git a/yt_dlp/extractor/dreisat.py b/yt_dlp/extractor/dreisat.py
index 80a724607..8a59c23ab 100644
--- a/yt_dlp/extractor/dreisat.py
+++ b/yt_dlp/extractor/dreisat.py
@@ -1,7 +1,7 @@
 from .zdf import ZDFIE


-class DreiSatIE(ZDFIE):
+class DreiSatIE(ZDFIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = '3sat'
     _VALID_URL = r'https?://(?:www\.)?3sat\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html'
     _TESTS = [{
diff --git a/yt_dlp/extractor/extremetube.py b/yt_dlp/extractor/extremetube.py
index 99520b6a0..2c1969899 100644
--- a/yt_dlp/extractor/extremetube.py
+++ b/yt_dlp/extractor/extremetube.py
@@ -2,7 +2,7 @@ from ..utils import str_to_int
 from .keezmovies import KeezMoviesIE


-class ExtremeTubeIE(KeezMoviesIE):
+class ExtremeTubeIE(KeezMoviesIE):  # XXX: Do not subclass from concrete IE
     _VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?P<id>[^/#?&]+)'
     _TESTS = [{
         'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
diff --git a/yt_dlp/extractor/fancode.py b/yt_dlp/extractor/fancode.py
index 9716e581a..1b5db818a 100644
--- a/yt_dlp/extractor/fancode.py
+++ b/yt_dlp/extractor/fancode.py
@@ -125,7 +125,7 @@ class FancodeVodIE(InfoExtractor):
     }


-class FancodeLiveIE(FancodeVodIE):
+class FancodeLiveIE(FancodeVodIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'fancode:live'

     _VALID_URL = r'https?://(www\.)?fancode\.com/match/(?P<id>[0-9]+).+'
diff --git a/yt_dlp/extractor/hitbox.py b/yt_dlp/extractor/hitbox.py
index 6ecdd390c..fdcf6770d 100644
--- a/yt_dlp/extractor/hitbox.py
+++ b/yt_dlp/extractor/hitbox.py
@@ -127,7 +127,7 @@ class HitboxIE(InfoExtractor):
         return metadata


-class HitboxLiveIE(HitboxIE):
+class HitboxLiveIE(HitboxIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'hitbox:live'
     _VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?P<id>[^/?#&]+)'
     _TESTS = [{
diff --git a/yt_dlp/extractor/imgur.py b/yt_dlp/extractor/imgur.py
index a3bb47615..21c56d879 100644
--- a/yt_dlp/extractor/imgur.py
+++ b/yt_dlp/extractor/imgur.py
@@ -138,7 +138,7 @@ class ImgurGalleryIE(InfoExtractor):
         return self.url_result('http://imgur.com/%s' % gallery_id, ImgurIE.ie_key(), gallery_id)


-class ImgurAlbumIE(ImgurGalleryIE):
+class ImgurAlbumIE(ImgurGalleryIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'imgur:album'
     _VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)'

diff --git a/yt_dlp/extractor/jamendo.py b/yt_dlp/extractor/jamendo.py
index d960ee51c..578e57a67 100644
--- a/yt_dlp/extractor/jamendo.py
+++ b/yt_dlp/extractor/jamendo.py
@@ -134,7 +134,7 @@ class JamendoIE(InfoExtractor):
     }


-class JamendoAlbumIE(JamendoIE):
+class JamendoAlbumIE(JamendoIE):  # XXX: Do not subclass from concrete IE
     _VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)'
     _TESTS = [{
         'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
diff --git a/yt_dlp/extractor/la7.py b/yt_dlp/extractor/la7.py
index 5d52decdb..8ce44cc13 100644
--- a/yt_dlp/extractor/la7.py
+++ b/yt_dlp/extractor/la7.py
@@ -194,7 +194,7 @@ class LA7PodcastEpisodeIE(InfoExtractor):
         return self._extract_info(webpage, video_id)


-class LA7PodcastIE(LA7PodcastEpisodeIE):
+class LA7PodcastIE(LA7PodcastEpisodeIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'la7.it:podcast'
     _VALID_URL = r'(https?://)?(www\.)?la7\.it/(?P<id>[^/]+)/podcast/?(?:$|[#?])'

diff --git a/yt_dlp/extractor/laola1tv.py b/yt_dlp/extractor/laola1tv.py
index 4014a9256..a90ed16a0 100644
--- a/yt_dlp/extractor/laola1tv.py
+++ b/yt_dlp/extractor/laola1tv.py
@@ -118,7 +118,7 @@ class Laola1TvEmbedIE(InfoExtractor):
         }


-class Laola1TvBaseIE(Laola1TvEmbedIE):
+class Laola1TvBaseIE(Laola1TvEmbedIE):  # XXX: Do not subclass from concrete IE
     def _extract_video(self, url):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
diff --git a/yt_dlp/extractor/lcp.py b/yt_dlp/extractor/lcp.py
index 87543d56f..9846319e0 100644
--- a/yt_dlp/extractor/lcp.py
+++ b/yt_dlp/extractor/lcp.py
@@ -2,7 +2,7 @@ from .common import InfoExtractor
 from .arkena import ArkenaIE


-class LcpPlayIE(ArkenaIE):
+class LcpPlayIE(ArkenaIE):  # XXX: Do not subclass from concrete IE
     _VALID_URL = r'https?://play\.lcp\.fr/embed/(?P<id>[^/]+)/(?P<account_id>[^/]+)/[^/]+/[^/]+'
     _TESTS = [{
         'url': 'http://play.lcp.fr/embed/327336/131064/darkmatter/0',
diff --git a/yt_dlp/extractor/mediaset.py b/yt_dlp/extractor/mediaset.py
index ebe894f74..a3b5491d2 100644
--- a/yt_dlp/extractor/mediaset.py
+++ b/yt_dlp/extractor/mediaset.py
@@ -286,7 +286,7 @@ class MediasetIE(ThePlatformBaseIE):
         return info


-class MediasetShowIE(MediasetIE):
+class MediasetShowIE(MediasetIE):  # XXX: Do not subclass from concrete IE
     _VALID_URL = r'''(?x)
                     (?:
                         https?://
diff --git a/yt_dlp/extractor/mitele.py b/yt_dlp/extractor/mitele.py
index 12b2b2432..ea2998672 100644
--- a/yt_dlp/extractor/mitele.py
+++ b/yt_dlp/extractor/mitele.py
@@ -5,7 +5,7 @@ from ..utils import (
 )


-class MiTeleIE(TelecincoIE):
+class MiTeleIE(TelecincoIE):  # XXX: Do not subclass from concrete IE
     IE_DESC = 'mitele.es'
     _VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'

diff --git a/yt_dlp/extractor/mofosex.py b/yt_dlp/extractor/mofosex.py
index 4221ef3e3..9cb6980c1 100644
--- a/yt_dlp/extractor/mofosex.py
+++ b/yt_dlp/extractor/mofosex.py
@@ -7,7 +7,7 @@ from ..utils import (
 from .keezmovies import KeezMoviesIE


-class MofosexIE(KeezMoviesIE):
+class MofosexIE(KeezMoviesIE):  # XXX: Do not subclass from concrete IE
     _VALID_URL = r'https?://(?:www\.)?mofosex\.com/videos/(?P<id>\d+)/(?P<display_id>[^/?#&.]+)\.html'
     _TESTS = [{
         'url': 'http://www.mofosex.com/videos/318131/amateur-teen-playing-and-masturbating-318131.html',
diff --git a/yt_dlp/extractor/mtv.py b/yt_dlp/extractor/mtv.py
index 10cd304eb..b2009dc5b 100644
--- a/yt_dlp/extractor/mtv.py
+++ b/yt_dlp/extractor/mtv.py
@@ -536,7 +536,7 @@ class MTVItaliaIE(MTVServicesInfoExtractor):
     }


-class MTVItaliaProgrammaIE(MTVItaliaIE):
+class MTVItaliaProgrammaIE(MTVItaliaIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'mtv.it:programma'
     _VALID_URL = r'https?://(?:www\.)?mtv\.it/(?:programmi|playlist)/(?P<id>[0-9a-z]+)'
     _TESTS = [{
diff --git a/yt_dlp/extractor/murrtube.py b/yt_dlp/extractor/murrtube.py
index 508d51247..6cdbbda16 100644
--- a/yt_dlp/extractor/murrtube.py
+++ b/yt_dlp/extractor/murrtube.py
@@ -99,7 +99,7 @@ query Medium($id: ID!) {
 }


-class MurrtubeUserIE(MurrtubeIE):
+class MurrtubeUserIE(MurrtubeIE):  # XXX: Do not subclass from concrete IE
     IE_DESC = 'Murrtube user profile'
     _VALID_URL = r'https?://murrtube\.net/(?P<id>[^/]+)$'
     _TEST = {
diff --git a/yt_dlp/extractor/musicdex.py b/yt_dlp/extractor/musicdex.py
index 4d8e74f6b..48f29702c 100644
--- a/yt_dlp/extractor/musicdex.py
+++ b/yt_dlp/extractor/musicdex.py
@@ -97,7 +97,7 @@ class MusicdexAlbumIE(MusicdexBaseIE):
     }


-class MusicdexPageIE(MusicdexBaseIE):
+class MusicdexPageIE(MusicdexBaseIE):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
     def _entries(self, id):
         next_page_url = self._API_URL % id
         while next_page_url:
diff --git a/yt_dlp/extractor/nationalgeographic.py b/yt_dlp/extractor/nationalgeographic.py
index f22317d56..ad525c258 100644
--- a/yt_dlp/extractor/nationalgeographic.py
+++ b/yt_dlp/extractor/nationalgeographic.py
@@ -59,7 +59,7 @@ class NationalGeographicVideoIE(InfoExtractor):
         }


-class NationalGeographicTVIE(FOXIE):
+class NationalGeographicTVIE(FOXIE):  # XXX: Do not subclass from concrete IE
     _VALID_URL = r'https?://(?:www\.)?nationalgeographic\.com/tv/watch/(?P<id>[\da-fA-F]+)'
     _TESTS = [{
         'url': 'https://www.nationalgeographic.com/tv/watch/6a875e6e734b479beda26438c9f21138/',
diff --git a/yt_dlp/extractor/nbc.py b/yt_dlp/extractor/nbc.py
index 3de8c1508..dbc82de9f 100644
--- a/yt_dlp/extractor/nbc.py
+++ b/yt_dlp/extractor/nbc.py
@@ -24,7 +24,7 @@ from ..utils import (
 )


-class NBCIE(ThePlatformIE):
+class NBCIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
     _VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/]+/video/[^/]+/(?P<id>n?\d+))'

     _TESTS = [
@@ -315,7 +315,7 @@ class NBCSportsStreamIE(AdobePassIE):
         }


-class NBCNewsIE(ThePlatformIE):
+class NBCNewsIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
     _VALID_URL = r'(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/([^/]+/)*(?:.*-)?(?P<id>[^/?]+)'
     _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1']

diff --git a/yt_dlp/extractor/ndr.py b/yt_dlp/extractor/ndr.py
index ad8dbd7a7..90a658cd8 100644
--- a/yt_dlp/extractor/ndr.py
+++ b/yt_dlp/extractor/ndr.py
@@ -218,7 +218,7 @@ class NJoyIE(NDRBaseIE):
     }


-class NDREmbedBaseIE(InfoExtractor):
+class NDREmbedBaseIE(InfoExtractor):  # XXX: Conventionally, Concrete class names do not end in BaseIE
     IE_NAME = 'ndr:embed:base'
     _VALID_URL = r'(?:ndr:(?P<id_s>[\da-z]+)|https?://www\.ndr\.de/(?P<id>[\da-z]+)-ppjson\.json)'
     _TESTS = [{
@@ -315,7 +315,7 @@ class NDREmbedBaseIE(InfoExtractor):
     }


-class NDREmbedIE(NDREmbedBaseIE):
+class NDREmbedIE(NDREmbedBaseIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'ndr:embed'
     _VALID_URL = r'https?://(?:\w+\.)*ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:(?:ard)?player|externalPlayer)\.html'
     _TESTS = [{
@@ -413,7 +413,7 @@ class NDREmbedIE(NDREmbedBaseIE):
     }]


-class NJoyEmbedIE(NDREmbedBaseIE):
+class NJoyEmbedIE(NDREmbedBaseIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'njoy:embed'
     _VALID_URL = r'https?://(?:www\.)?n-joy\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)_[^/]+\.html'
     _TESTS = [{
diff --git a/yt_dlp/extractor/nextmedia.py b/yt_dlp/extractor/nextmedia.py
index 1f83089fc..0e47a4d45 100644
--- a/yt_dlp/extractor/nextmedia.py
+++ b/yt_dlp/extractor/nextmedia.py
@@ -77,7 +77,7 @@ class NextMediaIE(InfoExtractor):
         return self._og_search_property('description', page)


-class NextMediaActionNewsIE(NextMediaIE):
+class NextMediaActionNewsIE(NextMediaIE):  # XXX: Do not subclass from concrete IE
     IE_DESC = '蘋果日報 - 動新聞'
     _VALID_URL = r'https?://hk\.dv\.nextmedia\.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+'
     _TESTS = [{
@@ -102,7 +102,7 @@ class NextMediaActionNewsIE(NextMediaIE):
         return self._extract_from_nextmedia_page(news_id, url, article_page)


-class AppleDailyIE(NextMediaIE):
+class AppleDailyIE(NextMediaIE):  # XXX: Do not subclass from concrete IE
     IE_DESC = '臺灣蘋果日報'
     _VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/[^/]+/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
     _TESTS = [{
diff --git a/yt_dlp/extractor/nick.py b/yt_dlp/extractor/nick.py
index 2a228d8de..de22cb8d6 100644
--- a/yt_dlp/extractor/nick.py
+++ b/yt_dlp/extractor/nick.py
@@ -188,7 +188,7 @@ class NickDeIE(MTVServicesInfoExtractor):
         return self._remove_template_parameter(config['feedWithQueryParams'])


-class NickNightIE(NickDeIE):
+class NickNightIE(NickDeIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'nicknight'
     _VALID_URL = r'https?://(?:www\.)(?P<host>nicknight\.(?:de|at|tv))/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
     _TESTS = [{
diff --git a/yt_dlp/extractor/npo.py b/yt_dlp/extractor/npo.py
index 0b5f32c2e..b307e6a78 100644
--- a/yt_dlp/extractor/npo.py
+++ b/yt_dlp/extractor/npo.py
@@ -599,7 +599,7 @@ class NPORadioFragmentIE(InfoExtractor):
         }


-class NPODataMidEmbedIE(InfoExtractor):
+class NPODataMidEmbedIE(InfoExtractor):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
     def _real_extract(self, url):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
@@ -653,7 +653,7 @@ class HetKlokhuisIE(NPODataMidEmbedIE):
     }


-class NPOPlaylistBaseIE(NPOIE):
+class NPOPlaylistBaseIE(NPOIE):  # XXX: Do not subclass from concrete IE
     def _real_extract(self, url):
         playlist_id = self._match_id(url)

diff --git a/yt_dlp/extractor/nrk.py b/yt_dlp/extractor/nrk.py
index 7eb5b21cb..14951f8e1 100644
--- a/yt_dlp/extractor/nrk.py
+++ b/yt_dlp/extractor/nrk.py
@@ -735,7 +735,7 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
             entries, series_id, titles.get('title'), titles.get('subtitle'))


-class NRKTVDirekteIE(NRKTVIE):
+class NRKTVDirekteIE(NRKTVIE):  # XXX: Do not subclass from concrete IE
     IE_DESC = 'NRK TV Direkte and NRK Radio Direkte'
     _VALID_URL = r'https?://(?:tv|radio)\.nrk\.no/direkte/(?P<id>[^/?#&]+)'

diff --git a/yt_dlp/extractor/once.py b/yt_dlp/extractor/once.py
index 460b82d02..989f10abb 100644
--- a/yt_dlp/extractor/once.py
+++ b/yt_dlp/extractor/once.py
@@ -3,7 +3,7 @@ import re
 from .common import InfoExtractor


-class OnceIE(InfoExtractor):
+class OnceIE(InfoExtractor):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
     _VALID_URL = r'https?://.+?\.unicornmedia\.com/now/(?:ads/vmap/)?[^/]+/[^/]+/(?P<domain_id>[^/]+)/(?P<application_id>[^/]+)/(?:[^/]+/)?(?P<media_item_id>[^/]+)/content\.(?:once|m3u8|mp4)'
     ADAPTIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/master/playlist/%s/%s/%s/content.m3u8'
     PROGRESSIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/media/progressive/%s/%s/%s/%s/content.mp4'
diff --git a/yt_dlp/extractor/peekvids.py b/yt_dlp/extractor/peekvids.py
index f1c4469d6..fd25b5adb 100644
--- a/yt_dlp/extractor/peekvids.py
+++ b/yt_dlp/extractor/peekvids.py
@@ -51,7 +51,7 @@ class PeekVidsIE(InfoExtractor):
         return info


-class PlayVidsIE(PeekVidsIE):
+class PlayVidsIE(PeekVidsIE):  # XXX: Do not subclass from concrete IE
     _VALID_URL = r'https?://(?:www\.)?playvids\.com/(?:embed/|[^/]{2}/)?(?P<id>[^/?#]*)'
     _TESTS = [{
         'url': 'https://www.playvids.com/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp',
diff --git a/yt_dlp/extractor/radlive.py b/yt_dlp/extractor/radlive.py
index d89c9563b..ed38a07f0 100644
--- a/yt_dlp/extractor/radlive.py
+++ b/yt_dlp/extractor/radlive.py
@@ -94,7 +94,7 @@ class RadLiveIE(InfoExtractor):
         return result


-class RadLiveSeasonIE(RadLiveIE):
+class RadLiveSeasonIE(RadLiveIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'radlive:season'
     _VALID_URL = r'https?://(?:www\.)?rad\.live/content/season/(?P<id>[a-f0-9-]+)'
     _TESTS = [{
@@ -134,7 +134,7 @@ class RadLiveSeasonIE(RadLiveIE):
         return self.playlist_result(entries, season_id, video_info.get('title'))


-class RadLiveChannelIE(RadLiveIE):
+class RadLiveChannelIE(RadLiveIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'radlive:channel'
     _VALID_URL = r'https?://(?:www\.)?rad\.live/content/channel/(?P<id>[a-f0-9-]+)'
     _TESTS = [{
diff --git a/yt_dlp/extractor/rai.py b/yt_dlp/extractor/rai.py
index 6ed8227eb..cd19ec07b 100644
--- a/yt_dlp/extractor/rai.py
+++ b/yt_dlp/extractor/rai.py
@@ -356,7 +356,7 @@ class RaiPlayIE(RaiBaseIE):
     }


-class RaiPlayLiveIE(RaiPlayIE):
+class RaiPlayLiveIE(RaiPlayIE):  # XXX: Do not subclass from concrete IE
     _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+))'
     _TESTS = [{
         'url': 'http://www.raiplay.it/dirette/rainews24',
@@ -504,7 +504,7 @@ class RaiPlaySoundIE(RaiBaseIE):
     }


-class RaiPlaySoundLiveIE(RaiPlaySoundIE):
+class RaiPlaySoundLiveIE(RaiPlaySoundIE):  # XXX: Do not subclass from concrete IE
     _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplaysound\.it/(?P<id>[^/?#&]+)$)'
     _TESTS = [{
         'url': 'https://www.raiplaysound.it/radio2',
@@ -717,7 +717,7 @@ class RaiIE(RaiBaseIE):
     }


-class RaiNewsIE(RaiIE):
+class RaiNewsIE(RaiIE):  # XXX: Do not subclass from concrete IE
     _VALID_URL = rf'https?://(www\.)?rainews\.it/(?!articoli)[^?#]+-(?P<id>{RaiBaseIE._UUID_RE})(?:-[^/?#]+)?\.html'
     _EMBED_REGEX = [rf'<iframe[^>]+data-src="(?P<url>/iframe/[^?#]+?{RaiBaseIE._UUID_RE}\.html)']
     _TESTS = [{
diff --git a/yt_dlp/extractor/redbulltv.py b/yt_dlp/extractor/redbulltv.py
index 2f0e41c5b..50e61ba6e 100644
--- a/yt_dlp/extractor/redbulltv.py
+++ b/yt_dlp/extractor/redbulltv.py
@@ -110,7 +110,7 @@ class RedBullTVIE(InfoExtractor):
         return self.extract_info(video_id)


-class RedBullEmbedIE(RedBullTVIE):
+class RedBullEmbedIE(RedBullTVIE):  # XXX: Do not subclass from concrete IE
     _VALID_URL = r'https?://(?:www\.)?redbull\.com/embed/(?P<id>rrn:content:[^:]+:[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}:[a-z]{2}-[A-Z]{2,3})'
     _TESTS = [{
         # HLS manifest accessible only using assetId
diff --git a/yt_dlp/extractor/rts.py b/yt_dlp/extractor/rts.py
index e5ba1a26b..6644538ed 100644
--- a/yt_dlp/extractor/rts.py
+++ b/yt_dlp/extractor/rts.py
@@ -12,7 +12,7 @@ from ..utils import (
 )


-class RTSIE(SRGSSRIE):
+class RTSIE(SRGSSRIE):  # XXX: Do not subclass from concrete IE
     IE_DESC = 'RTS.ch'
     _VALID_URL = r'rts:(?P<rts_id>\d+)|https?://(?:.+?\.)?rts\.ch/(?:[^/]+/){2,}(?P<id>[0-9]+)-(?P<display_id>.+?)\.html'

diff --git a/yt_dlp/extractor/rtve.py b/yt_dlp/extractor/rtve.py
index 798dde7fa..b9b181feb 100644
--- a/yt_dlp/extractor/rtve.py
+++ b/yt_dlp/extractor/rtve.py
@@ -170,7 +170,7 @@ class RTVEALaCartaIE(InfoExtractor):
             for s in subs)


-class RTVEAudioIE(RTVEALaCartaIE):
+class RTVEAudioIE(RTVEALaCartaIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'rtve.es:audio'
     IE_DESC = 'RTVE audio'
     _VALID_URL = r'https?://(?:www\.)?rtve\.es/(alacarta|play)/audios/[^/]+/[^/]+/(?P<id>[0-9]+)'
@@ -257,7 +257,7 @@ class RTVEAudioIE(RTVEALaCartaIE):
         }


-class RTVEInfantilIE(RTVEALaCartaIE):
+class RTVEInfantilIE(RTVEALaCartaIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'rtve.es:infantil'
     IE_DESC = 'RTVE infantil'
     _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/]+/video/[^/]+/(?P<id>[0-9]+)/'
@@ -276,7 +276,7 @@ class RTVEInfantilIE(RTVEALaCartaIE):
     }]


-class RTVELiveIE(RTVEALaCartaIE):
+class RTVELiveIE(RTVEALaCartaIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'rtve.es:live'
     IE_DESC = 'RTVE.es live streams'
     _VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'
diff --git a/yt_dlp/extractor/rutube.py b/yt_dlp/extractor/rutube.py
index 34af0d594..cad3caa60 100644
--- a/yt_dlp/extractor/rutube.py
+++ b/yt_dlp/extractor/rutube.py
@@ -240,7 +240,6 @@ class RutubeMovieIE(RutubePlaylistBaseIE):
     IE_NAME = 'rutube:movie'
     IE_DESC = 'Rutube movies'
     _VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)'
-    _TESTS = []

     _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json'
     _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json'
diff --git a/yt_dlp/extractor/sevenplus.py b/yt_dlp/extractor/sevenplus.py
index 8e95bc230..36d1a86fd 100644
--- a/yt_dlp/extractor/sevenplus.py
+++ b/yt_dlp/extractor/sevenplus.py
@@ -13,7 +13,7 @@ from ..utils import (
 )


-class SevenPlusIE(BrightcoveNewIE):
+class SevenPlusIE(BrightcoveNewIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = '7plus'
     _VALID_URL = r'https?://(?:www\.)?7plus\.com\.au/(?P<path>[^?]+\?.*?\bepisode-id=(?P<id>[^&#]+))'
     _TESTS = [{
diff --git a/yt_dlp/extractor/skyit.py b/yt_dlp/extractor/skyit.py
index 2daaaf75c..9e4d7d35d 100644
--- a/yt_dlp/extractor/skyit.py
+++ b/yt_dlp/extractor/skyit.py
@@ -70,7 +70,7 @@ class SkyItPlayerIE(InfoExtractor):
         return self._parse_video(video, video_id)


-class SkyItVideoIE(SkyItPlayerIE):
+class SkyItVideoIE(SkyItPlayerIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'video.sky.it'
     _VALID_URL = r'https?://(?:masterchef|video|xfactor)\.sky\.it(?:/[^/]+)*/video/[0-9a-z-]+-(?P<id>\d+)'
     _TESTS = [{
@@ -99,7 +99,7 @@ class SkyItVideoIE(SkyItPlayerIE):
         return self._player_url_result(video_id)


-class SkyItVideoLiveIE(SkyItPlayerIE):
+class SkyItVideoLiveIE(SkyItPlayerIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'video.sky.it:live'
     _VALID_URL = r'https?://video\.sky\.it/diretta/(?P<id>[^/?&#]+)'
     _TEST = {
@@ -127,7 +127,7 @@ class SkyItVideoLiveIE(SkyItPlayerIE):
         return self._parse_video(livestream, asset_id)


-class SkyItIE(SkyItPlayerIE):
+class SkyItIE(SkyItPlayerIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'sky.it'
     _VALID_URL = r'https?://(?:sport|tg24)\.sky\.it(?:/[^/]+)*/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
     _TESTS = [{
@@ -166,7 +166,7 @@ class SkyItIE(SkyItPlayerIE):
         return self._player_url_result(video_id)


-class SkyItArteIE(SkyItIE):
+class SkyItArteIE(SkyItIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'arte.sky.it'
     _VALID_URL = r'https?://arte\.sky\.it/video/(?P<id>[^/?&#]+)'
     _TESTS = [{
@@ -187,7 +187,7 @@ class SkyItArteIE(SkyItIE):
     _VIDEO_ID_REGEX = r'"embedUrl"\s*:\s*"(?:https:)?//player\.sky\.it/player/external\.html\?[^"]*\bid=(\d+)'


-class CieloTVItIE(SkyItIE):
+class CieloTVItIE(SkyItIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'cielotv.it'
     _VALID_URL = r'https?://(?:www\.)?cielotv\.it/video/(?P<id>[^.]+)\.html'
     _TESTS = [{
@@ -208,7 +208,7 @@ class CieloTVItIE(SkyItIE):
     _VIDEO_ID_REGEX = r'videoId\s*=\s*"(\d+)"'


-class TV8ItIE(SkyItVideoIE):
+class TV8ItIE(SkyItVideoIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'tv8.it'
     _VALID_URL = r'https?://(?:www\.)?tv8\.it/(?:show)?video/[0-9a-z-]+-(?P<id>\d+)'
     _TESTS = [{
diff --git a/yt_dlp/extractor/southpark.py b/yt_dlp/extractor/southpark.py
index 7381ac362..e23f192a1 100644
--- a/yt_dlp/extractor/southpark.py
+++ b/yt_dlp/extractor/southpark.py
@@ -34,7 +34,7 @@ class SouthParkIE(MTVServicesInfoExtractor):
     }


-class SouthParkEsIE(SouthParkIE):
+class SouthParkEsIE(SouthParkIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'southpark.cc.com:español'
     _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/es/episodios/(?P<id>.+?)(\?|#|$))'
     _LANG = 'es'
@@ -50,7 +50,7 @@ class SouthParkEsIE(SouthParkIE):
     }]


-class SouthParkDeIE(SouthParkIE):
+class SouthParkDeIE(SouthParkIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'southpark.de'
     _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.de/(?:(en/(videoclip|collections|episodes|video-clips))|(videoclip|collections|folgen))/(?P<id>(?P<unique_id>.+?)/.+?)(?:\?|#|$))'
     _TESTS = [{
@@ -109,7 +109,7 @@ class SouthParkDeIE(SouthParkIE):
         return


-class SouthParkLatIE(SouthParkIE):
+class SouthParkLatIE(SouthParkIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'southpark.lat'
     _VALID_URL = r'https?://(?:www\.)?southpark\.lat/(?:en/)?(?:video-?clips?|collections|episod(?:e|io)s)/(?P<id>[^/?#&]+)'
     _TESTS = [{
@@ -152,7 +152,7 @@ class SouthParkLatIE(SouthParkIE):
         return


-class SouthParkNlIE(SouthParkIE):
+class SouthParkNlIE(SouthParkIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'southpark.nl'
     _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.nl/(?:clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))'
     _FEED_URL = 'http://www.southpark.nl/feeds/video-player/mrss/'
@@ -167,7 +167,7 @@ class SouthParkNlIE(SouthParkIE):
     }]


-class SouthParkDkIE(SouthParkIE):
+class SouthParkDkIE(SouthParkIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'southparkstudios.dk'
     _VALID_URL = r'https?://(?:www\.)?(?P<url>southparkstudios\.(?:dk|nu)/(?:clips|full-episodes|collections)/(?P<id>.+?)(\?|#|$))'
     _FEED_URL = 'http://www.southparkstudios.dk/feeds/video-player/mrss/'
diff --git a/yt_dlp/extractor/tele5.py b/yt_dlp/extractor/tele5.py
index 58d343b44..9260db2b4 100644
--- a/yt_dlp/extractor/tele5.py
+++ b/yt_dlp/extractor/tele5.py
@@ -6,7 +6,7 @@ from ..utils import (
 )


-class Tele5IE(DPlayIE):
+class Tele5IE(DPlayIE):  # XXX: Do not subclass from concrete IE
     _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
     _GEO_COUNTRIES = ['DE']
     _TESTS = [{
diff --git a/yt_dlp/extractor/theweatherchannel.py b/yt_dlp/extractor/theweatherchannel.py
index 9e94cd1ea..4f6d2ecba 100644
--- a/yt_dlp/extractor/theweatherchannel.py
+++ b/yt_dlp/extractor/theweatherchannel.py
@@ -8,7 +8,7 @@ from ..utils import (
 )


-class TheWeatherChannelIE(ThePlatformIE):
+class TheWeatherChannelIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
     _VALID_URL = r'https?://(?:www\.)?weather\.com(?P<asset_name>(?:/(?P<locale>[a-z]{2}-[A-Z]{2}))?/(?:[^/]+/)*video/(?P<id>[^/?#]+))'
     _TESTS = [{
         'url': 'https://weather.com/series/great-outdoors/video/ice-climber-is-in-for-a-shock',
diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py
index 4a35a241c..79a223861 100644
--- a/yt_dlp/extractor/tiktok.py
+++ b/yt_dlp/extractor/tiktok.py
@@ -655,7 +655,7 @@ class TikTokUserIE(TikTokBaseIE):
         return self.playlist_result(self._entries_api(user_id, videos), user_id, user_name, thumbnail=thumbnail)


-class TikTokBaseListIE(TikTokBaseIE):
+class TikTokBaseListIE(TikTokBaseIE):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
     def _entries(self, list_id, display_id):
         query = {
             self._QUERY_NAME: list_id,
@@ -764,7 +764,7 @@ class TikTokTagIE(TikTokBaseListIE):
         return self.playlist_result(self._entries(tag_id, display_id), tag_id, display_id)


-class DouyinIE(TikTokIE):
+class DouyinIE(TikTokIE):  # XXX: Do not subclass from concrete IE
     _VALID_URL = r'https?://(?:www\.)?douyin\.com/video/(?P<id>[0-9]+)'
     _TESTS = [{
         'url': 'https://www.douyin.com/video/6961737553342991651',
diff --git a/yt_dlp/extractor/toutv.py b/yt_dlp/extractor/toutv.py
index 349c0bded..f60c199f0 100644
--- a/yt_dlp/extractor/toutv.py
+++ b/yt_dlp/extractor/toutv.py
@@ -9,7 +9,7 @@ from ..utils import (
 )


-class TouTvIE(RadioCanadaIE):
+class TouTvIE(RadioCanadaIE):  # XXX: Do not subclass from concrete IE
     _NETRC_MACHINE = 'toutv'
     IE_NAME = 'tou.tv'
     _VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/S[0-9]+[EC][0-9]+)?)'
diff --git a/yt_dlp/extractor/tube8.py b/yt_dlp/extractor/tube8.py
index b092ecad5..77ed05ffd 100644
--- a/yt_dlp/extractor/tube8.py
+++ b/yt_dlp/extractor/tube8.py
@@ -7,7 +7,7 @@ from ..utils import (
 from .keezmovies import KeezMoviesIE


-class Tube8IE(KeezMoviesIE):
+class Tube8IE(KeezMoviesIE):  # XXX: Do not subclass from concrete IE
     _VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/(?P<id>\d+)'
     _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?tube8\.com/embed/(?:[^/]+/)+\d+)']
     _TESTS = [{
diff --git a/yt_dlp/extractor/tvnow.py b/yt_dlp/extractor/tvnow.py
index 4aa558d83..24add5260 100644
--- a/yt_dlp/extractor/tvnow.py
+++ b/yt_dlp/extractor/tvnow.py
@@ -426,7 +426,7 @@ class TVNowIE(TVNowNewBaseIE):
         return self._extract_video(info, video_id, display_id)


-class TVNowFilmIE(TVNowIE):
+class TVNowFilmIE(TVNowIE):  # XXX: Do not subclass from concrete IE
     _VALID_URL = r'''(?x)
                     (?P<base_url>https?://
                         (?:www\.)?tvnow\.(?:de|at|ch)/
diff --git a/yt_dlp/extractor/udemy.py b/yt_dlp/extractor/udemy.py
index 1dc2dbdc4..2c8a35473 100644
--- a/yt_dlp/extractor/udemy.py
+++ b/yt_dlp/extractor/udemy.py
@@ -405,7 +405,7 @@ class UdemyIE(InfoExtractor):
         }


-class UdemyCourseIE(UdemyIE):
+class UdemyCourseIE(UdemyIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'udemy:course'
     _VALID_URL = r'https?://(?:[^/]+\.)?udemy\.com/(?P<id>[^/?#&]+)'
     _TESTS = [{
diff --git a/yt_dlp/extractor/uplynk.py b/yt_dlp/extractor/uplynk.py
index 04c96f388..9b560f719 100644
--- a/yt_dlp/extractor/uplynk.py
+++ b/yt_dlp/extractor/uplynk.py
@@ -52,10 +52,9 @@ class UplynkIE(InfoExtractor):
         return self._extract_uplynk_info(url)


-class UplynkPreplayIE(UplynkIE):
+class UplynkPreplayIE(UplynkIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'uplynk:preplay'
     _VALID_URL = r'https?://.*?\.uplynk\.com/preplay2?/(?P<path>ext/[0-9a-f]{32}/(?P<external_id>[^/?&]+)|(?P<id>[0-9a-f]{32}))\.json'
-    _TEST = None

     def _real_extract(self, url):
         path, external_id, video_id = self._match_valid_url(url).groups()
diff --git a/yt_dlp/extractor/usanetwork.py b/yt_dlp/extractor/usanetwork.py
index d6b58a51c..4a06a9ad4 100644
--- a/yt_dlp/extractor/usanetwork.py
+++ b/yt_dlp/extractor/usanetwork.py
@@ -1,7 +1,7 @@
 from .nbc import NBCIE


-class USANetworkIE(NBCIE):
+class USANetworkIE(NBCIE):  # XXX: Do not subclass from concrete IE
     _VALID_URL = r'https?(?P<permalink>://(?:www\.)?usanetwork\.com/(?:[^/]+/videos?|movies?)/(?:[^/]+/)?(?P<id>\d+))'
     _TESTS = [{
         'url': 'https://www.usanetwork.com/peacock-trailers/video/intelligence-trailer/4185302',
diff --git a/yt_dlp/extractor/veoh.py b/yt_dlp/extractor/veoh.py
index a32c2fccb..d9b3ab115 100644
--- a/yt_dlp/extractor/veoh.py
+++ b/yt_dlp/extractor/veoh.py
@@ -130,7 +130,7 @@ class VeohIE(InfoExtractor):
         }


-class VeohUserIE(VeohIE):
+class VeohUserIE(VeohIE):  # XXX: Do not subclass from concrete IE
     _VALID_URL = r'https?://(?:www\.)?veoh\.com/users/(?P<id>[\w-]+)'
     IE_NAME = 'veoh:user'

diff --git a/yt_dlp/extractor/vgtv.py b/yt_dlp/extractor/vgtv.py
index 3e0af7fb2..b637afddf 100644
--- a/yt_dlp/extractor/vgtv.py
+++ b/yt_dlp/extractor/vgtv.py
@@ -9,7 +9,7 @@ from ..utils import (
 )


-class VGTVIE(XstreamIE):
+class VGTVIE(XstreamIE):  # XXX: Do not subclass from concrete IE
     IE_DESC = 'VGTV, BTTV, FTV, Aftenposten and Aftonbladet'
     _GEO_BYPASS = False

diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py
index 2e36b8861..1b21c0050 100644
--- a/yt_dlp/extractor/vimeo.py
+++ b/yt_dlp/extractor/vimeo.py
@@ -1004,7 +1004,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
         return merge_dicts(info_dict, info_dict_config, json_ld)


-class VimeoOndemandIE(VimeoIE):
+class VimeoOndemandIE(VimeoIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'vimeo:ondemand'
     _VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/(?:[^/]+/)?(?P<id>[^/?#&]+)'
     _TESTS = [{
@@ -1129,7 +1129,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
         return self._extract_videos(channel_id, self._BASE_URL_TEMPL % channel_id)


-class VimeoUserIE(VimeoChannelIE):
+class VimeoUserIE(VimeoChannelIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'vimeo:user'
     _VALID_URL = r'https://vimeo\.com/(?!(?:[0-9]+|watchlater)(?:$|[?#/]))(?P<id>[^/]+)(?:/videos)?/?(?:$|[?#])'
     _TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'
@@ -1239,7 +1239,7 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
             entries, album_id, album.get('name'), album.get('description'))


-class VimeoGroupsIE(VimeoChannelIE):
+class VimeoGroupsIE(VimeoChannelIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'vimeo:group'
     _VALID_URL = r'https://vimeo\.com/groups/(?P<id>[^/]+)(?:/(?!videos?/\d+)|$)'
     _TESTS = [{
@@ -1331,7 +1331,7 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
         return info_dict


-class VimeoWatchLaterIE(VimeoChannelIE):
+class VimeoWatchLaterIE(VimeoChannelIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'vimeo:watchlater'
     IE_DESC = 'Vimeo watch later list, ":vimeowatchlater" keyword (requires authentication)'
     _VALID_URL = r'https://vimeo\.com/(?:home/)?watchlater|:vimeowatchlater'
@@ -1354,7 +1354,7 @@ class VimeoWatchLaterIE(VimeoChannelIE):
         return self._extract_videos('watchlater', 'https://vimeo.com/watchlater')


-class VimeoLikesIE(VimeoChannelIE):
+class VimeoLikesIE(VimeoChannelIE):  # XXX: Do not subclass from concrete IE
     _VALID_URL = r'https://(?:www\.)?vimeo\.com/(?P<id>[^/]+)/likes/?(?:$|[?#]|sort:)'
     IE_NAME = 'vimeo:likes'
     IE_DESC = 'Vimeo user likes'
r'https://(?:www\.)?vimeo\.com/(?P<id>[^/]+)/likes/?(?:$|[?#]|sort:)' IE_NAME = 'vimeo:likes' IE_DESC = 'Vimeo user likes' diff --git a/yt_dlp/extractor/vvvvid.py b/yt_dlp/extractor/vvvvid.py index f0156d10c..0c3e83a0a 100644 --- a/yt_dlp/extractor/vvvvid.py +++ b/yt_dlp/extractor/vvvvid.py @@ -242,7 +242,7 @@ class VVVVIDIE(InfoExtractor): return info -class VVVVIDShowIE(VVVVIDIE): +class VVVVIDShowIE(VVVVIDIE): # XXX: Do not subclass from concrete IE _VALID_URL = r'(?P<base_url>%s(?P<id>\d+)(?:/(?P<show_title>[^/?&#]+))?)/?(?:[?#&]|$)' % VVVVIDIE._VALID_URL_BASE _TESTS = [{ 'url': 'https://www.vvvvid.it/show/156/psyco-pass', diff --git a/yt_dlp/extractor/wdr.py b/yt_dlp/extractor/wdr.py index d0ad69477..7b2e7c8e0 100644 --- a/yt_dlp/extractor/wdr.py +++ b/yt_dlp/extractor/wdr.py @@ -133,7 +133,7 @@ class WDRIE(InfoExtractor): } -class WDRPageIE(WDRIE): +class WDRPageIE(WDRIE): # XXX: Do not subclass from concrete IE _MAUS_REGEX = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/)*?(?P<maus_id>[^/?#.]+)(?:/?|/index\.php5|\.php5)$' _PAGE_REGEX = r'/(?:mediathek/)?(?:[^/]+/)*(?P<display_id>[^/]+)\.html' _VALID_URL = r'https?://(?:www\d?\.)?(?:(?:kinder\.)?wdr\d?|sportschau)\.de' + _PAGE_REGEX + '|' + _MAUS_REGEX diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 9d51f38ba..7e3530c0f 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -1051,7 +1051,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})" \s[^>]*\bclass="[^"]*\blazy-load-youtube''', ] - _RETURN_TYPE = 'video' # While there are "multifeed" test cases, they don't seem to actually exist anymore + _RETURN_TYPE = 'video' # XXX: How to handle multifeed? _PLAYER_INFO_RE = ( r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player', @@ -1582,66 +1582,99 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'skip': 'This live event has ended.', }, { - # Multifeed videos (multiple cameras), URL is for Main Camera - 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg', + # Multifeed videos (multiple cameras), URL can be of any Camera + 'url': 'https://www.youtube.com/watch?v=zaPI8MvL8pg', 'info_dict': { - 'id': 'jvGDaLqkpTg', - 'title': 'Tom Clancy Free Weekend Rainbow Whatever', - 'description': 'md5:e03b909557865076822aa169218d6a5d', + 'id': 'zaPI8MvL8pg', + 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04', + 'description': 'md5:563ccbc698b39298481ca3c571169519', }, 'playlist': [{ 'info_dict': { - 'id': 'jvGDaLqkpTg', + 'id': 'j5yGuxZ8lLU', 'ext': 'mp4', - 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)', - 'description': 'md5:e03b909557865076822aa169218d6a5d', - 'duration': 10643, - 'upload_date': '20161111', - 'uploader': 'Team PGP', - 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg', - 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg', + 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Chris)', + 'uploader': 'WiiLikeToPlay', + 'description': 'md5:563ccbc698b39298481ca3c571169519', + 'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray', + 'duration': 10120, + 'channel_follower_count': int, + 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg', + 'availability': 'public', + 'playable_in_embed': True, + 'upload_date': '20131105', + 'uploader_id': 'WiiRikeToPray', + 'categories': ['Gaming'], + 'live_status': 'was_live', + 'tags': 'count:24', + 'release_timestamp': 1383701910, + 'thumbnail': 'https://i.ytimg.com/vi/j5yGuxZ8lLU/maxresdefault.jpg', + 
'comment_count': int, + 'age_limit': 0, + 'like_count': int, + 'channel_id': 'UCN2XePorRokPB9TEgRZpddg', + 'channel': 'WiiLikeToPlay', + 'view_count': int, + 'release_date': '20131106', }, }, { 'info_dict': { - 'id': '3AKt1R1aDnw', + 'id': 'zaPI8MvL8pg', 'ext': 'mp4', - 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)', - 'description': 'md5:e03b909557865076822aa169218d6a5d', - 'duration': 10991, - 'upload_date': '20161111', - 'uploader': 'Team PGP', - 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg', - 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg', + 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Tyson)', + 'uploader_id': 'WiiRikeToPray', + 'availability': 'public', + 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg', + 'channel': 'WiiLikeToPlay', + 'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray', + 'channel_follower_count': int, + 'description': 'md5:563ccbc698b39298481ca3c571169519', + 'duration': 10108, + 'age_limit': 0, + 'like_count': int, + 'tags': 'count:24', + 'channel_id': 'UCN2XePorRokPB9TEgRZpddg', + 'uploader': 'WiiLikeToPlay', + 'release_timestamp': 1383701915, + 'comment_count': int, + 'upload_date': '20131105', + 'thumbnail': 'https://i.ytimg.com/vi/zaPI8MvL8pg/maxresdefault.jpg', + 'release_date': '20131106', + 'playable_in_embed': True, + 'live_status': 'was_live', + 'categories': ['Gaming'], + 'view_count': int, }, }, { 'info_dict': { - 'id': 'RtAMM00gpVc', + 'id': 'R7r3vfO7Hao', 'ext': 'mp4', - 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)', - 'description': 'md5:e03b909557865076822aa169218d6a5d', - 'duration': 10995, - 'upload_date': '20161111', - 'uploader': 'Team PGP', - 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg', - 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg', - }, - }, { - 'info_dict': { - 'id': '6N2fdlP3C5U', - 'ext': 'mp4', - 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)', - 'description': 'md5:e03b909557865076822aa169218d6a5d', - 'duration': 10990, - 'upload_date': '20161111', - 'uploader': 'Team PGP', - 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg', - 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg', + 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Spencer)', + 'thumbnail': 'https://i.ytimg.com/vi/R7r3vfO7Hao/maxresdefault.jpg', + 'channel_id': 'UCN2XePorRokPB9TEgRZpddg', + 'like_count': int, + 'availability': 'public', + 'playable_in_embed': True, + 'upload_date': '20131105', + 'description': 'md5:563ccbc698b39298481ca3c571169519', + 'uploader_id': 'WiiRikeToPray', + 'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray', + 'channel_follower_count': int, + 'tags': 'count:24', + 'release_date': '20131106', + 'uploader': 'WiiLikeToPlay', + 'comment_count': int, + 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg', + 'channel': 'WiiLikeToPlay', + 'categories': ['Gaming'], + 'release_timestamp': 1383701914, + 'live_status': 'was_live', + 'age_limit': 0, + 'duration': 10128, + 'view_count': int, }, }], - 'params': { - 'skip_download': True, - }, - 'skip': 'Not multifeed anymore', + 'params': {'skip_download': True}, }, { # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index a6bf897dc..7cba13678 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -5839,7 +5839,7 @@ def cached_method(f): bound_args.apply_defaults() key = 
tuple(bound_args.arguments.values())[1:]
-        cache = vars(self).setdefault('__cached_method__cache', {}).setdefault(f.__name__, {})
+        cache = vars(self).setdefault('_cached_method__cache', {}).setdefault(f.__name__, {})
         if key not in cache:
             cache[key] = f(self, *args, **kwargs)
         return cache[key]

From 105bfd90f572cdc4f4a06bfcbadde0f1b231a098 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Wed, 16 Nov 2022 06:52:57 +0530
Subject: [PATCH 1770/2552] Add new field `aspect_ratio`

Closes #5402
---
 README.md                  | 1 +
 yt_dlp/YoutubeDL.py        | 5 ++++-
 yt_dlp/extractor/common.py | 3 +++
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 13a2c17c7..367c6e036 100644
--- a/README.md
+++ b/README.md
@@ -1442,6 +1442,7 @@ The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `
  - `filesize_approx`: An estimate for the number of bytes
  - `width`: Width of the video, if known
  - `height`: Height of the video, if known
+ - `aspect_ratio`: Aspect ratio of the video, if known
  - `tbr`: Average bitrate of audio and video in KBit/s
  - `abr`: Average audio bitrate in KBit/s
  - `vbr`: Average video bitrate in KBit/s
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 20940085e..25c35dc53 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -547,7 +547,7 @@ class YoutubeDL:
     _format_fields = {
         # NB: Keep in sync with the docstring of extractor/common.py
         'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
-        'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
+        'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
         'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
         'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
         'preference', 'language', 'language_preference', 'quality', 'source_preference',
@@ -2186,6 +2186,7 @@ class YoutubeDL:
                 'vcodec': the_only_video.get('vcodec'),
                 'vbr': the_only_video.get('vbr'),
                 'stretched_ratio': the_only_video.get('stretched_ratio'),
+                'aspect_ratio': the_only_video.get('aspect_ratio'),
             })

         if the_only_audio:
@@ -2628,6 +2629,8 @@ class YoutubeDL:
                 format['resolution'] = self.format_resolution(format, default=None)
             if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
                 format['dynamic_range'] = 'SDR'
+            if format.get('aspect_ratio') is None:
+                format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
             if (info_dict.get('duration') and format.get('tbr')
                     and not format.get('filesize') and not format.get('filesize_approx')):
                 format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 3a1af3290..94128bd84 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -150,7 +150,10 @@ class InfoExtractor:
                     ("3D" or "DASH video")
                     * width      Width of the video, if known
                     * height     Height of the video, if known
+                    * aspect_ratio  Aspect ratio of the video, if known
+                                 Automatically calculated from width and height
                     * resolution Textual description of width and height
+                                 Automatically calculated from width and height
                     * dynamic_range The dynamic range of the video. One of:
                       "SDR" (None), "HDR10", "HDR10+, "HDR12", "HLG, "DV"
                     * tbr        Average bitrate of audio and video in KBit/s
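The new field is filled in lazily: `try_call` swallows the exception raised when either dimension is unknown, so `aspect_ratio` simply stays `None` instead of aborting extraction. A minimal sketch of the resulting behaviour (plain Python mirroring the expression above, not quoting yt-dlp internals):

    def aspect_ratio(fmt):
        # Same contract as try_call(lambda: round(fmt['width'] / fmt['height'], 2)):
        # a missing/None dimension yields None instead of an exception
        try:
            return round(fmt['width'] / fmt['height'], 2)
        except (KeyError, TypeError, ZeroDivisionError):
            return None

    assert aspect_ratio({'width': 1920, 'height': 1080}) == 1.78
    assert aspect_ratio({'width': 1080, 'height': 1920}) == 0.56  # vertical video
    assert aspect_ratio({'width': 1920}) is None                  # height unknown

Since the README hunk above lists it among the numeric meta fields, it should also be usable in format filters, e.g. `-f "bv*[aspect_ratio<=1]"` to prefer vertical video.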
One of: "SDR" (None), "HDR10", "HDR10+, "HDR12", "HLG, "DV" * tbr Average bitrate of audio and video in KBit/s From 4de88a6a362a6f976ebac5d384a79ca59606ec0a Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Thu, 17 Nov 2022 02:12:07 +0530 Subject: [PATCH 1771/2552] [extractor/generic] Don't report redirect to https --- yt_dlp/extractor/generic.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 0765d38ac..21e92cba6 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -2584,7 +2584,9 @@ class GenericIE(InfoExtractor): **smuggled_data.get('http_headers', {}) }) new_url = full_response.geturl() - if url != new_url: + if new_url == urllib.parse.urlparse(url)._replace(scheme='https').geturl(): + url = new_url + elif url != new_url: self.report_following_redirect(new_url) if force_videoid: new_url = smuggle_url(new_url, {'force_videoid': force_videoid}) From 64c464a144e2a96ec21a717d191217edda9107a4 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Thu, 17 Nov 2022 08:40:34 +0530 Subject: [PATCH 1772/2552] [utils] Move `FileDownloader.parse_bytes` into utils --- yt_dlp/__init__.py | 18 +++++++++--------- yt_dlp/downloader/common.py | 9 ++------- yt_dlp/utils.py | 19 ++++++++++++++----- 3 files changed, 25 insertions(+), 21 deletions(-) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 726fb0685..c03e6e691 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -16,7 +16,6 @@ import sys from .compat import compat_shlex_quote from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS -from .downloader import FileDownloader from .downloader.external import get_external_downloader from .extractor import list_extractor_classes from .extractor.adobepass import MSO_INFO @@ -50,6 +49,7 @@ from .utils import ( format_field, int_or_none, match_filter_func, + parse_bytes, parse_duration, preferredencoding, read_batch_urls, @@ -281,19 +281,19 @@ def validate_options(opts): raise ValueError(f'invalid {key} retry sleep expression {expr!r}') # Bytes - def parse_bytes(name, value): + def validate_bytes(name, value): if value is None: return None - numeric_limit = FileDownloader.parse_bytes(value) + numeric_limit = parse_bytes(value) validate(numeric_limit is not None, 'rate limit', value) return numeric_limit - opts.ratelimit = parse_bytes('rate limit', opts.ratelimit) - opts.throttledratelimit = parse_bytes('throttled rate limit', opts.throttledratelimit) - opts.min_filesize = parse_bytes('min filesize', opts.min_filesize) - opts.max_filesize = parse_bytes('max filesize', opts.max_filesize) - opts.buffersize = parse_bytes('buffer size', opts.buffersize) - opts.http_chunk_size = parse_bytes('http chunk size', opts.http_chunk_size) + opts.ratelimit = validate_bytes('rate limit', opts.ratelimit) + opts.throttledratelimit = validate_bytes('throttled rate limit', opts.throttledratelimit) + opts.min_filesize = validate_bytes('min filesize', opts.min_filesize) + opts.max_filesize = validate_bytes('max filesize', opts.max_filesize) + opts.buffersize = validate_bytes('buffer size', opts.buffersize) + opts.http_chunk_size = validate_bytes('http chunk size', opts.http_chunk_size) # Output templates def validate_outtmpl(tmpl, msg): diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index 8d110c374..fe3633250 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -15,7 +15,6 @@ from ..minicurses import ( from 
From d0d74b719755548dab8fc7c402ad3e303391e826 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Thu, 17 Nov 2022 11:03:20 +0530
Subject: [PATCH 1773/2552] [utils] Move format sorting code into `utils`

---
 yt_dlp/__init__.py         |   6 +++---
 yt_dlp/extractor/common.py | 298 ++-----------------------------------
 yt_dlp/utils.py            | 286 +++++++++++++++++++++++++++++
 3 files changed, 301 insertions(+), 289 deletions(-)

diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index c03e6e691..f1a347514 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -19,7 +19,6 @@ from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
 from .downloader.external import get_external_downloader
 from .extractor import list_extractor_classes
 from .extractor.adobepass import MSO_INFO
-from .extractor.common import InfoExtractor
 from .options import parseOpts
 from .postprocessor import (
     FFmpegExtractAudioPP,
@@ -39,6 +38,7 @@ from .utils import (
     DateRange,
     DownloadCancelled,
     DownloadError,
+    FormatSorter,
     GeoUtils,
     PlaylistEntries,
     SameFileError,
@@ -152,7 +152,7 @@ def set_compat_opts(opts):
         else:
             opts.embed_infojson = False
     if 'format-sort' in opts.compat_opts:
-        opts.format_sort.extend(InfoExtractor.FormatSort.ytdl_default)
+        opts.format_sort.extend(FormatSorter.ytdl_default)
     _video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False)
     _audio_multistreams_set = set_default_compat('multistreams', 'allow_multiple_audio_streams', False, remove_compat=False)
     if _video_multistreams_set is False and _audio_multistreams_set is False:
@@ -227,7 +227,7 @@ def validate_options(opts):
     # Format sort
     for f in
opts.format_sort: - validate_regex('format sorting', f, InfoExtractor.FormatSort.regex) + validate_regex('format sorting', f, FormatSorter.regex) # Postprocessor formats validate_regex('merge output format', opts.merge_output_format, diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 94128bd84..e71016c3a 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -23,13 +23,13 @@ import xml.etree.ElementTree from ..compat import functools # isort: split from ..compat import compat_etree_fromstring, compat_expanduser, compat_os_name from ..cookies import LenientSimpleCookie -from ..downloader import FileDownloader from ..downloader.f4m import get_base_url, remove_encrypted_media from ..utils import ( IDENTITY, JSON_LD_RE, NO_DEFAULT, ExtractorError, + FormatSorter, GeoRestrictedError, GeoUtils, LenientJSONDecoder, @@ -41,8 +41,8 @@ from ..utils import ( bug_reports_message, classproperty, clean_html, + deprecation_warning, determine_ext, - determine_protocol, dict_get, encode_data_uri, error_to_compat_str, @@ -1686,295 +1686,21 @@ class InfoExtractor: html, '%s form' % form_id, group='form') return self._hidden_inputs(form) - class FormatSort: - regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$' - - default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality', - 'res', 'fps', 'hdr:12', 'vcodec:vp9.2', 'channels', 'acodec', - 'size', 'br', 'asr', 'proto', 'ext', 'hasaud', 'source', 'id') # These must not be aliases - ytdl_default = ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr', - 'height', 'width', 'proto', 'vext', 'abr', 'aext', - 'fps', 'fs_approx', 'source', 'id') - - settings = { - 'vcodec': {'type': 'ordered', 'regex': True, - 'order': ['av0?1', 'vp0?9.2', 'vp0?9', '[hx]265|he?vc?', '[hx]264|avc', 'vp0?8', 'mp4v|h263', 'theora', '', None, 'none']}, - 'acodec': {'type': 'ordered', 'regex': True, - 'order': ['[af]lac', 'wav|aiff', 'opus', 'vorbis|ogg', 'aac', 'mp?4a?', 'mp3', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']}, - 'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range', - 'order': ['dv', '(hdr)?12', r'(hdr)?10\+', '(hdr)?10', 'hlg', '', 'sdr', None]}, - 'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol', - 'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.*', '.*dash', 'websocket_frag', 'rtmpe?', '', 'mms|rtsp', 'ws|websocket', 'f4']}, - 'vext': {'type': 'ordered', 'field': 'video_ext', - 'order': ('mp4', 'webm', 'flv', '', 'none'), - 'order_free': ('webm', 'mp4', 'flv', '', 'none')}, - 'aext': {'type': 'ordered', 'field': 'audio_ext', - 'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'webm', '', 'none'), - 'order_free': ('ogg', 'opus', 'webm', 'mp3', 'm4a', 'aac', '', 'none')}, - 'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000}, - 'aud_or_vid': {'visible': False, 'forced': True, 'type': 'multiple', - 'field': ('vcodec', 'acodec'), - 'function': lambda it: int(any(v != 'none' for v in it))}, - 'ie_pref': {'priority': True, 'type': 'extractor'}, - 'hasvid': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)}, - 'hasaud': {'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)}, - 'lang': {'convert': 'float', 'field': 'language_preference', 'default': -1}, - 'quality': {'convert': 'float', 'default': -1}, - 'filesize': {'convert': 'bytes'}, - 'fs_approx': {'convert': 'bytes', 'field': 'filesize_approx'}, - 'id': {'convert': 'string', 'field': 'format_id'}, - 'height': {'convert': 
'float_none'}, - 'width': {'convert': 'float_none'}, - 'fps': {'convert': 'float_none'}, - 'channels': {'convert': 'float_none', 'field': 'audio_channels'}, - 'tbr': {'convert': 'float_none'}, - 'vbr': {'convert': 'float_none'}, - 'abr': {'convert': 'float_none'}, - 'asr': {'convert': 'float_none'}, - 'source': {'convert': 'float', 'field': 'source_preference', 'default': -1}, - - 'codec': {'type': 'combined', 'field': ('vcodec', 'acodec')}, - 'br': {'type': 'combined', 'field': ('tbr', 'vbr', 'abr'), 'same_limit': True}, - 'size': {'type': 'combined', 'same_limit': True, 'field': ('filesize', 'fs_approx')}, - 'ext': {'type': 'combined', 'field': ('vext', 'aext')}, - 'res': {'type': 'multiple', 'field': ('height', 'width'), - 'function': lambda it: (lambda l: min(l) if l else 0)(tuple(filter(None, it)))}, - - # Actual field names - 'format_id': {'type': 'alias', 'field': 'id'}, - 'preference': {'type': 'alias', 'field': 'ie_pref'}, - 'language_preference': {'type': 'alias', 'field': 'lang'}, - 'source_preference': {'type': 'alias', 'field': 'source'}, - 'protocol': {'type': 'alias', 'field': 'proto'}, - 'filesize_approx': {'type': 'alias', 'field': 'fs_approx'}, - 'audio_channels': {'type': 'alias', 'field': 'channels'}, - - # Deprecated - 'dimension': {'type': 'alias', 'field': 'res', 'deprecated': True}, - 'resolution': {'type': 'alias', 'field': 'res', 'deprecated': True}, - 'extension': {'type': 'alias', 'field': 'ext', 'deprecated': True}, - 'bitrate': {'type': 'alias', 'field': 'br', 'deprecated': True}, - 'total_bitrate': {'type': 'alias', 'field': 'tbr', 'deprecated': True}, - 'video_bitrate': {'type': 'alias', 'field': 'vbr', 'deprecated': True}, - 'audio_bitrate': {'type': 'alias', 'field': 'abr', 'deprecated': True}, - 'framerate': {'type': 'alias', 'field': 'fps', 'deprecated': True}, - 'filesize_estimate': {'type': 'alias', 'field': 'size', 'deprecated': True}, - 'samplerate': {'type': 'alias', 'field': 'asr', 'deprecated': True}, - 'video_ext': {'type': 'alias', 'field': 'vext', 'deprecated': True}, - 'audio_ext': {'type': 'alias', 'field': 'aext', 'deprecated': True}, - 'video_codec': {'type': 'alias', 'field': 'vcodec', 'deprecated': True}, - 'audio_codec': {'type': 'alias', 'field': 'acodec', 'deprecated': True}, - 'video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True}, - 'has_video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True}, - 'audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True}, - 'has_audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True}, - 'extractor': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True}, - 'extractor_preference': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True}, - } - - def __init__(self, ie, field_preference): - self._order = [] - self.ydl = ie._downloader - self.evaluate_params(self.ydl.params, field_preference) - if ie.get_param('verbose'): - self.print_verbose_info(self.ydl.write_debug) - - def _get_field_setting(self, field, key): - if field not in self.settings: - if key in ('forced', 'priority'): - return False - self.ydl.deprecated_feature(f'Using arbitrary fields ({field}) for format sorting is ' - 'deprecated and may be removed in a future version') - self.settings[field] = {} - propObj = self.settings[field] - if key not in propObj: - type = propObj.get('type') - if key == 'field': - default = 'preference' if type == 'extractor' else (field,) if type in ('combined', 'multiple') else field - elif key == 'convert': - default = 'order' if type == 'ordered' else 'float_string' if 
field else 'ignore' - else: - default = {'type': 'field', 'visible': True, 'order': [], 'not_in_list': (None,)}.get(key, None) - propObj[key] = default - return propObj[key] - - def _resolve_field_value(self, field, value, convertNone=False): - if value is None: - if not convertNone: - return None - else: - value = value.lower() - conversion = self._get_field_setting(field, 'convert') - if conversion == 'ignore': - return None - if conversion == 'string': - return value - elif conversion == 'float_none': - return float_or_none(value) - elif conversion == 'bytes': - return FileDownloader.parse_bytes(value) - elif conversion == 'order': - order_list = (self._use_free_order and self._get_field_setting(field, 'order_free')) or self._get_field_setting(field, 'order') - use_regex = self._get_field_setting(field, 'regex') - list_length = len(order_list) - empty_pos = order_list.index('') if '' in order_list else list_length + 1 - if use_regex and value is not None: - for i, regex in enumerate(order_list): - if regex and re.match(regex, value): - return list_length - i - return list_length - empty_pos # not in list - else: # not regex or value = None - return list_length - (order_list.index(value) if value in order_list else empty_pos) - else: - if value.isnumeric(): - return float(value) - else: - self.settings[field]['convert'] = 'string' - return value - - def evaluate_params(self, params, sort_extractor): - self._use_free_order = params.get('prefer_free_formats', False) - self._sort_user = params.get('format_sort', []) - self._sort_extractor = sort_extractor - - def add_item(field, reverse, closest, limit_text): - field = field.lower() - if field in self._order: - return - self._order.append(field) - limit = self._resolve_field_value(field, limit_text) - data = { - 'reverse': reverse, - 'closest': False if limit is None else closest, - 'limit_text': limit_text, - 'limit': limit} - if field in self.settings: - self.settings[field].update(data) - else: - self.settings[field] = data - - sort_list = ( - tuple(field for field in self.default if self._get_field_setting(field, 'forced')) - + (tuple() if params.get('format_sort_force', False) - else tuple(field for field in self.default if self._get_field_setting(field, 'priority'))) - + tuple(self._sort_user) + tuple(sort_extractor) + self.default) - - for item in sort_list: - match = re.match(self.regex, item) - if match is None: - raise ExtractorError('Invalid format sort string "%s" given by extractor' % item) - field = match.group('field') - if field is None: - continue - if self._get_field_setting(field, 'type') == 'alias': - alias, field = field, self._get_field_setting(field, 'field') - if self._get_field_setting(alias, 'deprecated'): - self.ydl.deprecated_feature(f'Format sorting alias {alias} is deprecated and may ' - f'be removed in a future version. 
Please use {field} instead') - reverse = match.group('reverse') is not None - closest = match.group('separator') == '~' - limit_text = match.group('limit') - - has_limit = limit_text is not None - has_multiple_fields = self._get_field_setting(field, 'type') == 'combined' - has_multiple_limits = has_limit and has_multiple_fields and not self._get_field_setting(field, 'same_limit') - - fields = self._get_field_setting(field, 'field') if has_multiple_fields else (field,) - limits = limit_text.split(':') if has_multiple_limits else (limit_text,) if has_limit else tuple() - limit_count = len(limits) - for (i, f) in enumerate(fields): - add_item(f, reverse, closest, - limits[i] if i < limit_count - else limits[0] if has_limit and not has_multiple_limits - else None) - - def print_verbose_info(self, write_debug): - if self._sort_user: - write_debug('Sort order given by user: %s' % ', '.join(self._sort_user)) - if self._sort_extractor: - write_debug('Sort order given by extractor: %s' % ', '.join(self._sort_extractor)) - write_debug('Formats sorted by: %s' % ', '.join(['%s%s%s' % ( - '+' if self._get_field_setting(field, 'reverse') else '', field, - '%s%s(%s)' % ('~' if self._get_field_setting(field, 'closest') else ':', - self._get_field_setting(field, 'limit_text'), - self._get_field_setting(field, 'limit')) - if self._get_field_setting(field, 'limit_text') is not None else '') - for field in self._order if self._get_field_setting(field, 'visible')])) - - def _calculate_field_preference_from_value(self, format, field, type, value): - reverse = self._get_field_setting(field, 'reverse') - closest = self._get_field_setting(field, 'closest') - limit = self._get_field_setting(field, 'limit') - - if type == 'extractor': - maximum = self._get_field_setting(field, 'max') - if value is None or (maximum is not None and value >= maximum): - value = -1 - elif type == 'boolean': - in_list = self._get_field_setting(field, 'in_list') - not_in_list = self._get_field_setting(field, 'not_in_list') - value = 0 if ((in_list is None or value in in_list) and (not_in_list is None or value not in not_in_list)) else -1 - elif type == 'ordered': - value = self._resolve_field_value(field, value, True) - - # try to convert to number - val_num = float_or_none(value, default=self._get_field_setting(field, 'default')) - is_num = self._get_field_setting(field, 'convert') != 'string' and val_num is not None - if is_num: - value = val_num - - return ((-10, 0) if value is None - else (1, value, 0) if not is_num # if a field has mixed strings and numbers, strings are sorted higher - else (0, -abs(value - limit), value - limit if reverse else limit - value) if closest - else (0, value, 0) if not reverse and (limit is None or value <= limit) - else (0, -value, 0) if limit is None or (reverse and value == limit) or value > limit - else (-1, value, 0)) - - def _calculate_field_preference(self, format, field): - type = self._get_field_setting(field, 'type') # extractor, boolean, ordered, field, multiple - get_value = lambda f: format.get(self._get_field_setting(f, 'field')) - if type == 'multiple': - type = 'field' # Only 'field' is allowed in multiple for now - actual_fields = self._get_field_setting(field, 'field') - - value = self._get_field_setting(field, 'function')(get_value(f) for f in actual_fields) - else: - value = get_value(field) - return self._calculate_field_preference_from_value(format, field, type, value) - - def calculate_preference(self, format): - # Determine missing protocol - if not format.get('protocol'): - 
format['protocol'] = determine_protocol(format) - - # Determine missing ext - if not format.get('ext') and 'url' in format: - format['ext'] = determine_ext(format['url']) - if format.get('vcodec') == 'none': - format['audio_ext'] = format['ext'] if format.get('acodec') != 'none' else 'none' - format['video_ext'] = 'none' - else: - format['video_ext'] = format['ext'] - format['audio_ext'] = 'none' - # if format.get('preference') is None and format.get('ext') in ('f4f', 'f4m'): # Not supported? - # format['preference'] = -1000 - - # Determine missing bitrates - if format.get('tbr') is None: - if format.get('vbr') is not None and format.get('abr') is not None: - format['tbr'] = format.get('vbr', 0) + format.get('abr', 0) - else: - if format.get('vcodec') != 'none' and format.get('vbr') is None: - format['vbr'] = format.get('tbr') - format.get('abr', 0) - if format.get('acodec') != 'none' and format.get('abr') is None: - format['abr'] = format.get('tbr') - format.get('vbr', 0) + @classproperty(cache=True) + def FormatSort(cls): + class FormatSort(FormatSorter): + def __init__(ie, *args, **kwargs): + super().__init__(ie._downloader, *args, **kwargs) - return tuple(self._calculate_field_preference(format, field) for field in self._order) + deprecation_warning( + 'yt_dlp.InfoExtractor.FormatSort is deprecated and may be removed in the future. ' + 'Use yt_dlp.utils.FormatSorter instead') + return FormatSort def _sort_formats(self, formats, field_preference=[]): if not formats: return - formats.sort(key=self.FormatSort(self, field_preference).calculate_preference) + formats.sort(key=FormatSorter(self._downloader, field_preference).calculate_preference) def _check_formats(self, formats, video_id): if formats: diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 9b6977b6d..0283c45f6 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -6000,6 +6000,292 @@ def orderedSet_from_options(options, alias_dict, *, use_regex=False, start=None) return orderedSet(requested) +class FormatSorter: + regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? 
*$' + + default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality', + 'res', 'fps', 'hdr:12', 'vcodec:vp9.2', 'channels', 'acodec', + 'size', 'br', 'asr', 'proto', 'ext', 'hasaud', 'source', 'id') # These must not be aliases + ytdl_default = ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr', + 'height', 'width', 'proto', 'vext', 'abr', 'aext', + 'fps', 'fs_approx', 'source', 'id') + + settings = { + 'vcodec': {'type': 'ordered', 'regex': True, + 'order': ['av0?1', 'vp0?9.2', 'vp0?9', '[hx]265|he?vc?', '[hx]264|avc', 'vp0?8', 'mp4v|h263', 'theora', '', None, 'none']}, + 'acodec': {'type': 'ordered', 'regex': True, + 'order': ['[af]lac', 'wav|aiff', 'opus', 'vorbis|ogg', 'aac', 'mp?4a?', 'mp3', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']}, + 'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range', + 'order': ['dv', '(hdr)?12', r'(hdr)?10\+', '(hdr)?10', 'hlg', '', 'sdr', None]}, + 'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol', + 'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.*', '.*dash', 'websocket_frag', 'rtmpe?', '', 'mms|rtsp', 'ws|websocket', 'f4']}, + 'vext': {'type': 'ordered', 'field': 'video_ext', + 'order': ('mp4', 'webm', 'flv', '', 'none'), + 'order_free': ('webm', 'mp4', 'flv', '', 'none')}, + 'aext': {'type': 'ordered', 'field': 'audio_ext', + 'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'webm', '', 'none'), + 'order_free': ('ogg', 'opus', 'webm', 'mp3', 'm4a', 'aac', '', 'none')}, + 'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000}, + 'aud_or_vid': {'visible': False, 'forced': True, 'type': 'multiple', + 'field': ('vcodec', 'acodec'), + 'function': lambda it: int(any(v != 'none' for v in it))}, + 'ie_pref': {'priority': True, 'type': 'extractor'}, + 'hasvid': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)}, + 'hasaud': {'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)}, + 'lang': {'convert': 'float', 'field': 'language_preference', 'default': -1}, + 'quality': {'convert': 'float', 'default': -1}, + 'filesize': {'convert': 'bytes'}, + 'fs_approx': {'convert': 'bytes', 'field': 'filesize_approx'}, + 'id': {'convert': 'string', 'field': 'format_id'}, + 'height': {'convert': 'float_none'}, + 'width': {'convert': 'float_none'}, + 'fps': {'convert': 'float_none'}, + 'channels': {'convert': 'float_none', 'field': 'audio_channels'}, + 'tbr': {'convert': 'float_none'}, + 'vbr': {'convert': 'float_none'}, + 'abr': {'convert': 'float_none'}, + 'asr': {'convert': 'float_none'}, + 'source': {'convert': 'float', 'field': 'source_preference', 'default': -1}, + + 'codec': {'type': 'combined', 'field': ('vcodec', 'acodec')}, + 'br': {'type': 'combined', 'field': ('tbr', 'vbr', 'abr'), 'same_limit': True}, + 'size': {'type': 'combined', 'same_limit': True, 'field': ('filesize', 'fs_approx')}, + 'ext': {'type': 'combined', 'field': ('vext', 'aext')}, + 'res': {'type': 'multiple', 'field': ('height', 'width'), + 'function': lambda it: (lambda l: min(l) if l else 0)(tuple(filter(None, it)))}, + + # Actual field names + 'format_id': {'type': 'alias', 'field': 'id'}, + 'preference': {'type': 'alias', 'field': 'ie_pref'}, + 'language_preference': {'type': 'alias', 'field': 'lang'}, + 'source_preference': {'type': 'alias', 'field': 'source'}, + 'protocol': {'type': 'alias', 'field': 'proto'}, + 'filesize_approx': {'type': 'alias', 'field': 'fs_approx'}, + 'audio_channels': {'type': 'alias', 'field': 'channels'}, + + # Deprecated + 'dimension': {'type': 'alias', 'field': 'res', 
'deprecated': True}, + 'resolution': {'type': 'alias', 'field': 'res', 'deprecated': True}, + 'extension': {'type': 'alias', 'field': 'ext', 'deprecated': True}, + 'bitrate': {'type': 'alias', 'field': 'br', 'deprecated': True}, + 'total_bitrate': {'type': 'alias', 'field': 'tbr', 'deprecated': True}, + 'video_bitrate': {'type': 'alias', 'field': 'vbr', 'deprecated': True}, + 'audio_bitrate': {'type': 'alias', 'field': 'abr', 'deprecated': True}, + 'framerate': {'type': 'alias', 'field': 'fps', 'deprecated': True}, + 'filesize_estimate': {'type': 'alias', 'field': 'size', 'deprecated': True}, + 'samplerate': {'type': 'alias', 'field': 'asr', 'deprecated': True}, + 'video_ext': {'type': 'alias', 'field': 'vext', 'deprecated': True}, + 'audio_ext': {'type': 'alias', 'field': 'aext', 'deprecated': True}, + 'video_codec': {'type': 'alias', 'field': 'vcodec', 'deprecated': True}, + 'audio_codec': {'type': 'alias', 'field': 'acodec', 'deprecated': True}, + 'video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True}, + 'has_video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True}, + 'audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True}, + 'has_audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True}, + 'extractor': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True}, + 'extractor_preference': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True}, + } + + def __init__(self, ydl, field_preference): + self.ydl = ydl + self._order = [] + self.evaluate_params(self.ydl.params, field_preference) + if ydl.params.get('verbose'): + self.print_verbose_info(self.ydl.write_debug) + + def _get_field_setting(self, field, key): + if field not in self.settings: + if key in ('forced', 'priority'): + return False + self.ydl.deprecated_feature(f'Using arbitrary fields ({field}) for format sorting is ' + 'deprecated and may be removed in a future version') + self.settings[field] = {} + propObj = self.settings[field] + if key not in propObj: + type = propObj.get('type') + if key == 'field': + default = 'preference' if type == 'extractor' else (field,) if type in ('combined', 'multiple') else field + elif key == 'convert': + default = 'order' if type == 'ordered' else 'float_string' if field else 'ignore' + else: + default = {'type': 'field', 'visible': True, 'order': [], 'not_in_list': (None,)}.get(key, None) + propObj[key] = default + return propObj[key] + + def _resolve_field_value(self, field, value, convertNone=False): + if value is None: + if not convertNone: + return None + else: + value = value.lower() + conversion = self._get_field_setting(field, 'convert') + if conversion == 'ignore': + return None + if conversion == 'string': + return value + elif conversion == 'float_none': + return float_or_none(value) + elif conversion == 'bytes': + return parse_bytes(value) + elif conversion == 'order': + order_list = (self._use_free_order and self._get_field_setting(field, 'order_free')) or self._get_field_setting(field, 'order') + use_regex = self._get_field_setting(field, 'regex') + list_length = len(order_list) + empty_pos = order_list.index('') if '' in order_list else list_length + 1 + if use_regex and value is not None: + for i, regex in enumerate(order_list): + if regex and re.match(regex, value): + return list_length - i + return list_length - empty_pos # not in list + else: # not regex or value = None + return list_length - (order_list.index(value) if value in order_list else empty_pos) + else: + if value.isnumeric(): + return float(value) + else: + 
self.settings[field]['convert'] = 'string' + return value + + def evaluate_params(self, params, sort_extractor): + self._use_free_order = params.get('prefer_free_formats', False) + self._sort_user = params.get('format_sort', []) + self._sort_extractor = sort_extractor + + def add_item(field, reverse, closest, limit_text): + field = field.lower() + if field in self._order: + return + self._order.append(field) + limit = self._resolve_field_value(field, limit_text) + data = { + 'reverse': reverse, + 'closest': False if limit is None else closest, + 'limit_text': limit_text, + 'limit': limit} + if field in self.settings: + self.settings[field].update(data) + else: + self.settings[field] = data + + sort_list = ( + tuple(field for field in self.default if self._get_field_setting(field, 'forced')) + + (tuple() if params.get('format_sort_force', False) + else tuple(field for field in self.default if self._get_field_setting(field, 'priority'))) + + tuple(self._sort_user) + tuple(sort_extractor) + self.default) + + for item in sort_list: + match = re.match(self.regex, item) + if match is None: + raise ExtractorError('Invalid format sort string "%s" given by extractor' % item) + field = match.group('field') + if field is None: + continue + if self._get_field_setting(field, 'type') == 'alias': + alias, field = field, self._get_field_setting(field, 'field') + if self._get_field_setting(alias, 'deprecated'): + self.ydl.deprecated_feature(f'Format sorting alias {alias} is deprecated and may ' + f'be removed in a future version. Please use {field} instead') + reverse = match.group('reverse') is not None + closest = match.group('separator') == '~' + limit_text = match.group('limit') + + has_limit = limit_text is not None + has_multiple_fields = self._get_field_setting(field, 'type') == 'combined' + has_multiple_limits = has_limit and has_multiple_fields and not self._get_field_setting(field, 'same_limit') + + fields = self._get_field_setting(field, 'field') if has_multiple_fields else (field,) + limits = limit_text.split(':') if has_multiple_limits else (limit_text,) if has_limit else tuple() + limit_count = len(limits) + for (i, f) in enumerate(fields): + add_item(f, reverse, closest, + limits[i] if i < limit_count + else limits[0] if has_limit and not has_multiple_limits + else None) + + def print_verbose_info(self, write_debug): + if self._sort_user: + write_debug('Sort order given by user: %s' % ', '.join(self._sort_user)) + if self._sort_extractor: + write_debug('Sort order given by extractor: %s' % ', '.join(self._sort_extractor)) + write_debug('Formats sorted by: %s' % ', '.join(['%s%s%s' % ( + '+' if self._get_field_setting(field, 'reverse') else '', field, + '%s%s(%s)' % ('~' if self._get_field_setting(field, 'closest') else ':', + self._get_field_setting(field, 'limit_text'), + self._get_field_setting(field, 'limit')) + if self._get_field_setting(field, 'limit_text') is not None else '') + for field in self._order if self._get_field_setting(field, 'visible')])) + + def _calculate_field_preference_from_value(self, format, field, type, value): + reverse = self._get_field_setting(field, 'reverse') + closest = self._get_field_setting(field, 'closest') + limit = self._get_field_setting(field, 'limit') + + if type == 'extractor': + maximum = self._get_field_setting(field, 'max') + if value is None or (maximum is not None and value >= maximum): + value = -1 + elif type == 'boolean': + in_list = self._get_field_setting(field, 'in_list') + not_in_list = self._get_field_setting(field, 'not_in_list') + 
value = 0 if ((in_list is None or value in in_list) and (not_in_list is None or value not in not_in_list)) else -1 + elif type == 'ordered': + value = self._resolve_field_value(field, value, True) + + # try to convert to number + val_num = float_or_none(value, default=self._get_field_setting(field, 'default')) + is_num = self._get_field_setting(field, 'convert') != 'string' and val_num is not None + if is_num: + value = val_num + + return ((-10, 0) if value is None + else (1, value, 0) if not is_num # if a field has mixed strings and numbers, strings are sorted higher + else (0, -abs(value - limit), value - limit if reverse else limit - value) if closest + else (0, value, 0) if not reverse and (limit is None or value <= limit) + else (0, -value, 0) if limit is None or (reverse and value == limit) or value > limit + else (-1, value, 0)) + + def _calculate_field_preference(self, format, field): + type = self._get_field_setting(field, 'type') # extractor, boolean, ordered, field, multiple + get_value = lambda f: format.get(self._get_field_setting(f, 'field')) + if type == 'multiple': + type = 'field' # Only 'field' is allowed in multiple for now + actual_fields = self._get_field_setting(field, 'field') + + value = self._get_field_setting(field, 'function')(get_value(f) for f in actual_fields) + else: + value = get_value(field) + return self._calculate_field_preference_from_value(format, field, type, value) + + def calculate_preference(self, format): + # Determine missing protocol + if not format.get('protocol'): + format['protocol'] = determine_protocol(format) + + # Determine missing ext + if not format.get('ext') and 'url' in format: + format['ext'] = determine_ext(format['url']) + if format.get('vcodec') == 'none': + format['audio_ext'] = format['ext'] if format.get('acodec') != 'none' else 'none' + format['video_ext'] = 'none' + else: + format['video_ext'] = format['ext'] + format['audio_ext'] = 'none' + # if format.get('preference') is None and format.get('ext') in ('f4f', 'f4m'): # Not supported? 
+ # format['preference'] = -1000 + + # Determine missing bitrates + if format.get('tbr') is None: + if format.get('vbr') is not None and format.get('abr') is not None: + format['tbr'] = format.get('vbr', 0) + format.get('abr', 0) + else: + if format.get('vcodec') != 'none' and format.get('vbr') is None: + format['vbr'] = format.get('tbr') - format.get('abr', 0) + if format.get('acodec') != 'none' and format.get('abr') is None: + format['abr'] = format.get('tbr') - format.get('vbr', 0) + + return tuple(self._calculate_field_preference(format, field) for field in self._order) + + # Deprecated has_certifi = bool(certifi) has_websockets = bool(websockets) From 784320c98c2a7e84d72636bc25f6f54c86f5e481 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Thu, 17 Nov 2022 10:53:05 +0530 Subject: [PATCH 1774/2552] Implement universal format sorting Closes #5566 --- yt_dlp/YoutubeDL.py | 14 ++++++++++++++ yt_dlp/extractor/common.py | 6 +++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 25c35dc53..b1d009280 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -67,6 +67,7 @@ from .utils import ( EntryNotInPlaylist, ExistingVideoReached, ExtractorError, + FormatSorter, GeoRestrictedError, HEADRequest, ISO3166Utils, @@ -2461,6 +2462,18 @@ class YoutubeDL: if err: self.report_error(err, tb=False) + def sort_formats(self, info_dict): + formats = self._get_formats(info_dict) + if not formats: + return + # Backward compatibility with InfoExtractor._sort_formats + field_preference = formats[0].pop('__sort_fields', None) + if field_preference: + info_dict['_format_sort_fields'] = field_preference + + formats.sort(key=FormatSorter( + self, info_dict.get('_format_sort_fields', [])).calculate_preference) + def process_video_result(self, info_dict, download=True): assert info_dict.get('_type', 'video') == 'video' self._num_videos += 1 @@ -2546,6 +2559,7 @@ class YoutubeDL: info_dict['requested_subtitles'] = self.process_subtitles( info_dict['id'], subtitles, automatic_captions) + self.sort_formats(info_dict) formats = self._get_formats(info_dict) # or None ensures --clean-infojson removes it diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index e71016c3a..3701fe6b3 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -344,6 +344,7 @@ class InfoExtractor: 'unlisted' or 'public'. Use 'InfoExtractor._availability' to set it _old_archive_ids: A list of old archive ids needed for backward compatibility + _format_sort_fields: A list of fields to use for sorting formats __post_extractor: A function to be called just before the metadata is written to either disk, logger or console. The function must return a dict which will be added to the info_dict. 
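Taken together, the `YoutubeDL.sort_formats` hunk above and the `_sort_formats` hunk below split sorting across the extractor/core boundary: the extractor no longer sorts at all, it merely stashes its preference on the first format, and `YoutubeDL` later pops that into `info_dict['_format_sort_fields']` before applying `FormatSorter` to every format. A condensed restatement of the handoff (assuming `ydl` is a `YoutubeDL` instance and `formats` is a list of format dicts):

    from yt_dlp.utils import FormatSorter

    formats = [{'format_id': 'a', 'height': 720}, {'format_id': 'b', 'height': 1080}]

    # extractor side: _sort_formats(formats, ('res', 'proto')) now reduces to
    formats[0]['__sort_fields'] = ('res', 'proto')

    # core side: what YoutubeDL.sort_formats() does during process_video_result
    info_dict = {'id': 'example', 'formats': formats}
    field_preference = info_dict['formats'][0].pop('__sort_fields', None)
    if field_preference:
        info_dict['_format_sort_fields'] = field_preference
    info_dict['formats'].sort(
        key=FormatSorter(ydl, info_dict.get('_format_sort_fields', [])).calculate_preference)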
@@ -1698,9 +1699,8 @@ class InfoExtractor: return FormatSort def _sort_formats(self, formats, field_preference=[]): - if not formats: - return - formats.sort(key=FormatSorter(self._downloader, field_preference).calculate_preference) + if formats and field_preference: + formats[0]['__sort_fields'] = field_preference def _check_formats(self, formats, video_id): if formats: From 9f14daf22b4080ae1531a772ee7574959af4e2fa Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Thu, 17 Nov 2022 10:40:03 +0530 Subject: [PATCH 1775/2552] [extractor] Deprecate `_sort_formats` --- test/test_InfoExtractor.py | 4 +- test/test_YoutubeDL.py | 49 +++++++-------------- yt_dlp/extractor/abc.py | 3 -- yt_dlp/extractor/abcotvs.py | 2 - yt_dlp/extractor/acfun.py | 1 - yt_dlp/extractor/adn.py | 1 - yt_dlp/extractor/adobetv.py | 2 - yt_dlp/extractor/adultswim.py | 1 - yt_dlp/extractor/aenetworks.py | 1 - yt_dlp/extractor/afreecatv.py | 3 -- yt_dlp/extractor/agora.py | 2 - yt_dlp/extractor/allocine.py | 2 - yt_dlp/extractor/alsace20tv.py | 1 - yt_dlp/extractor/alura.py | 2 - yt_dlp/extractor/amcnetworks.py | 1 - yt_dlp/extractor/amp.py | 2 - yt_dlp/extractor/ant1newsgr.py | 1 - yt_dlp/extractor/anvato.py | 2 - yt_dlp/extractor/aol.py | 1 - yt_dlp/extractor/apa.py | 1 - yt_dlp/extractor/aparat.py | 1 - yt_dlp/extractor/appletrailers.py | 3 -- yt_dlp/extractor/archiveorg.py | 2 +- yt_dlp/extractor/arcpublishing.py | 1 - yt_dlp/extractor/ard.py | 4 -- yt_dlp/extractor/arkena.py | 1 - yt_dlp/extractor/arnes.py | 1 - yt_dlp/extractor/arte.py | 1 - yt_dlp/extractor/atresplayer.py | 1 - yt_dlp/extractor/atvat.py | 1 - yt_dlp/extractor/audimedia.py | 1 - yt_dlp/extractor/banbye.py | 2 - yt_dlp/extractor/bandcamp.py | 3 -- yt_dlp/extractor/bannedvideo.py | 1 - yt_dlp/extractor/bbc.py | 13 ------ yt_dlp/extractor/beatport.py | 1 - yt_dlp/extractor/beeg.py | 2 - yt_dlp/extractor/bigflix.py | 2 - yt_dlp/extractor/bilibili.py | 3 -- yt_dlp/extractor/biqle.py | 1 - yt_dlp/extractor/bitchute.py | 1 - yt_dlp/extractor/bitwave.py | 1 - yt_dlp/extractor/bloomberg.py | 1 - yt_dlp/extractor/bokecc.py | 2 - yt_dlp/extractor/bongacams.py | 1 - yt_dlp/extractor/booyah.py | 1 - yt_dlp/extractor/box.py | 2 - yt_dlp/extractor/bpb.py | 2 - yt_dlp/extractor/br.py | 2 - yt_dlp/extractor/breakcom.py | 1 - yt_dlp/extractor/breitbart.py | 1 - yt_dlp/extractor/brightcove.py | 2 - yt_dlp/extractor/byutv.py | 1 - yt_dlp/extractor/c56.py | 1 - yt_dlp/extractor/cableav.py | 1 - yt_dlp/extractor/callin.py | 1 - yt_dlp/extractor/caltrans.py | 1 - yt_dlp/extractor/cam4.py | 1 - yt_dlp/extractor/cammodels.py | 1 - yt_dlp/extractor/camsoda.py | 2 - yt_dlp/extractor/canalalpha.py | 1 - yt_dlp/extractor/canalc2.py | 2 - yt_dlp/extractor/canalplus.py | 1 - yt_dlp/extractor/canvas.py | 1 - yt_dlp/extractor/carambatv.py | 1 - yt_dlp/extractor/cbc.py | 2 - yt_dlp/extractor/cbs.py | 1 - yt_dlp/extractor/cbsnews.py | 1 - yt_dlp/extractor/cbssports.py | 1 - yt_dlp/extractor/ccc.py | 1 - yt_dlp/extractor/ccma.py | 1 - yt_dlp/extractor/cctv.py | 2 - yt_dlp/extractor/cda.py | 4 -- yt_dlp/extractor/cellebrite.py | 1 - yt_dlp/extractor/ceskatelevize.py | 3 -- yt_dlp/extractor/channel9.py | 1 - yt_dlp/extractor/charlierose.py | 2 - yt_dlp/extractor/chaturbate.py | 1 - yt_dlp/extractor/chingari.py | 1 - yt_dlp/extractor/cinchcast.py | 1 - yt_dlp/extractor/ciscowebex.py | 1 - yt_dlp/extractor/cliphunter.py | 1 - yt_dlp/extractor/cloudflarestream.py | 1 - yt_dlp/extractor/clubic.py | 1 - yt_dlp/extractor/clyp.py | 1 - yt_dlp/extractor/common.py | 11 
+++-- yt_dlp/extractor/condenast.py | 1 - yt_dlp/extractor/contv.py | 2 - yt_dlp/extractor/corus.py | 1 - yt_dlp/extractor/coub.py | 2 - yt_dlp/extractor/cpac.py | 2 - yt_dlp/extractor/crackle.py | 1 - yt_dlp/extractor/crooksandliars.py | 1 - yt_dlp/extractor/crowdbunker.py | 1 - yt_dlp/extractor/crunchyroll.py | 1 - yt_dlp/extractor/cspan.py | 1 - yt_dlp/extractor/curiositystream.py | 1 - yt_dlp/extractor/daftsex.py | 2 - yt_dlp/extractor/dailymail.py | 1 - yt_dlp/extractor/dailymotion.py | 1 - yt_dlp/extractor/dailywire.py | 1 - yt_dlp/extractor/damtomo.py | 1 - yt_dlp/extractor/daystar.py | 1 - yt_dlp/extractor/deezer.py | 2 - yt_dlp/extractor/democracynow.py | 2 - yt_dlp/extractor/detik.py | 1 - yt_dlp/extractor/dfb.py | 1 - yt_dlp/extractor/digitalconcerthall.py | 1 - yt_dlp/extractor/digiteka.py | 2 - yt_dlp/extractor/discoverygo.py | 1 - yt_dlp/extractor/disney.py | 1 - yt_dlp/extractor/dispeak.py | 1 - yt_dlp/extractor/dlive.py | 2 - yt_dlp/extractor/dplay.py | 1 - yt_dlp/extractor/drbonanza.py | 1 - yt_dlp/extractor/dropbox.py | 1 - yt_dlp/extractor/drtuber.py | 1 - yt_dlp/extractor/drtv.py | 3 -- yt_dlp/extractor/dumpert.py | 1 - yt_dlp/extractor/dvtv.py | 1 - yt_dlp/extractor/dw.py | 1 - yt_dlp/extractor/eagleplatform.py | 2 - yt_dlp/extractor/egghead.py | 1 - yt_dlp/extractor/einthusan.py | 2 - yt_dlp/extractor/eitb.py | 2 - yt_dlp/extractor/ellentube.py | 1 - yt_dlp/extractor/elonet.py | 1 - yt_dlp/extractor/epicon.py | 1 - yt_dlp/extractor/eporner.py | 1 - yt_dlp/extractor/ertgr.py | 5 +-- yt_dlp/extractor/escapist.py | 1 - yt_dlp/extractor/espn.py | 3 -- yt_dlp/extractor/esri.py | 1 - yt_dlp/extractor/europa.py | 1 - yt_dlp/extractor/eurosport.py | 2 - yt_dlp/extractor/euscreen.py | 1 - yt_dlp/extractor/expotv.py | 1 - yt_dlp/extractor/expressen.py | 1 - yt_dlp/extractor/facebook.py | 12 +++-- yt_dlp/extractor/faz.py | 1 - yt_dlp/extractor/fc2.py | 1 - yt_dlp/extractor/fczenit.py | 2 - yt_dlp/extractor/fifa.py | 1 - yt_dlp/extractor/filmmodu.py | 2 - yt_dlp/extractor/filmon.py | 2 - yt_dlp/extractor/firsttv.py | 1 - yt_dlp/extractor/flickr.py | 1 - yt_dlp/extractor/folketinget.py | 1 - yt_dlp/extractor/fourtube.py | 1 - yt_dlp/extractor/fourzerostudio.py | 1 - yt_dlp/extractor/fox.py | 1 - yt_dlp/extractor/foxgay.py | 2 - yt_dlp/extractor/fptplay.py | 1 - yt_dlp/extractor/francetv.py | 2 - yt_dlp/extractor/freesound.py | 1 - yt_dlp/extractor/freetv.py | 2 - yt_dlp/extractor/frontendmasters.py | 1 - yt_dlp/extractor/fujitv.py | 2 +- yt_dlp/extractor/funimation.py | 2 +- yt_dlp/extractor/fusion.py | 1 - yt_dlp/extractor/gab.py | 3 -- yt_dlp/extractor/gaia.py | 1 - yt_dlp/extractor/gamespot.py | 2 - yt_dlp/extractor/gaskrank.py | 1 - yt_dlp/extractor/gedidigital.py | 1 - yt_dlp/extractor/generic.py | 12 ----- yt_dlp/extractor/genericembeds.py | 1 - yt_dlp/extractor/gettr.py | 4 -- yt_dlp/extractor/gfycat.py | 1 - yt_dlp/extractor/giantbomb.py | 2 - yt_dlp/extractor/giga.py | 1 - yt_dlp/extractor/globo.py | 1 - yt_dlp/extractor/glomex.py | 1 - yt_dlp/extractor/go.py | 1 - yt_dlp/extractor/golem.py | 1 - yt_dlp/extractor/goodgame.py | 1 - yt_dlp/extractor/googledrive.py | 2 - yt_dlp/extractor/goplay.py | 1 - yt_dlp/extractor/gopro.py | 2 - yt_dlp/extractor/gronkh.py | 1 - yt_dlp/extractor/hbo.py | 1 - yt_dlp/extractor/hearthisat.py | 1 - yt_dlp/extractor/heise.py | 1 - yt_dlp/extractor/hellporno.py | 1 - yt_dlp/extractor/helsinki.py | 1 - yt_dlp/extractor/hidive.py | 1 - yt_dlp/extractor/hitbox.py | 2 - yt_dlp/extractor/hketv.py | 1 - yt_dlp/extractor/hotstar.py | 1 - 
yt_dlp/extractor/howstuffworks.py | 2 - yt_dlp/extractor/hrfensehen.py | 2 - yt_dlp/extractor/hrti.py | 1 - yt_dlp/extractor/hse.py | 1 - yt_dlp/extractor/huffpost.py | 2 - yt_dlp/extractor/hungama.py | 1 - yt_dlp/extractor/huya.py | 2 - yt_dlp/extractor/icareus.py | 1 - yt_dlp/extractor/ichinanalive.py | 4 -- yt_dlp/extractor/ign.py | 2 - yt_dlp/extractor/imdb.py | 1 - yt_dlp/extractor/imggaming.py | 1 - yt_dlp/extractor/imgur.py | 2 - yt_dlp/extractor/indavideo.py | 1 - yt_dlp/extractor/infoq.py | 2 - yt_dlp/extractor/instagram.py | 2 - yt_dlp/extractor/internazionale.py | 1 - yt_dlp/extractor/internetvideoarchive.py | 1 - yt_dlp/extractor/iprima.py | 3 -- yt_dlp/extractor/iqiyi.py | 3 -- yt_dlp/extractor/islamchannel.py | 1 - yt_dlp/extractor/itv.py | 1 - yt_dlp/extractor/ivi.py | 1 - yt_dlp/extractor/ivideon.py | 1 - yt_dlp/extractor/iwara.py | 2 - yt_dlp/extractor/ixigua.py | 1 - yt_dlp/extractor/izlesene.py | 1 - yt_dlp/extractor/jable.py | 1 - yt_dlp/extractor/jamendo.py | 1 - yt_dlp/extractor/japandiet.py | 3 -- yt_dlp/extractor/jixie.py | 1 - yt_dlp/extractor/joj.py | 1 - yt_dlp/extractor/kakao.py | 1 - yt_dlp/extractor/kaltura.py | 2 - yt_dlp/extractor/keezmovies.py | 7 --- yt_dlp/extractor/kelbyone.py | 1 - yt_dlp/extractor/kinja.py | 3 -- yt_dlp/extractor/kinopoisk.py | 1 - yt_dlp/extractor/konserthusetplay.py | 2 - yt_dlp/extractor/koo.py | 1 - yt_dlp/extractor/kusi.py | 1 - yt_dlp/extractor/kuwo.py | 3 -- yt_dlp/extractor/la7.py | 3 -- yt_dlp/extractor/laola1tv.py | 1 - yt_dlp/extractor/lbry.py | 1 - yt_dlp/extractor/lecture2go.py | 2 - yt_dlp/extractor/lecturio.py | 1 - yt_dlp/extractor/leeco.py | 3 +- yt_dlp/extractor/lego.py | 1 - yt_dlp/extractor/libraryofcongress.py | 2 - yt_dlp/extractor/lifenews.py | 2 - yt_dlp/extractor/likee.py | 1 - yt_dlp/extractor/limelight.py | 2 - yt_dlp/extractor/line.py | 1 - yt_dlp/extractor/linkedin.py | 10 ++--- yt_dlp/extractor/linuxacademy.py | 1 - yt_dlp/extractor/livestream.py | 3 -- yt_dlp/extractor/lnkgo.py | 2 - yt_dlp/extractor/lrt.py | 1 - yt_dlp/extractor/lynda.py | 2 - yt_dlp/extractor/mailru.py | 1 - yt_dlp/extractor/mainstreaming.py | 2 - yt_dlp/extractor/malltv.py | 1 - yt_dlp/extractor/mangomolo.py | 1 - yt_dlp/extractor/manoto.py | 2 - yt_dlp/extractor/manyvids.py | 2 - yt_dlp/extractor/massengeschmacktv.py | 2 - yt_dlp/extractor/masters.py | 1 - yt_dlp/extractor/matchtv.py | 1 - yt_dlp/extractor/mdr.py | 2 - yt_dlp/extractor/medaltv.py | 2 - yt_dlp/extractor/mediaklikk.py | 1 - yt_dlp/extractor/medialaan.py | 1 - yt_dlp/extractor/mediaset.py | 2 - yt_dlp/extractor/mediasite.py | 2 - yt_dlp/extractor/mediaworksnz.py | 2 - yt_dlp/extractor/megatvcom.py | 1 - yt_dlp/extractor/melonvod.py | 1 - yt_dlp/extractor/metacafe.py | 1 - yt_dlp/extractor/metacritic.py | 1 - yt_dlp/extractor/mgoon.py | 1 - yt_dlp/extractor/mgtv.py | 1 - yt_dlp/extractor/microsoftembed.py | 1 - yt_dlp/extractor/microsoftstream.py | 1 - yt_dlp/extractor/microsoftvirtualacademy.py | 1 - yt_dlp/extractor/mildom.py | 4 -- yt_dlp/extractor/minds.py | 1 - yt_dlp/extractor/minoto.py | 1 - yt_dlp/extractor/mirrativ.py | 1 - yt_dlp/extractor/mixcloud.py | 2 - yt_dlp/extractor/mlb.py | 2 - yt_dlp/extractor/mnet.py | 1 - yt_dlp/extractor/mocha.py | 2 - yt_dlp/extractor/moviezine.py | 2 - yt_dlp/extractor/msn.py | 1 - yt_dlp/extractor/mtv.py | 4 -- yt_dlp/extractor/muenchentv.py | 1 - yt_dlp/extractor/mwave.py | 1 - yt_dlp/extractor/myspace.py | 2 - yt_dlp/extractor/n1.py | 2 - yt_dlp/extractor/nate.py | 1 - yt_dlp/extractor/naver.py | 2 - yt_dlp/extractor/nba.py | 2 
- yt_dlp/extractor/nbc.py | 4 -- yt_dlp/extractor/ndr.py | 1 - yt_dlp/extractor/neteasemusic.py | 3 -- yt_dlp/extractor/netzkino.py | 1 - yt_dlp/extractor/newgrounds.py | 1 - yt_dlp/extractor/newspicks.py | 1 - yt_dlp/extractor/newstube.py | 1 - yt_dlp/extractor/newsy.py | 1 - yt_dlp/extractor/nexx.py | 2 - yt_dlp/extractor/nfb.py | 1 - yt_dlp/extractor/nfhsnetwork.py | 4 +- yt_dlp/extractor/nfl.py | 1 - yt_dlp/extractor/nhk.py | 2 - yt_dlp/extractor/nhl.py | 1 - yt_dlp/extractor/niconico.py | 2 - yt_dlp/extractor/ninecninemedia.py | 1 - yt_dlp/extractor/ninegag.py | 1 - yt_dlp/extractor/njpwworld.py | 2 - yt_dlp/extractor/nobelprize.py | 1 - yt_dlp/extractor/noodlemagazine.py | 2 - yt_dlp/extractor/nova.py | 2 - yt_dlp/extractor/novaplay.py | 1 - yt_dlp/extractor/noz.py | 1 - yt_dlp/extractor/npo.py | 4 -- yt_dlp/extractor/npr.py | 2 - yt_dlp/extractor/nrk.py | 1 - yt_dlp/extractor/ntvde.py | 1 - yt_dlp/extractor/ntvru.py | 1 - yt_dlp/extractor/nuvid.py | 1 - yt_dlp/extractor/nytimes.py | 1 - yt_dlp/extractor/odnoklassniki.py | 2 - yt_dlp/extractor/olympics.py | 1 - yt_dlp/extractor/on24.py | 1 - yt_dlp/extractor/onefootball.py | 1 - yt_dlp/extractor/onet.py | 1 - yt_dlp/extractor/ooyala.py | 1 - yt_dlp/extractor/opencast.py | 2 - yt_dlp/extractor/openrec.py | 3 -- yt_dlp/extractor/ora.py | 1 - yt_dlp/extractor/orf.py | 4 -- yt_dlp/extractor/pandoratv.py | 1 - yt_dlp/extractor/panopto.py | 1 - yt_dlp/extractor/parlview.py | 1 - yt_dlp/extractor/patreon.py | 1 - yt_dlp/extractor/pbs.py | 1 - yt_dlp/extractor/pearvideo.py | 1 - yt_dlp/extractor/peekvids.py | 1 - yt_dlp/extractor/peertube.py | 1 - yt_dlp/extractor/peertv.py | 2 - yt_dlp/extractor/peloton.py | 1 - yt_dlp/extractor/performgroup.py | 1 - yt_dlp/extractor/periscope.py | 1 - yt_dlp/extractor/philharmoniedeparis.py | 1 - yt_dlp/extractor/picarto.py | 2 - yt_dlp/extractor/piksel.py | 3 +- yt_dlp/extractor/pinkbike.py | 1 - yt_dlp/extractor/pinterest.py | 1 - yt_dlp/extractor/pixivsketch.py | 1 - yt_dlp/extractor/pladform.py | 2 - yt_dlp/extractor/planetmarathi.py | 1 - yt_dlp/extractor/platzi.py | 1 - yt_dlp/extractor/playplustv.py | 1 - yt_dlp/extractor/plays.py | 1 - yt_dlp/extractor/playtvak.py | 1 - yt_dlp/extractor/playvid.py | 1 - yt_dlp/extractor/playwire.py | 1 - yt_dlp/extractor/pluralsight.py | 2 - yt_dlp/extractor/plutotv.py | 1 - yt_dlp/extractor/polsatgo.py | 1 - yt_dlp/extractor/polskieradio.py | 2 - yt_dlp/extractor/porncom.py | 2 - yt_dlp/extractor/pornflip.py | 1 - yt_dlp/extractor/pornhd.py | 1 - yt_dlp/extractor/pornhub.py | 4 -- yt_dlp/extractor/pornovoisines.py | 1 - yt_dlp/extractor/projectveritas.py | 1 - yt_dlp/extractor/prosiebensat1.py | 1 - yt_dlp/extractor/puhutv.py | 1 - yt_dlp/extractor/qqmusic.py | 1 - yt_dlp/extractor/r7.py | 1 - yt_dlp/extractor/radiko.py | 1 - yt_dlp/extractor/radiocanada.py | 1 - yt_dlp/extractor/radiode.py | 1 - yt_dlp/extractor/radiofrance.py | 1 - yt_dlp/extractor/radiojavan.py | 1 - yt_dlp/extractor/radlive.py | 1 - yt_dlp/extractor/rai.py | 6 --- yt_dlp/extractor/rcs.py | 1 - yt_dlp/extractor/rcti.py | 2 - yt_dlp/extractor/redbee.py | 6 +-- yt_dlp/extractor/redbulltv.py | 1 - yt_dlp/extractor/reddit.py | 1 - yt_dlp/extractor/redgifs.py | 1 - yt_dlp/extractor/redtube.py | 1 - yt_dlp/extractor/rentv.py | 1 - yt_dlp/extractor/restudy.py | 1 - yt_dlp/extractor/reuters.py | 1 - yt_dlp/extractor/rice.py | 1 - yt_dlp/extractor/rockstargames.py | 2 - yt_dlp/extractor/rokfin.py | 1 - yt_dlp/extractor/roosterteeth.py | 1 - yt_dlp/extractor/rte.py | 2 - yt_dlp/extractor/rtl2.py | 3 -- 
yt_dlp/extractor/rtlnl.py | 2 - yt_dlp/extractor/rts.py | 1 - yt_dlp/extractor/rtve.py | 2 - yt_dlp/extractor/rtvnh.py | 1 - yt_dlp/extractor/rtvs.py | 1 - yt_dlp/extractor/rtvslo.py | 1 - yt_dlp/extractor/rule34video.py | 2 - yt_dlp/extractor/rumble.py | 1 - yt_dlp/extractor/rutube.py | 1 - yt_dlp/extractor/rutv.py | 3 +- yt_dlp/extractor/ruutu.py | 2 - yt_dlp/extractor/sapo.py | 2 - yt_dlp/extractor/screen9.py | 1 - yt_dlp/extractor/scrolller.py | 2 - yt_dlp/extractor/senategov.py | 3 -- yt_dlp/extractor/sendtonews.py | 6 +-- yt_dlp/extractor/servus.py | 1 - yt_dlp/extractor/sexu.py | 1 - yt_dlp/extractor/seznamzpravy.py | 1 - yt_dlp/extractor/shahid.py | 1 - yt_dlp/extractor/shemaroome.py | 1 - yt_dlp/extractor/showroomlive.py | 1 - yt_dlp/extractor/sina.py | 1 - yt_dlp/extractor/sixplay.py | 1 - yt_dlp/extractor/skyit.py | 1 - yt_dlp/extractor/slideslive.py | 1 - yt_dlp/extractor/sohu.py | 1 - yt_dlp/extractor/sonyliv.py | 1 - yt_dlp/extractor/soundcloud.py | 1 - yt_dlp/extractor/sovietscloset.py | 1 - yt_dlp/extractor/spankbang.py | 2 - yt_dlp/extractor/spankwire.py | 1 - yt_dlp/extractor/sport5.py | 1 - yt_dlp/extractor/sportbox.py | 1 - yt_dlp/extractor/springboardplatform.py | 2 - yt_dlp/extractor/srgssr.py | 1 - yt_dlp/extractor/startrek.py | 1 - yt_dlp/extractor/steam.py | 2 - yt_dlp/extractor/streamable.py | 1 - yt_dlp/extractor/streamanity.py | 1 - yt_dlp/extractor/streamcz.py | 1 - yt_dlp/extractor/stripchat.py | 2 - yt_dlp/extractor/substack.py | 1 - yt_dlp/extractor/sunporno.py | 1 - yt_dlp/extractor/sverigesradio.py | 1 - yt_dlp/extractor/svt.py | 1 - yt_dlp/extractor/swrmediathek.py | 1 - yt_dlp/extractor/tagesschau.py | 2 - yt_dlp/extractor/tass.py | 1 - yt_dlp/extractor/teachertube.py | 2 - yt_dlp/extractor/teamcoco.py | 1 - yt_dlp/extractor/ted.py | 2 - yt_dlp/extractor/tele13.py | 1 - yt_dlp/extractor/telebruxelles.py | 1 - yt_dlp/extractor/telecinco.py | 1 - yt_dlp/extractor/telegraaf.py | 2 - yt_dlp/extractor/telegram.py | 1 - yt_dlp/extractor/telemb.py | 1 - yt_dlp/extractor/telemundo.py | 1 - yt_dlp/extractor/tencent.py | 1 - yt_dlp/extractor/tennistv.py | 2 - yt_dlp/extractor/tenplay.py | 1 - yt_dlp/extractor/theholetv.py | 1 - yt_dlp/extractor/theplatform.py | 3 -- yt_dlp/extractor/theta.py | 2 - yt_dlp/extractor/theweatherchannel.py | 1 - yt_dlp/extractor/threeqsdn.py | 8 ++-- yt_dlp/extractor/threespeak.py | 1 - yt_dlp/extractor/tiktok.py | 5 +-- yt_dlp/extractor/tnaflix.py | 1 - yt_dlp/extractor/toggle.py | 1 - yt_dlp/extractor/tokentube.py | 2 - yt_dlp/extractor/triller.py | 1 - yt_dlp/extractor/trovo.py | 2 - yt_dlp/extractor/tubetugraz.py | 1 - yt_dlp/extractor/tubitv.py | 2 - yt_dlp/extractor/tumblr.py | 1 - yt_dlp/extractor/tunein.py | 1 - yt_dlp/extractor/tunepk.py | 1 - yt_dlp/extractor/turbo.py | 1 - yt_dlp/extractor/turner.py | 2 - yt_dlp/extractor/tv2.py | 2 - yt_dlp/extractor/tv24ua.py | 1 - yt_dlp/extractor/tv2dk.py | 1 - yt_dlp/extractor/tv2hu.py | 1 - yt_dlp/extractor/tv4.py | 2 - yt_dlp/extractor/tv5mondeplus.py | 1 - yt_dlp/extractor/tvc.py | 1 - yt_dlp/extractor/tvigle.py | 1 - yt_dlp/extractor/tvn24.py | 1 - yt_dlp/extractor/tvnet.py | 1 - yt_dlp/extractor/tvnow.py | 2 - yt_dlp/extractor/tvopengr.py | 1 - yt_dlp/extractor/tvp.py | 2 - yt_dlp/extractor/tvplay.py | 4 -- yt_dlp/extractor/tvplayer.py | 1 - yt_dlp/extractor/tweakers.py | 1 - yt_dlp/extractor/twentymin.py | 1 - yt_dlp/extractor/twitcasting.py | 6 +-- yt_dlp/extractor/twitch.py | 2 - yt_dlp/extractor/twitter.py | 4 +- yt_dlp/extractor/udemy.py | 2 - yt_dlp/extractor/udn.py | 2 - 
yt_dlp/extractor/umg.py | 1 - yt_dlp/extractor/unistra.py | 1 - yt_dlp/extractor/uol.py | 1 - yt_dlp/extractor/uplynk.py | 1 - yt_dlp/extractor/urort.py | 1 - yt_dlp/extractor/urplay.py | 1 - yt_dlp/extractor/ustream.py | 2 - yt_dlp/extractor/ustudio.py | 2 - yt_dlp/extractor/utreon.py | 1 - yt_dlp/extractor/veo.py | 2 - yt_dlp/extractor/veoh.py | 1 - yt_dlp/extractor/vevo.py | 1 - yt_dlp/extractor/vgtv.py | 2 - yt_dlp/extractor/vice.py | 1 - yt_dlp/extractor/viddler.py | 1 - yt_dlp/extractor/videa.py | 1 - yt_dlp/extractor/videocampus_sachsen.py | 1 - yt_dlp/extractor/videomore.py | 1 - yt_dlp/extractor/videopress.py | 1 - yt_dlp/extractor/vidio.py | 3 -- yt_dlp/extractor/vidlii.py | 1 - yt_dlp/extractor/viewlift.py | 1 - yt_dlp/extractor/viidea.py | 1 - yt_dlp/extractor/viki.py | 1 - yt_dlp/extractor/vimeo.py | 17 +++---- yt_dlp/extractor/vimm.py | 2 - yt_dlp/extractor/vimple.py | 1 - yt_dlp/extractor/vine.py | 1 - yt_dlp/extractor/viqeo.py | 1 - yt_dlp/extractor/viu.py | 2 - yt_dlp/extractor/vk.py | 1 - yt_dlp/extractor/vlive.py | 2 - yt_dlp/extractor/vodplatform.py | 1 - yt_dlp/extractor/voicerepublic.py | 1 - yt_dlp/extractor/voicy.py | 1 - yt_dlp/extractor/voot.py | 1 - yt_dlp/extractor/voxmedia.py | 2 - yt_dlp/extractor/vrv.py | 1 - yt_dlp/extractor/vshare.py | 2 - yt_dlp/extractor/vvvvid.py | 1 - yt_dlp/extractor/vzaar.py | 2 - yt_dlp/extractor/walla.py | 1 - yt_dlp/extractor/wasdtv.py | 2 - yt_dlp/extractor/wat.py | 2 - yt_dlp/extractor/watchbox.py | 1 - yt_dlp/extractor/wdr.py | 2 - yt_dlp/extractor/webcaster.py | 1 - yt_dlp/extractor/webofstories.py | 2 - yt_dlp/extractor/weibo.py | 2 - yt_dlp/extractor/whowatch.py | 1 - yt_dlp/extractor/willow.py | 1 - yt_dlp/extractor/wimtv.py | 1 - yt_dlp/extractor/wistia.py | 2 - yt_dlp/extractor/wppilot.py | 2 - yt_dlp/extractor/wsj.py | 1 - yt_dlp/extractor/xfileshare.py | 1 - yt_dlp/extractor/xhamster.py | 3 -- yt_dlp/extractor/xinpianchang.py | 2 - yt_dlp/extractor/xnxx.py | 1 - yt_dlp/extractor/xstream.py | 1 - yt_dlp/extractor/xtube.py | 1 - yt_dlp/extractor/xuite.py | 1 - yt_dlp/extractor/xvideos.py | 2 - yt_dlp/extractor/yahoo.py | 3 -- yt_dlp/extractor/yandexdisk.py | 1 - yt_dlp/extractor/yandexvideo.py | 3 -- yt_dlp/extractor/yapfiles.py | 1 - yt_dlp/extractor/yinyuetai.py | 1 - yt_dlp/extractor/ynet.py | 1 - yt_dlp/extractor/youku.py | 1 - yt_dlp/extractor/youporn.py | 1 - yt_dlp/extractor/youtube.py | 6 +-- yt_dlp/extractor/zapiks.py | 1 - yt_dlp/extractor/zattoo.py | 1 - yt_dlp/extractor/zdf.py | 3 +- yt_dlp/extractor/zee5.py | 1 - yt_dlp/extractor/zeenews.py | 1 - yt_dlp/extractor/zhihu.py | 1 - yt_dlp/extractor/zingmp3.py | 1 - yt_dlp/extractor/zoom.py | 2 - yt_dlp/extractor/zype.py | 1 - 579 files changed, 69 insertions(+), 918 deletions(-) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 016a2ac7f..683ead315 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -41,7 +41,9 @@ class InfoExtractorTestRequestHandler(http.server.BaseHTTPRequestHandler): class DummyIE(InfoExtractor): - pass + def _sort_formats(self, formats, field_preference=[]): + self._downloader.sort_formats( + {'formats': formats, '_format_sort_fields': field_preference}) class TestInfoExtractor(unittest.TestCase): diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 60e457108..8da1e5e4b 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -68,8 +68,7 @@ class TestFormatSelection(unittest.TestCase): {'ext': 'mp4', 'height': 460, 'url': TEST_URL}, ] info_dict = _make_result(formats) - 
yie = YoutubeIE(ydl) - yie._sort_formats(info_dict['formats']) + ydl.sort_formats(info_dict) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['ext'], 'webm') @@ -82,8 +81,7 @@ class TestFormatSelection(unittest.TestCase): {'ext': 'mp4', 'height': 1080, 'url': TEST_URL}, ] info_dict['formats'] = formats - yie = YoutubeIE(ydl) - yie._sort_formats(info_dict['formats']) + ydl.sort_formats(info_dict) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['ext'], 'mp4') @@ -97,8 +95,7 @@ class TestFormatSelection(unittest.TestCase): {'ext': 'flv', 'height': 720, 'url': TEST_URL}, ] info_dict['formats'] = formats - yie = YoutubeIE(ydl) - yie._sort_formats(info_dict['formats']) + ydl.sort_formats(info_dict) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['ext'], 'mp4') @@ -110,15 +107,14 @@ class TestFormatSelection(unittest.TestCase): {'ext': 'webm', 'height': 720, 'url': TEST_URL}, ] info_dict['formats'] = formats - yie = YoutubeIE(ydl) - yie._sort_formats(info_dict['formats']) + ydl.sort_formats(info_dict) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['ext'], 'webm') def test_format_selection(self): formats = [ - {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL}, + {'format_id': '35', 'ext': 'mp4', 'preference': 0, 'url': TEST_URL}, {'format_id': 'example-with-dashes', 'ext': 'webm', 'preference': 1, 'url': TEST_URL}, {'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': TEST_URL}, {'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': TEST_URL}, @@ -186,22 +182,19 @@ class TestFormatSelection(unittest.TestCase): info_dict = _make_result(formats) ydl = YDL({'format': 'best'}) - ie = YoutubeIE(ydl) - ie._sort_formats(info_dict['formats']) + ydl.sort_formats(info_dict) ydl.process_ie_result(copy.deepcopy(info_dict)) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'aac-64') ydl = YDL({'format': 'mp3'}) - ie = YoutubeIE(ydl) - ie._sort_formats(info_dict['formats']) + ydl.sort_formats(info_dict) ydl.process_ie_result(copy.deepcopy(info_dict)) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'mp3-64') ydl = YDL({'prefer_free_formats': True}) - ie = YoutubeIE(ydl) - ie._sort_formats(info_dict['formats']) + ydl.sort_formats(info_dict) ydl.process_ie_result(copy.deepcopy(info_dict)) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'ogg-64') @@ -346,8 +339,7 @@ class TestFormatSelection(unittest.TestCase): info_dict = _make_result(list(formats_order), extractor='youtube') ydl = YDL({'format': 'bestvideo+bestaudio'}) - yie = YoutubeIE(ydl) - yie._sort_formats(info_dict['formats']) + ydl.sort_formats(info_dict) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], '248+172') @@ -355,40 +347,35 @@ class TestFormatSelection(unittest.TestCase): info_dict = _make_result(list(formats_order), extractor='youtube') ydl = YDL({'format': 'bestvideo[height>=999999]+bestaudio/best'}) - yie = YoutubeIE(ydl) - yie._sort_formats(info_dict['formats']) + ydl.sort_formats(info_dict) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], '38') info_dict = _make_result(list(formats_order), extractor='youtube') ydl = YDL({'format': 'bestvideo/best,bestaudio'}) - 
yie = YoutubeIE(ydl) - yie._sort_formats(info_dict['formats']) + ydl.sort_formats(info_dict) ydl.process_ie_result(info_dict) downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts] self.assertEqual(downloaded_ids, ['137', '141']) info_dict = _make_result(list(formats_order), extractor='youtube') ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])+bestaudio'}) - yie = YoutubeIE(ydl) - yie._sort_formats(info_dict['formats']) + ydl.sort_formats(info_dict) ydl.process_ie_result(info_dict) downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts] self.assertEqual(downloaded_ids, ['137+141', '248+141']) info_dict = _make_result(list(formats_order), extractor='youtube') ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])[height<=720]+bestaudio'}) - yie = YoutubeIE(ydl) - yie._sort_formats(info_dict['formats']) + ydl.sort_formats(info_dict) ydl.process_ie_result(info_dict) downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts] self.assertEqual(downloaded_ids, ['136+141', '247+141']) info_dict = _make_result(list(formats_order), extractor='youtube') ydl = YDL({'format': '(bestvideo[ext=none]/bestvideo[ext=webm])+bestaudio'}) - yie = YoutubeIE(ydl) - yie._sort_formats(info_dict['formats']) + ydl.sort_formats(info_dict) ydl.process_ie_result(info_dict) downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts] self.assertEqual(downloaded_ids, ['248+141']) @@ -396,16 +383,14 @@ class TestFormatSelection(unittest.TestCase): for f1, f2 in zip(formats_order, formats_order[1:]): info_dict = _make_result([f1, f2], extractor='youtube') ydl = YDL({'format': 'best/bestvideo'}) - yie = YoutubeIE(ydl) - yie._sort_formats(info_dict['formats']) + ydl.sort_formats(info_dict) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], f1['format_id']) info_dict = _make_result([f2, f1], extractor='youtube') ydl = YDL({'format': 'best/bestvideo'}) - yie = YoutubeIE(ydl) - yie._sort_formats(info_dict['formats']) + ydl.sort_formats(info_dict) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], f1['format_id']) @@ -480,7 +465,7 @@ class TestFormatSelection(unittest.TestCase): for f in formats: f['url'] = 'http://_/' f['ext'] = 'unknown' - info_dict = _make_result(formats) + info_dict = _make_result(formats, _format_sort_fields=('id', )) ydl = YDL({'format': 'best[filesize<3000]'}) ydl.process_ie_result(info_dict) diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py index 03f10ab23..0ca76b85a 100644 --- a/yt_dlp/extractor/abc.py +++ b/yt_dlp/extractor/abc.py @@ -155,8 +155,6 @@ class ABCIE(InfoExtractor): 'format_id': format_id }) - self._sort_formats(formats) - return { 'id': video_id, 'title': self._og_search_title(webpage), @@ -221,7 +219,6 @@ class ABCIViewIE(InfoExtractor): entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) if formats: break - self._sort_formats(formats) subtitles = {} src_vtt = stream.get('captions', {}).get('src-vtt') diff --git a/yt_dlp/extractor/abcotvs.py b/yt_dlp/extractor/abcotvs.py index 44a9f8ca5..6dca19de4 100644 --- a/yt_dlp/extractor/abcotvs.py +++ b/yt_dlp/extractor/abcotvs.py @@ -78,7 +78,6 @@ class ABCOTVSIE(InfoExtractor): 'url': mp4_url, 'width': 640, }) - self._sort_formats(formats) image = video.get('image') or {} @@ -119,7 +118,6 @@ class ABCOTVSClipsIE(InfoExtractor): title = video_data['title'] formats = self._extract_m3u8_formats( 
video_data['videoURL'].split('?')[0], video_id, 'mp4') - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/acfun.py b/yt_dlp/extractor/acfun.py index 9ec259a75..dc5792944 100644 --- a/yt_dlp/extractor/acfun.py +++ b/yt_dlp/extractor/acfun.py @@ -27,7 +27,6 @@ class AcFunVideoBaseIE(InfoExtractor): **parse_codecs(video.get('codecs', '')) }) - self._sort_formats(formats) return { 'id': video_id, 'formats': formats, diff --git a/yt_dlp/extractor/adn.py b/yt_dlp/extractor/adn.py index 16f648de3..e0c18c877 100644 --- a/yt_dlp/extractor/adn.py +++ b/yt_dlp/extractor/adn.py @@ -235,7 +235,6 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' for f in m3u8_formats: f['language'] = 'fr' formats.extend(m3u8_formats) - self._sort_formats(formats) video = (self._download_json( self._API_BASE_URL + 'video/%s' % video_id, video_id, diff --git a/yt_dlp/extractor/adobetv.py b/yt_dlp/extractor/adobetv.py index d8e07b3a1..d1525a1af 100644 --- a/yt_dlp/extractor/adobetv.py +++ b/yt_dlp/extractor/adobetv.py @@ -70,7 +70,6 @@ class AdobeTVBaseIE(InfoExtractor): }) s3_extracted = True formats.append(f) - self._sort_formats(formats) return { 'id': video_id, @@ -269,7 +268,6 @@ class AdobeTVVideoIE(AdobeTVBaseIE): 'width': int_or_none(source.get('width') or None), 'url': source_src, }) - self._sort_formats(formats) # For both metadata and downloaded files the duration varies among # formats. I just pick the max one diff --git a/yt_dlp/extractor/adultswim.py b/yt_dlp/extractor/adultswim.py index 1368954bc..bd29eb43e 100644 --- a/yt_dlp/extractor/adultswim.py +++ b/yt_dlp/extractor/adultswim.py @@ -180,7 +180,6 @@ class AdultSwimIE(TurnerBaseIE): info['subtitles'].setdefault('en', []).append({ 'url': asset_url, }) - self._sort_formats(info['formats']) return info else: diff --git a/yt_dlp/extractor/aenetworks.py b/yt_dlp/extractor/aenetworks.py index 094c57bf9..d7c401016 100644 --- a/yt_dlp/extractor/aenetworks.py +++ b/yt_dlp/extractor/aenetworks.py @@ -62,7 +62,6 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE subtitles = self._merge_subtitles(subtitles, tp_subtitles) if last_e and not formats: raise last_e - self._sort_formats(formats) return { 'id': video_id, 'formats': formats, diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py index bfcc08030..9276fe799 100644 --- a/yt_dlp/extractor/afreecatv.py +++ b/yt_dlp/extractor/afreecatv.py @@ -338,7 +338,6 @@ class AfreecaTVIE(InfoExtractor): }] if not formats and not self.get_param('ignore_no_formats'): continue - self._sort_formats(formats) file_info = common_entry.copy() file_info.update({ 'id': format_id, @@ -464,8 +463,6 @@ class AfreecaTVLiveIE(AfreecaTVIE): # XXX: Do not subclass from concrete IE 'quality': quality_key(quality_str), }) - self._sort_formats(formats) - station_info = self._download_json( 'https://st.afreecatv.com/api/get_station_status.php', broadcast_no, query={'szBjId': broadcaster_id}, fatal=False, diff --git a/yt_dlp/extractor/agora.py b/yt_dlp/extractor/agora.py index 714414bd4..abb2d3ff2 100644 --- a/yt_dlp/extractor/agora.py +++ b/yt_dlp/extractor/agora.py @@ -55,7 +55,6 @@ class WyborczaVideoIE(InfoExtractor): if meta['files'].get('dash'): formats.extend(self._extract_mpd_formats(base_url + meta['files']['dash'], video_id)) - self._sort_formats(formats) return { 'id': video_id, 'formats': formats, @@ -179,7 +178,6 @@ class TokFMPodcastIE(InfoExtractor): 'acodec': ext, }) - self._sort_formats(formats) return { 'id': 
media_id, 'formats': formats, diff --git a/yt_dlp/extractor/allocine.py b/yt_dlp/extractor/allocine.py index 1f881e2a0..2d342cf03 100644 --- a/yt_dlp/extractor/allocine.py +++ b/yt_dlp/extractor/allocine.py @@ -112,8 +112,6 @@ class AllocineIE(InfoExtractor): }) duration, view_count, timestamp = [None] * 3 - self._sort_formats(formats) - return { 'id': video_id, 'display_id': display_id, diff --git a/yt_dlp/extractor/alsace20tv.py b/yt_dlp/extractor/alsace20tv.py index d16ab496e..ea3332e3d 100644 --- a/yt_dlp/extractor/alsace20tv.py +++ b/yt_dlp/extractor/alsace20tv.py @@ -22,7 +22,6 @@ class Alsace20TVBaseIE(InfoExtractor): self._extract_smil_formats(fmt_url, video_id, fatal=False) if '/smil:_' in fmt_url else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False)) - self._sort_formats(formats) webpage = (url and self._download_webpage(url, video_id, fatal=False)) or '' thumbnail = url_or_none(dict_get(info, ('image', 'preview', )) or self._og_search_thumbnail(webpage)) diff --git a/yt_dlp/extractor/alura.py b/yt_dlp/extractor/alura.py index ae7115f9f..bfe066bc6 100644 --- a/yt_dlp/extractor/alura.py +++ b/yt_dlp/extractor/alura.py @@ -63,8 +63,6 @@ class AluraIE(InfoExtractor): f['height'] = int('720' if m.group('res') == 'hd' else '480') formats.extend(video_format) - self._sort_formats(formats) - return { 'id': video_id, 'title': video_title, diff --git a/yt_dlp/extractor/amcnetworks.py b/yt_dlp/extractor/amcnetworks.py index 9369a66f7..c58bc7bfb 100644 --- a/yt_dlp/extractor/amcnetworks.py +++ b/yt_dlp/extractor/amcnetworks.py @@ -106,7 +106,6 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE media_url = update_url_query(media_url, query) formats, subtitles = self._extract_theplatform_smil( media_url, video_id) - self._sort_formats(formats) thumbnails = [] thumbnail_urls = [properties.get('imageDesktop')] diff --git a/yt_dlp/extractor/amp.py b/yt_dlp/extractor/amp.py index 6015baad5..b0cbd775c 100644 --- a/yt_dlp/extractor/amp.py +++ b/yt_dlp/extractor/amp.py @@ -84,8 +84,6 @@ class AMPIE(InfoExtractor): # XXX: Conventionally, base classes should end with 'ext': ext, }) - self._sort_formats(formats) - timestamp = unified_timestamp(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date')) return { diff --git a/yt_dlp/extractor/ant1newsgr.py b/yt_dlp/extractor/ant1newsgr.py index fac476e21..7b384b22d 100644 --- a/yt_dlp/extractor/ant1newsgr.py +++ b/yt_dlp/extractor/ant1newsgr.py @@ -19,7 +19,6 @@ class Ant1NewsGrBaseIE(InfoExtractor): raise ExtractorError('no source found for %s' % video_id) formats, subs = (self._extract_m3u8_formats_and_subtitles(source, video_id, 'mp4') if determine_ext(source) == 'm3u8' else ([{'url': source}], {})) - self._sort_formats(formats) thumbnails = scale_thumbnails_to_max_format_width( formats, [{'url': info['thumb']}], r'(?<=/imgHandler/)\d+') return { diff --git a/yt_dlp/extractor/anvato.py b/yt_dlp/extractor/anvato.py index 0d7575a1f..79bfe412b 100644 --- a/yt_dlp/extractor/anvato.py +++ b/yt_dlp/extractor/anvato.py @@ -354,8 +354,6 @@ class AnvatoIE(InfoExtractor): }) formats.append(a_format) - self._sort_formats(formats) - subtitles = {} for caption in video_data.get('captions', []): a_caption = { diff --git a/yt_dlp/extractor/aol.py b/yt_dlp/extractor/aol.py index 5200f9d9d..6949ca974 100644 --- a/yt_dlp/extractor/aol.py +++ b/yt_dlp/extractor/aol.py @@ -119,7 +119,6 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE 'height': int_or_none(qs.get('h', [None])[0]), }) formats.append(f) - 
self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/apa.py b/yt_dlp/extractor/apa.py index c9147e855..1ea0b1de4 100644 --- a/yt_dlp/extractor/apa.py +++ b/yt_dlp/extractor/apa.py @@ -72,7 +72,6 @@ class APAIE(InfoExtractor): 'format_id': format_id, 'height': height, }) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/aparat.py b/yt_dlp/extractor/aparat.py index 90464556d..4a989d837 100644 --- a/yt_dlp/extractor/aparat.py +++ b/yt_dlp/extractor/aparat.py @@ -73,7 +73,6 @@ class AparatIE(InfoExtractor): r'(\d+)[pP]', label or '', 'height', default=None)), }) - self._sort_formats(formats) info = self._search_json_ld(webpage, video_id, default={}) diff --git a/yt_dlp/extractor/appletrailers.py b/yt_dlp/extractor/appletrailers.py index 6b63f070d..2e0b0a8c9 100644 --- a/yt_dlp/extractor/appletrailers.py +++ b/yt_dlp/extractor/appletrailers.py @@ -120,7 +120,6 @@ class AppleTrailersIE(InfoExtractor): 'height': int_or_none(size_data.get('height')), 'language': version[:2], }) - self._sort_formats(formats) entries.append({ 'id': movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', clip_title).lower(), @@ -185,8 +184,6 @@ class AppleTrailersIE(InfoExtractor): 'height': int_or_none(format['height']), }) - self._sort_formats(formats) - playlist.append({ '_type': 'video', 'id': video_id, diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py index 4218f52d6..90dda9f53 100644 --- a/yt_dlp/extractor/archiveorg.py +++ b/yt_dlp/extractor/archiveorg.py @@ -312,7 +312,7 @@ class ArchiveOrgIE(InfoExtractor): }) for entry in entries.values(): - self._sort_formats(entry['formats'], ('source', )) + entry['_format_sort_fields'] = ('source', ) if len(entries) == 1: # If there's only one item, use it as the main info dict diff --git a/yt_dlp/extractor/arcpublishing.py b/yt_dlp/extractor/arcpublishing.py index de9ccc538..febd3d28a 100644 --- a/yt_dlp/extractor/arcpublishing.py +++ b/yt_dlp/extractor/arcpublishing.py @@ -144,7 +144,6 @@ class ArcPublishingIE(InfoExtractor): 'url': s_url, 'quality': -10, }) - self._sort_formats(formats) subtitles = {} for subtitle in (try_get(video, lambda x: x['subtitles']['urls'], list) or []): diff --git a/yt_dlp/extractor/ard.py b/yt_dlp/extractor/ard.py index f294679ef..0a8a8746a 100644 --- a/yt_dlp/extractor/ard.py +++ b/yt_dlp/extractor/ard.py @@ -40,8 +40,6 @@ class ARDMediathekBaseIE(InfoExtractor): 'This video is not available due to geoblocking', countries=self._GEO_COUNTRIES, metadata_available=True) - self._sort_formats(formats) - subtitles = {} subtitle_url = media_info.get('_subtitleUrl') if subtitle_url: @@ -262,7 +260,6 @@ class ARDMediathekIE(ARDMediathekBaseIE): 'format_id': fid, 'url': furl, }) - self._sort_formats(formats) info = { 'formats': formats, } @@ -371,7 +368,6 @@ class ARDIE(InfoExtractor): continue f['url'] = format_url formats.append(f) - self._sort_formats(formats) _SUB_FORMATS = ( ('./dataTimedText', 'ttml'), diff --git a/yt_dlp/extractor/arkena.py b/yt_dlp/extractor/arkena.py index 9a0273e2c..de36ec886 100644 --- a/yt_dlp/extractor/arkena.py +++ b/yt_dlp/extractor/arkena.py @@ -136,7 +136,6 @@ class ArkenaIE(InfoExtractor): elif mime_type == 'application/vnd.ms-sstr+xml': formats.extend(self._extract_ism_formats( href, video_id, ism_id='mss', fatal=False)) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/arnes.py b/yt_dlp/extractor/arnes.py index c80ce2233..a493714d1 100644 --- a/yt_dlp/extractor/arnes.py +++ b/yt_dlp/extractor/arnes.py 
@@ -73,7 +73,6 @@ class ArnesIE(InfoExtractor): 'width': int_or_none(media.get('width')), 'height': int_or_none(media.get('height')), }) - self._sort_formats(formats) channel = video.get('channel') or {} channel_id = channel.get('url') diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index b60fa0233..54e4d2d0c 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -186,7 +186,6 @@ class ArteTVIE(ArteTVBaseIE): formats.extend(secondary_formats) self._remove_duplicate_formats(formats) - self._sort_formats(formats) metadata = config['data']['attributes']['metadata'] diff --git a/yt_dlp/extractor/atresplayer.py b/yt_dlp/extractor/atresplayer.py index 39d1f1cc5..a20e7f988 100644 --- a/yt_dlp/extractor/atresplayer.py +++ b/yt_dlp/extractor/atresplayer.py @@ -84,7 +84,6 @@ class AtresPlayerIE(InfoExtractor): elif src_type == 'application/dash+xml': formats, subtitles = self._extract_mpd_formats( src, video_id, mpd_id='dash', fatal=False) - self._sort_formats(formats) heartbeat = episode.get('heartbeat') or {} omniture = episode.get('omniture') or {} diff --git a/yt_dlp/extractor/atvat.py b/yt_dlp/extractor/atvat.py index 2311837e9..d6ed9e495 100644 --- a/yt_dlp/extractor/atvat.py +++ b/yt_dlp/extractor/atvat.py @@ -49,7 +49,6 @@ class ATVAtIE(InfoExtractor): 'url': source_url, 'format_id': protocol, }) - self._sort_formats(formats) return { 'id': clip_id, diff --git a/yt_dlp/extractor/audimedia.py b/yt_dlp/extractor/audimedia.py index c1c4f67d0..35114e545 100644 --- a/yt_dlp/extractor/audimedia.py +++ b/yt_dlp/extractor/audimedia.py @@ -76,7 +76,6 @@ class AudiMediaIE(InfoExtractor): 'format_id': 'http-%s' % bitrate, }) formats.append(f) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/banbye.py b/yt_dlp/extractor/banbye.py index 92f567c5d..c87342565 100644 --- a/yt_dlp/extractor/banbye.py +++ b/yt_dlp/extractor/banbye.py @@ -80,8 +80,6 @@ class BanByeIE(BanByeBaseIE): 'url': f'{self._CDN_BASE}/video/{video_id}/{quality}.mp4', } for quality in data['quality']] - self._sort_formats(formats) - return { 'id': video_id, 'title': data.get('title'), diff --git a/yt_dlp/extractor/bandcamp.py b/yt_dlp/extractor/bandcamp.py index 7dcace2c6..de81e0de7 100644 --- a/yt_dlp/extractor/bandcamp.py +++ b/yt_dlp/extractor/bandcamp.py @@ -184,8 +184,6 @@ class BandcampIE(InfoExtractor): 'acodec': format_id.split('-')[0], }) - self._sort_formats(formats) - title = '%s - %s' % (artist, track) if artist else track if not duration: @@ -363,7 +361,6 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE 'ext': ext, 'vcodec': 'none', }) - self._sort_formats(formats) title = show.get('audio_title') or 'Bandcamp Weekly' subtitle = show.get('subtitle') diff --git a/yt_dlp/extractor/bannedvideo.py b/yt_dlp/extractor/bannedvideo.py index ec9bdd8ca..51e722057 100644 --- a/yt_dlp/extractor/bannedvideo.py +++ b/yt_dlp/extractor/bannedvideo.py @@ -135,7 +135,6 @@ query GetCommentReplies($id: String!) 
{ formats.extend(self._extract_m3u8_formats( video_info.get('streamUrl'), video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', live=True)) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py index 35a7a165c..9d28e70a3 100644 --- a/yt_dlp/extractor/bbc.py +++ b/yt_dlp/extractor/bbc.py @@ -575,8 +575,6 @@ class BBCCoUkIE(InfoExtractor): else: programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id) - self._sort_formats(formats) - return { 'id': programme_id, 'title': title, @@ -890,7 +888,6 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE def _extract_from_playlist_sxml(self, url, playlist_id, timestamp): programme_id, title, description, duration, formats, subtitles = \ self._process_legacy_playlist_url(url, playlist_id) - self._sort_formats(formats) return { 'id': programme_id, 'title': title, @@ -954,7 +951,6 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE duration = int_or_none(items[0].get('duration')) programme_id = items[0].get('vpid') formats, subtitles = self._download_media_selector(programme_id) - self._sort_formats(formats) entries.append({ 'id': programme_id, 'title': title, @@ -991,7 +987,6 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE continue raise if entry: - self._sort_formats(entry['formats']) entries.append(entry) if entries: @@ -1015,7 +1010,6 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE if programme_id: formats, subtitles = self._download_media_selector(programme_id) - self._sort_formats(formats) # digitalData may be missing (e.g. http://www.bbc.com/autos/story/20130513-hyundais-rock-star) digital_data = self._parse_json( self._search_regex( @@ -1047,7 +1041,6 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE if version_id: title = smp_data['title'] formats, subtitles = self._download_media_selector(version_id) - self._sort_formats(formats) image_url = smp_data.get('holdingImageURL') display_date = init_data.get('displayDate') topic_title = init_data.get('topicTitle') @@ -1089,7 +1082,6 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE continue title = lead_media.get('title') or self._og_search_title(webpage) formats, subtitles = self._download_media_selector(programme_id) - self._sort_formats(formats) description = lead_media.get('summary') uploader = lead_media.get('masterBrand') uploader_id = lead_media.get('mid') @@ -1118,7 +1110,6 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE if current_programme and programme_id and current_programme.get('type') == 'playable_item': title = current_programme.get('titles', {}).get('tertiary') or playlist_title formats, subtitles = self._download_media_selector(programme_id) - self._sort_formats(formats) synopses = current_programme.get('synopses') or {} network = current_programme.get('network') or {} duration = int_or_none( @@ -1151,7 +1142,6 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE clip_title = clip.get('title') if clip_vpid and clip_title: formats, subtitles = self._download_media_selector(clip_vpid) - self._sort_formats(formats) return { 'id': clip_vpid, 'title': clip_title, @@ -1173,7 +1163,6 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE if not programme_id: continue formats, subtitles = self._download_media_selector(programme_id) - self._sort_formats(formats) entries.append({ 'id': programme_id, 'title': playlist_title, @@ -1205,7 
+1194,6 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE if not (item_id and item_title): continue formats, subtitles = self._download_media_selector(item_id) - self._sort_formats(formats) item_desc = None blocks = try_get(media, lambda x: x['summary']['blocks'], list) if blocks: @@ -1306,7 +1294,6 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id) if not formats and not self.get_param('ignore_no_formats'): continue - self._sort_formats(formats) video_id = media_meta.get('externalId') if not video_id: diff --git a/yt_dlp/extractor/beatport.py b/yt_dlp/extractor/beatport.py index f71f1f308..0aecbd089 100644 --- a/yt_dlp/extractor/beatport.py +++ b/yt_dlp/extractor/beatport.py @@ -74,7 +74,6 @@ class BeatportIE(InfoExtractor): fmt['abr'] = 96 fmt['asr'] = 44100 formats.append(fmt) - self._sort_formats(formats) images = [] for name, info in track['images'].items(): diff --git a/yt_dlp/extractor/beeg.py b/yt_dlp/extractor/beeg.py index 5957e370a..52ee68eca 100644 --- a/yt_dlp/extractor/beeg.py +++ b/yt_dlp/extractor/beeg.py @@ -76,8 +76,6 @@ class BeegIE(InfoExtractor): f['height'] = height formats.extend(current_formats) - self._sort_formats(formats) - return { 'id': video_id, 'display_id': first_fact.get('id'), diff --git a/yt_dlp/extractor/bigflix.py b/yt_dlp/extractor/bigflix.py index 6b2797ca0..02d1ba0e3 100644 --- a/yt_dlp/extractor/bigflix.py +++ b/yt_dlp/extractor/bigflix.py @@ -63,8 +63,6 @@ class BigflixIE(InfoExtractor): 'url': decode_url(file_url), }) - self._sort_formats(formats) - description = self._html_search_meta('description', webpage) return { diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 8a0e10da8..bc0424194 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -67,7 +67,6 @@ class BilibiliBaseIE(InfoExtractor): self.to_screen(f'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; ' f'you have to login or become premium member to download them. {self._login_hint()}') - self._sort_formats(formats) return formats def json2srt(self, json_data): @@ -879,7 +878,6 @@ class BiliIntlBaseIE(InfoExtractor): 'filesize': aud.get('size'), }) - self._sort_formats(formats) return formats def _extract_video_info(self, video_data, *, ep_id=None, aid=None): @@ -1105,7 +1103,6 @@ class BiliLiveIE(InfoExtractor): }) for fmt in traverse_obj(stream_data, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []: formats.extend(self._parse_formats(qn, fmt)) - self._sort_formats(formats) return { 'id': room_id, diff --git a/yt_dlp/extractor/biqle.py b/yt_dlp/extractor/biqle.py index 3a4234491..027753503 100644 --- a/yt_dlp/extractor/biqle.py +++ b/yt_dlp/extractor/biqle.py @@ -86,7 +86,6 @@ class BIQLEIE(InfoExtractor): 'height': int_or_none(height), 'ext': ext, }) - self._sort_formats(formats) thumbnails = [] for k, v in item.items(): diff --git a/yt_dlp/extractor/bitchute.py b/yt_dlp/extractor/bitchute.py index 9e3d6337a..10e7b0b2b 100644 --- a/yt_dlp/extractor/bitchute.py +++ b/yt_dlp/extractor/bitchute.py @@ -117,7 +117,6 @@ class BitChuteIE(InfoExtractor): self.raise_no_formats( 'Video is unavailable. 
Please make sure this video is playable in the browser ' 'before reporting this issue.', expected=True, video_id=video_id) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/bitwave.py b/yt_dlp/extractor/bitwave.py index bd8eac1f1..a82cd263a 100644 --- a/yt_dlp/extractor/bitwave.py +++ b/yt_dlp/extractor/bitwave.py @@ -45,7 +45,6 @@ class BitwaveStreamIE(InfoExtractor): formats = self._extract_m3u8_formats( channel['data']['url'], username, 'mp4') - self._sort_formats(formats) return { 'id': username, diff --git a/yt_dlp/extractor/bloomberg.py b/yt_dlp/extractor/bloomberg.py index c842c342c..792155e51 100644 --- a/yt_dlp/extractor/bloomberg.py +++ b/yt_dlp/extractor/bloomberg.py @@ -67,7 +67,6 @@ class BloombergIE(InfoExtractor): else: formats.extend(self._extract_f4m_formats( stream_url, video_id, f4m_id='hds', fatal=False)) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/bokecc.py b/yt_dlp/extractor/bokecc.py index 0c081750e..ca326f25f 100644 --- a/yt_dlp/extractor/bokecc.py +++ b/yt_dlp/extractor/bokecc.py @@ -21,8 +21,6 @@ class BokeCCBaseIE(InfoExtractor): 'quality': int(quality.attrib['value']), } for quality in info_xml.findall('./video/quality')] - self._sort_formats(formats) - return formats diff --git a/yt_dlp/extractor/bongacams.py b/yt_dlp/extractor/bongacams.py index 9ba166b04..bf955668d 100644 --- a/yt_dlp/extractor/bongacams.py +++ b/yt_dlp/extractor/bongacams.py @@ -57,7 +57,6 @@ class BongaCamsIE(InfoExtractor): formats = self._extract_m3u8_formats( '%s/hls/stream_%s/playlist.m3u8' % (server_url, uploader_id), channel_id, 'mp4', m3u8_id='hls', live=True) - self._sort_formats(formats) return { 'id': channel_id, diff --git a/yt_dlp/extractor/booyah.py b/yt_dlp/extractor/booyah.py index 8c94714be..5c55f2c76 100644 --- a/yt_dlp/extractor/booyah.py +++ b/yt_dlp/extractor/booyah.py @@ -67,7 +67,6 @@ class BooyahClipsIE(BooyahBaseIE): 'height': video_data.get('resolution'), 'preference': -10, })) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/box.py b/yt_dlp/extractor/box.py index 5842de88a..8ab149626 100644 --- a/yt_dlp/extractor/box.py +++ b/yt_dlp/extractor/box.py @@ -79,8 +79,6 @@ class BoxIE(InfoExtractor): 'url': update_url_query(authenticated_download_url, query), }) - self._sort_formats(formats) - creator = f.get('created_by') or {} return { diff --git a/yt_dlp/extractor/bpb.py b/yt_dlp/extractor/bpb.py index 388f1f94f..f28e581b8 100644 --- a/yt_dlp/extractor/bpb.py +++ b/yt_dlp/extractor/bpb.py @@ -48,8 +48,6 @@ class BpbIE(InfoExtractor): 'format_id': '%s-%s' % (quality, determine_ext(video_url)), }) - self._sort_formats(formats) - return { 'id': video_id, 'formats': formats, diff --git a/yt_dlp/extractor/br.py b/yt_dlp/extractor/br.py index faac442e8..309452d23 100644 --- a/yt_dlp/extractor/br.py +++ b/yt_dlp/extractor/br.py @@ -157,7 +157,6 @@ class BRIE(InfoExtractor): 'format_id': 'rtmp-%s' % asset_type, }) formats.append(rtmp_format_info) - self._sort_formats(formats) return formats def _extract_thumbnails(self, variants, base_url): @@ -272,7 +271,6 @@ class BRMediathekIE(InfoExtractor): 'tbr': tbr, 'filesize': int_or_none(node.get('fileSize')), }) - self._sort_formats(formats) subtitles = {} for edge in clip.get('captionFiles', {}).get('edges', []): diff --git a/yt_dlp/extractor/breakcom.py b/yt_dlp/extractor/breakcom.py index 51c8c822f..00cf308c7 100644 --- a/yt_dlp/extractor/breakcom.py +++ b/yt_dlp/extractor/breakcom.py @@ -63,7 +63,6 @@ class 
BreakIE(InfoExtractor): 'format_id': 'http-%d' % bitrate if bitrate else 'http', 'tbr': bitrate, }) - self._sort_formats(formats) title = self._search_regex( (r'title["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', diff --git a/yt_dlp/extractor/breitbart.py b/yt_dlp/extractor/breitbart.py index ca5757374..ea0a59c86 100644 --- a/yt_dlp/extractor/breitbart.py +++ b/yt_dlp/extractor/breitbart.py @@ -24,7 +24,6 @@ class BreitBartIE(InfoExtractor): webpage = self._download_webpage(url, video_id) formats = self._extract_m3u8_formats(f'https://cdn.jwplayer.com/manifests/{video_id}.m3u8', video_id, ext='mp4') - self._sort_formats(formats) return { 'id': video_id, 'title': self._generic_title('', webpage), diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py index 99a216fb4..35e1aa9c9 100644 --- a/yt_dlp/extractor/brightcove.py +++ b/yt_dlp/extractor/brightcove.py @@ -546,8 +546,6 @@ class BrightcoveNewIE(AdobePassIE): self.raise_no_formats( error.get('message') or error.get('error_subcode') or error['error_code'], expected=True) - self._sort_formats(formats) - for f in formats: f.setdefault('http_headers', {}).update(headers) diff --git a/yt_dlp/extractor/byutv.py b/yt_dlp/extractor/byutv.py index eca2e294e..9ed6efe79 100644 --- a/yt_dlp/extractor/byutv.py +++ b/yt_dlp/extractor/byutv.py @@ -108,7 +108,6 @@ class BYUtvIE(InfoExtractor): 'thumbnail': ep.get('imageThumbnail'), 'duration': parse_duration(ep.get('length')), }) - self._sort_formats(formats) return merge_dicts(info, { 'id': video_id, diff --git a/yt_dlp/extractor/c56.py b/yt_dlp/extractor/c56.py index 1d98ea598..e4b1c9a84 100644 --- a/yt_dlp/extractor/c56.py +++ b/yt_dlp/extractor/c56.py @@ -49,7 +49,6 @@ class C56IE(InfoExtractor): 'url': f['url'] } for f in info['rfiles'] ] - self._sort_formats(formats) return { 'id': info['vid'], diff --git a/yt_dlp/extractor/cableav.py b/yt_dlp/extractor/cableav.py index 3200b5677..2e374e5eb 100644 --- a/yt_dlp/extractor/cableav.py +++ b/yt_dlp/extractor/cableav.py @@ -22,7 +22,6 @@ class CableAVIE(InfoExtractor): video_url = self._og_search_video_url(webpage, secure=False) formats = self._extract_m3u8_formats(video_url, video_id, 'mp4') - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/callin.py b/yt_dlp/extractor/callin.py index 6c8129f06..e9668763e 100644 --- a/yt_dlp/extractor/callin.py +++ b/yt_dlp/extractor/callin.py @@ -54,7 +54,6 @@ class CallinIE(InfoExtractor): title = episode.get('title') or self._generic_title('', webpage) url = episode['m3u8'] formats = self._extract_m3u8_formats(url, display_id, ext='ts') - self._sort_formats(formats) show = traverse_obj(episode, ('show', 'title')) show_id = traverse_obj(episode, ('show', 'id')) diff --git a/yt_dlp/extractor/caltrans.py b/yt_dlp/extractor/caltrans.py index e52dfb170..f4a4a834b 100644 --- a/yt_dlp/extractor/caltrans.py +++ b/yt_dlp/extractor/caltrans.py @@ -27,7 +27,6 @@ class CaltransIE(InfoExtractor): video_stream = self._search_regex(r'videoStreamURL\s*=\s*"([^"]+)"', global_vars, 'Video Stream URL', fatal=False) formats = self._extract_m3u8_formats(video_stream, video_id, 'ts', live=True) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/cam4.py b/yt_dlp/extractor/cam4.py index 4256b28e0..2650cc1ef 100644 --- a/yt_dlp/extractor/cam4.py +++ b/yt_dlp/extractor/cam4.py @@ -20,7 +20,6 @@ class CAM4IE(InfoExtractor): m3u8_playlist = self._download_json('https://www.cam4.com/rest/v1.0/profile/{}/streamInfo'.format(channel_id), 
channel_id).get('cdnURL') formats = self._extract_m3u8_formats(m3u8_playlist, channel_id, 'mp4', m3u8_id='hls', live=True) - self._sort_formats(formats) return { 'id': channel_id, diff --git a/yt_dlp/extractor/cammodels.py b/yt_dlp/extractor/cammodels.py index 32fbffcc2..0509057fc 100644 --- a/yt_dlp/extractor/cammodels.py +++ b/yt_dlp/extractor/cammodels.py @@ -84,7 +84,6 @@ class CamModelsIE(InfoExtractor): else: continue formats.append(f) - self._sort_formats(formats) return { 'id': user_id, diff --git a/yt_dlp/extractor/camsoda.py b/yt_dlp/extractor/camsoda.py index 1b47b0584..021cd916f 100644 --- a/yt_dlp/extractor/camsoda.py +++ b/yt_dlp/extractor/camsoda.py @@ -47,8 +47,6 @@ class CamsodaIE(InfoExtractor): if not formats: self.raise_no_formats('No active streams found', expected=True) - self._sort_formats(formats) - return { 'id': video_id, 'title': self._html_extract_title(webpage), diff --git a/yt_dlp/extractor/canalalpha.py b/yt_dlp/extractor/canalalpha.py index f2ec9355f..df5ca5818 100644 --- a/yt_dlp/extractor/canalalpha.py +++ b/yt_dlp/extractor/canalalpha.py @@ -82,7 +82,6 @@ class CanalAlphaIE(InfoExtractor): dash_frmts, dash_subs = self._parse_mpd_formats_and_subtitles(manifests['dash']) formats.extend(dash_frmts) subtitles = self._merge_subtitles(subtitles, dash_subs) - self._sort_formats(formats) return { 'id': id, 'title': data_json.get('title').strip(), diff --git a/yt_dlp/extractor/canalc2.py b/yt_dlp/extractor/canalc2.py index c9bb94c40..597cb2a6b 100644 --- a/yt_dlp/extractor/canalc2.py +++ b/yt_dlp/extractor/canalc2.py @@ -58,8 +58,6 @@ class Canalc2IE(InfoExtractor): else: info = self._parse_html5_media_entries(url, webpage, url)[0] - self._sort_formats(info['formats']) - info.update({ 'id': video_id, 'title': title, diff --git a/yt_dlp/extractor/canalplus.py b/yt_dlp/extractor/canalplus.py index b184398e2..b7e2f9dd4 100644 --- a/yt_dlp/extractor/canalplus.py +++ b/yt_dlp/extractor/canalplus.py @@ -86,7 +86,6 @@ class CanalplusIE(InfoExtractor): 'format_id': format_id, 'quality': preference(format_id), }) - self._sort_formats(formats) thumbnails = [{ 'id': image_id, diff --git a/yt_dlp/extractor/canvas.py b/yt_dlp/extractor/canvas.py index 8eff4a57c..ae6e03a4d 100644 --- a/yt_dlp/extractor/canvas.py +++ b/yt_dlp/extractor/canvas.py @@ -118,7 +118,6 @@ class CanvasIE(InfoExtractor): 'format_id': format_type, 'url': format_url, }) - self._sort_formats(formats) subtitle_urls = data.get('subtitleUrls') if isinstance(subtitle_urls, list): diff --git a/yt_dlp/extractor/carambatv.py b/yt_dlp/extractor/carambatv.py index 087ea8aa0..d6044a319 100644 --- a/yt_dlp/extractor/carambatv.py +++ b/yt_dlp/extractor/carambatv.py @@ -43,7 +43,6 @@ class CarambaTVIE(InfoExtractor): 'height': int_or_none(f.get('height')), 'format_id': format_field(f, 'height', '%sp'), } for f in video['qualities'] if f.get('fn')] - self._sort_formats(formats) thumbnail = video.get('splash') duration = float_or_none(try_get( diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py index 999b7bc53..210f5f8ee 100644 --- a/yt_dlp/extractor/cbc.py +++ b/yt_dlp/extractor/cbc.py @@ -380,8 +380,6 @@ class CBCGemIE(InfoExtractor): if 'descriptive' in format['format_id'].lower(): format['preference'] = -2 - self._sort_formats(formats) - return { 'id': video_id, 'title': video_info['title'], diff --git a/yt_dlp/extractor/cbs.py b/yt_dlp/extractor/cbs.py index 9515806ed..9aacd50c4 100644 --- a/yt_dlp/extractor/cbs.py +++ b/yt_dlp/extractor/cbs.py @@ -52,7 +52,6 @@ class CBSBaseIE(ThePlatformFeedIE): # XXX: 
Do not subclass from concrete IE subtitles = self._merge_subtitles(subtitles, tp_subtitles) if last_e and not formats: self.raise_no_formats(last_e, True, content_id) - self._sort_formats(formats) extra_info.update({ 'id': content_id, diff --git a/yt_dlp/extractor/cbsnews.py b/yt_dlp/extractor/cbsnews.py index 98ec28df0..16edf3af8 100644 --- a/yt_dlp/extractor/cbsnews.py +++ b/yt_dlp/extractor/cbsnews.py @@ -132,7 +132,6 @@ class CBSNewsLiveVideoIE(InfoExtractor): }) formats = self._extract_akamai_formats(video_info['url'], display_id) - self._sort_formats(formats) return { 'id': display_id, diff --git a/yt_dlp/extractor/cbssports.py b/yt_dlp/extractor/cbssports.py index 56a255149..b5d85af12 100644 --- a/yt_dlp/extractor/cbssports.py +++ b/yt_dlp/extractor/cbssports.py @@ -40,7 +40,6 @@ class CBSSportsEmbedIE(InfoExtractor): formats = self._extract_m3u8_formats( metadata['files'][0]['url'], video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) - self._sort_formats(formats) image = video.get('image') thumbnails = None diff --git a/yt_dlp/extractor/ccc.py b/yt_dlp/extractor/ccc.py index 1bc0f07f2..22e3a22ec 100644 --- a/yt_dlp/extractor/ccc.py +++ b/yt_dlp/extractor/ccc.py @@ -64,7 +64,6 @@ class CCCIE(InfoExtractor): 'language': language, 'vcodec': vcodec, }) - self._sort_formats(formats) return { 'id': event_id, diff --git a/yt_dlp/extractor/ccma.py b/yt_dlp/extractor/ccma.py index ca739f8a1..88ff82f6e 100644 --- a/yt_dlp/extractor/ccma.py +++ b/yt_dlp/extractor/ccma.py @@ -81,7 +81,6 @@ class CCMAIE(InfoExtractor): 'url': media_url, 'vcodec': 'none' if media_type == 'audio' else None, }) - self._sort_formats(formats) informacio = media['informacio'] title = informacio['titol'] diff --git a/yt_dlp/extractor/cctv.py b/yt_dlp/extractor/cctv.py index 623cbb342..466bdfb7c 100644 --- a/yt_dlp/extractor/cctv.py +++ b/yt_dlp/extractor/cctv.py @@ -170,8 +170,6 @@ class CCTVIE(InfoExtractor): hls_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) - self._sort_formats(formats) - uploader = data.get('editer_name') description = self._html_search_meta( 'description', webpage, default=None) diff --git a/yt_dlp/extractor/cda.py b/yt_dlp/extractor/cda.py index 2a12b054b..d1212e686 100644 --- a/yt_dlp/extractor/cda.py +++ b/yt_dlp/extractor/cda.py @@ -151,8 +151,6 @@ class CDAIE(InfoExtractor): 'filesize': quality.get('length'), } for quality in meta['qualities'] if quality.get('file')] - self._sort_formats(formats) - return { 'id': video_id, 'title': meta.get('title'), @@ -304,6 +302,4 @@ class CDAIE(InfoExtractor): extract_format(webpage, resolution) - self._sort_formats(formats) - return merge_dicts(info_dict, info) diff --git a/yt_dlp/extractor/cellebrite.py b/yt_dlp/extractor/cellebrite.py index 64a30d7e3..9896a31af 100644 --- a/yt_dlp/extractor/cellebrite.py +++ b/yt_dlp/extractor/cellebrite.py @@ -50,7 +50,6 @@ class CellebriteIE(InfoExtractor): f'https://play.vidyard.com/player/{player_uuid}.json', display_id)['payload']['chapters'][0] formats, subtitles = self._get_formats_and_subtitles(json_data['sources'], display_id) - self._sort_formats(formats) return { 'id': str(json_data['videoId']), 'title': json_data.get('name') or self._og_search_title(webpage), diff --git a/yt_dlp/extractor/ceskatelevize.py b/yt_dlp/extractor/ceskatelevize.py index 5f4c447f2..be2b0bb43 100644 --- a/yt_dlp/extractor/ceskatelevize.py +++ b/yt_dlp/extractor/ceskatelevize.py @@ -249,9 +249,6 @@ class CeskaTelevizeIE(InfoExtractor): 'is_live': is_live, }) - for e in entries: - 
self._sort_formats(e['formats']) - if len(entries) == 1: return entries[0] return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) diff --git a/yt_dlp/extractor/channel9.py b/yt_dlp/extractor/channel9.py index d0390d937..a88474060 100644 --- a/yt_dlp/extractor/channel9.py +++ b/yt_dlp/extractor/channel9.py @@ -185,7 +185,6 @@ class Channel9IE(InfoExtractor): if not formats and not slides and not zip_file: self.raise_no_formats( 'None of recording, slides or zip are available for %s' % content_path) - self._sort_formats(formats) subtitles = {} for caption in content_data.get('Captions', []): diff --git a/yt_dlp/extractor/charlierose.py b/yt_dlp/extractor/charlierose.py index 27f8b33e5..8fe6797c6 100644 --- a/yt_dlp/extractor/charlierose.py +++ b/yt_dlp/extractor/charlierose.py @@ -38,8 +38,6 @@ class CharlieRoseIE(InfoExtractor): info_dict = self._parse_html5_media_entries( self._PLAYER_BASE % video_id, webpage, video_id, m3u8_entry_protocol='m3u8_native')[0] - - self._sort_formats(info_dict['formats']) self._remove_duplicate_formats(info_dict['formats']) info_dict.update({ diff --git a/yt_dlp/extractor/chaturbate.py b/yt_dlp/extractor/chaturbate.py index d39210bf7..99dfcfdeb 100644 --- a/yt_dlp/extractor/chaturbate.py +++ b/yt_dlp/extractor/chaturbate.py @@ -95,7 +95,6 @@ class ChaturbateIE(InfoExtractor): # ffmpeg skips segments for fast m3u8 preference=-10 if m3u8_id == 'fast' else None, m3u8_id=m3u8_id, fatal=False, live=True)) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/chingari.py b/yt_dlp/extractor/chingari.py index e54d92a86..48091dd65 100644 --- a/yt_dlp/extractor/chingari.py +++ b/yt_dlp/extractor/chingari.py @@ -32,7 +32,6 @@ class ChingariBaseIE(InfoExtractor): 'url': base_url + '/apipublic' + media_data['path'], 'quality': 10, }) - self._sort_formats(formats) timestamp = str_to_int(post_data.get('created_at')) if timestamp: timestamp = int_or_none(timestamp, 1000) diff --git a/yt_dlp/extractor/cinchcast.py b/yt_dlp/extractor/cinchcast.py index ff962aad1..7a7ea8b22 100644 --- a/yt_dlp/extractor/cinchcast.py +++ b/yt_dlp/extractor/cinchcast.py @@ -47,7 +47,6 @@ class CinchcastIE(InfoExtractor): 'format_id': 'backup', 'url': backup_url, }) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/ciscowebex.py b/yt_dlp/extractor/ciscowebex.py index e1aae9bda..44595d854 100644 --- a/yt_dlp/extractor/ciscowebex.py +++ b/yt_dlp/extractor/ciscowebex.py @@ -72,7 +72,6 @@ class CiscoWebexIE(InfoExtractor): 'vcodec': 'none', 'acodec': 'mp3', }) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/cliphunter.py b/yt_dlp/extractor/cliphunter.py index 7e5fd3175..2b907dc80 100644 --- a/yt_dlp/extractor/cliphunter.py +++ b/yt_dlp/extractor/cliphunter.py @@ -62,7 +62,6 @@ class CliphunterIE(InfoExtractor): 'height': int_or_none(height), 'tbr': int_or_none(f.get('br')), }) - self._sort_formats(formats) thumbnail = self._search_regex( r"var\s+mov_thumb\s*=\s*'([^']+)';", diff --git a/yt_dlp/extractor/cloudflarestream.py b/yt_dlp/extractor/cloudflarestream.py index 8bc0ad883..748e8e908 100644 --- a/yt_dlp/extractor/cloudflarestream.py +++ b/yt_dlp/extractor/cloudflarestream.py @@ -51,7 +51,6 @@ class CloudflareStreamIE(InfoExtractor): 'm3u8_native', m3u8_id='hls', fatal=False) formats.extend(self._extract_mpd_formats( manifest_base_url + 'mpd', video_id, mpd_id='dash', fatal=False)) - self._sort_formats(formats) return { 'id': video_id, diff --git 
a/yt_dlp/extractor/clubic.py b/yt_dlp/extractor/clubic.py
index ce8621296..403e44aaf 100644
--- a/yt_dlp/extractor/clubic.py
+++ b/yt_dlp/extractor/clubic.py
@@ -42,7 +42,6 @@ class ClubicIE(InfoExtractor):
             'url': src['src'],
             'quality': quality_order(src['streamQuality']),
         } for src in sources]
-        self._sort_formats(formats)
 
         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/clyp.py b/yt_dlp/extractor/clyp.py
index c64726ca2..0aaf73d18 100644
--- a/yt_dlp/extractor/clyp.py
+++ b/yt_dlp/extractor/clyp.py
@@ -60,7 +60,6 @@ class ClypIE(InfoExtractor):
                         'format_id': format_id,
                         'vcodec': 'none',
                     })
-        self._sort_formats(formats)
 
         title = metadata['Title']
         description = metadata.get('Description')
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 3701fe6b3..c2b9970ec 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1699,7 +1699,14 @@ class InfoExtractor:
         return FormatSort
 
     def _sort_formats(self, formats, field_preference=[]):
-        if formats and field_preference:
+        if not field_preference:
+            self._downloader.deprecation_warning(
+                'yt_dlp.InfoExtractor._sort_formats is deprecated and is no longer required')
+            return
+        self._downloader.deprecation_warning(
+            'yt_dlp.InfoExtractor._sort_formats is deprecated and no longer works as expected. '
+            'Return _format_sort_fields in the info_dict instead')
+        if formats:
             formats[0]['__sort_fields'] = field_preference
 
     def _check_formats(self, formats, video_id):
@@ -2431,7 +2438,6 @@ class InfoExtractor:
                     'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
                     'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
                 })
-            self._sort_formats(formats)
 
             entries.append({
                 'id': playlist_id,
@@ -3269,7 +3275,6 @@ class InfoExtractor:
                     'url': formats[0]['url'],
                 })
             else:
-                self._sort_formats(formats)
                 entry['formats'] = formats
             entries.append(entry)
         if len(entries) == 1:
diff --git a/yt_dlp/extractor/condenast.py b/yt_dlp/extractor/condenast.py
index ffdd820e2..3170c2990 100644
--- a/yt_dlp/extractor/condenast.py
+++ b/yt_dlp/extractor/condenast.py
@@ -197,7 +197,6 @@ class CondeNastIE(InfoExtractor):
                 'ext': ext,
                 'quality': 1 if quality == 'high' else 0,
             })
-        self._sort_formats(formats)
 
         subtitles = {}
         for t, caption in video_info.get('captions', {}).items():
diff --git a/yt_dlp/extractor/contv.py b/yt_dlp/extractor/contv.py
index 50648a536..d69e81610 100644
--- a/yt_dlp/extractor/contv.py
+++ b/yt_dlp/extractor/contv.py
@@ -69,8 +69,6 @@ class CONtvIE(InfoExtractor):
                     'url': media_mp4_url,
                 })
 
-        self._sort_formats(formats)
-
         subtitles = {}
         captions = m_details.get('captions') or {}
         for caption_url in captions.values():
diff --git a/yt_dlp/extractor/corus.py b/yt_dlp/extractor/corus.py
index 8c920e3ab..c03d65310 100644
--- a/yt_dlp/extractor/corus.py
+++ b/yt_dlp/extractor/corus.py
@@ -126,7 +126,6 @@ class CorusIE(ThePlatformFeedIE): # XXX: Do not subclass from concrete IE
                 smil, smil_url, video_id, namespace))
         if not formats and video.get('drm'):
             self.report_drm(video_id)
-        self._sort_formats(formats)
 
         subtitles = {}
         for track in video.get('tracks', []):
diff --git a/yt_dlp/extractor/coub.py b/yt_dlp/extractor/coub.py
index b462acaf0..9bab698a3 100644
--- a/yt_dlp/extractor/coub.py
+++ b/yt_dlp/extractor/coub.py
@@ -104,8 +104,6 @@ class CoubIE(InfoExtractor):
                     'source_preference': preference_key(MOBILE),
                 })
 
-        self._sort_formats(formats)
-
         thumbnail = coub.get('picture')
         duration = float_or_none(coub.get('duration'))
         timestamp = parse_iso8601(coub.get('published_at') or coub.get('created_at'))
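
Note on the common.py hunk above, the pivot of this whole patch: InfoExtractor._sort_formats is reduced to a deprecation shim (it smuggles any explicit field_preference through formats[0]['__sort_fields'] so old third-party code keeps working), and format ordering itself now happens in the YoutubeDL core. What the warning asks extractors to do instead is what the archiveorg.py hunk earlier in this patch does: replace self._sort_formats(entry['formats'], ('source', )) with entry['_format_sort_fields'] = ('source', ). A minimal sketch of that convention, with a hypothetical ExampleIE whose URLs and field choice are illustrative only, not part of this patch:

from yt_dlp.extractor.common import InfoExtractor


class ExampleIE(InfoExtractor):
    # Hypothetical extractor, shown only to illustrate the new convention.
    _VALID_URL = r'https?://example\.com/v/(?P<id>\w+)'

    def _real_extract(self, url):
        video_id = self._match_id(url)
        formats = [{
            'format_id': 'hls',
            'url': f'https://example.com/{video_id}/index.m3u8',
            'ext': 'mp4',
        }, {
            'format_id': 'source',
            'url': f'https://example.com/{video_id}/source.mp4',
            'ext': 'mp4',
        }]
        return {
            'id': video_id,
            'title': video_id,
            # No self._sort_formats(formats) call; sorting happens in the
            # YoutubeDL core once this info dict is processed.
            'formats': formats,
            # Declarative replacement for the old field_preference argument:
            '_format_sort_fields': ('source', ),
        }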
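
The test_YoutubeDL.py hunks near the top of this patch show the caller-side counterpart: instead of instantiating YoutubeIE just to call _sort_formats, the tests now call YoutubeDL.sort_formats on the info dict, and _make_result gains a _format_sort_fields parameter. A rough usage sketch, assuming only what those hunks and the DummyIE shim in test_InfoExtractor.py exercise:

import yt_dlp

ydl = yt_dlp.YoutubeDL()
info_dict = {
    'id': 'test',
    'title': 'test',
    'formats': [
        {'format_id': 'low', 'url': 'http://_/low', 'ext': 'mp4', 'height': 480},
        {'format_id': 'high', 'url': 'http://_/high', 'ext': 'mp4', 'height': 1080},
    ],
    # Plays the role of the old field_preference argument:
    '_format_sort_fields': ('height', ),
}
# Sorts info_dict['formats'] in place; the DummyIE shim relies on this
# mutation to keep the pre-existing extractor tests passing.
ydl.sort_formats(info_dict)
# yt-dlp orders formats worst to best, so the preferred format sorts last.
assert info_dict['formats'][-1]['format_id'] == 'high'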
diff --git a/yt_dlp/extractor/cpac.py b/yt_dlp/extractor/cpac.py
index 65ac2497f..0f23f2be2 100644
--- a/yt_dlp/extractor/cpac.py
+++ b/yt_dlp/extractor/cpac.py
@@ -54,8 +54,6 @@ class CPACIE(InfoExtractor):
             else:
                 fmt['language_preference'] = -10

-        self._sort_formats(formats)
-
         category = str_or_none(content['details']['category_%s_t' % (url_lang, )])

         def is_live(v_type):
diff --git a/yt_dlp/extractor/crackle.py b/yt_dlp/extractor/crackle.py
index 319374f3b..46100151a 100644
--- a/yt_dlp/extractor/crackle.py
+++ b/yt_dlp/extractor/crackle.py
@@ -177,7 +177,6 @@ class CrackleIE(InfoExtractor):
                 })
         if not formats and has_drm:
             self.report_drm(video_id)
-        self._sort_formats(formats)

         description = media.get('Description')
         duration = int_or_none(media.get(
diff --git a/yt_dlp/extractor/crooksandliars.py b/yt_dlp/extractor/crooksandliars.py
index 85c145e12..4de7e3d53 100644
--- a/yt_dlp/extractor/crooksandliars.py
+++ b/yt_dlp/extractor/crooksandliars.py
@@ -45,7 +45,6 @@ class CrooksAndLiarsIE(InfoExtractor):
             'format_id': item['type'],
             'quality': quality(item['type']),
         } for item in manifest['flavors'] if item['mime'].startswith('video/')]
-        self._sort_formats(formats)

         return {
             'url': url,
diff --git a/yt_dlp/extractor/crowdbunker.py b/yt_dlp/extractor/crowdbunker.py
index 75d90b5c5..d83c01560 100644
--- a/yt_dlp/extractor/crowdbunker.py
+++ b/yt_dlp/extractor/crowdbunker.py
@@ -60,7 +60,6 @@ class CrowdBunkerIE(InfoExtractor):
             'width': int_or_none(image.get('width')),
         } for image in video_json.get('thumbnails') or [] if image.get('url')]

-        self._sort_formats(formats)
         return {
             'id': id,
             'title': video_json.get('title'),
diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py
index 35752f1bd..ee344ce8b 100644
--- a/yt_dlp/extractor/crunchyroll.py
+++ b/yt_dlp/extractor/crunchyroll.py
@@ -208,7 +208,6 @@ class CrunchyrollBetaIE(CrunchyrollBaseIE):
                 f['language'] = stream_response.get('audio_locale')
                 f['quality'] = hardsub_preference(hardsub_lang.lower())
             formats.extend(adaptive_formats)
-        self._sort_formats(formats)

         return {
             'id': internal_id,
diff --git a/yt_dlp/extractor/cspan.py b/yt_dlp/extractor/cspan.py
index 1184633f5..0075680e8 100644
--- a/yt_dlp/extractor/cspan.py
+++ b/yt_dlp/extractor/cspan.py
@@ -218,7 +218,6 @@ class CSpanIE(InfoExtractor):
                     path, video_id, 'mp4', entry_protocol='m3u8_native',
                     m3u8_id='hls') if determine_ext(path) == 'm3u8' else [{'url': path, }]
                 add_referer(formats)
-                self._sort_formats(formats)
                 entries.append({
                     'id': '%s_%d' % (video_id, partnum + 1),
                     'title': (
diff --git a/yt_dlp/extractor/curiositystream.py b/yt_dlp/extractor/curiositystream.py
index a105b6ce2..26cf24fbb 100644
--- a/yt_dlp/extractor/curiositystream.py
+++ b/yt_dlp/extractor/curiositystream.py
@@ -117,7 +117,6 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
                     'format_id': 'http',
                 })
             formats.append(fmt)
-        self._sort_formats(formats)

         title = media['title']

diff --git a/yt_dlp/extractor/daftsex.py b/yt_dlp/extractor/daftsex.py
index 0fe014f76..551d5e3ab 100644
--- a/yt_dlp/extractor/daftsex.py
+++ b/yt_dlp/extractor/daftsex.py
@@ -81,7 +81,6 @@ class DaftsexIE(InfoExtractor):
                         'height': int_or_none(height),
                         'ext': ext,
                     })
-                self._sort_formats(formats)

                 return {
                     'id': video_id,
@@ -117,7 +116,6 @@
                     'height': int_or_none(height),
                     'ext': ext,
                 })
-        self._sort_formats(formats)

         thumbnails = []
         for k, v in item.items():
diff --git a/yt_dlp/extractor/dailymail.py b/yt_dlp/extractor/dailymail.py
index f25d7a8c6..43401e111 100644
--- a/yt_dlp/extractor/dailymail.py
+++ b/yt_dlp/extractor/dailymail.py
@@ -63,7 +63,6 @@ class DailyMailIE(InfoExtractor):
                 'protocol': protocol,
                 'ext': 'mp4' if is_hls else None,
             })
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/dailymotion.py b/yt_dlp/extractor/dailymotion.py
index 65a9feec5..2a44718fb 100644
--- a/yt_dlp/extractor/dailymotion.py
+++ b/yt_dlp/extractor/dailymotion.py
@@ -293,7 +293,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
             f['url'] = f['url'].split('#')[0]
             if not f.get('fps') and f['format_id'].endswith('@60'):
                 f['fps'] = 60
-        self._sort_formats(formats)

         subtitles = {}
         subtitles_data = try_get(metadata, lambda x: x['subtitles']['data'], dict) or {}
diff --git a/yt_dlp/extractor/dailywire.py b/yt_dlp/extractor/dailywire.py
index 1f27797ad..f177c9d9c 100644
--- a/yt_dlp/extractor/dailywire.py
+++ b/yt_dlp/extractor/dailywire.py
@@ -67,7 +67,6 @@ class DailyWireIE(DailyWireBaseIE):
                 format_, subs_ = self._extract_m3u8_formats_and_subtitles(url, slug)
                 formats.extend(format_)
                 self._merge_subtitles(subs_, target=subtitles)
-        self._sort_formats(formats)
         return {
             'id': episode_info['id'],
             'display_id': slug,
diff --git a/yt_dlp/extractor/damtomo.py b/yt_dlp/extractor/damtomo.py
index 962d9741b..0e08e4f65 100644
--- a/yt_dlp/extractor/damtomo.py
+++ b/yt_dlp/extractor/damtomo.py
@@ -36,7 +36,6 @@ class DamtomoBaseIE(InfoExtractor):
         if not m3u8_url:
             raise ExtractorError('Failed to obtain m3u8 URL')
         formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/daystar.py b/yt_dlp/extractor/daystar.py
index 4f59d904f..ef3520a21 100644
--- a/yt_dlp/extractor/daystar.py
+++ b/yt_dlp/extractor/daystar.py
@@ -36,7 +36,6 @@ class DaystarClipIE(InfoExtractor):
                 video_id, 'mp4', fatal=False, headers={'Referer': src_iframe})
             formats.extend(fmts)
             subtitles = self._merge_subtitles(subtitles, subs)
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/deezer.py b/yt_dlp/extractor/deezer.py
index bee1c7501..f61f12af0 100644
--- a/yt_dlp/extractor/deezer.py
+++ b/yt_dlp/extractor/deezer.py
@@ -62,7 +62,6 @@ class DeezerPlaylistIE(DeezerBaseInfoExtractor):
                 'preference': -100,  # Only the first 30 seconds
                 'ext': 'mp3',
             }]
-            self._sort_formats(formats)
             artists = ', '.join(
                 orderedSet(a.get('ART_NAME') for a in s.get('ARTISTS')))
             entries.append({
@@ -115,7 +114,6 @@ class DeezerAlbumIE(DeezerBaseInfoExtractor):
                 'preference': -100,  # Only the first 30 seconds
                 'ext': 'mp3',
             }]
-            self._sort_formats(formats)
             artists = ', '.join(
                 orderedSet(a.get('ART_NAME') for a in s.get('ARTISTS')))
             entries.append({
diff --git a/yt_dlp/extractor/democracynow.py b/yt_dlp/extractor/democracynow.py
index af327e6c6..1624d085c 100644
--- a/yt_dlp/extractor/democracynow.py
+++ b/yt_dlp/extractor/democracynow.py
@@ -59,8 +59,6 @@ class DemocracynowIE(InfoExtractor):
                 'vcodec': 'none' if key == 'audio' else None,
             })

-        self._sort_formats(formats)
-
         default_lang = 'en'

         subtitles = {}
diff --git a/yt_dlp/extractor/detik.py b/yt_dlp/extractor/detik.py
index 7209e6611..f14805424 100644
--- a/yt_dlp/extractor/detik.py
+++ b/yt_dlp/extractor/detik.py
@@ -146,7 +146,6 @@ class DetikEmbedIE(InfoExtractor):
             }

         formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, display_id)
-        self._sort_formats(formats)

         json_ld_data = self._search_json_ld(webpage, display_id, default={})
         yield merge_dicts(json_ld_data, extra_info_dict, {
diff --git a/yt_dlp/extractor/dfb.py b/yt_dlp/extractor/dfb.py
index 5aca72988..c4fb5c2a4 100644
--- a/yt_dlp/extractor/dfb.py
+++ b/yt_dlp/extractor/dfb.py
@@ -41,7 +41,6 @@ class DFBIE(InfoExtractor):
         formats.extend(self._extract_m3u8_formats(
             manifest_url, display_id, 'mp4',
             'm3u8_native', m3u8_id='hls', fatal=False))
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/digitalconcerthall.py b/yt_dlp/extractor/digitalconcerthall.py
index 3813a51fe..3461e36eb 100644
--- a/yt_dlp/extractor/digitalconcerthall.py
+++ b/yt_dlp/extractor/digitalconcerthall.py
@@ -88,7 +88,6 @@ class DigitalConcertHallIE(InfoExtractor):
             m3u8_url = traverse_obj(
                 stream_info, ('channel', lambda k, _: k.startswith('vod_mixed'), 'stream', 0, 'url'), get_all=False)
             formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native', fatal=False)
-            self._sort_formats(formats)

             yield {
                 'id': video_id,
diff --git a/yt_dlp/extractor/digiteka.py b/yt_dlp/extractor/digiteka.py
index 5fbc42ffe..912e33ba7 100644
--- a/yt_dlp/extractor/digiteka.py
+++ b/yt_dlp/extractor/digiteka.py
@@ -81,8 +81,6 @@ class DigitekaIE(InfoExtractor):
                 'format_id': source.get('label'),
             })

-        self._sort_formats(formats)
-
         title = deliver_info['title']
         thumbnail = jwconf.get('image')
         duration = int_or_none(deliver_info.get('duration'))
diff --git a/yt_dlp/extractor/discoverygo.py b/yt_dlp/extractor/discoverygo.py
index 7b4278c88..1f3d8e31c 100644
--- a/yt_dlp/extractor/discoverygo.py
+++ b/yt_dlp/extractor/discoverygo.py
@@ -50,7 +50,6 @@ class DiscoveryGoBaseIE(InfoExtractor):
             elif stream_kind == 'hds':
                 formats.extend(self._extract_f4m_formats(
                     stream_url, display_id, f4m_id=stream_kind, fatal=False))
-        self._sort_formats(formats)

         video_id = video.get('id') or display_id
         description = video.get('description', {}).get('detailed')
diff --git a/yt_dlp/extractor/disney.py b/yt_dlp/extractor/disney.py
index f9af59a57..430de326f 100644
--- a/yt_dlp/extractor/disney.py
+++ b/yt_dlp/extractor/disney.py
@@ -134,7 +134,6 @@ class DisneyIE(InfoExtractor):
                 self.raise_no_formats(
                     '%s said: %s' % (self.IE_NAME, page_data['translations']['video_expired']),
                     expected=True)
-        self._sort_formats(formats)

         subtitles = {}
         for caption in video_data.get('captions', []):
diff --git a/yt_dlp/extractor/dispeak.py b/yt_dlp/extractor/dispeak.py
index d4f3324e7..37f89b9bc 100644
--- a/yt_dlp/extractor/dispeak.py
+++ b/yt_dlp/extractor/dispeak.py
@@ -117,7 +117,6 @@ class DigitallySpeakingIE(InfoExtractor):
         video_formats = self._parse_mp4(metadata)
         if video_formats is None:
             video_formats = self._parse_flv(metadata)
-        self._sort_formats(video_formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/dlive.py b/yt_dlp/extractor/dlive.py
index 31b4a568f..30fcf9fce 100644
--- a/yt_dlp/extractor/dlive.py
+++ b/yt_dlp/extractor/dlive.py
@@ -40,7 +40,6 @@ class DLiveVODIE(InfoExtractor):
         title = broadcast['title']
         formats = self._extract_m3u8_formats(
             broadcast['playbackUrl'], vod_id, 'mp4', 'm3u8_native')
-        self._sort_formats(formats)
         return {
             'id': vod_id,
             'title': title,
@@ -79,7 +78,6 @@ class DLiveStreamIE(InfoExtractor):
         formats = self._extract_m3u8_formats(
             'https://live.prd.dlive.tv/hls/live/%s.m3u8' % username,
             display_name, 'mp4')
-        self._sort_formats(formats)
         return {
             'id': display_name,
             'title': title,
diff --git a/yt_dlp/extractor/dplay.py b/yt_dlp/extractor/dplay.py
index 3f0b315a5..8eb4d8ffa 100644
--- a/yt_dlp/extractor/dplay.py
+++ b/yt_dlp/extractor/dplay.py
@@ -126,7 +126,6 @@ class DPlayBaseIE(InfoExtractor):
                 'url': format_url,
                'format_id': format_id,
            })
-        self._sort_formats(formats)

        creator = series = None
        tags = []
diff --git a/yt_dlp/extractor/drbonanza.py b/yt_dlp/extractor/drbonanza.py
index dca8c89d0..824d70def 100644
--- a/yt_dlp/extractor/drbonanza.py
+++ b/yt_dlp/extractor/drbonanza.py
@@ -30,7 +30,6 @@ class DRBonanzaIE(InfoExtractor):
         info = self._parse_html5_media_entries(
             url, webpage, display_id, m3u8_id='hls',
             m3u8_entry_protocol='m3u8_native')[0]
-        self._sort_formats(info['formats'])

         asset = self._parse_json(
             self._search_regex(
diff --git a/yt_dlp/extractor/dropbox.py b/yt_dlp/extractor/dropbox.py
index 54d97a25d..214b309bf 100644
--- a/yt_dlp/extractor/dropbox.py
+++ b/yt_dlp/extractor/dropbox.py
@@ -63,7 +63,6 @@ class DropboxIE(InfoExtractor):
             video_url = re.sub(r'[?&]dl=0', '', url)
             video_url += ('?' if '?' not in video_url else '&') + 'dl=1'
             formats.append({'url': video_url, 'format_id': 'original', 'format_note': 'Original', 'quality': 1})
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/drtuber.py b/yt_dlp/extractor/drtuber.py
index 824c2be12..e5dab6ac0 100644
--- a/yt_dlp/extractor/drtuber.py
+++ b/yt_dlp/extractor/drtuber.py
@@ -58,7 +58,6 @@ class DrTuberIE(InfoExtractor):
                 'quality': 2 if format_id == 'hq' else 1,
                 'url': video_url
             })
-        self._sort_formats(formats)

         duration = int_or_none(video_data.get('duration')) or parse_duration(
             video_data.get('duration_format'))
diff --git a/yt_dlp/extractor/drtv.py b/yt_dlp/extractor/drtv.py
index 708b72fae..128f43914 100644
--- a/yt_dlp/extractor/drtv.py
+++ b/yt_dlp/extractor/drtv.py
@@ -300,8 +300,6 @@ class DRTVIE(InfoExtractor):
                 'Unfortunately, DR is not allowed to show this program outside Denmark.',
                 countries=self._GEO_COUNTRIES)

-        self._sort_formats(formats)
-
         return {
             'id': video_id,
             'title': title,
@@ -366,7 +364,6 @@ class DRTVLiveIE(InfoExtractor):
                 formats.extend(self._extract_f4m_formats(update_url_query(
                     '%s/%s' % (server, stream_path), {'hdcore': '3.7.0'}),
                     channel_id, f4m_id=link_type, fatal=False))
-        self._sort_formats(formats)

         return {
             'id': channel_id,
diff --git a/yt_dlp/extractor/dumpert.py b/yt_dlp/extractor/dumpert.py
index dc61115ff..010c2d092 100644
--- a/yt_dlp/extractor/dumpert.py
+++ b/yt_dlp/extractor/dumpert.py
@@ -48,7 +48,6 @@ class DumpertIE(InfoExtractor):
                 'format_id': version,
                 'quality': quality(version),
             })
-        self._sort_formats(formats)

         thumbnails = []
         stills = item.get('stills') or {}
diff --git a/yt_dlp/extractor/dvtv.py b/yt_dlp/extractor/dvtv.py
index 61d469f11..e67143370 100644
--- a/yt_dlp/extractor/dvtv.py
+++ b/yt_dlp/extractor/dvtv.py
@@ -142,7 +142,6 @@ class DVTVIE(InfoExtractor):
                 'format_id': join_nonempty('http', ext, label),
                 'height': int_or_none(height),
             })
-        self._sort_formats(formats)

         return {
             'id': data.get('mediaid') or video_id,
diff --git a/yt_dlp/extractor/dw.py b/yt_dlp/extractor/dw.py
index ee2365ddd..9c4a08e54 100644
--- a/yt_dlp/extractor/dw.py
+++ b/yt_dlp/extractor/dw.py
@@ -62,7 +62,6 @@ class DWIE(InfoExtractor):
                 transform_source=lambda s: s.replace(
                     'rtmp://tv-od.dw.de/flash/',
                     'http://tv-download.dw.de/dwtv_video/flv/'))
-        self._sort_formats(formats)

         upload_date = hidden_inputs.get('display_date')
         if not upload_date:
diff --git a/yt_dlp/extractor/eagleplatform.py b/yt_dlp/extractor/eagleplatform.py
index 7e5047b56..9ebd24d80 100644
--- a/yt_dlp/extractor/eagleplatform.py
+++ b/yt_dlp/extractor/eagleplatform.py
@@ -192,8 +192,6 @@ class EaglePlatformIE(InfoExtractor):
                 f['url'] = format_url
             formats.append(f)

-        self._sort_formats(formats)
-
         return {
             'id': video_id,
             'title': title,
diff --git a/yt_dlp/extractor/egghead.py b/yt_dlp/extractor/egghead.py
index d5c954961..a4b2a12f6 100644
--- a/yt_dlp/extractor/egghead.py
+++ b/yt_dlp/extractor/egghead.py
@@ -117,7 +117,6 @@ class EggheadLessonIE(EggheadBaseIE):
                 formats.append({
                     'url': format_url,
                 })
-        self._sort_formats(formats)

         return {
             'id': lesson_id,
diff --git a/yt_dlp/extractor/einthusan.py b/yt_dlp/extractor/einthusan.py
index 37be68c61..53bc2535d 100644
--- a/yt_dlp/extractor/einthusan.py
+++ b/yt_dlp/extractor/einthusan.py
@@ -89,8 +89,6 @@ class EinthusanIE(InfoExtractor):
                 'url': mp4_url,
             })

-        self._sort_formats(formats)
-
         description = get_elements_by_class('synopsis', webpage)[0]
         thumbnail = self._html_search_regex(
             r'''<img[^>]+src=(["'])(?P<url>(?!\1).+?/moviecovers/(?!\1).+?)\1''',
diff --git a/yt_dlp/extractor/eitb.py b/yt_dlp/extractor/eitb.py
index 01a47f6fd..bd027da6b 100644
--- a/yt_dlp/extractor/eitb.py
+++ b/yt_dlp/extractor/eitb.py
@@ -71,8 +71,6 @@ class EitbIE(InfoExtractor):
                     '%s?hdcore=3.7.0' % hds_url.replace('euskalsvod', 'euskalvod'),
                     video_id, f4m_id='hds', fatal=False))

-        self._sort_formats(formats)
-
         return {
             'id': video_id,
             'title': media.get('NAME_ES') or media.get('name') or media['NAME_EU'],
diff --git a/yt_dlp/extractor/ellentube.py b/yt_dlp/extractor/ellentube.py
index bcd458cdf..6eb00f9c9 100644
--- a/yt_dlp/extractor/ellentube.py
+++ b/yt_dlp/extractor/ellentube.py
@@ -28,7 +28,6 @@ class EllenTubeBaseIE(InfoExtractor):
                     entry_protocol='m3u8_native', m3u8_id='hls')
                 duration = int_or_none(entry.get('duration'))
                 break
-        self._sort_formats(formats)

         def get_insight(kind):
             return int_or_none(try_get(
diff --git a/yt_dlp/extractor/elonet.py b/yt_dlp/extractor/elonet.py
index f99e12250..c5558ffcd 100644
--- a/yt_dlp/extractor/elonet.py
+++ b/yt_dlp/extractor/elonet.py
@@ -53,7 +53,6 @@ class ElonetIE(InfoExtractor):
         else:
             formats, subtitles = [], {}
             self.raise_no_formats(f'Unknown streaming format {ext}')
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/epicon.py b/yt_dlp/extractor/epicon.py
index 89424785e..3bfcc5470 100644
--- a/yt_dlp/extractor/epicon.py
+++ b/yt_dlp/extractor/epicon.py
@@ -59,7 +59,6 @@ class EpiconIE(InfoExtractor):
         description = self._og_search_description(webpage) or None
         thumbnail = self._og_search_thumbnail(webpage) or None
         formats = self._extract_m3u8_formats(data_json['url']['video_url'], id)
-        self._sort_formats(formats)

         subtitles = {}
         for subtitle in data_json.get('subtitles', []):
diff --git a/yt_dlp/extractor/eporner.py b/yt_dlp/extractor/eporner.py
index 6bc70c5c6..a2337979b 100644
--- a/yt_dlp/extractor/eporner.py
+++ b/yt_dlp/extractor/eporner.py
@@ -106,7 +106,6 @@ class EpornerIE(InfoExtractor):
                     'height': height,
                     'fps': fps,
                 })
-        self._sort_formats(formats)

         json_ld = self._search_json_ld(webpage, display_id, default={})

diff --git a/yt_dlp/extractor/ertgr.py b/yt_dlp/extractor/ertgr.py
index eb52ad031..9ecdf5d3b 100644
--- a/yt_dlp/extractor/ertgr.py
+++ b/yt_dlp/extractor/ertgr.py
@@ -73,7 +73,7 @@ class ERTFlixCodenameIE(ERTFlixBaseIE):
         },
     ]

-    def _extract_formats_and_subs(self, video_id, allow_none=True):
+    def _extract_formats_and_subs(self, video_id):
         media_info = self._call_api(video_id, codename=video_id)
         formats, subs = [], {}
         for media_file in try_get(media_info, lambda x: x['MediaFiles'], list) or []:
@@ -97,8 +97,6 @@
             formats.extend(formats_)
             self._merge_subtitles(subs_, target=subs)

-        if formats or not allow_none:
-            self._sort_formats(formats)
         return formats, subs

     def _real_extract(self, url):
@@ -292,7 +290,6 @@ class ERTWebtvEmbedIE(InfoExtractor):
         formats, subs = self._extract_m3u8_formats_and_subtitles(
             f'https://mediastream.ert.gr/vodedge/_definst_/mp4:dvrorigin/{video_id}/playlist.m3u8',
             video_id, 'mp4')
-        self._sort_formats(formats)
         thumbnail_id = parse_qs(url).get('bgimg', [None])[0]
         if thumbnail_id and not thumbnail_id.startswith('http'):
             thumbnail_id = f'https://program.ert.gr{thumbnail_id}'
diff --git a/yt_dlp/extractor/escapist.py b/yt_dlp/extractor/escapist.py
index 5d9c46f72..85a1cbf40 100644
--- a/yt_dlp/extractor/escapist.py
+++ b/yt_dlp/extractor/escapist.py
@@ -95,7 +95,6 @@ class EscapistIE(InfoExtractor):
             'format_id': '%s-%sp' % (determine_ext(video['src']), video['res']),
             'height': int_or_none(video.get('res')),
         } for video in data['files']['videos']]
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/espn.py b/yt_dlp/extractor/espn.py
index ba0a98bea..f4b0134ab 100644
--- a/yt_dlp/extractor/espn.py
+++ b/yt_dlp/extractor/espn.py
@@ -162,7 +162,6 @@ class ESPNIE(OnceIE):
         links = clip.get('links', {})
         traverse_source(links.get('source', {}))
         traverse_source(links.get('mobile', {}))
-        self._sort_formats(formats)

         description = clip.get('caption') or clip.get('description')
         thumbnail = clip.get('thumbnail')
@@ -269,7 +268,6 @@ class ESPNCricInfoIE(InfoExtractor):
                     'url': item['url'],
                     'vcodec': 'none',
                 })
-        self._sort_formats(formats)
         return {
             'id': id,
             'title': data_json.get('title'),
@@ -400,7 +398,6 @@ class WatchESPNIE(AdobePassIE):
             m3u8_url, headers = asset['stream'], {}

         formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/esri.py b/yt_dlp/extractor/esri.py
index 1736788db..02e7efaf0 100644
--- a/yt_dlp/extractor/esri.py
+++ b/yt_dlp/extractor/esri.py
@@ -43,7 +43,6 @@ class EsriVideoIE(InfoExtractor):
                     'height': int(height),
                     'filesize_approx': parse_filesize(filesize),
                 })
-        self._sort_formats(formats)

         title = self._html_search_meta('title', webpage, 'title')
         description = self._html_search_meta(
diff --git a/yt_dlp/extractor/europa.py b/yt_dlp/extractor/europa.py
index ea20b4d4d..c2b493765 100644
--- a/yt_dlp/extractor/europa.py
+++ b/yt_dlp/extractor/europa.py
@@ -76,7 +76,6 @@ class EuropaIE(InfoExtractor):
                 'format_note': xpath_text(file_, './lglabel'),
                 'language_preference': language_preference(lang)
             })
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/eurosport.py b/yt_dlp/extractor/eurosport.py
index 5681499fb..654e11206 100644
--- a/yt_dlp/extractor/eurosport.py
+++ b/yt_dlp/extractor/eurosport.py
@@ -83,8 +83,6 @@ class EurosportIE(InfoExtractor):
             formats.extend(fmts)
             self._merge_subtitles(subs, target=subtitles)

-        self._sort_formats(formats)
-
         return {
             'id': json_data['id'],
             'title': json_ld_data.get('title') or self._og_search_title(webpage),
diff --git a/yt_dlp/extractor/euscreen.py b/yt_dlp/extractor/euscreen.py
index 4435f08e0..65a1dc7c5 100644
--- a/yt_dlp/extractor/euscreen.py
+++ b/yt_dlp/extractor/euscreen.py
@@ -45,7 +45,6 @@ class EUScreenIE(InfoExtractor):
         formats = [{
             'url': source['src'],
         } for source in video_json.get('sources', [])]
-        self._sort_formats(formats)

         return {
             'id': id,
diff --git a/yt_dlp/extractor/expotv.py b/yt_dlp/extractor/expotv.py
index 92eaf4248..bda6e3cb2 100644
--- a/yt_dlp/extractor/expotv.py
+++ b/yt_dlp/extractor/expotv.py
@@ -49,7 +49,6 @@ class ExpoTVIE(InfoExtractor):
                     r'filename=.*\.([a-z0-9_A-Z]+)&', media_url,
                     'file extension', default=None) or fcfg.get('type'),
             })
-        self._sort_formats(formats)

         title = self._og_search_title(webpage)
         description = self._og_search_description(webpage)
diff --git a/yt_dlp/extractor/expressen.py b/yt_dlp/extractor/expressen.py
index 5381e9880..86967b631 100644
--- a/yt_dlp/extractor/expressen.py
+++ b/yt_dlp/extractor/expressen.py
@@ -70,7 +70,6 @@ class ExpressenIE(InfoExtractor):
             formats = [{
                 'url': stream,
             }]
-        self._sort_formats(formats)

         title = info.get('titleRaw') or data['title']
         description = info.get('descriptionRaw')
diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py
index 35acbc643..1404be612 100644
--- a/yt_dlp/extractor/facebook.py
+++ b/yt_dlp/extractor/facebook.py
@@ -461,13 +461,12 @@ class FacebookIE(InfoExtractor):
             formats.extend(self._parse_mpd_formats(
                 compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest))))

-        def process_formats(formats):
+        def process_formats(info):
             # Downloads with browser's User-Agent are rate limited. Working around
             # with non-browser User-Agent.
-            for f in formats:
+            for f in info['formats']:
                 f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
-
-            self._sort_formats(formats, ('res', 'quality'))
+            info['_format_sort_fields'] = ('res', 'quality')

         def extract_relay_data(_filter):
             return self._parse_json(self._search_regex(
@@ -510,7 +509,6 @@ class FacebookIE(InfoExtractor):
                         'url': playable_url,
                     })
                 extract_dash_manifest(video, formats)
-                process_formats(formats)
                 v_id = video.get('videoId') or video.get('id') or video_id
                 info = {
                     'id': v_id,
@@ -521,6 +519,7 @@
                     'timestamp': int_or_none(video.get('publish_time')),
                     'duration': float_or_none(video.get('playable_duration_in_ms'), 1000),
                 }
+                process_formats(info)
                 description = try_get(video, lambda x: x['savable_description']['text'])
                 title = video.get('name')
                 if title:
@@ -687,13 +686,12 @@
             if subtitles_src:
                 subtitles.setdefault('en', []).append({'url': subtitles_src})

-        process_formats(formats)
-
         info_dict = {
             'id': video_id,
             'formats': formats,
             'subtitles': subtitles,
         }
+        process_formats(info_dict)
         info_dict.update(extract_metadata(webpage))

         return info_dict
diff --git a/yt_dlp/extractor/faz.py b/yt_dlp/extractor/faz.py
index cc12fda2b..bca62add9 100644
--- a/yt_dlp/extractor/faz.py
+++ b/yt_dlp/extractor/faz.py
@@ -78,7 +78,6 @@ class FazIE(InfoExtractor):
                     'tbr': tbr or int(mobj.group(3)),
                 })
             formats.append(f)
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/fc2.py b/yt_dlp/extractor/fc2.py
index 3501c4cf6..dd5e088fc 100644
--- a/yt_dlp/extractor/fc2.py
+++ b/yt_dlp/extractor/fc2.py
@@ -250,7 +250,6 @@ class FC2LiveIE(InfoExtractor):
                 'Referer': url,
             }))

-        self._sort_formats(formats)
         for fmt in formats:
             fmt.update({
                 'protocol': 'fc2_live',
diff --git a/yt_dlp/extractor/fczenit.py b/yt_dlp/extractor/fczenit.py
index df40888e1..8175b6b0f 100644
--- a/yt_dlp/extractor/fczenit.py
+++ b/yt_dlp/extractor/fczenit.py
@@ -38,8 +38,6 @@ class FczenitIE(InfoExtractor):
             'height': int_or_none(q.get('label')),
         } for q in msi_data['qualities'] if q.get('url')]

-        self._sort_formats(formats)
-
         tags = [tag['label'] for tag in msi_data.get('tags', []) if tag.get('label')]

         return {
diff --git a/yt_dlp/extractor/fifa.py b/yt_dlp/extractor/fifa.py
index e170b67a7..dc00edcb3 100644
--- a/yt_dlp/extractor/fifa.py
+++ b/yt_dlp/extractor/fifa.py
@@ -80,7 +80,6 @@ class FifaIE(InfoExtractor):
         })

         formats, subtitles = self._extract_m3u8_formats_and_subtitles(content_data['playURL'], video_id)
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/filmmodu.py b/yt_dlp/extractor/filmmodu.py
index d74131192..9eb550eed 100644
--- a/yt_dlp/extractor/filmmodu.py
+++ b/yt_dlp/extractor/filmmodu.py
@@ -51,8 +51,6 @@ class FilmmoduIE(InfoExtractor):
             'protocol': 'm3u8_native',
         } for source in data['sources']]

-        self._sort_formats(formats)
-
         subtitles = {}

         if data.get('subtitle'):
diff --git a/yt_dlp/extractor/filmon.py b/yt_dlp/extractor/filmon.py
index 7040231be..9a93cb984 100644
--- a/yt_dlp/extractor/filmon.py
+++ b/yt_dlp/extractor/filmon.py
@@ -65,7 +65,6 @@ class FilmOnIE(InfoExtractor):
                 'quality': QUALITY(stream.get('quality')),
                 'protocol': 'm3u8_native',
             })
-        self._sort_formats(formats)

         thumbnails = []
         poster = response.get('poster', {})
@@ -153,7 +152,6 @@ class FilmOnChannelIE(InfoExtractor):
                 'ext': 'mp4',
                 'quality': QUALITY(quality),
             })
-        self._sort_formats(formats)

         thumbnails = []
         for name, width, height in self._THUMBNAIL_RES:
diff --git a/yt_dlp/extractor/firsttv.py b/yt_dlp/extractor/firsttv.py
index 99c27e0c3..f74bd132f 100644
--- a/yt_dlp/extractor/firsttv.py
+++ b/yt_dlp/extractor/firsttv.py
@@ -123,7 +123,6 @@ class FirstTVIE(InfoExtractor):
                     % (path, m3u8_path), display_id, 'mp4',
                     entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))

-        self._sort_formats(formats)

         thumbnail = item.get('poster') or self._og_search_thumbnail(webpage)
         duration = int_or_none(item.get('duration') or self._html_search_meta(
diff --git a/yt_dlp/extractor/flickr.py b/yt_dlp/extractor/flickr.py
index 9f60a6b1f..89a40d7e2 100644
--- a/yt_dlp/extractor/flickr.py
+++ b/yt_dlp/extractor/flickr.py
@@ -89,7 +89,6 @@ class FlickrIE(InfoExtractor):
                     'url': stream['_content'],
                     'quality': preference(stream_type),
                 })
-            self._sort_formats(formats)

             owner = video_info.get('owner', {})
             uploader_id = owner.get('nsid')
diff --git a/yt_dlp/extractor/folketinget.py b/yt_dlp/extractor/folketinget.py
index 0e69fa32f..55a11e591 100644
--- a/yt_dlp/extractor/folketinget.py
+++ b/yt_dlp/extractor/folketinget.py
@@ -59,7 +59,6 @@ class FolketingetIE(InfoExtractor):
             'url': xpath_text(n, './url', fatal=True),
             'tbr': int_or_none(n.attrib['bitrate']),
         } for n in doc.findall('.//streams/stream')]
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/fourtube.py b/yt_dlp/extractor/fourtube.py
index c6af100f3..b6368b87d 100644
--- a/yt_dlp/extractor/fourtube.py
+++ b/yt_dlp/extractor/fourtube.py
@@ -35,7 +35,6 @@ class FourTubeBaseIE(InfoExtractor):
             'resolution': format + 'p',
             'quality': int(format),
         } for format in sources]
-        self._sort_formats(formats)
         return formats

     def _real_extract(self, url):
diff --git a/yt_dlp/extractor/fourzerostudio.py b/yt_dlp/extractor/fourzerostudio.py
index e1804e39e..c388a3a07 100644
--- a/yt_dlp/extractor/fourzerostudio.py
+++ b/yt_dlp/extractor/fourzerostudio.py
@@ -29,7 +29,6 @@ class FourZeroStudioArchiveIE(InfoExtractor):
             'ssrRefs', lambda _, v: v['__typename'] == 'PublicUser', 'id'), get_all=False)

         formats, subs = self._extract_m3u8_formats_and_subtitles(pcb['archiveUrl'], video_id, ext='mp4')
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/fox.py b/yt_dlp/extractor/fox.py
index 53826630f..15c0c48c1 100644
--- a/yt_dlp/extractor/fox.py
+++ b/yt_dlp/extractor/fox.py
@@ -132,7 +132,6 @@ class FOXIE(InfoExtractor):
         formats = self._extract_m3u8_formats(
             m3u8_url, video_id, 'mp4',
             entry_protocol='m3u8_native', m3u8_id='hls')
-        self._sort_formats(formats)

         data = try_get(
             video, lambda x: x['trackingData']['properties'], dict) or {}
diff --git a/yt_dlp/extractor/foxgay.py b/yt_dlp/extractor/foxgay.py
index b285464ec..f4f29c65d 100644
--- a/yt_dlp/extractor/foxgay.py
+++ b/yt_dlp/extractor/foxgay.py
@@ -48,8 +48,6 @@ class FoxgayIE(InfoExtractor):
         } for source, resolution in zip(
             video_data['sources'],
             video_data.get('resolutions', itertools.repeat(None)))]
-        self._sort_formats(formats)
-
         return {
             'id': video_id,
             'title': title,
diff --git a/yt_dlp/extractor/fptplay.py b/yt_dlp/extractor/fptplay.py
index 1872d8a1c..85613bafe 100644
--- a/yt_dlp/extractor/fptplay.py
+++ b/yt_dlp/extractor/fptplay.py
@@ -59,7 +59,6 @@ class FptplayIE(InfoExtractor):
         info = self._download_json(
             self.get_api_with_st_token(video_id, int(slug_episode) - 1 if slug_episode else 0), video_id)
         formats, subtitles = self._extract_m3u8_formats_and_subtitles(info['data']['url'], video_id, 'mp4')
-        self._sort_formats(formats)
         return {
             'id': video_id,
             'title': join_nonempty(title, real_episode, delim=' - '),
diff --git a/yt_dlp/extractor/francetv.py b/yt_dlp/extractor/francetv.py
index 56a00a238..052317204 100644
--- a/yt_dlp/extractor/francetv.py
+++ b/yt_dlp/extractor/francetv.py
@@ -191,8 +191,6 @@ class FranceTVIE(InfoExtractor):
                 } for sheet in spritesheets]
             })

-        self._sort_formats(formats)
-
         if subtitle:
             title += ' - %s' % subtitle
         title = title.strip()
diff --git a/yt_dlp/extractor/freesound.py b/yt_dlp/extractor/freesound.py
index 9724dbdf0..8b5f2278c 100644
--- a/yt_dlp/extractor/freesound.py
+++ b/yt_dlp/extractor/freesound.py
@@ -63,7 +63,6 @@ class FreesoundIE(InfoExtractor):
             'format_note': channels,
             'quality': quality,
         } for quality, format_url in enumerate(audio_urls)]
-        self._sort_formats(formats)

         return {
             'id': audio_id,
diff --git a/yt_dlp/extractor/freetv.py b/yt_dlp/extractor/freetv.py
index f38bae90b..757a10d01 100644
--- a/yt_dlp/extractor/freetv.py
+++ b/yt_dlp/extractor/freetv.py
@@ -43,7 +43,6 @@ class FreeTvMoviesIE(FreeTvBaseIE):

         video_id, video_url = api_response['displayMeta']['contentID'], api_response['displayMeta']['streamURLVideo']
         formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, video_id, 'mp4')
-        self._sort_formats(formats)

         return {
             'id': video_id,
@@ -106,7 +105,6 @@ class FreeTvIE(FreeTvBaseIE):
         for episode in episodes:
             video_id = str(episode['contentID'])
             formats, subtitles = self._extract_m3u8_formats_and_subtitles(episode['streamURL'], video_id, 'mp4')
-            self._sort_formats(formats)

             yield {
                 'id': video_id,
diff --git a/yt_dlp/extractor/frontendmasters.py b/yt_dlp/extractor/frontendmasters.py
index e0529b7ba..3bae8add0 100644
--- a/yt_dlp/extractor/frontendmasters.py
+++ b/yt_dlp/extractor/frontendmasters.py
@@ -160,7 +160,6 @@ class FrontendMastersIE(FrontendMastersBaseIE):
                     'format_id': format_id,
                 })
             formats.append(f)
-        self._sort_formats(formats)

         subtitles = {
             'en': [{
diff --git a/yt_dlp/extractor/fujitv.py b/yt_dlp/extractor/fujitv.py
index d7f49accd..668bb2743 100644
--- a/yt_dlp/extractor/fujitv.py
+++ b/yt_dlp/extractor/fujitv.py
@@ -57,7 +57,6 @@ class FujiTVFODPlus7IE(InfoExtractor):
                     self._BITRATE_MAP.get(f.get('tbr'), ()))))
             formats.extend(fmt)
             subtitles = self._merge_subtitles(subtitles, subs)
-        self._sort_formats(formats, ['tbr'])

         return {
             'id': video_id,
@@ -68,4 +67,5 @@
             'formats': formats,
             'subtitles': subtitles,
             'thumbnail': f'{self._BASE_URL}img/program/{series_id}/episode/{video_id}_a.jpg',
+            '_format_sort_fields': ('tbr', )
         }
diff --git a/yt_dlp/extractor/funimation.py b/yt_dlp/extractor/funimation.py
index c70cf50c7..18363c1b9 100644
--- a/yt_dlp/extractor/funimation.py
+++ b/yt_dlp/extractor/funimation.py
@@ -247,7 +247,6 @@ class FunimationIE(FunimationBaseIE):
             self.raise_no_formats(
                 'There are no video formats matching the requested languages/versions', expected=True, video_id=display_id)
         self._remove_duplicate_formats(formats)
-        self._sort_formats(formats, ('lang', 'source'))

         return {
             'id': episode_id,
@@ -266,6 +265,7 @@
             'formats': formats,
             'thumbnails': thumbnails,
             'subtitles': subtitles,
+            '_format_sort_fields': ('lang', 'source'),
         }

     def _get_subtitles(self, subtitles, experience_id, episode, display_id, format_name):
diff --git a/yt_dlp/extractor/fusion.py b/yt_dlp/extractor/fusion.py
index 46bda49ea..689422fca 100644
--- a/yt_dlp/extractor/fusion.py
+++ b/yt_dlp/extractor/fusion.py
@@ -70,7 +70,6 @@ class FusionIE(InfoExtractor):
                     'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
                 })
         if formats:
-            self._sort_formats(formats)
             info['formats'] = formats
         else:
             info.update({
diff --git a/yt_dlp/extractor/gab.py b/yt_dlp/extractor/gab.py
index 7ed81f761..5016e2ff9 100644
--- a/yt_dlp/extractor/gab.py
+++ b/yt_dlp/extractor/gab.py
@@ -54,7 +54,6 @@ class GabTVIE(InfoExtractor):
             else:
                 frmt['height'] = str_to_int(resolution.replace('p', ''))
             formats.append(frmt)
-        self._sort_formats(formats)

         return {
             'id': id,
@@ -120,8 +119,6 @@ class GabIE(InfoExtractor):
             } for url, f in ((media.get('url'), metadata.get('original') or {}),
                              (media.get('source_mp4'), metadata.get('playable') or {})) if url]

-            self._sort_formats(formats)
-
             author = json_data.get('account') or {}
             entries.append({
                 'id': f'{post_id}-{idx}',
diff --git a/yt_dlp/extractor/gaia.py b/yt_dlp/extractor/gaia.py
index 4ace0544a..c84386f2c 100644
--- a/yt_dlp/extractor/gaia.py
+++ b/yt_dlp/extractor/gaia.py
@@ -88,7 +88,6 @@ class GaiaIE(InfoExtractor):
             media_id, headers=headers)
         formats = self._extract_m3u8_formats(
             media['mediaUrls']['bcHLS'], media_id, 'mp4')
-        self._sort_formats(formats)

         subtitles = {}
         text_tracks = media.get('textTracks', {})
diff --git a/yt_dlp/extractor/gamespot.py b/yt_dlp/extractor/gamespot.py
index e1d317377..8dec2522c 100644
--- a/yt_dlp/extractor/gamespot.py
+++ b/yt_dlp/extractor/gamespot.py
@@ -65,8 +65,6 @@ class GameSpotIE(OnceIE):
                 formats.extend(self._extract_mpd_formats(
                     mpd_url, page_id, mpd_id='dash', fatal=False))

-        self._sort_formats(formats)
-
         return {
             'id': data_video.get('guid') or page_id,
             'display_id': page_id,
diff --git a/yt_dlp/extractor/gaskrank.py b/yt_dlp/extractor/gaskrank.py
index 76ddcc40e..e0bbdae0a 100644
--- a/yt_dlp/extractor/gaskrank.py
+++ b/yt_dlp/extractor/gaskrank.py
@@ -93,6 +93,5 @@ class GaskrankIE(InfoExtractor):
             'view_count': view_count,
             'average_rating': average_rating,
         })
-        self._sort_formats(entry['formats'])

         return entry
diff --git a/yt_dlp/extractor/gedidigital.py b/yt_dlp/extractor/gedidigital.py
index 4cc678021..1878d636d 100644
--- a/yt_dlp/extractor/gedidigital.py
+++ b/yt_dlp/extractor/gedidigital.py
@@ -186,7 +186,6 @@ class GediDigitalIE(InfoExtractor):
                 duration = int_or_none(v)

         self._clean_formats(formats)
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index 21e92cba6..85581e622 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -2619,7 +2619,6 @@ class GenericIE(InfoExtractor): 'vcodec': 'none' if m.group('type') == 'audio' else None }] info_dict['direct'] = True - self._sort_formats(formats) info_dict.update({ 'formats': formats, 'subtitles': subtitles, @@ -2637,7 +2636,6 @@ class GenericIE(InfoExtractor): if first_bytes.startswith(b'#EXTM3U'): self.report_detected('M3U playlist') info_dict['formats'], info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4') - self._sort_formats(info_dict['formats']) return info_dict # Maybe it's a direct link to a video? @@ -2671,12 +2669,10 @@ class GenericIE(InfoExtractor): elif doc.tag == 'SmoothStreamingMedia': info_dict['formats'], info_dict['subtitles'] = self._parse_ism_formats_and_subtitles(doc, url) self.report_detected('ISM manifest') - self._sort_formats(info_dict['formats']) return info_dict elif re.match(r'^(?:{[^}]+})?smil$', doc.tag): smil = self._parse_smil(doc, url, video_id) self.report_detected('SMIL file') - self._sort_formats(smil['formats']) return smil elif doc.tag == '{http://xspf.org/ns/0/}playlist': self.report_detected('XSPF playlist') @@ -2691,12 +2687,10 @@ class GenericIE(InfoExtractor): mpd_base_url=full_response.geturl().rpartition('/')[0], mpd_url=url) self.report_detected('DASH manifest') - self._sort_formats(info_dict['formats']) return info_dict elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag): info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id) self.report_detected('F4M manifest') - self._sort_formats(info_dict['formats']) return info_dict except xml.etree.ElementTree.ParseError: pass @@ -2840,7 +2834,6 @@ class GenericIE(InfoExtractor): }) if formats or subtitles: self.report_detected('video.js embed') - self._sort_formats(formats) return [{'formats': formats, 'subtitles': subtitles}] # Looking for http://schema.org/VideoObject @@ -2923,8 +2916,6 @@ class GenericIE(InfoExtractor): if not formats[-1].get('height'): formats[-1]['quality'] = 1 - self._sort_formats(formats) - return [{ 'id': flashvars['video_id'], 'display_id': display_id, @@ -3073,9 +3064,6 @@ class GenericIE(InfoExtractor): else: entry_info_dict['url'] = video_url - if entry_info_dict.get('formats'): - self._sort_formats(entry_info_dict['formats']) - entries.append(entry_info_dict) if len(entries) > 1: diff --git a/yt_dlp/extractor/genericembeds.py b/yt_dlp/extractor/genericembeds.py index 45e1618ba..9b4f14dd1 100644 --- a/yt_dlp/extractor/genericembeds.py +++ b/yt_dlp/extractor/genericembeds.py @@ -30,7 +30,6 @@ class HTML5MediaEmbedIE(InfoExtractor): make_archive_id('generic', f'{video_id}-{num}' if len(entries) > 1 else video_id), ], }) - self._sort_formats(entry['formats']) yield entry diff --git a/yt_dlp/extractor/gettr.py b/yt_dlp/extractor/gettr.py index 9bd6200b6..7795dc56f 100644 --- a/yt_dlp/extractor/gettr.py +++ b/yt_dlp/extractor/gettr.py @@ -121,8 +121,6 @@ class GettrIE(GettrBaseIE): 'height': int_or_none(post_data.get('vid_hgt')), }) - self._sort_formats(formats) - return { 'id': post_id, 'title': title, @@ -192,8 +190,6 @@ class GettrStreamingIE(GettrBaseIE): 'url': urljoin(self._MEDIA_BASE_URL, thumbnail), } for thumbnail in try_get(video_info, lambda x: x['postData']['imgs'], list) or []] - self._sort_formats(formats) - return { 'id': video_id, 'title': try_get(video_info, lambda x: x['postData']['ttl'], str), diff --git a/yt_dlp/extractor/gfycat.py b/yt_dlp/extractor/gfycat.py index 9d091c113..edc2e56e4 100644 --- a/yt_dlp/extractor/gfycat.py +++ b/yt_dlp/extractor/gfycat.py @@ -127,7 
+127,6 @@ class GfycatIE(InfoExtractor): 'filesize': filesize, 'quality': quality(format_id), }) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/giantbomb.py b/yt_dlp/extractor/giantbomb.py index 5d6b208aa..112572366 100644 --- a/yt_dlp/extractor/giantbomb.py +++ b/yt_dlp/extractor/giantbomb.py @@ -74,8 +74,6 @@ class GiantBombIE(InfoExtractor): if youtube_id: return self.url_result(youtube_id, 'Youtube') - self._sort_formats(formats) - return { 'id': video_id, 'display_id': display_id, diff --git a/yt_dlp/extractor/giga.py b/yt_dlp/extractor/giga.py index e728598f7..b59c129ab 100644 --- a/yt_dlp/extractor/giga.py +++ b/yt_dlp/extractor/giga.py @@ -59,7 +59,6 @@ class GigaIE(InfoExtractor): 'format_id': '%s-%s' % (fmt['quality'], fmt['type'].split('/')[-1]), 'quality': quality(fmt['quality']), }) - self._sort_formats(formats) title = self._html_search_meta( 'title', webpage, 'title', fatal=True) diff --git a/yt_dlp/extractor/globo.py b/yt_dlp/extractor/globo.py index fb2a3fab2..a7be2cb76 100644 --- a/yt_dlp/extractor/globo.py +++ b/yt_dlp/extractor/globo.py @@ -139,7 +139,6 @@ class GloboIE(InfoExtractor): fmts, subtitles = self._extract_m3u8_formats_and_subtitles( signed_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) formats.extend(fmts) - self._sort_formats(formats) for resource in video['resources']: if resource.get('type') == 'subtitle': diff --git a/yt_dlp/extractor/glomex.py b/yt_dlp/extractor/glomex.py index 86fe1b024..22aac0db9 100644 --- a/yt_dlp/extractor/glomex.py +++ b/yt_dlp/extractor/glomex.py @@ -82,7 +82,6 @@ class GlomexBaseIE(InfoExtractor): if video.get('language'): for fmt in formats: fmt['language'] = video['language'] - self._sort_formats(formats) images = (video.get('images') or []) + [video.get('image') or {}] thumbnails = [{ diff --git a/yt_dlp/extractor/go.py b/yt_dlp/extractor/go.py index 9b8723ea1..b075a02e0 100644 --- a/yt_dlp/extractor/go.py +++ b/yt_dlp/extractor/go.py @@ -293,7 +293,6 @@ class GoIE(AdobePassIE): 'height': height, }) formats.append(f) - self._sort_formats(formats) for cc in video_data.get('closedcaption', {}).get('src', []): cc_url = cc.get('value') diff --git a/yt_dlp/extractor/golem.py b/yt_dlp/extractor/golem.py index 8416b5aa4..c33d95019 100644 --- a/yt_dlp/extractor/golem.py +++ b/yt_dlp/extractor/golem.py @@ -51,7 +51,6 @@ class GolemIE(InfoExtractor): 'filesize': self._int(e.findtext('filesize'), 'filesize'), 'ext': determine_ext(e.findtext('./filename')), }) - self._sort_formats(formats) info['formats'] = formats thumbnails = [] diff --git a/yt_dlp/extractor/goodgame.py b/yt_dlp/extractor/goodgame.py index 0866647e6..c17ad56f4 100644 --- a/yt_dlp/extractor/goodgame.py +++ b/yt_dlp/extractor/goodgame.py @@ -41,7 +41,6 @@ class GoodGameIE(InfoExtractor): else: self.raise_no_formats('User is offline', expected=True, video_id=channel_name) - self._sort_formats(formats) return { 'id': player_id, 'formats': formats, diff --git a/yt_dlp/extractor/googledrive.py b/yt_dlp/extractor/googledrive.py index cb123b874..e027ea7c4 100644 --- a/yt_dlp/extractor/googledrive.py +++ b/yt_dlp/extractor/googledrive.py @@ -253,8 +253,6 @@ class GoogleDriveIE(InfoExtractor): if not formats and reason: self.raise_no_formats(reason, expected=True) - self._sort_formats(formats) - hl = get_value('hl') subtitles_id = None ttsurl = get_value('ttsurl') diff --git a/yt_dlp/extractor/goplay.py b/yt_dlp/extractor/goplay.py index 31267e1aa..2882b49dd 100644 --- a/yt_dlp/extractor/goplay.py +++ 
b/yt_dlp/extractor/goplay.py @@ -81,7 +81,6 @@ class GoPlayIE(InfoExtractor): formats, subs = self._extract_m3u8_formats_and_subtitles( api['video']['S'], video_id, ext='mp4', m3u8_id='HLS') - self._sort_formats(formats) info_dict.update({ 'id': video_id, diff --git a/yt_dlp/extractor/gopro.py b/yt_dlp/extractor/gopro.py index 14d6b2187..ae965374c 100644 --- a/yt_dlp/extractor/gopro.py +++ b/yt_dlp/extractor/gopro.py @@ -78,8 +78,6 @@ class GoProIE(InfoExtractor): 'height': int_or_none(fmt.get('height')), }) - self._sort_formats(formats) - title = str_or_none( try_get(metadata, lambda x: x['collection']['title']) or self._html_search_meta(['og:title', 'twitter:title'], webpage) diff --git a/yt_dlp/extractor/gronkh.py b/yt_dlp/extractor/gronkh.py index c112c7857..b6cf14117 100644 --- a/yt_dlp/extractor/gronkh.py +++ b/yt_dlp/extractor/gronkh.py @@ -37,7 +37,6 @@ class GronkhIE(InfoExtractor): 'url': data_json['vtt_url'], 'ext': 'vtt', }) - self._sort_formats(formats) return { 'id': id, 'title': data_json.get('title'), diff --git a/yt_dlp/extractor/hbo.py b/yt_dlp/extractor/hbo.py index f54628665..530bdb727 100644 --- a/yt_dlp/extractor/hbo.py +++ b/yt_dlp/extractor/hbo.py @@ -112,7 +112,6 @@ class HBOBaseIE(InfoExtractor): 'width': format_info.get('width'), 'height': format_info.get('height'), }) - self._sort_formats(formats) thumbnails = [] card_sizes = xpath_element(video_data, 'titleCardSizes') diff --git a/yt_dlp/extractor/hearthisat.py b/yt_dlp/extractor/hearthisat.py index 9aa1325af..d1a400d8c 100644 --- a/yt_dlp/extractor/hearthisat.py +++ b/yt_dlp/extractor/hearthisat.py @@ -81,7 +81,6 @@ class HearThisAtIE(InfoExtractor): 'acodec': ext, 'quality': 2, # Usually better quality }) - self._sort_formats(formats) return { 'id': track_id, diff --git a/yt_dlp/extractor/heise.py b/yt_dlp/extractor/heise.py index 86661d75a..27d737c04 100644 --- a/yt_dlp/extractor/heise.py +++ b/yt_dlp/extractor/heise.py @@ -194,7 +194,6 @@ class HeiseIE(InfoExtractor): 'format_id': '%s_%s' % (ext, label), 'height': height, }) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/hellporno.py b/yt_dlp/extractor/hellporno.py index fd0327228..fa32b27c8 100644 --- a/yt_dlp/extractor/hellporno.py +++ b/yt_dlp/extractor/hellporno.py @@ -39,7 +39,6 @@ class HellPornoIE(InfoExtractor): title = remove_end(self._html_extract_title(webpage), ' - Hell Porno') info = self._parse_html5_media_entries(url, webpage, display_id)[0] - self._sort_formats(info['formats']) video_id = self._search_regex( (r'chs_object\s*=\s*["\'](\d+)', diff --git a/yt_dlp/extractor/helsinki.py b/yt_dlp/extractor/helsinki.py index b7c826055..e518cae1a 100644 --- a/yt_dlp/extractor/helsinki.py +++ b/yt_dlp/extractor/helsinki.py @@ -29,7 +29,6 @@ class HelsinkiIE(InfoExtractor): 'url': s['file'], 'ext': 'mp4', } for s in params['sources']] - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/hidive.py b/yt_dlp/extractor/hidive.py index 50d49adf0..3a53f2c45 100644 --- a/yt_dlp/extractor/hidive.py +++ b/yt_dlp/extractor/hidive.py @@ -103,7 +103,6 @@ class HiDiveIE(InfoExtractor): f['language'] = audio f['format_note'] = f'{version}, {extra}' formats.extend(frmt) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/hitbox.py b/yt_dlp/extractor/hitbox.py index fdcf6770d..f0c689883 100644 --- a/yt_dlp/extractor/hitbox.py +++ b/yt_dlp/extractor/hitbox.py @@ -118,7 +118,6 @@ class HitboxIE(InfoExtractor): 'tbr': bitrate, 'format_note': label, }) - 
self._sort_formats(formats) metadata = self._extract_metadata( 'https://www.smashcast.tv/api/media/video', video_id) @@ -200,7 +199,6 @@ class HitboxLiveIE(HitboxIE): # XXX: Do not subclass from concrete IE 'page_url': url, 'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf', }) - self._sort_formats(formats) metadata = self._extract_metadata( 'https://www.smashcast.tv/api/media/live', video_id) diff --git a/yt_dlp/extractor/hketv.py b/yt_dlp/extractor/hketv.py index 4c616d1dd..10879564f 100644 --- a/yt_dlp/extractor/hketv.py +++ b/yt_dlp/extractor/hketv.py @@ -137,7 +137,6 @@ class HKETVIE(InfoExtractor): 'width': w, 'height': h, }) - self._sort_formats(formats) subtitles = {} tracks = try_get(playlist0, lambda x: x['tracks'], list) or [] diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index 48aa6e94a..8725c9436 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -227,7 +227,6 @@ class HotStarIE(HotStarBaseIE): if not formats and geo_restricted: self.raise_geo_restricted(countries=['IN'], metadata_available=True) - self._sort_formats(formats) for f in formats: f.setdefault('http_headers', {}).update(headers) diff --git a/yt_dlp/extractor/howstuffworks.py b/yt_dlp/extractor/howstuffworks.py index c49c0899e..238fc0b42 100644 --- a/yt_dlp/extractor/howstuffworks.py +++ b/yt_dlp/extractor/howstuffworks.py @@ -75,8 +75,6 @@ class HowStuffWorksIE(InfoExtractor): 'vbr': vbr, }) - self._sort_formats(formats) - return { 'id': '%s' % video_id, 'display_id': display_id, diff --git a/yt_dlp/extractor/hrfensehen.py b/yt_dlp/extractor/hrfensehen.py index 447782019..35e9f67c4 100644 --- a/yt_dlp/extractor/hrfensehen.py +++ b/yt_dlp/extractor/hrfensehen.py @@ -58,8 +58,6 @@ class HRFernsehenIE(InfoExtractor): stream_format['tbr'] = int_or_none(quality_information.group(4)) stream_formats.append(stream_format) - - self._sort_formats(stream_formats) return stream_formats def _real_extract(self, url): diff --git a/yt_dlp/extractor/hrti.py b/yt_dlp/extractor/hrti.py index 773ae0c9a..cfec80d14 100644 --- a/yt_dlp/extractor/hrti.py +++ b/yt_dlp/extractor/hrti.py @@ -144,7 +144,6 @@ class HRTiIE(HRTiBaseIE): formats = self._extract_m3u8_formats( m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') - self._sort_formats(formats) description = clean_html(title_info.get('summary_long')) age_limit = parse_age_limit(video.get('parental_control', {}).get('rating')) diff --git a/yt_dlp/extractor/hse.py b/yt_dlp/extractor/hse.py index 9faf46a5d..3cb21d2dd 100644 --- a/yt_dlp/extractor/hse.py +++ b/yt_dlp/extractor/hse.py @@ -25,7 +25,6 @@ class HSEShowBaseInfoExtractor(InfoExtractor): fmts, subs = self._extract_m3u8_formats_and_subtitles(src['url'], video_id, ext='mp4') formats.extend(fmts) subtitles = self._merge_subtitles(subtitles, subs) - self._sort_formats(formats) return formats, subtitles diff --git a/yt_dlp/extractor/huffpost.py b/yt_dlp/extractor/huffpost.py index 27ebc8b6c..69fdc34ef 100644 --- a/yt_dlp/extractor/huffpost.py +++ b/yt_dlp/extractor/huffpost.py @@ -79,8 +79,6 @@ class HuffPostIE(InfoExtractor): 'vcodec': 'none' if key.startswith('audio/') else None, }) - self._sort_formats(formats) - return { 'id': video_id, 'title': video_title, diff --git a/yt_dlp/extractor/hungama.py b/yt_dlp/extractor/hungama.py index 717f50a83..2e9939601 100644 --- a/yt_dlp/extractor/hungama.py +++ b/yt_dlp/extractor/hungama.py @@ -53,7 +53,6 @@ class HungamaIE(InfoExtractor): }) formats = 
self._extract_m3u8_formats(video_json['stream_url'], video_id, ext='mp4', m3u8_id='hls') - self._sort_formats(formats) json_ld = self._search_json_ld( self._download_webpage(url, video_id, fatal=False) or '', video_id, fatal=False) diff --git a/yt_dlp/extractor/huya.py b/yt_dlp/extractor/huya.py index c05e77c32..b6e9eec24 100644 --- a/yt_dlp/extractor/huya.py +++ b/yt_dlp/extractor/huya.py @@ -93,8 +93,6 @@ class HuyaLiveIE(InfoExtractor): **self._RESOLUTION.get(si.get('sDisplayName'), {}), }) - self._sort_formats(formats) - return { 'id': video_id, 'title': title, diff --git a/yt_dlp/extractor/icareus.py b/yt_dlp/extractor/icareus.py index dc7a2f0ba..d081cf42e 100644 --- a/yt_dlp/extractor/icareus.py +++ b/yt_dlp/extractor/icareus.py @@ -169,7 +169,6 @@ class IcareusIE(InfoExtractor): 'url': url_or_none(info.get('thumbnail') or assets.get('thumbnail')) }] - self._sort_formats(formats) return merge_dicts({ 'id': video_id, 'title': None, diff --git a/yt_dlp/extractor/ichinanalive.py b/yt_dlp/extractor/ichinanalive.py index ffff36cc1..9d55ddc02 100644 --- a/yt_dlp/extractor/ichinanalive.py +++ b/yt_dlp/extractor/ichinanalive.py @@ -73,8 +73,6 @@ class IchinanaLiveIE(InfoExtractor): 'acodec': 'aac', }) - self._sort_formats(formats) - return { 'id': video_id, 'title': uploader or video_id, @@ -147,8 +145,6 @@ class IchinanaLiveClipIE(InfoExtractor): 'http_headers': {'Referer': url}, }) - self._sort_formats(formats) - return { 'id': video_id, 'title': uploader or video_id, diff --git a/yt_dlp/extractor/ign.py b/yt_dlp/extractor/ign.py index bfb1e9d64..d4797d35e 100644 --- a/yt_dlp/extractor/ign.py +++ b/yt_dlp/extractor/ign.py @@ -102,8 +102,6 @@ class IGNIE(IGNBaseIE): 'url': mezzanine_url, }) - self._sort_formats(formats) - thumbnails = [] for thumbnail in (video.get('thumbnails') or []): thumbnail_url = thumbnail.get('url') diff --git a/yt_dlp/extractor/imdb.py b/yt_dlp/extractor/imdb.py index 74cab7dc1..557a3b7b7 100644 --- a/yt_dlp/extractor/imdb.py +++ b/yt_dlp/extractor/imdb.py @@ -100,7 +100,6 @@ class ImdbIE(InfoExtractor): 'ext': ext, 'quality': quality(format_id), }) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/imggaming.py b/yt_dlp/extractor/imggaming.py index 5b8bfda96..8e220fd9f 100644 --- a/yt_dlp/extractor/imggaming.py +++ b/yt_dlp/extractor/imggaming.py @@ -103,7 +103,6 @@ class ImgGamingBaseIE(InfoExtractor): formats.extend(self._extract_mpd_formats( media_url, media_id, mpd_id='dash', fatal=False, headers=self._MANIFEST_HEADERS)) - self._sort_formats(formats) subtitles = {} for subtitle in video_data.get('subtitles', []): diff --git a/yt_dlp/extractor/imgur.py b/yt_dlp/extractor/imgur.py index 21c56d879..bff6ed57f 100644 --- a/yt_dlp/extractor/imgur.py +++ b/yt_dlp/extractor/imgur.py @@ -84,8 +84,6 @@ class ImgurIE(InfoExtractor): }, }) - self._sort_formats(formats) - return { 'id': video_id, 'formats': formats, diff --git a/yt_dlp/extractor/indavideo.py b/yt_dlp/extractor/indavideo.py index b397c168c..4fa97d8bb 100644 --- a/yt_dlp/extractor/indavideo.py +++ b/yt_dlp/extractor/indavideo.py @@ -89,7 +89,6 @@ class IndavideoEmbedIE(InfoExtractor): 'url': video_url, 'height': height, }) - self._sort_formats(formats) timestamp = video.get('date') if timestamp: diff --git a/yt_dlp/extractor/infoq.py b/yt_dlp/extractor/infoq.py index 6b31701eb..192bcfe35 100644 --- a/yt_dlp/extractor/infoq.py +++ b/yt_dlp/extractor/infoq.py @@ -128,8 +128,6 @@ class InfoQIE(BokeCCBaseIE): + self._extract_http_video(webpage) + self._extract_http_audio(webpage, 
video_id)) - self._sort_formats(formats) - return { 'id': video_id, 'title': video_title, diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py index fc08f377c..02335138f 100644 --- a/yt_dlp/extractor/instagram.py +++ b/yt_dlp/extractor/instagram.py @@ -155,7 +155,6 @@ class InstagramBaseIE(InfoExtractor): } for format in videos_list or []] if dash_manifest_raw: formats.extend(self._parse_mpd_formats(self._parse_xml(dash_manifest_raw, media_id), mpd_id='dash')) - self._sort_formats(formats) thumbnails = [{ 'url': thumbnail.get('url'), @@ -494,7 +493,6 @@ class InstagramIE(InstagramBaseIE): dash = traverse_obj(media, ('dash_info', 'video_dash_manifest')) if dash: formats.extend(self._parse_mpd_formats(self._parse_xml(dash, video_id), mpd_id='dash')) - self._sort_formats(formats) comment_data = traverse_obj(media, ('edge_media_to_parent_comment', 'edges')) comments = [{ diff --git a/yt_dlp/extractor/internazionale.py b/yt_dlp/extractor/internazionale.py index c8f70785f..1b1cb574a 100644 --- a/yt_dlp/extractor/internazionale.py +++ b/yt_dlp/extractor/internazionale.py @@ -60,7 +60,6 @@ class InternazionaleIE(InfoExtractor): entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) formats.extend(self._extract_mpd_formats( video_base + 'mpd', display_id, mpd_id='dash', fatal=False)) - self._sort_formats(formats) timestamp = unified_timestamp(self._html_search_meta( 'article:published_time', webpage, 'timestamp')) diff --git a/yt_dlp/extractor/internetvideoarchive.py b/yt_dlp/extractor/internetvideoarchive.py index 6a8e30d73..9d2574cb0 100644 --- a/yt_dlp/extractor/internetvideoarchive.py +++ b/yt_dlp/extractor/internetvideoarchive.py @@ -48,7 +48,6 @@ class InternetVideoArchiveIE(InfoExtractor): replace_url('.mpd'), video_id, mpd_id='dash', fatal=False)) formats.extend(self._extract_ism_formats( replace_url('Manifest'), video_id, ism_id='mss', fatal=False)) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/iprima.py b/yt_dlp/extractor/iprima.py index c98fe5b42..181820542 100644 --- a/yt_dlp/extractor/iprima.py +++ b/yt_dlp/extractor/iprima.py @@ -148,7 +148,6 @@ class IPrimaIE(InfoExtractor): elif manifest_type == 'DASH' or ext == 'mpd': formats += self._extract_mpd_formats( manifest_url, video_id, mpd_id='dash', fatal=False) - self._sort_formats(formats) final_result = self._search_json_ld(webpage, video_id, default={}) final_result.update({ @@ -248,8 +247,6 @@ class IPrimaCNNIE(InfoExtractor): if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage: self.raise_geo_restricted(countries=['CZ'], metadata_available=True) - self._sort_formats(formats) - return { 'id': video_id, 'title': title, diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py index bb77647f8..dbc688fb9 100644 --- a/yt_dlp/extractor/iqiyi.py +++ b/yt_dlp/extractor/iqiyi.py @@ -385,7 +385,6 @@ class IqiyiIE(InfoExtractor): self._sleep(5, video_id) - self._sort_formats(formats) title = (get_element_by_id('widget-videotitle', webpage) or clean_html(get_element_by_attribute('class', 'mod-play-tit', webpage)) or self._html_search_regex(r'<span[^>]+data-videochanged-title="word"[^>]*>([^<]+)</span>', webpage, 'title')) @@ -667,8 +666,6 @@ class IqIE(InfoExtractor): }) formats.extend(extracted_formats) - self._sort_formats(formats) - for sub_format in traverse_obj(initial_format_data, ('program', 'stl', ...), expected_type=dict, default=[]): lang = self._LID_TAGS.get(str_or_none(sub_format.get('lid')), sub_format.get('_name')) subtitles.setdefault(lang, 
[]).extend([{ diff --git a/yt_dlp/extractor/islamchannel.py b/yt_dlp/extractor/islamchannel.py index bac852b12..253a846b7 100644 --- a/yt_dlp/extractor/islamchannel.py +++ b/yt_dlp/extractor/islamchannel.py @@ -41,7 +41,6 @@ class IslamChannelIE(InfoExtractor): traverse_obj(show_stream, ('response', 'tokenization', 'url')), video_id, headers=headers) formats, subs = self._extract_m3u8_formats_and_subtitles(traverse_obj(streams, ('Streams', 'Adaptive')), video_id, 'mp4') - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/itv.py b/yt_dlp/extractor/itv.py index 26d77a469..9ac7be307 100644 --- a/yt_dlp/extractor/itv.py +++ b/yt_dlp/extractor/itv.py @@ -172,7 +172,6 @@ class ITVIE(InfoExtractor): formats.append({ 'url': href, }) - self._sort_formats(formats) info = self._search_json_ld(webpage, video_id, default={}) if not info: json_ld = self._parse_json(self._search_regex( diff --git a/yt_dlp/extractor/ivi.py b/yt_dlp/extractor/ivi.py index 6772fcbb9..dc6a48196 100644 --- a/yt_dlp/extractor/ivi.py +++ b/yt_dlp/extractor/ivi.py @@ -166,7 +166,6 @@ class IviIE(InfoExtractor): 'quality': quality(content_format), 'filesize': int_or_none(f.get('size_in_bytes')), }) - self._sort_formats(formats) compilation = result.get('compilation') episode = title if compilation else None diff --git a/yt_dlp/extractor/ivideon.py b/yt_dlp/extractor/ivideon.py index 538a961b7..7d1e554c2 100644 --- a/yt_dlp/extractor/ivideon.py +++ b/yt_dlp/extractor/ivideon.py @@ -67,7 +67,6 @@ class IvideonIE(InfoExtractor): 'ext': 'flv', 'quality': quality(format_id), } for format_id in self._QUALITIES] - self._sort_formats(formats) return { 'id': server_id, diff --git a/yt_dlp/extractor/iwara.py b/yt_dlp/extractor/iwara.py index f77c5d44d..ec3e59c6d 100644 --- a/yt_dlp/extractor/iwara.py +++ b/yt_dlp/extractor/iwara.py @@ -116,8 +116,6 @@ class IwaraIE(IwaraBaseIE): 'quality': 1 if format_id == 'Source' else 0, }) - self._sort_formats(formats) - return { 'id': video_id, 'title': title, diff --git a/yt_dlp/extractor/ixigua.py b/yt_dlp/extractor/ixigua.py index 163edf480..1f086d2bd 100644 --- a/yt_dlp/extractor/ixigua.py +++ b/yt_dlp/extractor/ixigua.py @@ -67,7 +67,6 @@ class IxiguaIE(InfoExtractor): json_data = self._get_json_data(webpage, video_id)['anyVideo']['gidInformation']['packerData']['video'] formats = list(self._media_selector(json_data.get('videoResource'))) - self._sort_formats(formats) return { 'id': video_id, 'title': json_data.get('title'), diff --git a/yt_dlp/extractor/izlesene.py b/yt_dlp/extractor/izlesene.py index 6520ecf6d..5cdf8709d 100644 --- a/yt_dlp/extractor/izlesene.py +++ b/yt_dlp/extractor/izlesene.py @@ -78,7 +78,6 @@ class IzleseneIE(InfoExtractor): 'ext': ext, 'height': height, }) - self._sort_formats(formats) description = self._og_search_description(webpage, default=None) thumbnail = video.get('posterURL') or self._proto_relative_url( diff --git a/yt_dlp/extractor/jable.py b/yt_dlp/extractor/jable.py index 6840654cc..84c3225e4 100644 --- a/yt_dlp/extractor/jable.py +++ b/yt_dlp/extractor/jable.py @@ -45,7 +45,6 @@ class JableIE(InfoExtractor): webpage = self._download_webpage(url, video_id) formats = self._extract_m3u8_formats( self._search_regex(r'var\s+hlsUrl\s*=\s*\'([^\']+)', webpage, 'hls_url'), video_id, 'mp4', m3u8_id='hls') - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/jamendo.py b/yt_dlp/extractor/jamendo.py index 578e57a67..a2bbba397 100644 --- a/yt_dlp/extractor/jamendo.py +++ b/yt_dlp/extractor/jamendo.py @@ 
             ('ogg1', 'ogg', 'ogg'),
             ('flac', 'flac', 'flac'),
         ))]
-        self._sort_formats(formats)

         urls = []
         thumbnails = []
diff --git a/yt_dlp/extractor/japandiet.py b/yt_dlp/extractor/japandiet.py
index f2f50db7a..6c650568a 100644
--- a/yt_dlp/extractor/japandiet.py
+++ b/yt_dlp/extractor/japandiet.py
@@ -122,7 +122,6 @@ class ShugiinItvLiveRoomIE(ShugiinItvBaseIE):
         formats, subtitles = self._extract_m3u8_formats_and_subtitles(
             f'https://hlslive.shugiintv.go.jp/{room_id}/amlst:{room_id}/playlist.m3u8',
             room_id, ext='mp4')
-        self._sort_formats(formats)

         return {
             'id': room_id,
@@ -160,7 +159,6 @@ class ShugiinItvVodIE(ShugiinItvBaseIE):
         m3u8_url = re.sub(r'^http://', 'https://', m3u8_url)
         formats, subtitles = self._extract_m3u8_formats_and_subtitles(
             m3u8_url, video_id, ext='mp4')
-        self._sort_formats(formats)

         title = self._html_search_regex(
             (r'<td\s+align="left">(.+)\s*\(\d+分\)',
@@ -264,7 +262,6 @@ class SangiinIE(InfoExtractor):
             'm3u8 url', group=2)

         formats, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4')
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/jixie.py b/yt_dlp/extractor/jixie.py
index 7480af050..4830e61c1 100644
--- a/yt_dlp/extractor/jixie.py
+++ b/yt_dlp/extractor/jixie.py
@@ -31,7 +31,6 @@ class JixieBaseIE(InfoExtractor):
                     'ext': 'mp4',
                 })

-        self._sort_formats(formats)
         return {
             'id': video_id,
             'display_id': display_id,
diff --git a/yt_dlp/extractor/joj.py b/yt_dlp/extractor/joj.py
index 298b37823..9b622845a 100644
--- a/yt_dlp/extractor/joj.py
+++ b/yt_dlp/extractor/joj.py
@@ -81,7 +81,6 @@ class JojIE(InfoExtractor):
                     r'(\d+)[pP]', format_id or path, 'height', default=None)),
             })
-        self._sort_formats(formats)

         thumbnail = self._og_search_thumbnail(webpage)
diff --git a/yt_dlp/extractor/kakao.py b/yt_dlp/extractor/kakao.py
index a5014d931..1f0f0a5d5 100644
--- a/yt_dlp/extractor/kakao.py
+++ b/yt_dlp/extractor/kakao.py
@@ -120,7 +120,6 @@ class KakaoIE(InfoExtractor):
                 'filesize': int_or_none(fmt.get('filesize')),
                 'tbr': int_or_none(fmt.get('kbps')),
             })
-        self._sort_formats(formats)

         thumbs = []
         for thumb in clip.get('clipChapterThumbnailList') or []:
diff --git a/yt_dlp/extractor/kaltura.py b/yt_dlp/extractor/kaltura.py
index 677f989a7..95e2deea5 100644
--- a/yt_dlp/extractor/kaltura.py
+++ b/yt_dlp/extractor/kaltura.py
@@ -518,8 +518,6 @@ class KalturaIE(InfoExtractor):
                 formats.extend(fmts)
                 self._merge_subtitles(subs, target=subtitles)

-        self._sort_formats(formats)
-
         if captions:
             for caption in captions.get('objects', []):
                 # Continue if caption is not ready
diff --git a/yt_dlp/extractor/keezmovies.py b/yt_dlp/extractor/keezmovies.py
index 1c2d5c01c..b50da420c 100644
--- a/yt_dlp/extractor/keezmovies.py
+++ b/yt_dlp/extractor/keezmovies.py
@@ -5,7 +5,6 @@ from ..aes import aes_decrypt_text
 from ..compat import compat_urllib_parse_unquote
 from ..utils import (
     determine_ext,
-    ExtractorError,
     format_field,
     int_or_none,
     str_to_int,
@@ -103,12 +102,6 @@ class KeezMoviesIE(InfoExtractor):
             self.raise_no_formats(
                 'Video %s is no longer available' % video_id, expected=True)

-        try:
-            self._sort_formats(formats)
-        except ExtractorError:
-            if fatal:
-                raise
-
         if not title:
             title = self._html_search_regex(
                 r'<h1[^>]*>([^<]+)', webpage, 'title')
diff --git a/yt_dlp/extractor/kelbyone.py b/yt_dlp/extractor/kelbyone.py
index dea056c12..2ca9ad426 100644
--- a/yt_dlp/extractor/kelbyone.py
+++ b/yt_dlp/extractor/kelbyone.py
@@ -59,7 +59,6 @@ class KelbyOneIE(InfoExtractor):
                 subtitles.setdefault('en', []).append({
                     'url': track['file'],
                 })
-        self._sort_formats(formats)
         yield {
             'id': video_id,
             'title': item['title'],
diff --git a/yt_dlp/extractor/kinja.py b/yt_dlp/extractor/kinja.py
index 3747d8eea..df1386fb8 100644
--- a/yt_dlp/extractor/kinja.py
+++ b/yt_dlp/extractor/kinja.py
@@ -147,7 +147,6 @@ class KinjaEmbedIE(InfoExtractor):
             formats.extend(self._extract_m3u8_formats(
                 m3u8_url, video_id, 'mp4', 'm3u8_native',
                 m3u8_id='hls', fatal=False))
-            self._sort_formats(formats)

         thumbnail = None
         poster = data.get('poster') or {}
@@ -195,8 +194,6 @@ class KinjaEmbedIE(InfoExtractor):
                 'url': fallback_rendition_url,
             })

-        self._sort_formats(formats)
-
         return {
             'id': video_id,
             'title': title,
diff --git a/yt_dlp/extractor/kinopoisk.py b/yt_dlp/extractor/kinopoisk.py
index 84a2489a3..5db908349 100644
--- a/yt_dlp/extractor/kinopoisk.py
+++ b/yt_dlp/extractor/kinopoisk.py
@@ -44,7 +44,6 @@ class KinoPoiskIE(InfoExtractor):
         formats = self._extract_m3u8_formats(
             data['playlistEntity']['uri'], video_id, 'mp4',
             entry_protocol='m3u8_native', m3u8_id='hls')
-        self._sort_formats(formats)

         description = dict_get(
             film, ('descriptscription', 'description',
diff --git a/yt_dlp/extractor/konserthusetplay.py b/yt_dlp/extractor/konserthusetplay.py
index 1e177c363..10767f1b6 100644
--- a/yt_dlp/extractor/konserthusetplay.py
+++ b/yt_dlp/extractor/konserthusetplay.py
@@ -95,8 +95,6 @@ class KonserthusetPlayIE(InfoExtractor):
                 'url': fallback_url,
             })

-        self._sort_formats(formats)
-
         title = player_config.get('title') or media['title']
         description = player_config.get('mediaInfo', {}).get('description')
         thumbnail = media.get('image')
diff --git a/yt_dlp/extractor/koo.py b/yt_dlp/extractor/koo.py
index 892d355ba..9cfec5eb9 100644
--- a/yt_dlp/extractor/koo.py
+++ b/yt_dlp/extractor/koo.py
@@ -101,7 +101,6 @@ class KooIE(InfoExtractor):
         if not formats:
             self.raise_no_formats('No video/audio found at the provided url.', expected=True)

-        self._sort_formats(formats)
         return {
             'id': id,
             'title': clean_html(item_json.get('title')),
diff --git a/yt_dlp/extractor/kusi.py b/yt_dlp/extractor/kusi.py
index 4fec2c2b2..a23ad8945 100644
--- a/yt_dlp/extractor/kusi.py
+++ b/yt_dlp/extractor/kusi.py
@@ -71,7 +71,6 @@ class KUSIIE(InfoExtractor):
                 'width': int_or_none(quality.attrib.get('width')),
                 'vbr': float_or_none(quality.attrib.get('bitratebits'), scale=1000),
             })
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/kuwo.py b/yt_dlp/extractor/kuwo.py
index 0c9518e66..cfec1c50f 100644
--- a/yt_dlp/extractor/kuwo.py
+++ b/yt_dlp/extractor/kuwo.py
@@ -104,7 +104,6 @@ class KuwoIE(KuwoBaseIE):
             lrc_content = None

         formats = self._get_formats(song_id)
-        self._sort_formats(formats)

         album_id = self._html_search_regex(
             r'<a[^>]+href="http://www\.kuwo\.cn/album/(\d+)/"',
@@ -339,8 +338,6 @@ class KuwoMvIE(KuwoBaseIE):
                 'format_id': 'mv',
             })

-        self._sort_formats(formats)
-
         return {
             'id': song_id,
             'title': song_name,
diff --git a/yt_dlp/extractor/la7.py b/yt_dlp/extractor/la7.py
index 8ce44cc13..68dc1d4df 100644
--- a/yt_dlp/extractor/la7.py
+++ b/yt_dlp/extractor/la7.py
@@ -78,8 +78,6 @@ class LA7IE(InfoExtractor):
             if http_f:
                 formats.append(http_f)

-        self._sort_formats(formats)
-
         return {
             'id': video_id,
             'title': self._og_search_title(webpage, default=None),
@@ -136,7 +134,6 @@ class LA7PodcastEpisodeIE(InfoExtractor):
             'format_id': ext,
             'ext': ext,
         }]
-        self._sort_formats(formats)

         title = self._html_search_regex(
             (r'<div class="title">(?P<title>.+?)</',
diff --git a/yt_dlp/extractor/laola1tv.py b/yt_dlp/extractor/laola1tv.py
index a90ed16a0..416dd7eb4 100644
--- a/yt_dlp/extractor/laola1tv.py
+++ b/yt_dlp/extractor/laola1tv.py
@@ -49,7 +49,6 @@ class Laola1TvEmbedIE(InfoExtractor):
         formats = self._extract_akamai_formats(
             '%s?hdnea=%s' % (token_attrib['url'], token_attrib['auth']),
             video_id)
-        self._sort_formats(formats)
         return formats

     def _real_extract(self, url):
diff --git a/yt_dlp/extractor/lbry.py b/yt_dlp/extractor/lbry.py
index b2b61abac..b5def1e07 100644
--- a/yt_dlp/extractor/lbry.py
+++ b/yt_dlp/extractor/lbry.py
@@ -250,7 +250,6 @@ class LBRYIE(LBRYBaseIE):
             if determine_ext(final_url) == 'm3u8':
                 info['formats'] = self._extract_m3u8_formats(
                     final_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls', live=is_live, headers=headers)
-                self._sort_formats(info['formats'])
             else:
                 info['url'] = streaming_url
         return {
diff --git a/yt_dlp/extractor/lecture2go.py b/yt_dlp/extractor/lecture2go.py
index bee4e7587..3a9b30a3c 100644
--- a/yt_dlp/extractor/lecture2go.py
+++ b/yt_dlp/extractor/lecture2go.py
@@ -49,8 +49,6 @@ class Lecture2GoIE(InfoExtractor):
                 'url': url,
             })

-        self._sort_formats(formats)
-
         creator = self._html_search_regex(
             r'<div[^>]+id="description">([^<]+)</div>', webpage, 'creator', fatal=False)
         duration = parse_duration(self._html_search_regex(
diff --git a/yt_dlp/extractor/lecturio.py b/yt_dlp/extractor/lecturio.py
index c3d0cb193..973764c63 100644
--- a/yt_dlp/extractor/lecturio.py
+++ b/yt_dlp/extractor/lecturio.py
@@ -137,7 +137,6 @@ class LecturioIE(LecturioBaseIE):
                     'height': int(mobj.group(1)),
                 })
             formats.append(f)
-        self._sort_formats(formats)

         subtitles = {}
         automatic_captions = {}
diff --git a/yt_dlp/extractor/leeco.py b/yt_dlp/extractor/leeco.py
index 258e396cb..85033b8f8 100644
--- a/yt_dlp/extractor/leeco.py
+++ b/yt_dlp/extractor/leeco.py
@@ -182,7 +182,6 @@ class LeIE(InfoExtractor):
                 f['height'] = int_or_none(format_id[:-1])

             formats.append(f)
-        self._sort_formats(formats, ('res', 'quality'))

         publish_time = parse_iso8601(self._html_search_regex(
             r'发布时间 ([^<>]+) ', page, 'publish time', default=None),
@@ -196,6 +195,7 @@ class LeIE(InfoExtractor):
             'thumbnail': playurl['pic'],
             'description': description,
             'timestamp': publish_time,
+            '_format_sort_fields': ('res', 'quality'),
         }


@@ -356,7 +356,6 @@ class LetvCloudIE(InfoExtractor):
         media_id = uu + '_' + vu

         formats = self._get_formats('flash', uu, vu, media_id) + self._get_formats('html5', uu, vu, media_id)
-        self._sort_formats(formats)

         return {
             'id': media_id,
diff --git a/yt_dlp/extractor/lego.py b/yt_dlp/extractor/lego.py
index 7d0238a1f..811b44758 100644
--- a/yt_dlp/extractor/lego.py
+++ b/yt_dlp/extractor/lego.py
@@ -113,7 +113,6 @@ class LEGOIE(InfoExtractor):
                     'width': quality[2],
                 }),
             formats.append(f)
-        self._sort_formats(formats)

         subtitles = {}
         sub_file_id = video.get('SubFileId')
diff --git a/yt_dlp/extractor/libraryofcongress.py b/yt_dlp/extractor/libraryofcongress.py
index afe3c98a1..b76ca0908 100644
--- a/yt_dlp/extractor/libraryofcongress.py
+++ b/yt_dlp/extractor/libraryofcongress.py
@@ -126,8 +126,6 @@ class LibraryOfCongressIE(InfoExtractor):
                 'filesize_approx': parse_filesize(m.group('size')),
             })

-        self._sort_formats(formats)
-
         duration = float_or_none(data.get('duration'))
         view_count = int_or_none(data.get('viewCount'))
diff --git a/yt_dlp/extractor/lifenews.py b/yt_dlp/extractor/lifenews.py
index 8c7d2064d..919cfcb37 100644
--- a/yt_dlp/extractor/lifenews.py
+++ b/yt_dlp/extractor/lifenews.py
@@ -223,8 +223,6 @@ class LifeEmbedIE(InfoExtractor):
             else:
                 extract_original(video_url)

-        self._sort_formats(formats)
-
         thumbnail = thumbnail or self._search_regex(
             r'"image"\s*:\s*"([^"]+)', webpage, 'thumbnail', default=None)
diff --git a/yt_dlp/extractor/likee.py b/yt_dlp/extractor/likee.py
index b53e7a5ca..74ee2bea9 100644
--- a/yt_dlp/extractor/likee.py
+++ b/yt_dlp/extractor/likee.py
@@ -122,7 +122,6 @@ class LikeeIE(InfoExtractor):
             'width': info.get('video_width'),
             'quality': 1,
         }]
-        self._sort_formats(formats)
         return {
             'id': video_id,
             'title': info.get('msgText'),
diff --git a/yt_dlp/extractor/limelight.py b/yt_dlp/extractor/limelight.py
index 90065094b..e11ec43d6 100644
--- a/yt_dlp/extractor/limelight.py
+++ b/yt_dlp/extractor/limelight.py
@@ -179,8 +179,6 @@ class LimelightBaseIE(InfoExtractor):
                     'ext': ext,
                 })

-        self._sort_formats(formats)
-
         subtitles = {}
         for flag in mobile_item.get('flags'):
             if flag == 'ClosedCaptions':
diff --git a/yt_dlp/extractor/line.py b/yt_dlp/extractor/line.py
index 09c512e50..3fab9c8a5 100644
--- a/yt_dlp/extractor/line.py
+++ b/yt_dlp/extractor/line.py
@@ -98,7 +98,6 @@ class LineLiveIE(LineLiveBaseIE):
             archive_status = item.get('archiveStatus')
             if archive_status != 'ARCHIVED':
                 self.raise_no_formats('this video has been ' + archive_status.lower(), expected=True)
-        self._sort_formats(formats)
         info['formats'] = formats
         return info
diff --git a/yt_dlp/extractor/linkedin.py b/yt_dlp/extractor/linkedin.py
index 27f1080b4..2bf2e9a11 100644
--- a/yt_dlp/extractor/linkedin.py
+++ b/yt_dlp/extractor/linkedin.py
@@ -111,8 +111,6 @@ class LinkedInIE(LinkedInBaseIE):
             'tbr': float_or_none(source.get('data-bitrate'), scale=1000),
         } for source in sources]

-        self._sort_formats(formats)
-
         return {
             'id': video_id,
             'formats': formats,
@@ -187,10 +185,6 @@ class LinkedInLearningIE(LinkedInLearningBaseIE):
                 streaming_url, video_slug, 'mp4',
                 'm3u8_native', m3u8_id='hls', fatal=False))

-        # It seems like this would be correctly handled by default
-        # However, unless someone can confirm this, the old
-        # behaviour is being kept as-is
-        self._sort_formats(formats, ('res', 'source_preference'))
         subtitles = {}
         duration = int_or_none(video_data.get('durationInSeconds'))
         transcript_lines = try_get(video_data, lambda x: x['transcript']['lines'], expected_type=list)
@@ -208,6 +202,10 @@ class LinkedInLearningIE(LinkedInLearningBaseIE):
             'timestamp': float_or_none(video_data.get('publishedOn'), 1000),
             'duration': duration,
             'subtitles': subtitles,
+            # It seems like this would be correctly handled by default
+            # However, unless someone can confirm this, the old
+            # behaviour is being kept as-is
+            '_format_sort_fields': ('res', 'source_preference')
         }
diff --git a/yt_dlp/extractor/linuxacademy.py b/yt_dlp/extractor/linuxacademy.py
index bf22855a9..a570248b7 100644
--- a/yt_dlp/extractor/linuxacademy.py
+++ b/yt_dlp/extractor/linuxacademy.py
@@ -218,7 +218,6 @@ class LinuxAcademyIE(InfoExtractor):
             formats = self._extract_m3u8_formats(
                 m3u8_url, item_id, 'mp4', entry_protocol='m3u8_native',
                 m3u8_id='hls')
-            self._sort_formats(formats)
             info = {
                 'id': item_id,
                 'formats': formats,
diff --git a/yt_dlp/extractor/livestream.py b/yt_dlp/extractor/livestream.py
index 70449dce5..d883eafcf 100644
--- a/yt_dlp/extractor/livestream.py
+++ b/yt_dlp/extractor/livestream.py
@@ -126,7 +126,6 @@ class LivestreamIE(InfoExtractor):
         if f4m_url:
             formats.extend(self._extract_f4m_formats(
                 f4m_url, video_id, f4m_id='hds', fatal=False))
-        self._sort_formats(formats)

         comments = [{
             'author_id': comment.get('author_id'),
@@ -171,7 +170,6 @@
                 'url': rtsp_url,
                 'format_id': 'rtsp',
             })
-        self._sort_formats(formats)

         return {
             'id': broadcast_id,
@@ -300,7 +298,6 @@ class LivestreamOriginalIE(InfoExtractor):
                 'format_id': 'rtsp',
             })

-        self._sort_formats(formats)
         return formats

     def _extract_folder(self, url, folder_id):
diff --git a/yt_dlp/extractor/lnkgo.py b/yt_dlp/extractor/lnkgo.py
index 9ea08ec5a..6282d2eaf 100644
--- a/yt_dlp/extractor/lnkgo.py
+++ b/yt_dlp/extractor/lnkgo.py
@@ -67,7 +67,6 @@ class LnkGoIE(InfoExtractor):
         formats = self._extract_m3u8_formats(
             self._M3U8_TEMPL % (prefix, video_info['videoUrl'], video_info.get('secureTokenParams') or ''),
             video_id, 'mp4', 'm3u8_native')
-        self._sort_formats(formats)

         return {
             'id': video_id,
@@ -149,7 +148,6 @@ class LnkIE(InfoExtractor):
                 formats.extend(fmts)
                 subtitles = self._merge_subtitles(subtitles, subs)

-        self._sort_formats(formats)
         return {
             'id': id,
             'title': video_json.get('title'),
diff --git a/yt_dlp/extractor/lrt.py b/yt_dlp/extractor/lrt.py
index a49fd592f..80d4d1cdb 100644
--- a/yt_dlp/extractor/lrt.py
+++ b/yt_dlp/extractor/lrt.py
@@ -37,7 +37,6 @@ class LRTStreamIE(LRTBaseIE):
             fmts, subs = self._extract_m3u8_formats_and_subtitles(stream_url, video_id, 'mp4', m3u8_id='hls', live=True)
             formats.extend(fmts)
             subtitles = self._merge_subtitles(subtitles, subs)
-        self._sort_formats(formats)

         stream_title = self._extract_js_var(webpage, 'video_title', 'LRT')
         return {
diff --git a/yt_dlp/extractor/lynda.py b/yt_dlp/extractor/lynda.py
index 1ae7f9d4f..768ce913e 100644
--- a/yt_dlp/extractor/lynda.py
+++ b/yt_dlp/extractor/lynda.py
@@ -157,7 +157,6 @@ class LyndaIE(LyndaBaseIE):
                     'format_id': '%s-%s' % (cdn, format_id) if cdn else format_id,
                     'height': int_or_none(format_id),
                 })
-        self._sort_formats(formats)

         conviva = self._download_json(
             'https://www.lynda.com/ajax/player/conviva', video_id,
@@ -207,7 +206,6 @@ class LyndaIE(LyndaBaseIE):
             } for format_id, video_url in prioritized_stream.items()])

         self._check_formats(formats, video_id)
-        self._sort_formats(formats)

         subtitles = self.extract_subtitles(video_id)
diff --git a/yt_dlp/extractor/mailru.py b/yt_dlp/extractor/mailru.py
index 5f30d0eaa..387d211fe 100644
--- a/yt_dlp/extractor/mailru.py
+++ b/yt_dlp/extractor/mailru.py
@@ -160,7 +160,6 @@ class MailRuIE(InfoExtractor):
                 'height': height,
                 'http_headers': headers,
             })
-        self._sort_formats(formats)

         meta_data = video_data['meta']
         title = remove_end(meta_data['title'], '.mp4')
diff --git a/yt_dlp/extractor/mainstreaming.py b/yt_dlp/extractor/mainstreaming.py
index 213a1df57..fe5589d59 100644
--- a/yt_dlp/extractor/mainstreaming.py
+++ b/yt_dlp/extractor/mainstreaming.py
@@ -197,8 +197,6 @@ class MainStreamingIE(InfoExtractor):
             subtitles = self._merge_subtitles(m3u8_subs, mpd_subs)
             formats.extend(m3u8_formats + mpd_formats)

-        self._sort_formats(formats)
-
         return {
             'id': video_id,
             'title': title,
diff --git a/yt_dlp/extractor/malltv.py b/yt_dlp/extractor/malltv.py
index 02f226be5..e1031d8da 100644
--- a/yt_dlp/extractor/malltv.py
+++ b/yt_dlp/extractor/malltv.py
@@ -72,7 +72,6 @@ class MallTVIE(InfoExtractor):
         formats = self._extract_m3u8_formats(
             video['VideoSource'], video_id, 'mp4', 'm3u8_native')
-        self._sort_formats(formats)

         subtitles = {}
         for s in (video.get('Subtitles') or {}):
diff --git a/yt_dlp/extractor/mangomolo.py b/yt_dlp/extractor/mangomolo.py
index 568831aa8..efaf66fa2 100644
--- a/yt_dlp/extractor/mangomolo.py
+++ b/yt_dlp/extractor/mangomolo.py
@@ -44,7 +44,6 @@ class MangomoloBaseIE(InfoExtractor):
             ], webpage, 'format url')
         formats = self._extract_wowza_formats(
             format_url, page_id, m3u8_entry_protocol, ['smil'])
-        self._sort_formats(formats)

         return {
             'id': page_id,
diff --git a/yt_dlp/extractor/manoto.py b/yt_dlp/extractor/manoto.py
index dc8653f5d..2792e6e70 100644
--- a/yt_dlp/extractor/manoto.py
+++ b/yt_dlp/extractor/manoto.py
@@ -54,7 +54,6 @@ class ManotoTVIE(InfoExtractor):
         episode_json = self._download_json(_API_URL.format('showmodule', 'episodedetails', video_id), video_id)
         details = episode_json.get('details', {})
         formats = self._extract_m3u8_formats(details.get('videoM3u8Url'), video_id, 'mp4')
-        self._sort_formats(formats)
         return {
             'id': video_id,
             'series': details.get('showTitle'),
@@ -126,7 +125,6 @@ class ManotoTVLiveIE(InfoExtractor):
         details = json.get('details', {})
         video_url = details.get('liveUrl')
         formats = self._extract_m3u8_formats(video_url, video_id, 'mp4', live=True)
-        self._sort_formats(formats)
         return {
             'id': video_id,
             'title': 'Manoto TV Live',
diff --git a/yt_dlp/extractor/manyvids.py b/yt_dlp/extractor/manyvids.py
index 63ff5f054..741745378 100644
--- a/yt_dlp/extractor/manyvids.py
+++ b/yt_dlp/extractor/manyvids.py
@@ -135,8 +135,6 @@ class ManyVidsIE(InfoExtractor):
             if 'transcoded' in f['format_id']:
                 f['preference'] = f.get('preference', -1) - 1

-        self._sort_formats(formats)
-
         def get_likes():
             likes = self._search_regex(
                 r'''(<a\b[^>]*\bdata-id\s*=\s*(['"])%s\2[^>]*>)''' % (video_id, ),
diff --git a/yt_dlp/extractor/massengeschmacktv.py b/yt_dlp/extractor/massengeschmacktv.py
index 4508e4391..7dacb43e0 100644
--- a/yt_dlp/extractor/massengeschmacktv.py
+++ b/yt_dlp/extractor/massengeschmacktv.py
@@ -65,8 +65,6 @@ class MassengeschmackTVIE(InfoExtractor):
             'vcodec': 'none' if format_id.startswith('Audio') else None,
         })

-        self._sort_formats(formats)
-
         return {
             'id': episode,
             'title': title,
diff --git a/yt_dlp/extractor/masters.py b/yt_dlp/extractor/masters.py
index d1ce07f10..716f1c961 100644
--- a/yt_dlp/extractor/masters.py
+++ b/yt_dlp/extractor/masters.py
@@ -25,7 +25,6 @@ class MastersIE(InfoExtractor):
             f'https://www.masters.com/relatedcontent/rest/v2/masters_v1/en/content/masters_v1_{video_id}_en', video_id)
         formats, subtitles = self._extract_m3u8_formats_and_subtitles(traverse_obj(content_resp, ('media', 'm3u8')), video_id, 'mp4')
-        self._sort_formats(formats)

         thumbnails = [{'id': name, 'url': url} for name, url in traverse_obj(content_resp, ('images', 0), default={}).items()]
diff --git a/yt_dlp/extractor/matchtv.py b/yt_dlp/extractor/matchtv.py
index 94ae20b26..a67fa9fe4 100644
--- a/yt_dlp/extractor/matchtv.py
+++ b/yt_dlp/extractor/matchtv.py
@@ -43,7 +43,6 @@ class MatchTVIE(InfoExtractor):
             })['data']['videoUrl']
         f4m_url = xpath_text(self._download_xml(video_url, video_id), './to')
         formats = self._extract_f4m_formats(f4m_url, video_id)
-        self._sort_formats(formats)
         return {
             'id': video_id,
             'title': 'Матч ТВ - Прямой эфир',
diff --git a/yt_dlp/extractor/mdr.py b/yt_dlp/extractor/mdr.py
index b44cf809a..49f5b49a4 100644
--- a/yt_dlp/extractor/mdr.py
+++ b/yt_dlp/extractor/mdr.py
@@ -162,8 +162,6 @@ class MDRIE(InfoExtractor):

                 formats.append(f)

-        self._sort_formats(formats)
-
         description = xpath_text(doc, './broadcast/broadcastDescription', 'description')
         timestamp = parse_iso8601(
             xpath_text(
diff --git a/yt_dlp/extractor/medaltv.py b/yt_dlp/extractor/medaltv.py
index 80efcc764..82be823b8 100644
--- a/yt_dlp/extractor/medaltv.py
+++ b/yt_dlp/extractor/medaltv.py
@@ -150,8 +150,6 @@ class MedalTVIE(InfoExtractor):
                 'An unknown error occurred ({0}).'.format(error),
                 video_id=video_id)

-        self._sort_formats(formats)
-
         # Necessary because the id of the author is not known in advance.
         # Won't raise an issue if no profile can be found as this is optional.
         author = traverse_obj(api_response, ('pageProps', 'profile')) or {}
diff --git a/yt_dlp/extractor/mediaklikk.py b/yt_dlp/extractor/mediaklikk.py
index f9a449377..46365081b 100644
--- a/yt_dlp/extractor/mediaklikk.py
+++ b/yt_dlp/extractor/mediaklikk.py
@@ -89,7 +89,6 @@ class MediaKlikkIE(InfoExtractor):

         formats = self._extract_wowza_formats(
             playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash'])
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/medialaan.py b/yt_dlp/extractor/medialaan.py
index 6daa50846..bce20dcfd 100644
--- a/yt_dlp/extractor/medialaan.py
+++ b/yt_dlp/extractor/medialaan.py
@@ -100,7 +100,6 @@ class MedialaanIE(InfoExtractor):
                     'ext': ext,
                     'url': src,
                 })
-        self._sort_formats(formats)

         return {
             'id': production_id,
diff --git a/yt_dlp/extractor/mediaset.py b/yt_dlp/extractor/mediaset.py
index a3b5491d2..61bdb2a3f 100644
--- a/yt_dlp/extractor/mediaset.py
+++ b/yt_dlp/extractor/mediaset.py
@@ -247,8 +247,6 @@ class MediasetIE(ThePlatformBaseIE):
         if (first_e or geo_e) and not formats:
             raise geo_e or first_e

-        self._sort_formats(formats)
-
         feed_data = self._download_json(
             'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs-v2/guid/-/' + guid,
             guid, fatal=False)
diff --git a/yt_dlp/extractor/mediasite.py b/yt_dlp/extractor/mediasite.py
index 0ffd01cd2..fe549c49f 100644
--- a/yt_dlp/extractor/mediasite.py
+++ b/yt_dlp/extractor/mediasite.py
@@ -264,8 +264,6 @@ class MediasiteIE(InfoExtractor):
             })
             formats.extend(stream_formats)

-        self._sort_formats(formats)
-
         # XXX: Presentation['Presenters']
         # XXX: Presentation['Transcript']
diff --git a/yt_dlp/extractor/mediaworksnz.py b/yt_dlp/extractor/mediaworksnz.py
index 651239bd4..62e37d24a 100644
--- a/yt_dlp/extractor/mediaworksnz.py
+++ b/yt_dlp/extractor/mediaworksnz.py
@@ -90,8 +90,6 @@ class MediaWorksNZVODIE(InfoExtractor):
                 audio_format.setdefault('acodec', 'aac')
                 formats.append(audio_format)

-        self._sort_formats(formats)
-
         return {
             'id': video_id,
             'title': asset.get('title'),
diff --git a/yt_dlp/extractor/megatvcom.py b/yt_dlp/extractor/megatvcom.py
index 54c7b7f9f..2f3f11f51 100644
--- a/yt_dlp/extractor/megatvcom.py
+++ b/yt_dlp/extractor/megatvcom.py
@@ -87,7 +87,6 @@ class MegaTVComIE(MegaTVComBaseIE):
             formats, subs = [{'url': source}], {}
             if player_attrs.get('subs'):
                 self._merge_subtitles({'und': [{'url': player_attrs['subs']}]}, target=subs)
-        self._sort_formats(formats)
         return {
             'id': video_id,
             'display_id': display_id,
diff --git a/yt_dlp/extractor/melonvod.py b/yt_dlp/extractor/melonvod.py
index 0cbc961c4..1d3fff856 100644
--- a/yt_dlp/extractor/melonvod.py
+++ b/yt_dlp/extractor/melonvod.py
@@ -44,7 +44,6 @@ class MelonVODIE(InfoExtractor):
         formats = self._extract_m3u8_formats(
             stream_info['encUrl'], video_id, 'mp4', m3u8_id='hls')
-        self._sort_formats(formats)

         artist_list = play_info.get('artistList')
         artist = None
diff --git a/yt_dlp/extractor/metacafe.py b/yt_dlp/extractor/metacafe.py
index 048c74e68..d7f5def0e 100644
--- a/yt_dlp/extractor/metacafe.py
+++ b/yt_dlp/extractor/metacafe.py
@@ -267,7 +267,6 @@ class MetacafeIE(InfoExtractor):
                 'url': video_url,
                 'ext': video_ext,
             }]
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/metacritic.py b/yt_dlp/extractor/metacritic.py
index 543bdffad..14410549a 100644
--- a/yt_dlp/extractor/metacritic.py
+++ b/yt_dlp/extractor/metacritic.py
@@ -49,7 +49,6 @@ class MetacriticIE(InfoExtractor):
                 'format_id': rate_str,
                 'tbr': int(rate_str),
             })
-        self._sort_formats(formats)

         description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>', webpage, 'description', flags=re.DOTALL)
diff --git a/yt_dlp/extractor/mgoon.py b/yt_dlp/extractor/mgoon.py
index c41c51384..2388a7192 100644
--- a/yt_dlp/extractor/mgoon.py
+++ b/yt_dlp/extractor/mgoon.py
@@ -68,7 +68,6 @@ class MgoonIE(InfoExtractor):
                 'ext': fmt['format'],
             })

-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/mgtv.py b/yt_dlp/extractor/mgtv.py
index 37594d12d..edc92b371 100644
--- a/yt_dlp/extractor/mgtv.py
+++ b/yt_dlp/extractor/mgtv.py
@@ -117,7 +117,6 @@ class MGTVIE(InfoExtractor):
                 },
                 'format_note': stream.get('name'),
             })
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/microsoftembed.py b/yt_dlp/extractor/microsoftembed.py
index 1425a0159..f71ab3e92 100644
--- a/yt_dlp/extractor/microsoftembed.py
+++ b/yt_dlp/extractor/microsoftembed.py
@@ -39,7 +39,6 @@ class MicrosoftEmbedIE(InfoExtractor):
                 'height': source.get('heightPixels'),
                 'width': source.get('widthPixels'),
             })
-        self._sort_formats(formats)

         subtitles = {
             lang: [{
diff --git a/yt_dlp/extractor/microsoftstream.py b/yt_dlp/extractor/microsoftstream.py
index 2dde82a75..9b50996b7 100644
--- a/yt_dlp/extractor/microsoftstream.py
+++ b/yt_dlp/extractor/microsoftstream.py
@@ -101,7 +101,6 @@ class MicrosoftStreamIE(InfoExtractor):
                 playlist['playbackUrl'], video_id, ism_id='mss', fatal=False, headers=headers))

         formats = [merge_dicts(f, {'language': language}) for f in formats]
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/microsoftvirtualacademy.py b/yt_dlp/extractor/microsoftvirtualacademy.py
index f15f00ee5..b759b1860 100644
--- a/yt_dlp/extractor/microsoftvirtualacademy.py
+++ b/yt_dlp/extractor/microsoftvirtualacademy.py
@@ -93,7 +93,6 @@ class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE):
                 'acodec': acodec,
                 'vcodec': vcodec,
             })
-        self._sort_formats(formats)

         subtitles = {}
         for source in settings.findall('.//MarkerResourceSource'):
diff --git a/yt_dlp/extractor/mildom.py b/yt_dlp/extractor/mildom.py
index c7a61dfa0..f64d575dc 100644
--- a/yt_dlp/extractor/mildom.py
+++ b/yt_dlp/extractor/mildom.py
@@ -74,8 +74,6 @@ class MildomIE(MildomBaseIE):
         for fmt in formats:
             fmt.setdefault('http_headers', {})['Referer'] = 'https://www.mildom.com/'

-        self._sort_formats(formats)
-
         return {
             'id': result_video_id,
             'title': self._html_search_meta('twitter:description', webpage, default=None) or traverse_obj(enterstudio, 'anchor_intro'),
@@ -166,8 +164,6 @@ class MildomVodIE(MildomBaseIE):
                 'ext': 'mp4'
             })

-        self._sort_formats(formats)
-
         return {
             'id': video_id,
             'title': self._html_search_meta(('og:description', 'description'), webpage, default=None) or autoplay.get('title'),
diff --git a/yt_dlp/extractor/minds.py b/yt_dlp/extractor/minds.py
index 85dd5fd79..2fb17920c 100644
--- a/yt_dlp/extractor/minds.py
+++ b/yt_dlp/extractor/minds.py
@@ -92,7 +92,6 @@ class MindsIE(MindsBaseIE):
                 'height': int_or_none(source.get('size')),
                 'url': src,
             })
-        self._sort_formats(formats)

         entity = video.get('entity') or entity
         owner = entity.get('ownerObj') or {}
diff --git a/yt_dlp/extractor/minoto.py b/yt_dlp/extractor/minoto.py
index e799cd3bc..8d18179c7 100644
--- a/yt_dlp/extractor/minoto.py
+++ b/yt_dlp/extractor/minoto.py
@@ -35,7 +35,6 @@ class MinotoIE(InfoExtractor):
                 'height': int_or_none(fmt.get('height')),
                 **parse_codecs(fmt.get('codecs')),
             })
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/mirrativ.py b/yt_dlp/extractor/mirrativ.py
index 8192f2b46..0a8ee0c3a 100644
--- a/yt_dlp/extractor/mirrativ.py
+++ b/yt_dlp/extractor/mirrativ.py
@@ -55,7 +55,6 @@ class MirrativIE(MirrativBaseIE):
             hls_url, video_id,
             ext='mp4', entry_protocol='m3u8_native',
             m3u8_id='hls', live=is_live)
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/mixcloud.py b/yt_dlp/extractor/mixcloud.py
index becc56a2b..fb5a08ca2 100644
--- a/yt_dlp/extractor/mixcloud.py
+++ b/yt_dlp/extractor/mixcloud.py
@@ -169,8 +169,6 @@ class MixcloudIE(MixcloudBaseIE):
         if not formats and cloudcast.get('isExclusive'):
             self.raise_login_required(metadata_available=True)

-        self._sort_formats(formats)
-
         comments = []
         for edge in (try_get(cloudcast, lambda x: x['comments']['edges']) or []):
             node = edge.get('node') or {}
diff --git a/yt_dlp/extractor/mlb.py b/yt_dlp/extractor/mlb.py
index 2f0f2deab..72057dc97 100644
--- a/yt_dlp/extractor/mlb.py
+++ b/yt_dlp/extractor/mlb.py
@@ -54,7 +54,6 @@ class MLBBaseIE(InfoExtractor):
                     'width': int(mobj.group(1)),
                 })
             formats.append(f)
-        self._sort_formats(formats)

         thumbnails = []
         for cut in (try_get(feed, lambda x: x['image']['cuts'], list) or []):
@@ -339,7 +338,6 @@ class MLBTVIE(InfoExtractor):
             formats.extend(f)
             self._merge_subtitles(s, target=subtitles)

-        self._sort_formats(formats)
         return {
             'id': video_id,
             'title': traverse_obj(airings, (..., 'titles', 0, 'episodeName'), get_all=False),
diff --git a/yt_dlp/extractor/mnet.py b/yt_dlp/extractor/mnet.py
index 65e3d476a..98bab2e10 100644
--- a/yt_dlp/extractor/mnet.py
+++ b/yt_dlp/extractor/mnet.py
@@ -59,7 +59,6 @@ class MnetIE(InfoExtractor):
             m3u8_url += '?' + token
         formats = self._extract_wowza_formats(
             m3u8_url, video_id, skip_protocols=['rtmp', 'rtsp', 'f4m'])
-        self._sort_formats(formats)

         description = info.get('ment')
         duration = parse_duration(info.get('time'))
diff --git a/yt_dlp/extractor/mocha.py b/yt_dlp/extractor/mocha.py
index 27d2d9c2c..5f72b810b 100644
--- a/yt_dlp/extractor/mocha.py
+++ b/yt_dlp/extractor/mocha.py
@@ -42,8 +42,6 @@ class MochaVideoIE(InfoExtractor):
                 formats.extend(fmts)
                 self._merge_subtitles(subs, target=subtitles)

-        self._sort_formats(formats)
-
         return {
             'id': video_id,
             'display_id': json_data.get('slug') or video_slug,
diff --git a/yt_dlp/extractor/moviezine.py b/yt_dlp/extractor/moviezine.py
index 5757322d6..cffcdcfb5 100644
--- a/yt_dlp/extractor/moviezine.py
+++ b/yt_dlp/extractor/moviezine.py
@@ -29,8 +29,6 @@ class MoviezineIE(InfoExtractor):
             'ext': 'mp4',
         }]

-        self._sort_formats(formats)
-
         return {
             'id': video_id,
             'title': self._search_regex(r'title: "(.+?)",', jsplayer, 'title'),
diff --git a/yt_dlp/extractor/msn.py b/yt_dlp/extractor/msn.py
index 6f4935e51..f91c53eba 100644
--- a/yt_dlp/extractor/msn.py
+++ b/yt_dlp/extractor/msn.py
@@ -131,7 +131,6 @@ class MSNIE(InfoExtractor):
                     'vbr': int_or_none(self._search_regex(r'_(\d+)\.mp4', format_url, 'vbr', default=None)),
                     'quality': 1 if format_id == '1001' else None,
                 })
-        self._sort_formats(formats)

         subtitles = {}
         for file_ in video.get('files', []):
diff --git a/yt_dlp/extractor/mtv.py b/yt_dlp/extractor/mtv.py
index b2009dc5b..d91be6270 100644
--- a/yt_dlp/extractor/mtv.py
+++ b/yt_dlp/extractor/mtv.py
@@ -102,8 +102,6 @@ class MTVServicesInfoExtractor(InfoExtractor):
                     }])
                 except (KeyError, TypeError):
                     raise ExtractorError('Invalid rendition field.')
-        if formats:
-            self._sort_formats(formats)
         return formats

     def _extract_subtitles(self, mdoc, mtvn_id):
@@ -202,8 +200,6 @@ class MTVServicesInfoExtractor(InfoExtractor):
         if not formats:
             return None

-        self._sort_formats(formats)
-
         return {
             'title': title,
             'formats': formats,
diff --git a/yt_dlp/extractor/muenchentv.py b/yt_dlp/extractor/muenchentv.py
index b9681d1bd..36a2d4688 100644
--- a/yt_dlp/extractor/muenchentv.py
+++ b/yt_dlp/extractor/muenchentv.py
@@ -60,7 +60,6 @@ class MuenchenTVIE(InfoExtractor):
                 'format_id': format_id,
                 'preference': -100 if '.smil' in s['file'] else 0,  # Strictly inferior than all other formats?
             })
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/mwave.py b/yt_dlp/extractor/mwave.py
index 0cbb16736..efbfd9d43 100644
--- a/yt_dlp/extractor/mwave.py
+++ b/yt_dlp/extractor/mwave.py
@@ -47,7 +47,6 @@ class MwaveIE(InfoExtractor):
                 continue
             formats.extend(
                 self._extract_f4m_formats(f4m_url + '&hdcore=3.0.3', video_id, f4m_id=stream_name))
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/myspace.py b/yt_dlp/extractor/myspace.py
index 63d36c30a..345109837 100644
--- a/yt_dlp/extractor/myspace.py
+++ b/yt_dlp/extractor/myspace.py
@@ -122,7 +122,6 @@ class MySpaceIE(InfoExtractor):
                 else:
                     raise ExtractorError(
                         'Found song but don\'t know how to download it')
-            self._sort_formats(formats)
             return {
                 'id': video_id,
                 'title': self._og_search_title(webpage),
@@ -140,7 +139,6 @@ class MySpaceIE(InfoExtractor):
                 video.get('streamUrl'), video.get('hlsStreamUrl'),
                 video.get('mp4StreamUrl'), int_or_none(video.get('width')),
                 int_or_none(video.get('height')))
-            self._sort_formats(formats)
             return {
                 'id': video_id,
                 'title': video['title'],
diff --git a/yt_dlp/extractor/n1.py b/yt_dlp/extractor/n1.py
index cc0ff533e..55345f398 100644
--- a/yt_dlp/extractor/n1.py
+++ b/yt_dlp/extractor/n1.py
@@ -24,8 +24,6 @@ class N1InfoAssetIE(InfoExtractor):
         formats = self._extract_m3u8_formats(
             url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)

-        self._sort_formats(formats)
-
         return {
             'id': video_id,
             'title': video_id,
diff --git a/yt_dlp/extractor/nate.py b/yt_dlp/extractor/nate.py
index c83b2acbd..5e74caa7f 100644
--- a/yt_dlp/extractor/nate.py
+++ b/yt_dlp/extractor/nate.py
@@ -68,7 +68,6 @@ class NateIE(InfoExtractor):
             'height': self._QUALITY.get(f_url[-2:]),
             'quality': int_or_none(f_url[-2:]),
         } for f_url in video_data.get('smcUriList') or []]
-        self._sort_formats(formats)
         return {
             'id': id,
             'title': video_data.get('clipTitle'),
diff --git a/yt_dlp/extractor/naver.py b/yt_dlp/extractor/naver.py
index 3c4e73535..b5425c744 100644
--- a/yt_dlp/extractor/naver.py
+++ b/yt_dlp/extractor/naver.py
@@ -67,7 +67,6 @@ class NaverBaseIE(InfoExtractor):
                 formats.extend(self._extract_m3u8_formats(
                     update_url_query(stream_url, query), video_id,
                     'mp4', 'm3u8_native', m3u8_id=stream_type, fatal=False))
-        self._sort_formats(formats)

         replace_ext = lambda x, y: re.sub(self._CAPTION_EXT_RE, '.' + y, x)
@@ -239,7 +238,6 @@ class NaverLiveIE(InfoExtractor):
                 quality.get('url'), video_id, 'mp4',
                 m3u8_id=quality.get('qualityId'), live=True
             ))
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/nba.py b/yt_dlp/extractor/nba.py
index e95c1b795..d8fc82488 100644
--- a/yt_dlp/extractor/nba.py
+++ b/yt_dlp/extractor/nba.py
@@ -92,7 +92,6 @@ class NBAWatchBaseIE(NBACVPBaseIE):
                 formats.extend(cvp_info['formats'])
             info = merge_dicts(info, cvp_info)

-        self._sort_formats(formats)
         info['formats'] = formats
         return info

@@ -318,7 +317,6 @@ class NBABaseIE(NBACVPBaseIE):
                     subtitles = self._merge_subtitles(subtitles, cvp_info['subtitles'])
                 info = merge_dicts(info, cvp_info)

-            self._sort_formats(formats)
         else:
             info.update(self._embed_url_result(team, video['videoId']))
diff --git a/yt_dlp/extractor/nbc.py b/yt_dlp/extractor/nbc.py
index dbc82de9f..1ea6355b5 100644
--- a/yt_dlp/extractor/nbc.py
+++ b/yt_dlp/extractor/nbc.py
@@ -305,7 +305,6 @@ class NBCSportsStreamIE(AdobePassIE):
             'resourceId': base64.b64encode(resource.encode()).decode(),
         }).encode())['tokenizedUrl']
         formats = self._extract_m3u8_formats(tokenized_url, video_id, 'mp4')
-        self._sort_formats(formats)
         return {
             'id': video_id,
             'title': title,
@@ -437,7 +436,6 @@ class NBCNewsIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
                     'tbr': tbr,
                     'ext': 'mp4',
                 })
-        self._sort_formats(formats)

         subtitles = {}
         closed_captioning = video_data.get('closedCaptioning')
@@ -581,7 +579,6 @@ class NBCOlympicsStreamIE(AdobePassIE):
             # -http_seekable requires ffmpeg 4.3+ but it doesnt seem possible to
             # download with ffmpeg without this option
             f['downloader_options'] = {'ffmpeg_args': ['-seekable', '0', '-http_seekable', '0', '-icy', '0']}
-        self._sort_formats(formats)

         return {
             'id': pid,
@@ -745,7 +742,6 @@ class NBCStationsIE(InfoExtractor):
             formats.extend(self._extract_m3u8_formats(
                 manifest_url, video_id, 'mp4', headers=headers, m3u8_id='hls',
                 fatal=live, live=live, errnote='No HLS formats found'))
-        self._sort_formats(formats)

         return {
             'id': str_or_none(video_id),
diff --git a/yt_dlp/extractor/ndr.py b/yt_dlp/extractor/ndr.py
index 90a658cd8..41ea3629a 100644
--- a/yt_dlp/extractor/ndr.py
+++ b/yt_dlp/extractor/ndr.py
@@ -266,7 +266,6 @@ class NDREmbedBaseIE(InfoExtractor):  # XXX: Conventionally, Concrete class name
                 ff['vcodec'] = 'none'
                 ff['ext'] = ext or 'mp3'
             formats.append(ff)
-        self._sort_formats(formats)

         config = playlist['config']
diff --git a/yt_dlp/extractor/neteasemusic.py b/yt_dlp/extractor/neteasemusic.py
index 5cf96ad7e..595709899 100644
--- a/yt_dlp/extractor/neteasemusic.py
+++ b/yt_dlp/extractor/neteasemusic.py
@@ -236,7 +236,6 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
             song_id, 'Downloading song info')['songs'][0]

         formats = self.extract_formats(info)
-        self._sort_formats(formats)

         lyrics_info = self.query_api(
             'song/lyric?id=%s&lv=-1&tv=-1' % song_id,
@@ -412,7 +411,6 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
             {'url': mv_url, 'ext': 'mp4', 'format_id': '%sp' % brs, 'height': int(brs)}
             for brs, mv_url in info['brs'].items()
         ]
-        self._sort_formats(formats)

         return {
             'id': mv_id,
@@ -482,7 +480,6 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):

         if not self._yes_playlist(info['songs'] and program_id, info['mainSong']['id']):
             formats = self.extract_formats(info['mainSong'])
-            self._sort_formats(formats)

             return {
                 'id': info['mainSong']['id'],
diff --git a/yt_dlp/extractor/netzkino.py b/yt_dlp/extractor/netzkino.py
index 49b29b67c..9c314e223 100644
--- a/yt_dlp/extractor/netzkino.py
+++ b/yt_dlp/extractor/netzkino.py
@@ -72,7 +72,6 @@ class NetzkinoIE(InfoExtractor):
             'ext': 'mp4',
             'url': tpl.replace('{}', film_fn) + suffix[key],
         } for key, tpl in templates.items()]
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/newgrounds.py b/yt_dlp/extractor/newgrounds.py
index ba24720e3..9e3286dfe 100644
--- a/yt_dlp/extractor/newgrounds.py
+++ b/yt_dlp/extractor/newgrounds.py
@@ -172,7 +172,6 @@ class NewgroundsIE(InfoExtractor):
         if video_type_description == 'Audio File':
             formats[0]['vcodec'] = 'none'
         self._check_formats(formats, media_id)
-        self._sort_formats(formats)

         return {
             'id': media_id,
diff --git a/yt_dlp/extractor/newspicks.py b/yt_dlp/extractor/newspicks.py
index a368ce4e0..b6334dcba 100644
--- a/yt_dlp/extractor/newspicks.py
+++ b/yt_dlp/extractor/newspicks.py
@@ -29,7 +29,6 @@ class NewsPicksIE(InfoExtractor):
         if not entries:
             raise ExtractorError('No HTML5 media elements found')
         info = entries[0]
-        self._sort_formats(info['formats'])

         title = self._html_search_meta('og:title', webpage, fatal=False)
         description = self._html_search_meta(
diff --git a/yt_dlp/extractor/newstube.py b/yt_dlp/extractor/newstube.py
index 20db46057..820eb4ba7 100644
--- a/yt_dlp/extractor/newstube.py
+++ b/yt_dlp/extractor/newstube.py
@@ -64,7 +64,6 @@ class NewstubeIE(InfoExtractor):
             formats.append(f)

         self._check_formats(formats, video_guid)
-        self._sort_formats(formats)

         return {
             'id': video_guid,
diff --git a/yt_dlp/extractor/newsy.py b/yt_dlp/extractor/newsy.py
index 9fde6c079..a5a7b168c 100644
--- a/yt_dlp/extractor/newsy.py
+++ b/yt_dlp/extractor/newsy.py
@@ -36,7 +36,6 @@ class NewsyIE(InfoExtractor):
             fmts, subs = self._extract_m3u8_formats_and_subtitles(data_json['stream'], display_id)
             formats.extend(fmts)
             subtitles = self._merge_subtitles(subtitles, subs)
-        self._sort_formats(formats)
         return merge_dicts(ld_json, {
             'id': data_json['id'],
             'display_id': display_id,
diff --git a/yt_dlp/extractor/nexx.py b/yt_dlp/extractor/nexx.py
index 69c48652c..b4874c8f3 100644
--- a/yt_dlp/extractor/nexx.py
+++ b/yt_dlp/extractor/nexx.py
@@ -452,8 +452,6 @@ class NexxIE(InfoExtractor):
         else:
             self.raise_no_formats(f'{cdn} formats are currently not supported', video_id)

-        self._sort_formats(formats)
-
         subtitles = {}
         for sub in video.get('captiondata') or []:
             if sub.get('data'):
diff --git a/yt_dlp/extractor/nfb.py b/yt_dlp/extractor/nfb.py
index 79c6aaf0c..38e068af4 100644
--- a/yt_dlp/extractor/nfb.py
+++ b/yt_dlp/extractor/nfb.py
@@ -35,7 +35,6 @@ class NFBIE(InfoExtractor):
             player, 'source', default=None, fatal=True)

         formats, subtitles = self._extract_m3u8_formats_and_subtitles(source, video_id, ext='mp4')
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/nfhsnetwork.py b/yt_dlp/extractor/nfhsnetwork.py
index e6f98b036..febad8fdf 100644
--- a/yt_dlp/extractor/nfhsnetwork.py
+++ b/yt_dlp/extractor/nfhsnetwork.py
@@ -124,7 +124,6 @@ class NFHSNetworkIE(InfoExtractor):
             video_id).get('video_url')
         formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', live=isLive)
-        self._sort_formats(formats, ['res', 'tbr'])

         return {
             'id': video_id,
@@ -137,5 +136,6 @@ class NFHSNetworkIE(InfoExtractor):
             'uploader_url': uploaderPage,
             'location': location,
             'upload_date': upload_date,
-            'is_live': isLive
+            'is_live': isLive,
+            '_format_sort_fields': ('res', 'tbr'),
         }
diff --git a/yt_dlp/extractor/nfl.py b/yt_dlp/extractor/nfl.py
index 106566611..29c53d5a5 100644
--- a/yt_dlp/extractor/nfl.py
+++ b/yt_dlp/extractor/nfl.py
@@ -71,7 +71,6 @@ class NFLBaseIE(InfoExtractor):
             ext = determine_ext(item_url)
             if ext == 'm3u8':
                 info['formats'] = self._extract_m3u8_formats(item_url, media_id, 'mp4')
-                self._sort_formats(info['formats'])
             else:
                 info['url'] = item_url
                 if item.get('audio') is True:
diff --git a/yt_dlp/extractor/nhk.py b/yt_dlp/extractor/nhk.py
index 517660ef1..59702b247 100644
--- a/yt_dlp/extractor/nhk.py
+++ b/yt_dlp/extractor/nhk.py
@@ -78,7 +78,6 @@ class NhkBaseIE(InfoExtractor):
                 m3u8_id='hls', fatal=False)
             for f in info['formats']:
                 f['language'] = lang
-            self._sort_formats(info['formats'])
         else:
             info.update({
                 '_type': 'url_transparent',
@@ -240,7 +239,6 @@ class NhkForSchoolBangumiIE(InfoExtractor):
         formats = self._extract_m3u8_formats(
             f'https://nhks-vh.akamaihd.net/i/das/{video_id[0:8]}/{video_id}_V_000.f4v/master.m3u8',
             video_id, ext='mp4', m3u8_id='hls')
-        self._sort_formats(formats)

         duration = parse_duration(base_values.get('r_duration'))
diff --git a/yt_dlp/extractor/nhl.py b/yt_dlp/extractor/nhl.py
index 884f9e2ae..2521c40e0 100644
--- a/yt_dlp/extractor/nhl.py
+++ b/yt_dlp/extractor/nhl.py
@@ -48,7 +48,6 @@ class NHLBaseIE(InfoExtractor):
                     'height': height,
                     'tbr': int_or_none(self._search_regex(r'_(\d+)[kK]', playback_url, 'bitrate', default=None)),
                 })
-            self._sort_formats(formats)

             thumbnails = []
             cuts = video_data.get('image', {}).get('cuts') or []
diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py
index e131b044a..210303759 100644
--- a/yt_dlp/extractor/niconico.py
+++ b/yt_dlp/extractor/niconico.py
@@ -425,8 +425,6 @@ class NiconicoIE(InfoExtractor):
             if fmt:
                 formats.append(fmt)

-        self._sort_formats(formats)
-
         # Start extracting information
         tags = None
         if webpage:
diff --git a/yt_dlp/extractor/ninecninemedia.py b/yt_dlp/extractor/ninecninemedia.py
index 462caf466..31df42f4f 100644
--- a/yt_dlp/extractor/ninecninemedia.py
+++ b/yt_dlp/extractor/ninecninemedia.py
@@ -43,7 +43,6 @@ class NineCNineMediaIE(InfoExtractor):
             formats.extend(self._extract_mpd_formats(
                 manifest_base_url + 'mpd', content_id,
                 mpd_id='dash', fatal=False))
-        self._sort_formats(formats)

         thumbnails = []
         for image in (content.get('Images') or []):
diff --git a/yt_dlp/extractor/ninegag.py b/yt_dlp/extractor/ninegag.py
index 86e710f2b..865ad99ac 100644
--- a/yt_dlp/extractor/ninegag.py
+++ b/yt_dlp/extractor/ninegag.py
@@ -116,7 +116,6 @@ class NineGagIE(InfoExtractor):
                     'format_id': image_id,
                 })
             formats.append(common)
-        self._sort_formats(formats)

         section = traverse_obj(post, ('postSection', 'name'))
diff --git a/yt_dlp/extractor/njpwworld.py b/yt_dlp/extractor/njpwworld.py
index e761cf257..7b8a526f0 100644
--- a/yt_dlp/extractor/njpwworld.py
+++ b/yt_dlp/extractor/njpwworld.py
@@ -69,8 +69,6 @@ class NJPWWorldIE(InfoExtractor):
             formats += self._extract_m3u8_formats(
                 player_url, video_id, 'mp4', 'm3u8_native', m3u8_id=kind, fatal=False, quality=int(kind == 'high'))

-        self._sort_formats(formats)
-
         tag_block = get_element_by_class('tag-block', webpage)
         tags = re.findall(
             r'<a[^>]+class="tag-[^"]+"[^>]*>([^<]+)</a>', tag_block
diff --git a/yt_dlp/extractor/nobelprize.py b/yt_dlp/extractor/nobelprize.py
index 35b64530f..1aa9705be 100644
--- a/yt_dlp/extractor/nobelprize.py
+++ b/yt_dlp/extractor/nobelprize.py
@@ -48,7 +48,6 @@ class NobelPrizeIE(InfoExtractor):
             formats.append({
                 'url': source_src,
             })
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/noodlemagazine.py b/yt_dlp/extractor/noodlemagazine.py
index 3e04da67e..e6208956f 100644
--- a/yt_dlp/extractor/noodlemagazine.py
+++ b/yt_dlp/extractor/noodlemagazine.py
@@ -47,8 +47,6 @@ class NoodleMagazineIE(InfoExtractor):
             'ext': source.get('type'),
         } for source in playlist_info.get('sources')]

-        self._sort_formats(formats)
-
         return {
             'id': video_id,
             'formats': formats,
diff --git a/yt_dlp/extractor/nova.py b/yt_dlp/extractor/nova.py
index 6875d26ba..8bd3fd472 100644
--- a/yt_dlp/extractor/nova.py
+++ b/yt_dlp/extractor/nova.py
@@ -123,7 +123,6 @@ class NovaEmbedIE(InfoExtractor):
         if not formats and has_drm:
             self.report_drm(video_id)
-        self._sort_formats(formats)

         title = self._og_search_title(
             webpage, default=None) or self._search_regex(
@@ -308,7 +307,6 @@ class NovaIE(InfoExtractor):
             formats = [{
                 'url': video_url,
             }]
-            self._sort_formats(formats)

         title = mediafile.get('meta', {}).get('title') or self._og_search_title(webpage)
         thumbnail = config.get('poster')
diff --git a/yt_dlp/extractor/novaplay.py b/yt_dlp/extractor/novaplay.py
index 152b93bd4..92d1d136c 100644
--- a/yt_dlp/extractor/novaplay.py
+++ b/yt_dlp/extractor/novaplay.py
@@ -55,7 +55,6 @@ class NovaPlayIE(InfoExtractor):
                 'Authorization': f'Bearer {self._access_token}'
             })[0]['links']['play']['href']
         formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls')
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/noz.py b/yt_dlp/extractor/noz.py
index b42a56f7e..59d259f9d 100644
--- a/yt_dlp/extractor/noz.py
+++ b/yt_dlp/extractor/noz.py
@@ -71,7 +71,6 @@ class NozIE(InfoExtractor):
                     formats.extend(self._extract_m3u8_formats(
                         m3u8_url, video_id, 'mp4', 'm3u8_native',
                         m3u8_id='hls', fatal=False))
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/npo.py b/yt_dlp/extractor/npo.py
index b307e6a78..f18cb9e28 100644
--- a/yt_dlp/extractor/npo.py
+++ b/yt_dlp/extractor/npo.py
@@ -247,8 +247,6 @@ class NPOIE(NPOBaseIE):
         if not self.get_param('allow_unplayable_formats') and drm:
             self.report_drm(video_id)

-        self._sort_formats(formats)
-
         info = {
             'id': video_id,
             'title': video_id,
@@ -454,8 +452,6 @@ class NPOIE(NPOBaseIE):
                     'quality': stream.get('kwaliteit'),
                 })

-        self._sort_formats(formats)
-
         subtitles = {}
         if metadata.get('tt888') == 'ja':
             subtitles['nl'] = [{
diff --git a/yt_dlp/extractor/npr.py b/yt_dlp/extractor/npr.py
index e677e862d..4b6855c93 100644
--- a/yt_dlp/extractor/npr.py
+++ b/yt_dlp/extractor/npr.py
@@ -121,8 +121,6 @@ class NprIE(InfoExtractor):
                 m3u8_url = traverse_obj(list(raw_json_ld), (..., 'subjectOf', ..., 'embedUrl'), get_all=False)
                 formats = self._extract_m3u8_formats(m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)

-        self._sort_formats(formats)
-
         entries.append({
             'id': media_id,
             'title': media.get('title', {}).get('$text') or playlist_title,
diff --git a/yt_dlp/extractor/nrk.py b/yt_dlp/extractor/nrk.py
index 14951f8e1..88d08e5e3 100644
--- a/yt_dlp/extractor/nrk.py
+++ b/yt_dlp/extractor/nrk.py
@@ -180,7 +180,6 @@ class NRKIE(NRKBaseIE):
                     'format_id': asset_format,
                     'vcodec': 'none',
                 })
-        self._sort_formats(formats)

         data = call_playback_api('metadata')
diff --git a/yt_dlp/extractor/ntvde.py b/yt_dlp/extractor/ntvde.py
index d252ced86..6d7ea3d18 100644
--- a/yt_dlp/extractor/ntvde.py
+++ b/yt_dlp/extractor/ntvde.py
@@ -60,7 +60,6 @@ class NTVDeIE(InfoExtractor):
             formats.extend(self._extract_m3u8_formats(
                 m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
                 quality=1, m3u8_id='hls', fatal=False))
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/ntvru.py b/yt_dlp/extractor/ntvru.py
index c8df110e8..8d5877daa 100644
--- a/yt_dlp/extractor/ntvru.py
+++ b/yt_dlp/extractor/ntvru.py
@@ -115,7 +115,6 @@ class NTVRuIE(InfoExtractor):
                 'url': file_,
                 'filesize': int_or_none(xpath_text(video, './%ssize' % format_id)),
             })
-        self._sort_formats(formats)

         return {
             'id': xpath_text(video, './id'),
diff --git a/yt_dlp/extractor/nuvid.py b/yt_dlp/extractor/nuvid.py
index fafcc8f4b..6ac351cb0 100644
--- a/yt_dlp/extractor/nuvid.py
+++ b/yt_dlp/extractor/nuvid.py
@@ -80,7 +80,6 @@ class NuvidIE(InfoExtractor):
         } for quality, source in video_data.get('files').items() if source]

         self._check_formats(formats, video_id)
-        self._sort_formats(formats)

         duration = parse_duration(traverse_obj(video_data, 'duration', 'duration_format'))
         thumbnails = [
diff --git a/yt_dlp/extractor/nytimes.py b/yt_dlp/extractor/nytimes.py
index fe6986a82..2e21edbb4 100644
--- a/yt_dlp/extractor/nytimes.py
+++ b/yt_dlp/extractor/nytimes.py
@@ -72,7 +72,6 @@ class NYTimesBaseIE(InfoExtractor):
                     'tbr': int_or_none(video.get('bitrate'), 1000) or None,
                     'ext': ext,
                 })
-        self._sort_formats(formats)

         thumbnails = []
         for image in video_data.get('images', []):
diff --git a/yt_dlp/extractor/odnoklassniki.py b/yt_dlp/extractor/odnoklassniki.py
index 195563bbb..4f325f087 100644
--- a/yt_dlp/extractor/odnoklassniki.py
+++ b/yt_dlp/extractor/odnoklassniki.py
@@ -370,8 +370,6 @@ class OdnoklassnikiIE(InfoExtractor):
         if payment_info:
             self.raise_no_formats('This video is paid, subscribe to download it', expected=True)

-        self._sort_formats(formats)
-
         info['formats'] = formats
         return info
diff --git a/yt_dlp/extractor/olympics.py b/yt_dlp/extractor/olympics.py
index 42ea94905..61d1f4048 100644
--- a/yt_dlp/extractor/olympics.py
+++ b/yt_dlp/extractor/olympics.py
@@ -54,7 +54,6 @@ class OlympicsReplayIE(InfoExtractor):
         m3u8_url = self._download_json(
             f'https://olympics.com/tokenGenerator?url={m3u8_url}', uuid, note='Downloading m3u8 url')
         formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, uuid, 'mp4', m3u8_id='hls')
-        self._sort_formats(formats)

         return {
             'id': uuid,
diff --git a/yt_dlp/extractor/on24.py b/yt_dlp/extractor/on24.py
index 779becc70..9a4abc98d 100644
--- a/yt_dlp/extractor/on24.py
+++ b/yt_dlp/extractor/on24.py
@@ -76,7 +76,6 @@ class On24IE(InfoExtractor):
                     'vcodec': 'none',
                     'acodec': 'wav'
                 })
-        self._sort_formats(formats)

         return {
             'id': event_id,
diff --git a/yt_dlp/extractor/onefootball.py b/yt_dlp/extractor/onefootball.py
index 41815bef1..591d15732 100644
--- a/yt_dlp/extractor/onefootball.py
+++ b/yt_dlp/extractor/onefootball.py
@@ -36,7 +36,6 @@ class OneFootballIE(InfoExtractor):
         data_json = self._search_json_ld(webpage, id)
         m3u8_url = self._html_search_regex(r'(https://cdn\.jwplayer\.com/manifests/.+\.m3u8)', webpage, 'm3u8_url')
         formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, id)
-        self._sort_formats(formats)
         return {
             'id': id,
             'title': data_json.get('title'),
diff --git a/yt_dlp/extractor/onet.py b/yt_dlp/extractor/onet.py
index ea46d7def..0d59e8cb4 100644
--- a/yt_dlp/extractor/onet.py
+++ b/yt_dlp/extractor/onet.py
@@ -80,7 +80,6 @@ class OnetBaseIE(InfoExtractor):
                         'vbr': float_or_none(f.get('video_bitrate')),
                     })
                 formats.append(http_f)
-        self._sort_formats(formats)

         meta = video.get('meta', {})
diff --git a/yt_dlp/extractor/ooyala.py b/yt_dlp/extractor/ooyala.py
index 146c1f981..65afccdb1 100644
--- a/yt_dlp/extractor/ooyala.py
+++ b/yt_dlp/extractor/ooyala.py
@@ -85,7 +85,6 @@ class OoyalaBaseIE(InfoExtractor):
         if not formats and not auth_data.get('authorized'):
             self.raise_no_formats('%s said: %s' % (
                 self.IE_NAME, auth_data['message']), expected=True)
-        self._sort_formats(formats)

         subtitles = {}
         for lang, sub in metadata.get('closed_captions_vtt', {}).get('captions', {}).items():
diff --git a/yt_dlp/extractor/opencast.py b/yt_dlp/extractor/opencast.py
index c640224dd..fa46757f7 100644
--- a/yt_dlp/extractor/opencast.py
+++ b/yt_dlp/extractor/opencast.py
@@ -92,8 +92,6 @@ class OpencastBaseIE(InfoExtractor):
                 })
                 formats.append(track_obj)

-        self._sort_formats(formats)
-
         return {
             'id': video_id,
             'formats': formats,
diff --git a/yt_dlp/extractor/openrec.py b/yt_dlp/extractor/openrec.py
index 6c1eb8f3a..86dc9bb89 100644
--- a/yt_dlp/extractor/openrec.py
+++ b/yt_dlp/extractor/openrec.py
@@ -50,8 +50,6 @@ class OpenRecBaseIE(InfoExtractor):
             formats = list(self._expand_media(video_id, new_media))
             is_live = False

-        self._sort_formats(formats)
-
         return {
             'id': video_id,
             'title': get_first(movie_stores, 'title'),
@@ -113,7 +111,6 @@ class OpenRecCaptureIE(OpenRecBaseIE):

         formats = self._extract_m3u8_formats(
             capture_data.get('source'), video_id, ext='mp4')
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/ora.py b/yt_dlp/extractor/ora.py
index 09b121422..d49909d52 100644
--- a/yt_dlp/extractor/ora.py
+++ b/yt_dlp/extractor/ora.py
@@ -54,7 +54,6 @@ class OraTVIE(InfoExtractor):
                         'format_id': q,
                         'quality': preference(q),
                     })
-            self._sort_formats(formats)
         else:
             return self.url_result(self._search_regex(
                 r'"youtube_id"\s*:\s*"([^"]+)', webpage, 'youtube id'), 'Youtube')
diff --git a/yt_dlp/extractor/orf.py b/yt_dlp/extractor/orf.py
index 24abf7f26..e9d23a4d1 100644
--- a/yt_dlp/extractor/orf.py
+++ b/yt_dlp/extractor/orf.py
@@ -134,8 +134,6 @@ class ORFTVthekIE(InfoExtractor):
                     HEADRequest(http_url), video_id, fatal=False, note='Testing for geoblocking',
                     errnote=f'This video seems to be blocked outside of {geo_str}. You may want to try the streaming-* formats')
-        self._sort_formats(formats)
-
         subtitles = {}
         for sub in sd.get('subtitles', []):
             sub_src = sub.get('src')
@@ -407,7 +405,6 @@ class ORFIPTVIE(InfoExtractor):
                     format_url, video_id, 'mp4', m3u8_id=format_id))
             else:
                 continue
-        self._sort_formats(formats)

         title = remove_end(self._og_search_title(webpage), ' - iptv.ORF.at')
         description = self._og_search_description(webpage)
@@ -507,7 +504,6 @@ class ORFFM4StoryIE(InfoExtractor):
                     format_url, video_id, 'mp4', m3u8_id=format_id))
             else:
                 continue
-        self._sort_formats(formats)

         title = remove_end(self._og_search_title(webpage), ' - fm4.ORF.at')
         if idx >= 1:
diff --git a/yt_dlp/extractor/pandoratv.py b/yt_dlp/extractor/pandoratv.py
index 3747f31d2..ccc78da57 100644
--- a/yt_dlp/extractor/pandoratv.py
+++ b/yt_dlp/extractor/pandoratv.py
@@ -112,7 +112,6 @@ class PandoraTVIE(InfoExtractor):
                 'url': format_url,
                 'height': int(height),
             })
-        self._sort_formats(formats)

         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/panopto.py b/yt_dlp/extractor/panopto.py
index 5f5edb26b..32c103bc1 100644
--- a/yt_dlp/extractor/panopto.py
+++ b/yt_dlp/extractor/panopto.py
@@ -407,7 +407,6 @@ class PanoptoIE(PanoptoBaseIE):
         subtitles = self._merge_subtitles(
             podcast_subtitles, streams_subtitles, self.extract_subtitles(base_url, video_id, delivery))

-        self._sort_formats(formats)
         self.mark_watched(base_url, video_id, delivery_info)

         return {
diff --git a/yt_dlp/extractor/parlview.py b/yt_dlp/extractor/parlview.py
index f31ae576c..0b547917c 100644
--- a/yt_dlp/extractor/parlview.py
+++ b/yt_dlp/extractor/parlview.py
@@ -44,7 +44,6 @@ class ParlviewIE(InfoExtractor):
         elif stream.get('streamType') != 'VOD':
             self.raise_no_formats('Unknown type of stream was detected: "%s"' % str(stream.get('streamType')))
         formats = self._extract_m3u8_formats(stream['url'], video_id, 'mp4', 'm3u8_native')
-        self._sort_formats(formats)

         media_info = self._download_webpage(
             self._MEDIA_INFO_URL % video_id, video_id, note='Downloading media info', fatal=False)
diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py
index 43c90c8f1..529aba178 100644
--- a/yt_dlp/extractor/patreon.py
+++ b/yt_dlp/extractor/patreon.py
@@ -277,7 +277,6 @@ class PatreonIE(PatreonBaseIE):
                 }
             elif name == 'video':
                 formats, subtitles = self._extract_m3u8_formats_and_subtitles(post_file['url'], video_id)
-                self._sort_formats(formats)
                 return {
                     **info,
                     'formats': formats,
diff --git a/yt_dlp/extractor/pbs.py b/yt_dlp/extractor/pbs.py
index 4e6674e85..5bdf561db 100644
--- a/yt_dlp/extractor/pbs.py
+++ b/yt_dlp/extractor/pbs.py
@@ -660,7 +660,6 @@ class PBSIE(InfoExtractor):
         for f in formats:
             if (f.get('format_note') or '').endswith(' AD'):  # Audio description
                 f['language_preference'] = -10
-        self._sort_formats(formats)

         rating_str = info.get('rating')
         if rating_str is not None:
diff --git a/yt_dlp/extractor/pearvideo.py b/yt_dlp/extractor/pearvideo.py
index e76305acd..e27e5a7ba 100644
--- a/yt_dlp/extractor/pearvideo.py
+++ b/yt_dlp/extractor/pearvideo.py
@@ -45,7 +45,6 @@ class PearVideoIE(InfoExtractor):
             'format_id': k,
             'url': v.replace(info['systemTime'], f'cont-{video_id}') if k == 'srcUrl' else v
         } for k, v in traverse_obj(info, ('videoInfo', 'videos'), default={}).items() if v]
-        self._sort_formats(formats)

         title = self._search_regex(
             (r'<h1[^>]+\bclass=(["\'])video-tt\1[^>]*>(?P<value>[^<]+)',
diff --git a/yt_dlp/extractor/peekvids.py b/yt_dlp/extractor/peekvids.py
index fd25b5adb..2d9b9a742 100644
--- a/yt_dlp/extractor/peekvids.py
+++ b/yt_dlp/extractor/peekvids.py
b/yt_dlp/extractor/peekvids.py @@ -40,7 +40,6 @@ class PeekVidsIE(InfoExtractor): } for name, url in srcs.items() if len(name) > 8 and name.startswith('data-src')] if not formats: formats = [{'url': url} for url in srcs.values()] - self._sort_formats(formats) info = self._search_json_ld(webpage, video_id, expected_type='VideoObject') info.update({ diff --git a/yt_dlp/extractor/peertube.py b/yt_dlp/extractor/peertube.py index 6d280e41c..68e15737b 100644 --- a/yt_dlp/extractor/peertube.py +++ b/yt_dlp/extractor/peertube.py @@ -1233,7 +1233,6 @@ class PeerTubeIE(InfoExtractor): else: f['fps'] = int_or_none(file_.get('fps')) formats.append(f) - self._sort_formats(formats) description = video.get('description') if description and len(description) >= 250: diff --git a/yt_dlp/extractor/peertv.py b/yt_dlp/extractor/peertv.py index 821abe496..a709e21b4 100644 --- a/yt_dlp/extractor/peertv.py +++ b/yt_dlp/extractor/peertv.py @@ -43,8 +43,6 @@ class PeerTVIE(InfoExtractor): formats = self._extract_m3u8_formats(m3u8_url, video_id, m3u8_id='hls') - self._sort_formats(formats) - return { 'id': video_id, 'title': self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title').replace('\xa0', ' '), diff --git a/yt_dlp/extractor/peloton.py b/yt_dlp/extractor/peloton.py index 3fc05d1f2..4835822cf 100644 --- a/yt_dlp/extractor/peloton.py +++ b/yt_dlp/extractor/peloton.py @@ -157,7 +157,6 @@ class PelotonIE(InfoExtractor): 'title': segment.get('name') } for segment in traverse_obj(metadata, ('segments', 'segment_list'))] - self._sort_formats(formats) return { 'id': video_id, 'title': ride_data.get('title'), diff --git a/yt_dlp/extractor/performgroup.py b/yt_dlp/extractor/performgroup.py index 824495f40..f4d7f22d0 100644 --- a/yt_dlp/extractor/performgroup.py +++ b/yt_dlp/extractor/performgroup.py @@ -65,7 +65,6 @@ class PerformGroupIE(InfoExtractor): 'vbr': int_or_none(c.get('videoRate'), 1000), 'abr': int_or_none(c.get('audioRate'), 1000), }) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/periscope.py b/yt_dlp/extractor/periscope.py index 2ff6589d5..84bcf1573 100644 --- a/yt_dlp/extractor/periscope.py +++ b/yt_dlp/extractor/periscope.py @@ -127,7 +127,6 @@ class PeriscopeIE(PeriscopeBaseIE): } self._add_width_and_height(rtmp_format) formats.append(rtmp_format) - self._sort_formats(formats) info['formats'] = formats return info diff --git a/yt_dlp/extractor/philharmoniedeparis.py b/yt_dlp/extractor/philharmoniedeparis.py index 5ea2b6393..e8494a084 100644 --- a/yt_dlp/extractor/philharmoniedeparis.py +++ b/yt_dlp/extractor/philharmoniedeparis.py @@ -75,7 +75,6 @@ class PhilharmonieDeParisIE(InfoExtractor): m3u8_id='hls', fatal=False)) if not formats and not self.get_param('ignore_no_formats'): return - self._sort_formats(formats) return { 'title': title, 'formats': formats, diff --git a/yt_dlp/extractor/picarto.py b/yt_dlp/extractor/picarto.py index 54999a832..36a062def 100644 --- a/yt_dlp/extractor/picarto.py +++ b/yt_dlp/extractor/picarto.py @@ -64,7 +64,6 @@ class PicartoIE(InfoExtractor): formats.append({ 'url': source_url, }) - self._sort_formats(formats) mature = metadata.get('adult') if mature is None: @@ -114,7 +113,6 @@ class PicartoVodIE(InfoExtractor): formats = self._extract_m3u8_formats( vod_info['vod'], video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/piksel.py b/yt_dlp/extractor/piksel.py index fba7242f5..cc60b304e 100644 --- a/yt_dlp/extractor/piksel.py +++ 
b/yt_dlp/extractor/piksel.py @@ -153,8 +153,6 @@ class PikselIE(InfoExtractor): re.sub(r'/od/[^/]+/', '/od/http/', smil_url), video_id, transform_source=transform_source, fatal=False)) - self._sort_formats(formats, ('tbr', )) # Incomplete resolution information - subtitles = {} for caption in video_data.get('captions', []): caption_url = caption.get('url') @@ -170,4 +168,5 @@ class PikselIE(InfoExtractor): 'timestamp': parse_iso8601(video_data.get('dateadd')), 'formats': formats, 'subtitles': subtitles, + '_format_sort_fields': ('tbr', ), # Incomplete resolution information } diff --git a/yt_dlp/extractor/pinkbike.py b/yt_dlp/extractor/pinkbike.py index 313b5cce0..e4e1caaa2 100644 --- a/yt_dlp/extractor/pinkbike.py +++ b/yt_dlp/extractor/pinkbike.py @@ -49,7 +49,6 @@ class PinkbikeIE(InfoExtractor): 'format_id': format_id, 'height': height, }) - self._sort_formats(formats) title = remove_end(self._og_search_title(webpage), ' Video - Pinkbike') description = self._html_search_regex( diff --git a/yt_dlp/extractor/pinterest.py b/yt_dlp/extractor/pinterest.py index 171f9e4eb..2c6cd6d4b 100644 --- a/yt_dlp/extractor/pinterest.py +++ b/yt_dlp/extractor/pinterest.py @@ -52,7 +52,6 @@ class PinterestBaseIE(InfoExtractor): 'height': int_or_none(format_dict.get('height')), 'duration': duration, }) - self._sort_formats(formats) description = data.get('description') or data.get('description_html') or data.get('seo_description') timestamp = unified_timestamp(data.get('created_at')) diff --git a/yt_dlp/extractor/pixivsketch.py b/yt_dlp/extractor/pixivsketch.py index bfdb8b24e..850c6f23d 100644 --- a/yt_dlp/extractor/pixivsketch.py +++ b/yt_dlp/extractor/pixivsketch.py @@ -71,7 +71,6 @@ class PixivSketchIE(PixivSketchBaseIE): formats = self._extract_m3u8_formats( m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls') - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/pladform.py b/yt_dlp/extractor/pladform.py index 8be08a5bc..dcf18e1f3 100644 --- a/yt_dlp/extractor/pladform.py +++ b/yt_dlp/extractor/pladform.py @@ -111,8 +111,6 @@ class PladformIE(InfoExtractor): if error: fail(error) - self._sort_formats(formats) - webpage = self._download_webpage( 'http://video.pladform.ru/catalog/video/videoid/%s' % video_id, video_id) diff --git a/yt_dlp/extractor/planetmarathi.py b/yt_dlp/extractor/planetmarathi.py index 03b9d6aaa..25753fe7e 100644 --- a/yt_dlp/extractor/planetmarathi.py +++ b/yt_dlp/extractor/planetmarathi.py @@ -57,7 +57,6 @@ class PlanetMarathiIE(InfoExtractor): asset_title = id.replace('-', ' ') asset_id = f'{asset["sk"]}_{id}'.replace('#', '-') formats, subtitles = self._extract_m3u8_formats_and_subtitles(asset['mediaAssetURL'], asset_id) - self._sort_formats(formats) entries.append({ 'id': asset_id, 'title': asset_title, diff --git a/yt_dlp/extractor/platzi.py b/yt_dlp/extractor/platzi.py index 29d3210ac..b8a441494 100644 --- a/yt_dlp/extractor/platzi.py +++ b/yt_dlp/extractor/platzi.py @@ -127,7 +127,6 @@ class PlatziIE(PlatziBaseIE): format_url, lecture_id, mpd_id=format_id, note='Downloading %s MPD manifest' % server_id, fatal=False)) - self._sort_formats(formats) content = str_or_none(desc.get('content')) description = (clean_html(compat_b64decode(content).decode('utf-8')) diff --git a/yt_dlp/extractor/playplustv.py b/yt_dlp/extractor/playplustv.py index 05dbaf066..316f220f7 100644 --- a/yt_dlp/extractor/playplustv.py +++ b/yt_dlp/extractor/playplustv.py @@ -79,7 +79,6 @@ class PlayPlusTVIE(InfoExtractor): 'width': 
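The piksel.py hunk above is the recurring shape of this whole series: a trailing `self._sort_formats(...)` call is deleted, and any explicit field-preference tuple it carried moves into the returned info dict under the `_format_sort_fields` key, which the core consumes when it sorts formats itself. A minimal sketch of the new shape (the extractor name and URL below are hypothetical, not part of this patch):

    from .common import InfoExtractor


    class HypotheticalIE(InfoExtractor):
        # Illustrative only; not an extractor touched by this series.
        _VALID_URL = r'https?://example\.com/v/(?P<id>\d+)'

        def _real_extract(self, url):
            video_id = self._match_id(url)
            formats = self._extract_m3u8_formats(
                f'https://example.com/hls/{video_id}/master.m3u8', video_id, 'mp4')
            return {
                'id': video_id,
                'title': video_id,
                'formats': formats,  # no trailing self._sort_formats(formats) call
                # the old field-preference tuple rides along in the info dict:
                '_format_sort_fields': ('tbr', ),
            }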
diff --git a/yt_dlp/extractor/pinkbike.py b/yt_dlp/extractor/pinkbike.py
index 313b5cce0..e4e1caaa2 100644
--- a/yt_dlp/extractor/pinkbike.py
+++ b/yt_dlp/extractor/pinkbike.py
@@ -49,7 +49,6 @@ class PinkbikeIE(InfoExtractor):
                 'format_id': format_id,
                 'height': height,
             })
-        self._sort_formats(formats)
 
         title = remove_end(self._og_search_title(webpage), ' Video - Pinkbike')
         description = self._html_search_regex(
diff --git a/yt_dlp/extractor/pinterest.py b/yt_dlp/extractor/pinterest.py
index 171f9e4eb..2c6cd6d4b 100644
--- a/yt_dlp/extractor/pinterest.py
+++ b/yt_dlp/extractor/pinterest.py
@@ -52,7 +52,6 @@ class PinterestBaseIE(InfoExtractor):
                 'height': int_or_none(format_dict.get('height')),
                 'duration': duration,
             })
-        self._sort_formats(formats)
 
         description = data.get('description') or data.get('description_html') or data.get('seo_description')
         timestamp = unified_timestamp(data.get('created_at'))
diff --git a/yt_dlp/extractor/pixivsketch.py b/yt_dlp/extractor/pixivsketch.py
index bfdb8b24e..850c6f23d 100644
--- a/yt_dlp/extractor/pixivsketch.py
+++ b/yt_dlp/extractor/pixivsketch.py
@@ -71,7 +71,6 @@ class PixivSketchIE(PixivSketchBaseIE):
         formats = self._extract_m3u8_formats(
             m3u8_url, video_id, ext='mp4',
             entry_protocol='m3u8_native', m3u8_id='hls')
-        self._sort_formats(formats)
 
         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/pladform.py b/yt_dlp/extractor/pladform.py
index 8be08a5bc..dcf18e1f3 100644
--- a/yt_dlp/extractor/pladform.py
+++ b/yt_dlp/extractor/pladform.py
@@ -111,8 +111,6 @@ class PladformIE(InfoExtractor):
         if error:
             fail(error)
 
-        self._sort_formats(formats)
-
         webpage = self._download_webpage(
             'http://video.pladform.ru/catalog/video/videoid/%s'
             % video_id, video_id)
diff --git a/yt_dlp/extractor/planetmarathi.py b/yt_dlp/extractor/planetmarathi.py
index 03b9d6aaa..25753fe7e 100644
--- a/yt_dlp/extractor/planetmarathi.py
+++ b/yt_dlp/extractor/planetmarathi.py
@@ -57,7 +57,6 @@ class PlanetMarathiIE(InfoExtractor):
             asset_title = id.replace('-', ' ')
             asset_id = f'{asset["sk"]}_{id}'.replace('#', '-')
             formats, subtitles = self._extract_m3u8_formats_and_subtitles(asset['mediaAssetURL'], asset_id)
-            self._sort_formats(formats)
             entries.append({
                 'id': asset_id,
                 'title': asset_title,
diff --git a/yt_dlp/extractor/platzi.py b/yt_dlp/extractor/platzi.py
index 29d3210ac..b8a441494 100644
--- a/yt_dlp/extractor/platzi.py
+++ b/yt_dlp/extractor/platzi.py
@@ -127,7 +127,6 @@ class PlatziIE(PlatziBaseIE):
                     format_url, lecture_id, mpd_id=format_id,
                     note='Downloading %s MPD manifest' % server_id,
                     fatal=False))
-        self._sort_formats(formats)
 
         content = str_or_none(desc.get('content'))
         description = (clean_html(compat_b64decode(content).decode('utf-8'))
diff --git a/yt_dlp/extractor/playplustv.py b/yt_dlp/extractor/playplustv.py
index 05dbaf066..316f220f7 100644
--- a/yt_dlp/extractor/playplustv.py
+++ b/yt_dlp/extractor/playplustv.py
@@ -79,7 +79,6 @@ class PlayPlusTVIE(InfoExtractor):
                 'width': int_or_none(file_info.get('width')),
                 'height': int_or_none(file_info.get('height')),
             })
-        self._sort_formats(formats)
 
         thumbnails = []
         for thumb in media.get('thumbs', []):
diff --git a/yt_dlp/extractor/plays.py b/yt_dlp/extractor/plays.py
index 700dfe407..9371f7b23 100644
--- a/yt_dlp/extractor/plays.py
+++ b/yt_dlp/extractor/plays.py
@@ -38,7 +38,6 @@ class PlaysTVIE(InfoExtractor):
                 'format_id': 'http-' + format_id,
                 'height': int_or_none(height),
             })
-        self._sort_formats(formats)
 
         info.update({
             'id': video_id,
diff --git a/yt_dlp/extractor/playtvak.py b/yt_dlp/extractor/playtvak.py
index f7e5ddbe7..c418f88cb 100644
--- a/yt_dlp/extractor/playtvak.py
+++ b/yt_dlp/extractor/playtvak.py
@@ -160,7 +160,6 @@ class PlaytvakIE(InfoExtractor):
                 'quality': quality(fmt.get('quality')),
                 'preference': preference,
             })
-        self._sort_formats(formats)
 
         title = item['title']
         is_live = item['type'] == 'stream'
diff --git a/yt_dlp/extractor/playvid.py b/yt_dlp/extractor/playvid.py
index 18aeda7de..1e0989d0a 100644
--- a/yt_dlp/extractor/playvid.py
+++ b/yt_dlp/extractor/playvid.py
@@ -74,7 +74,6 @@ class PlayvidIE(InfoExtractor):
                 'height': height,
                 'url': val,
             })
-        self._sort_formats(formats)
 
         # Extract title - should be in the flashvars; if not, look elsewhere
         if video_title is None:
diff --git a/yt_dlp/extractor/playwire.py b/yt_dlp/extractor/playwire.py
index 683dbf4a5..1057bff3a 100644
--- a/yt_dlp/extractor/playwire.py
+++ b/yt_dlp/extractor/playwire.py
@@ -62,7 +62,6 @@ class PlaywireIE(InfoExtractor):
         for a_format in formats:
             if not dict_get(a_format, ['tbr', 'width', 'height']):
                 a_format['quality'] = 1 if '-hd.' in a_format['url'] else 0
-        self._sort_formats(formats)
 
         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/pluralsight.py b/yt_dlp/extractor/pluralsight.py
index b50152ad8..809b65608 100644
--- a/yt_dlp/extractor/pluralsight.py
+++ b/yt_dlp/extractor/pluralsight.py
@@ -410,8 +410,6 @@ query viewClip {
                 })
             formats.append(clip_f)
 
-        self._sort_formats(formats)
-
         duration = int_or_none(
             clip.get('duration')) or parse_duration(clip.get('formattedDuration'))
diff --git a/yt_dlp/extractor/plutotv.py b/yt_dlp/extractor/plutotv.py
index 6e8f46fa3..71a05cc7a 100644
--- a/yt_dlp/extractor/plutotv.py
+++ b/yt_dlp/extractor/plutotv.py
@@ -135,7 +135,6 @@ class PlutoTVIE(InfoExtractor):
                 subtitles = self._merge_subtitles(subtitles, subs)
 
         formats, subtitles = self._to_ad_free_formats(video_id, formats, subtitles)
-        self._sort_formats(formats)
 
         info = {
             'id': video_id,
diff --git a/yt_dlp/extractor/polsatgo.py b/yt_dlp/extractor/polsatgo.py
index e44d951e6..1524a1fb9 100644
--- a/yt_dlp/extractor/polsatgo.py
+++ b/yt_dlp/extractor/polsatgo.py
@@ -42,7 +42,6 @@ class PolsatGoIE(InfoExtractor):
         formats = list(self._extract_formats(
             try_get(media, lambda x: x['playback']['mediaSources']), video_id))
-        self._sort_formats(formats)
 
         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/polskieradio.py b/yt_dlp/extractor/polskieradio.py
index 514753b64..99244f6b4 100644
--- a/yt_dlp/extractor/polskieradio.py
+++ b/yt_dlp/extractor/polskieradio.py
@@ -295,8 +295,6 @@ class PolskieRadioPlayerIE(InfoExtractor):
                 'url': stream_url,
             })
 
-        self._sort_formats(formats)
-
         return {
             'id': compat_str(channel['id']),
             'formats': formats,
diff --git a/yt_dlp/extractor/porncom.py b/yt_dlp/extractor/porncom.py
index 2ebd3fa09..c8ef240d7 100644
--- a/yt_dlp/extractor/porncom.py
+++ b/yt_dlp/extractor/porncom.py
@@ -73,8 +73,6 @@ class PornComIE(InfoExtractor):
         thumbnail = None
         duration = None
 
-        self._sort_formats(formats)
-
         view_count = str_to_int(self._search_regex(
             (r'Views:\s*</span>\s*<span>\s*([\d,.]+)',
              r'class=["\']views["\'][^>]*><p>([\d,.]+)'), webpage,
diff --git a/yt_dlp/extractor/pornflip.py b/yt_dlp/extractor/pornflip.py
index 26536bc65..51a9cf38f 100644
--- a/yt_dlp/extractor/pornflip.py
+++ b/yt_dlp/extractor/pornflip.py
@@ -60,7 +60,6 @@ class PornFlipIE(InfoExtractor):
             r'class="btn btn-down-rating[^>]*>[^<]*<i[^>]*>[^<]*</i>[^>]*<span[^>]*>[^0-9]*([0-9]+)[^<0-9]*<',
             webpage, 'dislike_count', fatal=False)
         mpd_url = self._search_regex(r'"([^"]+userscontent.net/dash/[0-9]+/manifest.mpd[^"]*)"', webpage, 'mpd_url').replace('&amp;', '&')
         formats = self._extract_mpd_formats(mpd_url, video_id, mpd_id='dash')
-        self._sort_formats(formats)
 
         return {
             'age_limit': 18,
diff --git a/yt_dlp/extractor/pornhd.py b/yt_dlp/extractor/pornhd.py
index 06a44ddd1..c8a1ec80b 100644
--- a/yt_dlp/extractor/pornhd.py
+++ b/yt_dlp/extractor/pornhd.py
@@ -84,7 +84,6 @@ class PornHdIE(InfoExtractor):
             })
         if formats:
             info['formats'] = formats
-            self._sort_formats(info['formats'])
 
         description = self._html_search_regex(
             (r'(?s)<section[^>]+class=["\']video-description[^>]+>(?P<value>.+?)</section>',
diff --git a/yt_dlp/extractor/pornhub.py b/yt_dlp/extractor/pornhub.py
index 6afaf5e6e..5d8d7c100 100644
--- a/yt_dlp/extractor/pornhub.py
+++ b/yt_dlp/extractor/pornhub.py
@@ -456,10 +456,6 @@ class PornHubIE(PornHubBaseIE):
                 continue
             add_format(video_url)
 
-        # field_preference is unnecessary here, but kept for code-similarity with youtube-dl
-        self._sort_formats(
-            formats, field_preference=('height', 'width', 'fps', 'format_id'))
-
         model_profile = self._search_json(
             r'var\s+MODEL_PROFILE\s*=', webpage, 'model profile', video_id, fatal=False)
         video_uploader = self._html_search_regex(
diff --git a/yt_dlp/extractor/pornovoisines.py b/yt_dlp/extractor/pornovoisines.py
index 96d2da7c7..aa48da06b 100644
--- a/yt_dlp/extractor/pornovoisines.py
+++ b/yt_dlp/extractor/pornovoisines.py
@@ -55,7 +55,6 @@ class PornoVoisinesIE(InfoExtractor):
                 'height': item.get('height'),
                 'bitrate': item.get('bitrate'),
             })
-        self._sort_formats(formats)
 
         webpage = self._download_webpage(url, video_id)
diff --git a/yt_dlp/extractor/projectveritas.py b/yt_dlp/extractor/projectveritas.py
index e4aa4bd35..0e029ce8c 100644
--- a/yt_dlp/extractor/projectveritas.py
+++ b/yt_dlp/extractor/projectveritas.py
@@ -42,7 +42,6 @@ class ProjectVeritasIE(InfoExtractor):
             raise ExtractorError('No video on the provided url.', expected=True)
         playback_id = traverse_obj(mux_asset, 'playbackId', ('en-US', 'playbackId'))
         formats = self._extract_m3u8_formats(f'https://stream.mux.com/{playback_id}.m3u8', video_id)
-        self._sort_formats(formats)
         return {
             'id': video_id,
             'title': main_data['title'],
diff --git a/yt_dlp/extractor/prosiebensat1.py b/yt_dlp/extractor/prosiebensat1.py
index cb5ada1b9..46e2e8a8f 100644
--- a/yt_dlp/extractor/prosiebensat1.py
+++ b/yt_dlp/extractor/prosiebensat1.py
@@ -156,7 +156,6 @@ class ProSiebenSat1BaseIE(InfoExtractor):
                             'tbr': tbr,
                             'format_id': 'http%s' % ('-%d' % tbr if tbr else ''),
                         })
-        self._sort_formats(formats)
 
         return {
             'duration': float_or_none(video.get('duration')),
diff --git a/yt_dlp/extractor/puhutv.py b/yt_dlp/extractor/puhutv.py
index a5dac1dff..482e5705f 100644
--- a/yt_dlp/extractor/puhutv.py
+++ b/yt_dlp/extractor/puhutv.py
@@ -111,7 +111,6 @@ class PuhuTVIE(InfoExtractor):
                 format_id += '-%sp' % quality
             f['format_id'] = format_id
             formats.append(f)
-        self._sort_formats(formats)
 
         creator = try_get(
             show, lambda x: x['producer']['name'], compat_str)
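pornhub.py above is the one removal in this span that carried an explicit `field_preference`; as the deleted comment itself says, the tuple ('height', 'width', 'fps', 'format_id') was unnecessary and only kept for code-similarity with youtube-dl, so it is dropped rather than migrated. If that ordering ever needs to be forced, it can be requested at runtime instead of hard-coded in the extractor. A sketch using the embedding API (the `format_sort` option mirrors the -S/--format-sort flag; the URL is a placeholder):

    import yt_dlp

    # Reproduce the old extractor-side preference at runtime (sketch).
    ydl_opts = {'format_sort': ['height', 'width', 'fps', 'id']}
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info('https://example.com/watch/123', download=False)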
diff --git a/yt_dlp/extractor/qqmusic.py b/yt_dlp/extractor/qqmusic.py
index fa2454df4..92858259a 100644
--- a/yt_dlp/extractor/qqmusic.py
+++ b/yt_dlp/extractor/qqmusic.py
@@ -122,7 +122,6 @@ class QQMusicIE(InfoExtractor):
                 'abr': details.get('abr'),
             })
         self._check_formats(formats, mid)
-        self._sort_formats(formats)
 
         actual_lrc_lyrics = ''.join(
             line + '\n' for line in re.findall(
diff --git a/yt_dlp/extractor/r7.py b/yt_dlp/extractor/r7.py
index b459efceb..f067a0571 100644
--- a/yt_dlp/extractor/r7.py
+++ b/yt_dlp/extractor/r7.py
@@ -66,7 +66,6 @@ class R7IE(InfoExtractor):
                     f_copy['protocol'] = 'http'
                     f = f_copy
                 formats.append(f)
-        self._sort_formats(formats)
 
         description = video.get('description')
         thumbnail = video.get('thumb')
diff --git a/yt_dlp/extractor/radiko.py b/yt_dlp/extractor/radiko.py
index 498cc6be9..f10292203 100644
--- a/yt_dlp/extractor/radiko.py
+++ b/yt_dlp/extractor/radiko.py
@@ -125,7 +125,6 @@ class RadikoBaseIE(InfoExtractor):
                     sf['downloader_options'] = {'ffmpeg_args': ['-ss', time_to_skip]}
             formats.extend(subformats)
 
-        self._sort_formats(formats)
         return formats
diff --git a/yt_dlp/extractor/radiocanada.py b/yt_dlp/extractor/radiocanada.py
index dd6f899a4..72c21d502 100644
--- a/yt_dlp/extractor/radiocanada.py
+++ b/yt_dlp/extractor/radiocanada.py
@@ -113,7 +113,6 @@ class RadioCanadaIE(InfoExtractor):
                 raise ExtractorError(
                     '%s said: %s' % (self.IE_NAME, error), expected=True)
         formats = self._extract_m3u8_formats(v_url, video_id, 'mp4')
-        self._sort_formats(formats)
 
         subtitles = {}
         closed_caption_url = get_meta('closedCaption') or get_meta('closedCaptionHTML5')
diff --git a/yt_dlp/extractor/radiode.py b/yt_dlp/extractor/radiode.py
index befb0b72b..32c36d557 100644
--- a/yt_dlp/extractor/radiode.py
+++ b/yt_dlp/extractor/radiode.py
@@ -38,7 +38,6 @@ class RadioDeIE(InfoExtractor):
             'abr': stream['bitRate'],
             'asr': stream['sampleRate']
         } for stream in broadcast['streamUrls']]
-        self._sort_formats(formats)
 
         return {
             'id': radio_id,
diff --git a/yt_dlp/extractor/radiofrance.py b/yt_dlp/extractor/radiofrance.py
index 38420a15d..92e51b7f4 100644
--- a/yt_dlp/extractor/radiofrance.py
+++ b/yt_dlp/extractor/radiofrance.py
@@ -46,7 +46,6 @@ class RadioFranceIE(InfoExtractor):
             for i, fm in
             enumerate(re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str))
         ]
-        self._sort_formats(formats)
 
         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/radiojavan.py b/yt_dlp/extractor/radiojavan.py
index 6a6118899..6a9139466 100644
--- a/yt_dlp/extractor/radiojavan.py
+++ b/yt_dlp/extractor/radiojavan.py
@@ -50,7 +50,6 @@ class RadioJavanIE(InfoExtractor):
                 'format_id': format_id,
             })
             formats.append(f)
-        self._sort_formats(formats)
 
         title = self._og_search_title(webpage)
         thumbnail = self._og_search_thumbnail(webpage)
diff --git a/yt_dlp/extractor/radlive.py b/yt_dlp/extractor/radlive.py
index ed38a07f0..9bcbb11d5 100644
--- a/yt_dlp/extractor/radlive.py
+++ b/yt_dlp/extractor/radlive.py
@@ -62,7 +62,6 @@ class RadLiveIE(InfoExtractor):
             raise ExtractorError('Unable to extract video info, make sure the URL is valid')
 
         formats = self._extract_m3u8_formats(video_info['assets']['videos'][0]['url'], video_id)
-        self._sort_formats(formats)
 
         data = video_info.get('structured_data', {})
diff --git a/yt_dlp/extractor/rai.py b/yt_dlp/extractor/rai.py
index cd19ec07b..cab12cc21 100644
--- a/yt_dlp/extractor/rai.py
+++ b/yt_dlp/extractor/rai.py
@@ -313,7 +313,6 @@ class RaiPlayIE(RaiBaseIE):
         video = media['video']
 
         relinker_info = self._extract_relinker_info(video['content_url'], video_id)
-        self._sort_formats(relinker_info['formats'])
 
         thumbnails = []
         for _, value in media.get('images', {}).items():
@@ -621,8 +620,6 @@ class RaiIE(RaiBaseIE):
         else:
             raise ExtractorError('not a media file')
 
-        self._sort_formats(relinker_info['formats'])
-
         thumbnails = []
         for image_type in ('image', 'image_medium', 'image_300'):
             thumbnail_url = media.get(image_type)
@@ -703,7 +700,6 @@ class RaiIE(RaiBaseIE):
         relinker_info = self._extract_relinker_info(
             urljoin(url, relinker_url), video_id)
-        self._sort_formats(relinker_info['formats'])
 
         title = self._search_regex(
             r'var\s+videoTitolo\s*=\s*([\'"])(?P<title>[^\'"]+)\1',
@@ -772,8 +768,6 @@ class RaiNewsIE(RaiIE):  # XXX: Do not subclass from concrete IE
 
         relinker_info = self._extract_relinker_info(urljoin(url, relinker_url), video_id)
-        self._sort_formats(relinker_info['formats'])
-
         return {
             'id': video_id,
             'title': track_info.get('title') or self._og_search_title(webpage),
diff --git a/yt_dlp/extractor/rcs.py b/yt_dlp/extractor/rcs.py
index d69a1a216..b905f8d2e 100644
--- a/yt_dlp/extractor/rcs.py
+++ b/yt_dlp/extractor/rcs.py
@@ -196,7 +196,6 @@ class RCSBaseIE(InfoExtractor):
                 'format_id': 'http-mp4',
                 'url': urls['mp4']
             })
-        self._sort_formats(formats)
         return formats
 
     def _real_extract(self, url):
diff --git a/yt_dlp/extractor/rcti.py b/yt_dlp/extractor/rcti.py
index 0cfecbc9a..27b4ad7bb 100644
--- a/yt_dlp/extractor/rcti.py
+++ b/yt_dlp/extractor/rcti.py
@@ -194,8 +194,6 @@ class RCTIPlusIE(RCTIPlusBaseIE):
             if 'akamaized' in f['url'] or 'cloudfront' in f['url']:
                 f.setdefault('http_headers', {})['Referer'] = 'https://www.rctiplus.com/'  # Referer header is required for akamai/cloudfront CDNs
 
-        self._sort_formats(formats)
-
         return {
             'id': video_meta.get('product_id') or video_json.get('product_id'),
             'title': dict_get(video_meta, ('title', 'name')) or dict_get(video_json, ('content_name', 'assets_name')),
diff --git a/yt_dlp/extractor/redbee.py b/yt_dlp/extractor/redbee.py
index ee510eb40..b59b518b1 100644
--- a/yt_dlp/extractor/redbee.py
+++ b/yt_dlp/extractor/redbee.py
@@ -117,13 +117,10 @@ class ParliamentLiveUKIE(RedBeeBaseIE):
         video_id = self._match_id(url)
 
         formats, subtitles = self._get_formats_and_subtitles(video_id)
-        self._sort_formats(formats)
 
         video_info = self._download_json(
             f'https://www.parliamentlive.tv/Event/GetShareVideo/{video_id}', video_id, fatal=False)
 
-        self._sort_formats(formats, ['res', 'proto'])
-
         return {
             'id': video_id,
             'formats': formats,
@@ -132,6 +129,7 @@ class ParliamentLiveUKIE(RedBeeBaseIE):
             'thumbnail': traverse_obj(video_info, 'thumbnailUrl'),
             'timestamp': traverse_obj(
                 video_info, ('event', 'publishedStartTime'), expected_type=unified_timestamp),
+            '_format_sort_fields': ('res', 'proto'),
         }
 
 
@@ -366,7 +364,6 @@ class RTBFIE(RedBeeBaseIE):
                 formats.extend(fmts)
                 self._merge_subtitles(subs, target=subtitles)
 
-        self._sort_formats(formats, ['res', 'proto'])
         return {
             'id': media_id,
             'formats': formats,
@@ -378,4 +375,5 @@ class RTBFIE(RedBeeBaseIE):
             'series': data.get('programLabel'),
             'subtitles': subtitles,
             'is_live': is_live,
+            '_format_sort_fields': ('res', 'proto'),
         }
diff --git a/yt_dlp/extractor/redbulltv.py b/yt_dlp/extractor/redbulltv.py
index 50e61ba6e..a01bc8434 100644
--- a/yt_dlp/extractor/redbulltv.py
+++ b/yt_dlp/extractor/redbulltv.py
@@ -80,7 +80,6 @@ class RedBullTVIE(InfoExtractor):
         formats, subtitles = self._extract_m3u8_formats_and_subtitles(
             'https://dms.redbull.tv/v3/%s/%s/playlist.m3u8' % (video_id, token),
             video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
-        self._sort_formats(formats)
 
         for resource in video.get('resources', []):
             if resource.startswith('closed_caption_'):
diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py
index c713b24fe..cfd79abfd 100644
--- a/yt_dlp/extractor/reddit.py
+++ b/yt_dlp/extractor/reddit.py
@@ -179,7 +179,6 @@ class RedditIE(InfoExtractor):
             hls_playlist_url, display_id, 'mp4', m3u8_id='hls', fatal=False))
         formats.extend(self._extract_mpd_formats(
             dash_playlist_url, display_id, mpd_id='dash', fatal=False))
-        self._sort_formats(formats)
 
         return {
             **info,
diff --git a/yt_dlp/extractor/redgifs.py b/yt_dlp/extractor/redgifs.py
index 92d996ca6..f688d1e63 100644
--- a/yt_dlp/extractor/redgifs.py
+++ b/yt_dlp/extractor/redgifs.py
@@ -45,7 +45,6 @@ class RedGifsBaseInfoExtractor(InfoExtractor):
                 'height': height,
                 'quality': quality(format_id),
             })
-        self._sort_formats(formats)
 
         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/redtube.py b/yt_dlp/extractor/redtube.py
index 8e767b6e4..49076ccd8 100644
--- a/yt_dlp/extractor/redtube.py
+++ b/yt_dlp/extractor/redtube.py
@@ -110,7 +110,6 @@ class RedTubeIE(InfoExtractor):
             video_url = self._html_search_regex(
                 r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
             formats.append({'url': video_url, 'ext': 'mp4'})
-        self._sort_formats(formats)
 
         thumbnail = self._og_search_thumbnail(webpage)
         upload_date = unified_strdate(self._search_regex(
diff --git a/yt_dlp/extractor/rentv.py b/yt_dlp/extractor/rentv.py
index ab47ee552..fdde31704 100644
--- a/yt_dlp/extractor/rentv.py
+++ b/yt_dlp/extractor/rentv.py
@@ -47,7 +47,6 @@ class RENTVIE(InfoExtractor):
             formats.append({
                 'url': src,
             })
-        self._sort_formats(formats)
         return {
             'id': video_id,
             'title': title,
diff --git a/yt_dlp/extractor/restudy.py b/yt_dlp/extractor/restudy.py
index cd3c20d7a..6d032564d 100644
--- a/yt_dlp/extractor/restudy.py
+++ b/yt_dlp/extractor/restudy.py
@@ -31,7 +31,6 @@ class RestudyIE(InfoExtractor):
         formats = self._extract_smil_formats(
             'https://cdn.portal.restudy.dk/dynamic/themes/front/awsmedia/SmilDirectory/video_%s.xml' % video_id,
             video_id)
-        self._sort_formats(formats)
 
         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/reuters.py b/yt_dlp/extractor/reuters.py
index 1428b7cc9..6919425f3 100644
--- a/yt_dlp/extractor/reuters.py
+++ b/yt_dlp/extractor/reuters.py
@@ -55,7 +55,6 @@ class ReutersIE(InfoExtractor):
                     'ext': ext,
                     'container': container if method != 'mobile' else None,
                 })
-        self._sort_formats(formats)
 
         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/rice.py b/yt_dlp/extractor/rice.py
index 9ca47f3d4..3dd4d31de 100644
--- a/yt_dlp/extractor/rice.py
+++ b/yt_dlp/extractor/rice.py
@@ -88,7 +88,6 @@ class RICEIE(InfoExtractor):
                         'ext': 'flv',
                     })
                 formats.append(fmt)
-        self._sort_formats(formats)
 
         thumbnails = []
         for content_asset in content_data.findall('.//contentAssets'):
diff --git a/yt_dlp/extractor/rockstargames.py b/yt_dlp/extractor/rockstargames.py
index 5f1db0f05..c491aaf53 100644
--- a/yt_dlp/extractor/rockstargames.py
+++ b/yt_dlp/extractor/rockstargames.py
@@ -54,8 +54,6 @@ class RockstarGamesIE(InfoExtractor):
         if youtube_id:
             return self.url_result(youtube_id, 'Youtube')
 
-        self._sort_formats(formats)
-
         return {
             'id': video_id,
             'title': title,
diff --git a/yt_dlp/extractor/rokfin.py b/yt_dlp/extractor/rokfin.py
index fcef325bf..ade3cd0a4 100644
--- a/yt_dlp/extractor/rokfin.py
+++ b/yt_dlp/extractor/rokfin.py
@@ -112,7 +112,6 @@ class RokfinIE(InfoExtractor):
             self.raise_no_formats(
                 f'Stream is offline; scheduled for {datetime.fromtimestamp(scheduled).strftime("%Y-%m-%d %H:%M:%S")}',
                 video_id=video_id, expected=True)
-        self._sort_formats(formats)
 
         uploader = traverse_obj(metadata, ('createdBy', 'username'), ('creator', 'username'))
         timestamp = (scheduled or float_or_none(metadata.get('postedAtMilli'), 1000)
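redbee.py above also removes a small redundancy: ParliamentLiveUKIE sorted the same list twice, once plainly and once with ['res', 'proto'], and both calls go away in favour of a single tuple in the returned dict (note that list arguments become tuples on migration). A standalone sketch of where the key sits; the format dicts are fabricated, with keys following yt-dlp's format-dict conventions:

    formats = [
        {'format_id': 'hls-720', 'height': 720, 'protocol': 'm3u8_native'},
        {'format_id': 'dash-720', 'height': 720, 'protocol': 'http_dash_segments'},
    ]
    info_dict = {
        'id': 'example',
        'formats': formats,
        # replaces: self._sort_formats(formats, ['res', 'proto'])
        '_format_sort_fields': ('res', 'proto'),
    }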
diff --git a/yt_dlp/extractor/roosterteeth.py b/yt_dlp/extractor/roosterteeth.py
index 011dadfaa..776fbfbc0 100644
--- a/yt_dlp/extractor/roosterteeth.py
+++ b/yt_dlp/extractor/roosterteeth.py
@@ -146,7 +146,6 @@ class RoosterTeethIE(RoosterTeethBaseIE):
 
         formats, subtitles = self._extract_m3u8_formats_and_subtitles(
             m3u8_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls')
-        self._sort_formats(formats)
 
         episode = self._download_json(
             api_episode_url, display_id,
diff --git a/yt_dlp/extractor/rte.py b/yt_dlp/extractor/rte.py
index 93faf1b32..aedaa5b55 100644
--- a/yt_dlp/extractor/rte.py
+++ b/yt_dlp/extractor/rte.py
@@ -94,8 +94,6 @@ class RteBaseIE(InfoExtractor):
                 formats.extend(self._extract_f4m_formats(
                     hds_url, item_id, f4m_id='hds', fatal=False))
 
-        self._sort_formats(formats)
-
         info_dict['formats'] = formats
         return info_dict
diff --git a/yt_dlp/extractor/rtl2.py b/yt_dlp/extractor/rtl2.py
index afa0d33cf..056cf87d2 100644
--- a/yt_dlp/extractor/rtl2.py
+++ b/yt_dlp/extractor/rtl2.py
@@ -94,8 +94,6 @@ class RTL2IE(InfoExtractor):
         if m3u8_url:
             formats.extend(self._extract_akamai_formats(m3u8_url, display_id))
 
-        self._sort_formats(formats)
-
         return {
             'id': display_id,
             'title': title,
@@ -142,7 +140,6 @@ class RTL2YouIE(RTL2YouBaseIE):
             raise ExtractorError('video not found', expected=True)
 
         formats = self._extract_m3u8_formats(stream_url.decode(), video_id, 'mp4', 'm3u8_native')
-        self._sort_formats(formats)
 
         video_data = self._download_json(
             self._BACKWERK_BASE_URL + 'video/' + video_id, video_id)
diff --git a/yt_dlp/extractor/rtlnl.py b/yt_dlp/extractor/rtlnl.py
index 3852a3a13..724cb64e9 100644
--- a/yt_dlp/extractor/rtlnl.py
+++ b/yt_dlp/extractor/rtlnl.py
@@ -116,7 +116,6 @@ class RtlNlIE(InfoExtractor):
         formats = self._extract_m3u8_formats(
             m3u8_url, uuid, 'mp4', m3u8_id='hls', fatal=False)
-        self._sort_formats(formats)
 
         thumbnails = []
 
@@ -174,7 +173,6 @@ class RTLLuBaseIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         formats, subtitles = self.get_formats_and_subtitles(webpage, video_id)
-        self._sort_formats(formats)
 
         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/rts.py b/yt_dlp/extractor/rts.py
index 6644538ed..81c4d7cac 100644
--- a/yt_dlp/extractor/rts.py
+++ b/yt_dlp/extractor/rts.py
@@ -212,7 +212,6 @@ class RTSIE(SRGSSRIE):  # XXX: Do not subclass from concrete IE
             })
 
         self._check_formats(formats, media_id)
-        self._sort_formats(formats)
 
         duration = info.get('duration') or info.get('cutout') or info.get('cutduration')
         if isinstance(duration, compat_str):
diff --git a/yt_dlp/extractor/rtve.py b/yt_dlp/extractor/rtve.py
index b9b181feb..a99a266c6 100644
--- a/yt_dlp/extractor/rtve.py
+++ b/yt_dlp/extractor/rtve.py
@@ -130,7 +130,6 @@ class RTVEALaCartaIE(InfoExtractor):
                 'quality': q(quality),
                 'url': video_url,
             })
-        self._sort_formats(formats)
         return formats
 
     def _real_extract(self, url):
@@ -238,7 +237,6 @@ class RTVEAudioIE(RTVEALaCartaIE):  # XXX: Do not subclass from concrete IE
                 'quality': q(quality),
                 'url': audio_url,
             })
-        self._sort_formats(formats)
         return formats
 
     def _real_extract(self, url):
diff --git a/yt_dlp/extractor/rtvnh.py b/yt_dlp/extractor/rtvnh.py
index 58af3dda2..7c6174494 100644
--- a/yt_dlp/extractor/rtvnh.py
+++ b/yt_dlp/extractor/rtvnh.py
@@ -49,7 +49,6 @@ class RTVNHIE(InfoExtractor):
             formats.extend(self._extract_f4m_formats(
                 http_base_url + '/manifest.f4m', video_id,
                 f4m_id='hds', fatal=False))
-        self._sort_formats(formats)
 
         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/rtvs.py b/yt_dlp/extractor/rtvs.py
index fb06efa4b..a84a78da8 100644
--- a/yt_dlp/extractor/rtvs.py
+++ b/yt_dlp/extractor/rtvs.py
@@ -72,7 +72,6 @@ class RTVSIE(InfoExtractor):
             formats = [{'url': traverse_obj(data, ('playlist', 0, 'sources', 0, 'src'))}]
         else:
             formats = self._extract_m3u8_formats(traverse_obj(data, ('playlist', 0, 'sources', 0, 'src')), video_id)
-        self._sort_formats(formats)
 
         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/rtvslo.py b/yt_dlp/extractor/rtvslo.py
index b63ccb96f..05942b6b4 100644
--- a/yt_dlp/extractor/rtvslo.py
+++ b/yt_dlp/extractor/rtvslo.py
@@ -133,7 +133,6 @@ class RTVSLOIE(InfoExtractor):
         if any('dummy_720p.mp4' in x.get('manifest_url', '') for x in formats) and meta.get('stub') == 'error':
             raise ExtractorError(f'{self.IE_NAME} said: Clip not available', expected=True)
-        self._sort_formats(formats)
 
         return {
             'id': v_id,
             'webpage_url': ''.join(traverse_obj(meta, ('canonical', ('domain', 'path')))),
diff --git a/yt_dlp/extractor/rule34video.py b/yt_dlp/extractor/rule34video.py
index bb113d822..9d15f4d21 100644
--- a/yt_dlp/extractor/rule34video.py
+++ b/yt_dlp/extractor/rule34video.py
@@ -51,8 +51,6 @@ class Rule34VideoIE(InfoExtractor):
         thumbnail = self._html_search_regex(r'preview_url:\s+\'([^\']+)\'', webpage, 'thumbnail', default=None)
         duration = self._html_search_regex(r'"icon-clock"></i>\s+<span>((?:\d+:?)+)', webpage, 'duration', default=None)
 
-        self._sort_formats(formats)
-
         return {
             'id': video_id,
             'formats': formats,
diff --git a/yt_dlp/extractor/rumble.py b/yt_dlp/extractor/rumble.py
index 27040646b..102615c60 100644
--- a/yt_dlp/extractor/rumble.py
+++ b/yt_dlp/extractor/rumble.py
@@ -202,7 +202,6 @@ class RumbleEmbedIE(InfoExtractor):
                     'height': 'h',
                 }, default={})
             })
-        self._sort_formats(formats)
 
         subtitles = {
             lang: [{
diff --git a/yt_dlp/extractor/rutube.py b/yt_dlp/extractor/rutube.py
index cad3caa60..5a4fd975e 100644
--- a/yt_dlp/extractor/rutube.py
+++ b/yt_dlp/extractor/rutube.py
@@ -81,7 +81,6 @@ class RutubeBaseIE(InfoExtractor):
                 'url': format_url,
                 'format_id': format_id,
             })
-        self._sort_formats(formats)
         return formats
 
     def _download_and_extract_formats(self, video_id, query=None):
diff --git a/yt_dlp/extractor/rutv.py b/yt_dlp/extractor/rutv.py
index 75da01f7d..d7f9a7337 100644
--- a/yt_dlp/extractor/rutv.py
+++ b/yt_dlp/extractor/rutv.py
@@ -189,8 +189,6 @@ class RUTVIE(InfoExtractor):
                 })
             formats.append(fmt)
 
-        self._sort_formats(formats, ('source', ))
-
         return {
             'id': video_id,
             'title': title,
@@ -201,4 +199,5 @@
             'formats': formats,
             'subtitles': subtitles,
             'is_live': is_live,
+            '_format_sort_fields': ('source', ),
         }
diff --git a/yt_dlp/extractor/ruutu.py b/yt_dlp/extractor/ruutu.py
index 3f6d30d3c..33f6652df 100644
--- a/yt_dlp/extractor/ruutu.py
+++ b/yt_dlp/extractor/ruutu.py
@@ -244,8 +244,6 @@ class RuutuIE(InfoExtractor):
             if ns_st_cds != 'free':
                 raise ExtractorError('This video is %s.' % ns_st_cds, expected=True)
 
-        self._sort_formats(formats)
-
         themes = pv('themes')
 
         return {
diff --git a/yt_dlp/extractor/sapo.py b/yt_dlp/extractor/sapo.py
index 9a601a01c..beffaee59 100644
--- a/yt_dlp/extractor/sapo.py
+++ b/yt_dlp/extractor/sapo.py
@@ -98,8 +98,6 @@ class SapoIE(InfoExtractor):
                 'height': 720,
             })
 
-        self._sort_formats(formats)
-
         return {
             'id': video_id,
             'title': title,
diff --git a/yt_dlp/extractor/screen9.py b/yt_dlp/extractor/screen9.py
index eae652af7..5ab0b6c60 100644
--- a/yt_dlp/extractor/screen9.py
+++ b/yt_dlp/extractor/screen9.py
@@ -49,7 +49,6 @@ class Screen9IE(InfoExtractor):
                 'format': 'mp4',
             })
 
-        self._sort_formats(formats)
         return {
             'id': video_id,
             'title': traverse_obj(
diff --git a/yt_dlp/extractor/scrolller.py b/yt_dlp/extractor/scrolller.py
index 8469f487a..4f9fa1440 100644
--- a/yt_dlp/extractor/scrolller.py
+++ b/yt_dlp/extractor/scrolller.py
@@ -93,8 +93,6 @@ class ScrolllerIE(InfoExtractor):
         if not formats:
             self.raise_no_formats('There is no video.', expected=True, video_id=video_id)
 
-        self._sort_formats(formats)
-
         return {
             'id': video_id,
             'title': video_data.get('title'),
diff --git a/yt_dlp/extractor/senategov.py b/yt_dlp/extractor/senategov.py
index 6fec7c0bb..7ff0cf5b7 100644
--- a/yt_dlp/extractor/senategov.py
+++ b/yt_dlp/extractor/senategov.py
@@ -131,8 +131,6 @@ class SenateISVPIE(InfoExtractor):
                 entry['format_id'] += mobj.group('tag')
             formats.append(entry)
 
-        self._sort_formats(formats)
-
         return {
             'id': video_id,
             'title': title,
@@ -187,7 +185,6 @@ class SenateGovIE(InfoExtractor):
         formats = self._extract_m3u8_formats(
             f'{stream_domain}/i/{filename}_1@{stream_num}/master.m3u8',
             display_id, ext='mp4')
-        self._sort_formats(formats)
 
         title = self._html_search_regex(
             (*self._og_regexes('title'), r'(?s)<title>([^<]*?)</title>'), webpage, 'video title')
diff --git a/yt_dlp/extractor/sendtonews.py b/yt_dlp/extractor/sendtonews.py
index 5ff06f19d..3600e2e74 100644
--- a/yt_dlp/extractor/sendtonews.py
+++ b/yt_dlp/extractor/sendtonews.py
@@ -77,9 +77,6 @@ class SendtoNewsIE(InfoExtractor):
                     'format_id': '%s-%d' % (determine_protocol(f), tbr),
                     'tbr': tbr,
                 })
-            # 'tbr' was explicitly set to be preferred over 'height' originally,
-            # So this is being kept unless someone can confirm this is unnecessary
-            self._sort_formats(info_dict['formats'], ('tbr', 'res'))
 
             thumbnails = []
             if video.get('thumbnailUrl'):
@@ -98,6 +95,9 @@ class SendtoNewsIE(InfoExtractor):
                 'thumbnails': thumbnails,
                 'duration': float_or_none(video.get('SM_length')),
                 'timestamp': parse_iso8601(video.get('S_sysDate'), delimiter=' '),
+                # 'tbr' was explicitly set to be preferred over 'height' originally,
+                # So this is being kept unless someone can confirm this is unnecessary
+                '_format_sort_fields': ('tbr', 'res')
             })
             entries.append(info_dict)
diff --git a/yt_dlp/extractor/servus.py b/yt_dlp/extractor/servus.py
index ac030ea41..490d56267 100644
--- a/yt_dlp/extractor/servus.py
+++ b/yt_dlp/extractor/servus.py
@@ -104,7 +104,6 @@ class ServusIE(InfoExtractor):
                 'width': int_or_none(resource.get('width')),
                 'height': int_or_none(resource.get('height')),
             })
-        self._sort_formats(formats)
 
         attrs = {}
         for attribute in video['attributes']:
diff --git a/yt_dlp/extractor/sexu.py b/yt_dlp/extractor/sexu.py
index 000f7e166..3117f81e3 100644
--- a/yt_dlp/extractor/sexu.py
+++ b/yt_dlp/extractor/sexu.py
@@ -34,7 +34,6 @@ class SexuIE(InfoExtractor):
                 r'^(\d+)[pP]', source.get('label', ''), 'height',
                 default=None)),
         } for source in sources if source.get('file')]
-        self._sort_formats(formats)
 
         title = self._html_search_regex(
             r'<title>([^<]+)\s*-\s*Sexu\.Com</title>', webpage, 'title')
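When a removed call carried an explanatory comment, the comment migrates with the tuple so its provenance survives: sendtonews.py above keeps its note that 'tbr' was explicitly preferred over 'height', and rutv.py keeps its ('source', ) preference in the same way. A sketch of the migrated form:

    info_dict = {}
    info_dict.update({
        # 'tbr' was explicitly set to be preferred over 'height' originally,
        # so the comment and the tuple move together into the info dict:
        '_format_sort_fields': ('tbr', 'res'),
    })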
diff --git a/yt_dlp/extractor/seznamzpravy.py b/yt_dlp/extractor/seznamzpravy.py
index 05642a116..79e888583 100644
--- a/yt_dlp/extractor/seznamzpravy.py
+++ b/yt_dlp/extractor/seznamzpravy.py
@@ -93,7 +93,6 @@ class SeznamZpravyIE(InfoExtractor):
                 urljoin(sdn_url, hls_rel_url), video_id, ext='mp4',
                 m3u8_id='hls', fatal=False))
 
-        self._sort_formats(formats)
         return formats
 
     def _real_extract(self, url):
diff --git a/yt_dlp/extractor/shahid.py b/yt_dlp/extractor/shahid.py
index 53ca86b73..26a0bff40 100644
--- a/yt_dlp/extractor/shahid.py
+++ b/yt_dlp/extractor/shahid.py
@@ -118,7 +118,6 @@ class ShahidIE(ShahidBaseIE):
             # https://docs.aws.amazon.com/mediapackage/latest/ug/manifest-filtering.html
             r'aws\.manifestfilter=[\w:;,-]+&?', '', playout['url']), video_id, 'mp4')
-        self._sort_formats(formats)
 
         # video = self._call_api(
         #     'product/id', video_id, {
diff --git a/yt_dlp/extractor/shemaroome.py b/yt_dlp/extractor/shemaroome.py
index c0780abe2..7a78c6e05 100644
--- a/yt_dlp/extractor/shemaroome.py
+++ b/yt_dlp/extractor/shemaroome.py
@@ -74,7 +74,6 @@ class ShemarooMeIE(InfoExtractor):
         iv = [0] * 16
         m3u8_url = unpad_pkcs7(intlist_to_bytes(aes_cbc_decrypt(url_data, key, iv))).decode('ascii')
         formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False, headers={'stream_key': data_json['stream_key']})
-        self._sort_formats(formats)
 
         release_date = self._html_search_regex(
             (r'itemprop="uploadDate">\s*([\d-]+)', r'id="release_date" value="([\d-]+)'),
diff --git a/yt_dlp/extractor/showroomlive.py b/yt_dlp/extractor/showroomlive.py
index cd681a035..ab1895311 100644
--- a/yt_dlp/extractor/showroomlive.py
+++ b/yt_dlp/extractor/showroomlive.py
@@ -66,7 +66,6 @@ class ShowRoomLiveIE(InfoExtractor):
                 'format_note': stream.get('label'),
                 'quality': int_or_none(stream.get('quality', 100)),
             })
-        self._sort_formats(formats)
 
         return {
             'id': compat_str(room.get('live_id') or broadcaster_id),
diff --git a/yt_dlp/extractor/sina.py b/yt_dlp/extractor/sina.py
index d30d57d85..aeba4e377 100644
--- a/yt_dlp/extractor/sina.py
+++ b/yt_dlp/extractor/sina.py
@@ -97,7 +97,6 @@ class SinaIE(InfoExtractor):
                     'quality': preference(quality_id),
                     'ext': 'mp4',
                 })
-            self._sort_formats(formats)
 
             return {
                 'id': video_id,
diff --git a/yt_dlp/extractor/sixplay.py b/yt_dlp/extractor/sixplay.py
index b7b7d7d7f..a6fb6c1f5 100644
--- a/yt_dlp/extractor/sixplay.py
+++ b/yt_dlp/extractor/sixplay.py
@@ -104,7 +104,6 @@ class SixPlayIE(InfoExtractor):
                     'quality': quality_key(quality),
                     'ext': ext,
                 })
-        self._sort_formats(formats)
 
         def get(getter):
             for src in (data, clip_data):
diff --git a/yt_dlp/extractor/skyit.py b/yt_dlp/extractor/skyit.py
index 9e4d7d35d..42d30f7c4 100644
--- a/yt_dlp/extractor/skyit.py
+++ b/yt_dlp/extractor/skyit.py
@@ -42,7 +42,6 @@ class SkyItPlayerIE(InfoExtractor):
             self.raise_geo_restricted(countries=['IT'])
 
         formats = self._extract_m3u8_formats(hls_url, video_id, 'mp4')
-        self._sort_formats(formats)
 
         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/slideslive.py b/yt_dlp/extractor/slideslive.py
index 87d0fec32..9a60a79e7 100644
--- a/yt_dlp/extractor/slideslive.py
+++ b/yt_dlp/extractor/slideslive.py
@@ -85,7 +85,6 @@ class SlidesLiveIE(InfoExtractor):
             formats.extend(self._extract_mpd_formats(
                 _MANIFEST_PATTERN % (service_id, 'mpd'), service_id,
                 mpd_id='dash', fatal=False))
-        self._sort_formats(formats)
         info.update({
             'id': service_id,
             'formats': formats,
diff --git a/yt_dlp/extractor/sohu.py b/yt_dlp/extractor/sohu.py
index c3a135955..a8f1e4623 100644
--- a/yt_dlp/extractor/sohu.py
+++ b/yt_dlp/extractor/sohu.py
@@ -176,7 +176,6 @@ class SohuIE(InfoExtractor):
                 'height': int_or_none(data.get('height')),
                 'fps': int_or_none(data.get('fps')),
             })
-            self._sort_formats(formats)
 
             playlist.append({
                 'id': '%s_part%d' % (video_id, i + 1),
diff --git a/yt_dlp/extractor/sonyliv.py b/yt_dlp/extractor/sonyliv.py
index 17d28478f..aaad420f1 100644
--- a/yt_dlp/extractor/sonyliv.py
+++ b/yt_dlp/extractor/sonyliv.py
@@ -150,7 +150,6 @@ class SonyLIVIE(InfoExtractor):
                 video_id, 'mp4', m3u8_id='hls', headers=headers, fatal=False))
         for f in formats:
             f.setdefault('http_headers', {}).update(headers)
-        self._sort_formats(formats)
 
         metadata = self._call_api(
             '1.6', 'IN/DETAIL/' + video_id, video_id)['containers'][0]['metadata']
diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py
index 228e19c3e..4879d48c8 100644
--- a/yt_dlp/extractor/soundcloud.py
+++ b/yt_dlp/extractor/soundcloud.py
@@ -296,7 +296,6 @@ class SoundcloudBaseIE(InfoExtractor):
         if not formats and info.get('policy') == 'BLOCK':
             self.raise_geo_restricted(metadata_available=True)
 
-        self._sort_formats(formats)
 
         user = info.get('user') or {}
diff --git a/yt_dlp/extractor/sovietscloset.py b/yt_dlp/extractor/sovietscloset.py
index f1243cc49..453016ccb 100644
--- a/yt_dlp/extractor/sovietscloset.py
+++ b/yt_dlp/extractor/sovietscloset.py
@@ -104,7 +104,6 @@ class SovietsClosetIE(SovietsClosetBaseIE):
         thumbnail_url = self._search_regex(r'(https?://.*?thumbnail\.jpg)', iframe, 'thumbnail url')
 
         m3u8_formats = self._extract_m3u8_formats(m3u8_url, video_id, headers=self.MEDIADELIVERY_REFERER)
-        self._sort_formats(m3u8_formats)
 
         if not m3u8_formats:
             duration = None
diff --git a/yt_dlp/extractor/spankbang.py b/yt_dlp/extractor/spankbang.py
index 1aa8eaba1..f242d334c 100644
--- a/yt_dlp/extractor/spankbang.py
+++ b/yt_dlp/extractor/spankbang.py
@@ -128,8 +128,6 @@ class SpankBangIE(InfoExtractor):
                     format_url = format_url[0]
                 extract_format(format_id, format_url)
 
-        self._sort_formats(formats)
-
         info = self._search_json_ld(webpage, video_id, default={})
 
         title = self._html_search_regex(
diff --git a/yt_dlp/extractor/spankwire.py b/yt_dlp/extractor/spankwire.py
index d1990e4de..334b29773 100644
--- a/yt_dlp/extractor/spankwire.py
+++ b/yt_dlp/extractor/spankwire.py
@@ -101,7 +101,6 @@ class SpankwireIE(InfoExtractor):
             formats.extend(self._extract_m3u8_formats(
                 m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
                 m3u8_id='hls', fatal=False))
-        self._sort_formats(formats)
 
         view_count = str_to_int(video.get('viewed'))
diff --git a/yt_dlp/extractor/sport5.py b/yt_dlp/extractor/sport5.py
index f4ac98b6e..44b4067de 100644
--- a/yt_dlp/extractor/sport5.py
+++ b/yt_dlp/extractor/sport5.py
@@ -74,7 +74,6 @@ class Sport5IE(InfoExtractor):
             'width': int(fmt.get('width')),
             'height': int(fmt.get('height')),
         } for fmt in metadata.findall('./PlaybackLinks/FileURL')]
-        self._sort_formats(formats)
 
         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/sportbox.py b/yt_dlp/extractor/sportbox.py
index 622a81b47..ccbb0e8cc 100644
--- a/yt_dlp/extractor/sportbox.py
+++ b/yt_dlp/extractor/sportbox.py
@@ -65,7 +65,6 @@ class SportBoxIE(InfoExtractor):
             formats.append({
                 'url': src,
             })
-        self._sort_formats(formats)
 
         player = self._parse_json(
             self._search_regex(
diff --git a/yt_dlp/extractor/springboardplatform.py b/yt_dlp/extractor/springboardplatform.py
index 539a64209..a98584a27 100644
--- a/yt_dlp/extractor/springboardplatform.py
+++ b/yt_dlp/extractor/springboardplatform.py
@@ -102,8 +102,6 @@ class SpringboardPlatformIE(InfoExtractor):
                 })
             formats.append(m3u8_format)
 
-        self._sort_formats(formats)
-
         return {
             'id': video_id,
             'title': title,
diff --git a/yt_dlp/extractor/srgssr.py b/yt_dlp/extractor/srgssr.py
index 6dd312985..145f25e9f 100644
--- a/yt_dlp/extractor/srgssr.py
+++ b/yt_dlp/extractor/srgssr.py
@@ -128,7 +128,6 @@ class SRGSSRIE(InfoExtractor):
                     'url': podcast_url,
                     'quality': q(quality),
                 })
-        self._sort_formats(formats)
 
         if media_type == 'video':
             for sub in (media_data.get('subtitleList') or []):
diff --git a/yt_dlp/extractor/startrek.py b/yt_dlp/extractor/startrek.py
index ee03f7837..e92122f9b 100644
--- a/yt_dlp/extractor/startrek.py
+++ b/yt_dlp/extractor/startrek.py
@@ -49,7 +49,6 @@ class StarTrekIE(InfoExtractor):
         hls = self._html_search_regex(r'\bdata-hls\s*=\s*"([^"]+)"', player, 'HLS URL')
         formats, subtitles = self._extract_m3u8_formats_and_subtitles(hls, video_id, 'mp4')
-        self._sort_formats(formats)
 
         captions = self._html_search_regex(
             r'\bdata-captions-url\s*=\s*"([^"]+)"', player, 'captions URL', fatal=False)
diff --git a/yt_dlp/extractor/steam.py b/yt_dlp/extractor/steam.py
index eea20ff85..7daee2fe0 100644
--- a/yt_dlp/extractor/steam.py
+++ b/yt_dlp/extractor/steam.py
@@ -109,7 +109,6 @@ class SteamIE(InfoExtractor):
                     'format_id': ext + quality,
                     'url': video_url,
                 })
-        self._sort_formats(formats)
         entry['formats'] = formats
         entries.append(entry)
         embedded_videos = re.findall(r'(<iframe[^>]+>)', webpage)
@@ -163,7 +162,6 @@ class SteamCommunityBroadcastIE(InfoExtractor):
             'https://steamcommunity.com/actions/ajaxresolveusers',
             video_id, query={'steamids': video_id})[0]
 
-        self._sort_formats(formats)
 
         return {
             'id': video_id,
             'title': self._generic_title('', webpage),
diff --git a/yt_dlp/extractor/streamable.py b/yt_dlp/extractor/streamable.py
index 3e60479ad..462861e0e 100644
--- a/yt_dlp/extractor/streamable.py
+++ b/yt_dlp/extractor/streamable.py
@@ -89,7 +89,6 @@ class StreamableIE(InfoExtractor):
                 'vcodec': parse_codecs(try_get(info, lambda x: x['input_metadata']['video_codec_name'])).get('vcodec'),
                 'acodec': parse_codecs(try_get(info, lambda x: x['input_metadata']['audio_codec_name'])).get('acodec'),
             })
-        self._sort_formats(formats)
 
         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/streamanity.py b/yt_dlp/extractor/streamanity.py
index f8c37c0dd..6eaee52d9 100644
--- a/yt_dlp/extractor/streamanity.py
+++ b/yt_dlp/extractor/streamanity.py
@@ -35,7 +35,6 @@ class StreamanityIE(InfoExtractor):
         formats = self._extract_m3u8_formats(
             f'https://stream.mux.com/{video_info["play_id"]}.m3u8?token={video_info["token"]}',
            video_id, ext='mp4', m3u8_id='hls')
-        self._sort_formats(formats)
 
         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/streamcz.py b/yt_dlp/extractor/streamcz.py
index 849a9882d..c4537ba8d 100644
--- a/yt_dlp/extractor/streamcz.py
+++ b/yt_dlp/extractor/streamcz.py
@@ -109,7 +109,6 @@ class StreamCZIE(InfoExtractor):
             })
 
         formats = list(self._extract_formats(spl_url, video))
-        self._sort_formats(formats)
 
         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/stripchat.py b/yt_dlp/extractor/stripchat.py
index d04aa1db0..4229a0bf1 100644
--- a/yt_dlp/extractor/stripchat.py
+++ b/yt_dlp/extractor/stripchat.py
@@ -51,8 +51,6 @@ class StripchatIE(InfoExtractor):
         if not formats:
             self.raise_no_formats('No active streams found', expected=True)
 
-        self._sort_formats(formats)
-
         return {
             'id': video_id,
             'title': video_id,
diff --git a/yt_dlp/extractor/substack.py b/yt_dlp/extractor/substack.py
index 787b9f70d..fa3826388 100644
--- a/yt_dlp/extractor/substack.py
+++ b/yt_dlp/extractor/substack.py
@@ -88,7 +88,6 @@ class SubstackIE(InfoExtractor):
         else:
             self.raise_no_formats(f'Page type "{post_type}" is not supported')
 
-        self._sort_formats(formats)
         return {
             'id': str(webpage_info['post']['id']),
             'formats': formats,
diff --git a/yt_dlp/extractor/sunporno.py b/yt_dlp/extractor/sunporno.py
index 19498701c..708873a95 100644
--- a/yt_dlp/extractor/sunporno.py
+++ b/yt_dlp/extractor/sunporno.py
@@ -61,7 +61,6 @@ class SunPornoIE(InfoExtractor):
                 'format_id': video_ext,
                 'quality': quality(video_ext),
             })
-        self._sort_formats(formats)
 
         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/sverigesradio.py b/yt_dlp/extractor/sverigesradio.py
index 4a4b5cf7e..65da615d0 100644
--- a/yt_dlp/extractor/sverigesradio.py
+++ b/yt_dlp/extractor/sverigesradio.py
@@ -58,7 +58,6 @@ class SverigesRadioBaseIE(InfoExtractor):
                 'vcodec': 'none',
                 'url': audio_url,
             })
-        self._sort_formats(formats)
 
         return {
             'id': audio_id,
diff --git a/yt_dlp/extractor/svt.py b/yt_dlp/extractor/svt.py
index b422b6d93..31bf7f97e 100644
--- a/yt_dlp/extractor/svt.py
+++ b/yt_dlp/extractor/svt.py
@@ -51,7 +51,6 @@ class SVTBaseIE(InfoExtractor):
                 self.raise_geo_restricted(
                     'This video is only available in Sweden',
                     countries=self._GEO_COUNTRIES, metadata_available=True)
-        self._sort_formats(formats)
 
         subtitle_references = dict_get(video_info, ('subtitles', 'subtitleReferences'))
         if isinstance(subtitle_references, list):
diff --git a/yt_dlp/extractor/swrmediathek.py b/yt_dlp/extractor/swrmediathek.py
index deebdd1a4..38bdfced7 100644
--- a/yt_dlp/extractor/swrmediathek.py
+++ b/yt_dlp/extractor/swrmediathek.py
@@ -92,7 +92,6 @@ class SWRMediathekIE(InfoExtractor):
                 'vcodec': codec if media_type == 'Video' else 'none',
                 'acodec': codec if media_type == 'Audio' else None,
             })
-        self._sort_formats(formats)
 
         upload_date = None
         entry_pdatet = attr.get('entry_pdatet')
diff --git a/yt_dlp/extractor/tagesschau.py b/yt_dlp/extractor/tagesschau.py
index 9b9513f07..ea0532c24 100644
--- a/yt_dlp/extractor/tagesschau.py
+++ b/yt_dlp/extractor/tagesschau.py
@@ -139,8 +139,6 @@ class TagesschauIE(InfoExtractor):
         timestamp = video_info.get('timestamp')
         title = title or video_info.get('description')
 
-        self._sort_formats(formats)
-
         return {
             'id': display_id,
             'title': title,
diff --git a/yt_dlp/extractor/tass.py b/yt_dlp/extractor/tass.py
index d20dacfc1..67e544a6a 100644
--- a/yt_dlp/extractor/tass.py
+++ b/yt_dlp/extractor/tass.py
@@ -48,7 +48,6 @@ class TassIE(InfoExtractor):
                 'format_id': label,
                 'quality': quality(label),
             })
-        self._sort_formats(formats)
 
         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/teachertube.py b/yt_dlp/extractor/teachertube.py
index 2bf836abd..c3eec2784 100644
--- a/yt_dlp/extractor/teachertube.py
+++ b/yt_dlp/extractor/teachertube.py
@@ -73,8 +73,6 @@ class TeacherTubeIE(InfoExtractor):
             } for media_url in set(media_urls)
         ]
 
-        self._sort_formats(formats)
-
         thumbnail = self._og_search_thumbnail(
             webpage, default=None) or self._html_search_meta(
             'thumbnail', webpage)
diff --git a/yt_dlp/extractor/teamcoco.py b/yt_dlp/extractor/teamcoco.py
index 840702ed9..a822b676f 100644
--- a/yt_dlp/extractor/teamcoco.py
+++ b/yt_dlp/extractor/teamcoco.py
@@ -196,7 +196,6 @@ class TeamcocoIE(TurnerBaseIE):
                         'format_id': format_id,
                         'quality': get_quality(format_id),
                     })
-            self._sort_formats(formats)
         info['formats'] = formats
         return info
diff --git a/yt_dlp/extractor/ted.py b/yt_dlp/extractor/ted.py
index 0e09ec757..c28a15498 100644
--- a/yt_dlp/extractor/ted.py
+++ b/yt_dlp/extractor/ted.py
@@ -125,8 +125,6 @@ class TedTalkIE(TedBaseIE):
                 ext_url = external.get('code') if service.lower() == 'youtube' else None
             return self.url_result(ext_url or external['uri'])
 
-        self._sort_formats(formats)
-
         thumbnail = playerData.get('thumb') or self._og_search_property('image', webpage)
         if thumbnail:
             # trim thumbnail resize parameters
diff --git a/yt_dlp/extractor/tele13.py b/yt_dlp/extractor/tele13.py
index 8e35bc85f..212af3785 100644
--- a/yt_dlp/extractor/tele13.py
+++ b/yt_dlp/extractor/tele13.py
@@ -71,7 +71,6 @@ class Tele13IE(InfoExtractor):
                             'ext': ext,
                         })
                     urls.append(format_url)
-        self._sort_formats(formats)
 
         return {
             'id': display_id,
diff --git a/yt_dlp/extractor/telebruxelles.py b/yt_dlp/extractor/telebruxelles.py
index 8d87b6ec1..2c50a67e9 100644
--- a/yt_dlp/extractor/telebruxelles.py
+++ b/yt_dlp/extractor/telebruxelles.py
@@ -59,7 +59,6 @@ class TeleBruxellesIE(InfoExtractor):
         rtmp_url = re.sub(r'^rmtp', 'rtmp', rtmp_url)
         rtmp_url = re.sub(r'"\s*\+\s*"', '', rtmp_url)
         formats = self._extract_wowza_formats(rtmp_url, article_id or display_id)
-        self._sort_formats(formats)
 
         is_live = 'stream/live' in rtmp_url
diff --git a/yt_dlp/extractor/telecinco.py b/yt_dlp/extractor/telecinco.py
index a9c0755f4..20bb82420 100644
--- a/yt_dlp/extractor/telecinco.py
+++ b/yt_dlp/extractor/telecinco.py
@@ -102,7 +102,6 @@ class TelecincoIE(InfoExtractor):
             }).encode(), headers=headers)['tokens']['1']['cdn']
         formats = self._extract_m3u8_formats(
             stream + '?' + cdn, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
-        self._sort_formats(formats)
 
         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/telegraaf.py b/yt_dlp/extractor/telegraaf.py
index 6562d122c..13e9515f8 100644
--- a/yt_dlp/extractor/telegraaf.py
+++ b/yt_dlp/extractor/telegraaf.py
@@ -75,8 +75,6 @@ class TelegraafIE(InfoExtractor):
                 'format_id': 'http' + ('-%s' % label if label else ''),
             })
 
-        self._sort_formats(formats)
-
         return {
             'id': video_id,
             'title': title,
diff --git a/yt_dlp/extractor/telegram.py b/yt_dlp/extractor/telegram.py
index 39f1a628a..5ec54857d 100644
--- a/yt_dlp/extractor/telegram.py
+++ b/yt_dlp/extractor/telegram.py
@@ -113,7 +113,6 @@ class TelegramEmbedIE(InfoExtractor):
                 'url': video_url,
                 'ext': 'mp4',
             }]
-            self._sort_formats(formats)
             videos.append({
                 'id': url_basename(webpage_url),
                 'webpage_url': update_url_query(webpage_url, {'single': True}),
diff --git a/yt_dlp/extractor/telemb.py b/yt_dlp/extractor/telemb.py
index 7e444c0d0..3d29dace3 100644
--- a/yt_dlp/extractor/telemb.py
+++ b/yt_dlp/extractor/telemb.py
@@ -57,7 +57,6 @@ class TeleMBIE(InfoExtractor):
                     'preference': -10,
                 })
             formats.append(fmt)
-        self._sort_formats(formats)
 
         title = remove_start(self._og_search_title(webpage), 'TéléMB : ')
         description = self._html_search_regex(
diff --git a/yt_dlp/extractor/telemundo.py b/yt_dlp/extractor/telemundo.py
index 64954b8f1..88f29cb83 100644
--- a/yt_dlp/extractor/telemundo.py
+++ b/yt_dlp/extractor/telemundo.py
@@ -40,7 +40,6 @@ class TelemundoIE(InfoExtractor):
             redirect_url + '?format=redirect&manifest=m3u&format=redirect&Tracking=true&Embedded=true&formats=MPEG4'), video_id, 'Processing m3u8').geturl()
         formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
-        self._sort_formats(formats)
         date = unified_timestamp(try_get(
             metadata, lambda x: x['props']['initialState']['video']['associatedPlaylists'][0]['videos'][0]['datePublished'].split(' ', 1)[1]))
         return {
diff --git a/yt_dlp/extractor/tencent.py b/yt_dlp/extractor/tencent.py
index 61f300fa4..ff8bf991e 100644
--- a/yt_dlp/extractor/tencent.py
+++ b/yt_dlp/extractor/tencent.py
@@ -116,7 +116,6 @@ class TencentBaseIE(InfoExtractor):
                 formats.extend(fmts)
                 self._merge_subtitles(subs, native_subtitles, target=subtitles)
 
-        self._sort_formats(formats)
         return formats, subtitles
 
     def _get_clean_title(self, title):
diff --git a/yt_dlp/extractor/tennistv.py b/yt_dlp/extractor/tennistv.py
index 47cb0965e..bc64226bf 100644
--- a/yt_dlp/extractor/tennistv.py
+++ b/yt_dlp/extractor/tennistv.py
@@ -138,8 +138,6 @@ class TennisTVIE(InfoExtractor):
         formats, subtitles = self._extract_m3u8_formats_and_subtitles(
             self._FORMAT_URL.format(partner=self._PARTNER_ID, entry=entryid, session=k_session), video_id)
 
-        self._sort_formats(formats)
-
         return {
             'id': video_id,
             'title': self._generic_title('', webpage),
diff --git a/yt_dlp/extractor/tenplay.py b/yt_dlp/extractor/tenplay.py
index fc4781447..633032e31 100644
--- a/yt_dlp/extractor/tenplay.py
+++ b/yt_dlp/extractor/tenplay.py
@@ -98,7 +98,6 @@ class TenPlayIE(InfoExtractor):
         if '10play-not-in-oz' in m3u8_url:
             self.raise_geo_restricted(countries=['AU'])
         formats = self._extract_m3u8_formats(m3u8_url, content_id, 'mp4')
-        self._sort_formats(formats)
 
         return {
             'formats': formats,
diff --git a/yt_dlp/extractor/theholetv.py b/yt_dlp/extractor/theholetv.py
index f0a096d41..a13f83bff 100644
--- a/yt_dlp/extractor/theholetv.py
+++ b/yt_dlp/extractor/theholetv.py
@@ -24,7 +24,6 @@ class TheHoleTvIE(InfoExtractor):
             r'(<div[^>]*\bdata-controller="player"[^>]*>)', webpage, 'video player'))
         formats, subtitles = self._extract_m3u8_formats_and_subtitles(
             player_attrs['data-player-source-value'], video_id, 'mp4')
-        self._sort_formats(formats)
 
         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/theplatform.py b/yt_dlp/extractor/theplatform.py
index c8026d294..e659b8ee1 100644
--- a/yt_dlp/extractor/theplatform.py
+++ b/yt_dlp/extractor/theplatform.py
@@ -296,7 +296,6 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
             smil_url = self._sign_url(smil_url, sig['key'], sig['secret'])
 
         formats, subtitles = self._extract_theplatform_smil(smil_url, video_id)
-        self._sort_formats(formats)
 
         ret = self._extract_theplatform_metadata(path, video_id)
         combined_subtitles = self._merge_subtitles(ret.get('subtitles', {}), subtitles)
@@ -366,8 +365,6 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
                 formats.extend(cur_formats)
                 subtitles = self._merge_subtitles(subtitles, cur_subtitles)
 
-        self._sort_formats(formats)
-
         thumbnails = [{
             'url': thumbnail['plfile$url'],
             'width': int_or_none(thumbnail.get('plfile$width')),
diff --git a/yt_dlp/extractor/theta.py b/yt_dlp/extractor/theta.py
index 3ec6b9711..ecf0ea091 100644
--- a/yt_dlp/extractor/theta.py
+++ b/yt_dlp/extractor/theta.py
@@ -41,7 +41,6 @@ class ThetaStreamIE(InfoExtractor):
             if data.get('type') != 'embed' and data.get('resolution') in ('master', 'source'))
 
         formats = self._extract_m3u8_formats(m3u8_playlist, channel_id, 'mp4', m3u8_id='hls', live=True)
-        self._sort_formats(formats)
 
         channel = try_get(info, lambda x: x['user']['username'])  # using this field instead of channel_id due to capitalization
@@ -78,7 +77,6 @@ class ThetaVideoIE(InfoExtractor):
         m3u8_playlist = try_get(info, lambda x: x['video_urls'][0]['url'])
 
         formats = self._extract_m3u8_formats(m3u8_playlist, video_id, 'mp4', m3u8_id='hls')
-        self._sort_formats(formats)
 
         return {
             'id': video_id,
diff --git a/yt_dlp/extractor/theweatherchannel.py b/yt_dlp/extractor/theweatherchannel.py
index 4f6d2ecba..682e4335d 100644
--- a/yt_dlp/extractor/theweatherchannel.py
+++ b/yt_dlp/extractor/theweatherchannel.py
@@ -79,7 +79,6 @@ class TheWeatherChannelIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
                     'url': variant_url,
                     'format_id': variant_id,
                 })
-        self._sort_formats(formats)
 
         cc_url = video_data.get('cc_url')
diff --git a/yt_dlp/extractor/threeqsdn.py b/yt_dlp/extractor/threeqsdn.py
index a313a8dfb..b1041902b 100644
--- a/yt_dlp/extractor/threeqsdn.py
+++ b/yt_dlp/extractor/threeqsdn.py
@@ -128,10 +128,6 @@ class ThreeQSDNIE(InfoExtractor):
                 'vcodec': 'none' if height == 0 else None,
                 'width': int(height * aspect) if height and aspect else None,
             })
-        # It seems like this would be correctly handled by default
-        # However, unless someone can confirm this, the old
-        # behaviour is being kept as-is
-        self._sort_formats(formats, ('res', 'source_preference'))
 
         for subtitle in (config.get('subtitles') or []):
             src = subtitle.get('src')
@@ -153,4 +149,8 @@ class ThreeQSDNIE(InfoExtractor):
             'is_live': live,
             'formats': formats,
             'subtitles': subtitles,
+            # It seems like this would be correctly handled by default
+            # However, unless someone can confirm this, the old
+            # behaviour is being kept as-is
+            '_format_sort_fields': ('res', 'source_preference')
         }
diff --git a/yt_dlp/extractor/threespeak.py b/yt_dlp/extractor/threespeak.py
index ce28a37c0..dbd509087 100644
--- a/yt_dlp/extractor/threespeak.py
+++ b/yt_dlp/extractor/threespeak.py
@@ -57,7 +57,6 @@ class ThreeSpeakIE(InfoExtractor):
                 'quality': 11,
                 'format_note': 'Original file',
             })
-        self._sort_formats(formats)
         return {
             'id': id,
             'title': data_json.get('title') or data_json.get('root_title'),
diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py
index 79a223861..0ca6f5afd 100644
--- a/yt_dlp/extractor/tiktok.py
+++ b/yt_dlp/extractor/tiktok.py
@@ -233,7 +233,6 @@ class TikTokBaseIE(InfoExtractor):
         if auth_cookie:
             for f in formats:
                 self._set_cookie(compat_urllib_parse_urlparse(f['url']).hostname, 'sid_tt', auth_cookie.value)
-        self._sort_formats(formats, ('quality', 'codec', 'size', 'br'))
 
         thumbnails = []
         for cover_id in ('cover', 'ai_dynamic_cover', 'animated_cover', 'ai_dynamic_cover_bak',
@@ -291,7 +290,8 @@ class TikTokBaseIE(InfoExtractor):
             'availability': self._availability(
                 is_private='Private' in labels,
                 needs_subscription='Friends only' in labels,
-                is_unlisted='Followers only' in labels)
+                is_unlisted='Followers only' in labels),
+            '_format_sort_fields': ('quality', 'codec', 'size', 'br'),
         }
 
     def _parse_aweme_video_web(self, aweme_detail, webpage_url):
@@ -333,7 +333,6 @@ class TikTokBaseIE(InfoExtractor):
                 'height': height,
             })
         self._remove_duplicate_formats(formats)
-        self._sort_formats(formats)
 
         thumbnails = []
         for thumbnail_name in ('thumbnail', 'cover', 'dynamicCover', 'originCover'):
diff --git a/yt_dlp/extractor/tnaflix.py b/yt_dlp/extractor/tnaflix.py
index eceaadb30..4482c8474 100644
--- a/yt_dlp/extractor/tnaflix.py
+++ b/yt_dlp/extractor/tnaflix.py
@@ -162,7 +162,6 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
         def extract_field(pattern, name):
             return self._html_search_regex(pattern, webpage, name, default=None) if pattern else None
 
-        self._sort_formats(formats)
         return {
             'id': video_id,
             'display_id': display_id,
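tiktok.py above shows the pattern when formats are assembled inside a shared helper rather than directly in `_real_extract`: the tuple is added to the dict literal the helper returns (here just before `_parse_aweme_video_web`), so every caller inherits the ordering. A sketch with a hypothetical helper; the function name and arguments are illustrative, not the patch's own code:

    def _parse_video_info(video_id, formats):
        # The sort preference is part of the returned mapping,
        # not a side effect on the formats list.
        return {
            'id': video_id,
            'formats': formats,
            '_format_sort_fields': ('quality', 'codec', 'size', 'br'),
        }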
self._sort_formats(formats) thumbnails = [] for picture in info.get('Pictures', []): diff --git a/yt_dlp/extractor/tokentube.py b/yt_dlp/extractor/tokentube.py index a30cabb3c..d022e2753 100644 --- a/yt_dlp/extractor/tokentube.py +++ b/yt_dlp/extractor/tokentube.py @@ -95,8 +95,6 @@ class TokentubeIE(InfoExtractor): description = remove_end(description, 'Category') - self._sort_formats(formats) - return { 'id': video_id, 'formats': formats, diff --git a/yt_dlp/extractor/triller.py b/yt_dlp/extractor/triller.py index 2d633ca67..acd9e68d2 100644 --- a/yt_dlp/extractor/triller.py +++ b/yt_dlp/extractor/triller.py @@ -114,7 +114,6 @@ class TrillerBaseIE(InfoExtractor): formats.extend(self._extract_m3u8_formats( manifest_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) - self._sort_formats(formats) comment_count = int_or_none(video_info.get('comment_count')) diff --git a/yt_dlp/extractor/trovo.py b/yt_dlp/extractor/trovo.py index b7aa74060..545a67275 100644 --- a/yt_dlp/extractor/trovo.py +++ b/yt_dlp/extractor/trovo.py @@ -95,7 +95,6 @@ class TrovoIE(TrovoBaseIE): 'tbr': stream_info.get('bitrate'), 'http_headers': self._HEADERS, }) - self._sort_formats(formats) info = { 'id': program_id, @@ -222,7 +221,6 @@ class TrovoVodIE(TrovoBaseIE): 'url': play_url, 'http_headers': self._HEADERS, }) - self._sort_formats(formats) category = vod_info.get('categoryName') get_count = lambda x: int_or_none(vod_info.get(x + 'Num')) diff --git a/yt_dlp/extractor/tubetugraz.py b/yt_dlp/extractor/tubetugraz.py index 89371b6eb..ebabedc9c 100644 --- a/yt_dlp/extractor/tubetugraz.py +++ b/yt_dlp/extractor/tubetugraz.py @@ -37,7 +37,6 @@ class TubeTuGrazBaseIE(InfoExtractor): id = episode_info.get('id') formats = list(self._extract_formats( traverse_obj(episode_info, ('mediapackage', 'media', 'track')), id)) - self._sort_formats(formats) title = traverse_obj(episode_info, ('mediapackage', 'title'), 'dcTitle') series_title = traverse_obj(episode_info, ('mediapackage', 'seriestitle')) diff --git a/yt_dlp/extractor/tubitv.py b/yt_dlp/extractor/tubitv.py index f5ed950be..de8b5da69 100644 --- a/yt_dlp/extractor/tubitv.py +++ b/yt_dlp/extractor/tubitv.py @@ -103,8 +103,6 @@ class TubiTvIE(InfoExtractor): elif not formats and not video_data.get('policy_match'): # policy_match is False if content was removed raise ExtractorError('This content is currently unavailable', expected=True) - self._sort_formats(formats) - thumbnails = [] for thumbnail_url in video_data.get('thumbnails', []): if not thumbnail_url: diff --git a/yt_dlp/extractor/tumblr.py b/yt_dlp/extractor/tumblr.py index 5d6615100..88d4ae32d 100644 --- a/yt_dlp/extractor/tumblr.py +++ b/yt_dlp/extractor/tumblr.py @@ -358,7 +358,6 @@ class TumblrIE(InfoExtractor): 'height': int_or_none( media_json.get('height') or self._og_search_property('video:height', webpage, default=None)), }] - self._sort_formats(formats) # the url we're extracting from might be an original post or it might be a reblog. # if it's a reblog, og:description will be the reblogger's comment, not the uploader's. 
diff --git a/yt_dlp/extractor/tunein.py b/yt_dlp/extractor/tunein.py index f163eaf09..43b4f673c 100644 --- a/yt_dlp/extractor/tunein.py +++ b/yt_dlp/extractor/tunein.py @@ -49,7 +49,6 @@ class TuneInBaseIE(InfoExtractor): 'source_preference': reliability, 'format_note': format_note, }) - self._sort_formats(formats) return { 'id': content_id, diff --git a/yt_dlp/extractor/tunepk.py b/yt_dlp/extractor/tunepk.py index 2973d15ec..e4e507b00 100644 --- a/yt_dlp/extractor/tunepk.py +++ b/yt_dlp/extractor/tunepk.py @@ -57,7 +57,6 @@ class TunePkIE(InfoExtractor): formats = self._parse_jwplayer_formats( details['player']['sources'], video_id) - self._sort_formats(formats) description = self._og_search_description( webpage, default=None) or self._html_search_meta( diff --git a/yt_dlp/extractor/turbo.py b/yt_dlp/extractor/turbo.py index e3f8941c4..cdb7dcff8 100644 --- a/yt_dlp/extractor/turbo.py +++ b/yt_dlp/extractor/turbo.py @@ -53,7 +53,6 @@ class TurboIE(InfoExtractor): 'url': child.text, 'quality': get_quality(quality), }) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/turner.py b/yt_dlp/extractor/turner.py index fae8b51e7..630d84bdc 100644 --- a/yt_dlp/extractor/turner.py +++ b/yt_dlp/extractor/turner.py @@ -174,7 +174,6 @@ class TurnerBaseIE(AdobePassIE): else: f['tbr'] = int(mobj.group(1)) formats.append(f) - self._sort_formats(formats) for source in video_data.findall('closedCaptions/source'): for track in source.findall('track'): @@ -249,7 +248,6 @@ class TurnerBaseIE(AdobePassIE): 'start_time': start_time, 'end_time': start_time + chapter_duration, }) - self._sort_formats(formats) return { 'formats': formats, diff --git a/yt_dlp/extractor/tv2.py b/yt_dlp/extractor/tv2.py index 0024f7241..c51e63371 100644 --- a/yt_dlp/extractor/tv2.py +++ b/yt_dlp/extractor/tv2.py @@ -95,7 +95,6 @@ class TV2IE(InfoExtractor): }) if not formats and data.get('drmProtected'): self.report_drm(video_id) - self._sort_formats(formats) thumbnails = [{ 'id': type, @@ -258,7 +257,6 @@ class KatsomoIE(InfoExtractor): }) if not formats and data.get('drmProtected'): self.report_drm(video_id) - self._sort_formats(formats) thumbnails = [{ 'id': thumbnail.get('@type'), diff --git a/yt_dlp/extractor/tv24ua.py b/yt_dlp/extractor/tv24ua.py index 8d2475296..89905acdb 100644 --- a/yt_dlp/extractor/tv24ua.py +++ b/yt_dlp/extractor/tv24ua.py @@ -68,7 +68,6 @@ class TV24UAVideoIE(InfoExtractor): self._search_json( r'var\s*vPlayConfig\s*=\s*', webpage, 'thumbnail', video_id, default=None, transform_source=js_to_json), 'poster') - self._sort_formats(formats) return { 'id': video_id, 'formats': formats, diff --git a/yt_dlp/extractor/tv2dk.py b/yt_dlp/extractor/tv2dk.py index 0af286312..35e92f10c 100644 --- a/yt_dlp/extractor/tv2dk.py +++ b/yt_dlp/extractor/tv2dk.py @@ -164,7 +164,6 @@ class TV2DKBornholmPlayIE(InfoExtractor): formats.append({ 'url': src, }) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/tv2hu.py b/yt_dlp/extractor/tv2hu.py index 6ac07716b..d4c21c046 100644 --- a/yt_dlp/extractor/tv2hu.py +++ b/yt_dlp/extractor/tv2hu.py @@ -66,7 +66,6 @@ class TV2HuIE(InfoExtractor): video_json = self._download_json(video_json_url, video_id) m3u8_url = self._proto_relative_url(traverse_obj(video_json, ('bitrates', 'hls'))) formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/tv4.py b/yt_dlp/extractor/tv4.py index e8cdd5c8c..1378a6f57 100644 --- 
a/yt_dlp/extractor/tv4.py +++ b/yt_dlp/extractor/tv4.py @@ -119,8 +119,6 @@ class TV4IE(InfoExtractor): if not formats and info.get('is_geo_restricted'): self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True) - self._sort_formats(formats) - return { 'id': video_id, 'title': title, diff --git a/yt_dlp/extractor/tv5mondeplus.py b/yt_dlp/extractor/tv5mondeplus.py index d449cdc04..bd0be784d 100644 --- a/yt_dlp/extractor/tv5mondeplus.py +++ b/yt_dlp/extractor/tv5mondeplus.py @@ -77,7 +77,6 @@ class TV5MondePlusIE(InfoExtractor): 'url': v_url, 'format_id': video_format, }) - self._sort_formats(formats) metadata = self._parse_json( vpl_data['data-metadata'], display_id) diff --git a/yt_dlp/extractor/tvc.py b/yt_dlp/extractor/tvc.py index 1ef64caf9..caa76ab6f 100644 --- a/yt_dlp/extractor/tvc.py +++ b/yt_dlp/extractor/tvc.py @@ -41,7 +41,6 @@ class TVCIE(InfoExtractor): 'height': int_or_none(info.get('height')), 'tbr': int_or_none(info.get('bitrate')), }) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/tvigle.py b/yt_dlp/extractor/tvigle.py index 9a7cb7214..6c982193d 100644 --- a/yt_dlp/extractor/tvigle.py +++ b/yt_dlp/extractor/tvigle.py @@ -120,7 +120,6 @@ class TvigleIE(InfoExtractor): 'height': int_or_none(height), 'filesize': filesize, }) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/tvn24.py b/yt_dlp/extractor/tvn24.py index 22b605823..9c777c17d 100644 --- a/yt_dlp/extractor/tvn24.py +++ b/yt_dlp/extractor/tvn24.py @@ -70,7 +70,6 @@ class TVN24IE(InfoExtractor): 'format_id': format_id, 'height': int_or_none(format_id.rstrip('p')), }) - self._sort_formats(formats) description = self._og_search_description(webpage, default=None) thumbnail = self._og_search_thumbnail( diff --git a/yt_dlp/extractor/tvnet.py b/yt_dlp/extractor/tvnet.py index 5820bb4a7..77426f7e6 100644 --- a/yt_dlp/extractor/tvnet.py +++ b/yt_dlp/extractor/tvnet.py @@ -109,7 +109,6 @@ class TVNetIE(InfoExtractor): stream_urls.add(stream_url) formats.extend(self._extract_m3u8_formats( stream_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False)) - self._sort_formats(formats) # better support for radio streams if title.startswith('VOV'): diff --git a/yt_dlp/extractor/tvnow.py b/yt_dlp/extractor/tvnow.py index 24add5260..0acc306df 100644 --- a/yt_dlp/extractor/tvnow.py +++ b/yt_dlp/extractor/tvnow.py @@ -74,7 +74,6 @@ class TVNowBaseIE(InfoExtractor): if not info.get('free', True): raise ExtractorError( 'Video %s is not available for free' % video_id, expected=True) - self._sort_formats(formats) description = info.get('articleLong') or info.get('articleShort') timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ') @@ -392,7 +391,6 @@ class TVNowIE(TVNowNewBaseIE): if not info.get('free', True): raise ExtractorError( 'Video %s is not available for free' % video_id, expected=True) - self._sort_formats(formats) description = source.get('description') thumbnail = url_or_none(source.get('poster')) diff --git a/yt_dlp/extractor/tvopengr.py b/yt_dlp/extractor/tvopengr.py index d8be12c96..e208e57f2 100644 --- a/yt_dlp/extractor/tvopengr.py +++ b/yt_dlp/extractor/tvopengr.py @@ -69,7 +69,6 @@ class TVOpenGrWatchIE(TVOpenGrBaseIE): continue formats.extend(formats_) self._merge_subtitles(subs_, target=subs) - self._sort_formats(formats) return formats, subs def _real_extract(self, url): diff --git a/yt_dlp/extractor/tvp.py b/yt_dlp/extractor/tvp.py index c83b99762..8483564f7 100644 --- a/yt_dlp/extractor/tvp.py +++ 
b/yt_dlp/extractor/tvp.py @@ -433,8 +433,6 @@ class TVPEmbedIE(InfoExtractor): 'height': int_or_none(traverse_obj(file, ('quality', 'height'))), }) - self._sort_formats(formats) - title = dict_get(info, ('subtitle', 'title', 'seoTitle')) description = dict_get(info, ('description', 'seoDescription')) thumbnails = [] diff --git a/yt_dlp/extractor/tvplay.py b/yt_dlp/extractor/tvplay.py index f815b5137..9ef4f962c 100644 --- a/yt_dlp/extractor/tvplay.py +++ b/yt_dlp/extractor/tvplay.py @@ -294,8 +294,6 @@ class TVPlayIE(InfoExtractor): 'This content might not be available in your country due to copyright reasons', metadata_available=True) - self._sort_formats(formats) - # TODO: webvtt in m3u8 subtitles = {} sami_path = video.get('sami_path') @@ -410,7 +408,6 @@ class ViafreeIE(InfoExtractor): raise formats, subtitles = self._extract_m3u8_formats_and_subtitles(stream_href, guid, 'mp4') - self._sort_formats(formats) episode = program.get('episode') or {} return { 'id': guid, @@ -495,7 +492,6 @@ class TVPlayHomeIE(InfoExtractor): urljoin(url, f'/api/products/{stream_id}/videos/playlist?videoType={video_type}&platform=BROWSER'), video_id) formats, subtitles = self._extract_m3u8_formats_and_subtitles( stream['sources']['HLS'][0]['src'], video_id, 'mp4', 'm3u8_native', m3u8_id='hls') - self._sort_formats(formats) thumbnails = set(traverse_obj( data, (('galary', 'images', 'artworks'), ..., ..., ('miniUrl', 'mainUrl')), expected_type=url_or_none)) diff --git a/yt_dlp/extractor/tvplayer.py b/yt_dlp/extractor/tvplayer.py index 31d70b6b8..b05355f87 100644 --- a/yt_dlp/extractor/tvplayer.py +++ b/yt_dlp/extractor/tvplayer.py @@ -72,7 +72,6 @@ class TVPlayerIE(InfoExtractor): raise formats = self._extract_m3u8_formats(response['stream'], display_id, 'mp4') - self._sort_formats(formats) return { 'id': resource_id, diff --git a/yt_dlp/extractor/tweakers.py b/yt_dlp/extractor/tweakers.py index 6d1f92bbb..e8e1fc666 100644 --- a/yt_dlp/extractor/tweakers.py +++ b/yt_dlp/extractor/tweakers.py @@ -47,7 +47,6 @@ class TweakersIE(InfoExtractor): 'height': height, 'ext': ext, }) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/twentymin.py b/yt_dlp/extractor/twentymin.py index f33f15914..74f90b00b 100644 --- a/yt_dlp/extractor/twentymin.py +++ b/yt_dlp/extractor/twentymin.py @@ -57,7 +57,6 @@ class TwentyMinutenIE(InfoExtractor): 'url': 'http://podcast.20min-tv.ch/podcast/20min/%s%s.mp4' % (video_id, p), 'quality': quality, } for quality, (format_id, p) in enumerate([('sd', ''), ('hd', 'h')])] - self._sort_formats(formats) description = video.get('lead') thumbnail = video.get('thumbnail') diff --git a/yt_dlp/extractor/twitcasting.py b/yt_dlp/extractor/twitcasting.py index 9046f994d..735cb0bb0 100644 --- a/yt_dlp/extractor/twitcasting.py +++ b/yt_dlp/extractor/twitcasting.py @@ -186,15 +186,13 @@ class TwitCastingIE(InfoExtractor): 'protocol': 'websocket_frag', }) - self._sort_formats(formats, ('source',)) - infodict = { - 'formats': formats + 'formats': formats, + '_format_sort_fields': ('source', ), } elif len(m3u8_urls) == 1: formats = self._extract_m3u8_formats( m3u8_urls[0], video_id, 'mp4', headers=self._M3U8_HEADERS) - self._sort_formats(formats) infodict = { # No problem here since there's only one manifest 'formats': formats, diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py index 975e09c30..c59d1cf17 100644 --- a/yt_dlp/extractor/twitch.py +++ b/yt_dlp/extractor/twitch.py @@ -133,7 +133,6 @@ class TwitchBaseIE(InfoExtractor): 'quality': 10, 
'format_note': 'Source', }) - self._sort_formats(formats) def _download_base_gql(self, video_id, ops, note, fatal=True): headers = { @@ -1144,7 +1143,6 @@ class TwitchClipsIE(TwitchBaseIE): 'height': int_or_none(option.get('quality')), 'fps': int_or_none(option.get('frameRate')), }) - self._sort_formats(formats) thumbnails = [] for thumbnail_id in ('tiny', 'small', 'medium'): diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py index 48c14ddce..3c81473dc 100644 --- a/yt_dlp/extractor/twitter.py +++ b/yt_dlp/extractor/twitter.py @@ -876,7 +876,6 @@ class TwitterIE(TwitterBaseIE): fmts, subs = self._extract_variant_formats(variant, twid) subtitles = self._merge_subtitles(subtitles, subs) formats.extend(fmts) - self._sort_formats(formats, ('res', 'br', 'size', 'proto')) # The codec of http formats are unknown thumbnails = [] media_url = media.get('media_url_https') or media.get('media_url') @@ -898,6 +897,8 @@ class TwitterIE(TwitterBaseIE): 'subtitles': subtitles, 'thumbnails': thumbnails, 'duration': float_or_none(video_info.get('duration_millis'), 1000), + # The codec of http formats are unknown + '_format_sort_fields': ('res', 'br', 'size', 'proto'), } def extract_from_card_info(card): @@ -952,7 +953,6 @@ class TwitterIE(TwitterBaseIE): vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url') content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player')) formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid) - self._sort_formats(formats) thumbnails = [] for suffix in ('_small', '', '_large', '_x_large', '_original'): diff --git a/yt_dlp/extractor/udemy.py b/yt_dlp/extractor/udemy.py index 2c8a35473..8b99c59cf 100644 --- a/yt_dlp/extractor/udemy.py +++ b/yt_dlp/extractor/udemy.py @@ -391,8 +391,6 @@ class UdemyIE(InfoExtractor): if f.get('url'): formats.append(f) - self._sort_formats(formats) - return { 'id': video_id, 'title': title, diff --git a/yt_dlp/extractor/udn.py b/yt_dlp/extractor/udn.py index 9fdb46faf..10668ac4b 100644 --- a/yt_dlp/extractor/udn.py +++ b/yt_dlp/extractor/udn.py @@ -90,8 +90,6 @@ class UDNEmbedIE(InfoExtractor): }) formats.append(a_format) - self._sort_formats(formats) - return { 'id': video_id, 'formats': formats, diff --git a/yt_dlp/extractor/umg.py b/yt_dlp/extractor/umg.py index e6ed656b9..3ffcb7364 100644 --- a/yt_dlp/extractor/umg.py +++ b/yt_dlp/extractor/umg.py @@ -86,7 +86,6 @@ class UMGDeIE(InfoExtractor): if not formats: for format_id in (867, 836, 940): add_m3u8_format(format_id) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/unistra.py b/yt_dlp/extractor/unistra.py index 083c87209..6e872cd14 100644 --- a/yt_dlp/extractor/unistra.py +++ b/yt_dlp/extractor/unistra.py @@ -47,7 +47,6 @@ class UnistraIE(InfoExtractor): 'format_id': format_id, 'quality': quality(format_id) }) - self._sort_formats(formats) title = self._html_search_regex( r'UTV - (.*?)</', webpage, 'title') diff --git a/yt_dlp/extractor/uol.py b/yt_dlp/extractor/uol.py index e3d9127d8..068c2b87d 100644 --- a/yt_dlp/extractor/uol.py +++ b/yt_dlp/extractor/uol.py @@ -107,7 +107,6 @@ class UOLIE(InfoExtractor): 'url': f_url, 'quality': quality(format_id), }) - self._sort_formats(formats) tags = [] for tag in video_data.get('tags', []): diff --git a/yt_dlp/extractor/uplynk.py b/yt_dlp/extractor/uplynk.py index 9b560f719..87c427f63 100644 --- a/yt_dlp/extractor/uplynk.py +++ b/yt_dlp/extractor/uplynk.py @@ -33,7 +33,6 @@ class 
UplynkIE(InfoExtractor): if session_id: for f in formats: f['extra_param_to_segment_url'] = 'pbs=' + session_id - self._sort_formats(formats) asset = self._download_json('http://content.uplynk.com/player/assetinfo/%s.json' % path, display_id) if asset.get('error') == 1: raise ExtractorError('% said: %s' % (self.IE_NAME, asset['msg']), expected=True) diff --git a/yt_dlp/extractor/urort.py b/yt_dlp/extractor/urort.py index 3f687f737..debd2ba9e 100644 --- a/yt_dlp/extractor/urort.py +++ b/yt_dlp/extractor/urort.py @@ -40,7 +40,6 @@ class UrortIE(InfoExtractor): 'url': 'http://p3urort.blob.core.windows.net/tracks/%s' % f['FileRef'], 'quality': 3 if f['FileType'] == 'mp3' else 2, } for f in s['Files']] - self._sort_formats(formats) e = { 'id': '%d-%s' % (s['BandId'], s['$id']), 'title': s['Title'], diff --git a/yt_dlp/extractor/urplay.py b/yt_dlp/extractor/urplay.py index 30bd3dcbf..0f0d6592d 100644 --- a/yt_dlp/extractor/urplay.py +++ b/yt_dlp/extractor/urplay.py @@ -76,7 +76,6 @@ class URPlayIE(InfoExtractor): formats.extend(self._extract_wowza_formats( 'http://%s/%splaylist.m3u8' % (host, file_http), video_id, skip_protocols=['f4m', 'rtmp', 'rtsp'])) - self._sort_formats(formats) subtitles = {} diff --git a/yt_dlp/extractor/ustream.py b/yt_dlp/extractor/ustream.py index cb920bf13..5df241653 100644 --- a/yt_dlp/extractor/ustream.py +++ b/yt_dlp/extractor/ustream.py @@ -210,8 +210,6 @@ class UstreamIE(InfoExtractor): formats.extend(self._parse_segmented_mp4(dash_streams)) ''' - self._sort_formats(formats) - description = video.get('description') timestamp = int_or_none(video.get('created_at')) duration = float_or_none(video.get('length')) diff --git a/yt_dlp/extractor/ustudio.py b/yt_dlp/extractor/ustudio.py index fd5dad0fc..c3aeeb961 100644 --- a/yt_dlp/extractor/ustudio.py +++ b/yt_dlp/extractor/ustudio.py @@ -39,7 +39,6 @@ class UstudioIE(InfoExtractor): } for item in config.findall('./qualities/quality/%s' % kind) if item.get('url')] formats = extract('video') - self._sort_formats(formats) webpage = self._download_webpage(url, display_id) @@ -98,7 +97,6 @@ class UstudioEmbedIE(InfoExtractor): 'width': int_or_none(quality.get('width')), 'height': height, }) - self._sort_formats(formats) thumbnails = [] for image in video_data.get('images', []): diff --git a/yt_dlp/extractor/utreon.py b/yt_dlp/extractor/utreon.py index 1213ae1bf..90c10c051 100644 --- a/yt_dlp/extractor/utreon.py +++ b/yt_dlp/extractor/utreon.py @@ -68,7 +68,6 @@ class UtreonIE(InfoExtractor): 'format_id': format_key.split('_')[1], 'height': int(format_key.split('_')[1][:-1]), } for format_key, format_url in videos_json.items() if url_or_none(format_url)] - self._sort_formats(formats) thumbnail = url_or_none(dict_get(json_data, ('cover_image_url', 'preview_image_url'))) return { 'id': video_id, diff --git a/yt_dlp/extractor/veo.py b/yt_dlp/extractor/veo.py index 25d462a7d..ef44d421e 100644 --- a/yt_dlp/extractor/veo.py +++ b/yt_dlp/extractor/veo.py @@ -65,8 +65,6 @@ class VeoIE(InfoExtractor): 'vbr': int_or_none(fmt.get('bit_rate'), scale=1000), }) - self._sort_formats(formats) - return { 'id': video_id, 'title': str_or_none(metadata.get('title')), diff --git a/yt_dlp/extractor/veoh.py b/yt_dlp/extractor/veoh.py index d9b3ab115..92ff86521 100644 --- a/yt_dlp/extractor/veoh.py +++ b/yt_dlp/extractor/veoh.py @@ -105,7 +105,6 @@ class VeohIE(InfoExtractor): 'quality': q(f_id), 'url': f_url, }) - self._sort_formats(formats) categories = metadata.get('categoryPath') if not categories: diff --git a/yt_dlp/extractor/vevo.py 
b/yt_dlp/extractor/vevo.py index a146be048..da4ce49ca 100644 --- a/yt_dlp/extractor/vevo.py +++ b/yt_dlp/extractor/vevo.py @@ -274,7 +274,6 @@ class VevoIE(VevoBaseIE): 'width': int(m.group('width')), 'height': int(m.group('height')), }) - self._sort_formats(formats) track = video_info['title'] if featured_artist: diff --git a/yt_dlp/extractor/vgtv.py b/yt_dlp/extractor/vgtv.py index b637afddf..db338fa10 100644 --- a/yt_dlp/extractor/vgtv.py +++ b/yt_dlp/extractor/vgtv.py @@ -238,8 +238,6 @@ class VGTVIE(XstreamIE): # XXX: Do not subclass from concrete IE raise self.raise_geo_restricted( countries=[host.rpartition('.')[-1].partition('/')[0].upper()]) - self._sort_formats(info['formats']) - info.update({ 'id': video_id, 'title': data['title'], diff --git a/yt_dlp/extractor/vice.py b/yt_dlp/extractor/vice.py index f3ad56bf1..d1a3b48aa 100644 --- a/yt_dlp/extractor/vice.py +++ b/yt_dlp/extractor/vice.py @@ -150,7 +150,6 @@ class ViceIE(ViceBaseIE, AdobePassIE): video_data = preplay['video'] formats = self._extract_m3u8_formats( preplay['playURL'], video_id, 'mp4', 'm3u8_native') - self._sort_formats(formats) episode = video_data.get('episode') or {} channel = video_data.get('channel') or {} season = video_data.get('season') or {} diff --git a/yt_dlp/extractor/viddler.py b/yt_dlp/extractor/viddler.py index d81a31375..40914774a 100644 --- a/yt_dlp/extractor/viddler.py +++ b/yt_dlp/extractor/viddler.py @@ -116,7 +116,6 @@ class ViddlerIE(InfoExtractor): f['format_id'] = format_id + '-html5' f['source_preference'] = 0 formats.append(f) - self._sort_formats(formats) categories = [ t.get('text') for t in data.get('tags', []) if 'text' in t] diff --git a/yt_dlp/extractor/videa.py b/yt_dlp/extractor/videa.py index fa16da28b..52fa8fcec 100644 --- a/yt_dlp/extractor/videa.py +++ b/yt_dlp/extractor/videa.py @@ -167,7 +167,6 @@ class VideaIE(InfoExtractor): 'height': int_or_none(source.get('height')), }) formats.append(f) - self._sort_formats(formats) thumbnail = self._proto_relative_url(xpath_text(video, './poster_src')) diff --git a/yt_dlp/extractor/videocampus_sachsen.py b/yt_dlp/extractor/videocampus_sachsen.py index 1aa84ea70..982ab3dd0 100644 --- a/yt_dlp/extractor/videocampus_sachsen.py +++ b/yt_dlp/extractor/videocampus_sachsen.py @@ -173,7 +173,6 @@ class VideocampusSachsenIE(InfoExtractor): raise formats.append({'url': f'https://{host}/getMedium/{video_id}.mp4'}) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/videomore.py b/yt_dlp/extractor/videomore.py index 2f81860bb..ddc33f7d7 100644 --- a/yt_dlp/extractor/videomore.py +++ b/yt_dlp/extractor/videomore.py @@ -181,7 +181,6 @@ class VideomoreIE(InfoExtractor): if error in ('Данное видео недоступно для просмотра на территории этой страны', 'Данное видео доступно для просмотра только на территории России'): self.raise_geo_restricted(countries=['RU'], metadata_available=True) self.raise_no_formats(error, expected=True) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/videopress.py b/yt_dlp/extractor/videopress.py index 16965dfb0..0734aee9c 100644 --- a/yt_dlp/extractor/videopress.py +++ b/yt_dlp/extractor/videopress.py @@ -76,7 +76,6 @@ class VideoPressIE(InfoExtractor): 'width': int_or_none(video.get('width')), 'height': int_or_none(video.get('height')), }) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py index 8d3abceed..770aa284d 100644 --- a/yt_dlp/extractor/vidio.py +++ b/yt_dlp/extractor/vidio.py 
@@ -156,8 +156,6 @@ class VidioIE(VidioBaseIE): formats, subs = self._extract_m3u8_formats_and_subtitles( hls_url, display_id, 'mp4', 'm3u8_native') - self._sort_formats(formats) - get_first = lambda x: try_get(data, lambda y: y[x + 's'][0], dict) or {} channel = get_first('channel') user = get_first('user') @@ -293,7 +291,6 @@ class VidioLiveIE(VidioBaseIE): if stream_meta.get('stream_url'): formats.extend(self._extract_m3u8_formats( stream_meta['stream_url'], display_id, 'mp4', 'm3u8_native')) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/vidlii.py b/yt_dlp/extractor/vidlii.py index 69a75304e..5933783ae 100644 --- a/yt_dlp/extractor/vidlii.py +++ b/yt_dlp/extractor/vidlii.py @@ -77,7 +77,6 @@ class VidLiiIE(InfoExtractor): 'format_id': f'{height}p', 'height': height, }) - self._sort_formats(formats) title = self._search_regex( (r'<h1>([^<]+)</h1>', r'<title>([^<]+) - VidLii<'), webpage, diff --git a/yt_dlp/extractor/viewlift.py b/yt_dlp/extractor/viewlift.py index b630f9a6d..381260114 100644 --- a/yt_dlp/extractor/viewlift.py +++ b/yt_dlp/extractor/viewlift.py @@ -134,7 +134,6 @@ class ViewLiftEmbedIE(ViewLiftBaseIE): 'url': sub_url, }) - self._sort_formats(formats) return { 'id': film_id, 'title': title, diff --git a/yt_dlp/extractor/viidea.py b/yt_dlp/extractor/viidea.py index 157ce4d8f..4cdf2677b 100644 --- a/yt_dlp/extractor/viidea.py +++ b/yt_dlp/extractor/viidea.py @@ -158,7 +158,6 @@ class ViideaIE(InfoExtractor): smil_url = '%s/%s/video/%s/smil.xml' % (base_url, lecture_slug, part_id) smil = self._download_smil(smil_url, lecture_id) info = self._parse_smil(smil, smil_url, lecture_id) - self._sort_formats(info['formats']) info['id'] = lecture_id if not multipart else '%s_part%s' % (lecture_id, part_id) info['display_id'] = lecture_slug if not multipart else '%s_part%s' % (lecture_slug, part_id) if multipart: diff --git a/yt_dlp/extractor/viki.py b/yt_dlp/extractor/viki.py index a922b195c..3246dab52 100644 --- a/yt_dlp/extractor/viki.py +++ b/yt_dlp/extractor/viki.py @@ -263,7 +263,6 @@ class VikiIE(VikiBaseIE): # Modify the URL to get 1080p mpd_url = mpd_url.replace('mpdhd', 'mpdhd_high') formats = self._extract_mpd_formats(mpd_url, video_id) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index 1b21c0050..26fe566b0 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -123,11 +123,6 @@ class VimeoBaseInfoExtractor(InfoExtractor): def _set_vimeo_cookie(self, name, value): self._set_cookie('vimeo.com', name, value) - def _vimeo_sort_formats(self, formats): - # Note: Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps - # at the same time without actual units specified. - self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source')) - def _parse_config(self, config, video_id): video_data = config['video'] video_title = video_data.get('title') @@ -242,6 +237,9 @@ class VimeoBaseInfoExtractor(InfoExtractor): 'formats': formats, 'subtitles': subtitles, 'is_live': is_live, + # Note: Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps + # at the same time without actual units specified. 
+ '_format_sort_fields': ('quality', 'res', 'fps', 'hdr:12', 'source'), } def _extract_original_format(self, url, video_id, unlisted_hash=None): @@ -776,7 +774,6 @@ class VimeoIE(VimeoBaseInfoExtractor): }) info = self._parse_config(self._download_json( video['config_url'], video_id), video_id) - self._vimeo_sort_formats(info['formats']) get_timestamp = lambda x: parse_iso8601(video.get(x + '_time')) info.update({ 'description': video.get('description'), @@ -874,9 +871,7 @@ class VimeoIE(VimeoBaseInfoExtractor): if config.get('view') == 4: config = self._verify_player_video_password( redirect_url, video_id, headers) - info = self._parse_config(config, video_id) - self._vimeo_sort_formats(info['formats']) - return info + return self._parse_config(config, video_id) if re.search(r'<form[^>]+?id="pw_form"', webpage): video_password = self._get_video_password() @@ -981,7 +976,7 @@ class VimeoIE(VimeoBaseInfoExtractor): info_dict_config = self._parse_config(config, video_id) formats.extend(info_dict_config['formats']) - self._vimeo_sort_formats(formats) + info_dict['_format_sort_fields'] = info_dict_config['_format_sort_fields'] json_ld = self._search_json_ld(webpage, video_id, default={}) @@ -1326,7 +1321,6 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): page_url + '/action', video_id) if source_format: info_dict['formats'].append(source_format) - self._vimeo_sort_formats(info_dict['formats']) info_dict['description'] = clean_html(clip_data.get('description')) return info_dict @@ -1398,5 +1392,4 @@ class VHXEmbedIE(VimeoBaseInfoExtractor): config = self._download_json(config_url, video_id) info = self._parse_config(config, video_id) info['id'] = video_id - self._vimeo_sort_formats(info['formats']) return info diff --git a/yt_dlp/extractor/vimm.py b/yt_dlp/extractor/vimm.py index 3522b8e33..7097149a5 100644 --- a/yt_dlp/extractor/vimm.py +++ b/yt_dlp/extractor/vimm.py @@ -23,7 +23,6 @@ class VimmIE(InfoExtractor): formats, subs = self._extract_m3u8_formats_and_subtitles( f'https://www.vimm.tv/hls/{channel_id}.m3u8', channel_id, 'mp4', m3u8_id='hls', live=True) - self._sort_formats(formats) return { 'id': channel_id, @@ -56,7 +55,6 @@ class VimmRecordingIE(InfoExtractor): formats, subs = self._extract_m3u8_formats_and_subtitles( f'https://d211qfrkztakg3.cloudfront.net/{channel_id}/{video_id}/index.m3u8', video_id, 'mp4', m3u8_id='hls', live=False) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/vimple.py b/yt_dlp/extractor/vimple.py index a8b16dd29..fdccf465e 100644 --- a/yt_dlp/extractor/vimple.py +++ b/yt_dlp/extractor/vimple.py @@ -13,7 +13,6 @@ class SprutoBaseIE(InfoExtractor): formats = [{ 'url': f['url'], } for f in playlist['video']] - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/vine.py b/yt_dlp/extractor/vine.py index 8e57201f6..1909980f2 100644 --- a/yt_dlp/extractor/vine.py +++ b/yt_dlp/extractor/vine.py @@ -86,7 +86,6 @@ class VineIE(InfoExtractor): 'quality': quality, }) self._check_formats(formats, video_id) - self._sort_formats(formats) username = data.get('username') diff --git a/yt_dlp/extractor/viqeo.py b/yt_dlp/extractor/viqeo.py index 574622fa9..79b9f299a 100644 --- a/yt_dlp/extractor/viqeo.py +++ b/yt_dlp/extractor/viqeo.py @@ -74,7 +74,6 @@ class ViqeoIE(InfoExtractor): 'vcodec': 'none' if is_audio else None, }) formats.append(f) - self._sort_formats(formats) duration = int_or_none(data.get('duration')) diff --git a/yt_dlp/extractor/viu.py b/yt_dlp/extractor/viu.py index d27091c94..19d48234e 100644 
--- a/yt_dlp/extractor/viu.py +++ b/yt_dlp/extractor/viu.py @@ -86,7 +86,6 @@ class ViuIE(ViuBaseIE): # r'\1whe\2', video_data['href']) m3u8_url = video_data['href'] formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4') - self._sort_formats(formats) for key, value in video_data.items(): mobj = re.match(r'^subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key) @@ -365,7 +364,6 @@ class ViuOTTIE(InfoExtractor): 'ext': 'mp4', 'filesize': try_get(stream_data, lambda x: x['size'][vid_format], int) }) - self._sort_formats(formats) subtitles = {} for sub in video_data.get('subtitle') or []: diff --git a/yt_dlp/extractor/vk.py b/yt_dlp/extractor/vk.py index 0c856e2b0..347aa381d 100644 --- a/yt_dlp/extractor/vk.py +++ b/yt_dlp/extractor/vk.py @@ -507,7 +507,6 @@ class VKIE(VKBaseIE): 'url': format_url, 'ext': 'flv', }) - self._sort_formats(formats) subtitles = {} for sub in data.get('subs') or {}: diff --git a/yt_dlp/extractor/vlive.py b/yt_dlp/extractor/vlive.py index f4bb079b2..e2fd39315 100644 --- a/yt_dlp/extractor/vlive.py +++ b/yt_dlp/extractor/vlive.py @@ -208,7 +208,6 @@ class VLiveIE(VLiveBaseIE): 'old/v3/live/%s/playInfo', video_id)['result']['adaptiveStreamUrl'] formats = self._extract_m3u8_formats(stream_url, video_id, 'mp4') - self._sort_formats(formats) info = get_common_fields() info.update({ 'title': video['title'], @@ -286,7 +285,6 @@ class VLivePostIE(VLiveBaseIE): 'url': f_url, 'height': int_or_none(f_id[:-1]), }) - self._sort_formats(formats) entry = { 'formats': formats, 'id': video_id, diff --git a/yt_dlp/extractor/vodplatform.py b/yt_dlp/extractor/vodplatform.py index 0d3e7eec2..5ff05004b 100644 --- a/yt_dlp/extractor/vodplatform.py +++ b/yt_dlp/extractor/vodplatform.py @@ -28,7 +28,6 @@ class VODPlatformIE(InfoExtractor): formats = self._extract_wowza_formats( hidden_inputs.get('HiddenmyhHlsLink') or hidden_inputs['HiddenmyDashLink'], video_id, skip_protocols=['f4m', 'smil']) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/voicerepublic.py b/yt_dlp/extractor/voicerepublic.py index e8cbd0e32..47502afb4 100644 --- a/yt_dlp/extractor/voicerepublic.py +++ b/yt_dlp/extractor/voicerepublic.py @@ -46,7 +46,6 @@ class VoiceRepublicIE(InfoExtractor): 'ext': determine_ext(talk_url) or format_id, 'vcodec': 'none', } for format_id, talk_url in talk['media_links'].items()] - self._sort_formats(formats) return { 'id': compat_str(talk.get('id') or display_id), diff --git a/yt_dlp/extractor/voicy.py b/yt_dlp/extractor/voicy.py index feab79138..7438b4956 100644 --- a/yt_dlp/extractor/voicy.py +++ b/yt_dlp/extractor/voicy.py @@ -44,7 +44,6 @@ class VoicyBaseIE(InfoExtractor): 'acodec': 'mp3', 'vcodec': 'none', }] - self._sort_formats(formats) return { 'id': compat_str(entry.get('ArticleId')), 'title': entry.get('ArticleTitle'), diff --git a/yt_dlp/extractor/voot.py b/yt_dlp/extractor/voot.py index 173556e66..b709b74e2 100644 --- a/yt_dlp/extractor/voot.py +++ b/yt_dlp/extractor/voot.py @@ -73,7 +73,6 @@ class VootIE(InfoExtractor): formats = self._extract_m3u8_formats( 'https://cdnapisec.kaltura.com/p/1982551/playManifest/pt/https/f/applehttp/t/web/e/' + entry_id, video_id, 'mp4', m3u8_id='hls') - self._sort_formats(formats) description, series, season_number, episode, episode_number = [None] * 5 diff --git a/yt_dlp/extractor/voxmedia.py b/yt_dlp/extractor/voxmedia.py index 96c782d8b..f9362002f 100644 --- a/yt_dlp/extractor/voxmedia.py +++ b/yt_dlp/extractor/voxmedia.py @@ -47,7 +47,6 @@ class VoxMediaVolumeIE(OnceIE): 'tbr': 
int_or_none(tbr), }) if formats: - self._sort_formats(formats) info['formats'] = formats info['duration'] = int_or_none(asset.get('duration')) return info @@ -58,7 +57,6 @@ class VoxMediaVolumeIE(OnceIE): continue if provider_video_type == 'brightcove': info['formats'] = self._extract_once_formats(provider_video_id) - self._sort_formats(info['formats']) else: info.update({ '_type': 'url_transparent', diff --git a/yt_dlp/extractor/vrv.py b/yt_dlp/extractor/vrv.py index 0b9bf2903..89fa7affc 100644 --- a/yt_dlp/extractor/vrv.py +++ b/yt_dlp/extractor/vrv.py @@ -192,7 +192,6 @@ class VRVIE(VRVBaseIE): formats.extend(self._extract_vrv_formats( stream.get('url'), video_id, stream_type.split('_')[1], audio_locale, stream.get('hardsub_locale'))) - self._sort_formats(formats) subtitles = {} for k in ('captions', 'subtitles'): diff --git a/yt_dlp/extractor/vshare.py b/yt_dlp/extractor/vshare.py index 93842db79..1bc7ae4ba 100644 --- a/yt_dlp/extractor/vshare.py +++ b/yt_dlp/extractor/vshare.py @@ -49,8 +49,6 @@ class VShareIE(InfoExtractor): url, '<video>%s</video>' % self._extract_packed(webpage), video_id)[0] - self._sort_formats(info['formats']) - info.update({ 'id': video_id, 'title': title, diff --git a/yt_dlp/extractor/vvvvid.py b/yt_dlp/extractor/vvvvid.py index 0c3e83a0a..ed725a55d 100644 --- a/yt_dlp/extractor/vvvvid.py +++ b/yt_dlp/extractor/vvvvid.py @@ -223,7 +223,6 @@ class VVVVIDIE(InfoExtractor): metadata_from_url(embed_code) if not is_youtube: - self._sort_formats(formats) info['formats'] = formats metadata_from_url(video_data.get('thumbnail')) diff --git a/yt_dlp/extractor/vzaar.py b/yt_dlp/extractor/vzaar.py index df43caf38..6b9817c9e 100644 --- a/yt_dlp/extractor/vzaar.py +++ b/yt_dlp/extractor/vzaar.py @@ -90,8 +90,6 @@ class VzaarIE(InfoExtractor): f['_decryption_key_url'] = url_templ % ('goose', '') + qs formats.extend(m3u8_formats) - self._sort_formats(formats) - return { 'id': video_id, 'title': title, diff --git a/yt_dlp/extractor/walla.py b/yt_dlp/extractor/walla.py index 6b954c5cc..a1a9c1708 100644 --- a/yt_dlp/extractor/walla.py +++ b/yt_dlp/extractor/walla.py @@ -69,7 +69,6 @@ class WallaIE(InfoExtractor): if m: fmt['height'] = int(m.group('height')) formats.append(fmt) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/wasdtv.py b/yt_dlp/extractor/wasdtv.py index bad5ccb99..f57c619b5 100644 --- a/yt_dlp/extractor/wasdtv.py +++ b/yt_dlp/extractor/wasdtv.py @@ -37,7 +37,6 @@ class WASDTVBaseIE(InfoExtractor): media_url, is_live = self._get_media_url(media_meta) video_id = media.get('media_id') or container.get('media_container_id') formats, subtitles = self._extract_m3u8_formats_and_subtitles(media_url, video_id, 'mp4') - self._sort_formats(formats) return { 'id': str(video_id), 'title': container.get('media_container_name') or self._og_search_title(self._download_webpage(url, video_id)), @@ -149,7 +148,6 @@ class WASDTVClipIE(WASDTVBaseIE): clip = self._fetch(f'v2/clips/{clip_id}', video_id=clip_id, description='clip') clip_data = clip.get('clip_data') formats, subtitles = self._extract_m3u8_formats_and_subtitles(clip_data.get('url'), video_id=clip_id, ext='mp4') - self._sort_formats(formats) return { 'id': clip_id, 'title': clip.get('clip_title') or self._og_search_title(self._download_webpage(url, clip_id, fatal=False)), diff --git a/yt_dlp/extractor/wat.py b/yt_dlp/extractor/wat.py index e6a89adf6..7c62d2866 100644 --- a/yt_dlp/extractor/wat.py +++ b/yt_dlp/extractor/wat.py @@ -95,8 +95,6 @@ class WatIE(InfoExtractor): if 
manifest_urls: extract_formats(manifest_urls) - self._sort_formats(formats) - return { 'id': video_id, 'title': title, diff --git a/yt_dlp/extractor/watchbox.py b/yt_dlp/extractor/watchbox.py index e41148d4a..c973ca998 100644 --- a/yt_dlp/extractor/watchbox.py +++ b/yt_dlp/extractor/watchbox.py @@ -109,7 +109,6 @@ class WatchBoxIE(InfoExtractor): 'height': int_or_none(item.get('height')), 'tbr': int_or_none(item.get('bitrate')), }) - self._sort_formats(formats) description = strip_or_none(item.get('descr')) thumbnail = item.get('media_content_thumbnail_large') or source.get('poster') or item.get('media_thumbnail') diff --git a/yt_dlp/extractor/wdr.py b/yt_dlp/extractor/wdr.py index 7b2e7c8e0..de5dc2666 100644 --- a/yt_dlp/extractor/wdr.py +++ b/yt_dlp/extractor/wdr.py @@ -103,8 +103,6 @@ class WDRIE(InfoExtractor): a_format['ext'] = ext formats.append(a_format) - self._sort_formats(formats) - caption_url = media_resource.get('captionURL') if caption_url: subtitles['de'] = [{ diff --git a/yt_dlp/extractor/webcaster.py b/yt_dlp/extractor/webcaster.py index a66a5f8c5..43eeca017 100644 --- a/yt_dlp/extractor/webcaster.py +++ b/yt_dlp/extractor/webcaster.py @@ -50,7 +50,6 @@ class WebcasterIE(InfoExtractor): 'format_note': track.get('title'), }) formats.extend(m3u8_formats) - self._sort_formats(formats) thumbnail = xpath_text(video, './/image', 'thumbnail') diff --git a/yt_dlp/extractor/webofstories.py b/yt_dlp/extractor/webofstories.py index fde9300b0..65f48f3b1 100644 --- a/yt_dlp/extractor/webofstories.py +++ b/yt_dlp/extractor/webofstories.py @@ -104,8 +104,6 @@ class WebOfStoriesIE(InfoExtractor): 'play_path': play_path, }] - self._sort_formats(formats) - return { 'id': story_id, 'title': title, diff --git a/yt_dlp/extractor/weibo.py b/yt_dlp/extractor/weibo.py index d5a52ce20..81a23b9df 100644 --- a/yt_dlp/extractor/weibo.py +++ b/yt_dlp/extractor/weibo.py @@ -88,8 +88,6 @@ class WeiboIE(InfoExtractor): 'height': res, }) - self._sort_formats(formats) - uploader = self._og_search_property( 'nick-name', webpage, 'uploader', default=None) diff --git a/yt_dlp/extractor/whowatch.py b/yt_dlp/extractor/whowatch.py index 21574471c..f2808cd9f 100644 --- a/yt_dlp/extractor/whowatch.py +++ b/yt_dlp/extractor/whowatch.py @@ -70,7 +70,6 @@ class WhoWatchIE(InfoExtractor): formats.extend(self._extract_m3u8_formats( hls_url, video_id, ext='mp4', m3u8_id='hls')) self._remove_duplicate_formats(formats) - self._sort_formats(formats) uploader_url = try_get(metadata, lambda x: x['live']['user']['user_path'], compat_str) if uploader_url: diff --git a/yt_dlp/extractor/willow.py b/yt_dlp/extractor/willow.py index 6c71e9a04..0ec9c9d6e 100644 --- a/yt_dlp/extractor/willow.py +++ b/yt_dlp/extractor/willow.py @@ -41,7 +41,6 @@ class WillowIE(InfoExtractor): raise ExtractorError('No videos found') formats = self._extract_m3u8_formats(video['secureurl'], video_id, 'mp4') - self._sort_formats(formats) return { 'id': str(video.get('content_id')), diff --git a/yt_dlp/extractor/wimtv.py b/yt_dlp/extractor/wimtv.py index d27a348d9..571112390 100644 --- a/yt_dlp/extractor/wimtv.py +++ b/yt_dlp/extractor/wimtv.py @@ -139,7 +139,6 @@ class WimTVIE(InfoExtractor): }) json = json.get('resource') thumb = self._generate_thumbnail(json.get('thumbnailId')) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/wistia.py b/yt_dlp/extractor/wistia.py index e1e5855c2..38dcc2f5b 100644 --- a/yt_dlp/extractor/wistia.py +++ b/yt_dlp/extractor/wistia.py @@ -98,8 +98,6 @@ class WistiaBaseIE(InfoExtractor): }) 
formats.append(f) - self._sort_formats(formats) - subtitles = {} for caption in data.get('captions', []): language = caption.get('language') diff --git a/yt_dlp/extractor/wppilot.py b/yt_dlp/extractor/wppilot.py index e1062b9b5..5e590e2f4 100644 --- a/yt_dlp/extractor/wppilot.py +++ b/yt_dlp/extractor/wppilot.py @@ -138,8 +138,6 @@ class WPPilotIE(WPPilotBaseIE): random.choice(fmt['url']), video_id, live=True)) - self._sort_formats(formats) - channel['formats'] = formats return channel diff --git a/yt_dlp/extractor/wsj.py b/yt_dlp/extractor/wsj.py index 9eeed104f..86e264679 100644 --- a/yt_dlp/extractor/wsj.py +++ b/yt_dlp/extractor/wsj.py @@ -82,7 +82,6 @@ class WSJIE(InfoExtractor): 'height': int_or_none(v.get('height')), 'fps': float_or_none(v.get('fps')), }) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/xfileshare.py b/yt_dlp/extractor/xfileshare.py index e5c479d03..08c6d6c7c 100644 --- a/yt_dlp/extractor/xfileshare.py +++ b/yt_dlp/extractor/xfileshare.py @@ -182,7 +182,6 @@ class XFileShareIE(InfoExtractor): 'url': video_url, 'format_id': 'sd', }) - self._sort_formats(formats) thumbnail = self._search_regex( [ diff --git a/yt_dlp/extractor/xhamster.py b/yt_dlp/extractor/xhamster.py index 688c6b952..59eececb6 100644 --- a/yt_dlp/extractor/xhamster.py +++ b/yt_dlp/extractor/xhamster.py @@ -234,7 +234,6 @@ class XHamsterIE(InfoExtractor): 'Referer': standard_url, }, }) - self._sort_formats(formats) categories_list = video.get('categories') if isinstance(categories_list, list): @@ -311,8 +310,6 @@ class XHamsterIE(InfoExtractor): 'url': video_url, }) - self._sort_formats(formats) - # Only a few videos have an description mobj = re.search(r'<span>Description: </span>([^<]+)', webpage) description = mobj.group(1) if mobj else None diff --git a/yt_dlp/extractor/xinpianchang.py b/yt_dlp/extractor/xinpianchang.py index 96e23bb8d..ddc1d0b5a 100644 --- a/yt_dlp/extractor/xinpianchang.py +++ b/yt_dlp/extractor/xinpianchang.py @@ -72,8 +72,6 @@ class XinpianchangIE(InfoExtractor): 'ext': 'mp4', } for prog in v if prog.get('url') or []]) - self._sort_formats(formats) - return { 'id': video_id, 'title': data.get('title'), diff --git a/yt_dlp/extractor/xnxx.py b/yt_dlp/extractor/xnxx.py index 14beb1347..1452aaec3 100644 --- a/yt_dlp/extractor/xnxx.py +++ b/yt_dlp/extractor/xnxx.py @@ -64,7 +64,6 @@ class XNXXIE(InfoExtractor): 'format_id': format_id, 'quality': -1 if format_id == 'low' else 0, }) - self._sort_formats(formats) thumbnail = self._og_search_thumbnail(webpage, default=None) or get( 'ThumbUrl', fatal=False) or get('ThumbUrl169', fatal=False) diff --git a/yt_dlp/extractor/xstream.py b/yt_dlp/extractor/xstream.py index 42bffb071..8dd1cd9ef 100644 --- a/yt_dlp/extractor/xstream.py +++ b/yt_dlp/extractor/xstream.py @@ -82,7 +82,6 @@ class XstreamIE(InfoExtractor): 'url': media_url, 'tbr': tbr, }) - self._sort_formats(formats) link = find_xpath_attr( entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original') diff --git a/yt_dlp/extractor/xtube.py b/yt_dlp/extractor/xtube.py index 93a6a3f33..ce4480c7d 100644 --- a/yt_dlp/extractor/xtube.py +++ b/yt_dlp/extractor/xtube.py @@ -129,7 +129,6 @@ class XTubeIE(InfoExtractor): }) self._remove_duplicate_formats(formats) - self._sort_formats(formats) if not title: title = self._search_regex( diff --git a/yt_dlp/extractor/xuite.py b/yt_dlp/extractor/xuite.py index 52423a327..71ddadd42 100644 --- a/yt_dlp/extractor/xuite.py +++ b/yt_dlp/extractor/xuite.py @@ -116,7 +116,6 @@ class XuiteIE(InfoExtractor): 
'format_id': format_id, 'height': int(format_id) if format_id.isnumeric() else None, }) - self._sort_formats(formats) timestamp = media_info.get('PUBLISH_DATETIME') if timestamp: diff --git a/yt_dlp/extractor/xvideos.py b/yt_dlp/extractor/xvideos.py index 50b939496..5c505c850 100644 --- a/yt_dlp/extractor/xvideos.py +++ b/yt_dlp/extractor/xvideos.py @@ -149,8 +149,6 @@ class XVideosIE(InfoExtractor): 'quality': -2 if format_id.endswith('low') else None, }) - self._sort_formats(formats) - return { 'id': video_id, 'formats': formats, diff --git a/yt_dlp/extractor/yahoo.py b/yt_dlp/extractor/yahoo.py index 01a859556..a69715b7c 100644 --- a/yt_dlp/extractor/yahoo.py +++ b/yt_dlp/extractor/yahoo.py @@ -241,8 +241,6 @@ class YahooIE(InfoExtractor): if not formats and msg == 'geo restricted': self.raise_geo_restricted(metadata_available=True) - self._sort_formats(formats) - thumbnails = [] for thumb in video.get('thumbnails', []): thumb_url = thumb.get('url') @@ -498,7 +496,6 @@ class YahooJapanNewsIE(InfoExtractor): 'tbr': int_or_none(vid.get('bitrate')), }) self._remove_duplicate_formats(formats) - self._sort_formats(formats) return formats diff --git a/yt_dlp/extractor/yandexdisk.py b/yt_dlp/extractor/yandexdisk.py index d87a7f9be..d5eecbd9c 100644 --- a/yt_dlp/extractor/yandexdisk.py +++ b/yt_dlp/extractor/yandexdisk.py @@ -127,7 +127,6 @@ class YandexDiskIE(InfoExtractor): 'url': format_url, 'width': int_or_none(size.get('width')), }) - self._sort_formats(formats) uid = resource.get('uid') display_name = try_get(store, lambda x: x['users'][uid]['displayName']) diff --git a/yt_dlp/extractor/yandexvideo.py b/yt_dlp/extractor/yandexvideo.py index 5e6cf6edd..535b61f65 100644 --- a/yt_dlp/extractor/yandexvideo.py +++ b/yt_dlp/extractor/yandexvideo.py @@ -121,8 +121,6 @@ class YandexVideoIE(InfoExtractor): else: formats.append({'url': content_url}) - self._sort_formats(formats) - timestamp = (int_or_none(content.get('release_date')) or int_or_none(content.get('release_date_ut')) or int_or_none(content.get('start_time'))) @@ -275,7 +273,6 @@ class ZenYandexIE(InfoExtractor): formats.extend(self._extract_mpd_formats(s_url, id, mpd_id='dash')) elif ext == 'm3u8': formats.extend(self._extract_m3u8_formats(s_url, id, 'mp4')) - self._sort_formats(formats) return { 'id': video_id, 'title': video_json.get('title') or self._og_search_title(webpage), diff --git a/yt_dlp/extractor/yapfiles.py b/yt_dlp/extractor/yapfiles.py index 221df842c..19812bae0 100644 --- a/yt_dlp/extractor/yapfiles.py +++ b/yt_dlp/extractor/yapfiles.py @@ -79,7 +79,6 @@ class YapFilesIE(InfoExtractor): 'quality': quality_key(format_id), 'height': hd_height if is_hd else None, }) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/yinyuetai.py b/yt_dlp/extractor/yinyuetai.py index b28c39380..b2e3172f9 100644 --- a/yt_dlp/extractor/yinyuetai.py +++ b/yt_dlp/extractor/yinyuetai.py @@ -41,7 +41,6 @@ class YinYueTaiIE(InfoExtractor): 'ext': 'mp4', 'tbr': format_info.get('bitrate'), } for format_info in info['videoUrlModels']] - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/ynet.py b/yt_dlp/extractor/ynet.py index 27eda9721..a7d7371f3 100644 --- a/yt_dlp/extractor/ynet.py +++ b/yt_dlp/extractor/ynet.py @@ -39,7 +39,6 @@ class YnetIE(InfoExtractor): if m: title = m.group('title') formats = self._extract_f4m_formats(f4m_url, video_id) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/youku.py b/yt_dlp/extractor/youku.py index 
45856fbbe..624975b98 100644 --- a/yt_dlp/extractor/youku.py +++ b/yt_dlp/extractor/youku.py @@ -198,7 +198,6 @@ class YoukuIE(InfoExtractor): 'width': stream.get('width'), 'height': stream.get('height'), } for stream in data['stream'] if stream.get('channel_type') != 'tail'] - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/youporn.py b/yt_dlp/extractor/youporn.py index 7fdb865f7..2f3f21332 100644 --- a/yt_dlp/extractor/youporn.py +++ b/yt_dlp/extractor/youporn.py @@ -103,7 +103,6 @@ class YouPornIE(InfoExtractor): }) f['height'] = height formats.append(f) - self._sort_formats(formats) webpage = self._download_webpage( 'http://www.youporn.com/watch/%s' % video_id, display_id, diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 7e3530c0f..8a2dd728c 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -4003,10 +4003,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): formats.extend(self._extract_storyboard(player_responses, duration)) - # source_preference is lower for throttled/potentially damaged formats - self._sort_formats(formats, ( - 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto')) - info = { 'id': video_id, 'title': video_title, @@ -4036,6 +4032,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'), 'live_status': live_status, 'release_timestamp': live_start_time, + '_format_sort_fields': ( # source_preference is lower for throttled/potentially damaged formats + 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto') } subtitles = {} diff --git a/yt_dlp/extractor/zapiks.py b/yt_dlp/extractor/zapiks.py index 4b18cb86c..88f526bbc 100644 --- a/yt_dlp/extractor/zapiks.py +++ b/yt_dlp/extractor/zapiks.py @@ -92,7 +92,6 @@ class ZapiksIE(InfoExtractor): if m: f['height'] = int(m.group('height')) formats.append(f) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/zattoo.py b/yt_dlp/extractor/zattoo.py index 572a1d0f2..22620c0a3 100644 --- a/yt_dlp/extractor/zattoo.py +++ b/yt_dlp/extractor/zattoo.py @@ -202,7 +202,6 @@ class ZattooPlatformBaseIE(InfoExtractor): for this_format in this_formats: this_format['quality'] = preference formats.extend(this_formats) - self._sort_formats(formats) return formats, subtitles def _extract_video(self, video_id, record_id=None): diff --git a/yt_dlp/extractor/zdf.py b/yt_dlp/extractor/zdf.py index 1eab384b9..fca426a50 100644 --- a/yt_dlp/extractor/zdf.py +++ b/yt_dlp/extractor/zdf.py @@ -110,7 +110,6 @@ class ZDFBaseIE(InfoExtractor): 'class': track.get('class'), 'language': track.get('language'), }) - self._sort_formats(formats, ('tbr', 'res', 'quality', 'language_preference')) duration = float_or_none(try_get( ptmd, lambda x: x['attributes']['duration']['value']), scale=1000) @@ -121,6 +120,7 @@ class ZDFBaseIE(InfoExtractor): 'duration': duration, 'formats': formats, 'subtitles': self._extract_subtitles(ptmd), + '_format_sort_fields': ('tbr', 'res', 'quality', 'language_preference'), } def _extract_player(self, webpage, video_id, fatal=True): @@ -318,7 +318,6 @@ class ZDFIE(ZDFBaseIE): format_urls = set() for f in formitaeten or []: self._extract_format(content_id, formats, format_urls, f) - self._sort_formats(formats) thumbnails = [] teaser_bild = document.get('teaserBild') diff --git a/yt_dlp/extractor/zee5.py b/yt_dlp/extractor/zee5.py index 10dd8fb1c..a64eb9ed0 100644 --- 
a/yt_dlp/extractor/zee5.py +++ b/yt_dlp/extractor/zee5.py @@ -146,7 +146,6 @@ class Zee5IE(InfoExtractor): if not asset_data.get('hls_url'): self.raise_login_required(self._LOGIN_HINT, metadata_available=True, method=None) formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(asset_data['hls_url'], video_id, 'mp4', fatal=False) - self._sort_formats(formats) subtitles = {} for sub in asset_data.get('subtitle_url', []): diff --git a/yt_dlp/extractor/zeenews.py b/yt_dlp/extractor/zeenews.py index ae2cc264e..1616dbfbf 100644 --- a/yt_dlp/extractor/zeenews.py +++ b/yt_dlp/extractor/zeenews.py @@ -48,7 +48,6 @@ class ZeeNewsIE(InfoExtractor): raise ExtractorError('No video found', expected=True) formats = self._extract_m3u8_formats(embed_url, content_id, 'mp4') - self._sort_formats(formats) return { **self._json_ld(json_ld_list, display_id), diff --git a/yt_dlp/extractor/zhihu.py b/yt_dlp/extractor/zhihu.py index d8d259dd6..c24b33874 100644 --- a/yt_dlp/extractor/zhihu.py +++ b/yt_dlp/extractor/zhihu.py @@ -45,7 +45,6 @@ class ZhihuIE(InfoExtractor): 'url': play_url, 'width': int_or_none(q.get('width')), }) - self._sort_formats(formats) author = zvideo.get('author') or {} url_token = author.get('url_token') diff --git a/yt_dlp/extractor/zingmp3.py b/yt_dlp/extractor/zingmp3.py index 8b2d842ff..a818c9fa9 100644 --- a/yt_dlp/extractor/zingmp3.py +++ b/yt_dlp/extractor/zingmp3.py @@ -168,7 +168,6 @@ class ZingMp3IE(ZingMp3BaseIE): if not formats and item.get('msg') == 'Sorry, this content is not available in your country.': self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True) - self._sort_formats(formats) lyric = item.get('lyric') or self._call_api('lyric', {'id': item_id}, fatal=False).get('file') diff --git a/yt_dlp/extractor/zoom.py b/yt_dlp/extractor/zoom.py index a455f8c04..ef8b71522 100644 --- a/yt_dlp/extractor/zoom.py +++ b/yt_dlp/extractor/zoom.py @@ -86,8 +86,6 @@ class ZoomIE(InfoExtractor): 'preference': -1 }) - self._sort_formats(formats) - return { 'id': play_id, 'title': data.get('topic'), diff --git a/yt_dlp/extractor/zype.py b/yt_dlp/extractor/zype.py index a705149e6..8cf994505 100644 --- a/yt_dlp/extractor/zype.py +++ b/yt_dlp/extractor/zype.py @@ -97,7 +97,6 @@ class ZypeIE(InfoExtractor): if text_tracks: text_tracks = self._parse_json( text_tracks, video_id, js_to_json, False) - self._sort_formats(formats) if text_tracks: for text_track in text_tracks: From bc87dac75f289581bb2cd98500015c4d6a9027de Mon Sep 17 00:00:00 2001 From: Bnyro <82752168+Bnyro@users.noreply.github.com> Date: Thu, 17 Nov 2022 14:15:38 +0100 Subject: [PATCH 1776/2552] [extractor/youtube] Add `piped.video` (#5571) Closes #5518 Authored by: Bnyro --- yt_dlp/extractor/youtube.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 8a2dd728c..79d082d0b 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -388,6 +388,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor): r'(?:www\.)?piped\.adminforge\.de', r'(?:www\.)?watch\.whatevertinfoil\.de', r'(?:www\.)?piped\.qdi\.fi', + r'(?:www\.)?piped\.video', + r'(?:www\.)?piped\.aeong\.one', ) # extracted from account/account_menu ep From f96a3fb7d3cbeb2b63c2eafcc14b359f37ff3078 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 17 Nov 2022 19:09:40 +0000 Subject: [PATCH 1777/2552] [extractor/redgifs] Fix bug in 8c188d5d09177ed213a05c900d3523867c5897fd (#5559) --- yt_dlp/extractor/redgifs.py | 5 +++-- 1 
file changed, 3 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/redgifs.py b/yt_dlp/extractor/redgifs.py index f688d1e63..098fb8185 100644 --- a/yt_dlp/extractor/redgifs.py +++ b/yt_dlp/extractor/redgifs.py @@ -72,7 +72,7 @@ class RedGifsBaseInfoExtractor(InfoExtractor): self._API_HEADERS['authorization'] = f'Bearer {auth["token"]}' def _call_api(self, ep, video_id, *args, **kwargs): - for attempt in range(2): + for first_attempt in True, False: if 'authorization' not in self._API_HEADERS: self._fetch_oauth_token(video_id) try: @@ -82,8 +82,9 @@ class RedGifsBaseInfoExtractor(InfoExtractor): f'https://api.redgifs.com/v2/{ep}', video_id, headers=headers, *args, **kwargs) break except ExtractorError as e: - if not attempt and isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 401: + if first_attempt and isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 401: del self._API_HEADERS['authorization'] # refresh the token + continue raise if 'error' in data: From f5a9e9df0da38a0c3c13f1dd106d5eb585253f0c Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 17 Nov 2022 19:11:35 +0000 Subject: [PATCH 1778/2552] [extractor/brightcove] Add `BrightcoveNewBaseIE` and fix embed extraction (#5558) * Move Brightcove embed extraction and tests into the IEs * Split `BrightcoveNewBaseIE` from `BrightcoveNewIE` * Fix bug in ade1fa70cbaaaadaa4772e5f0564870cea3167ef with the "wrong" spelling of `referrer` being smuggled Closes #5539 --- yt_dlp/extractor/bandaichannel.py | 4 +- yt_dlp/extractor/brightcove.py | 513 +++++++++++++++++++++++------- yt_dlp/extractor/generic.py | 270 +--------------- yt_dlp/extractor/sevenplus.py | 4 +- 4 files changed, 395 insertions(+), 396 deletions(-) diff --git a/yt_dlp/extractor/bandaichannel.py b/yt_dlp/extractor/bandaichannel.py index e438d16ea..d7fcf44bd 100644 --- a/yt_dlp/extractor/bandaichannel.py +++ b/yt_dlp/extractor/bandaichannel.py @@ -1,8 +1,8 @@ -from .brightcove import BrightcoveNewIE +from .brightcove import BrightcoveNewBaseIE from ..utils import extract_attributes -class BandaiChannelIE(BrightcoveNewIE): # XXX: Do not subclass from concrete IE +class BandaiChannelIE(BrightcoveNewBaseIE): IE_NAME = 'bandaichannel' _VALID_URL = r'https?://(?:www\.)?b-ch\.com/titles/(?P<id>\d+/\d+)' _TESTS = [{ diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py index 35e1aa9c9..2b7ddcae8 100644 --- a/yt_dlp/extractor/brightcove.py +++ b/yt_dlp/extractor/brightcove.py @@ -145,6 +145,159 @@ class BrightcoveLegacyIE(InfoExtractor): } ] + _WEBPAGE_TESTS = [{ + # embedded brightcove video + # it also tests brightcove videos that need to set the 'Referer' + # in the http requests + 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/', + 'info_dict': { + 'id': '2765128793001', + 'ext': 'mp4', + 'title': 'Le cours de bourse : l’analyse technique', + 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9', + 'uploader': 'BFM BUSINESS', + }, + 'params': { + 'skip_download': True, + }, + 'skip': '404 Not Found', + }, { + # embedded with itemprop embedURL and video id spelled as `idVideo` + 'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/', + 'info_dict': { + 'id': '5255628253001', + 'ext': 'mp4', + 'title': 'md5:37c519b1128915607601e75a87995fc0', + 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26', + 'uploader': 'BFM BUSINESS', + 'uploader_id': '876450612001', + 'timestamp': 1482255315, + 'upload_date': 
'20161220', + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'Redirects, page gone', + }, { + # https://github.com/ytdl-org/youtube-dl/issues/2253 + 'url': 'http://bcove.me/i6nfkrc3', + 'md5': '0ba9446db037002366bab3b3eb30c88c', + 'info_dict': { + 'id': '3101154703001', + 'ext': 'mp4', + 'title': 'Still no power', + 'uploader': 'thestar.com', + 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.', + }, + 'skip': 'video gone', + }, { + # https://github.com/ytdl-org/youtube-dl/issues/3541 + 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1', + 'info_dict': { + 'id': '3866516442001', + 'ext': 'mp4', + 'title': 'Leer mij vrouwen kennen: Aflevering 1', + 'description': 'Leer mij vrouwen kennen: Aflevering 1', + 'uploader': 'SBS Broadcasting', + }, + 'skip': 'Restricted to Netherlands, 404 Not Found', + 'params': { + 'skip_download': True, # m3u8 download + }, + }, { + # Brightcove video in <iframe> + 'url': 'http://www.un.org/chinese/News/story.asp?NewsID=27724', + 'md5': '36d74ef5e37c8b4a2ce92880d208b968', + 'info_dict': { + 'id': '5360463607001', + 'ext': 'mp4', + 'title': '叙利亚失明儿童在废墟上演唱《心跳》 呼吁获得正常童年生活', + 'description': '联合国儿童基金会中东和北非区域大使、作曲家扎德·迪拉尼(Zade Dirani)在3月15日叙利亚冲突爆发7周年纪念日之际发布了为叙利亚谱写的歌曲《心跳》(HEARTBEAT),为受到六年冲突影响的叙利亚儿童发出强烈呐喊,呼吁世界做出共同努力,使叙利亚儿童重新获得享有正常童年生活的权利。', + 'uploader': 'United Nations', + 'uploader_id': '1362235914001', + 'timestamp': 1489593889, + 'upload_date': '20170315', + }, + 'skip': '404 Not Found', + }, { + # Brightcove with UUID in videoPlayer + 'url': 'http://www8.hp.com/cn/zh/home.html', + 'info_dict': { + 'id': '5255815316001', + 'ext': 'mp4', + 'title': 'Sprocket Video - China', + 'description': 'Sprocket Video - China', + 'uploader': 'HP-Video Gallery', + 'timestamp': 1482263210, + 'upload_date': '20161220', + 'uploader_id': '1107601872001', + }, + 'params': { + 'skip_download': True, # m3u8 download + }, + 'skip': 'video rotates...weekly?', + }, { + # Multiple brightcove videos + # https://github.com/ytdl-org/youtube-dl/issues/2283 + 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html', + 'info_dict': { + 'id': 'always-never', + 'title': 'Always / Never - The New Yorker', + }, + 'playlist_count': 3, + 'params': { + 'extract_flat': False, + 'skip_download': True, + }, + 'skip': 'Redirects, page gone', + }, { + # BrightcoveInPageEmbed embed + 'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/', + 'info_dict': { + 'id': '4238694884001', + 'ext': 'flv', + 'title': 'Tabletop: Dread, Last Thoughts', + 'description': 'Tabletop: Dread, Last Thoughts', + 'duration': 51690, + }, + 'skip': 'Redirects, page gone', + }, { + # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions' + # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm + 'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html', + 'info_dict': { + 'id': '4785848093001', + 'ext': 'mp4', + 'title': 'The Cardinal Pell Interview', + 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. 
', + 'uploader': 'GlobeCast Australia - GlobeStream', + 'uploader_id': '2733773828001', + 'upload_date': '20160304', + 'timestamp': 1457083087, + }, + 'params': { + # m3u8 downloads + 'skip_download': True, + }, + 'skip': '404 Not Found', + }, { + # Brightcove embed with whitespace around attribute names + 'url': 'http://www.stack.com/video/3167554373001/learn-to-hit-open-three-pointers-with-damian-lillard-s-baseline-drift-drill', + 'info_dict': { + 'id': '3167554373001', + 'ext': 'mp4', + 'title': "Learn to Hit Open Three-Pointers With Damian Lillard's Baseline Drift Drill", + 'description': 'md5:57bacb0e0f29349de4972bfda3191713', + 'uploader_id': '1079349493', + 'upload_date': '20140207', + 'timestamp': 1391810548, + }, + 'params': { + 'skip_download': True, + }, + 'skip': '410 Gone', + }] + @classmethod def _build_brightcove_url(cls, object_str): """ @@ -281,6 +434,11 @@ class BrightcoveLegacyIE(InfoExtractor): return [src for _, src in re.findall( r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)] + def _extract_from_webpage(self, url, webpage): + bc_urls = self._extract_brightcove_urls(webpage) + for bc_url in bc_urls: + yield self.url_result(smuggle_url(bc_url, {'Referer': url}), BrightcoveLegacyIE) + def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) @@ -336,7 +494,131 @@ class BrightcoveLegacyIE(InfoExtractor): raise UnsupportedError(url) -class BrightcoveNewIE(AdobePassIE): +class BrightcoveNewBaseIE(AdobePassIE): + def _parse_brightcove_metadata(self, json_data, video_id, headers={}): + title = json_data['name'].strip() + + formats, subtitles = [], {} + sources = json_data.get('sources') or [] + for source in sources: + container = source.get('container') + ext = mimetype2ext(source.get('type')) + src = source.get('src') + if ext == 'm3u8' or container == 'M2TS': + if not src: + continue + fmts, subs = self._extract_m3u8_formats_and_subtitles( + src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) + subtitles = self._merge_subtitles(subtitles, subs) + elif ext == 'mpd': + if not src: + continue + fmts, subs = self._extract_mpd_formats_and_subtitles(src, video_id, 'dash', fatal=False) + subtitles = self._merge_subtitles(subtitles, subs) + else: + streaming_src = source.get('streaming_src') + stream_name, app_name = source.get('stream_name'), source.get('app_name') + if not src and not streaming_src and (not stream_name or not app_name): + continue + tbr = float_or_none(source.get('avg_bitrate'), 1000) + height = int_or_none(source.get('height')) + width = int_or_none(source.get('width')) + f = { + 'tbr': tbr, + 'filesize': int_or_none(source.get('size')), + 'container': container, + 'ext': ext or container.lower(), + } + if width == 0 and height == 0: + f.update({ + 'vcodec': 'none', + }) + else: + f.update({ + 'width': width, + 'height': height, + 'vcodec': source.get('codec'), + }) + + def build_format_id(kind): + format_id = kind + if tbr: + format_id += '-%dk' % int(tbr) + if height: + format_id += '-%dp' % height + return format_id + + if src or streaming_src: + f.update({ + 'url': src or streaming_src, + 'format_id': build_format_id('http' if src else 'http-streaming'), + 'source_preference': 0 if src else -1, + }) + else: + f.update({ + 'url': app_name, + 'play_path': stream_name, + 'format_id': build_format_id('rtmp'), + }) + fmts = [f] + + # https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object + if container == 'WVM' or source.get('key_systems') or ext == 
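
`smuggle_url`, used by `_extract_from_webpage` above, piggybacks extra data (here the referring page) onto the URL so it survives the hand-off between extractors; `unsmuggle_url` recovers it on the other side. A quick demonstration with illustrative URLs:

```python
from yt_dlp.utils import smuggle_url, unsmuggle_url

embed = 'https://players.brightcove.net/123/default_default/index.html?videoId=1'
smuggled = smuggle_url(embed, {'Referer': 'https://example.com/page'})
url, data = unsmuggle_url(smuggled, {})
print(url == embed, data)  # True {'Referer': 'https://example.com/page'}
```
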
'ism': + for f in fmts: + f['has_drm'] = True + formats.extend(fmts) + + if not formats: + errors = json_data.get('errors') + if errors: + error = errors[0] + self.raise_no_formats( + error.get('message') or error.get('error_subcode') or error['error_code'], expected=True) + + for f in formats: + f.setdefault('http_headers', {}).update(headers) + + for text_track in json_data.get('text_tracks', []): + if text_track.get('kind') != 'captions': + continue + text_track_url = url_or_none(text_track.get('src')) + if not text_track_url: + continue + lang = (str_or_none(text_track.get('srclang')) + or str_or_none(text_track.get('label')) or 'en').lower() + subtitles.setdefault(lang, []).append({ + 'url': text_track_url, + }) + + is_live = False + duration = float_or_none(json_data.get('duration'), 1000) + if duration is not None and duration <= 0: + is_live = True + + common_res = [(160, 90), (320, 180), (480, 720), (640, 360), (768, 432), (1024, 576), (1280, 720), (1366, 768), (1920, 1080)] + thumb_base_url = dict_get(json_data, ('poster', 'thumbnail')) + thumbnails = [{ + 'url': re.sub(r'\d+x\d+', f'{w}x{h}', thumb_base_url), + 'width': w, + 'height': h, + } for w, h in common_res] if thumb_base_url else None + + return { + 'id': video_id, + 'title': title, + 'description': clean_html(json_data.get('description')), + 'thumbnails': thumbnails, + 'duration': duration, + 'timestamp': parse_iso8601(json_data.get('published_at')), + 'uploader_id': json_data.get('account_id'), + 'formats': formats, + 'subtitles': subtitles, + 'tags': json_data.get('tags', []), + 'is_live': is_live, + } + + +class BrightcoveNewIE(BrightcoveNewBaseIE): IE_NAME = 'brightcove:new' _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*(?P<content_type>video|playlist)Id=(?P<video_id>\d+|ref:[^&]+)' _TESTS = [{ @@ -353,6 +635,7 @@ class BrightcoveNewIE(AdobePassIE): 'uploader_id': '929656772001', 'formats': 'mincount:20', }, + 'skip': '404 Not Found', }, { # with rtmp streams 'url': 'http://players.brightcove.net/4036320279001/5d112ed9-283f-485f-a7f9-33f42e8bc042_default/index.html?videoId=4279049078001', @@ -400,6 +683,107 @@ class BrightcoveNewIE(AdobePassIE): 'only_matching': True, }] + _WEBPAGE_TESTS = [{ + # brightcove player url embed + 'url': 'https://nbc-2.com/weather/forecast/2022/11/16/forecast-warmest-day-of-the-week/', + 'md5': '2934d5372b354d27083ccf8575dbfee2', + 'info_dict': { + 'id': '6315650313112', + 'title': 'First Alert Forecast: November 15, 2022', + 'ext': 'mp4', + 'tags': ['nbc2', 'forecast'], + 'uploader_id': '6146886170001', + 'thumbnail': r're:^https?://.*\.jpg$', + 'timestamp': 1668574571, + 'duration': 233.375, + 'upload_date': '20221116', + }, + }, { + # embedded with video tag only + 'url': 'https://www.gooddishtv.com/tiktok-rapping-chef-mr-pyrex', + 'info_dict': { + 'id': 'tiktok-rapping-chef-mr-pyrex', + 'title': 'TikTok\'s Rapping Chef Makes Jambalaya for the Hosts', + 'thumbnail': r're:^https?://.*\.jpg$', + 'age_limit': 0, + 'description': 'Just in time for Mardi Gras', + }, + 'playlist': [{ + 'info_dict': { + 'id': '6299189544001', + 'ext': 'mp4', + 'title': 'TGD_01-032_5', + 'thumbnail': r're:^https?://.*\.jpg$', + 'tags': [], + 'timestamp': 1646078943, + 'uploader_id': '1569565978001', + 'upload_date': '20220228', + 'duration': 217.195, + }, + }, { + 'info_dict': { + 'id': '6305565995112', + 'ext': 'mp4', + 'title': 'TGD 01-087 (Airs 05.25.22)_Segment 5', + 'thumbnail': r're:^https?://.*\.jpg$', + 'tags': [], + 
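
A worked example of the `format_id` scheme that `_parse_brightcove_metadata` builds above, reproduced standalone with sample numbers:

```python
def build_format_id(kind, tbr=None, height=None):
    format_id = kind
    if tbr:
        format_id += '-%dk' % int(tbr)
    if height:
        format_id += '-%dp' % height
    return format_id

assert build_format_id('http', tbr=1128.0, height=720) == 'http-1128k-720p'
assert build_format_id('rtmp') == 'rtmp'
```
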
'timestamp': 1651604591, + 'uploader_id': '1569565978001', + 'upload_date': '20220503', + 'duration': 310.421, + }, + }], + }, { + # Brightcove:new type [2]. + 'url': 'http://www.delawaresportszone.com/video-st-thomas-more-earns-first-trip-to-basketball-semis', + 'md5': '2b35148fcf48da41c9fb4591650784f3', + 'info_dict': { + 'id': '5348741021001', + 'ext': 'mp4', + 'upload_date': '20170306', + 'uploader_id': '4191638492001', + 'timestamp': 1488769918, + 'title': 'VIDEO: St. Thomas More earns first trip to basketball semis', + }, + 'skip': '404 Not Found', + }, { + # Alternative brightcove <video> attributes + 'url': 'http://www.programme-tv.net/videos/extraits/81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche/', + 'info_dict': { + 'id': '81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche', + 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche, Extraits : toutes les vidéos avec Télé-Loisirs", + }, + 'playlist': [{ + 'md5': '732d22ba3d33f2f3fc253c39f8f36523', + 'info_dict': { + 'id': '5311302538001', + 'ext': 'mp4', + 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche", + 'description': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche (France 2, 5 février 2017)", + 'timestamp': 1486321708, + 'upload_date': '20170205', + 'uploader_id': '800000640001', + }, + 'only_matching': True, + }], + 'skip': '404 Not Found', + }, { + # Brightcove URL in single quotes + 'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/', + 'md5': '4ae374f1f8b91c889c4b9203c8c752af', + 'info_dict': { + 'id': '4255764656001', + 'ext': 'mp4', + 'title': 'SN Presents: Russell Martin, World Citizen', + 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. 
Written and narrated by Stephen Brunt.', + 'uploader': 'Rogers Sportsnet', + 'uploader_id': '1704050871', + 'upload_date': '20150525', + 'timestamp': 1432570283, + }, + 'skip': 'Page no longer has URL, now has javascript', + }] + @staticmethod def _extract_url(ie, webpage): urls = BrightcoveNewIE._extract_brightcove_urls(ie, webpage) @@ -466,127 +850,10 @@ class BrightcoveNewIE(AdobePassIE): return entries - def _parse_brightcove_metadata(self, json_data, video_id, headers={}): - title = json_data['name'].strip() - - formats, subtitles = [], {} - sources = json_data.get('sources') or [] - for source in sources: - container = source.get('container') - ext = mimetype2ext(source.get('type')) - src = source.get('src') - if ext == 'm3u8' or container == 'M2TS': - if not src: - continue - fmts, subs = self._extract_m3u8_formats_and_subtitles( - src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) - subtitles = self._merge_subtitles(subtitles, subs) - elif ext == 'mpd': - if not src: - continue - fmts, subs = self._extract_mpd_formats_and_subtitles(src, video_id, 'dash', fatal=False) - subtitles = self._merge_subtitles(subtitles, subs) - else: - streaming_src = source.get('streaming_src') - stream_name, app_name = source.get('stream_name'), source.get('app_name') - if not src and not streaming_src and (not stream_name or not app_name): - continue - tbr = float_or_none(source.get('avg_bitrate'), 1000) - height = int_or_none(source.get('height')) - width = int_or_none(source.get('width')) - f = { - 'tbr': tbr, - 'filesize': int_or_none(source.get('size')), - 'container': container, - 'ext': ext or container.lower(), - } - if width == 0 and height == 0: - f.update({ - 'vcodec': 'none', - }) - else: - f.update({ - 'width': width, - 'height': height, - 'vcodec': source.get('codec'), - }) - - def build_format_id(kind): - format_id = kind - if tbr: - format_id += '-%dk' % int(tbr) - if height: - format_id += '-%dp' % height - return format_id - - if src or streaming_src: - f.update({ - 'url': src or streaming_src, - 'format_id': build_format_id('http' if src else 'http-streaming'), - 'source_preference': 0 if src else -1, - }) - else: - f.update({ - 'url': app_name, - 'play_path': stream_name, - 'format_id': build_format_id('rtmp'), - }) - fmts = [f] - - # https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object - if container == 'WVM' or source.get('key_systems') or ext == 'ism': - for f in fmts: - f['has_drm'] = True - formats.extend(fmts) - - if not formats: - errors = json_data.get('errors') - if errors: - error = errors[0] - self.raise_no_formats( - error.get('message') or error.get('error_subcode') or error['error_code'], expected=True) - - for f in formats: - f.setdefault('http_headers', {}).update(headers) - - for text_track in json_data.get('text_tracks', []): - if text_track.get('kind') != 'captions': - continue - text_track_url = url_or_none(text_track.get('src')) - if not text_track_url: - continue - lang = (str_or_none(text_track.get('srclang')) - or str_or_none(text_track.get('label')) or 'en').lower() - subtitles.setdefault(lang, []).append({ - 'url': text_track_url, - }) - - is_live = False - duration = float_or_none(json_data.get('duration'), 1000) - if duration is not None and duration <= 0: - is_live = True - - common_res = [(160, 90), (320, 180), (480, 720), (640, 360), (768, 432), (1024, 576), (1280, 720), (1366, 768), (1920, 1080)] - thumb_base_url = dict_get(json_data, ('poster', 'thumbnail')) - thumbnails = [{ - 'url': re.sub(r'\d+x\d+', 
f'{w}x{h}', thumb_base_url), - 'width': w, - 'height': h, - } for w, h in common_res] if thumb_base_url else None - - return { - 'id': video_id, - 'title': title, - 'description': clean_html(json_data.get('description')), - 'thumbnails': thumbnails, - 'duration': duration, - 'timestamp': parse_iso8601(json_data.get('published_at')), - 'uploader_id': json_data.get('account_id'), - 'formats': formats, - 'subtitles': subtitles, - 'tags': json_data.get('tags', []), - 'is_live': is_live, - } + def _extract_from_webpage(self, url, webpage): + bc_urls = self._extract_brightcove_urls(self, webpage) + for bc_url in bc_urls: + yield self.url_result(smuggle_url(bc_url, {'referrer': url}), BrightcoveNewIE) def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) @@ -630,7 +897,7 @@ class BrightcoveNewIE(AdobePassIE): api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id) headers = {} - referrer = smuggled_data.get('referrer') + referrer = smuggled_data.get('referrer') # XXX: notice the spelling/case of the key if referrer: headers.update({ 'Referer': referrer, diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 85581e622..51a6cbf06 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -5,7 +5,6 @@ import urllib.parse import xml.etree.ElementTree from .common import InfoExtractor # isort: split -from .brightcove import BrightcoveLegacyIE, BrightcoveNewIE from .commonprotocols import RtmpIE from .youtube import YoutubeIE from ..compat import compat_etree_fromstring @@ -361,188 +360,6 @@ class GenericIE(InfoExtractor): }, 'skip': 'There is a limit of 200 free downloads / month for the test song', }, - { - # embedded brightcove video - # it also tests brightcove videos that need to set the 'Referer' - # in the http requests - 'add_ie': ['BrightcoveLegacy'], - 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/', - 'info_dict': { - 'id': '2765128793001', - 'ext': 'mp4', - 'title': 'Le cours de bourse : l’analyse technique', - 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9', - 'uploader': 'BFM BUSINESS', - }, - 'params': { - 'skip_download': True, - }, - }, - { - # embedded with itemprop embedURL and video id spelled as `idVideo` - 'add_id': ['BrightcoveLegacy'], - 'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/', - 'info_dict': { - 'id': '5255628253001', - 'ext': 'mp4', - 'title': 'md5:37c519b1128915607601e75a87995fc0', - 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26', - 'uploader': 'BFM BUSINESS', - 'uploader_id': '876450612001', - 'timestamp': 1482255315, - 'upload_date': '20161220', - }, - 'params': { - 'skip_download': True, - }, - }, - { - # https://github.com/ytdl-org/youtube-dl/issues/2253 - 'url': 'http://bcove.me/i6nfkrc3', - 'md5': '0ba9446db037002366bab3b3eb30c88c', - 'info_dict': { - 'id': '3101154703001', - 'ext': 'mp4', - 'title': 'Still no power', - 'uploader': 'thestar.com', - 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.', - }, - 'add_ie': ['BrightcoveLegacy'], - 'skip': 'video gone', - }, - { - 'url': 'http://www.championat.com/video/football/v/87/87499.html', - 'md5': 'fb973ecf6e4a78a67453647444222983', - 'info_dict': { - 'id': '3414141473001', - 'ext': 'mp4', - 'title': 'Видео. 
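
The thumbnail helper shown in the metadata parser relies on a size placeholder in the poster URL; `re.sub` swaps it for each entry in `common_res` (the sample URL here is invented):

```python
import re

thumb_base_url = 'https://images.example.com/poster/abc123-d_160x90.jpg'
common_res = [(640, 360), (1280, 720), (1920, 1080)]
thumbnails = [{
    'url': re.sub(r'\d+x\d+', f'{w}x{h}', thumb_base_url),
    'width': w,
    'height': h,
} for w, h in common_res]
print(thumbnails[1]['url'])  # https://images.example.com/poster/abc123-d_1280x720.jpg
```
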
Удаление Дзагоева (ЦСКА)', - 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"', - 'uploader': 'Championat', - }, - }, - { - # https://github.com/ytdl-org/youtube-dl/issues/3541 - 'add_ie': ['BrightcoveLegacy'], - 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1', - 'info_dict': { - 'id': '3866516442001', - 'ext': 'mp4', - 'title': 'Leer mij vrouwen kennen: Aflevering 1', - 'description': 'Leer mij vrouwen kennen: Aflevering 1', - 'uploader': 'SBS Broadcasting', - }, - 'skip': 'Restricted to Netherlands', - 'params': { - 'skip_download': True, # m3u8 download - }, - }, - { - # Brightcove video in <iframe> - 'url': 'http://www.un.org/chinese/News/story.asp?NewsID=27724', - 'md5': '36d74ef5e37c8b4a2ce92880d208b968', - 'info_dict': { - 'id': '5360463607001', - 'ext': 'mp4', - 'title': '叙利亚失明儿童在废墟上演唱《心跳》 呼吁获得正常童年生活', - 'description': '联合国儿童基金会中东和北非区域大使、作曲家扎德·迪拉尼(Zade Dirani)在3月15日叙利亚冲突爆发7周年纪念日之际发布了为叙利亚谱写的歌曲《心跳》(HEARTBEAT),为受到六年冲突影响的叙利亚儿童发出强烈呐喊,呼吁世界做出共同努力,使叙利亚儿童重新获得享有正常童年生活的权利。', - 'uploader': 'United Nations', - 'uploader_id': '1362235914001', - 'timestamp': 1489593889, - 'upload_date': '20170315', - }, - 'add_ie': ['BrightcoveLegacy'], - }, - { - # Brightcove with alternative playerID key - 'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html', - 'info_dict': { - 'id': 'nmeth.2062_SV1', - 'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research', - }, - 'playlist': [{ - 'info_dict': { - 'id': '2228375078001', - 'ext': 'mp4', - 'title': 'nmeth.2062-sv1', - 'description': 'nmeth.2062-sv1', - 'timestamp': 1363357591, - 'upload_date': '20130315', - 'uploader': 'Nature Publishing Group', - 'uploader_id': '1964492299001', - }, - }], - }, - { - # Brightcove with UUID in videoPlayer - 'url': 'http://www8.hp.com/cn/zh/home.html', - 'info_dict': { - 'id': '5255815316001', - 'ext': 'mp4', - 'title': 'Sprocket Video - China', - 'description': 'Sprocket Video - China', - 'uploader': 'HP-Video Gallery', - 'timestamp': 1482263210, - 'upload_date': '20161220', - 'uploader_id': '1107601872001', - }, - 'params': { - 'skip_download': True, # m3u8 download - }, - 'skip': 'video rotates...weekly?', - }, - { - # Brightcove:new type [2]. - 'url': 'http://www.delawaresportszone.com/video-st-thomas-more-earns-first-trip-to-basketball-semis', - 'md5': '2b35148fcf48da41c9fb4591650784f3', - 'info_dict': { - 'id': '5348741021001', - 'ext': 'mp4', - 'upload_date': '20170306', - 'uploader_id': '4191638492001', - 'timestamp': 1488769918, - 'title': 'VIDEO: St. 
Thomas More earns first trip to basketball semis', - - }, - }, - { - # Alternative brightcove <video> attributes - 'url': 'http://www.programme-tv.net/videos/extraits/81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche/', - 'info_dict': { - 'id': '81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche', - 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche, Extraits : toutes les vidéos avec Télé-Loisirs", - }, - 'playlist': [{ - 'md5': '732d22ba3d33f2f3fc253c39f8f36523', - 'info_dict': { - 'id': '5311302538001', - 'ext': 'mp4', - 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche", - 'description': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche (France 2, 5 février 2017)", - 'timestamp': 1486321708, - 'upload_date': '20170205', - 'uploader_id': '800000640001', - }, - 'only_matching': True, - }], - }, - { - # Brightcove with UUID in videoPlayer - 'url': 'http://www8.hp.com/cn/zh/home.html', - 'info_dict': { - 'id': '5255815316001', - 'ext': 'mp4', - 'title': 'Sprocket Video - China', - 'description': 'Sprocket Video - China', - 'uploader': 'HP-Video Gallery', - 'timestamp': 1482263210, - 'upload_date': '20161220', - 'uploader_id': '1107601872001', - }, - 'params': { - 'skip_download': True, # m3u8 download - }, - }, # ooyala video { 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219', @@ -846,20 +663,6 @@ class GenericIE(InfoExtractor): 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com', } }, - # Multiple brightcove videos - # https://github.com/ytdl-org/youtube-dl/issues/2283 - { - 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html', - 'info_dict': { - 'id': 'always-never', - 'title': 'Always / Never - The New Yorker', - }, - 'playlist_count': 3, - 'params': { - 'extract_flat': False, - 'skip_download': True, - } - }, # MLB embed { 'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/', @@ -1352,21 +1155,6 @@ class GenericIE(InfoExtractor): }, 'expected_warnings': ['Failed to parse JSON Expecting value'], }, - # Brightcove URL in single quotes - { - 'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/', - 'md5': '4ae374f1f8b91c889c4b9203c8c752af', - 'info_dict': { - 'id': '4255764656001', - 'ext': 'mp4', - 'title': 'SN Presents: Russell Martin, World Citizen', - 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. 
Written and narrated by Stephen Brunt.', - 'uploader': 'Rogers Sportsnet', - 'uploader_id': '1704050871', - 'upload_date': '20150525', - 'timestamp': 1432570283, - }, - }, # Kinja embed { 'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537', @@ -1402,52 +1190,6 @@ class GenericIE(InfoExtractor): 'duration': 248.667, }, }, - # BrightcoveInPageEmbed embed - { - 'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/', - 'info_dict': { - 'id': '4238694884001', - 'ext': 'flv', - 'title': 'Tabletop: Dread, Last Thoughts', - 'description': 'Tabletop: Dread, Last Thoughts', - 'duration': 51690, - }, - }, - # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions' - # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm - { - 'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html', - 'info_dict': { - 'id': '4785848093001', - 'ext': 'mp4', - 'title': 'The Cardinal Pell Interview', - 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ', - 'uploader': 'GlobeCast Australia - GlobeStream', - 'uploader_id': '2733773828001', - 'upload_date': '20160304', - 'timestamp': 1457083087, - }, - 'params': { - # m3u8 downloads - 'skip_download': True, - }, - }, - { - # Brightcove embed with whitespace around attribute names - 'url': 'http://www.stack.com/video/3167554373001/learn-to-hit-open-three-pointers-with-damian-lillard-s-baseline-drift-drill', - 'info_dict': { - 'id': '3167554373001', - 'ext': 'mp4', - 'title': "Learn to Hit Open Three-Pointers With Damian Lillard's Baseline Drift Drill", - 'description': 'md5:57bacb0e0f29349de4972bfda3191713', - 'uploader_id': '1079349493', - 'upload_date': '20140207', - 'timestamp': 1391810548, - }, - 'params': { - 'skip_download': True, - }, - }, # Another form of arte.tv embed { 'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html', @@ -1498,7 +1240,7 @@ class GenericIE(InfoExtractor): 'timestamp': 1464107587, 'uploader': 'TheAtlantic', }, - 'add_ie': ['BrightcoveLegacy'], + 'skip': 'Private Youtube video', }, # Facebook <iframe> embed { @@ -2730,16 +2472,6 @@ class GenericIE(InfoExtractor): # There probably should be a second run of generic extractor on unescaped webpage. 
# webpage = urllib.parse.unquote(webpage) - # TODO: Move to respective extractors - bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage) - if bc_urls: - return [self.url_result(smuggle_url(bc_url, {'Referer': url}), BrightcoveLegacyIE) - for bc_url in bc_urls] - bc_urls = BrightcoveNewIE._extract_brightcove_urls(self, webpage) - if bc_urls: - return [self.url_result(smuggle_url(bc_url, {'Referer': url}), BrightcoveNewIE) - for bc_url in bc_urls] - embeds = [] for ie in self._downloader._ies.values(): if ie.ie_key() in smuggled_data.get('block_ies', []): diff --git a/yt_dlp/extractor/sevenplus.py b/yt_dlp/extractor/sevenplus.py index 36d1a86fd..222bf6ce7 100644 --- a/yt_dlp/extractor/sevenplus.py +++ b/yt_dlp/extractor/sevenplus.py @@ -1,7 +1,7 @@ import json import re -from .brightcove import BrightcoveNewIE +from .brightcove import BrightcoveNewBaseIE from ..compat import ( compat_HTTPError, compat_str, @@ -13,7 +13,7 @@ from ..utils import ( ) -class SevenPlusIE(BrightcoveNewIE): # XXX: Do not subclass from concrete IE +class SevenPlusIE(BrightcoveNewBaseIE): IE_NAME = '7plus' _VALID_URL = r'https?://(?:www\.)?7plus\.com\.au/(?P<path>[^?]+\?.*?\bepisode-id=(?P<id>[^&#]+))' _TESTS = [{ From 9a0416c6a5e87c577cb5079e75e3ae63ee948d80 Mon Sep 17 00:00:00 2001 From: nixxo <nixxo@protonmail.com> Date: Fri, 18 Nov 2022 02:12:02 +0100 Subject: [PATCH 1779/2552] [extractor/twitter:spaces] Add 'Referer' to m3u8 (#5580) Closes #5565 Authored by: nixxo --- yt_dlp/extractor/twitter.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py index 3c81473dc..62b34d081 100644 --- a/yt_dlp/extractor/twitter.py +++ b/yt_dlp/extractor/twitter.py @@ -1167,7 +1167,8 @@ class TwitterSpacesIE(TwitterBaseIE): # XXX: Native downloader does not work formats = self._extract_m3u8_formats( traverse_obj(source, 'noRedirectPlaybackUrl', 'location'), - metadata['media_key'], 'm4a', 'm3u8', live=live_status == 'is_live') + metadata['media_key'], 'm4a', 'm3u8', live=live_status == 'is_live', + headers={'Referer': 'https://twitter.com/'}) for fmt in formats: fmt.update({'vcodec': 'none', 'acodec': 'aac'}) From 352e7d987323e9df9205ee117a604ee4123231c2 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 18 Nov 2022 02:00:11 +0000 Subject: [PATCH 1780/2552] [extractor/twitter] Refresh guest token when expired (#5560) Closes #5548 Authored by: bashonly, Grub4K --- yt_dlp/extractor/twitter.py | 92 ++++++++++++++++++++----------------- 1 file changed, 50 insertions(+), 42 deletions(-) diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py index 62b34d081..18ebb3617 100644 --- a/yt_dlp/extractor/twitter.py +++ b/yt_dlp/extractor/twitter.py @@ -107,46 +107,54 @@ class TwitterBaseIE(InfoExtractor): 'x-twitter-active-user': 'yes', }) - result, last_error = None, None + last_error = None for bearer_token in self._TOKENS: - headers['Authorization'] = f'Bearer {bearer_token}' + for first_attempt in (True, False): + headers['Authorization'] = f'Bearer {bearer_token}' - if not self.is_logged_in: - if not self._TOKENS[bearer_token]: - headers.pop('x-guest-token', None) - guest_token_response = self._download_json( - self._API_BASE + 'guest/activate.json', video_id, - 'Downloading guest token', data=b'', headers=headers) - - self._TOKENS[bearer_token] = guest_token_response.get('guest_token') + if not self.is_logged_in: if not self._TOKENS[bearer_token]: - raise ExtractorError('Could not retrieve guest 
token') - headers['x-guest-token'] = self._TOKENS[bearer_token] - - try: - allowed_status = {400, 403, 404} if graphql else {403} - result = self._download_json( - (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path, - video_id, headers=headers, query=query, expected_status=allowed_status) - break - - except ExtractorError as e: - if last_error: - raise last_error - elif not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code != 404: - raise - last_error = e - self.report_warning( - 'Twitter API gave 404 response, retrying with deprecated token. ' - 'Only one media item can be extracted') - - if result.get('errors'): - error_message = ', '.join(set(traverse_obj( - result, ('errors', ..., 'message'), expected_type=str))) or 'Unknown error' - raise ExtractorError(f'Error(s) while querying api: {error_message}', expected=True) - - assert result is not None - return result + headers.pop('x-guest-token', None) + guest_token_response = self._download_json( + self._API_BASE + 'guest/activate.json', video_id, + 'Downloading guest token', data=b'', headers=headers) + + self._TOKENS[bearer_token] = guest_token_response.get('guest_token') + if not self._TOKENS[bearer_token]: + raise ExtractorError('Could not retrieve guest token') + + headers['x-guest-token'] = self._TOKENS[bearer_token] + + try: + allowed_status = {400, 403, 404} if graphql else {403} + result = self._download_json( + (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path, + video_id, headers=headers, query=query, expected_status=allowed_status) + + except ExtractorError as e: + if last_error: + raise last_error + + if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code != 404: + raise + + last_error = e + self.report_warning( + 'Twitter API gave 404 response, retrying with deprecated auth token. ' + 'Only one media item can be extracted') + break # continue outer loop with next bearer_token + + if result.get('errors'): + errors = traverse_obj(result, ('errors', ..., 'message'), expected_type=str) + if first_attempt and any('bad guest token' in error.lower() for error in errors): + self.to_screen('Guest token has expired. Refreshing guest token') + self._TOKENS[bearer_token] = None + continue + + error_message = ', '.join(set(errors)) or 'Unknown error' + raise ExtractorError(f'Error(s) while querying API: {error_message}', expected=True) + + return result def _build_graphql_query(self, media_id): raise NotImplementedError('Method must be implemented to support GraphQL') @@ -328,7 +336,7 @@ class TwitterIE(TwitterBaseIE): 'id': '665052190608723968', 'display_id': '665052190608723968', 'ext': 'mp4', - 'title': 'md5:3f57ab5d35116537a2ae7345cd0060d8', + 'title': 'md5:55fef1d5b811944f1550e91b44abb82e', 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ', 'uploader_id': 'starwars', 'uploader': r're:Star Wars.*', @@ -364,6 +372,7 @@ class TwitterIE(TwitterBaseIE): # Test case of TwitterCardIE 'skip_download': True, }, + 'skip': 'Dead external link', }, { 'url': 'https://twitter.com/jaydingeer/status/700207533655363584', 'info_dict': { @@ -568,10 +577,10 @@ class TwitterIE(TwitterBaseIE): 'id': '1577855447914409984', 'display_id': '1577855540407197696', 'ext': 'mp4', - 'title': 'oshtru \U0001faac\U0001f47d - gm \u2728\ufe0f now I can post image and video. nice update.', - 'description': 'gm \u2728\ufe0f now I can post image and video. nice update. 
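
The expiry handling above keys off the API's error payload rather than the HTTP status: if any error message mentions a bad guest token on the first attempt, the cached token is discarded and the loop runs once more. The detection step on its own, against a mocked response:

```python
from yt_dlp.utils import traverse_obj

result = {'errors': [{'message': 'Bad guest token.'}]}  # mocked API response
errors = traverse_obj(result, ('errors', ..., 'message'), expected_type=str)
assert any('bad guest token' in error.lower() for error in errors)
```
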
https://t.co/cG7XgiINOm', + 'title': 'md5:9d198efb93557b8f8d5b78c480407214', + 'description': 'md5:b9c3699335447391d11753ab21c70a74', 'upload_date': '20221006', - 'uploader': 'oshtru \U0001faac\U0001f47d', + 'uploader': 'oshtru', 'uploader_id': 'oshtru', 'uploader_url': 'https://twitter.com/oshtru', 'thumbnail': r're:^https?://.*\.jpg', @@ -1096,7 +1105,6 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE): class TwitterSpacesIE(TwitterBaseIE): IE_NAME = 'twitter:spaces' _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})' - _TWITTER_GRAPHQL = 'https://twitter.com/i/api/graphql/HPEisOmj1epUNLCWTYhUWw/' _TESTS = [{ 'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL', From ed027fd9d8c0832d6186b3591ca51622e34a072d Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 18 Nov 2022 02:04:03 +0000 Subject: [PATCH 1781/2552] [extractor/generic] Fix JSON LD manifest extraction (#5577) Closes #5572 Authored by: bashonly, pukkandan --- yt_dlp/extractor/generic.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 51a6cbf06..5da77273d 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -10,6 +10,7 @@ from .youtube import YoutubeIE from ..compat import compat_etree_fromstring from ..utils import ( KNOWN_EXTENSIONS, + MEDIA_EXTENSIONS, ExtractorError, UnsupportedError, determine_ext, @@ -2572,8 +2573,9 @@ class GenericIE(InfoExtractor): json_ld = self._search_json_ld(webpage, video_id, default={}) if json_ld.get('url') not in (url, None): self.report_detected('JSON LD') + is_direct = json_ld.get('ext') not in (None, *MEDIA_EXTENSIONS.manifests) return [merge_dicts({ - '_type': 'video' if json_ld.get('ext') else 'url_transparent', + '_type': 'video' if is_direct else 'url_transparent', 'url': smuggle_url(json_ld['url'], { 'force_videoid': video_id, 'to_generic': True, From 8486540257c8f1532654cafb4e22b099ba62a287 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Sat, 19 Nov 2022 08:42:06 +0530 Subject: [PATCH 1782/2552] [extractor/unsupported] Add more URLs Closes #5557, Closes #2744, Closes #5578 --- yt_dlp/extractor/unsupported.py | 40 ++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/yt_dlp/extractor/unsupported.py b/yt_dlp/extractor/unsupported.py index e40666ec0..b9cb31beb 100644 --- a/yt_dlp/extractor/unsupported.py +++ b/yt_dlp/extractor/unsupported.py @@ -39,20 +39,22 @@ class KnownDRMIE(UnsupportedInfoExtractor): r'(?:[\w\.]+\.)?mech-plus\.com', r'aha\.video', r'mubi\.com', - r'vootkids\.com' + r'vootkids\.com', + r'nowtv\.it/watch', + r'tv\.apple\.com', ) _TESTS = [{ # https://github.com/yt-dlp/yt-dlp/issues/4309 - 'url': 'https://www.peacocktv.com', + 'url': 'https://peacocktv.com/watch/playback/vod/GMO_00000000073159_01/f9d03003-eb04-3c7f-a7b6-a83ab7eb55bc', 'only_matching': True, }, { # https://github.com/yt-dlp/yt-dlp/issues/1719, - 'url': 'https://www.channel4.com', + 'url': 'https://www.channel4.com/programmes/gurren-lagann/on-demand/69960-001', 'only_matching': True, }, { # https://github.com/yt-dlp/yt-dlp/issues/1548 - 'url': 'https://www.channel5.com', + 'url': 'https://www.channel5.com/show/uk-s-strongest-man-2021/season-2021/episode-1', 'only_matching': True, }, { 'url': r'https://hsesn.apps.disneyplus.com', @@ -67,39 +69,47 @@ class KnownDRMIE(UnsupportedInfoExtractor): 'url': 'https://open.spotify.com/track/', 'only_matching': True, 
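
The JSON-LD fix above treats a URL as a direct video only when its extension is known and is not a manifest type, with `MEDIA_EXTENSIONS.manifests` supplying that list. A quick check, assuming the namespace covers the m3u8/mpd manifest family:

```python
from yt_dlp.utils import MEDIA_EXTENSIONS

def is_direct(ext):
    return ext not in (None, *MEDIA_EXTENSIONS.manifests)

print(is_direct('mp4'))   # True: the JSON-LD URL is the media itself
print(is_direct('m3u8'))  # False: hand the manifest back for re-extraction
```
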
}, { - # TVNZ: https://github.com/yt-dlp/yt-dlp/issues/4122 - 'url': 'https://tvnz.co.nz', + # https://github.com/yt-dlp/yt-dlp/issues/4122 + 'url': 'https://www.tvnz.co.nz/shows/ice-airport-alaska/episodes/s1-e1', 'only_matching': True, }, { # https://github.com/yt-dlp/yt-dlp/issues/1922 - 'url': 'https://www.oneplus.ch', + 'url': 'https://www.oneplus.ch/play/1008188', 'only_matching': True, }, { # https://github.com/yt-dlp/yt-dlp/issues/1140 - 'url': 'https://www.artstation.com/learning/courses/', + 'url': 'https://www.artstation.com/learning/courses/dqQ/character-design-masterclass-with-serge-birault/chapters/Rxn3/introduction', 'only_matching': True, }, { # https://github.com/yt-dlp/yt-dlp/issues/3544 - 'url': 'https://www.philo.com', + 'url': 'https://www.philo.com/player/player/vod/Vk9EOjYwODU0ODg5OTY0ODY0OTQ5NA', 'only_matching': True, }, { # https://github.com/yt-dlp/yt-dlp/issues/3533 - 'url': 'https://www.mech-plus.com/', + 'url': 'https://www.mech-plus.com/player/24892/stream?assetType=episodes&playlist_id=6', 'only_matching': True, }, { - 'url': 'https://watch.mech-plus.com/', + 'url': 'https://watch.mech-plus.com/details/25240?playlist_id=6', 'only_matching': True, }, { # https://github.com/yt-dlp/yt-dlp/issues/2934 - 'url': 'https://www.aha.video', + 'url': 'https://www.aha.video/player/movie/lucky-man', 'only_matching': True, }, { # https://github.com/yt-dlp/yt-dlp/issues/2743 - 'url': 'https://mubi.com', + 'url': 'https://mubi.com/films/the-night-doctor', 'only_matching': True, }, { # https://github.com/yt-dlp/yt-dlp/issues/3287 - 'url': 'https://www.vootkids.com', + 'url': 'https://www.vootkids.com/movies/chhota-bheem-the-rise-of-kirmada/764459', + 'only_matching': True, + }, { + # https://github.com/yt-dlp/yt-dlp/issues/2744 + 'url': 'https://www.nowtv.it/watch/home/asset/and-just-like-that/skyserie_f8fe979772e8437d8a61ab83b6d293e9/seasons/1/episodes/8/R_126182_HD', + 'only_matching': True, + }, { + # https://github.com/yt-dlp/yt-dlp/issues/5557 + 'url': 'https://tv.apple.com/it/show/loot---una-fortuna/umc.cmc.5erbujil1mpazuerhr1udnk45?ctx_brand=tvs.sbd.4000', 'only_matching': True, }] @@ -119,7 +129,7 @@ class KnownPiracyIE(UnsupportedInfoExtractor): """ URLS = ( - r'dood\.(?:to|watch|so|pm|wf|ru)', + r'dood\.(?:to|watch|so|pm|wf|re)', ) _TESTS = [{ From 29ca408219947914b5ce1d2fa1c268a4397719f8 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Fri, 18 Nov 2022 11:31:15 +0530 Subject: [PATCH 1783/2552] [FormatSort] Add `mov` to `vext` Closes #5581 --- README.md | 4 ++-- yt_dlp/utils.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 367c6e036..f336dcb6a 100644 --- a/README.md +++ b/README.md @@ -1490,7 +1490,7 @@ The available fields are: - `vcodec`: Video Codec (`av01` > `vp9.2` > `vp9` > `h265` > `h264` > `vp8` > `h263` > `theora` > other) - `acodec`: Audio Codec (`flac`/`alac` > `wav`/`aiff` > `opus` > `vorbis` > `aac` > `mp4a` > `mp3` > `eac3` > `ac3` > `dts` > other) - `codec`: Equivalent to `vcodec,acodec` - - `vext`: Video Extension (`mp4` > `webm` > `flv` > other). If `--prefer-free-formats` is used, `webm` is preferred. + - `vext`: Video Extension (`mp4` > `mov` > `webm` > `flv` > other). If `--prefer-free-formats` is used, `webm` is preferred. - `aext`: Audio Extension (`m4a` > `aac` > `mp3` > `ogg` > `opus` > `webm` > other). 
If `--prefer-free-formats` is used, the order changes to `ogg` > `opus` > `webm` > `mp3` > `m4a` > `aac` - `ext`: Equivalent to `vext,aext` - `filesize`: Exact filesize, if known in advance @@ -1566,7 +1566,7 @@ $ yt-dlp -S "+size,+br" $ yt-dlp -f "bv*[ext=mp4]+ba[ext=m4a]/b[ext=mp4] / bv*+ba/b" # Download the best video with the best extension -# (For video, mp4 > webm > flv. For audio, m4a > aac > mp3 ...) +# (For video, mp4 > mov > webm > flv. For audio, m4a > aac > mp3 ...) $ yt-dlp -S "ext" diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 0283c45f6..d351d0e36 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -6020,8 +6020,8 @@ class FormatSorter: 'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol', 'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.*', '.*dash', 'websocket_frag', 'rtmpe?', '', 'mms|rtsp', 'ws|websocket', 'f4']}, 'vext': {'type': 'ordered', 'field': 'video_ext', - 'order': ('mp4', 'webm', 'flv', '', 'none'), - 'order_free': ('webm', 'mp4', 'flv', '', 'none')}, + 'order': ('mp4', 'mov', 'webm', 'flv', '', 'none'), + 'order_free': ('webm', 'mp4', 'mov', 'flv', '', 'none')}, 'aext': {'type': 'ordered', 'field': 'audio_ext', 'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'webm', '', 'none'), 'order_free': ('ogg', 'opus', 'webm', 'mp3', 'm4a', 'aac', '', 'none')}, From 02b2f9fa7de583f2bfdebe568f608c9b9398d316 Mon Sep 17 00:00:00 2001 From: chengzhicn <14885347+chengzhicn@users.noreply.github.com> Date: Sun, 20 Nov 2022 04:14:21 +0800 Subject: [PATCH 1784/2552] [extractor/reddit] Add vcodec to fallback format (#5591) Authored by: chengzhicn --- yt_dlp/extractor/reddit.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py index cfd79abfd..171affb93 100644 --- a/yt_dlp/extractor/reddit.py +++ b/yt_dlp/extractor/reddit.py @@ -171,6 +171,7 @@ class RedditIE(InfoExtractor): 'width': int_or_none(reddit_video.get('width')), 'tbr': int_or_none(reddit_video.get('bitrate_kbps')), 'acodec': 'none', + 'vcodec': 'h264', 'ext': 'mp4', 'format_id': 'fallback', 'format_note': 'DASH video, mp4_dash', From f352a0977879a6210b1519036fc75e9d423f277c Mon Sep 17 00:00:00 2001 From: Marcel <flashdagger@googlemail.com> Date: Sun, 20 Nov 2022 14:12:23 +0530 Subject: [PATCH 1785/2552] [webvtt] Handle premature EOF Closes #2867, closes #5600 Authored by: flashdagger --- yt_dlp/webvtt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/webvtt.py b/yt_dlp/webvtt.py index 1138865ba..dd7298277 100644 --- a/yt_dlp/webvtt.py +++ b/yt_dlp/webvtt.py @@ -93,7 +93,7 @@ _REGEX_TS = re.compile(r'''(?x) ([0-9]{3})? 
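
The `utils.py` hunk above inserts `mov` right after `mp4` in the video-extension preference (with `--prefer-free-formats`, `webm` still leads). The tie-break is effectively an index into the order tuple:

```python
order = ('mp4', 'mov', 'webm', 'flv', '', 'none')
candidates = ['webm', 'mov', 'mp4']
print(sorted(candidates, key=order.index))  # ['mp4', 'mov', 'webm']
```
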
''') _REGEX_EOF = re.compile(r'\Z') -_REGEX_NL = re.compile(r'(?:\r\n|[\r\n])') +_REGEX_NL = re.compile(r'(?:\r\n|[\r\n]|$)') _REGEX_BLANK = re.compile(r'(?:\r\n|[\r\n])+') From 3b021eacefab4a9e43660d72d6d5a49f7ddb025e Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 21 Nov 2022 00:51:45 +0000 Subject: [PATCH 1786/2552] [extractor/generic] Add `fragment_query` extractor arg for DASH and HLS (#5528) * `fragment_query`: passthrough any query in generic mpd/m3u8 manifest URLs to their fragments * Add support for `extra_param_to_segment_url` to DASH downloader Authored by: bashonly, pukkandan --- README.md | 3 +++ yt_dlp/downloader/dash.py | 14 +++++++++++--- yt_dlp/extractor/generic.py | 18 +++++++++++++++++- 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index f336dcb6a..fa55d130b 100644 --- a/README.md +++ b/README.md @@ -1736,6 +1736,9 @@ The following extractors use this feature: * `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details) * `approximate_date`: Extract approximate `upload_date` and `timestamp` in flat-playlist. This may cause date-based filters to be slightly off +#### generic +* `fragment_query`: Passthrough any query in mpd/m3u8 manifest URLs to their fragments. Does not apply to ffmpeg + #### funimation * `language`: Audio languages to extract, e.g. `funimation:language=english,japanese` * `version`: The video version to extract - `uncut` or `simulcast` diff --git a/yt_dlp/downloader/dash.py b/yt_dlp/downloader/dash.py index 8723e1068..4328d739c 100644 --- a/yt_dlp/downloader/dash.py +++ b/yt_dlp/downloader/dash.py @@ -1,8 +1,9 @@ import time +import urllib.parse from . 
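
Adding `$` to `_REGEX_NL` above lets the cue parser treat end-of-input as a line terminator, so a WebVTT file whose last line lacks a trailing newline no longer stalls:

```python
import re

_REGEX_NL = re.compile(r'(?:\r\n|[\r\n]|$)')
# A final line with no newline still yields a (zero-width) match at EOF:
assert _REGEX_NL.search('last cue text')
assert _REGEX_NL.search('last cue text').start() == len('last cue text')
```
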
import get_suitable_downloader from .fragment import FragmentFD -from ..utils import urljoin +from ..utils import update_url_query, urljoin class DashSegmentsFD(FragmentFD): @@ -40,7 +41,12 @@ class DashSegmentsFD(FragmentFD): self._prepare_and_start_frag_download(ctx, fmt) ctx['start'] = real_start - fragments_to_download = self._get_fragments(fmt, ctx) + extra_query = None + extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url') + if extra_param_to_segment_url: + extra_query = urllib.parse.parse_qs(extra_param_to_segment_url) + + fragments_to_download = self._get_fragments(fmt, ctx, extra_query) if real_downloader: self.to_screen( @@ -57,7 +63,7 @@ class DashSegmentsFD(FragmentFD): fragments = fragments(ctx) if callable(fragments) else fragments return [next(iter(fragments))] if self.params.get('test') else fragments - def _get_fragments(self, fmt, ctx): + def _get_fragments(self, fmt, ctx, extra_query): fragment_base_url = fmt.get('fragment_base_url') fragments = self._resolve_fragments(fmt['fragments'], ctx) @@ -70,6 +76,8 @@ class DashSegmentsFD(FragmentFD): if not fragment_url: assert fragment_base_url fragment_url = urljoin(fragment_base_url, fragment['path']) + if extra_query: + fragment_url = update_url_query(fragment_url, extra_query) yield { 'frag_index': frag_index, diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 5da77273d..2fcbc6f43 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -2189,6 +2189,13 @@ class GenericIE(InfoExtractor): self._downloader.write_debug(f'Identified {num} {name}{format_field(note, None, "; %s")}') + def _fragment_query(self, url): + if self._configuration_arg('fragment_query'): + query_string = urllib.parse.urlparse(url).query + if query_string: + return {'extra_param_to_segment_url': query_string} + return {} + def _extract_rss(self, url, video_id, doc): NS_MAP = { 'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd', @@ -2351,8 +2358,10 @@ class GenericIE(InfoExtractor): subtitles = {} if format_id.endswith('mpegurl'): formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4', headers=headers) + info_dict.update(self._fragment_query(url)) elif format_id.endswith('mpd') or format_id.endswith('dash+xml'): formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id, headers=headers) + info_dict.update(self._fragment_query(url)) elif format_id == 'f4m': formats = self._extract_f4m_formats(url, video_id, headers=headers) else: @@ -2379,6 +2388,7 @@ class GenericIE(InfoExtractor): if first_bytes.startswith(b'#EXTM3U'): self.report_detected('M3U playlist') info_dict['formats'], info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4') + info_dict.update(self._fragment_query(url)) return info_dict # Maybe it's a direct link to a video? 
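
In isolation, the new passthrough copies the manifest URL's query string onto each fragment URL, which is what `parse_qs` plus `update_url_query` amount to (sample URLs invented):

```python
import urllib.parse
from yt_dlp.utils import update_url_query

manifest_url = 'https://cdn.example.com/video.mpd?token=abc&expires=123'
extra_query = urllib.parse.parse_qs(urllib.parse.urlparse(manifest_url).query)
print(update_url_query('https://cdn.example.com/seg-0001.m4s', extra_query))
# https://cdn.example.com/seg-0001.m4s?token=abc&expires=123
```
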
@@ -2429,6 +2439,7 @@ class GenericIE(InfoExtractor): doc, mpd_base_url=full_response.geturl().rpartition('/')[0], mpd_url=url) + info_dict.update(self._fragment_query(url)) self.report_detected('DASH manifest') return info_dict elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag): @@ -2541,7 +2552,10 @@ class GenericIE(InfoExtractor): m3u8_id='hls', fatal=False) formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) - else: + for fmt in formats: + fmt.update(self._fragment_query(src)) + + if not formats: formats.append({ 'url': src, 'ext': (mimetype2ext(src_type) @@ -2776,8 +2790,10 @@ class GenericIE(InfoExtractor): return [self._extract_xspf_playlist(video_url, video_id)] elif ext == 'm3u8': entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4', headers=headers) + entry_info_dict.update(self._fragment_query(video_url)) elif ext == 'mpd': entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_mpd_formats_and_subtitles(video_url, video_id, headers=headers) + entry_info_dict.update(self._fragment_query(video_url)) elif ext == 'f4m': entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id, headers=headers) elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url: From 7ff2fafe47aa9978f89ff358a8b9f9261430f33a Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 21 Nov 2022 00:55:57 +0000 Subject: [PATCH 1787/2552] [extractor/vimeo] Add `VimeoProIE` (#5596) * Add support for VimeoPro URLs not containing a Vimeo video ID * Add support for password-protected VimeoPro pages Closes #5594 Authored by: bashonly, pukkandan --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/vimeo.py | 132 +++++++++++++++++++++----------- 2 files changed, 90 insertions(+), 43 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index c1ab5a964..a3c5472f0 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2096,6 +2096,7 @@ from .vimeo import ( VimeoGroupsIE, VimeoLikesIE, VimeoOndemandIE, + VimeoProIE, VimeoReviewIE, VimeoUserIE, VimeoWatchLaterIE, diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index 26fe566b0..97b99fc50 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -2,6 +2,7 @@ import base64 import functools import re import itertools +import urllib.error from .common import InfoExtractor from ..compat import ( @@ -311,7 +312,7 @@ class VimeoIE(VimeoBaseInfoExtractor): ) \. )? - vimeo(?:pro)?\.com/ + vimeo\.com/ (?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/) (?:[^/]+/)*? 
(?: @@ -355,31 +356,6 @@ class VimeoIE(VimeoBaseInfoExtractor): }, 'skip': 'No longer available' }, - { - 'url': 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876', - 'md5': '3b5ca6aa22b60dfeeadf50b72e44ed82', - 'note': 'Vimeo Pro video (#1197)', - 'info_dict': { - 'id': '68093876', - 'ext': 'mp4', - 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/openstreetmapus', - 'uploader_id': 'openstreetmapus', - 'uploader': 'OpenStreetMap US', - 'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography', - 'description': 'md5:2c362968038d4499f4d79f88458590c1', - 'duration': 1595, - 'upload_date': '20130610', - 'timestamp': 1370893156, - 'license': 'by', - 'thumbnail': 'https://i.vimeocdn.com/video/440260469-19b0d92fca3bd84066623b53f1eb8aaa3980c6c809e2d67b6b39ab7b4a77a344-d_960', - 'view_count': int, - 'comment_count': int, - 'like_count': int, - }, - 'params': { - 'format': 'best[protocol=https]', - }, - }, { 'url': 'http://player.vimeo.com/video/54469442', 'md5': 'b3e7f4d2cbb53bd7dc3bb6ff4ed5cfbd', @@ -837,15 +813,7 @@ class VimeoIE(VimeoBaseInfoExtractor): if unlisted_hash: return self._extract_from_api(video_id, unlisted_hash) - orig_url = url - is_pro = 'vimeopro.com/' in url - if is_pro: - # some videos require portfolio_id to be present in player url - # https://github.com/ytdl-org/youtube-dl/issues/20070 - url = self._extract_url(url, self._download_webpage(url, video_id)) - if not url: - url = 'https://vimeo.com/' + video_id - elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')): + if any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')): url = 'https://vimeo.com/' + video_id self._try_album_password(url) @@ -947,14 +915,6 @@ class VimeoIE(VimeoBaseInfoExtractor): video_description = self._html_search_meta( ['description', 'og:description', 'twitter:description'], webpage, default=None) - if not video_description and is_pro: - orig_webpage = self._download_webpage( - orig_url, video_id, - note='Downloading webpage for description', - fatal=False) - if orig_webpage: - video_description = self._html_search_meta( - 'description', orig_webpage, default=None) if not video_description: self.report_warning('Cannot find video description') @@ -1393,3 +1353,89 @@ class VHXEmbedIE(VimeoBaseInfoExtractor): info = self._parse_config(config, video_id) info['id'] = video_id return info + + +class VimeoProIE(VimeoBaseInfoExtractor): + IE_NAME = 'vimeo:pro' + _VALID_URL = r'https?://(?:www\.)?vimeopro\.com/[^/?#]+/(?P<slug>[^/?#]+)(?:(?:/videos?/(?P<id>[0-9]+)))?' 
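
A quick sanity check of the new `_VALID_URL` against the patch's own test URLs: the slug always matches, while the numeric id is captured only when present:

```python
import re

_VALID_URL = r'https?://(?:www\.)?vimeopro\.com/[^/?#]+/(?P<slug>[^/?#]+)(?:(?:/videos?/(?P<id>[0-9]+)))?'
m = re.match(_VALID_URL, 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876')
print(m.group('slug'), m.group('id'))  # state-of-the-map-us-2013 68093876
m = re.match(_VALID_URL, 'https://vimeopro.com/cadfem/simulation-conference-mechanische-systeme-in-perfektion')
print(m.group('slug'), m.group('id'))  # simulation-conference-... None
```
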
+ _TESTS = [{ + # Vimeo URL derived from video_id + 'url': 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876', + 'md5': '3b5ca6aa22b60dfeeadf50b72e44ed82', + 'note': 'Vimeo Pro video (#1197)', + 'info_dict': { + 'id': '68093876', + 'ext': 'mp4', + 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/openstreetmapus', + 'uploader_id': 'openstreetmapus', + 'uploader': 'OpenStreetMap US', + 'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography', + 'description': 'md5:2c362968038d4499f4d79f88458590c1', + 'duration': 1595, + 'upload_date': '20130610', + 'timestamp': 1370893156, + 'license': 'by', + 'thumbnail': 'https://i.vimeocdn.com/video/440260469-19b0d92fca3bd84066623b53f1eb8aaa3980c6c809e2d67b6b39ab7b4a77a344-d_960', + 'view_count': int, + 'comment_count': int, + 'like_count': int, + 'tags': 'count:1', + }, + 'params': { + 'format': 'best[protocol=https]', + }, + }, { + # password-protected VimeoPro page with Vimeo player embed + 'url': 'https://vimeopro.com/cadfem/simulation-conference-mechanische-systeme-in-perfektion', + 'info_dict': { + 'id': '764543723', + 'ext': 'mp4', + 'title': 'Mechanische Systeme in Perfektion: Realität erfassen, Innovation treiben', + 'thumbnail': 'https://i.vimeocdn.com/video/1543784598-a1a750494a485e601110136b9fe11e28c2131942452b3a5d30391cb3800ca8fd-d_1280', + 'description': 'md5:2a9d195cd1b0f6f79827107dc88c2420', + 'uploader': 'CADFEM', + 'uploader_id': 'cadfem', + 'uploader_url': 'https://vimeo.com/cadfem', + 'duration': 12505, + 'chapters': 'count:10', + }, + 'params': { + 'videopassword': 'Conference2022', + 'skip_download': True, + }, + }] + + def _real_extract(self, url): + display_id, video_id = self._match_valid_url(url).group('slug', 'id') + if video_id: + display_id = video_id + webpage = self._download_webpage(url, display_id) + + password_form = self._search_regex( + r'(?is)<form[^>]+?method=["\']post["\'][^>]*>(.+?password.+?)</form>', + webpage, 'password form', default=None) + if password_form: + try: + webpage = self._download_webpage(url, display_id, data=urlencode_postdata({ + 'password': self._get_video_password(), + **self._hidden_inputs(password_form), + }), note='Logging in with video password') + except ExtractorError as e: + if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 418: + raise ExtractorError('Wrong video password', expected=True) + raise + + description = None + # even if we have video_id, some videos require player URL with portfolio_id query param + # https://github.com/ytdl-org/youtube-dl/issues/20070 + vimeo_url = VimeoIE._extract_url(url, webpage) + if vimeo_url: + description = self._html_search_meta('description', webpage, default=None) + elif video_id: + vimeo_url = f'https://vimeo.com/{video_id}' + else: + raise ExtractorError( + 'No Vimeo embed or video ID could be found in VimeoPro page', expected=True) + + return self.url_result(vimeo_url, VimeoIE, video_id, url_transparent=True, + description=description) From 27c0f899c8f4a71e2ec8ac7ee4ab0217da7934bd Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 22 Nov 2022 00:40:02 +0000 Subject: [PATCH 1788/2552] [extractor/screencastify] Add extractor (#5604) Closes #5603 Authored by: bashonly --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/screencastify.py | 52 +++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 yt_dlp/extractor/screencastify.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py 
index a3c5472f0..375ac0d06 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1603,6 +1603,7 @@ from .savefrom import SaveFromIE from .sbs import SBSIE from .screen9 import Screen9IE from .screencast import ScreencastIE +from .screencastify import ScreencastifyIE from .screencastomatic import ScreencastOMaticIE from .scrippsnetworks import ( ScrippsNetworksWatchIE, diff --git a/yt_dlp/extractor/screencastify.py b/yt_dlp/extractor/screencastify.py new file mode 100644 index 000000000..136b8479b --- /dev/null +++ b/yt_dlp/extractor/screencastify.py @@ -0,0 +1,52 @@ +import urllib.parse + +from .common import InfoExtractor +from ..utils import traverse_obj, update_url_query + + +class ScreencastifyIE(InfoExtractor): + _VALID_URL = r'https?://watch\.screencastify\.com/v/(?P<id>[^/?#]+)' + _TESTS = [{ + 'url': 'https://watch.screencastify.com/v/sYVkZip3quLKhHw4Ybk8', + 'info_dict': { + 'id': 'sYVkZip3quLKhHw4Ybk8', + 'ext': 'mp4', + 'title': 'Inserting and Aligning the Case Top and Bottom', + 'description': '', + 'uploader': 'Paul Gunn', + 'extra_param_to_segment_url': str, + }, + 'params': { + 'skip_download': 'm3u8', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + info = self._download_json( + f'https://umbrella.svc.screencastify.com/api/umbrellaService/watch/{video_id}', video_id) + + query_string = traverse_obj(info, ('manifest', 'auth', 'query')) + query = urllib.parse.parse_qs(query_string) + formats = [] + dash_manifest_url = traverse_obj(info, ('manifest', 'url')) + if dash_manifest_url: + formats.extend( + self._extract_mpd_formats( + dash_manifest_url, video_id, mpd_id='dash', query=query, fatal=False)) + hls_manifest_url = traverse_obj(info, ('manifest', 'hlsUrl')) + if hls_manifest_url: + formats.extend( + self._extract_m3u8_formats( + hls_manifest_url, video_id, ext='mp4', m3u8_id='hls', query=query, fatal=False)) + for f in formats: + f['url'] = update_url_query(f['url'], query) + + return { + 'id': video_id, + 'title': info.get('title'), + 'description': info.get('description'), + 'uploader': info.get('userName'), + 'formats': formats, + 'extra_param_to_segment_url': query_string, + } From d761dfd059ded109b4feef7315bd84f7d47c6bd7 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 22 Nov 2022 03:42:16 +0000 Subject: [PATCH 1789/2552] [extractor/naver] Improve `_VALID_URL` for `NaverNowIE` (#5620) Authored by: bashonly --- yt_dlp/extractor/naver.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/naver.py b/yt_dlp/extractor/naver.py index b5425c744..9de83abf7 100644 --- a/yt_dlp/extractor/naver.py +++ b/yt_dlp/extractor/naver.py @@ -254,7 +254,7 @@ class NaverLiveIE(InfoExtractor): class NaverNowIE(NaverBaseIE): IE_NAME = 'navernow' - _VALID_URL = r'https?://now\.naver\.com/s/now\.(?P<id>[0-9]+)' + _VALID_URL = r'https?://now\.naver\.com/s/now\.(?P<id>\w+)' _API_URL = 'https://apis.naver.com/now_web/oldnow_web/v4' _TESTS = [{ 'url': 'https://now.naver.com/s/now.4759?shareReplayId=26331132#replay=', @@ -313,6 +313,9 @@ class NaverNowIE(NaverBaseIE): 'title': '아이키의 떰즈업', }, 'playlist_mincount': 101, + }, { + 'url': 'https://now.naver.com/s/now.kihyunplay?shareReplayId=30573291#replay', + 'only_matching': True, }] def _extract_replay(self, show_id, replay_id): From 9d52bf65ff38386a70493ce152f0883476b0709b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elan=20Ruusam=C3=A4e?= <glen@pld-linux.org> Date: Tue, 22 Nov 2022 20:09:57 +0200 Subject: 
[PATCH 1790/2552] [extractor/kanal2] Add extractor (#5575) Authored by: glensc, pukkandan, bashonly --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/kanal2.py | 66 +++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 yt_dlp/extractor/kanal2.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 375ac0d06..9d5af491b 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -820,6 +820,7 @@ from .joj import JojIE from .jwplatform import JWPlatformIE from .kakao import KakaoIE from .kaltura import KalturaIE +from .kanal2 import Kanal2IE from .karaoketv import KaraoketvIE from .karrierevideos import KarriereVideosIE from .keezmovies import KeezMoviesIE diff --git a/yt_dlp/extractor/kanal2.py b/yt_dlp/extractor/kanal2.py new file mode 100644 index 000000000..3c0efe598 --- /dev/null +++ b/yt_dlp/extractor/kanal2.py @@ -0,0 +1,66 @@ +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + join_nonempty, + traverse_obj, + unified_timestamp, + update_url_query, +) + + +class Kanal2IE(InfoExtractor): + _VALID_URL = r'https?://kanal2\.postimees\.ee/[^?#]+\?([^#]+&)?id=(?P<id>\d+)' + _TESTS = [{ + 'note': 'Test standard url (#5575)', + 'url': 'https://kanal2.postimees.ee/pluss/video/?id=40792', + 'md5': '7ea7b16266ec1798743777df241883dd', + 'info_dict': { + 'id': '40792', + 'ext': 'mp4', + 'title': 'Aedniku aabits / Osa 53 (05.08.2016 20:00)', + 'thumbnail': r're:https?://.*\.jpg$', + 'description': 'md5:53cabf3c5d73150d594747f727431248', + 'upload_date': '20160805', + 'timestamp': 1470420000, + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + playlist = self._download_json( + f'https://kanal2.postimees.ee/player/playlist/{video_id}', + video_id, query={'type': 'episodes'}, + headers={'X-Requested-With': 'XMLHttpRequest'}) + + return { + 'id': video_id, + 'title': join_nonempty(*traverse_obj(playlist, ('info', ('title', 'subtitle'))), delim=' / '), + 'description': traverse_obj(playlist, ('info', 'description')), + 'thumbnail': traverse_obj(playlist, ('data', 'image')), + 'formats': self.get_formats(playlist, video_id), + 'timestamp': unified_timestamp(self._search_regex( + r'\((\d{2}\.\d{2}\.\d{4}\s\d{2}:\d{2})\)$', + traverse_obj(playlist, ('info', 'subtitle')), 'timestamp', default='') + ' +0200'), + } + + def get_formats(self, playlist, video_id): + path = traverse_obj(playlist, ('data', 'path')) + if not path: + raise ExtractorError('Path value not found in playlist JSON response') + session = self._download_json( + 'https://sts.postimees.ee/session/register', + video_id, note='Creating session', errnote='Error creating session', + headers={ + 'X-Original-URI': path, + 'Accept': 'application/json', + }) + if session.get('reason') != 'OK' or not session.get('session'): + reason = session.get('reason', 'unknown error') + raise ExtractorError(f'Unable to obtain session: {reason}') + + formats = [] + for stream in traverse_obj(playlist, ('data', 'streams', ..., 'file')): + formats.extend(self._extract_m3u8_formats( + update_url_query(stream, {'s': session['session']}), video_id, 'mp4')) + + return formats From 0d95d8b00ad1bf879ed61f4e588753ef87ccd061 Mon Sep 17 00:00:00 2001 From: Mudassir Chapra <37051110+muddi900@users.noreply.github.com> Date: Thu, 24 Nov 2022 20:34:45 +0500 Subject: [PATCH 1791/2552] [extractor/gronkh] Fix `_VALID_URL` (#5628) Closes #5531 Authored by: muddi900 --- yt_dlp/extractor/gronkh.py | 15 +++++++++++++-- 1 file changed, 
13 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/gronkh.py b/yt_dlp/extractor/gronkh.py index b6cf14117..b9370e36c 100644 --- a/yt_dlp/extractor/gronkh.py +++ b/yt_dlp/extractor/gronkh.py @@ -9,15 +9,26 @@ from ..utils import ( class GronkhIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gronkh\.tv/(?:watch/)?stream/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?gronkh\.tv/(?:watch/)?streams?/(?P<id>\d+)' _TESTS = [{ + 'url': 'https://gronkh.tv/streams/657', + 'info_dict': { + 'id': '657', + 'ext': 'mp4', + 'title': 'H.O.R.D.E. - DAS ZWEiTE ZEiTALTER 🎲 Session 1', + 'view_count': int, + 'thumbnail': 'https://01.cdn.vod.farm/preview/9e2555d3a23bf4e5c5b7c6b3b70a9d84.jpg', + 'upload_date': '20221111' + }, + 'params': {'skip_download': True} + }, { 'url': 'https://gronkh.tv/stream/536', 'info_dict': { 'id': '536', 'ext': 'mp4', 'title': 'GTV0536, 2021-10-01 - MARTHA IS DEAD #FREiAB1830 !FF7 !horde !archiv', - 'view_count': 19491, + 'view_count': int, 'thumbnail': 'https://01.cdn.vod.farm/preview/6436746cce14e25f751260a692872b9b.jpg', 'upload_date': '20211001' }, From c0caa805157fb315d4b24ea4e1f3eef0210c2096 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Fri, 25 Nov 2022 16:10:23 +0530 Subject: [PATCH 1792/2552] [extractor/naver] Treat fan subtitles as separate language Closes #5467 --- yt_dlp/extractor/naver.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/naver.py b/yt_dlp/extractor/naver.py index 9de83abf7..e2e6e9728 100644 --- a/yt_dlp/extractor/naver.py +++ b/yt_dlp/extractor/naver.py @@ -8,6 +8,7 @@ from ..utils import ( clean_html, dict_get, int_or_none, + join_nonempty, merge_dicts, parse_duration, traverse_obj, @@ -72,13 +73,11 @@ class NaverBaseIE(InfoExtractor): def get_subs(caption_url): if re.search(self._CAPTION_EXT_RE, caption_url): - return [{ - 'url': replace_ext(caption_url, 'ttml'), - }, { - 'url': replace_ext(caption_url, 'vtt'), - }] - else: - return [{'url': caption_url}] + return [ + replace_ext(caption_url, 'ttml'), + replace_ext(caption_url, 'vtt'), + ] + return [caption_url] automatic_captions = {} subtitles = {} @@ -87,7 +86,13 @@ class NaverBaseIE(InfoExtractor): if not caption_url: continue sub_dict = automatic_captions if caption.get('type') == 'auto' else subtitles - sub_dict.setdefault(dict_get(caption, ('locale', 'language')), []).extend(get_subs(caption_url)) + lang = caption.get('locale') or join_nonempty('language', 'country', from_dict=caption) or 'und' + if caption.get('type') == 'fan': + lang += '_fan%d' % next(i for i in itertools.count(1) if f'{lang}_fan{i}' not in sub_dict) + sub_dict.setdefault(lang, []).extend({ + 'url': sub_url, + 'name': join_nonempty('label', 'fanName', from_dict=caption, delim=' - '), + } for sub_url in get_subs(caption_url)) user = meta.get('user', {}) From 86f557b636cf2dc66cd882a88ae4338086c48fbb Mon Sep 17 00:00:00 2001 From: marieell <marieell@tuta.io> Date: Sat, 26 Nov 2022 03:30:25 +0100 Subject: [PATCH 1793/2552] [extractor/youporn] Fix metadata (#2768) Authored by: marieell --- yt_dlp/extractor/youporn.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/youporn.py b/yt_dlp/extractor/youporn.py index 2f3f21332..8f1b9911b 100644 --- a/yt_dlp/extractor/youporn.py +++ b/yt_dlp/extractor/youporn.py @@ -4,6 +4,7 @@ from .common import InfoExtractor from ..utils import ( extract_attributes, int_or_none, + merge_dicts, str_to_int, unified_strdate, url_or_none, @@ 
-64,6 +65,24 @@ class YouPornIE(InfoExtractor): }, { 'url': 'https://www.youporn.com/watch/13922959/femdom-principal/', 'only_matching': True, + }, { + 'url': 'https://www.youporn.com/watch/16290308/tinderspecial-trailer1/', + 'info_dict': { + 'id': '16290308', + 'age_limit': 18, + 'categories': [], + 'description': 'md5:00ea70f642f431c379763c17c2f396bc', + 'display_id': 'tinderspecial-trailer1', + 'duration': 298.0, + 'ext': 'mp4', + 'upload_date': '20201123', + 'uploader': 'Ersties', + 'tags': [], + 'thumbnail': 'https://fi1.ypncdn.com/202011/23/16290308/original/8/tinderspecial-trailer1-8(m=eaAaaEPbaaaa).jpg', + 'timestamp': 1606089600, + 'title': 'Tinder In Real Life', + 'view_count': int, + } }] def _real_extract(self, url): @@ -159,7 +178,8 @@ class YouPornIE(InfoExtractor): r'(?s)Tags:.*?</div>\s*<div[^>]+class=["\']tagBoxContent["\'][^>]*>(.+?)</div>', 'tags') - return { + data = self._search_json_ld(webpage, video_id, expected_type='VideoObject', fatal=False) + return merge_dicts(data, { 'id': video_id, 'display_id': display_id, 'title': title, @@ -174,4 +194,4 @@ class YouPornIE(InfoExtractor): 'tags': tags, 'age_limit': age_limit, 'formats': formats, - } + }) From 48652590ec401f4e747a5e51552cdcac20744aa1 Mon Sep 17 00:00:00 2001 From: alexia <nyuszika7h@gmail.com> Date: Mon, 28 Nov 2022 03:36:18 +0100 Subject: [PATCH 1794/2552] [extractor/amazonminitv] Add extractors (#3628) Authored by: nyuszika7h, GautamMKGarg --- yt_dlp/extractor/_extractors.py | 5 + yt_dlp/extractor/amazonminitv.py | 322 +++++++++++++++++++++++++++++++ 2 files changed, 327 insertions(+) create mode 100644 yt_dlp/extractor/amazonminitv.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 9d5af491b..2fe15f6d2 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -87,6 +87,11 @@ from .alura import ( ) from .amcnetworks import AMCNetworksIE from .amazon import AmazonStoreIE +from .amazonminitv import ( + AmazonMiniTVIE, + AmazonMiniTVSeasonIE, + AmazonMiniTVSeriesIE, +) from .americastestkitchen import ( AmericasTestKitchenIE, AmericasTestKitchenSeasonIE, diff --git a/yt_dlp/extractor/amazonminitv.py b/yt_dlp/extractor/amazonminitv.py new file mode 100644 index 000000000..793fac2e4 --- /dev/null +++ b/yt_dlp/extractor/amazonminitv.py @@ -0,0 +1,322 @@ +import json + +from .common import InfoExtractor +from ..utils import ExtractorError, int_or_none, traverse_obj, try_get + + +class AmazonMiniTVIE(InfoExtractor): + _VALID_URL = r'(?:https?://(?:www\.)?amazon\.in/minitv/tp/|amazonminitv:(?:amzn1\.dv\.gti\.)?)(?P<id>[a-f0-9-]+)' + _HEADERS = { + 'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Mobile Safari/537.36', + } + _CLIENT_ID = 'ATVIN' + _DEVICE_LOCALE = 'en_GB' + _TESTS = [{ + 'url': 'https://www.amazon.in/minitv/tp/75fe3a75-b8fe-4499-8100-5c9424344840?referrer=https%3A%2F%2Fwww.amazon.in%2Fminitv', + 'md5': '0045a5ea38dddd4de5a5fcec7274b476', + 'info_dict': { + 'id': 'amzn1.dv.gti.75fe3a75-b8fe-4499-8100-5c9424344840', + 'ext': 'mp4', + 'title': 'May I Kiss You?', + 'language': 'Hindi', + 'thumbnail': r're:^https?://.*\.jpg$', + 'description': 'md5:a549bfc747973e04feb707833474e59d', + 'release_timestamp': 1644710400, + 'release_date': '20220213', + 'duration': 846, + 'chapters': [{ + 'start_time': 815.0, + 'end_time': 846, + 'title': 'End Credits', + }], + 'series': 'Couple Goals', + 'series_id': 'amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0', + 'season': 
'Season 3', + 'season_number': 3, + 'season_id': 'amzn1.dv.gti.20331016-d9b9-4968-b991-c89fa4927a36', + 'episode': 'May I Kiss You?', + 'episode_number': 2, + 'episode_id': 'amzn1.dv.gti.75fe3a75-b8fe-4499-8100-5c9424344840', + }, + }, { + 'url': 'https://www.amazon.in/minitv/tp/280d2564-584f-452f-9c98-7baf906e01ab?referrer=https%3A%2F%2Fwww.amazon.in%2Fminitv', + 'md5': '9a977bffd5d99c4dd2a32b360aee1863', + 'info_dict': { + 'id': 'amzn1.dv.gti.280d2564-584f-452f-9c98-7baf906e01ab', + 'ext': 'mp4', + 'title': 'Jahaan', + 'language': 'Hindi', + 'thumbnail': r're:^https?://.*\.jpg', + 'description': 'md5:05eb765a77bf703f322f120ec6867339', + 'release_timestamp': 1647475200, + 'release_date': '20220317', + 'duration': 783, + 'chapters': [], + }, + }, { + 'url': 'https://www.amazon.in/minitv/tp/280d2564-584f-452f-9c98-7baf906e01ab', + 'only_matching': True, + }, { + 'url': 'amazonminitv:amzn1.dv.gti.280d2564-584f-452f-9c98-7baf906e01ab', + 'only_matching': True, + }, { + 'url': 'amazonminitv:280d2564-584f-452f-9c98-7baf906e01ab', + 'only_matching': True, + }] + _GRAPHQL_QUERY_CONTENT = ''' +query content($sessionIdToken: String!, $deviceLocale: String, $contentId: ID!, $contentType: ContentType!, $clientId: String) { + content( + applicationContextInput: {deviceLocale: $deviceLocale, sessionIdToken: $sessionIdToken, clientId: $clientId} + contentId: $contentId + contentType: $contentType + ) { + contentId + name + ... on Episode { + contentId + vodType + name + images + description { + synopsis + contentLengthInSeconds + } + publicReleaseDateUTC + audioTracks + seasonId + seriesId + seriesName + seasonNumber + episodeNumber + timecode { + endCreditsTime + } + } + ... on MovieContent { + contentId + vodType + name + description { + synopsis + contentLengthInSeconds + } + images + publicReleaseDateUTC + audioTracks + } + } +}''' + + def _call_api(self, asin, data=None, note=None): + query = {} + headers = self._HEADERS.copy() + if data: + name = 'graphql' + data['variables'].update({ + 'clientId': self._CLIENT_ID, + 'contentType': 'VOD', + 'deviceLocale': self._DEVICE_LOCALE, + 'sessionIdToken': self.session_id, + }) + headers.update({'Content-Type': 'application/json'}) + else: + name = 'prs' + query.update({ + 'clientId': self._CLIENT_ID, + 'deviceType': 'A1WMMUXPCUJL4N', + 'contentId': asin, + 'deviceLocale': self._DEVICE_LOCALE, + }) + + resp = self._download_json( + f'https://www.amazon.in/minitv/api/web/{name}', + asin, query=query, data=json.dumps(data).encode() if data else None, + headers=headers, note=note) + + if 'errors' in resp: + raise ExtractorError(f'MiniTV said: {resp["errors"][0]["message"]}') + + if data: + resp = resp['data'][data['operationName']] + return resp + + def _real_initialize(self): + # Download webpage to get the required guest session cookies + self._download_webpage( + 'https://www.amazon.in/minitv', + None, + headers=self._HEADERS, + note='Downloading webpage') + + self.session_id = self._get_cookies('https://www.amazon.in')['session-id'].value + + def _real_extract(self, url): + asin = f'amzn1.dv.gti.{self._match_id(url)}' + + title_info = self._call_api( + asin, data={ + 'operationName': 'content', + 'variables': { + 'contentId': asin, + }, + 'query': self._GRAPHQL_QUERY_CONTENT, + }, + note='Downloading title info') + + prs = self._call_api(asin, note='Downloading playback info') + + formats = [] + subtitles = {} + for type_, asset in prs['playbackAssets'].items(): + if not isinstance(asset, dict): + continue + if type_ == 'hls': + m3u8_fmts, m3u8_subs = 
self._extract_m3u8_formats_and_subtitles( + asset['manifestUrl'], asin, ext='mp4', entry_protocol='m3u8_native', + m3u8_id=type_, fatal=False) + formats.extend(m3u8_fmts) + subtitles = self._merge_subtitles(subtitles, m3u8_subs) + elif type_ == 'dash': + mpd_fmts, mpd_subs = self._extract_mpd_formats_and_subtitles( + asset['manifestUrl'], asin, mpd_id=type_, fatal=False) + formats.extend(mpd_fmts) + subtitles = self._merge_subtitles(subtitles, mpd_subs) + + duration = traverse_obj(title_info, ('description', 'contentLengthInSeconds')) + credits_time = try_get(title_info, lambda x: x['timecode']['endCreditsTime'] / 1000) + chapters = [{ + 'start_time': credits_time, + 'end_time': duration + credits_time, # FIXME: I suppose this is correct + 'title': 'End Credits', + }] if credits_time and duration else [] + is_episode = title_info.get('vodType') == 'EPISODE' + + return { + 'id': asin, + 'title': title_info.get('name'), + 'formats': formats, + 'subtitles': subtitles, + 'language': traverse_obj(title_info, ('audioTracks', 0)), + 'thumbnails': [{ + 'id': type_, + 'url': url, + } for type_, url in (title_info.get('images') or {}).items()], + 'description': traverse_obj(title_info, ('description', 'synopsis')), + 'release_timestamp': int_or_none(try_get(title_info, lambda x: x['publicReleaseDateUTC'] / 1000)), + 'duration': duration, + 'chapters': chapters, + 'series': title_info.get('seriesName'), + 'series_id': title_info.get('seriesId'), + 'season_number': title_info.get('seasonNumber'), + 'season_id': title_info.get('seasonId'), + 'episode': title_info.get('name') if is_episode else None, + 'episode_number': title_info.get('episodeNumber'), + 'episode_id': asin if is_episode else None, + } + + +class AmazonMiniTVSeasonIE(AmazonMiniTVIE): + IE_NAME = 'amazonminitv:season' + _VALID_URL = r'amazonminitv:season:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)' + IE_DESC = 'Amazon MiniTV Series, "minitv:season:" prefix' + _TESTS = [{ + 'url': 'amazonminitv:season:amzn1.dv.gti.0aa996eb-6a1b-4886-a342-387fbd2f1db0', + 'playlist_mincount': 6, + 'info_dict': { + 'id': 'amzn1.dv.gti.0aa996eb-6a1b-4886-a342-387fbd2f1db0', + }, + }, { + 'url': 'amazonminitv:season:0aa996eb-6a1b-4886-a342-387fbd2f1db0', + 'only_matching': True, + }] + _GRAPHQL_QUERY = ''' +query getEpisodes($sessionIdToken: String!, $clientId: String, $episodeOrSeasonId: ID!, $deviceLocale: String) { + getEpisodes( + applicationContextInput: {sessionIdToken: $sessionIdToken, deviceLocale: $deviceLocale, clientId: $clientId} + episodeOrSeasonId: $episodeOrSeasonId + ) { + episodes { + ... 
on Episode { + contentId + name + images + seriesName + seasonId + seriesId + seasonNumber + episodeNumber + description { + synopsis + contentLengthInSeconds + } + publicReleaseDateUTC + } + } + } +} +''' + + def _entries(self, asin): + season_info = self._call_api( + asin, + data={ + 'operationName': 'getEpisodes', + 'variables': { + 'episodeOrSeasonId': asin, + }, + 'query': self._GRAPHQL_QUERY, + }, + note='Downloading season info') + + for episode in season_info['episodes']: + yield self.url_result(f'amazonminitv:{episode["contentId"]}', AmazonMiniTVIE, episode['contentId']) + + def _real_extract(self, url): + asin = f'amzn1.dv.gti.{self._match_id(url)}' + return self.playlist_result(self._entries(asin), playlist_id=asin) + + +class AmazonMiniTVSeriesIE(AmazonMiniTVIE): + IE_NAME = 'amazonminitv:series' + _VALID_URL = r'amazonminitv:series:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)' + _TESTS = [{ + 'url': 'amazonminitv:series:amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0', + 'playlist_mincount': 3, + 'info_dict': { + 'id': 'amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0', + }, + }, { + 'url': 'amazonminitv:series:56521d46-b040-4fd5-872e-3e70476a04b0', + 'only_matching': True, + }] + _GRAPHQL_QUERY = ''' +query getSeasons($sessionIdToken: String!, $deviceLocale: String, $episodeOrSeasonOrSeriesId: ID!, $clientId: String) { + getSeasons( + applicationContextInput: {deviceLocale: $deviceLocale, sessionIdToken: $sessionIdToken, clientId: $clientId} + episodeOrSeasonOrSeriesId: $episodeOrSeasonOrSeriesId + ) { + seasons { + seasonId + } + } +} +''' + + def _entries(self, asin): + season_info = self._call_api( + asin, + data={ + 'operationName': 'getSeasons', + 'variables': { + 'episodeOrSeasonOrSeriesId': asin, + }, + 'query': self._GRAPHQL_QUERY, + }, + note='Downloading series info') + + for season in season_info['seasons']: + yield self.url_result(f'amazonminitv:season:{season["seasonId"]}', AmazonMiniTVSeasonIE, season['seasonId']) + + def _real_extract(self, url): + asin = f'amzn1.dv.gti.{self._match_id(url)}' + return self.playlist_result(self._entries(asin), playlist_id=asin) From a9d069f5b8540f15caaf696bc39ce6a969f8b11c Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 29 Nov 2022 07:50:58 +0530 Subject: [PATCH 1795/2552] [extractor/amazonminitv] Cleanup 48652590ec401f4e747a5e51552cdcac20744aa1 --- yt_dlp/extractor/amazonminitv.py | 162 +++++++++++++------------------ 1 file changed, 65 insertions(+), 97 deletions(-) diff --git a/yt_dlp/extractor/amazonminitv.py b/yt_dlp/extractor/amazonminitv.py index 793fac2e4..730996853 100644 --- a/yt_dlp/extractor/amazonminitv.py +++ b/yt_dlp/extractor/amazonminitv.py @@ -4,16 +4,43 @@ from .common import InfoExtractor from ..utils import ExtractorError, int_or_none, traverse_obj, try_get -class AmazonMiniTVIE(InfoExtractor): +class AmazonMiniTVBaseIE(InfoExtractor): + def _real_initialize(self): + self._download_webpage( + 'https://www.amazon.in/minitv', None, + note='Fetching guest session cookies') + AmazonMiniTVBaseIE.session_id = self._get_cookies('https://www.amazon.in')['session-id'].value + + def _call_api(self, asin, data=None, note=None): + device = {'clientId': 'ATVIN', 'deviceLocale': 'en_GB'} + if data: + data['variables'].update({ + 'contentType': 'VOD', + 'sessionIdToken': self.session_id, + **device, + }) + + resp = self._download_json( + f'https://www.amazon.in/minitv/api/web/{"graphql" if data else "prs"}', + asin, note=note, headers={'Content-Type': 'application/json'}, + 
data=json.dumps(data).encode() if data else None, + query=None if data else { + 'deviceType': 'A1WMMUXPCUJL4N', + 'contentId': asin, + **device, + }) + + if resp.get('errors'): + raise ExtractorError(f'MiniTV said: {resp["errors"][0]["message"]}') + elif not data: + return resp + return resp['data'][data['operationName']] + + +class AmazonMiniTVIE(AmazonMiniTVBaseIE): _VALID_URL = r'(?:https?://(?:www\.)?amazon\.in/minitv/tp/|amazonminitv:(?:amzn1\.dv\.gti\.)?)(?P<id>[a-f0-9-]+)' - _HEADERS = { - 'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Mobile Safari/537.36', - } - _CLIENT_ID = 'ATVIN' - _DEVICE_LOCALE = 'en_GB' _TESTS = [{ 'url': 'https://www.amazon.in/minitv/tp/75fe3a75-b8fe-4499-8100-5c9424344840?referrer=https%3A%2F%2Fwww.amazon.in%2Fminitv', - 'md5': '0045a5ea38dddd4de5a5fcec7274b476', 'info_dict': { 'id': 'amzn1.dv.gti.75fe3a75-b8fe-4499-8100-5c9424344840', 'ext': 'mp4', @@ -24,11 +51,7 @@ class AmazonMiniTVIE(InfoExtractor): 'release_timestamp': 1644710400, 'release_date': '20220213', 'duration': 846, - 'chapters': [{ - 'start_time': 815.0, - 'end_time': 846, - 'title': 'End Credits', - }], + 'chapters': 'count:2', 'series': 'Couple Goals', 'series_id': 'amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0', 'season': 'Season 3', @@ -40,7 +63,6 @@ class AmazonMiniTVIE(InfoExtractor): }, }, { 'url': 'https://www.amazon.in/minitv/tp/280d2564-584f-452f-9c98-7baf906e01ab?referrer=https%3A%2F%2Fwww.amazon.in%2Fminitv', - 'md5': '9a977bffd5d99c4dd2a32b360aee1863', 'info_dict': { 'id': 'amzn1.dv.gti.280d2564-584f-452f-9c98-7baf906e01ab', 'ext': 'mp4', @@ -63,6 +85,7 @@ class AmazonMiniTVIE(InfoExtractor): 'url': 'amazonminitv:280d2564-584f-452f-9c98-7baf906e01ab', 'only_matching': True, }] + _GRAPHQL_QUERY_CONTENT = ''' query content($sessionIdToken: String!, $deviceLocale: String, $contentId: ID!, $contentType: ContentType!, $clientId: String) { content( @@ -107,68 +130,13 @@ query content($sessionIdToken: String!, $deviceLocale: String, $contentId: ID!, } }''' - def _call_api(self, asin, data=None, note=None): - query = {} - headers = self._HEADERS.copy() - if data: - name = 'graphql' - data['variables'].update({ - 'clientId': self._CLIENT_ID, - 'contentType': 'VOD', - 'deviceLocale': self._DEVICE_LOCALE, - 'sessionIdToken': self.session_id, - }) - headers.update({'Content-Type': 'application/json'}) - else: - name = 'prs' - query.update({ - 'clientId': self._CLIENT_ID, - 'deviceType': 'A1WMMUXPCUJL4N', - 'contentId': asin, - 'deviceLocale': self._DEVICE_LOCALE, - }) - - resp = self._download_json( - f'https://www.amazon.in/minitv/api/web/{name}', - asin, query=query, data=json.dumps(data).encode() if data else None, - headers=headers, note=note) - - if 'errors' in resp: - raise ExtractorError(f'MiniTV said: {resp["errors"][0]["message"]}') - - if data: - resp = resp['data'][data['operationName']] - return resp - - def _real_initialize(self): - # Download webpage to get the required guest session cookies - self._download_webpage( - 'https://www.amazon.in/minitv', - None, - headers=self._HEADERS, - note='Downloading webpage') - - self.session_id = self._get_cookies('https://www.amazon.in')['session-id'].value - def _real_extract(self, url): asin = f'amzn1.dv.gti.{self._match_id(url)}' - - title_info = self._call_api( - asin, data={ - 'operationName': 'content', - 'variables': { - 'contentId': asin, - }, - 'query': self._GRAPHQL_QUERY_CONTENT, - }, - note='Downloading title info') - prs = 
self._call_api(asin, note='Downloading playback info') - formats = [] - subtitles = {} + formats, subtitles = [], {} for type_, asset in prs['playbackAssets'].items(): - if not isinstance(asset, dict): + if not traverse_obj(asset, 'manifestUrl'): continue if type_ == 'hls': m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles( @@ -181,14 +149,16 @@ query content($sessionIdToken: String!, $deviceLocale: String, $contentId: ID!, asset['manifestUrl'], asin, mpd_id=type_, fatal=False) formats.extend(mpd_fmts) subtitles = self._merge_subtitles(subtitles, mpd_subs) + else: + self.report_warning(f'Unknown asset type: {type_}') - duration = traverse_obj(title_info, ('description', 'contentLengthInSeconds')) + title_info = self._call_api( + asin, note='Downloading title info', data={ + 'operationName': 'content', + 'variables': {'contentId': asin}, + 'query': self._GRAPHQL_QUERY_CONTENT, + }) credits_time = try_get(title_info, lambda x: x['timecode']['endCreditsTime'] / 1000) - chapters = [{ - 'start_time': credits_time, - 'end_time': duration + credits_time, # FIXME: I suppose this is correct - 'title': 'End Credits', - }] if credits_time and duration else [] is_episode = title_info.get('vodType') == 'EPISODE' return { @@ -203,8 +173,11 @@ query content($sessionIdToken: String!, $deviceLocale: String, $contentId: ID!, } for type_, url in (title_info.get('images') or {}).items()], 'description': traverse_obj(title_info, ('description', 'synopsis')), 'release_timestamp': int_or_none(try_get(title_info, lambda x: x['publicReleaseDateUTC'] / 1000)), - 'duration': duration, - 'chapters': chapters, + 'duration': traverse_obj(title_info, ('description', 'contentLengthInSeconds')), + 'chapters': [{ + 'start_time': credits_time, + 'title': 'End Credits', + }] if credits_time else [], 'series': title_info.get('seriesName'), 'series_id': title_info.get('seriesId'), 'season_number': title_info.get('seasonNumber'), @@ -215,7 +188,7 @@ query content($sessionIdToken: String!, $deviceLocale: String, $contentId: ID!, } -class AmazonMiniTVSeasonIE(AmazonMiniTVIE): +class AmazonMiniTVSeasonIE(AmazonMiniTVBaseIE): IE_NAME = 'amazonminitv:season' _VALID_URL = r'amazonminitv:season:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)' IE_DESC = 'Amazon MiniTV Series, "minitv:season:" prefix' @@ -229,6 +202,7 @@ class AmazonMiniTVSeasonIE(AmazonMiniTVIE): 'url': 'amazonminitv:season:0aa996eb-6a1b-4886-a342-387fbd2f1db0', 'only_matching': True, }] + _GRAPHQL_QUERY = ''' query getEpisodes($sessionIdToken: String!, $clientId: String, $episodeOrSeasonId: ID!, $deviceLocale: String) { getEpisodes( @@ -258,25 +232,22 @@ query getEpisodes($sessionIdToken: String!, $clientId: String, $episodeOrSeasonI def _entries(self, asin): season_info = self._call_api( - asin, - data={ + asin, note='Downloading season info', data={ 'operationName': 'getEpisodes', - 'variables': { - 'episodeOrSeasonId': asin, - }, + 'variables': {'episodeOrSeasonId': asin}, 'query': self._GRAPHQL_QUERY, - }, - note='Downloading season info') + }) for episode in season_info['episodes']: - yield self.url_result(f'amazonminitv:{episode["contentId"]}', AmazonMiniTVIE, episode['contentId']) + yield self.url_result( + f'amazonminitv:{episode["contentId"]}', AmazonMiniTVIE, episode['contentId']) def _real_extract(self, url): asin = f'amzn1.dv.gti.{self._match_id(url)}' - return self.playlist_result(self._entries(asin), playlist_id=asin) + return self.playlist_result(self._entries(asin), asin) -class AmazonMiniTVSeriesIE(AmazonMiniTVIE): +class 
AmazonMiniTVSeriesIE(AmazonMiniTVBaseIE): IE_NAME = 'amazonminitv:series' _VALID_URL = r'amazonminitv:series:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)' _TESTS = [{ @@ -289,6 +260,7 @@ class AmazonMiniTVSeriesIE(AmazonMiniTVIE): 'url': 'amazonminitv:series:56521d46-b040-4fd5-872e-3e70476a04b0', 'only_matching': True, }] + _GRAPHQL_QUERY = ''' query getSeasons($sessionIdToken: String!, $deviceLocale: String, $episodeOrSeasonOrSeriesId: ID!, $clientId: String) { getSeasons( @@ -304,19 +276,15 @@ query getSeasons($sessionIdToken: String!, $deviceLocale: String, $episodeOrSeas def _entries(self, asin): season_info = self._call_api( - asin, - data={ + asin, note='Downloading series info', data={ 'operationName': 'getSeasons', - 'variables': { - 'episodeOrSeasonOrSeriesId': asin, - }, + 'variables': {'episodeOrSeasonOrSeriesId': asin}, 'query': self._GRAPHQL_QUERY, - }, - note='Downloading series info') + }) for season in season_info['seasons']: yield self.url_result(f'amazonminitv:season:{season["seasonId"]}', AmazonMiniTVSeasonIE, season['seasonId']) def _real_extract(self, url): asin = f'amzn1.dv.gti.{self._match_id(url)}' - return self.playlist_result(self._entries(asin), playlist_id=asin) + return self.playlist_result(self._entries(asin), asin) From 71eb82d1b2864927b62e0600c41b8b9db4071218 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Wed, 30 Nov 2022 05:17:45 +0530 Subject: [PATCH 1796/2552] [extractor/youtube] Subtitles cannot be translated to `und` Closes #5674 --- yt_dlp/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 79d082d0b..c6c89915b 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -4085,7 +4085,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if not trans_code: continue orig_trans_code = trans_code - if caption_track.get('kind') != 'asr': + if caption_track.get('kind') != 'asr' and trans_code != 'und': if not get_translated_subs: continue trans_code += f'-{lang_code}' From 9bcfe33be7f1aa7164e690ced133cae4b063efa4 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Wed, 30 Nov 2022 06:10:26 +0530 Subject: [PATCH 1797/2552] [utils] Make `ExtractorError` mutable --- yt_dlp/extractor/common.py | 14 ++++---------- yt_dlp/utils.py | 21 +++++++++++++++------ 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index c2b9970ec..3ca8fe24c 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -692,16 +692,10 @@ class InfoExtractor: except UnsupportedError: raise except ExtractorError as e: - kwargs = { - 'video_id': e.video_id or self.get_temp_id(url), - 'ie': self.IE_NAME, - 'tb': e.traceback or sys.exc_info()[2], - 'expected': e.expected, - 'cause': e.cause - } - if hasattr(e, 'countries'): - kwargs['countries'] = e.countries - raise type(e)(e.orig_msg, **kwargs) + e.video_id = e.video_id or self.get_temp_id(url), + e.ie = e.ie or self.IE_NAME, + e.traceback = e.traceback or sys.exc_info()[2] + raise except http.client.IncompleteRead as e: raise ExtractorError('A network error has occurred.', cause=e, expected=True, video_id=self.get_temp_id(url)) except (KeyError, StopIteration) as e: diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index d351d0e36..ed1b24335 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -1095,13 +1095,16 @@ class ExtractorError(YoutubeDLError): self.exc_info = sys.exc_info() # preserve original 
exception if isinstance(self.exc_info[1], ExtractorError): self.exc_info = self.exc_info[1].exc_info + super().__init__(self.__msg) - super().__init__(''.join(( - format_field(ie, None, '[%s] '), - format_field(video_id, None, '%s: '), - msg, - format_field(cause, None, ' (caused by %r)'), - '' if expected else bug_reports_message()))) + @property + def __msg(self): + return ''.join(( + format_field(self.ie, None, '[%s] '), + format_field(self.video_id, None, '%s: '), + self.orig_msg, + format_field(self.cause, None, ' (caused by %r)'), + '' if self.expected else bug_reports_message())) def format_traceback(self): return join_nonempty( @@ -1109,6 +1112,12 @@ class ExtractorError(YoutubeDLError): self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]), delim='\n') or None + def __setattr__(self, name, value): + super().__setattr__(name, value) + if getattr(self, 'msg', None) and name not in ('msg', 'args'): + self.msg = self.__msg or type(self).__name__ + self.args = (self.msg, ) # Cannot be property + class UnsupportedError(ExtractorError): def __init__(self, url): From ba723997235fc50673dac8eae1503b509b7800d5 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 1 Dec 2022 04:00:32 +0000 Subject: [PATCH 1798/2552] [extractor/tiktok] Fix subs, `DouyinIE`, improve `_VALID_URL` (#5676) Closes #5665, Closes #2267 Authored by: bashonly --- yt_dlp/extractor/tiktok.py | 152 ++++++++++++++++++++++++------------- 1 file changed, 99 insertions(+), 53 deletions(-) diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 0ca6f5afd..1bbf88495 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -16,6 +16,7 @@ from ..utils import ( int_or_none, join_nonempty, qualities, + remove_start, srt_subtitles_timecode, str_or_none, traverse_obj, @@ -51,7 +52,7 @@ class TikTokBaseIE(InfoExtractor): return self._download_json( 'https://%s/aweme/v1/%s/' % (self._API_HOSTNAME, ep), video_id=video_id, fatal=fatal, note=note, errnote=errnote, headers={ - 'User-Agent': f'com.ss.android.ugc.trill/{manifest_app_version} (Linux; U; Android 10; en_US; Pixel 4; Build/QQ3A.200805.001; Cronet/58.0.2991.0)', + 'User-Agent': f'com.ss.android.ugc.{self._APP_NAME}/{manifest_app_version} (Linux; U; Android 10; en_US; Pixel 4; Build/QQ3A.200805.001; Cronet/58.0.2991.0)', 'Accept': 'application/json', }, query=query) @@ -126,11 +127,21 @@ class TikTokBaseIE(InfoExtractor): continue raise e + def _extract_aweme_app(self, aweme_id): + feed_list = self._call_api( + 'feed', {'aweme_id': aweme_id}, aweme_id, note='Downloading video feed', + errnote='Unable to download video feed').get('aweme_list') or [] + aweme_detail = next((aweme for aweme in feed_list if str(aweme.get('aweme_id')) == aweme_id), None) + if not aweme_detail: + raise ExtractorError('Unable to find video in feed', video_id=aweme_id) + return self._parse_aweme_video_app(aweme_detail) + def _get_subtitles(self, aweme_detail, aweme_id): # TODO: Extract text positioning info subtitles = {} + # aweme/detail endpoint subs captions_info = traverse_obj( - aweme_detail, ('interaction_stickers', ..., 'auto_video_caption_info', 'auto_captions', ...), expected_type=dict, default=[]) + aweme_detail, ('interaction_stickers', ..., 'auto_video_caption_info', 'auto_captions', ...), expected_type=dict) for caption in captions_info: caption_url = traverse_obj(caption, ('url', 'url_list', ...), expected_type=url_or_none, get_all=False) if not caption_url: @@ -145,6 
+156,24 @@ class TikTokBaseIE(InfoExtractor): f'{i + 1}\n{srt_subtitles_timecode(line["start_time"] / 1000)} --> {srt_subtitles_timecode(line["end_time"] / 1000)}\n{line["text"]}' for i, line in enumerate(caption_json['utterances']) if line.get('text')) }) + # feed endpoint subs + if not subtitles: + for caption in traverse_obj(aweme_detail, ('video', 'cla_info', 'caption_infos', ...), expected_type=dict): + if not caption.get('url'): + continue + subtitles.setdefault(caption.get('lang') or 'en', []).append({ + 'ext': remove_start(caption.get('caption_format'), 'web'), + 'url': caption['url'], + }) + # webpage subs + if not subtitles: + for caption in traverse_obj(aweme_detail, ('video', 'subtitleInfos', ...), expected_type=dict): + if not caption.get('Url'): + continue + subtitles.setdefault(caption.get('LanguageCodeName') or 'en', []).append({ + 'ext': remove_start(caption.get('Format'), 'web'), + 'url': caption['Url'], + }) return subtitles def _parse_aweme_video_app(self, aweme_detail): @@ -354,7 +383,7 @@ class TikTokBaseIE(InfoExtractor): 'timestamp': int_or_none(aweme_detail.get('createTime')), 'creator': str_or_none(author_info.get('nickname')), 'uploader': str_or_none(author_info.get('uniqueId') or aweme_detail.get('author')), - 'uploader_id': str_or_none(author_info.get('id') or aweme_detail.get('authorId')), + 'uploader_id': str_or_none(traverse_obj(author_info, 'id', 'uid', 'authorId')), 'uploader_url': user_url, 'track': str_or_none(music_info.get('title')), 'album': str_or_none(music_info.get('album')) or None, @@ -521,14 +550,6 @@ class TikTokIE(TikTokBaseIE): 'only_matching': True }] - def _extract_aweme_app(self, aweme_id): - feed_list = self._call_api('feed', {'aweme_id': aweme_id}, aweme_id, - note='Downloading video feed', errnote='Unable to download video feed').get('aweme_list') or [] - aweme_detail = next((aweme for aweme in feed_list if str(aweme.get('aweme_id')) == aweme_id), None) - if not aweme_detail: - raise ExtractorError('Unable to find video in feed', video_id=aweme_id) - return self._parse_aweme_video_app(aweme_detail) - def _real_extract(self, url): video_id, user_id = self._match_valid_url(url).group('id', 'user_id') try: @@ -763,56 +784,68 @@ class TikTokTagIE(TikTokBaseListIE): return self.playlist_result(self._entries(tag_id, display_id), tag_id, display_id) -class DouyinIE(TikTokIE): # XXX: Do not subclass from concrete IE +class DouyinIE(TikTokBaseIE): _VALID_URL = r'https?://(?:www\.)?douyin\.com/video/(?P<id>[0-9]+)' _TESTS = [{ 'url': 'https://www.douyin.com/video/6961737553342991651', - 'md5': '10523312c8b8100f353620ac9dc8f067', + 'md5': 'a97db7e3e67eb57bf40735c022ffa228', 'info_dict': { 'id': '6961737553342991651', 'ext': 'mp4', 'title': '#杨超越 小小水手带你去远航❤️', - 'uploader': '杨超越', - 'upload_date': '20210513', - 'timestamp': 1620905839, + 'description': '#杨超越 小小水手带你去远航❤️', 'uploader_id': '110403406559', + 'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', + 'creator': '杨超越', + 'duration': 19782, + 'timestamp': 1620905839, + 'upload_date': '20210513', + 'track': '@杨超越创作的原声', 'view_count': int, 'like_count': int, 'repost_count': int, 'comment_count': int, - } + }, }, { 'url': 'https://www.douyin.com/video/6982497745948921092', - 'md5': 'd78408c984b9b5102904cf6b6bc2d712', + 'md5': '34a87ebff3833357733da3fe17e37c0e', 'info_dict': { 'id': '6982497745948921092', 'ext': 'mp4', 'title': '这个夏日和小羊@杨超越 一起遇见白色幻想', - 'uploader': '杨超越工作室', - 'upload_date': '20210708', - 'timestamp': 1625739481, + 'description': 
'这个夏日和小羊@杨超越 一起遇见白色幻想', 'uploader_id': '408654318141572', + 'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA', + 'creator': '杨超越工作室', + 'duration': 42608, + 'timestamp': 1625739481, + 'upload_date': '20210708', + 'track': '@杨超越工作室创作的原声', 'view_count': int, 'like_count': int, 'repost_count': int, 'comment_count': int, - } + }, }, { 'url': 'https://www.douyin.com/video/6953975910773099811', - 'md5': '72e882e24f75064c218b76c8b713c185', + 'md5': 'dde3302460f19db59c47060ff013b902', 'info_dict': { 'id': '6953975910773099811', 'ext': 'mp4', 'title': '#一起看海 出现在你的夏日里', - 'uploader': '杨超越', - 'upload_date': '20210422', - 'timestamp': 1619098692, + 'description': '#一起看海 出现在你的夏日里', 'uploader_id': '110403406559', + 'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', + 'creator': '杨超越', + 'duration': 17228, + 'timestamp': 1619098692, + 'upload_date': '20210422', + 'track': '@杨超越创作的原声', 'view_count': int, 'like_count': int, 'repost_count': int, 'comment_count': int, - } + }, }, { 'url': 'https://www.douyin.com/video/6950251282489675042', 'md5': 'b4db86aec367ef810ddd38b1737d2fed', @@ -828,25 +861,30 @@ class DouyinIE(TikTokIE): # XXX: Do not subclass from concrete IE 'like_count': int, 'repost_count': int, 'comment_count': int, - } + }, + 'skip': 'No longer available', }, { 'url': 'https://www.douyin.com/video/6963263655114722595', - 'md5': '1abe1c477d05ee62efb40bf2329957cf', + 'md5': 'cf9f11f0ec45d131445ec2f06766e122', 'info_dict': { 'id': '6963263655114722595', 'ext': 'mp4', 'title': '#哪个爱豆的105度最甜 换个角度看看我哈哈', - 'uploader': '杨超越', - 'upload_date': '20210517', - 'timestamp': 1621261163, + 'description': '#哪个爱豆的105度最甜 换个角度看看我哈哈', 'uploader_id': '110403406559', + 'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', + 'creator': '杨超越', + 'duration': 15115, + 'timestamp': 1621261163, + 'upload_date': '20210517', + 'track': '@杨超越创作的原声', 'view_count': int, 'like_count': int, 'repost_count': int, 'comment_count': int, - } + }, }] - _APP_VERSIONS = [('9.6.0', '960')] + _APP_VERSIONS = [('23.3.0', '230300')] _APP_NAME = 'aweme' _AID = 1128 _API_HOSTNAME = 'aweme.snssdk.com' @@ -859,7 +897,8 @@ class DouyinIE(TikTokIE): # XXX: Do not subclass from concrete IE try: return self._extract_aweme_app(video_id) except ExtractorError as e: - self.report_warning(f'{e}; trying with webpage') + e.expected = True + self.to_screen(f'{e}; trying with webpage') webpage = self._download_webpage(url, video_id) render_data_json = self._search_regex( @@ -867,7 +906,10 @@ class DouyinIE(TikTokIE): # XXX: Do not subclass from concrete IE webpage, 'render data', default=None) if not render_data_json: # TODO: Run verification challenge code to generate signature cookies - raise ExtractorError('Fresh cookies (not necessarily logged in) are needed') + cookies = self._get_cookies(self._WEBPAGE_HOST) + expected = not cookies.get('s_v_web_id') or not cookies.get('ttwid') + raise ExtractorError( + 'Fresh cookies (not necessarily logged in) are needed', expected=expected) render_data = self._parse_json( render_data_json, video_id, transform_source=compat_urllib_parse_unquote) @@ -875,31 +917,35 @@ class DouyinIE(TikTokIE): # XXX: Do not subclass from concrete IE class TikTokVMIE(InfoExtractor): - _VALID_URL = r'https?://(?:vm|vt)\.tiktok\.com/(?P<id>\w+)' + _VALID_URL = r'https?://(?:(?:vm|vt)\.tiktok\.com|(?:www\.)tiktok\.com/t)/(?P<id>\w+)' IE_NAME = 'vm.tiktok' _TESTS = [{ - 'url': 
'https://vm.tiktok.com/ZSe4FqkKd', + 'url': 'https://www.tiktok.com/t/ZTRC5xgJp', 'info_dict': { - 'id': '7023491746608712966', + 'id': '7170520270497680683', 'ext': 'mp4', - 'title': 'md5:5607564db90271abbbf8294cca77eddd', - 'description': 'md5:5607564db90271abbbf8294cca77eddd', - 'duration': 11, - 'upload_date': '20211026', - 'uploader_id': '7007385080558846981', - 'creator': 'Memes', - 'artist': 'Memes', - 'track': 'original sound', - 'uploader': 'susmandem', - 'timestamp': 1635284105, - 'thumbnail': r're:https://.+\.webp.*', - 'like_count': int, + 'title': 'md5:c64f6152330c2efe98093ccc8597871c', + 'uploader_id': '6687535061741700102', + 'upload_date': '20221127', 'view_count': int, + 'like_count': int, 'comment_count': int, + 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAObqu3WCTXxmw2xwZ3iLEHnEecEIw7ks6rxWqOqOhaPja9BI7gqUQnjw8_5FSoDXX', + 'album': 'Wave of Mutilation: Best of Pixies', + 'thumbnail': r're:https://.+\.webp.*', + 'duration': 5, + 'timestamp': 1669516858, 'repost_count': int, - 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAXcNoOEOxVyBzuII_E--T0MeCrLP0ay1Sm6x_n3dluiWEoWZD0VlQOytwad4W0i0n', - } + 'artist': 'Pixies', + 'track': 'Where Is My Mind?', + 'description': 'md5:c64f6152330c2efe98093ccc8597871c', + 'uploader': 'sigmachaddeus', + 'creator': 'SigmaChad', + }, + }, { + 'url': 'https://vm.tiktok.com/ZSe4FqkKd', + 'only_matching': True, }, { 'url': 'https://vt.tiktok.com/ZSe4FqkKd', 'only_matching': True, From 0e96b408b994678764a89cabbb3879b2c383624a Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 1 Dec 2022 04:04:32 +0000 Subject: [PATCH 1799/2552] [extractor/reddit] Extract video embeds in text posts (#5677) Closes #5612 Authored by: bashonly --- yt_dlp/extractor/reddit.py | 45 +++++++++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py index 171affb93..f1a5c852a 100644 --- a/yt_dlp/extractor/reddit.py +++ b/yt_dlp/extractor/reddit.py @@ -1,15 +1,15 @@ import random -from urllib.parse import urlparse +import urllib.parse from .common import InfoExtractor from ..utils import ( ExtractorError, - int_or_none, float_or_none, + int_or_none, + traverse_obj, try_get, unescapeHTML, url_or_none, - traverse_obj ) @@ -56,6 +56,14 @@ class RedditIE(InfoExtractor): 'comment_count': int, 'age_limit': 0, }, + }, { + # videos embedded in reddit text post + 'url': 'https://www.reddit.com/r/KamenRider/comments/wzqkxp/finale_kamen_rider_revice_episode_50_family_to/', + 'playlist_count': 2, + 'info_dict': { + 'id': 'wzqkxp', + 'title': 'md5:72d3d19402aa11eff5bd32fc96369b37', + }, }, { 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj', 'only_matching': True, @@ -102,10 +110,6 @@ class RedditIE(InfoExtractor): data = data[0]['data']['children'][0]['data'] video_url = data['url'] - # Avoid recursing into the same reddit URL - if 'reddit.com/' in video_url and '/%s/' % video_id in video_url: - raise ExtractorError('No media found', expected=True) - over_18 = data.get('over_18') if over_18 is True: age_limit = 18 @@ -148,6 +152,32 @@ class RedditIE(InfoExtractor): 'age_limit': age_limit, } + parsed_url = urllib.parse.urlparse(video_url) + + # Check for embeds in text posts, or else raise to avoid recursing into the same reddit URL + if 'reddit.com' in parsed_url.netloc and f'/{video_id}/' in parsed_url.path: + entries = [] + for media in traverse_obj(data, ('media_metadata', ...), expected_type=dict): + if not media.get('id') or 
media.get('e') != 'RedditVideo':
+ continue
+ formats = []
+ if media.get('hlsUrl'):
+ formats.extend(self._extract_m3u8_formats(
+ unescapeHTML(media['hlsUrl']), video_id, 'mp4', m3u8_id='hls', fatal=False))
+ if media.get('dashUrl'):
+ formats.extend(self._extract_mpd_formats(
+ unescapeHTML(media['dashUrl']), video_id, mpd_id='dash', fatal=False))
+ if formats:
+ entries.append({
+ 'id': media['id'],
+ 'display_id': video_id,
+ 'formats': formats,
+ **info,
+ })
+ if entries:
+ return self.playlist_result(entries, video_id, info.get('title'))
+ raise ExtractorError('No media found', expected=True)
+
 # Check if media is hosted on reddit:
 reddit_video = traverse_obj(data, (('media', 'secure_media'), 'reddit_video'), get_all=False)
 if reddit_video:
@@ -189,7 +219,6 @@ class RedditIE(InfoExtractor):
 'duration': int_or_none(reddit_video.get('duration')),
 }
- parsed_url = urlparse(video_url)
 if parsed_url.netloc == 'v.redd.it':
 self.raise_no_formats('This video is processing', expected=True, video_id=video_id)
 return {
From ddf1e22d48530819d60220d0bdc36e20f5b8483b Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Thu, 1 Dec 2022 11:24:43 +0000
Subject: [PATCH 1800/2552] [extractor/swearnet] Fix description bug (#5681)
Bug in 049565df2e24d9611a9ffdd033c80a6dafdabbe0
Closes #5643
Authored by: bashonly
---
 yt_dlp/extractor/swearnet.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/yt_dlp/extractor/swearnet.py b/yt_dlp/extractor/swearnet.py
index 86a303ec7..6e216a2a5 100644
--- a/yt_dlp/extractor/swearnet.py
+++ b/yt_dlp/extractor/swearnet.py
@@ -62,7 +62,7 @@ class SwearnetEpisodeIE(InfoExtractor):
 'id': str(json_data['videoId']),
 'title': json_data.get('name') or self._html_search_meta(['og:title', 'twitter:title'], webpage),
 'description': (json_data.get('description')
- or self._html_search_meta(['og:description', 'twitter:description'])),
+ or self._html_search_meta(['og:description', 'twitter:description'], webpage)),
 'duration': int_or_none(json_data.get('seconds')),
 'formats': formats,
 'subtitles': subtitles,
From c9f5ce511877ae4f22d2eb2f70c3c6edf6c1971d Mon Sep 17 00:00:00 2001
From: Benjamin Ryan <ben@ryben.dev>
Date: Fri, 2 Dec 2022 03:38:00 -0600
Subject: [PATCH 1801/2552] [extractor/tiktok] Update API hostname (#5690)
Closes #5688
Authored by: redraskal
---
 yt_dlp/extractor/tiktok.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py
index 1bbf88495..95223f5de 100644
--- a/yt_dlp/extractor/tiktok.py
+++ b/yt_dlp/extractor/tiktok.py
@@ -30,7 +30,7 @@ class TikTokBaseIE(InfoExtractor):
 _WORKING_APP_VERSION = None
 _APP_NAME = 'trill'
 _AID = 1180
- _API_HOSTNAME = 'api-h2.tiktokv.com'
+ _API_HOSTNAME = 'api16-normal-c-useast1a.tiktokv.com'
 _UPLOADER_URL_FORMAT = 'https://www.tiktok.com/@%s'
 _WEBPAGE_HOST = 'https://www.tiktok.com/'
 QUALITIES = ('360p', '540p', '720p', '1080p')
From 71df9b7fd504767583cf1e088ae307c942799f2b Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Wed, 30 Nov 2022 11:34:51 +0530
Subject: [PATCH 1802/2552] [cleanup] Misc
---
 .github/workflows/core.yml | 11 ++++++-----
 .github/workflows/quick-test.yml | 13 ++++++-------
 .gitignore | 1 +
 CONTRIBUTING.md | 22 +++++++++++++++++++---
 README.md | 26 +++++++++++++-------------
 yt_dlp/YoutubeDL.py | 2 +-
 yt_dlp/__init__.py | 7 +++----
 yt_dlp/downloader/common.py | 5 ++++-
 yt_dlp/extractor/common.py | 12 +++++++++++-
 yt_dlp/options.py | 8 ++++----
 yt_dlp/utils.py | 5
++++- 11 files changed, 72 insertions(+), 40 deletions(-) diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml index e12918626..dead444c0 100644 --- a/.github/workflows/core.yml +++ b/.github/workflows/core.yml @@ -12,13 +12,13 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest] - # CPython 3.9 is in quick-test - python-version: ['3.7', '3.10', 3.11-dev, pypy-3.7, pypy-3.8] + # CPython 3.11 is in quick-test + python-version: ['3.8', '3.9', '3.10', pypy-3.7, pypy-3.8] run-tests-ext: [sh] include: # atleast one of each CPython/PyPy tests must be in windows - os: windows-latest - python-version: '3.8' + python-version: '3.7' run-tests-ext: bat - os: windows-latest python-version: pypy-3.9 @@ -33,5 +33,6 @@ jobs: run: pip install pytest - name: Run tests continue-on-error: False - run: ./devscripts/run_tests.${{ matrix.run-tests-ext }} core - # Linter is in quick-test + run: | + python3 -m yt_dlp -v || true # Print debug head + ./devscripts/run_tests.${{ matrix.run-tests-ext }} core diff --git a/.github/workflows/quick-test.yml b/.github/workflows/quick-test.yml index 8a0ac98bb..930e58152 100644 --- a/.github/workflows/quick-test.yml +++ b/.github/workflows/quick-test.yml @@ -10,24 +10,23 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - name: Set up Python + - name: Set up Python 3.11 uses: actions/setup-python@v4 with: - python-version: 3.9 + python-version: '3.11' - name: Install test requirements run: pip install pytest pycryptodomex - name: Run tests - run: ./devscripts/run_tests.sh core + run: | + python3 -m yt_dlp -v || true + ./devscripts/run_tests.sh core flake8: name: Linter if: "!contains(github.event.head_commit.message, 'ci skip all')" runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: 3.9 + - uses: actions/setup-python@v4 - name: Install flake8 run: pip install flake8 - name: Make lazy extractors diff --git a/.gitignore b/.gitignore index 0ce059b34..00d74057f 100644 --- a/.gitignore +++ b/.gitignore @@ -71,6 +71,7 @@ dist/ zip/ tmp/ venv/ +.venv/ completions/ # Misc diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a8ac671dc..551db674e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -351,8 +351,9 @@ Say you extracted a list of thumbnails into `thumbnail_data` and want to iterate ```python thumbnail_data = data.get('thumbnails') or [] thumbnails = [{ - 'url': item['url'] -} for item in thumbnail_data] # correct + 'url': item['url'], + 'height': item.get('h'), +} for item in thumbnail_data if item.get('url')] # correct ``` and not like: @@ -360,12 +361,27 @@ and not like: ```python thumbnail_data = data.get('thumbnails') thumbnails = [{ - 'url': item['url'] + 'url': item['url'], + 'height': item.get('h'), } for item in thumbnail_data] # incorrect ``` In this case, `thumbnail_data` will be `None` if the field was not found and this will cause the loop `for item in thumbnail_data` to raise a fatal error. Using `or []` avoids this error and results in setting an empty list in `thumbnails` instead. 
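+The same `or []` guard applies to any optional list in a response, e.g. `for fmt in (data.get('formats') or []): ...` (a hypothetical `formats` field, shown only for illustration).
+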
+Alternately, this can be further simplified by using `traverse_obj` + +```python +thumbnails = [{ + 'url': item['url'], + 'height': item.get('h'), +} for item in traverse_obj(data, ('thumbnails', lambda _, v: v['url']))] +``` + +or, even better, + +```python +thumbnails = traverse_obj(data, ('thumbnails', ..., {'url': 'url', 'height': 'h'})) +``` ### Provide fallbacks diff --git a/README.md b/README.md index fa55d130b..b6a07da9a 100644 --- a/README.md +++ b/README.md @@ -432,19 +432,19 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi explicitly provided IP block in CIDR notation ## Video Selection: - -I, --playlist-items ITEM_SPEC Comma separated playlist_index of the videos + -I, --playlist-items ITEM_SPEC Comma separated playlist_index of the items to download. You can specify a range using "[START]:[STOP][:STEP]". For backward compatibility, START-STOP is also supported. Use negative indices to count from the right and negative STEP to download in reverse order. E.g. "-I 1:3,7,-5::2" used on a - playlist of size 15 will download the videos + playlist of size 15 will download the items at index 1,2,3,7,11,13,15 - --min-filesize SIZE Do not download any videos smaller than + --min-filesize SIZE Abort download if filesize is smaller than + SIZE, e.g. 50k or 44.6M + --max-filesize SIZE Abort download if filesize is larger than SIZE, e.g. 50k or 44.6M - --max-filesize SIZE Do not download any videos larger than SIZE, - e.g. 50k or 44.6M --date DATE Download only videos uploaded on this date. The date can be "YYYYMMDD" or in the format [now|today|yesterday][-N[day|week|month|year]]. @@ -491,9 +491,9 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi a file that is in the archive --break-on-reject Stop the download process when encountering a file that has been filtered out - --break-per-input --break-on-existing, --break-on-reject, - --max-downloads, and autonumber resets per - input URL + --break-per-input Alters --max-downloads, --break-on-existing, + --break-on-reject, and autonumber to reset + per input URL --no-break-per-input --break-on-existing and similar options terminates the entire download queue --skip-playlist-after-errors N Number of allowed failures until the rest of @@ -1046,10 +1046,10 @@ Make chapter entries for, or remove various segments (sponsor, for, separated by commas. Available categories are sponsor, intro, outro, selfpromo, preview, filler, interaction, - music_offtopic, poi_highlight, chapter, all and - default (=all). You can prefix the category - with a "-" to exclude it. See [1] for - description of the categories. E.g. + music_offtopic, poi_highlight, chapter, all + and default (=all). You can prefix the + category with a "-" to exclude it. See [1] + for description of the categories. E.g. --sponsorblock-mark all,-preview [1] https://wiki.sponsor.ajay.app/w/Segment_Categories --sponsorblock-remove CATS SponsorBlock categories to be removed from @@ -1058,7 +1058,7 @@ Make chapter entries for, or remove various segments (sponsor, remove takes precedence. 
The syntax and available categories are the same as for --sponsorblock-mark except that "default" - refers to "all,-filler" and poi_highlight and + refers to "all,-filler" and poi_highlight, chapter are not available --sponsorblock-chapter-title TEMPLATE An output template for the title of the diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index b1d009280..8d28783d8 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3123,7 +3123,7 @@ class YoutubeDL: fd, success = None, True if info_dict.get('protocol') or info_dict.get('url'): fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-') - if fd is not FFmpegFD and ( + if fd is not FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and ( info_dict.get('section_start') or info_dict.get('section_end')): msg = ('This format cannot be partially downloaded' if FFmpegFD.available() else 'You have requested downloading the video partially, but ffmpeg is not installed') diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index f1a347514..f1d6c369b 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -91,12 +91,11 @@ def get_urls(urls, batchfile, verbose): def print_extractor_information(opts, urls): - # Importing GenericIE is currently slow since it imports other extractors - # TODO: Move this back to module level after generalization of embed detection - from .extractor.generic import GenericIE - out = '' if opts.list_extractors: + # Importing GenericIE is currently slow since it imports YoutubeIE + from .extractor.generic import GenericIE + urls = dict.fromkeys(urls, False) for ie in list_extractor_classes(opts.age_limit): out += ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie.working() else '') + '\n' diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index fe3633250..077b29b41 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -20,6 +20,7 @@ from ..utils import ( RetryManager, classproperty, decodeArgument, + deprecation_warning, encodeFilename, format_bytes, join_nonempty, @@ -180,7 +181,9 @@ class FileDownloader: @staticmethod def parse_bytes(bytestr): """Parse a string indicating a byte quantity into an integer.""" - parse_bytes(bytestr) + deprecation_warning('yt_dlp.FileDownloader.parse_bytes is deprecated and ' + 'may be removed in the future. 
Use yt_dlp.utils.parse_bytes instead') + return parse_bytes(bytestr) def slow_down(self, start_time, now, byte_counter): """Sleep if the download speed is over the rate limit.""" diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 3ca8fe24c..3910c55ad 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -71,6 +71,7 @@ from ..utils import ( str_to_int, strip_or_none, traverse_obj, + truncate_string, try_call, try_get, unescapeHTML, @@ -674,7 +675,8 @@ class InfoExtractor: for _ in range(2): try: self.initialize() - self.write_debug('Extracting URL: %s' % url) + self.to_screen('Extracting URL: %s' % ( + url if self.get_param('verbose') else truncate_string(url, 100, 20))) ie_result = self._real_extract(url) if ie_result is None: return None @@ -1906,6 +1908,14 @@ class InfoExtractor: errnote=None, fatal=True, live=False, data=None, headers={}, query={}): + if not m3u8_url: + if errnote is not False: + errnote = errnote or 'Failed to obtain m3u8 URL' + if fatal: + raise ExtractorError(errnote, video_id=video_id) + self.report_warning(f'{errnote}{bug_reports_message()}') + return [], {} + res = self._download_webpage_handle( m3u8_url, video_id, note='Downloading m3u8 information' if note is None else note, diff --git a/yt_dlp/options.py b/yt_dlp/options.py index bee867aa9..bc574b885 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -535,10 +535,10 @@ def create_parser(): '-I', '--playlist-items', dest='playlist_items', metavar='ITEM_SPEC', default=None, help=( - 'Comma separated playlist_index of the videos to download. ' + 'Comma separated playlist_index of the items to download. ' 'You can specify a range using "[START]:[STOP][:STEP]". For backward compatibility, START-STOP is also supported. ' 'Use negative indices to count from the right and negative STEP to download in reverse order. ' - 'E.g. "-I 1:3,7,-5::2" used on a playlist of size 15 will download the videos at index 1,2,3,7,11,13,15')) + 'E.g. "-I 1:3,7,-5::2" used on a playlist of size 15 will download the items at index 1,2,3,7,11,13,15')) selection.add_option( '--match-title', dest='matchtitle', metavar='REGEX', @@ -554,7 +554,7 @@ def create_parser(): selection.add_option( '--max-filesize', metavar='SIZE', dest='max_filesize', default=None, - help='Abort download if filesize if larger than SIZE, e.g. 50k or 44.6M') + help='Abort download if filesize is larger than SIZE, e.g. 
50k or 44.6M') selection.add_option( '--date', metavar='DATE', dest='date', default=None, @@ -635,7 +635,7 @@ def create_parser(): selection.add_option( '--break-per-input', action='store_true', dest='break_per_url', default=False, - help='--break-on-existing, --break-on-reject, --max-downloads, and autonumber resets per input URL') + help='Alters --max-downloads, --break-on-existing, --break-on-reject, and autonumber to reset per input URL') selection.add_option( '--no-break-per-input', action='store_false', dest='break_per_url', diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index ed1b24335..a3da3c69e 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -3872,6 +3872,9 @@ class download_range_func: return (isinstance(other, download_range_func) and self.chapters == other.chapters and self.ranges == other.ranges) + def __repr__(self): + return f'{type(self).__name__}({self.chapters}, {self.ranges})' + def parse_dfxp_time_expr(time_expr): if not time_expr: @@ -5976,7 +5979,7 @@ def truncate_string(s, left, right=0): assert left > 3 and right >= 0 if s is None or len(s) <= left + right: return s - return f'{s[:left-3]}...{s[-right:]}' + return f'{s[:left-3]}...{s[-right:] if right else ""}' def orderedSet_from_options(options, alias_dict, *, use_regex=False, start=None): From c53a18f016fe6ff774411d938c9959097f00b44c Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Mon, 5 Dec 2022 01:06:37 +0530 Subject: [PATCH 1803/2552] [utils] windows_enable_vt_mode: Proper implementation Authored by: Grub4K --- yt_dlp/utils.py | 38 ++++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index a3da3c69e..36170e125 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -5579,17 +5579,39 @@ def supports_terminal_sequences(stream): return False -def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.python.org/issue30075 +def windows_enable_vt_mode(): + """Ref: https://bugs.python.org/issue30075 """ if get_windows_version() < (10, 0, 10586): return - global WINDOWS_VT_MODE - try: - Popen.run('', shell=True) - except Exception: - return - WINDOWS_VT_MODE = True - supports_terminal_sequences.cache_clear() + import ctypes + import ctypes.wintypes + import msvcrt + + ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x0004 + + dll = ctypes.WinDLL('kernel32', use_last_error=False) + handle = os.open('CONOUT$', os.O_RDWR) + + try: + h_out = ctypes.wintypes.HANDLE(msvcrt.get_osfhandle(handle)) + dw_original_mode = ctypes.wintypes.DWORD() + success = dll.GetConsoleMode(h_out, ctypes.byref(dw_original_mode)) + if not success: + raise Exception('GetConsoleMode failed') + + success = dll.SetConsoleMode(h_out, ctypes.wintypes.DWORD( + dw_original_mode.value | ENABLE_VIRTUAL_TERMINAL_PROCESSING)) + if not success: + raise Exception('SetConsoleMode failed') + except Exception as e: + write_string(f'WARNING: Cannot enable VT mode - {e}') + else: + global WINDOWS_VT_MODE + WINDOWS_VT_MODE = True + supports_terminal_sequences.cache_clear() + finally: + os.close(handle) _terminal_sequences_re = re.compile('\033\\[[^m]+m') From c4cbd3bebd33d2d77fa340a4035447ab1b9eb3eb Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 4 Dec 2022 22:30:31 +0000 Subject: [PATCH 1804/2552] [extractor/tiktok] Update `_VALID_URL`, add `api_hostname` arg (#5708) Closes #5706 Authored by: bashonly --- README.md | 1 + yt_dlp/extractor/tiktok.py | 31 +++++++++++++++++++++++++++---- 2 files changed, 
28 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index b6a07da9a..8fdedacf5 100644 --- a/README.md +++ b/README.md @@ -1765,6 +1765,7 @@ The following extractors use this feature: * `dr`: dynamic range to ignore - one or more of `sdr`, `hdr10`, `dv` #### tiktok +* `api_hostname`: Hostname to use for mobile API requests, e.g. `api-h2.tiktokv.com` * `app_version`: App version to call mobile APIs with - should be set along with `manifest_app_version`, e.g. `20.2.1` * `manifest_app_version`: Numeric app version to call mobile APIs with, e.g. `221` diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 95223f5de..2dd4510cc 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -30,11 +30,15 @@ class TikTokBaseIE(InfoExtractor): _WORKING_APP_VERSION = None _APP_NAME = 'trill' _AID = 1180 - _API_HOSTNAME = 'api16-normal-c-useast1a.tiktokv.com' _UPLOADER_URL_FORMAT = 'https://www.tiktok.com/@%s' _WEBPAGE_HOST = 'https://www.tiktok.com/' QUALITIES = ('360p', '540p', '720p', '1080p') + @property + def _API_HOSTNAME(self): + return self._configuration_arg( + 'api_hostname', ['api16-normal-c-useast1a.tiktokv.com'], ie_key=TikTokIE)[0] + @staticmethod def _create_url(user_id, video_id): return f'https://www.tiktok.com/@{user_id or "_"}/video/{video_id}' @@ -398,7 +402,7 @@ class TikTokBaseIE(InfoExtractor): class TikTokIE(TikTokBaseIE): - _VALID_URL = r'https?://www\.tiktok\.com/(?:embed|@(?P<user_id>[\w\.-]+)/video)/(?P<id>\d+)' + _VALID_URL = r'https?://www\.tiktok\.com/(?:embed|@(?P<user_id>[\w\.-]+)?/video)/(?P<id>\d+)' _EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})'] _TESTS = [{ @@ -944,8 +948,27 @@ class TikTokVMIE(InfoExtractor): 'creator': 'SigmaChad', }, }, { - 'url': 'https://vm.tiktok.com/ZSe4FqkKd', - 'only_matching': True, + 'url': 'https://vm.tiktok.com/ZTR45GpSF/', + 'info_dict': { + 'id': '7106798200794926362', + 'ext': 'mp4', + 'title': 'md5:edc3e7ea587847f8537468f2fe51d074', + 'uploader_id': '6997695878846268418', + 'upload_date': '20220608', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'thumbnail': r're:https://.+\.webp.*', + 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAdZ_NcPPgMneaGrW0hN8O_J_bwLshwNNERRF5DxOw2HKIzk0kdlLrR8RkVl1ksrMO', + 'duration': 29, + 'timestamp': 1654680400, + 'repost_count': int, + 'artist': 'Akihitoko', + 'track': 'original sound', + 'description': 'md5:edc3e7ea587847f8537468f2fe51d074', + 'uploader': 'akihitoko1', + 'creator': 'Akihitoko', + }, }, { 'url': 'https://vt.tiktok.com/ZSe4FqkKd', 'only_matching': True, From 935bac1e4de35107a15ea2ad45402f507527dcfb Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 6 Dec 2022 00:35:08 +0530 Subject: [PATCH 1805/2552] Fix `--cookies-from-browser` CLI parsing Closes #5716 --- yt_dlp/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index f1d6c369b..202f102ba 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -350,7 +350,7 @@ def validate_options(opts): mobj = re.fullmatch(r'''(?x) (?P<name>[^+:]+) (?:\s*\+\s*(?P<keyring>[^:]+))? - (?:\s*:\s*(?P<profile>.+?))? + (?:\s*:\s*(?!:)(?P<profile>.+?))? (?:\s*::\s*(?P<container>.+))? 
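            # Editorial sketch of the fix above ((?x) mode permits comments;
            # the input is made up): for "firefox::Work", the old single-colon
            # branch let (?P<profile>.+?) swallow ":Work", so no container was
            # ever captured. The added (?!:) lookahead rejects a profile that
            # starts with ":", letting the "::" branch capture "Work" instead.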
''', opts.cookiesfrombrowser) if mobj is None: From 7991ae57a800316930e20a15df8314616c5cba8f Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Thu, 8 Dec 2022 17:17:16 +0530 Subject: [PATCH 1806/2552] [extractor/sibnet] Separate from VKIE Fixes https://github.com/yt-dlp/yt-dlp/commit/bfd973ece3369c593b5e82a88cc16de80088a73e#commitcomment-91834251 --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/generic.py | 5 ----- yt_dlp/extractor/sibnet.py | 17 +++++++++++++++++ yt_dlp/extractor/vk.py | 6 +++--- 4 files changed, 21 insertions(+), 8 deletions(-) create mode 100644 yt_dlp/extractor/sibnet.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 2fe15f6d2..137284089 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1639,6 +1639,7 @@ from .shared import ( VivoIE, ) from .sharevideos import ShareVideosEmbedIE +from .sibnet import SibnetEmbedIE from .shemaroome import ShemarooMeIE from .showroomlive import ShowRoomLiveIE from .simplecast import ( diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 2fcbc6f43..190aff331 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -1864,11 +1864,6 @@ class GenericIE(InfoExtractor): 'title': 'I AM BIO Podcast | BIO', }, 'playlist_mincount': 52, - }, - { - # Sibnet embed (https://help.sibnet.ru/?sibnet_video_embed) - 'url': 'https://phpbb3.x-tk.ru/bbcode-video-sibnet-t24.html', - 'only_matching': True, }, { # WimTv embed player 'url': 'http://www.msmotor.tv/wearefmi-pt-2-2021/', diff --git a/yt_dlp/extractor/sibnet.py b/yt_dlp/extractor/sibnet.py new file mode 100644 index 000000000..73bb75d8f --- /dev/null +++ b/yt_dlp/extractor/sibnet.py @@ -0,0 +1,17 @@ +from .common import InfoExtractor + + +class SibnetEmbedIE(InfoExtractor): + # Ref: https://help.sibnet.ru/?sibnet_video_embed + _VALID_URL = False + _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.sibnet\.ru/shell\.php\?.*?\bvideoid=\d+.*?)\1'] + _WEBPAGE_TESTS = [{ + 'url': 'https://phpbb3.x-tk.ru/bbcode-video-sibnet-t24.html', + 'info_dict': { + 'id': 'shell', # FIXME? + 'ext': 'mp4', + 'age_limit': 0, + 'thumbnail': 'https://video.sibnet.ru/upload/cover/video_1887072_0.jpg', + 'title': 'КВН Москва не сразу строилась - Девушка впервые играет в Mortal Kombat', + } + }] diff --git a/yt_dlp/extractor/vk.py b/yt_dlp/extractor/vk.py index 347aa381d..0fb95c863 100644 --- a/yt_dlp/extractor/vk.py +++ b/yt_dlp/extractor/vk.py @@ -6,6 +6,7 @@ from .common import InfoExtractor from .dailymotion import DailymotionIE from .odnoklassniki import OdnoklassnikiIE from .pladform import PladformIE +from .sibnet import SibnetEmbedIE from .vimeo import VimeoIE from .youtube import YoutubeIE from ..compat import compat_urlparse @@ -101,8 +102,7 @@ class VKIE(VKBaseIE): (?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>([\da-f]+)|(ln-[\da-zA-Z]+)))? 
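                            # Illustrative gloss, not part of the original
                            # pattern: "-77521_162222515" pairs an owner id
                            # (negative for community pages) with an item id;
                            # the optional list= key scopes it to a playlist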
) ''' - # https://help.sibnet.ru/?sibnet_video_embed - _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.sibnet\.ru/shell\.php\?.*?\bvideoid=\d+.*?)\1'] + _TESTS = [ { 'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521', @@ -455,7 +455,7 @@ class VKIE(VKBaseIE): if odnoklassniki_url: return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key()) - sibnet_url = next(self._extract_embed_urls(url, info_page), None) + sibnet_url = next(SibnetEmbedIE._extract_embed_urls(url, info_page), None) if sibnet_url: return self.url_result(sibnet_url) From 42ec478fc4abe4131a0908881673a19aa750bc97 Mon Sep 17 00:00:00 2001 From: David Turner <547637+digitall@users.noreply.github.com> Date: Thu, 8 Dec 2022 12:38:52 +0000 Subject: [PATCH 1807/2552] [extractor/plutotv] Fix videos with non-zero start (#5745) Authored by: digitall --- yt_dlp/extractor/plutotv.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/plutotv.py b/yt_dlp/extractor/plutotv.py index 71a05cc7a..caffeb21d 100644 --- a/yt_dlp/extractor/plutotv.py +++ b/yt_dlp/extractor/plutotv.py @@ -84,6 +84,17 @@ class PlutoTVIE(InfoExtractor): }, { 'url': 'https://pluto.tv/it/on-demand/series/csi-vegas/episode/legacy-2021-1-1', 'only_matching': True, + }, + { + 'url': 'https://pluto.tv/en/on-demand/movies/attack-of-the-killer-tomatoes-1977-1-1-ptv1', + 'md5': '7db56369c0da626a32d505ec6eb3f89f', + 'info_dict': { + 'id': '5b190c7bb0875c36c90c29c4', + 'ext': 'mp4', + 'title': 'Attack of the Killer Tomatoes', + 'description': 'A group of scientists band together to save the world from mutated tomatoes that KILL! (1978)', + 'duration': 5700, + } } ] @@ -103,7 +114,7 @@ class PlutoTVIE(InfoExtractor): compat_urlparse.urljoin(first_segment_url.group(1), '0-end/master.m3u8')) continue first_segment_url = re.search( - r'^(https?://.*/).+\-0+\.ts$', res, + r'^(https?://.*/).+\-0+[0-1]0\.ts$', res, re.MULTILINE) if first_segment_url: m3u8_urls.add( From dfc186d4220081fdf7184347187639b15ab68a2f Mon Sep 17 00:00:00 2001 From: lkw123 <2020393267@qq.com> Date: Thu, 8 Dec 2022 20:43:29 +0800 Subject: [PATCH 1808/2552] [extractor/xiami] Remove extractors (#5711) Authored by: synthpop123 --- supportedsites.md | 4 - yt_dlp/extractor/_extractors.py | 6 - yt_dlp/extractor/xiami.py | 198 -------------------------------- 3 files changed, 208 deletions(-) delete mode 100644 yt_dlp/extractor/xiami.py diff --git a/supportedsites.md b/supportedsites.md index d7565c139..fbada177e 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -1624,10 +1624,6 @@ - **XHamster** - **XHamsterEmbed** - **XHamsterUser** - - **xiami:album**: 虾米音乐 - 专辑 - - **xiami:artist**: 虾米音乐 - 歌手 - - **xiami:collection**: 虾米音乐 - 精选集 - - **xiami:song**: 虾米音乐 - **ximalaya**: 喜马拉雅FM - **ximalaya:album**: 喜马拉雅FM 专辑 - **xinpianchang**: xinpianchang.com diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 137284089..54ac1b730 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2236,12 +2236,6 @@ from .xhamster import ( XHamsterEmbedIE, XHamsterUserIE, ) -from .xiami import ( - XiamiSongIE, - XiamiAlbumIE, - XiamiArtistIE, - XiamiCollectionIE -) from .ximalaya import ( XimalayaIE, XimalayaAlbumIE diff --git a/yt_dlp/extractor/xiami.py b/yt_dlp/extractor/xiami.py deleted file mode 100644 index 71b2956a8..000000000 --- a/yt_dlp/extractor/xiami.py +++ /dev/null @@ -1,198 +0,0 @@ -from .common import InfoExtractor -from ..compat import 
compat_urllib_parse_unquote -from ..utils import int_or_none - - -class XiamiBaseIE(InfoExtractor): - _API_BASE_URL = 'https://emumo.xiami.com/song/playlist/cat/json/id' - - def _download_webpage_handle(self, *args, **kwargs): - webpage = super(XiamiBaseIE, self)._download_webpage_handle(*args, **kwargs) - if '>Xiami is currently not available in your country.<' in webpage: - self.raise_geo_restricted('Xiami is currently not available in your country') - return webpage - - def _extract_track(self, track, track_id=None): - track_name = track.get('songName') or track.get('name') or track['subName'] - artist = track.get('artist') or track.get('artist_name') or track.get('singers') - title = '%s - %s' % (artist, track_name) if artist else track_name - track_url = self._decrypt(track['location']) - - subtitles = {} - lyrics_url = track.get('lyric_url') or track.get('lyric') - if lyrics_url and lyrics_url.startswith('http'): - subtitles['origin'] = [{'url': lyrics_url}] - - return { - 'id': track.get('song_id') or track_id, - 'url': track_url, - 'title': title, - 'thumbnail': track.get('pic') or track.get('album_pic'), - 'duration': int_or_none(track.get('length')), - 'creator': track.get('artist', '').split(';')[0], - 'track': track_name, - 'track_number': int_or_none(track.get('track')), - 'album': track.get('album_name') or track.get('title'), - 'artist': artist, - 'subtitles': subtitles, - } - - def _extract_tracks(self, item_id, referer, typ=None): - playlist = self._download_json( - '%s/%s%s' % (self._API_BASE_URL, item_id, '/type/%s' % typ if typ else ''), - item_id, headers={ - 'Referer': referer, - }) - return [ - self._extract_track(track, item_id) - for track in playlist['data']['trackList']] - - @staticmethod - def _decrypt(origin): - n = int(origin[0]) - origin = origin[1:] - short_length = len(origin) // n - long_num = len(origin) - short_length * n - l = tuple() - for i in range(0, n): - length = short_length - if i < long_num: - length += 1 - l += (origin[0:length], ) - origin = origin[length:] - ans = '' - for i in range(0, short_length + 1): - for j in range(0, n): - if len(l[j]) > i: - ans += l[j][i] - return compat_urllib_parse_unquote(ans).replace('^', '0') - - -class XiamiSongIE(XiamiBaseIE): - IE_NAME = 'xiami:song' - IE_DESC = '虾米音乐' - _VALID_URL = r'https?://(?:www\.)?xiami\.com/song/(?P<id>[^/?#&]+)' - _TESTS = [{ - 'url': 'http://www.xiami.com/song/1775610518', - 'md5': '521dd6bea40fd5c9c69f913c232cb57e', - 'info_dict': { - 'id': '1775610518', - 'ext': 'mp3', - 'title': 'HONNE - Woman', - 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', - 'duration': 265, - 'creator': 'HONNE', - 'track': 'Woman', - 'album': 'Woman', - 'artist': 'HONNE', - 'subtitles': { - 'origin': [{ - 'ext': 'lrc', - }], - }, - }, - 'skip': 'Georestricted', - }, { - 'url': 'http://www.xiami.com/song/1775256504', - 'md5': '932a3abd45c6aa2b1fdbe028fcb4c4fc', - 'info_dict': { - 'id': '1775256504', - 'ext': 'mp3', - 'title': '戴荃 - 悟空', - 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', - 'duration': 200, - 'creator': '戴荃', - 'track': '悟空', - 'album': '悟空', - 'artist': '戴荃', - 'subtitles': { - 'origin': [{ - 'ext': 'lrc', - }], - }, - }, - 'skip': 'Georestricted', - }, { - 'url': 'http://www.xiami.com/song/1775953850', - 'info_dict': { - 'id': '1775953850', - 'ext': 'mp3', - 'title': 'До Скону - Чума Пожирает Землю', - 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', - 'duration': 683, - 'creator': 'До Скону', - 'track': 'Чума Пожирает Землю', - 'track_number': 
7, - 'album': 'Ад', - 'artist': 'До Скону', - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'http://www.xiami.com/song/xLHGwgd07a1', - 'only_matching': True, - }] - - def _real_extract(self, url): - return self._extract_tracks(self._match_id(url), url)[0] - - -class XiamiPlaylistBaseIE(XiamiBaseIE): - def _real_extract(self, url): - item_id = self._match_id(url) - return self.playlist_result(self._extract_tracks(item_id, url, self._TYPE), item_id) - - -class XiamiAlbumIE(XiamiPlaylistBaseIE): - IE_NAME = 'xiami:album' - IE_DESC = '虾米音乐 - 专辑' - _VALID_URL = r'https?://(?:www\.)?xiami\.com/album/(?P<id>[^/?#&]+)' - _TYPE = '1' - _TESTS = [{ - 'url': 'http://www.xiami.com/album/2100300444', - 'info_dict': { - 'id': '2100300444', - }, - 'playlist_count': 10, - 'skip': 'Georestricted', - }, { - 'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9', - 'only_matching': True, - }, { - 'url': 'http://www.xiami.com/album/URVDji2a506', - 'only_matching': True, - }] - - -class XiamiArtistIE(XiamiPlaylistBaseIE): - IE_NAME = 'xiami:artist' - IE_DESC = '虾米音乐 - 歌手' - _VALID_URL = r'https?://(?:www\.)?xiami\.com/artist/(?P<id>[^/?#&]+)' - _TYPE = '2' - _TESTS = [{ - 'url': 'http://www.xiami.com/artist/2132?spm=0.0.0.0.dKaScp', - 'info_dict': { - 'id': '2132', - }, - 'playlist_count': 20, - 'skip': 'Georestricted', - }, { - 'url': 'http://www.xiami.com/artist/bC5Tk2K6eb99', - 'only_matching': True, - }] - - -class XiamiCollectionIE(XiamiPlaylistBaseIE): - IE_NAME = 'xiami:collection' - IE_DESC = '虾米音乐 - 精选集' - _VALID_URL = r'https?://(?:www\.)?xiami\.com/collect/(?P<id>[^/?#&]+)' - _TYPE = '3' - _TEST = { - 'url': 'http://www.xiami.com/collect/156527391?spm=a1z1s.2943601.6856193.12.4jpBnr', - 'info_dict': { - 'id': '156527391', - }, - 'playlist_mincount': 29, - 'skip': 'Georestricted', - } From 28b8f57b4b2a2e1bd1fbe68ae1ab2c44fdd51992 Mon Sep 17 00:00:00 2001 From: HobbyistDev <105957301+HobbyistDev@users.noreply.github.com> Date: Thu, 8 Dec 2022 22:58:36 +0900 Subject: [PATCH 1809/2552] [extractor/noice] Add NoicePodcast extractor (#5621) Authored by: HobbyistDev --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/noice.py | 116 ++++++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+) create mode 100644 yt_dlp/extractor/noice.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 54ac1b730..c9dd7463c 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1211,6 +1211,7 @@ from .nintendo import NintendoIE from .nitter import NitterIE from .njpwworld import NJPWWorldIE from .nobelprize import NobelPrizeIE +from .noice import NoicePodcastIE from .nonktube import NonkTubeIE from .noodlemagazine import NoodleMagazineIE from .noovo import NoovoIE diff --git a/yt_dlp/extractor/noice.py b/yt_dlp/extractor/noice.py new file mode 100644 index 000000000..e6e343303 --- /dev/null +++ b/yt_dlp/extractor/noice.py @@ -0,0 +1,116 @@ +from .common import InfoExtractor +from ..utils import ( + clean_html, + determine_ext, + int_or_none, + parse_iso8601, + traverse_obj, + variadic, +) + + +class NoicePodcastIE(InfoExtractor): + _VALID_URL = r'https?://open\.noice\.id/content/(?P<id>[a-fA-F0-9-]+)' + _TESTS = [{ + 'url': 'https://open.noice.id/content/7694bb04-ff0f-40fa-a60b-5b39f29584b2', + 'info_dict': { + 'id': '7694bb04-ff0f-40fa-a60b-5b39f29584b2', + 'ext': 'm4a', + 'season': 'Season 1', + 'description': 'md5:58d1274e6857b6fbbecf47075885380d', + 'release_date': '20221115', + 'timestamp': 
1668496642, + 'season_number': 1, + 'upload_date': '20221115', + 'release_timestamp': 1668496642, + 'title': 'Eps 1. Belajar dari Wishnutama: Kreatif Bukan Followers! (bersama Wishnutama)', + 'modified_date': '20221121', + 'categories': ['Bisnis dan Keuangan'], + 'duration': 3567, + 'modified_timestamp': 1669030647, + 'thumbnail': 'https://images.noiceid.cc/catalog/content-1668496302560', + 'channel_id': '9dab1024-5b92-4265-ae1c-63da87359832', + 'like_count': int, + 'channel': 'Noice Space Talks', + 'comment_count': int, + 'dislike_count': int, + 'channel_follower_count': int, + } + }, { + 'url': 'https://open.noice.id/content/222134e4-99f2-456f-b8a2-b8be404bf063', + 'info_dict': { + 'id': '222134e4-99f2-456f-b8a2-b8be404bf063', + 'ext': 'm4a', + 'release_timestamp': 1653488220, + 'description': 'md5:35074f6190cef52b05dd133bb2ef460e', + 'upload_date': '20220525', + 'timestamp': 1653460637, + 'release_date': '20220525', + 'thumbnail': 'https://images.noiceid.cc/catalog/content-1653460337625', + 'title': 'Eps 1: Dijodohin Sama Anak Pak RT', + 'modified_timestamp': 1669030647, + 'season_number': 1, + 'modified_date': '20221121', + 'categories': ['Cerita dan Drama'], + 'duration': 1830, + 'season': 'Season 1', + 'channel_id': '60193f6b-d24d-4b23-913b-ceed5a731e74', + 'dislike_count': int, + 'like_count': int, + 'comment_count': int, + 'channel': 'Dear Jerome', + 'channel_follower_count': int, + } + }] + + def _get_formats_and_subtitles(self, media_url, video_id): + formats, subtitles = [], {} + for url in variadic(media_url): + ext = determine_ext(url) + if ext == 'm3u8': + fmts, subs = self._extract_m3u8_formats_and_subtitles(url, video_id) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + else: + formats.append({ + 'url': url, + 'ext': 'mp3', + 'vcodec': 'none', + 'acodec': 'mp3', + }) + return formats, subtitles + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + nextjs_data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['contentDetails'] + + media_url_list = traverse_obj(nextjs_data, (('rawContentUrl', 'url'), )) + formats, subtitles = self._get_formats_and_subtitles(media_url_list, display_id) + + return { + 'id': nextjs_data.get('id') or display_id, + 'title': nextjs_data.get('title') or self._html_search_meta('og:title', webpage), + 'formats': formats, + 'subtitles': subtitles, + 'description': (nextjs_data.get('description') or clean_html(nextjs_data.get('htmlDescription')) + or self._html_search_meta(['description', 'og:description'], webpage)), + 'thumbnail': nextjs_data.get('image') or self._html_search_meta('og:image', webpage), + 'timestamp': parse_iso8601(nextjs_data.get('createdAt')), + 'release_timestamp': parse_iso8601(nextjs_data.get('publishedAt')), + 'modified_timestamp': parse_iso8601( + nextjs_data.get('updatedAt') or self._html_search_meta('og:updated_time', webpage)), + 'duration': int_or_none(nextjs_data.get('duration')), + 'categories': traverse_obj(nextjs_data, ('genres', ..., 'name')), + 'season': nextjs_data.get('seasonName'), + 'season_number': int_or_none(nextjs_data.get('seasonNumber')), + 'channel': traverse_obj(nextjs_data, ('catalog', 'title')), + 'channel_id': traverse_obj(nextjs_data, ('catalog', 'id'), 'catalogId'), + **traverse_obj(nextjs_data, ('meta', 'aggregations', { + 'like_count': 'likes', + 'dislike_count': 'dislikes', + 'comment_count': 'comments', + 'channel_follower_count': 'followers', + })) + } From 
839e2a62ae977ae51b1fcec50a8af3d28e1d230c Mon Sep 17 00:00:00 2001 From: MMM <flashdagger@googlemail.com> Date: Thu, 8 Dec 2022 17:32:17 +0100 Subject: [PATCH 1810/2552] [extractor/rumble] Add RumbleIE extractor (#5515) Closes #2846 Authored by: flashdagger --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/rumble.py | 102 ++++++++++++++++++++++++++------ 2 files changed, 84 insertions(+), 19 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index c9dd7463c..b1d0a9fb0 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1568,6 +1568,7 @@ from .ruhd import RUHDIE from .rule34video import Rule34VideoIE from .rumble import ( RumbleEmbedIE, + RumbleIE, RumbleChannelIE, ) from .rutube import ( diff --git a/yt_dlp/extractor/rumble.py b/yt_dlp/extractor/rumble.py index 102615c60..b7f798ffb 100644 --- a/yt_dlp/extractor/rumble.py +++ b/yt_dlp/extractor/rumble.py @@ -4,11 +4,15 @@ import re from .common import InfoExtractor from ..compat import compat_HTTPError from ..utils import ( + ExtractorError, + UnsupportedError, + clean_html, + get_element_by_class, int_or_none, + parse_count, parse_iso8601, traverse_obj, unescapeHTML, - ExtractorError, ) @@ -111,24 +115,6 @@ class RumbleEmbedIE(InfoExtractor): }] _WEBPAGE_TESTS = [ - { - 'note': 'Rumble embed', - 'url': 'https://rumble.com/vdmum1-moose-the-dog-helps-girls-dig-a-snow-fort.html', - 'md5': '53af34098a7f92c4e51cf0bd1c33f009', - 'info_dict': { - 'id': 'vb0ofn', - 'ext': 'mp4', - 'timestamp': 1612662578, - 'uploader': 'LovingMontana', - 'channel': 'LovingMontana', - 'upload_date': '20210207', - 'title': 'Winter-loving dog helps girls dig a snow fort ', - 'channel_url': 'https://rumble.com/c/c-546523', - 'thumbnail': 'https://sp.rmbl.ws/s8/1/5/f/x/x/5fxxb.OvCc.1-small-Moose-The-Dog-Helps-Girls-D.jpg', - 'duration': 103, - 'live_status': 'not_live', - } - }, { 'note': 'Rumble JS embed', 'url': 'https://therightscoop.com/what-does-9-plus-1-plus-1-equal-listen-to-this-audio-of-attempted-kavanaugh-assassins-call-and-youll-get-it', @@ -235,6 +221,84 @@ class RumbleEmbedIE(InfoExtractor): } +class RumbleIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rumble\.com/(?P<id>v(?!ideos)[\w.-]+)[^/]*$' + _EMBED_REGEX = [r'<a class=video-item--a href=(?P<url>/v[\w.-]+\.html)>'] + _TESTS = [{ + 'add_ie': ['RumbleEmbed'], + 'url': 'https://rumble.com/vdmum1-moose-the-dog-helps-girls-dig-a-snow-fort.html', + 'md5': '53af34098a7f92c4e51cf0bd1c33f009', + 'info_dict': { + 'id': 'vb0ofn', + 'ext': 'mp4', + 'timestamp': 1612662578, + 'uploader': 'LovingMontana', + 'channel': 'LovingMontana', + 'upload_date': '20210207', + 'title': 'Winter-loving dog helps girls dig a snow fort ', + 'description': 'Moose the dog is more than happy to help with digging out this epic snow fort. 
Great job, Moose!', + 'channel_url': 'https://rumble.com/c/c-546523', + 'thumbnail': r're:https://.+\.jpg', + 'duration': 103, + 'like_count': int, + 'view_count': int, + 'live_status': 'not_live', + } + }, { + 'url': 'http://www.rumble.com/vDMUM1?key=value', + 'only_matching': True, + }] + + _WEBPAGE_TESTS = [{ + 'url': 'https://rumble.com/videos?page=2', + 'playlist_count': 25, + 'info_dict': { + 'id': 'videos?page=2', + 'title': 'All videos', + 'description': 'Browse videos uploaded to Rumble.com', + 'age_limit': 0, + }, + }, { + 'url': 'https://rumble.com/live-videos', + 'playlist_mincount': 19, + 'info_dict': { + 'id': 'live-videos', + 'title': 'Live Videos', + 'description': 'Live videos on Rumble.com', + 'age_limit': 0, + }, + }, { + 'url': 'https://rumble.com/search/video?q=rumble&sort=views', + 'playlist_count': 24, + 'info_dict': { + 'id': 'video?q=rumble&sort=views', + 'title': 'Search results for: rumble', + 'age_limit': 0, + }, + }] + + def _real_extract(self, url): + page_id = self._match_id(url) + webpage = self._download_webpage(url, page_id) + url_info = next(RumbleEmbedIE.extract_from_webpage(self._downloader, url, webpage), None) + if not url_info: + raise UnsupportedError(url) + + release_ts_str = self._search_regex( + r'(?:Livestream begins|Streamed on):\s+<time datetime="([^"]+)', + webpage, 'release date', fatal=False, default=None) + view_count_str = self._search_regex(r'<span class="media-heading-info">([\d,]+) Views', + webpage, 'view count', fatal=False, default=None) + + return self.url_result( + url_info['url'], ie_key=url_info['ie_key'], url_transparent=True, + view_count=parse_count(view_count_str), + release_timestamp=parse_iso8601(release_ts_str), + like_count=parse_count(get_element_by_class('rumbles-count', webpage)), + description=clean_html(get_element_by_class('media-description', webpage)), + ) + + class RumbleChannelIE(InfoExtractor): _VALID_URL = r'(?P<url>https?://(?:www\.)?rumble\.com/(?:c|user)/(?P<id>[^&?#$/]+))' From 72f96c55662c688a15ed00ffa661546156f7e461 Mon Sep 17 00:00:00 2001 From: nixxo <nixxo@protonmail.com> Date: Thu, 8 Dec 2022 17:52:19 +0100 Subject: [PATCH 1811/2552] [extractor/la7] Improve extractor (#5538) Authored by: nixxo Closes #5360 --- yt_dlp/extractor/la7.py | 52 ++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/yt_dlp/extractor/la7.py b/yt_dlp/extractor/la7.py index 68dc1d4df..36bfaf5c3 100644 --- a/yt_dlp/extractor/la7.py +++ b/yt_dlp/extractor/la7.py @@ -2,7 +2,6 @@ import re from .common import InfoExtractor from ..utils import ( - determine_ext, float_or_none, HEADRequest, int_or_none, @@ -13,13 +12,13 @@ from ..utils import ( class LA7IE(InfoExtractor): IE_NAME = 'la7.it' - _VALID_URL = r'''(?x)(https?://)?(?: - (?:www\.)?la7\.it/([^/]+)/(?:rivedila7|video)/| + _VALID_URL = r'''(?x)https?://(?: + (?:www\.)?la7\.it/([^/]+)/(?:rivedila7|video|news)/| tg\.la7\.it/repliche-tgla7\?id= )(?P<id>.+)''' _TESTS = [{ - # 'src' is a plain URL + # single quality video 'url': 'http://www.la7.it/crozza/video/inccool8-02-10-2015-163722', 'md5': '8b613ffc0c4bf9b9e377169fc19c214c', 'info_dict': { @@ -29,6 +28,20 @@ class LA7IE(InfoExtractor): 'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. 
dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto atletico', 'thumbnail': 're:^https?://.*', 'upload_date': '20151002', + 'formats': 'count:4', + }, + }, { + # multiple quality video + 'url': 'https://www.la7.it/calcio-femminile/news/il-gol-di-lindsey-thomas-fiorentina-vs-milan-serie-a-calcio-femminile-26-11-2022-461736', + 'md5': 'd2370e78f75e8d1238cb3a0db9a2eda3', + 'info_dict': { + 'id': 'il-gol-di-lindsey-thomas-fiorentina-vs-milan-serie-a-calcio-femminile-26-11-2022-461736', + 'ext': 'mp4', + 'title': 'Il gol di Lindsey Thomas | Fiorentina vs Milan | Serie A Calcio Femminile', + 'description': 'Il gol di Lindsey Thomas | Fiorentina vs Milan | Serie A Calcio Femminile', + 'thumbnail': 're:^https?://.*', + 'upload_date': '20221126', + 'formats': 'count:8', }, }, { 'url': 'http://www.la7.it/omnibus/rivedila7/omnibus-news-02-07-2016-189077', @@ -39,7 +52,7 @@ class LA7IE(InfoExtractor): def _generate_mp4_url(self, quality, m3u8_formats): for f in m3u8_formats: if f['vcodec'] != 'none' and quality in f['url']: - http_url = '%s%s.mp4' % (self._HOST, quality) + http_url = f'{self._HOST}{quality}.mp4' urlh = self._request_webpage( HEADRequest(http_url), quality, @@ -58,12 +71,13 @@ class LA7IE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) - if not url.startswith('http'): - url = '%s//%s' % (self.http_scheme(), url) + if re.search(r'(?i)(drmsupport\s*:\s*true)\s*', webpage): + self.report_drm(video_id) - webpage = self._download_webpage(url, video_id) - video_path = self._search_regex(r'(/content/.*?).mp4', webpage, 'video_path') + video_path = self._search_regex( + r'(/content/[\w/,]+?)\.mp4(?:\.csmil)?/master\.m3u8', webpage, 'video_path') formats = self._extract_mpd_formats( f'{self._HOST}/local/dash/,{video_path}.mp4.urlset/manifest.mpd', @@ -90,8 +104,7 @@ class LA7IE(InfoExtractor): class LA7PodcastEpisodeIE(InfoExtractor): IE_NAME = 'la7.it:pod:episode' - _VALID_URL = r'''(?x)(https?://)? 
- (?:www\.)?la7\.it/[^/]+/podcast/([^/]+-)?(?P<id>\d+)''' + _VALID_URL = r'https?://(?:www\.)?la7\.it/[^/]+/podcast/([^/]+-)?(?P<id>\d+)' _TESTS = [{ 'url': 'https://www.la7.it/voicetown/podcast/la-carezza-delle-memoria-di-carlo-verdone-23-03-2021-371497', @@ -125,14 +138,15 @@ class LA7PodcastEpisodeIE(InfoExtractor): webpage, 'video_id', group='vid') media_url = self._search_regex( - (r'src:\s*([\'"])(?P<url>.+?mp3.+?)\1', - r'data-podcast=([\'"])(?P<url>.+?mp3.+?)\1'), + (r'src\s*:\s*([\'"])(?P<url>\S+?mp3.+?)\1', + r'data-podcast\s*=\s*([\'"])(?P<url>\S+?mp3.+?)\1'), webpage, 'media_url', group='url') - ext = determine_ext(media_url) formats = [{ 'url': media_url, - 'format_id': ext, - 'ext': ext, + 'format_id': 'http-mp3', + 'ext': 'mp3', + 'acodec': 'mp3', + 'vcodec': 'none', }] title = self._html_search_regex( @@ -173,7 +187,7 @@ class LA7PodcastEpisodeIE(InfoExtractor): # and title is the same as the show_title # add the date to the title if date and not date_alt and ppn and ppn.lower() == title.lower(): - title += ' del %s' % date + title = f'{title} del {date}' return { 'id': video_id, 'title': title, @@ -193,7 +207,7 @@ class LA7PodcastEpisodeIE(InfoExtractor): class LA7PodcastIE(LA7PodcastEpisodeIE): # XXX: Do not subclass from concrete IE IE_NAME = 'la7.it:podcast' - _VALID_URL = r'(https?://)?(www\.)?la7\.it/(?P<id>[^/]+)/podcast/?(?:$|[#?])' + _VALID_URL = r'https?://(?:www\.)?la7\.it/(?P<id>[^/]+)/podcast/?(?:$|[#?])' _TESTS = [{ 'url': 'https://www.la7.it/propagandalive/podcast', @@ -201,7 +215,7 @@ class LA7PodcastIE(LA7PodcastEpisodeIE): # XXX: Do not subclass from concrete I 'id': 'propagandalive', 'title': "Propaganda Live", }, - 'playlist_count': 10, + 'playlist_count_min': 10, }] def _real_extract(self, url): From 85a802969ebb62ff57347110f7ad0d87099e65e7 Mon Sep 17 00:00:00 2001 From: milkknife <111794344+milkknife@users.noreply.github.com> Date: Thu, 8 Dec 2022 17:56:36 +0100 Subject: [PATCH 1812/2552] [extractor/webcamerapl] Add extractor (#5715) Authored by: milkknife --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/webcamerapl.py | 44 +++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 yt_dlp/extractor/webcamerapl.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index b1d0a9fb0..c3eb2bb77 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2194,6 +2194,7 @@ from .wdr import ( WDRElefantIE, WDRMobileIE, ) +from .webcamerapl import WebcameraplIE from .webcaster import ( WebcasterIE, WebcasterFeedIE, diff --git a/yt_dlp/extractor/webcamerapl.py b/yt_dlp/extractor/webcamerapl.py new file mode 100644 index 000000000..a02d9519c --- /dev/null +++ b/yt_dlp/extractor/webcamerapl.py @@ -0,0 +1,44 @@ +import codecs + +from .common import InfoExtractor + + +class WebcameraplIE(InfoExtractor): + _VALID_URL = r'https?://(?P<id>[\w-]+)\.webcamera\.pl' + _TESTS = [{ + 'url': 'https://warszawa-plac-zamkowy.webcamera.pl', + 'info_dict': { + 'id': 'warszawa-plac-zamkowy', + 'ext': 'mp4', + 'title': r're:WIDOK NA PLAC ZAMKOWY W WARSZAWIE \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', + 'live_status': 'is_live', + } + }, { + 'url': 'https://gdansk-stare-miasto.webcamera.pl/', + 'info_dict': { + 'id': 'gdansk-stare-miasto', + 'ext': 'mp4', + 'title': r're:GDAŃSK - widok na Stare Miasto \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', + 'live_status': 'is_live', + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + rot13_m3u8_url 
= self._search_regex(r'data-src\s*=\s*"(uggc[^"]+\.z3h8)"', + webpage, 'm3u8 url', default=None) + if not rot13_m3u8_url: + self.raise_no_formats('No video/audio found at the provided url', expected=True) + + m3u8_url = codecs.decode(rot13_m3u8_url, 'rot-13') + formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, live=True) + + return { + 'id': video_id, + 'title': self._html_search_regex(r'<h1\b[^>]*>([^>]+)</h1>', webpage, 'title'), + 'formats': formats, + 'subtitles': subtitles, + 'is_live': True, + } From b44cd29851fdc2fadb283adb59a074f89a27ba7e Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Thu, 8 Dec 2022 22:42:49 +0530 Subject: [PATCH 1813/2552] [jsinterp] Escape regex that looks like nested set Closes #5749 --- yt_dlp/jsinterp.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py index e25997129..3f7d659ac 100644 --- a/yt_dlp/jsinterp.py +++ b/yt_dlp/jsinterp.py @@ -343,7 +343,8 @@ class JSInterpreter: inner, outer = self._separate(expr, expr[0], 1) if expr[0] == '/': flags, outer = self._regex_flags(outer) - inner = re.compile(inner[1:], flags=flags) + # Avoid https://github.com/python/cpython/issues/74534 + inner = re.compile(inner[1:].replace('[[', r'[\['), flags=flags) else: inner = json.loads(js_to_json(f'{inner}{expr[0]}', strict=True)) if not outer: From 3d79ebc8b7e2b1fe3be8cbd0957b00ef29f8647a Mon Sep 17 00:00:00 2001 From: Elyse <26639800+elyse0@users.noreply.github.com> Date: Thu, 8 Dec 2022 15:17:21 -0600 Subject: [PATCH 1814/2552] [extractor/mediastream] Add extractor (#5640) Closes #5532, closes #4431, closes #4425 Authored by: elyse0, HobbyistDev Co-authored-by: HobbyistDev <tesutonihon4@gmail.com> --- yt_dlp/extractor/_extractors.py | 4 + yt_dlp/extractor/mediastream.py | 155 ++++++++++++++++++++++++++++++++ 2 files changed, 159 insertions(+) create mode 100644 yt_dlp/extractor/mediastream.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index c3eb2bb77..c90d7b7f6 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -982,6 +982,10 @@ from .mediasite import ( MediasiteCatalogIE, MediasiteNamedCatalogIE, ) +from .mediastream import ( + MediaStreamIE, + WinSportsVideoIE, +) from .mediaworksnz import MediaWorksNZVODIE from .medici import MediciIE from .megaphone import MegaphoneIE diff --git a/yt_dlp/extractor/mediastream.py b/yt_dlp/extractor/mediastream.py new file mode 100644 index 000000000..4d3949527 --- /dev/null +++ b/yt_dlp/extractor/mediastream.py @@ -0,0 +1,155 @@ +import re + +from .common import InfoExtractor +from ..utils import clean_html, get_element_html_by_class + + +class MediaStreamIE(InfoExtractor): + _VALID_URL = r'https?://mdstrm.com/(?:embed|live-stream)/(?P<id>\w+)' + + _TESTS = [{ + 'url': 'https://mdstrm.com/embed/6318e3f1d1d316083ae48831', + 'md5': '97b4f2634b8e8612cc574dfcd504df05', + 'info_dict': { + 'id': '6318e3f1d1d316083ae48831', + 'title': 'Video: Así fue el despido de Thomas Tuchel del Chelsea', + 'description': 'md5:358ce1e1396010d50a1ece1be3633c95', + 'thumbnail': r're:^https?://[^?#]+6318e3f1d1d316083ae48831', + 'ext': 'mp4', + }, + }] + + _WEBPAGE_TESTS = [{ + 'url': 'https://www.multimedios.com/video/costa-rica-tv-en-vivo/v2616', + 'info_dict': { + 'id': '5a7b1e63a8da282c34d65445', + 'title': 're:mmtv-costarica', + 'description': 'mmtv-costarica', + 'thumbnail': 're:^https?://[^?#]+5a7b1e63a8da282c34d65445', + 'ext': 'mp4', + 'live_status': 'is_live', + 
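            # Editorial note: live metadata varies between runs, which is why
            # 'title' above is an 're:' pattern; the formats resolve from the
            # player's HLS source, so the params below skip the actual download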
}, + 'params': { + 'skip_download': 'Livestream' + }, + }, { + 'url': 'https://www.multimedios.com/television/clases-de-llaves-y-castigos-quien-sabe-mas', + 'md5': 'de31f0b1ecc321fb35bf22d58734ea40', + 'info_dict': { + 'id': '63731bab8ec9b308a2c9ed28', + 'title': 'Clases de llaves y castigos ¿Quién sabe más?', + 'description': 'md5:1b49aa1ee5a4b32fbd66104b2d629e9d', + 'thumbnail': 're:^https?://[^?#]+63731bab8ec9b308a2c9ed28', + 'ext': 'mp4', + }, + }, { + 'url': 'https://www.americatv.com.pe/videos/esto-es-guerra/facundo-gonzalez-sufrio-fuerte-golpe-durante-competencia-frente-hugo-garcia-eeg-noticia-139120', + 'info_dict': { + 'id': '63756df1c638b008a5659dec', + 'title': 'Facundo González sufrió fuerte golpe durante competencia frente a Hugo García en EEG', + 'description': 'md5:9490c034264afd756eef7b2c3adee69e', + 'thumbnail': 're:^https?://[^?#]+63756df1c638b008a5659dec', + 'ext': 'mp4', + }, + }, { + 'url': 'https://www.americatv.com.pe/videos/al-fondo-hay-sitio/nuevas-lomas-town-bernardo-mata-se-enfrento-sujeto-luchar-amor-macarena-noticia-139083', + 'info_dict': { + 'id': '637307669609130f74cd3a6e', + 'title': 'Las Nuevas Lomas Town: Bernardo De La Mata se enfrentó a sujeto para luchar por el amor de Macarena', + 'description': 'md5:60d71772f1e1496923539ae58aa17124', + 'thumbnail': 're:^https?://[^?#]+637307669609130f74cd3a6e', + 'ext': 'mp4', + }, + }] + + @classmethod + def _extract_embed_urls(cls, url, webpage): + for mobj in re.finditer(r'<script[^>]+>[^>]*playerMdStream.mdstreamVideo\(\s*[\'"](?P<video_id>\w+)', webpage): + yield f'https://mdstrm.com/embed/{mobj.group("video_id")}' + + yield from re.findall( + r'<iframe[^>]src\s*=\s*"(https://mdstrm.com/[\w-]+/\w+)', webpage) + + for mobj in re.finditer( + r'''(?x) + <(?:div|ps-mediastream)[^>]+ + class\s*=\s*"[^"]*MediaStreamVideoPlayer[^"]*"[^>]+ + data-video-id\s*=\s*"(?P<video_id>\w+)\s*" + (?:\s*data-video-type\s*=\s*"(?P<video_type>[^"]+))? 
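                    # An assumed example of markup this pattern targets ((?x)
                    # mode permits comments; the snippet is illustrative only):
                    #   <div class="player MediaStreamVideoPlayer live-video"
                    #        data-video-id="6318e3f1d1d316083ae48831"
                    #        data-video-type="live">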
+ ''', webpage): + + video_type = 'live-stream' if mobj.group('video_type') == 'live' else 'embed' + yield f'https://mdstrm.com/{video_type}/{mobj.group("video_id")}' + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + if 'Debido a tu ubicación no puedes ver el contenido' in webpage: + self.raise_geo_restricted() + + player_config = self._search_json(r'window.MDSTRM.OPTIONS\s*=', webpage, 'metadata', video_id) + + formats, subtitles = [], {} + for video_format in player_config['src']: + if video_format == 'hls': + fmts, subs = self._extract_m3u8_formats_and_subtitles(player_config['src'][video_format], video_id) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + elif video_format == 'mpd': + fmts, subs = self._extract_mpd_formats_and_subtitles(player_config['src'][video_format], video_id) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + else: + formats.append({ + 'url': player_config['src'][video_format], + }) + + return { + 'id': video_id, + 'title': self._og_search_title(webpage) or player_config.get('title'), + 'description': self._og_search_description(webpage), + 'formats': formats, + 'subtitles': subtitles, + 'is_live': player_config.get('type') == 'live', + 'thumbnail': self._og_search_thumbnail(webpage), + } + + +class WinSportsVideoIE(InfoExtractor): + _VALID_URL = r'https?://www\.winsports\.co/videos/(?P<display_id>[\w-]+)-(?P<id>\d+)' + + _TESTS = [{ + 'url': 'https://www.winsports.co/videos/siempre-castellanos-gran-atajada-del-portero-cardenal-para-evitar-la-caida-de-su-arco-60536', + 'info_dict': { + 'id': '62dc8357162c4b0821fcfb3c', + 'display_id': 'siempre-castellanos-gran-atajada-del-portero-cardenal-para-evitar-la-caida-de-su-arco', + 'title': '¡Siempre Castellanos! 
Gran atajada del portero \'cardenal\' para evitar la caída de su arco', + 'description': 'md5:eb811b2b2882bdc59431732c06b905f2', + 'thumbnail': r're:^https?://[^?#]+62dc8357162c4b0821fcfb3c', + 'ext': 'mp4', + }, + }, { + 'url': 'https://www.winsports.co/videos/observa-aqui-los-goles-del-empate-entre-tolima-y-nacional-60548', + 'info_dict': { + 'id': '62dcb875ef12a5526790b552', + 'display_id': 'observa-aqui-los-goles-del-empate-entre-tolima-y-nacional', + 'title': 'Observa aquí los goles del empate entre Tolima y Nacional', + 'description': 'md5:b19402ba6e46558b93fd24b873eea9c9', + 'thumbnail': r're:^https?://[^?#]+62dcb875ef12a5526790b552', + 'ext': 'mp4', + }, + }] + + def _real_extract(self, url): + display_id, video_id = self._match_valid_url(url).group('display_id', 'id') + webpage = self._download_webpage(url, display_id) + + media_setting_json = self._search_json( + r'<script\s*[^>]+data-drupal-selector="drupal-settings-json">', webpage, 'drupal-setting-json', display_id) + + mediastream_id = media_setting_json['settings']['mediastream_formatter'][video_id]['mediastream_id'] + + return self.url_result( + f'https://mdstrm.com/embed/{mediastream_id}', MediaStreamIE, video_id, url_transparent=True, + display_id=display_id, video_title=clean_html(get_element_html_by_class('title-news', webpage))) From b05f0a50e05a85da0cdb322d6472b3cb67ee8427 Mon Sep 17 00:00:00 2001 From: Vita <docbender@users.noreply.github.com> Date: Fri, 9 Dec 2022 07:03:36 +0100 Subject: [PATCH 1815/2552] [extractor/yle_areena] Support restricted videos (#5735) * and improve metadata Closes #5734 Authored by: docbender --- yt_dlp/extractor/yle_areena.py | 96 ++++++++++++++++++++++++---------- 1 file changed, 69 insertions(+), 27 deletions(-) diff --git a/yt_dlp/extractor/yle_areena.py b/yt_dlp/extractor/yle_areena.py index 118dc1262..98d3b1949 100644 --- a/yt_dlp/extractor/yle_areena.py +++ b/yt_dlp/extractor/yle_areena.py @@ -1,40 +1,79 @@ from .common import InfoExtractor from .kaltura import KalturaIE -from ..utils import int_or_none, traverse_obj, url_or_none +from ..utils import ( + int_or_none, + smuggle_url, + traverse_obj, + unified_strdate, + url_or_none, +) class YleAreenaIE(InfoExtractor): _VALID_URL = r'https?://areena\.yle\.fi/(?P<id>[\d-]+)' - _TESTS = [{ - 'url': 'https://areena.yle.fi/1-4371942', - 'md5': '932edda0ecf5dfd6423804182d32f8ac', - 'info_dict': { - 'id': '0_a3tjk92c', - 'ext': 'mp4', - 'title': 'Pouchit', - 'description': 'md5:d487309c3abbe5650265bbd1742d2f82', - 'series': 'Modernit miehet', - 'season': 'Season 1', - 'season_number': 1, - 'episode': 'Episode 2', - 'episode_number': 2, - 'thumbnail': 'http://cfvod.kaltura.com/p/1955031/sp/195503100/thumbnail/entry_id/0_a3tjk92c/version/100061', - 'uploader_id': 'ovp@yle.fi', - 'duration': 1435, - 'view_count': int, - 'upload_date': '20181204', - 'timestamp': 1543916210, - 'subtitles': {'fin': [{'url': r're:^https?://', 'ext': 'srt'}]}, - 'age_limit': 7, + _TESTS = [ + { + 'url': 'https://areena.yle.fi/1-4371942', + 'md5': '932edda0ecf5dfd6423804182d32f8ac', + 'info_dict': { + 'id': '0_a3tjk92c', + 'ext': 'mp4', + 'title': 'Pouchit', + 'description': 'md5:d487309c3abbe5650265bbd1742d2f82', + 'series': 'Modernit miehet', + 'season': 'Season 1', + 'season_number': 1, + 'episode': 'Episode 2', + 'episode_number': 2, + 'thumbnail': 'http://cfvod.kaltura.com/p/1955031/sp/195503100/thumbnail/entry_id/0_a3tjk92c/version/100061', + 'uploader_id': 'ovp@yle.fi', + 'duration': 1435, + 'view_count': int, + 'upload_date': '20181204', + 'release_date': 
'20190106', + 'timestamp': 1543916210, + 'subtitles': {'fin': [{'url': r're:^https?://', 'ext': 'srt'}]}, + 'age_limit': 7, + 'webpage_url': 'https://areena.yle.fi/1-4371942' + } + }, + { + 'url': 'https://areena.yle.fi/1-2158940', + 'md5': 'cecb603661004e36af8c5188b5212b12', + 'info_dict': { + 'id': '1_l38iz9ur', + 'ext': 'mp4', + 'title': 'Albi haluaa vessan', + 'description': 'md5:15236d810c837bed861fae0e88663c33', + 'series': 'Albi Lumiukko', + 'season': None, + 'season_number': None, + 'episode': None, + 'episode_number': None, + 'thumbnail': 'http://cfvod.kaltura.com/p/1955031/sp/195503100/thumbnail/entry_id/1_l38iz9ur/version/100021', + 'uploader_id': 'ovp@yle.fi', + 'duration': 319, + 'view_count': int, + 'upload_date': '20211202', + 'release_date': '20211215', + 'timestamp': 1638448202, + 'subtitles': {}, + 'age_limit': 0, + 'webpage_url': 'https://areena.yle.fi/1-2158940' + } } - }] + ] def _real_extract(self, url): video_id = self._match_id(url) info = self._search_json_ld(self._download_webpage(url, video_id), video_id, default={}) video_data = self._download_json( f'https://player.api.yle.fi/v1/preview/{video_id}.json?app_id=player_static_prod&app_key=8930d72170e48303cf5f3867780d549b', - video_id) + video_id, headers={ + 'origin': 'https://areena.yle.fi', + 'referer': 'https://areena.yle.fi/', + 'content-type': 'application/json' + }) # Example title: 'K1, J2: Pouchit | Modernit miehet' series, season_number, episode_number, episode = self._search_regex( @@ -54,7 +93,9 @@ class YleAreenaIE(InfoExtractor): return { '_type': 'url_transparent', - 'url': 'kaltura:1955031:%s' % traverse_obj(video_data, ('data', 'ongoing_ondemand', 'kaltura', 'id')), + 'url': smuggle_url( + f'kaltura:1955031:{video_data["data"]["ongoing_ondemand"]["kaltura"]["id"]}', + {'source_url': url}), 'ie_key': KalturaIE.ie_key(), 'title': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'title', 'fin'), expected_type=str) or episode or info.get('title')), @@ -62,10 +103,11 @@ class YleAreenaIE(InfoExtractor): 'series': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'series', 'title', 'fin'), expected_type=str) or series), 'season_number': (int_or_none(self._search_regex(r'Kausi (\d+)', description, 'season number', default=None)) - or int(season_number)), + or int_or_none(season_number)), 'episode_number': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'episode_number'), expected_type=int_or_none) - or int(episode_number)), + or int_or_none(episode_number)), 'thumbnails': traverse_obj(info, ('thumbnails', ..., {'url': 'url'})), 'age_limit': traverse_obj(video_data, ('data', 'ongoing_ondemand', 'content_rating', 'age_restriction'), expected_type=int_or_none), 'subtitles': subtitles, + 'release_date': unified_strdate(traverse_obj(video_data, ('data', 'ongoing_ondemand', 'start_time'), expected_type=str)), } From 10dc85924a74ae69bcf3170c37b351036eacca58 Mon Sep 17 00:00:00 2001 From: nixxo <nixxo@protonmail.com> Date: Fri, 9 Dec 2022 08:20:37 +0100 Subject: [PATCH 1816/2552] [extractor/mediaset] Better embed detection and error messages (#5664) Authored by: nixxo --- yt_dlp/extractor/generic.py | 13 --- yt_dlp/extractor/mediaset.py | 199 ++++++++++++++--------------------- 2 files changed, 80 insertions(+), 132 deletions(-) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 190aff331..bf3c9c1e8 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -1547,19 +1547,6 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['WashingtonPost'], }, - { - # 
Mediaset embed - 'url': 'http://www.tgcom24.mediaset.it/politica/serracchiani-voglio-vivere-in-una-societa-aperta-reazioni-sproporzionate-_3071354-201702a.shtml', - 'info_dict': { - 'id': '720642', - 'ext': 'mp4', - 'title': 'Serracchiani: "Voglio vivere in una società aperta, con tutela del patto di fiducia"', - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': ['Mediaset'], - }, { # JOJ.sk embeds 'url': 'https://www.noviny.sk/slovensko/238543-slovenskom-sa-prehnala-vlna-silnych-burok', diff --git a/yt_dlp/extractor/mediaset.py b/yt_dlp/extractor/mediaset.py index 61bdb2a3f..1fa529914 100644 --- a/yt_dlp/extractor/mediaset.py +++ b/yt_dlp/extractor/mediaset.py @@ -7,7 +7,6 @@ from ..utils import ( GeoRestrictedError, int_or_none, OnDemandPagedList, - parse_qs, try_get, urljoin, update_url_query, @@ -16,20 +15,25 @@ from ..utils import ( class MediasetIE(ThePlatformBaseIE): _TP_TLD = 'eu' - _VALID_URL = r'''(?x) + _GUID_RE = r'F[0-9A-Z]{15}' + _VALID_URL = rf'''(?x) (?: mediaset:| https?:// (?:\w+\.)+mediaset\.it/ (?: (?:video|on-demand|movie)/(?:[^/]+/)+[^/]+_| - player/(?:v\d+/)?index\.html\?.*?\bprogramGuid= + player/(?:v\d+/)?index\.html\?\S*?\bprogramGuid= ) - )(?P<id>[0-9A-Z]{16,}) + )(?P<id>{_GUID_RE}) ''' + + _EMBED_REGEX = [ + rf'<iframe[^>]+src=[\'"](?P<url>(?:https?:)?//(?:\w+\.)+mediaset\.it/player/(?:v\d+/)?index\.html\?\S*?programGuid={_GUID_RE})[\'"&]' + ] _TESTS = [{ # full episode - 'url': 'https://www.mediasetplay.mediaset.it/video/mrwronglezionidamore/episodio-1_F310575103000102', + 'url': 'https://mediasetinfinity.mediaset.it/video/mrwronglezionidamore/episodio-1_F310575103000102', 'md5': 'a7e75c6384871f322adb781d3bd72c26', 'info_dict': { 'id': 'F310575103000102', @@ -50,7 +54,7 @@ class MediasetIE(ThePlatformBaseIE): 'chapters': [{'start_time': 0.0, 'end_time': 439.88}, {'start_time': 439.88, 'end_time': 1685.84}, {'start_time': 1685.84, 'end_time': 2682.0}], }, }, { - 'url': 'https://www.mediasetplay.mediaset.it/video/matrix/puntata-del-25-maggio_F309013801000501', + 'url': 'https://mediasetinfinity.mediaset.it/video/matrix/puntata-del-25-maggio_F309013801000501', 'md5': '1276f966ac423d16ba255ce867de073e', 'info_dict': { 'id': 'F309013801000501', @@ -71,51 +75,8 @@ class MediasetIE(ThePlatformBaseIE): 'chapters': [{'start_time': 0.0, 'end_time': 3409.08}, {'start_time': 3409.08, 'end_time': 6565.008}], }, }, { - 'url': 'https://www.mediasetplay.mediaset.it/video/cameracafe5/episodio-69-pezzo-di-luna_F303843101017801', - 'md5': 'd1650ac9ff944f185556126a736df148', - 'info_dict': { - 'id': 'F303843101017801', - 'ext': 'mp4', - 'title': 'Episodio 69 - Pezzo di luna', - 'description': 'md5:7c32c8ec4118b72588b9412f11353f73', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 263.008, - 'upload_date': '20200902', - 'series': 'Camera Café 5', - 'timestamp': 1599064700, - 'uploader': 'Italia 1', - 'uploader_id': 'I1', - 'season': 'Season 5', - 'episode': 'Episode 178', - 'season_number': 5, - 'episode_number': 178, - 'chapters': [{'start_time': 0.0, 'end_time': 261.88}, {'start_time': 261.88, 'end_time': 263.008}], - }, - }, { - 'url': 'https://www.mediasetplay.mediaset.it/video/cameracafe5/episodio-51-tu-chi-sei_F303843107000601', - 'md5': '567e9ad375b7a27a0e370650f572a1e3', - 'info_dict': { - 'id': 'F303843107000601', - 'ext': 'mp4', - 'title': 'Episodio 51 - Tu chi sei?', - 'description': 'md5:42ef006e56824cc31787a547590923f4', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 367.021, - 'upload_date': '20200902', - 'series': 'Camera Café 5', - 
'timestamp': 1599069817, - 'uploader': 'Italia 1', - 'uploader_id': 'I1', - 'season': 'Season 5', - 'episode': 'Episode 6', - 'season_number': 5, - 'episode_number': 6, - 'chapters': [{'start_time': 0.0, 'end_time': 358.68}, {'start_time': 358.68, 'end_time': 367.021}], - }, - }, { - # movie - 'url': 'https://www.mediasetplay.mediaset.it/movie/selvaggi/selvaggi_F006474501000101', - 'md5': '720440187a2ae26af8148eb9e6b901ed', + # DRM + 'url': 'https://mediasetinfinity.mediaset.it/movie/selvaggi/selvaggi_F006474501000101', 'info_dict': { 'id': 'F006474501000101', 'ext': 'mp4', @@ -129,71 +90,70 @@ class MediasetIE(ThePlatformBaseIE): 'uploader_id': 'B6', 'chapters': [{'start_time': 0.0, 'end_time': 1938.56}, {'start_time': 1938.56, 'end_time': 5233.01}], }, + 'params': { + 'ignore_no_formats_error': True, + }, + 'expected_warnings': [ + 'None of the available releases match the specified AssetType, ProtectionScheme, and/or Format preferences', + 'Content behind paywall and DRM', + ], + 'skip': True, }, { - # clip - 'url': 'https://www.mediasetplay.mediaset.it/video/gogglebox/un-grande-classico-della-commedia-sexy_FAFU000000661680', + # old domain + 'url': 'https://www.mediasetplay.mediaset.it/video/mrwronglezionidamore/episodio-1_F310575103000102', 'only_matching': True, }, { - # iframe simple + # iframe 'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665924&id=665924', 'only_matching': True, - }, { - # iframe twitter (from http://www.wittytv.it/se-prima-mi-fidavo-zero/) - 'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665104&id=665104', - 'only_matching': True, - }, { - # embedUrl (from https://www.wittytv.it/amici/est-ce-que-tu-maimes-gabriele-5-dicembre-copia/) - 'url': 'https://static3.mediasetplay.mediaset.it/player/v2/index.html?partnerId=wittytv&configId=&programGuid=FD00000000153323&autoplay=true&purl=http://www.wittytv.it/amici/est-ce-que-tu-maimes-gabriele-5-dicembre-copia/', - 'only_matching': True, }, { 'url': 'mediaset:FAFU000000665924', 'only_matching': True, + }] + _WEBPAGE_TESTS = [{ + # Mediaset embed + 'url': 'http://www.tgcom24.mediaset.it/politica/serracchiani-voglio-vivere-in-una-societa-aperta-reazioni-sproporzionate-_3071354-201702a.shtml', + 'info_dict': { + 'id': 'FD00000000004929', + 'ext': 'mp4', + 'title': 'Serracchiani: "Voglio vivere in una società aperta, con tutela del patto di fiducia"', + 'duration': 67.013, + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'Mediaset Play', + 'uploader_id': 'QY', + 'upload_date': '20201005', + 'timestamp': 1601866168, + 'chapters': [], + }, + 'params': { + 'skip_download': True, + } }, { - 'url': 'https://www.mediasetplay.mediaset.it/video/mediasethaacuoreilfuturo/palmieri-alicudi-lisola-dei-tre-bambini-felici--un-decreto-per-alicudi-e-tutte-le-microscuole_FD00000000102295', - 'only_matching': True, - }, { - 'url': 'https://www.mediasetplay.mediaset.it/video/cherryseason/anticipazioni-degli-episodi-del-23-ottobre_F306837101005C02', - 'only_matching': True, - }, { - 'url': 'https://www.mediasetplay.mediaset.it/video/tg5/ambiente-onda-umana-per-salvare-il-pianeta_F309453601079D01', - 'only_matching': True, - }, { - 'url': 'https://www.mediasetplay.mediaset.it/video/grandefratellovip/benedetta-una-doccia-gelata_F309344401044C135', - 'only_matching': True, - }, { - 'url': 
'https://www.mediasetplay.mediaset.it/movie/herculeslaleggendahainizio/hercules-la-leggenda-ha-inizio_F305927501000102', - 'only_matching': True, - }, { - 'url': 'https://mediasetinfinity.mediaset.it/video/braveandbeautiful/episodio-113_F310948005000402', - 'only_matching': True, - }, { - 'url': 'https://static3.mediasetplay.mediaset.it/player/v2/index.html?partnerId=wittytv&configId=&programGuid=FD00000000153323', - 'only_matching': True, + # WittyTV embed + 'url': 'https://www.wittytv.it/mauriziocostanzoshow/ultima-puntata-venerdi-25-novembre/', + 'info_dict': { + 'id': 'F312172801000801', + 'ext': 'mp4', + 'title': 'Ultima puntata - Venerdì 25 novembre', + 'description': 'Una serata all\'insegna della musica e del buonumore ma non priva di spunti di riflessione', + 'duration': 6203.01, + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'Canale 5', + 'uploader_id': 'C5', + 'upload_date': '20221126', + 'timestamp': 1669428689, + 'chapters': list, + 'series': 'Maurizio Costanzo Show', + 'season': 'Season 12', + 'season_number': 12, + 'episode': 'Episode 8', + 'episode_number': 8, + }, + 'params': { + 'skip_download': True, + } }] - def _extract_from_webpage(self, url, webpage): - def _program_guid(qs): - return qs.get('programGuid', [None])[0] - - for mobj in re.finditer( - r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml.*?)\1', - webpage): - embed_url = mobj.group('url') - embed_qs = parse_qs(embed_url) - program_guid = _program_guid(embed_qs) - if program_guid: - yield self.url_result(embed_url) - continue - - video_id = embed_qs.get('id', [None])[0] - if not video_id: - continue - urlh = self._request_webpage(embed_url, video_id, note='Following embed URL redirect') - embed_url = urlh.geturl() - program_guid = _program_guid(parse_qs(embed_url)) - if program_guid: - yield self.url_result(embed_url) - def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None): for video in smil.findall(self._xpath_ns('.//video', namespace)): video.attrib['src'] = re.sub(r'(https?://vod05)t(-mediaset-it\.akamaized\.net/.+?.mpd)\?.+', r'\1\2', video.attrib['src']) @@ -217,7 +177,7 @@ class MediasetIE(ThePlatformBaseIE): def _real_extract(self, url): guid = self._match_id(url) - tp_path = 'PR1GhC/media/guid/2702976343/' + guid + tp_path = f'PR1GhC/media/guid/2702976343/{guid}' info = self._extract_theplatform_metadata(tp_path, guid) formats = [] @@ -225,15 +185,17 @@ class MediasetIE(ThePlatformBaseIE): first_e = geo_e = None asset_type = 'geoNo:HD,browser,geoIT|geoNo:HD,geoIT|geoNo:SD,browser,geoIT|geoNo:SD,geoIT|geoNo|HD|SD' # TODO: fixup ISM+none manifest URLs - for f in ('MPEG4', 'M3U'): + for f in ('MPEG4', 'MPEG-DASH', 'M3U'): try: tp_formats, tp_subtitles = self._extract_theplatform_smil( - update_url_query('http://link.theplatform.%s/s/%s' % (self._TP_TLD, tp_path), { + update_url_query(f'http://link.theplatform.{self._TP_TLD}/s/{tp_path}', { 'mbr': 'true', 'formats': f, 'assetTypes': asset_type, - }), guid, 'Downloading %s SMIL data' % (f.split('+')[0])) + }), guid, f'Downloading {f.split("+")[0]} SMIL data') except ExtractorError as e: + if e.orig_msg == 'None of the available releases match the specified AssetType, ProtectionScheme, and/or Format preferences': + e.orig_msg = 'This video is DRM protected' if not geo_e and isinstance(e, GeoRestrictedError): geo_e = e if not first_e: @@ -248,7 +210,7 @@ class MediasetIE(ThePlatformBaseIE): raise geo_e or first_e feed_data 
= self._download_json(
-            'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs-v2/guid/-/' + guid,
+            f'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs-v2/guid/-/{guid}',
             guid, fatal=False)
         if feed_data:
             publish_info = feed_data.get('mediasetprogram$publishInfo') or {}
@@ -299,23 +261,23 @@ class MediasetShowIE(MediasetIE): # XXX: Do not subclass from concrete IE
     '''
     _TESTS = [{
         # TV Show webpage (general webpage)
-        'url': 'https://www.mediasetplay.mediaset.it/programmi-tv/leiene/leiene_SE000000000061',
+        'url': 'https://mediasetinfinity.mediaset.it/programmi-tv/leiene/leiene_SE000000000061',
         'info_dict': {
             'id': '000000000061',
-            'title': 'Le Iene',
+            'title': 'Le Iene 2022/2023',
         },
-        'playlist_mincount': 7,
+        'playlist_mincount': 6,
     }, {
         # TV Show webpage (specific season)
-        'url': 'https://www.mediasetplay.mediaset.it/programmi-tv/leiene/leiene_SE000000000061,ST000000002763',
+        'url': 'https://mediasetinfinity.mediaset.it/programmi-tv/leiene/leiene_SE000000000061,ST000000002763',
         'info_dict': {
             'id': '000000002763',
-            'title': 'Le Iene',
+            'title': 'Le Iene 2021/2022',
         },
         'playlist_mincount': 7,
     }, {
         # TV Show specific playlist (with multiple pages)
-        'url': 'https://www.mediasetplay.mediaset.it/programmi-tv/leiene/iservizi_SE000000000061,ST000000002763,sb100013375',
+        'url': 'https://mediasetinfinity.mediaset.it/programmi-tv/leiene/iservizi_SE000000000061,ST000000002763,sb100013375',
         'info_dict': {
             'id': '100013375',
             'title': 'I servizi',
@@ -340,10 +302,9 @@ class MediasetShowIE(MediasetIE): # XXX: Do not subclass from concrete IE
         playlist_id, st, sb = self._match_valid_url(url).group('id', 'st', 'sb')
         if not sb:
             page = self._download_webpage(url, st or playlist_id)
-            entries = [self.url_result(urljoin('https://www.mediasetplay.mediaset.it', url))
+            entries = [self.url_result(urljoin('https://mediasetinfinity.mediaset.it', url))
                        for url in re.findall(r'href="([^<>=]+SE\d{12},ST\d{12},sb\d{9})">[^<]+<', page)]
-            title = (self._html_search_regex(r'(?s)<h1[^>]*>(.+?)</h1>', page, 'title', default=None)
-                     or self._og_search_title(page))
+            title = self._html_extract_title(page).split('|')[0].strip()
             return self.playlist_result(entries, st or playlist_id, title)
 
         entries = OnDemandPagedList(

From 710822166279059c2880bfa4ca7a5626cc1e7d98 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Fri, 9 Dec 2022 15:17:16 +0530
Subject: [PATCH 1817/2552] Add `ac4` to known codecs

Note: ffmpeg does not currently support this format

Related #5738
---
 README.md       | 2 +-
 yt_dlp/utils.py | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 8fdedacf5..c0a2a420b 100644
--- a/README.md
+++ b/README.md
@@ -1488,7 +1488,7 @@ The available fields are:
 - `source`: The preference of the source
 - `proto`: Protocol used for download (`https`/`ftps` > `http`/`ftp` > `m3u8_native`/`m3u8` > `http_dash_segments`> `websocket_frag` > `mms`/`rtsp` > `f4f`/`f4m`)
 - `vcodec`: Video Codec (`av01` > `vp9.2` > `vp9` > `h265` > `h264` > `vp8` > `h263` > `theora` > other)
- - `acodec`: Audio Codec (`flac`/`alac` > `wav`/`aiff` > `opus` > `vorbis` > `aac` > `mp4a` > `mp3` > `eac3` > `ac3` > `dts` > other)
+ - `acodec`: Audio Codec (`flac`/`alac` > `wav`/`aiff` > `opus` > `vorbis` > `aac` > `mp4a` > `mp3` > `ac4` > `eac3` > `ac3` > `dts` > other)
 - `codec`: Equivalent to `vcodec,acodec`
 - `vext`: Video Extension (`mp4` > `mov` > `webm` > `flv` > other). 
If `--prefer-free-formats` is used, `webm` is preferred. - `aext`: Audio Extension (`m4a` > `aac` > `mp3` > `ogg` > `opus` > `webm` > other). If `--prefer-free-formats` is used, the order changes to `ogg` > `opus` > `webm` > `mp3` > `m4a` > `aac` diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 36170e125..9697ba1c1 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -3572,7 +3572,7 @@ def parse_codecs(codecs_str): hdr = 'HDR10' elif parts[:2] == ['vp9', '2']: hdr = 'HDR10' - elif parts[0] in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', + elif parts[0] in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-4', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'): acodec = acodec or full_codec elif parts[0] in ('stpp', 'wvtt'): @@ -3605,7 +3605,7 @@ def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None): # TODO: All codecs supported by parse_codecs isn't handled here COMPATIBLE_CODECS = { 'mp4': { - 'av1', 'hevc', 'avc1', 'mp4a', # fourcc (m3u8, mpd) + 'av1', 'hevc', 'avc1', 'mp4a', 'ac-4', # fourcc (m3u8, mpd) 'h264', 'aacl', 'ec-3', # Set in ISM }, 'webm': { @@ -6048,7 +6048,7 @@ class FormatSorter: 'vcodec': {'type': 'ordered', 'regex': True, 'order': ['av0?1', 'vp0?9.2', 'vp0?9', '[hx]265|he?vc?', '[hx]264|avc', 'vp0?8', 'mp4v|h263', 'theora', '', None, 'none']}, 'acodec': {'type': 'ordered', 'regex': True, - 'order': ['[af]lac', 'wav|aiff', 'opus', 'vorbis|ogg', 'aac', 'mp?4a?', 'mp3', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']}, + 'order': ['[af]lac', 'wav|aiff', 'opus', 'vorbis|ogg', 'aac', 'mp?4a?', 'mp3', 'ac-?4', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']}, 'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range', 'order': ['dv', '(hdr)?12', r'(hdr)?10\+', '(hdr)?10', 'hlg', '', 'sdr', None]}, 'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol', From e74a3c6dcc30ba16455749c3c5dbb9477961c175 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Fri, 9 Dec 2022 15:17:51 +0530 Subject: [PATCH 1818/2552] [extractor/hotstar] Improve format metadata --- yt_dlp/extractor/hotstar.py | 40 ++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index 8725c9436..cea1812f1 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -148,6 +148,12 @@ class HotStarIE(HotStarBaseIE): 'dr': 'dynamic_range', } + _TAG_FIELDS = { + 'language': 'language', + 'acodec': 'audio_codec', + 'vcodec': 'video_codec', + } + @classmethod def _video_url(cls, video_id, video_type=None, *, slug='ignore_me', root=None): assert None in (video_type, root) @@ -182,24 +188,22 @@ class HotStarIE(HotStarBaseIE): for key, prefix in self._IGNORE_MAP.items() for ignore in self._configuration_arg(key)): continue + tag_dict = dict((t.split(':', 1) + [None])[:2] for t in tags.split(';')) format_url = url_or_none(playback_set.get('playbackUrl')) if not format_url: continue format_url = re.sub(r'(?<=//staragvod)(\d)', r'web\1', format_url) - dr = re.search(r'dynamic_range:(?P<dr>[a-z]+)', playback_set.get('tagsCombination')).group('dr') ext = determine_ext(format_url) current_formats, current_subs = [], {} try: if 'package:hls' in tags or ext == 'm3u8': current_formats, current_subs = self._extract_m3u8_formats_and_subtitles( - format_url, video_id, 'mp4', - entry_protocol='m3u8_native', - m3u8_id=f'{dr}-hls', headers=headers) + format_url, video_id, ext='mp4', headers=headers) elif 'package:dash' in tags or ext == 'mpd': 
current_formats, current_subs = self._extract_mpd_formats_and_subtitles( - format_url, video_id, mpd_id=f'{dr}-dash', headers=headers) + format_url, video_id, headers=headers) elif ext == 'f4m': pass # XXX: produce broken files else: @@ -213,20 +217,32 @@ class HotStarIE(HotStarBaseIE): geo_restricted = True continue - if tags and 'encryption:plain' not in tags: + if tag_dict.get('encryption') not in ('plain', None): for f in current_formats: f['has_drm'] = True - if tags and 'language' in tags: - lang = re.search(r'language:(?P<lang>[a-z]+)', tags).group('lang') - for f in current_formats: - if not f.get('langauge'): - f['language'] = lang + for f in current_formats: + for k, v in self._TAG_FIELDS.items(): + if not f.get(k): + f[k] = tag_dict.get(v) + if f.get('vcodec') != 'none' and not f.get('dynamic_range'): + f['dynamic_range'] = tag_dict.get('dynamic_range') + if f.get('acodec') != 'none' and not f.get('audio_channels'): + f['audio_channels'] = { + 'stereo': 2, + 'dolby51': 6, + }.get(tag_dict.get('audio_channel')) + f['format_note'] = join_nonempty( + tag_dict.get('ladder'), + tag_dict.get('audio_channel') if f.get('acodec') != 'none' else None, + f.get('format_note'), + delim=', ') formats.extend(current_formats) subs = self._merge_subtitles(subs, current_subs) if not formats and geo_restricted: self.raise_geo_restricted(countries=['IN'], metadata_available=True) + self._remove_duplicate_formats(formats) for f in formats: f.setdefault('http_headers', {}).update(headers) @@ -235,7 +251,7 @@ class HotStarIE(HotStarBaseIE): 'title': video_data.get('title'), 'description': video_data.get('description'), 'duration': int_or_none(video_data.get('duration')), - 'timestamp': int_or_none(video_data.get('broadcastDate') or video_data.get('startDate')), + 'timestamp': int_or_none(traverse_obj(video_data, 'broadcastDate', 'startDate')), 'formats': formats, 'subtitles': subs, 'channel': video_data.get('channelName'), From f69b0554eb4500f1bdd0e07484d6b0a91e2b050c Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 9 Dec 2022 23:25:37 +0000 Subject: [PATCH 1819/2552] [extractor/slideslive] Fix extractor (#5737) Closes #1532 Authored by: bashonly, Grub4K --- yt_dlp/extractor/slideslive.py | 163 +++++++++++++++++++++++++-------- 1 file changed, 124 insertions(+), 39 deletions(-) diff --git a/yt_dlp/extractor/slideslive.py b/yt_dlp/extractor/slideslive.py index 9a60a79e7..86c26a8a2 100644 --- a/yt_dlp/extractor/slideslive.py +++ b/yt_dlp/extractor/slideslive.py @@ -1,92 +1,176 @@ from .common import InfoExtractor from ..utils import ( - bool_or_none, smuggle_url, - try_get, + traverse_obj, + unified_timestamp, url_or_none, ) class SlidesLiveIE(InfoExtractor): _VALID_URL = r'https?://slideslive\.com/(?P<id>[0-9]+)' - _WORKING = False _TESTS = [{ - # video_service_name = YOUTUBE + # service_name = yoda 'url': 'https://slideslive.com/38902413/gcc-ia16-backend', - 'md5': 'b29fcd6c6952d0c79c5079b0e7a07e6f', 'info_dict': { - 'id': 'LMtgR8ba0b0', + 'id': '38902413', 'ext': 'mp4', 'title': 'GCC IA16 backend', - 'description': 'Watch full version of this video at https://slideslive.com/38902413.', - 'uploader': 'SlidesLive Videos - A', - 'uploader_id': 'UC62SdArr41t_-_fX40QCLRw', - 'timestamp': 1597615266, - 'upload_date': '20170925', - } + 'timestamp': 1648189972, + 'upload_date': '20220325', + 'thumbnail': r're:^https?://.*\.jpg', + }, + 'params': { + 'skip_download': 'm3u8', + }, }, { - # video_service_name = yoda + # service_name = yoda 'url': 
'https://slideslive.com/38935785', - 'md5': '575cd7a6c0acc6e28422fe76dd4bcb1a', 'info_dict': { - 'id': 'RMraDYN5ozA_', + 'id': '38935785', 'ext': 'mp4', 'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges', + 'upload_date': '20211115', + 'timestamp': 1636996003, + 'thumbnail': r're:^https?://.*\.jpg', + }, + 'params': { + 'skip_download': 'm3u8', + }, + }, { + # service_name = yoda + 'url': 'https://slideslive.com/38973182/how-should-a-machine-learning-researcher-think-about-ai-ethics', + 'info_dict': { + 'id': '38973182', + 'ext': 'mp4', + 'title': 'How Should a Machine Learning Researcher Think About AI Ethics?', + 'upload_date': '20220201', + 'thumbnail': r're:^https?://.*\.jpg', + 'timestamp': 1643728135, + }, + 'params': { + 'skip_download': 'm3u8', }, }, { - # video_service_name = youtube + # service_name = youtube + 'url': 'https://slideslive.com/38897546/special-metaprednaska-petra-ludwiga-hodnoty-pro-lepsi-spolecnost', + 'md5': '8a79b5e3d700837f40bd2afca3c8fa01', + 'info_dict': { + 'id': 'jmg02wCJD5M', + 'display_id': '38897546', + 'ext': 'mp4', + 'title': 'SPECIÁL: Meta-přednáška Petra Ludwiga - Hodnoty pro lepší společnost', + 'description': 'Watch full version of this video at https://slideslive.com/38897546.', + 'channel_url': 'https://www.youtube.com/channel/UCZWdAkNYFncuX0khyvhqnxw', + 'channel': 'SlidesLive Videos - G1', + 'channel_id': 'UCZWdAkNYFncuX0khyvhqnxw', + 'uploader_id': 'UCZWdAkNYFncuX0khyvhqnxw', + 'uploader': 'SlidesLive Videos - G1', + 'uploader_url': 'http://www.youtube.com/channel/UCZWdAkNYFncuX0khyvhqnxw', + 'live_status': 'not_live', + 'upload_date': '20160710', + 'timestamp': 1618786715, + 'duration': 6827, + 'like_count': int, + 'view_count': int, + 'comment_count': int, + 'channel_follower_count': int, + 'age_limit': 0, + 'thumbnail': r're:^https?://.*\.jpg', + 'playable_in_embed': True, + 'availability': 'unlisted', + 'tags': [], + 'categories': ['People & Blogs'], + }, + }, { + # service_name = youtube 'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend', 'only_matching': True, }, { - # video_service_name = url + # service_name = url 'url': 'https://slideslive.com/38922070/learning-transferable-skills-1', 'only_matching': True, }, { - # video_service_name = vimeo + # service_name = vimeo 'url': 'https://slideslive.com/38921896/retrospectives-a-venue-for-selfreflection-in-ml-research-3', 'only_matching': True, }] + def _extract_custom_m3u8_info(self, m3u8_data): + m3u8_dict = {} + + lookup = { + 'PRESENTATION-TITLE': 'title', + 'PRESENTATION-UPDATED-AT': 'timestamp', + 'PRESENTATION-THUMBNAIL': 'thumbnail', + 'PLAYLIST-TYPE': 'playlist_type', + 'VOD-VIDEO-SERVICE-NAME': 'service_name', + 'VOD-VIDEO-ID': 'service_id', + 'VOD-VIDEO-SERVERS': 'video_servers', + 'VOD-SUBTITLES': 'subtitles', + } + + for line in m3u8_data.splitlines(): + if not line.startswith('#EXT-SL-'): + continue + tag, _, value = line.partition(':') + key = lookup.get(tag.lstrip('#EXT-SL-')) + if not key: + continue + m3u8_dict[key] = value + + # Some values are stringified JSON arrays + for key in ('video_servers', 'subtitles'): + if key in m3u8_dict: + m3u8_dict[key] = self._parse_json(m3u8_dict[key], None, fatal=False) or [] + + return m3u8_dict + def _real_extract(self, url): video_id = self._match_id(url) - video_data = self._download_json( - 'https://ben.slideslive.com/player/' + video_id, video_id) - service_name = video_data['video_service_name'].lower() + webpage = self._download_webpage(url, video_id) + 
player_token = self._search_regex(r'data-player-token="([^"]+)"', webpage, 'player token') + player_data = self._download_webpage( + f'https://ben.slideslive.com/player/{video_id}', video_id, + note='Downloading player info', query={'player_token': player_token}) + player_info = self._extract_custom_m3u8_info(player_data) + + service_name = player_info['service_name'].lower() assert service_name in ('url', 'yoda', 'vimeo', 'youtube') - service_id = video_data['video_service_id'] + service_id = player_info['service_id'] + subtitles = {} - for sub in try_get(video_data, lambda x: x['subtitles'], list) or []: - if not isinstance(sub, dict): - continue + for sub in traverse_obj(player_info, ('subtitles', ...), expected_type=dict): webvtt_url = url_or_none(sub.get('webvtt_url')) if not webvtt_url: continue - lang = sub.get('language') or 'en' - subtitles.setdefault(lang, []).append({ + subtitles.setdefault(sub.get('language') or 'en', []).append({ 'url': webvtt_url, + 'ext': 'vtt', }) + info = { 'id': video_id, - 'thumbnail': video_data.get('thumbnail'), - 'is_live': bool_or_none(video_data.get('is_live')), + 'title': player_info.get('title') or self._html_search_meta('title', webpage, default=''), + 'timestamp': unified_timestamp(player_info.get('timestamp')), + 'is_live': player_info.get('playlist_type') != 'vod', + 'thumbnail': url_or_none(player_info.get('thumbnail')), 'subtitles': subtitles, } + if service_name in ('url', 'yoda'): - info['title'] = video_data['title'] if service_name == 'url': info['url'] = service_id else: + cdn_hostname = player_info['video_servers'][0] formats = [] - _MANIFEST_PATTERN = 'https://01.cdn.yoda.slideslive.com/%s/master.%s' - # use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol formats.extend(self._extract_m3u8_formats( - _MANIFEST_PATTERN % (service_id, 'm3u8'), - service_id, 'mp4', m3u8_id='hls', fatal=False)) + f'https://{cdn_hostname}/{service_id}/master.m3u8', + video_id, 'mp4', m3u8_id='hls', fatal=False, live=True)) formats.extend(self._extract_mpd_formats( - _MANIFEST_PATTERN % (service_id, 'mpd'), service_id, - mpd_id='dash', fatal=False)) + f'https://{cdn_hostname}/{service_id}/master.mpd', + video_id, mpd_id='dash', fatal=False)) info.update({ - 'id': service_id, 'formats': formats, }) else: @@ -94,10 +178,11 @@ class SlidesLiveIE(InfoExtractor): '_type': 'url_transparent', 'url': service_id, 'ie_key': service_name.capitalize(), - 'title': video_data.get('title'), + 'display_id': video_id, }) if service_name == 'vimeo': info['url'] = smuggle_url( - 'https://player.vimeo.com/video/' + service_id, + f'https://player.vimeo.com/video/{service_id}', {'http_headers': {'Referer': url}}) + return info From 3cf50fa8e9e460fef35531df46b6e893924f1c96 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 9 Dec 2022 23:36:38 +0000 Subject: [PATCH 1820/2552] [downloader/ffmpeg] Fix headers for video+audio formats (#5659) Authored by: bashonly, Grub4K --- yt_dlp/downloader/external.py | 31 +++++++++++++------------------ yt_dlp/extractor/generic.py | 2 +- 2 files changed, 14 insertions(+), 19 deletions(-) diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 895390d6c..575138371 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -342,7 +342,6 @@ class FFmpegFD(ExternalFD): and cls.can_download(info_dict)) def _call_downloader(self, tmpfilename, info_dict): - urls = [f['url'] for f in info_dict.get('requested_formats', 
[])] or [info_dict['url']] ffpp = FFmpegPostProcessor(downloader=self) if not ffpp.available: self.report_error('m3u8 download detected but ffmpeg could not be found. Please install') @@ -372,16 +371,6 @@ class FFmpegFD(ExternalFD): # http://trac.ffmpeg.org/ticket/6125#comment:10 args += ['-seekable', '1' if seekable else '0'] - http_headers = None - if info_dict.get('http_headers'): - youtubedl_headers = handle_youtubedl_headers(info_dict['http_headers']) - http_headers = [ - # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv: - # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header. - '-headers', - ''.join(f'{key}: {val}\r\n' for key, val in youtubedl_headers.items()) - ] - env = None proxy = self.params.get('proxy') if proxy: @@ -434,21 +423,26 @@ class FFmpegFD(ExternalFD): start_time, end_time = info_dict.get('section_start') or 0, info_dict.get('section_end') - for i, url in enumerate(urls): - if http_headers is not None and re.match(r'^https?://', url): - args += http_headers + selected_formats = info_dict.get('requested_formats') or [info_dict] + for i, fmt in enumerate(selected_formats): + if fmt.get('http_headers') and re.match(r'^https?://', fmt['url']): + headers_dict = handle_youtubedl_headers(fmt['http_headers']) + # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv: + # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header. + args.extend(['-headers', ''.join(f'{key}: {val}\r\n' for key, val in headers_dict.items())]) + if start_time: args += ['-ss', str(start_time)] if end_time: args += ['-t', str(end_time - start_time)] - args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', url] + args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', fmt['url']] if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'): args += ['-c', 'copy'] if info_dict.get('requested_formats') or protocol == 'http_dash_segments': - for (i, fmt) in enumerate(info_dict.get('requested_formats') or [info_dict]): + for i, fmt in enumerate(selected_formats): stream_number = fmt.get('manifest_stream_number', 0) args.extend(['-map', f'{i}:{stream_number}']) @@ -488,8 +482,9 @@ class FFmpegFD(ExternalFD): args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True)) self._debug_cmd(args) + piped = any(fmt['url'] in ('-', 'pipe:') for fmt in selected_formats) with Popen(args, stdin=subprocess.PIPE, env=env) as proc: - if url in ('-', 'pipe:'): + if piped: self.on_process_started(proc, proc.stdin) try: retval = proc.wait() @@ -499,7 +494,7 @@ class FFmpegFD(ExternalFD): # produces a file that is playable (this is mostly useful for live # streams). Note that Windows is not affected and produces playable # files (see https://github.com/ytdl-org/youtube-dl/issues/8300). 
- if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and url not in ('-', 'pipe:'): + if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and not piped: proc.communicate_or_kill(b'q') else: proc.kill(timeout=None) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index bf3c9c1e8..2281c71f3 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -2356,7 +2356,7 @@ class GenericIE(InfoExtractor): info_dict.update({ 'formats': formats, 'subtitles': subtitles, - 'http_headers': headers, + 'http_headers': headers or None, }) return info_dict From 16bed382fd5e7f258b8d058ca2863deb38875994 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 9 Dec 2022 23:41:45 +0000 Subject: [PATCH 1821/2552] [extractor/twitter] Heed `--no-playlist` for multi-video tweets (#5757) Closes #5752 Authored by: bashonly, Grub4K --- yt_dlp/extractor/twitter.py | 57 ++++++++++++++++++++++++++++++++++--- 1 file changed, 53 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py index 18ebb3617..a4e280c82 100644 --- a/yt_dlp/extractor/twitter.py +++ b/yt_dlp/extractor/twitter.py @@ -293,7 +293,7 @@ class TwitterCardIE(InfoExtractor): class TwitterIE(TwitterBaseIE): IE_NAME = 'twitter' - _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)' + _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/video/(?P<index>\d+))?' _TESTS = [{ 'url': 'https://twitter.com/freethenipple/status/643211948184596480', @@ -336,7 +336,7 @@ class TwitterIE(TwitterBaseIE): 'id': '665052190608723968', 'display_id': '665052190608723968', 'ext': 'mp4', - 'title': 'md5:55fef1d5b811944f1550e91b44abb82e', + 'title': 'md5:e99588f17b3dd0503814ffb560e64731', 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. 
https://t.co/OkSqT2fjWJ', 'uploader_id': 'starwars', 'uploader': r're:Star Wars.*', @@ -648,7 +648,7 @@ class TwitterIE(TwitterBaseIE): 'uploader_url': 'https://twitter.com/Rizdraws', 'upload_date': '20220928', 'timestamp': 1664391723, - 'thumbnail': 're:^https?://.*\\.jpg', + 'thumbnail': r're:^https?://.+\.jpg', 'like_count': int, 'repost_count': int, 'comment_count': int, @@ -727,6 +727,48 @@ class TwitterIE(TwitterBaseIE): }, 'add_ie': ['TwitterSpaces'], 'params': {'skip_download': 'm3u8'}, + }, { + # URL specifies video number but --yes-playlist + 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1', + 'playlist_mincount': 2, + 'info_dict': { + 'id': '1600649710662213632', + 'title': 'md5:be05989b0722e114103ed3851a0ffae2', + 'timestamp': 1670459604.0, + 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c', + 'comment_count': int, + 'uploader_id': 'CTVJLaidlaw', + 'repost_count': int, + 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'], + 'upload_date': '20221208', + 'age_limit': 0, + 'uploader': 'Jocelyn Laidlaw', + 'uploader_url': 'https://twitter.com/CTVJLaidlaw', + 'like_count': int, + }, + }, { + # URL specifies video number and --no-playlist + 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2', + 'info_dict': { + 'id': '1600649511827013632', + 'ext': 'mp4', + 'title': 'md5:be05989b0722e114103ed3851a0ffae2', + 'thumbnail': r're:^https?://.+\.jpg', + 'timestamp': 1670459604.0, + 'uploader_id': 'CTVJLaidlaw', + 'uploader': 'Jocelyn Laidlaw', + 'repost_count': int, + 'comment_count': int, + 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'], + 'duration': 102.226, + 'uploader_url': 'https://twitter.com/CTVJLaidlaw', + 'display_id': '1600649710662213632', + 'like_count': int, + 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c', + 'upload_date': '20221208', + 'age_limit': 0, + }, + 'params': {'noplaylist': True}, }, { # onion route 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273', @@ -828,7 +870,7 @@ class TwitterIE(TwitterBaseIE): } def _real_extract(self, url): - twid = self._match_id(url) + twid, selected_index = self._match_valid_url(url).group('id', 'index') if self.is_logged_in or self._configuration_arg('force_graphql'): self.write_debug(f'Using GraphQL API (Auth = {self.is_logged_in})') result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid) @@ -998,6 +1040,13 @@ class TwitterIE(TwitterBaseIE): entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)] + if not self._yes_playlist(twid, selected_index, video_label='URL-specified video number'): + index = int(selected_index) - 1 + if index >= len(entries): + raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True) + + return entries[index] + if len(entries) == 1: return entries[0] From 7c5e1701f6e948c83a928b6657542036c1d7516e Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 9 Dec 2022 23:43:10 +0000 Subject: [PATCH 1822/2552] [extractor/foxsports] Fix extractor (#5719) Closes #5714 Authored by: bashonly --- yt_dlp/extractor/foxsports.py | 56 ++++++++++++++++-------- yt_dlp/extractor/uplynk.py | 80 ++++++++++++++++++++++------------- 2 files changed, 89 insertions(+), 47 deletions(-) diff --git a/yt_dlp/extractor/foxsports.py b/yt_dlp/extractor/foxsports.py index f9d7fe52a..f906a1718 100644 --- a/yt_dlp/extractor/foxsports.py +++ b/yt_dlp/extractor/foxsports.py @@ -1,31 +1,51 @@ from .common 
import InfoExtractor +from .uplynk import UplynkPreplayIE +from ..utils import HEADRequest, float_or_none, make_archive_id, smuggle_url class FoxSportsIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*video/(?P<id>\d+)' - - _TEST = { - 'url': 'http://www.foxsports.com/tennessee/video/432609859715', - 'md5': 'b49050e955bebe32c301972e4012ac17', + _VALID_URL = r'https?://(?:www\.)?foxsports\.com/watch/(?P<id>[\w-]+)' + _TESTS = [{ + 'url': 'https://www.foxsports.com/watch/play-612168c6700004b', 'info_dict': { - 'id': '432609859715', + 'id': 'b72f5bd8658140baa5791bb676433733', 'ext': 'mp4', - 'title': 'Courtney Lee on going up 2-0 in series vs. Blazers', - 'description': 'Courtney Lee talks about Memphis being focused.', - # TODO: fix timestamp - 'upload_date': '19700101', # '20150423', - # 'timestamp': 1429761109, - 'uploader': 'NEWA-FNG-FOXSPORTS', + 'display_id': 'play-612168c6700004b', + 'title': 'md5:e0c4ecac3a1f25295b4fae22fb5c126a', + 'description': 'md5:371bc43609708ae2b9e1a939229762af', + 'uploader_id': '06b4a36349624051a9ba52ac3a91d268', + 'upload_date': '20221205', + 'timestamp': 1670262586, + 'duration': 31.7317, + 'thumbnail': r're:^https?://.*\.jpg$', + 'extra_param_to_segment_url': str, }, 'params': { - # m3u8 download - 'skip_download': True, + 'skip_download': 'm3u8', }, - 'add_ie': ['ThePlatform'], - } + }] def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + json_ld = self._search_json_ld(webpage, video_id, expected_type='VideoObject', default={}) + data = self._download_json( + f'https://api3.fox.com/v2.0/vodplayer/sportsclip/{video_id}', + video_id, note='Downloading API JSON', headers={ + 'x-api-key': 'cf289e299efdfa39fb6316f259d1de93', + }) + preplay_url = self._request_webpage( + HEADRequest(data['url']), video_id, 'Fetching preplay URL').geturl() - return self.url_result( - 'https://feed.theplatform.com/f/BKQ29B/foxsports-all?byId=' + video_id, 'ThePlatformFeed') + return { + '_type': 'url_transparent', + 'ie_key': UplynkPreplayIE.ie_key(), + 'url': smuggle_url(preplay_url, {'Origin': 'https://www.foxsports.com'}), + 'display_id': video_id, + 'title': data.get('name') or json_ld.get('title'), + 'description': data.get('description') or json_ld.get('description'), + 'duration': float_or_none(data.get('durationInSeconds')), + 'timestamp': json_ld.get('timestamp'), + 'thumbnails': json_ld.get('thumbnails'), + '_old_archive_ids': [make_archive_id(self, video_id)], + } diff --git a/yt_dlp/extractor/uplynk.py b/yt_dlp/extractor/uplynk.py index 87c427f63..e7d816ef4 100644 --- a/yt_dlp/extractor/uplynk.py +++ b/yt_dlp/extractor/uplynk.py @@ -2,40 +2,42 @@ import re from .common import InfoExtractor from ..utils import ( - float_or_none, ExtractorError, + float_or_none, + smuggle_url, + traverse_obj, + unsmuggle_url, + update_url_query, ) -class UplynkIE(InfoExtractor): - IE_NAME = 'uplynk' - _VALID_URL = r'https?://.*?\.uplynk\.com/(?P<path>ext/[0-9a-f]{32}/(?P<external_id>[^/?&]+)|(?P<id>[0-9a-f]{32}))\.(?:m3u8|json)(?:.*?\bpbs=(?P<session_id>[^&]+))?' 
- _TEST = { - 'url': 'http://content.uplynk.com/e89eaf2ce9054aa89d92ddb2d817a52e.m3u8', - 'info_dict': { - 'id': 'e89eaf2ce9054aa89d92ddb2d817a52e', - 'ext': 'mp4', - 'title': '030816-kgo-530pm-solar-eclipse-vid_web.mp4', - 'uploader_id': '4413701bf5a1488db55b767f8ae9d4fa', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - } +class UplynkBaseIE(InfoExtractor): + _UPLYNK_URL_RE = r'''(?x) + https?://[\w-]+\.uplynk\.com/(?P<path> + ext/[0-9a-f]{32}/(?P<external_id>[^/?&]+)| + (?P<id>[0-9a-f]{32}) + )\.(?:m3u8|json) + (?:.*?\bpbs=(?P<session_id>[^&]+))?''' - def _extract_uplynk_info(self, uplynk_content_url): - path, external_id, video_id, session_id = re.match(UplynkIE._VALID_URL, uplynk_content_url).groups() + def _extract_uplynk_info(self, url): + uplynk_content_url, smuggled_data = unsmuggle_url(url, {}) + mobj = re.match(self._UPLYNK_URL_RE, uplynk_content_url) + if not mobj: + raise ExtractorError('Necessary parameters not found in Uplynk URL') + path, external_id, video_id, session_id = mobj.group('path', 'external_id', 'id', 'session_id') display_id = video_id or external_id + headers = traverse_obj( + smuggled_data, {'Referer': 'Referer', 'Origin': 'Origin'}, casesense=False) formats, subtitles = self._extract_m3u8_formats_and_subtitles( - 'http://content.uplynk.com/%s.m3u8' % path, - display_id, 'mp4', 'm3u8_native') + f'http://content.uplynk.com/{path}.m3u8', display_id, 'mp4', headers=headers) if session_id: for f in formats: - f['extra_param_to_segment_url'] = 'pbs=' + session_id - asset = self._download_json('http://content.uplynk.com/player/assetinfo/%s.json' % path, display_id) + f['extra_param_to_segment_url'] = f'pbs={session_id}' + asset = self._download_json( + f'http://content.uplynk.com/player/assetinfo/{path}.json', display_id) if asset.get('error') == 1: - raise ExtractorError('% said: %s' % (self.IE_NAME, asset['msg']), expected=True) + msg = asset.get('msg') or 'unknown error' + raise ExtractorError(f'{self.IE_NAME} said: {msg}', expected=True) return { 'id': asset['asset'], @@ -47,20 +49,40 @@ class UplynkIE(InfoExtractor): 'subtitles': subtitles, } + +class UplynkIE(UplynkBaseIE): + IE_NAME = 'uplynk' + _VALID_URL = UplynkBaseIE._UPLYNK_URL_RE + _TEST = { + 'url': 'http://content.uplynk.com/e89eaf2ce9054aa89d92ddb2d817a52e.m3u8', + 'info_dict': { + 'id': 'e89eaf2ce9054aa89d92ddb2d817a52e', + 'ext': 'mp4', + 'title': '030816-kgo-530pm-solar-eclipse-vid_web.mp4', + 'uploader_id': '4413701bf5a1488db55b767f8ae9d4fa', + 'duration': 530.2739166666679, + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'params': { + 'skip_download': 'm3u8', + }, + } + def _real_extract(self, url): return self._extract_uplynk_info(url) -class UplynkPreplayIE(UplynkIE): # XXX: Do not subclass from concrete IE +class UplynkPreplayIE(UplynkBaseIE): IE_NAME = 'uplynk:preplay' - _VALID_URL = r'https?://.*?\.uplynk\.com/preplay2?/(?P<path>ext/[0-9a-f]{32}/(?P<external_id>[^/?&]+)|(?P<id>[0-9a-f]{32}))\.json' + _VALID_URL = r'https?://[\w-]+\.uplynk\.com/preplay2?/(?P<path>ext/[0-9a-f]{32}/(?P<external_id>[^/?&]+)|(?P<id>[0-9a-f]{32}))\.json' def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) path, external_id, video_id = self._match_valid_url(url).groups() display_id = video_id or external_id preplay = self._download_json(url, display_id) - content_url = 'http://content.uplynk.com/%s.m3u8' % path + content_url = f'http://content.uplynk.com/{path}.m3u8' session_id = preplay.get('sid') if session_id: - content_url += '?pbs=' + session_id - return 
self._extract_uplynk_info(content_url) + content_url = update_url_query(content_url, {'pbs': session_id}) + return self._extract_uplynk_info(smuggle_url(content_url, smuggled_data)) From f549b18512570d0c000179df9147415e4eba1649 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 9 Dec 2022 23:46:04 +0000 Subject: [PATCH 1823/2552] [extractor/pinterest] Fix extractor (#5739) Closes #1772 Authored by: bashonly --- yt_dlp/extractor/pinterest.py | 153 ++++++++++++++++++++++------------ 1 file changed, 102 insertions(+), 51 deletions(-) diff --git a/yt_dlp/extractor/pinterest.py b/yt_dlp/extractor/pinterest.py index 2c6cd6d4b..8361fbbc5 100644 --- a/yt_dlp/extractor/pinterest.py +++ b/yt_dlp/extractor/pinterest.py @@ -1,19 +1,24 @@ import json from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( determine_ext, float_or_none, int_or_none, - try_get, + str_or_none, + strip_or_none, + traverse_obj, unified_timestamp, url_or_none, ) class PinterestBaseIE(InfoExtractor): - _VALID_URL_BASE = r'https?://(?:[^/]+\.)?pinterest\.(?:com|fr|de|ch|jp|cl|ca|it|co\.uk|nz|ru|com\.au|at|pt|co\.kr|es|com\.mx|dk|ph|th|com\.uy|co|nl|info|kr|ie|vn|com\.vn|ec|mx|in|pe|co\.at|hu|co\.in|co\.nz|id|com\.ec|com\.py|tw|be|uk|com\.bo|com\.pe)' + _VALID_URL_BASE = r'''(?x) + https?://(?:[^/]+\.)?pinterest\.(?: + com|fr|de|ch|jp|cl|ca|it|co\.uk|nz|ru|com\.au|at|pt|co\.kr|es|com\.mx| + dk|ph|th|com\.uy|co|nl|info|kr|ie|vn|com\.vn|ec|mx|in|pe|co\.at|hu| + co\.in|co\.nz|id|com\.ec|com\.py|tw|be|uk|com\.bo|com\.pe)''' def _call_api(self, resource, video_id, options): return self._download_json( @@ -24,14 +29,53 @@ class PinterestBaseIE(InfoExtractor): def _extract_video(self, data, extract_formats=True): video_id = data['id'] + thumbnails = [] + images = data.get('images') + if isinstance(images, dict): + for thumbnail_id, thumbnail in images.items(): + if not isinstance(thumbnail, dict): + continue + thumbnail_url = url_or_none(thumbnail.get('url')) + if not thumbnail_url: + continue + thumbnails.append({ + 'url': thumbnail_url, + 'width': int_or_none(thumbnail.get('width')), + 'height': int_or_none(thumbnail.get('height')), + }) - title = (data.get('title') or data.get('grid_title') or video_id).strip() + info = { + 'title': strip_or_none(traverse_obj(data, 'title', 'grid_title', default='')), + 'description': traverse_obj(data, 'seo_description', 'description'), + 'timestamp': unified_timestamp(data.get('created_at')), + 'thumbnails': thumbnails, + 'uploader': traverse_obj(data, ('closeup_attribution', 'full_name')), + 'uploader_id': str_or_none(traverse_obj(data, ('closeup_attribution', 'id'))), + 'repost_count': int_or_none(data.get('repin_count')), + 'comment_count': int_or_none(data.get('comment_count')), + 'categories': traverse_obj(data, ('pin_join', 'visual_annotation'), expected_type=list), + 'tags': traverse_obj(data, 'hashtags', expected_type=list), + } urls = [] formats = [] duration = None - if extract_formats: - for format_id, format_dict in data['videos']['video_list'].items(): + domain = data.get('domain', '') + if domain.lower() != 'uploaded by user' and traverse_obj(data, ('embed', 'src')): + if not info['title']: + info['title'] = None + return { + '_type': 'url_transparent', + 'url': data['embed']['src'], + **info, + } + + elif extract_formats: + video_list = traverse_obj( + data, ('videos', 'video_list'), + ('story_pin_data', 'pages', ..., 'blocks', ..., 'video', 'video_list'), + expected_type=dict, get_all=False, default={}) + 
for format_id, format_dict in video_list.items(): if not isinstance(format_dict, dict): continue format_url = url_or_none(format_dict.get('url')) @@ -53,72 +97,79 @@ class PinterestBaseIE(InfoExtractor): 'duration': duration, }) - description = data.get('description') or data.get('description_html') or data.get('seo_description') - timestamp = unified_timestamp(data.get('created_at')) - - def _u(field): - return try_get(data, lambda x: x['closeup_attribution'][field], compat_str) - - uploader = _u('full_name') - uploader_id = _u('id') - - repost_count = int_or_none(data.get('repin_count')) - comment_count = int_or_none(data.get('comment_count')) - categories = try_get(data, lambda x: x['pin_join']['visual_annotation'], list) - tags = data.get('hashtags') - - thumbnails = [] - images = data.get('images') - if isinstance(images, dict): - for thumbnail_id, thumbnail in images.items(): - if not isinstance(thumbnail, dict): - continue - thumbnail_url = url_or_none(thumbnail.get('url')) - if not thumbnail_url: - continue - thumbnails.append({ - 'url': thumbnail_url, - 'width': int_or_none(thumbnail.get('width')), - 'height': int_or_none(thumbnail.get('height')), - }) - return { 'id': video_id, - 'title': title, - 'description': description, - 'duration': duration, - 'timestamp': timestamp, - 'thumbnails': thumbnails, - 'uploader': uploader, - 'uploader_id': uploader_id, - 'repost_count': repost_count, - 'comment_count': comment_count, - 'categories': categories, - 'tags': tags, 'formats': formats, + 'duration': duration, + 'webpage_url': f'https://www.pinterest.com/pin/{video_id}/', 'extractor_key': PinterestIE.ie_key(), + 'extractor': PinterestIE.IE_NAME, + **info, } class PinterestIE(PinterestBaseIE): _VALID_URL = r'%s/pin/(?P<id>\d+)' % PinterestBaseIE._VALID_URL_BASE _TESTS = [{ + # formats found in data['videos'] 'url': 'https://www.pinterest.com/pin/664281013778109217/', 'md5': '6550c2af85d6d9f3fe3b88954d1577fc', 'info_dict': { 'id': '664281013778109217', 'ext': 'mp4', 'title': 'Origami', - 'description': 'md5:b9d90ddf7848e897882de9e73344f7dd', + 'description': 'md5:e29801cab7d741ea8c741bc50c8d00ab', 'duration': 57.7, 'timestamp': 1593073622, 'upload_date': '20200625', - 'uploader': 'Love origami -I am Dafei', - 'uploader_id': '586523688879454212', - 'repost_count': 50, - 'comment_count': 0, + 'repost_count': int, + 'comment_count': int, 'categories': list, 'tags': list, + 'thumbnail': r're:^https?://.*\.(?:jpg|png)$', + }, + }, { + # formats found in data['story_pin_data'] + 'url': 'https://www.pinterest.com/pin/1084663891475263837/', + 'md5': '069ac19919ab9e1e13fa60de46290b03', + 'info_dict': { + 'id': '1084663891475263837', + 'ext': 'mp4', + 'title': 'Gadget, Cool products, Amazon product, technology, Kitchen gadgets', + 'description': 'md5:d0a4b6ae996ff0c6eed83bc869598d13', + 'uploader': 'CoolCrazyGadgets', + 'uploader_id': '1084664028912989237', + 'upload_date': '20211003', + 'timestamp': 1633246654.0, + 'duration': 14.9, + 'comment_count': int, + 'repost_count': int, + 'categories': 'count:9', + 'tags': list, + 'thumbnail': r're:^https?://.*\.(?:jpg|png)$', + }, + }, { + # vimeo.com embed + 'url': 'https://www.pinterest.ca/pin/441282463481903715/', + 'info_dict': { + 'id': '111691128', + 'ext': 'mp4', + 'title': 'Tonite Let\'s All Make Love In London (1967)', + 'description': 'md5:8190f37b3926807809ec57ec21aa77b2', + 'uploader': 'Vimeo', + 'uploader_id': '473792960706651251', + 'upload_date': '20180120', + 'timestamp': 1516409040, + 'duration': 3404, + 'comment_count': int, + 
'repost_count': int, + 'categories': 'count:9', + 'tags': [], + 'thumbnail': r're:^https?://.*\.(?:jpg|png)$', + 'uploader_url': 'https://vimeo.com/willardandrade', + }, + 'params': { + 'skip_download': 'm3u8', }, }, { 'url': 'https://co.pinterest.com/pin/824721750502199491/', From e318b5b87ab2e04f554c97f2d7b9989f8c24156c Mon Sep 17 00:00:00 2001 From: HobbyistDev <105957301+HobbyistDev@users.noreply.github.com> Date: Sat, 10 Dec 2022 17:29:13 +0900 Subject: [PATCH 1824/2552] [extractor/airtv] Add extractor (#5533) Authored by: HobbyistDev Closes #5132 --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/airtv.py | 96 +++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+) create mode 100644 yt_dlp/extractor/airtv.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index c90d7b7f6..b1bbc5b72 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -78,6 +78,7 @@ from .agora import ( WyborczaVideoIE, ) from .airmozilla import AirMozillaIE +from .airtv import AirTVIE from .aljazeera import AlJazeeraIE from .alphaporno import AlphaPornoIE from .amara import AmaraIE diff --git a/yt_dlp/extractor/airtv.py b/yt_dlp/extractor/airtv.py new file mode 100644 index 000000000..0b73a966e --- /dev/null +++ b/yt_dlp/extractor/airtv.py @@ -0,0 +1,96 @@ +from .common import InfoExtractor +from .youtube import YoutubeIE +from ..utils import ( + determine_ext, + int_or_none, + mimetype2ext, + parse_iso8601, + traverse_obj +) + + +class AirTVIE(InfoExtractor): + _VALID_URL = r'https?://www\.air\.tv/watch\?v=(?P<id>\w+)' + _TESTS = [{ + # without youtube_id + 'url': 'https://www.air.tv/watch?v=W87jcWleSn2hXZN47zJZsQ', + 'info_dict': { + 'id': 'W87jcWleSn2hXZN47zJZsQ', + 'ext': 'mp4', + 'release_date': '20221003', + 'release_timestamp': 1664792603, + 'channel_id': 'vgfManQlRQKgoFQ8i8peFQ', + 'title': 'md5:c12d49ed367c3dadaa67659aff43494c', + 'upload_date': '20221003', + 'duration': 151, + 'view_count': int, + 'thumbnail': 'https://cdn-sp-gcs.air.tv/videos/W/8/W87jcWleSn2hXZN47zJZsQ/b13fc56464f47d9d62a36d110b9b5a72-4096x2160_9.jpg', + 'timestamp': 1664792603, + } + }, { + # with youtube_id + 'url': 'https://www.air.tv/watch?v=sv57EC8tRXG6h8dNXFUU1Q', + 'info_dict': { + 'id': '2ZTqmpee-bQ', + 'ext': 'mp4', + 'comment_count': int, + 'tags': 'count:11', + 'channel_follower_count': int, + 'like_count': int, + 'uploader': 'Newsflare', + 'thumbnail': 'https://i.ytimg.com/vi_webp/2ZTqmpee-bQ/maxresdefault.webp', + 'availability': 'public', + 'title': 'Geese Chase Alligator Across Golf Course', + 'uploader_id': 'NewsflareBreaking', + 'channel_url': 'https://www.youtube.com/channel/UCzSSoloGEz10HALUAbYhngQ', + 'description': 'md5:99b21d9cea59330149efbd9706e208f5', + 'age_limit': 0, + 'channel_id': 'UCzSSoloGEz10HALUAbYhngQ', + 'uploader_url': 'http://www.youtube.com/user/NewsflareBreaking', + 'view_count': int, + 'categories': ['News & Politics'], + 'live_status': 'not_live', + 'playable_in_embed': True, + 'channel': 'Newsflare', + 'duration': 37, + 'upload_date': '20180511', + } + }] + + def _get_formats_and_subtitle(self, json_data, video_id): + formats, subtitles = [], {} + for source in traverse_obj(json_data, 'sources', 'sources_desktop', ...): + ext = determine_ext(source.get('src'), mimetype2ext(source.get('type'))) + if ext == 'm3u8': + fmts, subs = self._extract_m3u8_formats_and_subtitles(source.get('src'), video_id) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + else: + formats.append({'url': 
source.get('src'), 'ext': ext}) + return formats, subtitles + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + nextjs_json = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['initialState']['videos'][display_id] + if nextjs_json.get('youtube_id'): + return self.url_result( + f'https://www.youtube.com/watch?v={nextjs_json.get("youtube_id")}', YoutubeIE) + + formats, subtitles = self._get_formats_and_subtitle(nextjs_json, display_id) + return { + 'id': display_id, + 'title': nextjs_json.get('title') or self._html_search_meta('og:title', webpage), + 'formats': formats, + 'subtitles': subtitles, + 'description': nextjs_json.get('description') or None, + 'duration': int_or_none(nextjs_json.get('duration')), + 'thumbnails': [ + {'url': thumbnail} + for thumbnail in traverse_obj(nextjs_json, ('default_thumbnails', ...))], + 'channel_id': traverse_obj(nextjs_json, 'channel', 'channel_slug'), + 'timestamp': parse_iso8601(nextjs_json.get('created')), + 'release_timestamp': parse_iso8601(nextjs_json.get('published')), + 'view_count': int_or_none(nextjs_json.get('views')), + } From 3ac54764301a0e97bf0d2eeb0c32d45a7e03d1f7 Mon Sep 17 00:00:00 2001 From: HobbyistDev <105957301+HobbyistDev@users.noreply.github.com> Date: Sat, 10 Dec 2022 17:34:55 +0900 Subject: [PATCH 1825/2552] [extractor/nosnl] Add support for /video (#5590) Authored by: HobbyistDev --- yt_dlp/extractor/nosnl.py | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/nosnl.py b/yt_dlp/extractor/nosnl.py index eba94c416..cea54c98e 100644 --- a/yt_dlp/extractor/nosnl.py +++ b/yt_dlp/extractor/nosnl.py @@ -3,7 +3,7 @@ from ..utils import parse_duration, parse_iso8601, traverse_obj class NOSNLArticleIE(InfoExtractor): - _VALID_URL = r'https?://nos\.nl/((?!video)(\w+/)?\w+/)\d+-(?P<display_id>[\w-]+)' + _VALID_URL = r'https?://nos\.nl/(?P<type>video|(\w+/)?\w+)/?\d+-(?P<display_id>[\w-]+)' _TESTS = [ { # only 1 video @@ -22,13 +22,14 @@ class NOSNLArticleIE(InfoExtractor): 'info_dict': { 'id': '2440409', 'title': 'Vannacht sliepen weer enkele honderden asielzoekers in Ter Apel buiten', - 'description': 'Er werd wel geprobeerd om kwetsbare migranten onderdak te bieden, zegt het COA.', + 'description': 'md5:72b1e1674d798460e79d78fa37e9f56d', 'tags': ['aanmeldcentrum', 'Centraal Orgaan opvang asielzoekers', 'COA', 'asielzoekers', 'Ter Apel'], 'modified_timestamp': 1660452773, 'modified_date': '20220814', 'upload_date': '20220813', 'thumbnail': 'https://cdn.nos.nl/image/2022/07/18/880346/1024x576a.jpg', 'timestamp': 1660401384, + 'categories': ['Regionaal nieuws', 'Binnenland'], }, 'playlist_count': 2, }, { @@ -37,20 +38,37 @@ class NOSNLArticleIE(InfoExtractor): 'info_dict': { 'id': '2440789', 'title': 'Wekdienst 16/8: Groningse acties tien jaar na zware aardbeving • Femke Bol in actie op EK atletiek ', - 'description': 'Nieuws, weer, verkeer: met dit overzicht begin je geïnformeerd aan de dag.', + 'description': 'md5:0bd277ed7a44fc15cb12a9d27d8f6641', 'tags': ['wekdienst'], 'modified_date': '20220816', 'modified_timestamp': 1660625449, 'timestamp': 1660625449, 'upload_date': '20220816', 'thumbnail': 'https://cdn.nos.nl/image/2022/08/16/888178/1024x576a.jpg', + 'categories': ['Binnenland', 'Buitenland'], }, 'playlist_count': 2, + }, { + # video url + 'url': 'https://nos.nl/video/2452718-xi-en-trudeau-botsen-voor-de-camera-op-g20-top-je-hebt-gelekt', + 'info_dict': { + 'id': '2452718', + 
'title': 'Xi en Trudeau botsen voor de camera op G20-top: \'Je hebt gelekt\'', + 'modified_date': '20221117', + 'description': 'md5:61907dac576f75c11bf8ffffd4a3cc0f', + 'tags': ['Xi', 'Trudeau', 'G20', 'indonesié'], + 'upload_date': '20221117', + 'thumbnail': 'https://cdn.nos.nl/image/2022/11/17/916155/1024x576a.jpg', + 'modified_timestamp': 1668663388, + 'timestamp': 1668663388, + 'categories': ['Buitenland'], + }, + 'playlist_mincount': 1, } ] def _entries(self, nextjs_json, display_id): - for item in nextjs_json['items']: + for item in nextjs_json: if item.get('type') == 'video': formats, subtitle = self._extract_m3u8_formats_and_subtitles( traverse_obj(item, ('source', 'url')), display_id, ext='mp4') @@ -77,13 +95,14 @@ class NOSNLArticleIE(InfoExtractor): } def _real_extract(self, url): - display_id = self._match_valid_url(url).group('display_id') + site_type, display_id = self._match_valid_url(url).group('type', 'display_id') webpage = self._download_webpage(url, display_id) nextjs_json = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['data'] return { '_type': 'playlist', - 'entries': self._entries(nextjs_json, display_id), + 'entries': self._entries( + [nextjs_json['video']] if site_type == 'video' else nextjs_json['items'], display_id), 'id': str(nextjs_json['id']), 'title': nextjs_json.get('title') or self._html_search_meta(['title', 'og:title', 'twitter:title'], webpage), 'description': (nextjs_json.get('description') @@ -91,5 +110,6 @@ class NOSNLArticleIE(InfoExtractor): 'tags': nextjs_json.get('keywords'), 'modified_timestamp': parse_iso8601(nextjs_json.get('modifiedAt')), 'thumbnail': nextjs_json.get('shareImageSrc') or self._html_search_meta(['og:image', 'twitter:image'], webpage), - 'timestamp': parse_iso8601(nextjs_json.get('publishedAt')) + 'timestamp': parse_iso8601(nextjs_json.get('publishedAt')), + 'categories': traverse_obj(nextjs_json, ('categories', ..., 'label')), } From 22697a84f6aa5de0b1731c10068aad97704f21fa Mon Sep 17 00:00:00 2001 From: HobbyistDev <105957301+HobbyistDev@users.noreply.github.com> Date: Sat, 10 Dec 2022 17:44:43 +0900 Subject: [PATCH 1826/2552] [extractor/europarl] Add EuroParlWebstream Extractor (#5547) Authored by: HobbyistDev Closes #4933 --- yt_dlp/extractor/_extractors.py | 2 +- yt_dlp/extractor/europa.py | 84 +++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index b1bbc5b72..e76a80ee1 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -537,7 +537,7 @@ from .espn import ( ESPNCricInfoIE, ) from .esri import EsriVideoIE -from .europa import EuropaIE +from .europa import EuropaIE, EuroParlWebstreamIE from .europeantour import EuropeanTourIE from .eurosport import EurosportIE from .euscreen import EUScreenIE diff --git a/yt_dlp/extractor/europa.py b/yt_dlp/extractor/europa.py index c2b493765..29daabe4a 100644 --- a/yt_dlp/extractor/europa.py +++ b/yt_dlp/extractor/europa.py @@ -3,6 +3,7 @@ from ..utils import ( int_or_none, orderedSet, parse_duration, + parse_iso8601, parse_qs, qualities, unified_strdate, @@ -87,3 +88,86 @@ class EuropaIE(InfoExtractor): 'view_count': view_count, 'formats': formats } + + +class EuroParlWebstreamIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?://(?:multimedia|webstreaming)\.europarl\.europa\.eu/[^/#?]+/ + (?:embed/embed\.html\?event=|(?!video)[^/#?]+/[\w-]+_)(?P<id>[\w-]+) + ''' + _TESTS = [{ + 'url': 
'https://multimedia.europarl.europa.eu/pl/webstreaming/plenary-session_20220914-0900-PLENARY', + 'info_dict': { + 'id': 'bcaa1db4-76ef-7e06-8da7-839bd0ad1dbe', + 'ext': 'mp4', + 'release_timestamp': 1663137900, + 'title': 'Plenary session', + 'release_date': '20220914', + }, + 'params': { + 'skip_download': True, + } + }, { + 'url': 'https://multimedia.europarl.europa.eu/pl/webstreaming/eu-cop27-un-climate-change-conference-in-sharm-el-sheikh-egypt-ep-delegation-meets-with-ngo-represen_20221114-1600-SPECIAL-OTHER', + 'info_dict': { + 'id': 'a8428de8-b9cd-6a2e-11e4-3805d9c9ff5c', + 'ext': 'mp4', + 'release_timestamp': 1668434400, + 'release_date': '20221114', + 'title': 'md5:d3550280c33cc70e0678652e3d52c028', + }, + 'params': { + 'skip_download': True, + } + }, { + # embed webpage + 'url': 'https://webstreaming.europarl.europa.eu/ep/embed/embed.html?event=20220914-0900-PLENARY&language=en&autoplay=true&logo=true', + 'info_dict': { + 'id': 'bcaa1db4-76ef-7e06-8da7-839bd0ad1dbe', + 'ext': 'mp4', + 'title': 'Plenary session', + 'release_date': '20220914', + 'release_timestamp': 1663137900, + }, + 'params': { + 'skip_download': True, + } + }, { + # live webstream + 'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/euroscola_20221115-1000-SPECIAL-EUROSCOLA', + 'info_dict': { + 'ext': 'mp4', + 'id': '510eda7f-ba72-161b-7ee7-0e836cd2e715', + 'release_timestamp': 1668502800, + 'title': 'Euroscola 2022-11-15 19:21', + 'release_date': '20221115', + 'live_status': 'is_live', + }, + 'skip': 'not live anymore' + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + + json_info = self._download_json( + 'https://vis-api.vuplay.co.uk/event/external', display_id, + query={ + 'player_key': 'europarl|718f822c-a48c-4841-9947-c9cb9bb1743c', + 'external_id': display_id, + }) + + formats, subtitles = self._extract_mpd_formats_and_subtitles(json_info['streaming_url'], display_id) + fmts, subs = self._extract_m3u8_formats_and_subtitles( + json_info['streaming_url'].replace('.mpd', '.m3u8'), display_id) + + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + + return { + 'id': json_info['id'], + 'title': json_info.get('title'), + 'formats': formats, + 'subtitles': subtitles, + 'release_timestamp': parse_iso8601(json_info.get('published_start')), + 'is_live': 'LIVE' in json_info.get('state', '') + } From f0f3fa028bc54921c793de2e48a05fef5227fee5 Mon Sep 17 00:00:00 2001 From: HobbyistDev <105957301+HobbyistDev@users.noreply.github.com> Date: Sat, 10 Dec 2022 17:47:06 +0900 Subject: [PATCH 1827/2552] [extractor/netverse] Extract comments (#5568) Authored by: HobbyistDev --- yt_dlp/extractor/netverse.py | 85 ++++++++++++++++++++++++++++++++++-- 1 file changed, 81 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/netverse.py b/yt_dlp/extractor/netverse.py index 719a9dabe..3c4fd92eb 100644 --- a/yt_dlp/extractor/netverse.py +++ b/yt_dlp/extractor/netverse.py @@ -1,3 +1,5 @@ +import itertools + from .common import InfoExtractor from .dailymotion import DailymotionIE from ..utils import smuggle_url, traverse_obj @@ -16,6 +18,26 @@ class NetverseBaseIE(InfoExtractor): f'https://api.netverse.id/medias/api/v2/{self._ENDPOINTS[endpoint]}/{slug}/{season_id}', display_id or slug, query=query) + def _get_comments(self, video_id): + last_page_number = None + for i in itertools.count(1): + comment_data = self._download_json( + f'https://api.netverse.id/mediadetails/api/v3/videos/comments/{video_id}', + video_id, data=b'', fatal=False, query={'page': i}, + 
note=f'Downloading JSON comment metadata page {i}') or {} + yield from traverse_obj(comment_data, ('response', 'comments', 'data', ..., { + 'id': '_id', + 'text': 'comment', + 'author_id': 'customer_id', + 'author': ('customer', 'name'), + 'author_thumbnail': ('customer', 'profile_picture'), + })) + + if not last_page_number: + last_page_number = traverse_obj(comment_data, ('response', 'comments', 'last_page')) + if i >= (last_page_number or 0): + break + class NetverseIE(NetverseBaseIE): _VALID_URL = r'https?://(?:\w+\.)?netverse\.id/(?P<type>watch|video)/(?P<display_id>[^/?#&]+)' @@ -28,7 +50,7 @@ class NetverseIE(NetverseBaseIE): 'ext': 'mp4', 'season': 'Season 2016', 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', - 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/T7aV31Y0eGRWBbwkK/x1080', + 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080', 'episode_number': 22, 'episode': 'Episode 22', 'uploader_id': 'x2ir3vq', @@ -51,7 +73,7 @@ class NetverseIE(NetverseBaseIE): 'ext': 'mp4', 'season': 'Season 2', 'description': 'md5:8a74f70812cca267e19ee0635f0af835', - 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/Thwuy1YURicFmGu0v/x1080', + 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080', 'episode_number': 2, 'episode': 'Episode 2', 'view_count': int, @@ -75,7 +97,7 @@ class NetverseIE(NetverseBaseIE): 'title': 'Tetangga Baru', 'season': 'Season 1', 'description': 'md5:23fcf70e97d461d3029d25d59b2ccfb9', - 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/T3Ogm1YEnnyjVKAFF/x1080', + 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080', 'episode_number': 1, 'episode': 'Episode 1', 'timestamp': 1624538169, @@ -96,7 +118,7 @@ class NetverseIE(NetverseBaseIE): 'info_dict': { 'id': 'x887jzz', 'ext': 'mp4', - 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/TfuZ_1Y6PboJ5An_s/x1080', + 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080', 'season': 'Season 1', 'episode_number': 1, 'description': 'md5:d4f627b3e7a3f9acdc55f6cdd5ea41d5', @@ -114,6 +136,60 @@ class NetverseIE(NetverseBaseIE): 'upload_date': '20220225', }, 'skip': 'This video get Geo-blocked for some country' + }, { + # video with comments + 'url': 'https://netverse.id/video/episode-1-season-2016-ok-food', + 'info_dict': { + 'id': 'k6hetBPiQMljSxxvAy7', + 'ext': 'mp4', + 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080', + 'display_id': 'episode-1-season-2016-ok-food', + 'like_count': int, + 'description': '', + 'duration': 1471, + 'age_limit': 0, + 'timestamp': 1642405848, + 'episode_number': 1, + 'season': 'Season 2016', + 'uploader_id': 'x2ir3vq', + 'title': 'Episode 1 - Season 2016 - Ok Food', + 'upload_date': '20220117', + 'tags': [], + 'view_count': int, + 'episode': 'Episode 1', + 'uploader': 'Net Prime', + 'comment_count': int, + }, + 'params':{ + 'getcomments': True + } + }, { + # video with multiple page comment + 'url': 'https://netverse.id/video/match-island-eps-1-fix', + 'info_dict': { + 'id': 'x8aznjc', + 'ext': 'mp4', + 'like_count': int, + 'tags': ['Match-Island', 'Pd00111'], + 'display_id': 'match-island-eps-1-fix', + 'view_count': int, + 'episode': 'Episode 1', + 'uploader': 'Net Prime', + 'duration': 4070, + 'timestamp': 1653068165, + 'description': 'md5:e9cf3b480ad18e9c33b999e3494f223f', + 'age_limit': 0, + 'title': 'Welcome To Match Island', + 'upload_date': '20220520', + 'episode_number': 1, + 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080', + 'uploader_id': 'x2ir3vq', + 'season': 'Season 1', + 'comment_count': int, + }, + 'params':{ + 'getcomments': True + } }] def 
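
The dict-mapping form of traverse_obj used in _get_comments of patch 1827 can be tried in isolation. A minimal sketch with an invented payload, assuming a yt-dlp checkout is importable:

from yt_dlp.utils import traverse_obj

page = {'response': {'comments': {'data': [
    {'_id': '1', 'comment': 'first!', 'customer': {'name': 'someone'}},
]}}}
# Each comment dict is reshaped field-by-field; fields whose paths are
# missing from the payload are simply dropped from the result
for comment in traverse_obj(page, ('response', 'comments', 'data', ..., {
        'id': '_id',
        'text': 'comment',
        'author': ('customer', 'name'),
})):
    print(comment)
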
_real_extract(self, url): @@ -131,6 +207,7 @@ class NetverseIE(NetverseBaseIE): 'thumbnail': traverse_obj(videos, ('program_detail', 'thumbnail_image')), 'description': traverse_obj(videos, ('program_detail', 'description')), 'episode_number': videos.get('episode_order'), + '__post_extractor': self.extract_comments(display_id), } From df10bad2670d63349dc3c99a34baafe992e2fffb Mon Sep 17 00:00:00 2001 From: Denis <github@mexus.xyz> Date: Sat, 10 Dec 2022 16:17:01 +0300 Subject: [PATCH 1828/2552] [extractor/rutube] Support private videos (#5761) Authored by: mexus --- yt_dlp/extractor/rutube.py | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/rutube.py b/yt_dlp/extractor/rutube.py index 5a4fd975e..97e6354b4 100644 --- a/yt_dlp/extractor/rutube.py +++ b/yt_dlp/extractor/rutube.py @@ -91,12 +91,12 @@ class RutubeBaseIE(InfoExtractor): class RutubeIE(RutubeBaseIE): IE_NAME = 'rutube' IE_DESC = 'Rutube videos' - _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/(?P<id>[\da-z]{32})' + _VALID_URL = r'https?://rutube\.ru/(?:video(?:/private)?|(?:play/)?embed)/(?P<id>[\da-z]{32})' _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/(?:play/)?embed/[\da-z]{32}.*?)\1'] _TESTS = [{ 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', - 'md5': '1d24f180fac7a02f3900712e5a5764d6', + 'md5': 'e33ac625efca66aba86cbec9851f2692', 'info_dict': { 'id': '3eac3b4561676c17df9132a9a1e62e3e', 'ext': 'mp4', @@ -108,6 +108,10 @@ class RutubeIE(RutubeBaseIE): 'timestamp': 1381943602, 'upload_date': '20131016', 'age_limit': 0, + 'view_count': int, + 'thumbnail': 'http://pic.rutubelist.ru/video/d2/a0/d2a0aec998494a396deafc7ba2c82add.jpg', + 'category': ['Новости и СМИ'], + }, }, { 'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661', @@ -121,6 +125,24 @@ class RutubeIE(RutubeBaseIE): }, { 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_type=source', 'only_matching': True, + }, { + 'url': 'https://rutube.ru/video/private/884fb55f07a97ab673c7d654553e0f48/?p=x2QojCumHTS3rsKHWXN8Lg', + 'md5': 'd106225f15d625538fe22971158e896f', + 'info_dict': { + 'id': '884fb55f07a97ab673c7d654553e0f48', + 'ext': 'mp4', + 'title': 'Яцуноками, Nioh2', + 'description': 'Nioh2: финал сражения с боссом Яцуноками', + 'duration': 15, + 'uploader': 'mexus', + 'uploader_id': '24222106', + 'timestamp': 1670646232, + 'upload_date': '20221210', + 'age_limit': 0, + 'view_count': int, + 'thumbnail': 'http://pic.rutubelist.ru/video/f2/d4/f2d42b54be0a6e69c1c22539e3152156.jpg', + 'category': ['Видеоигры'], + }, }] @classmethod @@ -129,8 +151,9 @@ class RutubeIE(RutubeBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - info = self._download_and_extract_info(video_id) - info['formats'] = self._download_and_extract_formats(video_id) + query = parse_qs(url) + info = self._download_and_extract_info(video_id, query) + info['formats'] = self._download_and_extract_formats(video_id, query) return info From 81388c0954a07fbfeab09831ce350d9f91de1cdd Mon Sep 17 00:00:00 2001 From: HobbyistDev <105957301+HobbyistDev@users.noreply.github.com> Date: Sat, 10 Dec 2022 22:40:24 +0900 Subject: [PATCH 1829/2552] [extractor/oneplace] Add OnePlacePodcast extractor (#5549) Closes #5543 Authored by: HobbyistDev --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/oneplace.py | 43 +++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 yt_dlp/extractor/oneplace.py diff --git 
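
The private-video support in patch 1828 works because the p= access token is kept from the page URL and forwarded to the API calls. An illustrative check with the URL from the new test, assuming yt-dlp's parse_qs helper is importable:

from yt_dlp.utils import parse_qs

url = 'https://rutube.ru/video/private/884fb55f07a97ab673c7d654553e0f48/?p=x2QojCumHTS3rsKHWXN8Lg'
print(parse_qs(url))  # expected: {'p': ['x2QojCumHTS3rsKHWXN8Lg']}
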
a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index e76a80ee1..a12328f04 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1282,6 +1282,7 @@ from .on24 import On24IE from .ondemandkorea import OnDemandKoreaIE from .onefootball import OneFootballIE from .onenewsnz import OneNewsNZIE +from .oneplace import OnePlacePodcastIE from .onet import ( OnetIE, OnetChannelIE, diff --git a/yt_dlp/extractor/oneplace.py b/yt_dlp/extractor/oneplace.py new file mode 100644 index 000000000..86337ad0a --- /dev/null +++ b/yt_dlp/extractor/oneplace.py @@ -0,0 +1,43 @@ +from .common import InfoExtractor + + +class OnePlacePodcastIE(InfoExtractor): + _VALID_URL = r'https?://www\.oneplace\.com/[\w]+/[^/]+/listen/[\w-]+-(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://www.oneplace.com/ministries/a-daily-walk/listen/living-in-the-last-days-part-2-958461.html', + 'info_dict': { + 'id': '958461', + 'ext': 'mp3', + 'title': 'Living in the Last Days Part 2 | A Daily Walk with John Randall', + 'description': 'md5:fbb8f1cf21447ac54ecaa2887fc20c6e', + } + }, { + 'url': 'https://www.oneplace.com/ministries/ankerberg-show/listen/ep-3-relying-on-the-constant-companionship-of-the-holy-spirit-part-2-922513.html', + 'info_dict': { + 'id': '922513', + 'ext': 'mp3', + 'description': 'md5:8b810b4349aa40a5d033b4536fe428e1', + 'title': 'md5:ce10f7d8d5ddcf485ed8905ef109659d', + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + return { + 'id': video_id, + 'url': self._search_regex(( + r'mp3-url\s*=\s*"([^"]+)', + r'<div[^>]+id\s*=\s*"player"[^>]+data-media-url\s*=\s*"(?P<media_url>[^"]+)', + ), webpage, 'media url'), + 'ext': 'mp3', + 'vcodec': 'none', + 'title': self._html_search_regex(( + r'<div[^>]class\s*=\s*"details"[^>]+>[^<]<h2[^>]+>(?P<content>[^>]+)>', + self._meta_regex('og:title'), self._meta_regex('title'), + ), webpage, 'title', group='content', default=None), + 'description': self._html_search_regex( + r'<div[^>]+class="[^"]+epDesc"[^>]*>\s*(?P<desc>.+?)\s*</div>', + webpage, 'description', default=None), + } From c73355510629e3eda5a79d4e2876a35316ca6ed2 Mon Sep 17 00:00:00 2001 From: Matthew <coletdjnz@protonmail.com> Date: Mon, 12 Dec 2022 23:08:14 +0000 Subject: [PATCH 1830/2552] [extractor/youtube:tab] Extract metadata from channel items (#5569) Authored by: coletdjnz --- yt_dlp/extractor/youtube.py | 83 ++++++++++++++++++++++++++++++++++--- 1 file changed, 77 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index c6c89915b..9dde34fb0 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -4382,6 +4382,25 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): elif key.startswith('grid') and key.endswith('Renderer'): return renderer + def _extract_channel_renderer(self, renderer): + channel_id = renderer['channelId'] + title = self._get_text(renderer, 'title') + channel_url = f'https://www.youtube.com/channel/{channel_id}' + return { + '_type': 'url', + 'url': channel_url, + 'id': channel_id, + 'ie_key': YoutubeTabIE.ie_key(), + 'channel': title, + 'channel_id': channel_id, + 'channel_url': channel_url, + 'title': title, + 'channel_follower_count': self._get_count(renderer, 'subscriberCountText'), + 'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'), + 'playlist_count': self._get_count(renderer, 'videoCountText'), + 'description': self._get_text(renderer, 'descriptionSnippet'), + } + def 
_grid_entries(self, grid_renderer): for item in grid_renderer['items']: if not isinstance(item, dict): @@ -4407,9 +4426,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): # channel channel_id = renderer.get('channelId') if channel_id: - yield self.url_result( - 'https://www.youtube.com/channel/%s' % channel_id, - ie=YoutubeTabIE.ie_key(), video_title=title) + yield self._extract_channel_renderer(renderer) continue # generic endpoint URL support ep_url = urljoin('https://www.youtube.com/', try_get( @@ -5762,7 +5779,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'uploader': 'cole-dlp-test-acc', 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA', 'channel': 'cole-dlp-test-acc', - 'channel_follower_count': int, }, 'playlist_mincount': 1, 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}}, @@ -5930,7 +5946,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'title': 'cole-dlp-test-acc - Shorts', 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA', 'channel': 'cole-dlp-test-acc', - 'channel_follower_count': int, 'description': 'test description', 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA', 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA', @@ -5976,8 +5991,40 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel': str, } }], - 'params': {'extract_flat': True}, + 'params': {'extract_flat': True, 'playlist_items': '1'}, 'playlist_mincount': 1 + }, { + # Channel renderer metadata. Contains number of videos on the channel + 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels', + 'info_dict': { + 'id': 'UCiu-3thuViMebBjw_5nWYrA', + 'title': 'cole-dlp-test-acc - Channels', + 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA', + 'channel': 'cole-dlp-test-acc', + 'description': 'test description', + 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA', + 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA', + 'tags': [], + 'uploader': 'cole-dlp-test-acc', + 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA', + + }, + 'playlist': [{ + 'info_dict': { + '_type': 'url', + 'ie_key': 'YoutubeTab', + 'url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw', + 'id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw', + 'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw', + 'title': 'PewDiePie', + 'channel': 'PewDiePie', + 'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw', + 'thumbnails': list, + 'channel_follower_count': int, + 'playlist_count': int + } + }], + 'params': {'extract_flat': True}, }] @classmethod @@ -6531,6 +6578,30 @@ class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor): # 'title': '#cats', # }], }, + }, { + # Channel results + 'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D', + 'info_dict': { + 'id': 'kurzgesagt', + 'title': 'kurzgesagt', + }, + 'playlist': [{ + 'info_dict': { + '_type': 'url', + 'id': 'UCsXVk37bltHxD1rDPwtNM8Q', + 'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q', + 'ie_key': 'YoutubeTab', + 'channel': 'Kurzgesagt – In a Nutshell', + 'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc', + 'title': 'Kurzgesagt – In a Nutshell', + 'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q', + 'playlist_count': int, # XXX: should have a way of saying > 1 + 'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q', + 'thumbnails': list + } + }], + 'params': {'extract_flat': True, 'playlist_items': '1'}, + 'playlist_mincount': 1, }, { 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB', 'only_matching': True, From 
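
One way to observe the richer channel entries from patch 1830 is flat extraction through the Python API. A sketch only: it needs network access, reuses the search URL from the test above, and assumes default yt-dlp behaviour otherwise:

import yt_dlp

opts = {'extract_flat': True, 'playlist_items': '1', 'quiet': True}
with yt_dlp.YoutubeDL(opts) as ydl:
    info = ydl.extract_info(
        'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
        download=False)
entry = next(iter(info['entries']))
# These fields are populated by the _extract_channel_renderer added above
print(entry.get('channel'), entry.get('channel_follower_count'), entry.get('playlist_count'))
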
5424dbaf91728aaf77458e68d993ba6c34e8e222 Mon Sep 17 00:00:00 2001 From: Lesmiscore <nao20010128@gmail.com> Date: Mon, 19 Dec 2022 11:36:14 +0900 Subject: [PATCH 1831/2552] Deprioritize HEVC-over-FLV formats (#5823) Authored by: Lesmiscore --- yt_dlp/utils.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 9697ba1c1..65408bf19 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -6307,6 +6307,12 @@ class FormatSorter: # if format.get('preference') is None and format.get('ext') in ('f4f', 'f4m'): # Not supported? # format['preference'] = -1000 + if format.get('preference') is None and format.get('ext') == 'flv' and re.match('[hx]265|he?vc?', format.get('vcodec') or ''): + # HEVC-over-FLV is out-of-spec by FLV's original spec + # ref. https://trac.ffmpeg.org/ticket/6389 + # ref. https://github.com/yt-dlp/yt-dlp/pull/5821 + format['preference'] = -100 + # Determine missing bitrates if format.get('tbr') is None: if format.get('vbr') is not None and format.get('abr') is not None: From 1fc089143c79b02b8373ae1d785d5e3a68635d4d Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 21 Dec 2022 00:55:47 +0000 Subject: [PATCH 1832/2552] [extractor/reddit] Extract crossposted media (#5801) Closes #5798 Authored by: bashonly --- yt_dlp/extractor/reddit.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py index f1a5c852a..fcfee51e8 100644 --- a/yt_dlp/extractor/reddit.py +++ b/yt_dlp/extractor/reddit.py @@ -64,6 +64,25 @@ class RedditIE(InfoExtractor): 'id': 'wzqkxp', 'title': 'md5:72d3d19402aa11eff5bd32fc96369b37', }, + }, { + # crossposted reddit-hosted media + 'url': 'https://www.reddit.com/r/dumbfuckers_club/comments/zjjw82/cringe/', + 'md5': '746180895c7b75a9d6b05341f507699a', + 'info_dict': { + 'id': 'a1oneun6pa5a1', + 'ext': 'mp4', + 'display_id': 'zjjw82', + 'title': 'Cringe', + 'uploader': 'Otaku-senpai69420', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'upload_date': '20221212', + 'timestamp': 1670812309, + 'duration': 16, + 'like_count': int, + 'dislike_count': int, + 'comment_count': int, + 'age_limit': 0, + }, }, { 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj', 'only_matching': True, @@ -179,7 +198,8 @@ class RedditIE(InfoExtractor): raise ExtractorError('No media found', expected=True) # Check if media is hosted on reddit: - reddit_video = traverse_obj(data, (('media', 'secure_media'), 'reddit_video'), get_all=False) + reddit_video = traverse_obj(data, ( + (None, ('crosspost_parent_list', ...)), ('secure_media', 'media'), 'reddit_video'), get_all=False) if reddit_video: playlist_urls = [ try_get(reddit_video, lambda x: unescapeHTML(x[y])) From 0b5546c723b9fb212e7e0199dbdaae8b8e0bf206 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Thu, 15 Dec 2022 19:58:57 +0530 Subject: [PATCH 1833/2552] [extractor] Let `_extract_format` functions obey `--ignore-no-formats` --- yt_dlp/extractor/common.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 3910c55ad..9031f3c11 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1759,6 +1759,9 @@ class InfoExtractor: def _extract_f4m_formats(self, manifest_url, video_id, preference=None, quality=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), fatal=True, m3u8_id=None, data=None, headers={}, query={}): + 
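
The FormatSorter check in patch 1831 is a plain prefix regex; which vcodec strings it catches can be verified directly (the sample codec IDs below are illustrative):

import re

for vcodec in ('hev1.1.6.L120.90', 'hvc1.1.6.L93.B0', 'h265', 'x265', 'avc1.64001f', 'vp9'):
    # True for the HEVC family, False for AVC/VP9 - only HEVC-in-FLV is demoted
    print(vcodec, bool(re.match('[hx]265|he?vc?', vcodec)))
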
if self.get_param('ignore_no_formats_error'): + fatal = False + res = self._download_xml_handle( manifest_url, video_id, 'Downloading f4m manifest', 'Unable to download f4m manifest', @@ -1908,6 +1911,9 @@ class InfoExtractor: errnote=None, fatal=True, live=False, data=None, headers={}, query={}): + if self.get_param('ignore_no_formats_error'): + fatal = False + if not m3u8_url: if errnote is not False: errnote = errnote or 'Failed to obtain m3u8 URL' @@ -2187,6 +2193,9 @@ class InfoExtractor: return '/'.join(out) def _extract_smil_formats_and_subtitles(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None): + if self.get_param('ignore_no_formats_error'): + fatal = False + res = self._download_smil(smil_url, video_id, fatal=fatal, transform_source=transform_source) if res is False: assert not fatal @@ -2462,6 +2471,10 @@ class InfoExtractor: def _extract_mpd_formats_and_subtitles( self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}): + + if self.get_param('ignore_no_formats_error'): + fatal = False + res = self._download_xml_handle( mpd_url, video_id, note='Downloading MPD manifest' if note is None else note, @@ -2831,6 +2844,9 @@ class InfoExtractor: return fmts def _extract_ism_formats_and_subtitles(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}): + if self.get_param('ignore_no_formats_error'): + fatal = False + res = self._download_xml_handle( ism_url, video_id, note='Downloading ISM manifest' if note is None else note, From 69f5fe45b98ef3ecb8e5ac69ebebdce7733a3ae4 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 20 Dec 2022 00:41:45 +0530 Subject: [PATCH 1834/2552] [FFmpegVideoConvertor] Add `gif` to `--recode-video` --- README.md | 10 +++++----- yt_dlp/postprocessor/ffmpeg.py | 5 ++++- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index c0a2a420b..440ed1934 100644 --- a/README.md +++ b/README.md @@ -893,11 +893,11 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi specific bitrate like 128K (default 5) --remux-video FORMAT Remux the video into another container if necessary (currently supported: avi, flv, - mkv, mov, mp4, webm, aac, aiff, alac, flac, - m4a, mka, mp3, ogg, opus, vorbis, wav). If - target container does not support the - video/audio codec, remuxing will fail. You - can specify multiple rules; e.g. + gif, mkv, mov, mp4, webm, aac, aiff, alac, + flac, m4a, mka, mp3, ogg, opus, vorbis, + wav). If target container does not support + the video/audio codec, remuxing will fail. + You can specify multiple rules; e.g. 
"aac>m4a/mov>mp4/mkv" will remux aac to m4a, mov to mp4 and anything else to mkv --recode-video FORMAT Re-encode the video into another format if diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 67890fc31..069066e0c 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -538,7 +538,10 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): class FFmpegVideoConvertorPP(FFmpegPostProcessor): - SUPPORTED_EXTS = (*MEDIA_EXTENSIONS.common_video, *sorted(MEDIA_EXTENSIONS.common_audio + ('aac', 'vorbis'))) + SUPPORTED_EXTS = ( + *sorted((*MEDIA_EXTENSIONS.common_video, 'gif')), + *sorted((*MEDIA_EXTENSIONS.common_audio, 'aac', 'vorbis')), + ) FORMAT_RE = create_mapping_re(SUPPORTED_EXTS) _ACTION = 'converting' From 8791e78cccd68db8161f06dc8567280e0d99a5e1 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Wed, 21 Dec 2022 20:30:26 +0530 Subject: [PATCH 1835/2552] Fix `original_url` in playlists --- yt_dlp/YoutubeDL.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 8d28783d8..abb0ddfe5 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1626,8 +1626,8 @@ class YoutubeDL: if result_type in ('url', 'url_transparent'): ie_result['url'] = sanitize_url( ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https') - if ie_result.get('original_url'): - extra_info.setdefault('original_url', ie_result['original_url']) + if ie_result.get('original_url') and not extra_info.get('original_url'): + extra_info = {'original_url': ie_result['original_url'], **extra_info} extract_flat = self.params.get('extract_flat', False) if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) From 1c226ccdd464c09218a33824aedbcf3aa305a678 Mon Sep 17 00:00:00 2001 From: skbeh <60107333+skbeh@users.noreply.github.com> Date: Sat, 24 Dec 2022 18:47:37 +0800 Subject: [PATCH 1836/2552] [extractor/bilibili] Improve `_VALID_URL` (#5820) Authored by: skbeh --- yt_dlp/extractor/bilibili.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index bc0424194..616a54960 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -1034,7 +1034,7 @@ class BiliIntlSeriesIE(BiliIntlBaseIE): class BiliLiveIE(InfoExtractor): - _VALID_URL = r'https?://live.bilibili.com/(?P<id>\d+)' + _VALID_URL = r'https?://live.bilibili.com/(blanc/)?(?P<id>\d+)' _TESTS = [{ 'url': 'https://live.bilibili.com/196', @@ -1050,6 +1050,9 @@ class BiliLiveIE(InfoExtractor): }, { 'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click', 'only_matching': True + }, { + 'url': 'https://live.bilibili.com/blanc/196', + 'only_matching': True }] _FORMATS = { From d61ef7f34395eae33810ec16397f86c54bf06af6 Mon Sep 17 00:00:00 2001 From: Giulio Muscarello <capacitorset@gmail.com> Date: Sat, 24 Dec 2022 11:49:10 +0100 Subject: [PATCH 1837/2552] [extractor/ARD] Add vtt subtitles (#5835) Authored by: CapacitorSet --- yt_dlp/extractor/ard.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/ard.py b/yt_dlp/extractor/ard.py index 0a8a8746a..8660741ce 100644 --- a/yt_dlp/extractor/ard.py +++ b/yt_dlp/extractor/ard.py @@ -46,6 +46,9 @@ class ARDMediathekBaseIE(InfoExtractor): subtitles['de'] = [{ 'ext': 'ttml', 'url': subtitle_url, + }, { + 'ext': 'vtt', + 'url': 
subtitle_url.replace('/ebutt/', '/webvtt/') + '.vtt', }] return { @@ -286,16 +289,16 @@ class ARDMediathekIE(ARDMediathekBaseIE): class ARDIE(InfoExtractor): _VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/(?:[^/?#&]+/)+(?P<id>[^/?#&]+))\.html' _TESTS = [{ - # available till 7.01.2022 - 'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-die-woche-video100.html', - 'md5': '867d8aa39eeaf6d76407c5ad1bb0d4c1', + # available till 7.12.2023 + 'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html', + 'md5': 'a438f671e87a7eba04000336a119ccc4', 'info_dict': { - 'id': 'maischberger-die-woche-video100', - 'display_id': 'maischberger-die-woche-video100', + 'id': 'maischberger-video-424', + 'display_id': 'maischberger-video-424', 'ext': 'mp4', - 'duration': 3687.0, - 'title': 'maischberger. die woche vom 7. Januar 2021', - 'upload_date': '20210107', + 'duration': 4452.0, + 'title': 'maischberger am 07.12.2022', + 'upload_date': '20221207', 'thumbnail': r're:^https?://.*\.jpg$', }, }, { From 9012d20b23b01827c8d75b460da22485c5cc80ef Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 27 Dec 2022 03:01:08 +0530 Subject: [PATCH 1838/2552] [extractor/mixch] Support `--wait-for-video` --- yt_dlp/extractor/mixch.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/mixch.py b/yt_dlp/extractor/mixch.py index 3f430a717..7eedbc752 100644 --- a/yt_dlp/extractor/mixch.py +++ b/yt_dlp/extractor/mixch.py @@ -32,8 +32,10 @@ class MixchIE(InfoExtractor): initial_js_state = self._parse_json(self._search_regex( r'(?m)^\s*window\.__INITIAL_JS_STATE__\s*=\s*(\{.+?\});\s*$', webpage, 'initial JS state'), video_id) - if not initial_js_state.get('liveInfo'): - raise ExtractorError('Livestream has ended.', expected=True) + + is_live = initial_js_state.get('liveInfo') + if not is_live: + self.raise_no_formats('Livestream has ended or has not started', expected=True) return { 'id': video_id, @@ -48,8 +50,8 @@ class MixchIE(InfoExtractor): 'url': traverse_obj(initial_js_state, ('liveInfo', 'hls')) or 'https://d1hd0ww6piyb43.cloudfront.net/hls/torte_%s.m3u8' % video_id, 'ext': 'mp4', 'protocol': 'm3u8', - }], - 'is_live': True, + }] if is_live else [], + 'live_status': 'is_live' if is_live else 'is_upcoming', } From 4af47a00038dfbe6a243119e499f2e876e0f2766 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 27 Dec 2022 10:13:22 +0530 Subject: [PATCH 1839/2552] Fix 9012d20b23b01827c8d75b460da22485c5cc80ef --- yt_dlp/extractor/mixch.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/yt_dlp/extractor/mixch.py b/yt_dlp/extractor/mixch.py index 7eedbc752..4be694728 100644 --- a/yt_dlp/extractor/mixch.py +++ b/yt_dlp/extractor/mixch.py @@ -1,8 +1,5 @@ from .common import InfoExtractor -from ..utils import ( - ExtractorError, - traverse_obj, -) +from ..utils import UserNotLive, traverse_obj class MixchIE(InfoExtractor): @@ -32,10 +29,8 @@ class MixchIE(InfoExtractor): initial_js_state = self._parse_json(self._search_regex( r'(?m)^\s*window\.__INITIAL_JS_STATE__\s*=\s*(\{.+?\});\s*$', webpage, 'initial JS state'), video_id) - - is_live = initial_js_state.get('liveInfo') - if not is_live: - self.raise_no_formats('Livestream has ended or has not started', expected=True) + if not initial_js_state.get('liveInfo'): + raise UserNotLive(video_id=video_id) return { 'id': video_id, @@ -47,11 +42,12 @@ class 
MixchIE(InfoExtractor): 'uploader_id': video_id, 'formats': [{ 'format_id': 'hls', - 'url': traverse_obj(initial_js_state, ('liveInfo', 'hls')) or 'https://d1hd0ww6piyb43.cloudfront.net/hls/torte_%s.m3u8' % video_id, + 'url': (traverse_obj(initial_js_state, ('liveInfo', 'hls')) + or f'https://d1hd0ww6piyb43.cloudfront.net/hls/torte_{video_id}.m3u8'), 'ext': 'mp4', 'protocol': 'm3u8', - }] if is_live else [], - 'live_status': 'is_live' if is_live else 'is_upcoming', + }], + 'is_live': True, } From 032f22020c3aaf0c1be1bb500498d13782d01c73 Mon Sep 17 00:00:00 2001 From: HobbyistDev <105957301+HobbyistDev@users.noreply.github.com> Date: Tue, 27 Dec 2022 15:25:09 +0900 Subject: [PATCH 1840/2552] [extractor/trtcocuk] Add extractor (#5009) Closes #2635 Authored by: HobbyistDev --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/trtcocuk.py | 48 +++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 yt_dlp/extractor/trtcocuk.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index a12328f04..63c7abb10 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1905,6 +1905,7 @@ from .trovo import ( TrovoChannelVodIE, TrovoChannelClipIE, ) +from .trtcocuk import TrtCocukVideoIE from .trueid import TrueIDIE from .trunews import TruNewsIE from .truth import TruthIE diff --git a/yt_dlp/extractor/trtcocuk.py b/yt_dlp/extractor/trtcocuk.py new file mode 100644 index 000000000..f27f5a1e3 --- /dev/null +++ b/yt_dlp/extractor/trtcocuk.py @@ -0,0 +1,48 @@ +from .common import InfoExtractor +from ..utils import ExtractorError, int_or_none, parse_iso8601, traverse_obj + + +class TrtCocukVideoIE(InfoExtractor): + _VALID_URL = r'https?://www\.trtcocuk\.net\.tr/video/(?P<id>[\w-]+)' + _TESTS = [{ + 'url': 'https://www.trtcocuk.net.tr/video/kaptan-pengu-ve-arkadaslari-1', + 'info_dict': { + 'id': '3789738', + 'ext': 'mp4', + 'season_number': 1, + 'series': '"Kaptan Pengu ve Arkadaşları"', + 'season': 'Season 1', + 'title': 'Kaptan Pengu ve Arkadaşları 1 Bölüm İzle TRT Çocuk', + 'release_date': '20201209', + 'release_timestamp': 1607513774, + } + }, { + 'url': 'https://www.trtcocuk.net.tr/video/sef-rokanin-lezzet-dunyasi-17', + 'info_dict': { + 'id': '10260842', + 'ext': 'mp4', + 'series': '"Şef Roka\'nın Lezzet Dünyası"', + 'title': 'Şef Roka\'nın Lezzet Dünyası 17 Bölüm İzle TRT Çocuk', + } + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + nuxtjs_data = self._search_nuxt_data(webpage, display_id)['data'] + + try: + video_url = self._parse_json(nuxtjs_data['video'], display_id) + except ExtractorError: + video_url = nuxtjs_data['video'] + formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, display_id) + + return { + 'id': str(nuxtjs_data['id']), + 'formats': formats, + 'subtitles': subtitles, + 'season_number': int_or_none(nuxtjs_data.get('season')), + 'release_timestamp': parse_iso8601(nuxtjs_data.get('publishedDate')), + 'series': traverse_obj(nuxtjs_data, ('show', 0, 'title')), + 'title': self._html_extract_title(webpage) # TODO: get better title + } From 247c8dd4f548436e2cf0f2e55a80aa37ec62555a Mon Sep 17 00:00:00 2001 From: barsnick <barsnick@users.noreply.github.com> Date: Tue, 27 Dec 2022 07:34:01 +0100 Subject: [PATCH 1841/2552] [extractor/urplay] Support for audio-only formats (#4606) Closes #4605 Authored by: barsnick --- yt_dlp/extractor/urplay.py | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 
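
The try/except around _parse_json in the trtcocuk extractor (patch 1840) handles a Nuxt 'video' field that may be a JSON-encoded string or a bare value. A simplified stdlib-only restatement, with made-up sample values, not the extractor's actual helper:

import json

def coerce_video_field(value):
    try:
        return json.loads(value)  # JSON-encoded string -> decoded value
    except (TypeError, ValueError):
        return value  # already a bare URL or other value - use as-is

print(coerce_video_field('"https://example.com/master.m3u8"'))
print(coerce_video_field('https://example.com/master.m3u8'))
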
insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/urplay.py b/yt_dlp/extractor/urplay.py index 0f0d6592d..5d69dadd6 100644 --- a/yt_dlp/extractor/urplay.py +++ b/yt_dlp/extractor/urplay.py @@ -14,12 +14,13 @@ class URPlayIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?ur(?:play|skola)\.se/(?:program|Produkter)/(?P<id>[0-9]+)' _TESTS = [{ 'url': 'https://urplay.se/program/203704-ur-samtiden-livet-universum-och-rymdens-markliga-musik-om-vetenskap-kritiskt-tankande-och-motstand', - 'md5': 'ff5b0c89928f8083c74bbd5099c9292d', + 'md5': '5ba36643c77cc3d34ffeadad89937d1e', 'info_dict': { 'id': '203704', 'ext': 'mp4', 'title': 'UR Samtiden - Livet, universum och rymdens märkliga musik : Om vetenskap, kritiskt tänkande och motstånd', 'description': 'md5:5344508a52aa78c1ced6c1b8b9e44e9a', + 'thumbnail': r're:^https?://.+\.jpg', 'timestamp': 1513292400, 'upload_date': '20171214', 'series': 'UR Samtiden - Livet, universum och rymdens märkliga musik', @@ -29,6 +30,24 @@ class URPlayIE(InfoExtractor): 'episode': 'Om vetenskap, kritiskt tänkande och motstånd', 'age_limit': 15, }, + }, { + 'url': 'https://urplay.se/program/222967-en-foralders-dagbok-mitt-barn-skadar-sig-sjalv', + 'info_dict': { + 'id': '222967', + 'ext': 'mp4', + 'title': 'En förälders dagbok : Mitt barn skadar sig själv', + 'description': 'md5:9f771eef03a732a213b367b52fe826ca', + 'thumbnail': r're:^https?://.+\.jpg', + 'timestamp': 1629676800, + 'upload_date': '20210823', + 'series': 'En förälders dagbok', + 'duration': 1740, + 'age_limit': 15, + 'episode_number': 3, + 'categories': 'count:2', + 'tags': 'count:7', + 'episode': 'Mitt barn skadar sig själv', + }, }, { 'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde', 'info_dict': { @@ -36,12 +55,17 @@ class URPlayIE(InfoExtractor): 'ext': 'mp4', 'title': 'Tripp, Trapp, Träd : Sovkudde', 'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1', + 'thumbnail': r're:^https?://.+\.jpg', 'timestamp': 1440086400, 'upload_date': '20150820', 'series': 'Tripp, Trapp, Träd', 'duration': 865, + 'age_limit': 1, + 'episode_number': 1, + 'categories': [], 'tags': ['Sova'], 'episode': 'Sovkudde', + 'season': 'Säsong 1', }, }, { 'url': 'http://urskola.se/Produkter/155794-Smasagor-meankieli-Grodan-i-vida-varlden', @@ -69,7 +93,7 @@ class URPlayIE(InfoExtractor): urplayer_streams = urplayer_data.get('streamingInfo', {}) for k, v in urplayer_streams.get('raw', {}).items(): - if not (k in ('sd', 'hd') and isinstance(v, dict)): + if not (k in ('sd', 'hd', 'mp3', 'm4a') and isinstance(v, dict)): continue file_http = v.get('location') if file_http: From 0ef3d470272694533301294e733e96343dab57af Mon Sep 17 00:00:00 2001 From: Bobscorn <qwertster0@gmail.com> Date: Tue, 27 Dec 2022 20:04:56 +1300 Subject: [PATCH 1842/2552] [extractor/beatbump] Add extractors (#5304) Authored by: Bobscorn, pukkandan Closes #4653 --- yt_dlp/extractor/_extractors.py | 4 ++ yt_dlp/extractor/beatbump.py | 101 ++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+) create mode 100644 yt_dlp/extractor/beatbump.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 63c7abb10..71cd54bf4 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -184,6 +184,10 @@ from .bbc import ( from .beeg import BeegIE from .behindkink import BehindKinkIE from .bellmedia import BellMediaIE +from .beatbump import ( + BeatBumpVideoIE, + BeatBumpPlaylistIE, +) from .beatport import BeatportIE from .berufetv import BerufeTVIE from .bet import BetIE diff 
--git a/yt_dlp/extractor/beatbump.py b/yt_dlp/extractor/beatbump.py new file mode 100644 index 000000000..0f40ebe7a --- /dev/null +++ b/yt_dlp/extractor/beatbump.py @@ -0,0 +1,101 @@ +from .common import InfoExtractor +from .youtube import YoutubeIE, YoutubeTabIE + + +class BeatBumpVideoIE(InfoExtractor): + _VALID_URL = r'https://beatbump\.ml/listen\?id=(?P<id>[\w-]+)' + _TESTS = [{ + 'url': 'https://beatbump.ml/listen?id=MgNrAu2pzNs', + 'md5': '5ff3fff41d3935b9810a9731e485fe66', + 'info_dict': { + 'id': 'MgNrAu2pzNs', + 'ext': 'mp4', + 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA', + 'artist': 'Stephen', + 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp', + 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA', + 'upload_date': '20190312', + 'categories': ['Music'], + 'playable_in_embed': True, + 'duration': 169, + 'like_count': int, + 'alt_title': 'Voyeur Girl', + 'view_count': int, + 'track': 'Voyeur Girl', + 'uploader': 'Stephen - Topic', + 'title': 'Voyeur Girl', + 'channel_follower_count': int, + 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA', + 'age_limit': 0, + 'availability': 'public', + 'live_status': 'not_live', + 'album': 'it\'s too much love to know my dear', + 'channel': 'Stephen', + 'comment_count': int, + 'description': 'md5:7ae382a65843d6df2685993e90a8628f', + 'tags': 'count:11', + 'creator': 'Stephen', + 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA', + } + }] + + def _real_extract(self, url): + id_ = self._match_id(url) + return self.url_result(f'https://music.youtube.com/watch?v={id_}', YoutubeIE, id_) + + +class BeatBumpPlaylistIE(InfoExtractor): + _VALID_URL = r'https://beatbump\.ml/(?:release\?id=|artist/|playlist/)(?P<id>[\w-]+)' + _TESTS = [{ + 'url': 'https://beatbump.ml/release?id=MPREb_gTAcphH99wE', + 'playlist_count': 50, + 'info_dict': { + 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0', + 'availability': 'unlisted', + 'view_count': int, + 'title': 'Album - Royalty Free Music Library V2 (50 Songs)', + 'description': '', + 'tags': [], + 'modified_date': '20221223', + } + }, { + 'url': 'https://beatbump.ml/artist/UC_aEa8K-EOJ3D6gOs7HcyNg', + 'playlist_mincount': 1, + 'params': {'flatplaylist': True}, + 'info_dict': { + 'id': 'UC_aEa8K-EOJ3D6gOs7HcyNg', + 'uploader_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg', + 'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg', + 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg', + 'channel_follower_count': int, + 'title': 'NoCopyrightSounds - Videos', + 'uploader': 'NoCopyrightSounds', + 'description': 'md5:cd4fd53d81d363d05eee6c1b478b491a', + 'channel': 'NoCopyrightSounds', + 'tags': 'count:12', + 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg', + }, + }, { + 'url': 'https://beatbump.ml/playlist/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', + 'playlist_mincount': 1, + 'params': {'flatplaylist': True}, + 'info_dict': { + 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', + 'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds', + 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!', + 'view_count': int, + 'channel_url': 'https://www.youtube.com/@NoCopyrightSounds', + 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg', + 'title': 'NCS : All Releases 💿', + 'uploader': 'NoCopyrightSounds', + 'availability': 'public', + 'channel': 'NoCopyrightSounds', + 'tags': [], + 'modified_date': '20221225', + 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg', + } + }] + + def _real_extract(self, url): + id_ = 
self._match_id(url) + return self.url_result(f'https://music.youtube.com/browse/{id_}', YoutubeTabIE, id_) From 15e9e578c04f1fa3f408dc3ec99491cc3f0ba839 Mon Sep 17 00:00:00 2001 From: chris <6024426+iw0nderhow@users.noreply.github.com> Date: Tue, 27 Dec 2022 20:52:58 +0100 Subject: [PATCH 1843/2552] [extractor/ArteTV] Extract chapters (#5879) Authored by: iw0nderhow, bashonly --- yt_dlp/extractor/arte.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index 54e4d2d0c..dfbfe03c3 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -65,6 +65,21 @@ class ArteTVIE(ArteTVBaseIE): }, { 'url': 'https://api.arte.tv/api/player/v2/config/de/LIVE', 'only_matching': True, + }, { + 'url': 'https://www.arte.tv/de/videos/110203-006-A/zaz/', + 'info_dict': { + 'id': '110203-006-A', + 'chapters': 'count:16', + 'description': 'md5:cf592f1df52fe52007e3f8eac813c084', + 'alt_title': 'Zaz', + 'title': 'Baloise Session 2022', + 'timestamp': 1668445200, + 'duration': 4054, + 'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/ubQjmVCGyRx3hmBuZEK9QZ/940x530', + 'upload_date': '20221114', + 'ext': 'mp4', + }, + 'expected_warnings': ['geo restricted'] }] _GEO_BYPASS = True @@ -180,9 +195,6 @@ class ArteTVIE(ArteTVBaseIE): else: self.report_warning(f'Skipping stream with unknown protocol {stream["protocol"]}') - # TODO: chapters from stream['segments']? - # The JS also looks for chapters in config['data']['attributes']['chapters'], - # but I am yet to find a video having those formats.extend(secondary_formats) self._remove_duplicate_formats(formats) @@ -205,6 +217,11 @@ class ArteTVIE(ArteTVBaseIE): {'url': image['url'], 'id': image.get('caption')} for image in metadata.get('images') or [] if url_or_none(image.get('url')) ], + # TODO: chapters may also be in stream['segments']? + 'chapters': traverse_obj(config, ('data', 'attributes', 'chapters', 'elements', ..., { + 'start_time': 'startTime', + 'title': 'title', + })) or None, } From da8d2de2082ab55f11d76d0aef7e6c3614672b45 Mon Sep 17 00:00:00 2001 From: "lauren n. 
liberda" <lauren@selfisekai.rocks> Date: Tue, 27 Dec 2022 20:57:26 +0100 Subject: [PATCH 1844/2552] [extractor/cda] Support premium and misc improvements (#5529) * Fix cache for non-ASCII key * Improve error messages * Better UA for fingerprint bypass Authored by: selfisekai --- yt_dlp/cache.py | 9 ++++---- yt_dlp/extractor/cda.py | 47 +++++++++++++++++++++++++++++++++++------ 2 files changed, 44 insertions(+), 12 deletions(-) diff --git a/yt_dlp/cache.py b/yt_dlp/cache.py index 4f9fb78d3..7be91eae5 100644 --- a/yt_dlp/cache.py +++ b/yt_dlp/cache.py @@ -5,6 +5,7 @@ import os import re import shutil import traceback +import urllib.parse from .utils import expand_path, traverse_obj, version_tuple, write_json_file from .version import __version__ @@ -22,11 +23,9 @@ class Cache: return expand_path(res) def _get_cache_fn(self, section, key, dtype): - assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \ - 'invalid section %r' % section - assert re.match(r'^[a-zA-Z0-9_.-]+$', key), 'invalid key %r' % key - return os.path.join( - self._get_root_dir(), section, f'{key}.{dtype}') + assert re.match(r'^[\w.-]+$', section), f'invalid section {section!r}' + key = urllib.parse.quote(key, safe='').replace('%', ',') # encode non-ascii characters + return os.path.join(self._get_root_dir(), section, f'{key}.{dtype}') @property def enabled(self): diff --git a/yt_dlp/extractor/cda.py b/yt_dlp/extractor/cda.py index d1212e686..1157114b2 100644 --- a/yt_dlp/extractor/cda.py +++ b/yt_dlp/extractor/cda.py @@ -4,6 +4,7 @@ import datetime import hashlib import hmac import json +import random import re from .common import InfoExtractor @@ -27,11 +28,10 @@ class CDAIE(InfoExtractor): _VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)' _NETRC_MACHINE = 'cdapl' - _BASE_URL = 'http://www.cda.pl/' + _BASE_URL = 'https://www.cda.pl' _BASE_API_URL = 'https://api.cda.pl' _API_HEADERS = { 'Accept': 'application/vnd.cda.public+json', - 'User-Agent': 'pl.cda 1.0 (version 1.2.88 build 15306; Android 9; Xiaomi Redmi 3S)', } # hardcoded in the app _LOGIN_REQUEST_AUTH = 'Basic YzU3YzBlZDUtYTIzOC00MWQwLWI2NjQtNmZmMWMxY2Y2YzVlOklBTm95QlhRRVR6U09MV1hnV3MwMW0xT2VyNWJNZzV4clRNTXhpNGZJUGVGZ0lWUlo5UGVYTDhtUGZaR1U1U3Q' @@ -101,6 +101,38 @@ class CDAIE(InfoExtractor): }, **kwargs) def _perform_login(self, username, password): + app_version = random.choice(( + '1.2.88 build 15306', + '1.2.174 build 18469', + )) + android_version = random.randrange(8, 14) + phone_model = random.choice(( + # x-kom.pl top selling Android smartphones, as of 2022-12-26 + # https://www.x-kom.pl/g-4/c/1590-smartfony-i-telefony.html?f201-system-operacyjny=61322-android + 'ASUS ZenFone 8', + 'Motorola edge 20 5G', + 'Motorola edge 30 neo 5G', + 'Motorola moto g22', + 'OnePlus Nord 2T 5G', + 'Samsung Galaxy A32 SM‑A325F', + 'Samsung Galaxy M13', + 'Samsung Galaxy S20 FE 5G', + 'Xiaomi 11T', + 'Xiaomi POCO M4 Pro', + 'Xiaomi Redmi 10', + 'Xiaomi Redmi 10C', + 'Xiaomi Redmi 9C NFC', + 'Xiaomi Redmi Note 10 Pro', + 'Xiaomi Redmi Note 11 Pro', + 'Xiaomi Redmi Note 11', + 'Xiaomi Redmi Note 11S 5G', + 'Xiaomi Redmi Note 11S', + 'realme 10', + 'realme 9 Pro+', + 'vivo Y33s', + )) + self._API_HEADERS['User-Agent'] = f'pl.cda 1.0 (version {app_version}; Android {android_version}; {phone_model})' + cached_bearer = self.cache.load(self._BEARER_CACHE, username) or {} if cached_bearer.get('valid_until', 0) > datetime.datetime.now().timestamp() + 5: self._API_HEADERS['Authorization'] = f'Bearer {cached_bearer["token"]}' @@ -138,9 +170,6 
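
The cache-key encoding from patch 1844 can be checked on its own; this applies the exact expression from cache.py to sample keys (the usernames are made up):

import urllib.parse

for key in ('zażółć@example', 'plain-user'):
    # Non-ASCII and reserved characters are percent-encoded, then '%' is
    # swapped for ',' so the key stays a portable file name
    print(urllib.parse.quote(key, safe='').replace('%', ','))
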
@@ class CDAIE(InfoExtractor): meta = self._download_json( f'{self._BASE_API_URL}/video/{video_id}', video_id, headers=self._API_HEADERS)['video'] - if meta.get('premium') and not meta.get('premium_free'): - self.report_drm(video_id) - uploader = traverse_obj(meta, 'author', 'login') formats = [{ @@ -151,6 +180,10 @@ class CDAIE(InfoExtractor): 'filesize': quality.get('length'), } for quality in meta['qualities'] if quality.get('file')] + if meta.get('premium') and not meta.get('premium_free') and not formats: + raise ExtractorError( + 'Video requires CDA Premium - subscription needed', expected=True) + return { 'id': video_id, 'title': meta.get('title'), @@ -167,10 +200,10 @@ class CDAIE(InfoExtractor): def _web_extract(self, video_id, url): self._set_cookie('cda.pl', 'cda.player', 'html5') webpage = self._download_webpage( - self._BASE_URL + '/video/' + video_id, video_id) + f'{self._BASE_URL}/video/{video_id}/vfilm', video_id) if 'Ten film jest dostępny dla użytkowników premium' in webpage: - raise ExtractorError('This video is only available for premium users.', expected=True) + self.raise_login_required('This video is only available for premium users') if re.search(r'niedostępn[ey] w(?: |\s+)Twoim kraju\s*<', webpage): self.raise_geo_restricted() From d1b5f3d79cb33f393f17aa12df24fca33c7ef3aa Mon Sep 17 00:00:00 2001 From: "lauren n. liberda" <lauren@selfisekai.rocks> Date: Tue, 27 Dec 2022 21:47:25 +0100 Subject: [PATCH 1845/2552] [extractor/polskieradio] Adapt to next.js redesigns (#5416) Authored by: selfisekai --- yt_dlp/extractor/_extractors.py | 2 + yt_dlp/extractor/arte.py | 1 - yt_dlp/extractor/polskieradio.py | 213 ++++++++++++++++++++++++------- 3 files changed, 167 insertions(+), 49 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 71cd54bf4..ea1d0a2df 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1409,6 +1409,8 @@ from .pokergo import ( from .polsatgo import PolsatGoIE from .polskieradio import ( PolskieRadioIE, + PolskieRadioLegacyIE, + PolskieRadioAuditionIE, PolskieRadioCategoryIE, PolskieRadioPlayerIE, PolskieRadioPodcastIE, diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index dfbfe03c3..e3cc5afb0 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -195,7 +195,6 @@ class ArteTVIE(ArteTVBaseIE): else: self.report_warning(f'Skipping stream with unknown protocol {stream["protocol"]}') - formats.extend(secondary_formats) self._remove_duplicate_formats(formats) diff --git a/yt_dlp/extractor/polskieradio.py b/yt_dlp/extractor/polskieradio.py index 99244f6b4..68c4a2afd 100644 --- a/yt_dlp/extractor/polskieradio.py +++ b/yt_dlp/extractor/polskieradio.py @@ -10,6 +10,7 @@ from ..compat import ( compat_urlparse ) from ..utils import ( + determine_ext, extract_attributes, ExtractorError, InAdvancePagedList, @@ -17,6 +18,7 @@ from ..utils import ( js_to_json, parse_iso8601, strip_or_none, + traverse_obj, unified_timestamp, unescapeHTML, url_or_none, @@ -48,28 +50,11 @@ class PolskieRadioBaseExtractor(InfoExtractor): yield entry -class PolskieRadioIE(PolskieRadioBaseExtractor): - _VALID_URL = r'https?://(?:www\.)?polskieradio(?:24)?\.pl/\d+/\d+/Artykul/(?P<id>[0-9]+)' - _TESTS = [{ # Old-style single broadcast. - 'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943,Prof-Andrzej-Nowak-o-historii-nie-da-sie-myslec-beznamietnie', - 'info_dict': { - 'id': '1587943', - 'title': 'Prof. 
Andrzej Nowak: o historii nie da się myśleć beznamiętnie', - 'description': 'md5:12f954edbf3120c5e7075e17bf9fc5c5', - }, - 'playlist': [{ - 'md5': '2984ee6ce9046d91fc233bc1a864a09a', - 'info_dict': { - 'id': '1540576', - 'ext': 'mp3', - 'title': 'md5:d4623290d4ac983bf924061c75c23a0d', - 'timestamp': 1456594200, - 'upload_date': '20160227', - 'duration': 2364, - 'thumbnail': r're:^https?://static\.prsa\.pl/images/.*\.jpg$' - }, - }], - }, { # New-style single broadcast. +class PolskieRadioLegacyIE(PolskieRadioBaseExtractor): + # legacy sites + IE_NAME = 'polskieradio:legacy' + _VALID_URL = r'https?://(?:www\.)?polskieradio(?:24)?\.pl/\d+/\d+/[Aa]rtykul/(?P<id>\d+)' + _TESTS = [{ 'url': 'https://www.polskieradio.pl/8/2382/Artykul/2534482,Zagarysci-Poezja-jak-spoiwo', 'info_dict': { 'id': '2534482', @@ -96,16 +81,6 @@ class PolskieRadioIE(PolskieRadioBaseExtractor): 'ext': 'mp3', 'title': 'Pogłos 29 października godz. 23:01', }, - }, { - 'url': 'http://polskieradio.pl/9/305/Artykul/1632955,Bardzo-popularne-slowo-remis', - 'only_matching': True, - }, { - 'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943', - 'only_matching': True, - }, { - # with mp4 video - 'url': 'http://www.polskieradio.pl/9/299/Artykul/1634903,Brexit-Leszek-Miller-swiat-sie-nie-zawali-Europa-bedzie-trwac-dalej', - 'only_matching': True, }, { 'url': 'https://polskieradio24.pl/130/4503/Artykul/2621876,Narusza-nasza-suwerennosc-Publicysci-o-uzaleznieniu-funduszy-UE-od-praworzadnosci', 'only_matching': True, @@ -114,7 +89,9 @@ class PolskieRadioIE(PolskieRadioBaseExtractor): def _real_extract(self, url): playlist_id = self._match_id(url) - webpage = self._download_webpage(url, playlist_id) + webpage, urlh = self._download_webpage_handle(url, playlist_id) + if PolskieRadioIE.suitable(urlh.url): + return self.url_result(urlh.url, PolskieRadioIE, playlist_id) content = self._search_regex( r'(?s)<div[^>]+class="\s*this-article\s*"[^>]*>(.+?)<div[^>]+class="tags"[^>]*>', @@ -153,23 +130,160 @@ class PolskieRadioIE(PolskieRadioBaseExtractor): return self.playlist_result(entries, playlist_id, title, description) -class PolskieRadioCategoryIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+(?:,[^/]+)?/(?P<id>\d+)' +class PolskieRadioIE(InfoExtractor): + # new next.js sites, excluding radiokierowcow.pl + _VALID_URL = r'https?://(?:[^/]+\.)?polskieradio(?:24)?\.pl/artykul/(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://jedynka.polskieradio.pl/artykul/1587943', + 'info_dict': { + 'id': '1587943', + 'title': 'Prof. 
Andrzej Nowak: o historii nie da się myśleć beznamiętnie', + 'description': 'md5:12f954edbf3120c5e7075e17bf9fc5c5', + }, + 'playlist': [{ + 'md5': '2984ee6ce9046d91fc233bc1a864a09a', + 'info_dict': { + 'id': '7a85d429-5356-4def-a347-925e4ae7406b', + 'ext': 'mp3', + 'title': 'md5:d4623290d4ac983bf924061c75c23a0d', + }, + }], + }, { + 'url': 'https://trojka.polskieradio.pl/artykul/1632955', + 'only_matching': True, + }, { + # with mp4 video + 'url': 'https://trojka.polskieradio.pl/artykul/1634903', + 'only_matching': True, + }, { + 'url': 'https://jedynka.polskieradio.pl/artykul/3042436,Polityka-wschodnia-ojca-i-syna-Wladyslawa-Lokietka-i-Kazimierza-Wielkiego', + 'only_matching': True, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + article_data = traverse_obj( + self._search_nextjs_data(webpage, playlist_id), ('props', 'pageProps', 'data', 'articleData')) + + title = strip_or_none(article_data['title']) + + description = strip_or_none(article_data.get('lead')) + + entries = [{ + 'url': entry['file'], + 'ext': determine_ext(entry.get('fileName')), + 'id': self._search_regex( + r'([a-f\d]{8}-(?:[a-f\d]{4}-){3}[a-f\d]{12})', entry['file'], 'entry id'), + 'title': strip_or_none(entry.get('description')) or title, + } for entry in article_data.get('attachments') or () if entry['fileType'] in ('Audio', )] + + return self.playlist_result(entries, playlist_id, title, description) + + +class PolskieRadioAuditionIE(InfoExtractor): + # new next.js sites + IE_NAME = 'polskieradio:audition' + _VALID_URL = r'https?://(?:[^/]+\.)?polskieradio\.pl/audycj[ae]/(?P<id>\d+)' _TESTS = [{ - 'url': 'http://www.polskieradio.pl/7/5102,HISTORIA-ZYWA', + # articles, PR1 + 'url': 'https://jedynka.polskieradio.pl/audycje/5102', 'info_dict': { 'id': '5102', - 'title': 'HISTORIA ŻYWA', + 'title': 'Historia żywa', + 'thumbnail': r're:https://static\.prsa\.pl/images/.+', }, 'playlist_mincount': 38, }, { - 'url': 'http://www.polskieradio.pl/7/4807', + # episodes, PR1 + 'url': 'https://jedynka.polskieradio.pl/audycje/5769', 'info_dict': { - 'id': '4807', - 'title': 'Vademecum 1050. 
rocznicy Chrztu Polski' + 'id': '5769', + 'title': 'AgroFakty', + 'thumbnail': r're:https://static\.prsa\.pl/images/.+', }, - 'playlist_mincount': 5 + 'playlist_mincount': 269, }, { + # both episodes and articles, PR3 + 'url': 'https://trojka.polskieradio.pl/audycja/8906', + 'info_dict': { + 'id': '8906', + 'title': 'Trójka budzi', + 'thumbnail': r're:https://static\.prsa\.pl/images/.+', + }, + 'playlist_mincount': 722, + }] + + def _call_lp3(self, path, query, video_id, note): + return self._download_json( + f'https://lp3test.polskieradio.pl/{path}', video_id, note, + query=query, headers={'x-api-key': '9bf6c5a2-a7d0-4980-9ed7-a3f7291f2a81'}) + + def _entries(self, playlist_id, has_episodes, has_articles): + for i in itertools.count(1) if has_episodes else []: + page = self._call_lp3( + 'AudioArticle/GetListByCategoryId', { + 'categoryId': playlist_id, + 'PageSize': 10, + 'skip': i, + 'format': 400, + }, playlist_id, f'Downloading episode list page {i}') + if not traverse_obj(page, 'data'): + break + for episode in page['data']: + yield { + 'id': str(episode['id']), + 'url': episode['file'], + 'title': episode.get('title'), + 'duration': int_or_none(episode.get('duration')), + 'timestamp': parse_iso8601(episode.get('datePublic')), + } + + for i in itertools.count(1) if has_articles else []: + page = self._call_lp3( + 'Article/GetListByCategoryId', { + 'categoryId': playlist_id, + 'PageSize': 9, + 'skip': i, + 'format': 400, + }, playlist_id, f'Downloading article list page {i}') + if not traverse_obj(page, 'data'): + break + for article in page['data']: + yield { + '_type': 'url_transparent', + 'ie_key': PolskieRadioIE.ie_key(), + 'id': str(article['id']), + 'url': article['url'], + 'title': article.get('shortTitle'), + 'description': traverse_obj(article, ('description', 'lead')), + 'timestamp': parse_iso8601(article.get('datePublic')), + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + page_props = traverse_obj( + self._search_nextjs_data(self._download_webpage(url, playlist_id), playlist_id), + ('props', 'pageProps', ('data', None)), get_all=False) + + has_episodes = bool(traverse_obj(page_props, 'episodes', 'audios')) + has_articles = bool(traverse_obj(page_props, 'articles')) + + return self.playlist_result( + self._entries(playlist_id, has_episodes, has_articles), playlist_id, + title=traverse_obj(page_props, ('details', 'name')), + description=traverse_obj(page_props, ('details', 'description', 'lead')), + thumbnail=traverse_obj(page_props, ('details', 'photo'))) + + +class PolskieRadioCategoryIE(InfoExtractor): + # legacy sites + IE_NAME = 'polskieradio:category' + _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+(?:,[^/]+)?/(?P<id>\d+)' + _TESTS = [{ 'url': 'http://www.polskieradio.pl/7/129,Sygnaly-dnia?ref=source', 'only_matching': True }, { @@ -186,9 +300,6 @@ class PolskieRadioCategoryIE(InfoExtractor): 'title': 'Muzyka', }, 'playlist_mincount': 61 - }, { - 'url': 'http://www.polskieradio.pl/7,Jedynka/5102,HISTORIA-ZYWA', - 'only_matching': True, }, { 'url': 'http://www.polskieradio.pl/8,Dwojka/196,Publicystyka', 'only_matching': True, @@ -196,7 +307,7 @@ class PolskieRadioCategoryIE(InfoExtractor): @classmethod def suitable(cls, url): - return False if PolskieRadioIE.suitable(url) else super(PolskieRadioCategoryIE, cls).suitable(url) + return False if PolskieRadioLegacyIE.suitable(url) else super().suitable(url) def _entries(self, url, page, category_id): content = page @@ -209,7 +320,7 @@ class PolskieRadioCategoryIE(InfoExtractor): if not 
href: continue yield self.url_result( - compat_urlparse.urljoin(url, href), PolskieRadioIE.ie_key(), + compat_urlparse.urljoin(url, href), PolskieRadioLegacyIE, entry_id, entry.get('title')) mobj = re.search( r'<div[^>]+class=["\']next["\'][^>]*>\s*<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', @@ -222,7 +333,9 @@ class PolskieRadioCategoryIE(InfoExtractor): def _real_extract(self, url): category_id = self._match_id(url) - webpage = self._download_webpage(url, category_id) + webpage, urlh = self._download_webpage_handle(url, category_id) + if PolskieRadioAuditionIE.suitable(urlh.url): + return self.url_result(urlh.url, PolskieRadioAuditionIE, category_id) title = self._html_search_regex( r'<title>([^<]+) - [^<]+ - [^<]+', webpage, 'title', fatal=False) @@ -358,7 +471,7 @@ class PolskieRadioPodcastListIE(PolskieRadioPodcastBaseExtractor): 'entries': InAdvancePagedList( get_page, math.ceil(data['itemCount'] / self._PAGE_SIZE), self._PAGE_SIZE), 'id': str(data['id']), - 'title': data['title'], + 'title': data.get('title'), 'description': data.get('description'), 'uploader': data.get('announcer'), } @@ -374,6 +487,10 @@ class PolskieRadioPodcastIE(PolskieRadioPodcastBaseExtractor): 'ext': 'mp3', 'title': 'Theresa May rezygnuje. Co dalej z brexitem?', 'description': 'md5:e41c409a29d022b70ef0faa61dbded60', + 'episode': 'Theresa May rezygnuje. Co dalej z brexitem?', + 'duration': 2893, + 'thumbnail': 'https://static.prsa.pl/images/58649376-c8a0-4ba2-a714-78b383285f5f.jpg', + 'series': 'Raport o stanie świata', }, }] From a4d6ead30fde0e85eb34859e86c707621e38f8a1 Mon Sep 17 00:00:00 2001 From: Damiano Amatruda Date: Thu, 29 Dec 2022 07:54:19 +0100 Subject: [PATCH 1846/2552] [extractor/ciscowebex] Support password-protected videos (#5601) Authored by: damianoamatruda --- yt_dlp/extractor/ciscowebex.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/ciscowebex.py b/yt_dlp/extractor/ciscowebex.py index 44595d854..0fcf02282 100644 --- a/yt_dlp/extractor/ciscowebex.py +++ b/yt_dlp/extractor/ciscowebex.py @@ -1,5 +1,6 @@ from .common import InfoExtractor from ..utils import ( + ExtractorError, int_or_none, try_get, unified_timestamp, @@ -38,11 +39,30 @@ class CiscoWebexIE(InfoExtractor): siteurl = mobj.group('siteurl_1') or mobj.group('siteurl_2') video_id = mobj.group('id') - stream = self._download_json( + password = self.get_param('videopassword') + + headers = {'Accept': 'application/json'} + if password: + headers['accessPwd'] = password + + stream, urlh = self._download_json_handle( 'https://%s.webex.com/webappng/api/v1/recordings/%s/stream' % (subdomain, video_id), - video_id, fatal=False, query={'siteurl': siteurl}) - if not stream: - self.raise_login_required(method='cookies') + video_id, headers=headers, query={'siteurl': siteurl}, expected_status=(403, 429)) + + if urlh.status == 403: + if stream['code'] == 53004: + self.raise_login_required() + if stream['code'] == 53005: + if password: + raise ExtractorError('Wrong password', expected=True) + raise ExtractorError( + 'This video is protected by a password, use the --video-password option', expected=True) + raise ExtractorError(f'{self.IE_NAME} said: {stream["code"]} - {stream["message"]}', expected=True) + + if urlh.status == 429: + self.raise_login_required( + f'{self.IE_NAME} asks you to solve a CAPTCHA. 
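
# Illustrative sketch (not from the yt-dlp source): the error protocol the
# CiscoWebexIE change in this patch depends on. The stream endpoint answers
# with HTTP 403 plus a JSON body whose `code` field distinguishes "login
# needed" (53004) from "password needed/wrong" (53005), and with HTTP 429
# while a CAPTCHA is pending - hence expected_status=(403, 429) rather than
# treating those responses as fatal. The helper name below is ours:
def classify_webex_error(status, payload, password_given=False):
    if status == 429:
        return 'captcha required - solve it in a browser, then pass cookies'
    if status == 403:
        if payload.get('code') == 53004:
            return 'login required'
        if payload.get('code') == 53005:
            return 'wrong password' if password_given else 'password required'
        return f'error {payload.get("code")} - {payload.get("message")}'
    return 'ok'
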
Solve CAPTCHA in browser and', + method='cookies') video_id = stream.get('recordUUID') or video_id @@ -78,7 +98,7 @@ class CiscoWebexIE(InfoExtractor): 'title': stream['recordName'], 'description': stream.get('description'), 'uploader': stream.get('ownerDisplayName'), - 'uploader_id': stream.get('ownerUserName') or stream.get('ownerId'), # mail or id + 'uploader_id': stream.get('ownerUserName') or stream.get('ownerId'), 'timestamp': unified_timestamp(stream.get('createTime')), 'duration': int_or_none(stream.get('duration'), 1000), 'webpage_url': 'https://%s.webex.com/recordingservice/sites/%s/recording/playback/%s' % (subdomain, siteurl, video_id), From 06a9d68eb8413120f7e03d6c288cf855cd782f77 Mon Sep 17 00:00:00 2001 From: Kurt Bestor Date: Thu, 29 Dec 2022 16:18:55 +0900 Subject: [PATCH 1847/2552] [extractor/youku] Fix extractor (#5622) Closes #4456 Authored by: KurtBestor --- yt_dlp/extractor/youku.py | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/yt_dlp/extractor/youku.py b/yt_dlp/extractor/youku.py index 624975b98..ab59200d7 100644 --- a/yt_dlp/extractor/youku.py +++ b/yt_dlp/extractor/youku.py @@ -96,25 +96,35 @@ class YoukuIE(InfoExtractor): 'thumbnail': r're:^https?://.*', 'uploader': '明月庄主moon', 'uploader_id': '38465621', - 'uploader_url': 'http://i.youku.com/u/UMTUzODYyNDg0', + 'uploader_url': 'https://www.youku.com/profile/index/?uid=UMTUzODYyNDg0', 'tags': list, }, }, { - 'url': 'http://video.tudou.com/v/XMjIyNzAzMTQ4NA==.html?f=46177805', + 'url': 'https://v.youku.com/v_show/id_XNTA2NTA0MjA1Mg==.html', 'info_dict': { - 'id': 'XMjIyNzAzMTQ4NA', + 'id': 'XNTA2NTA0MjA1Mg', 'ext': 'mp4', - 'title': '卡马乔国足开大脚长传冲吊集锦', - 'duration': 289, + 'title': 'Minecraft我的世界:建造超大巨型航空飞机,菜鸟vs高手vs黑客', + 'duration': 542.13, 'thumbnail': r're:^https?://.*', - 'uploader': '阿卜杜拉之星', - 'uploader_id': '2382249', - 'uploader_url': 'http://i.youku.com/u/UOTUyODk5Ng==', + 'uploader': '波哥游戏解说', + 'uploader_id': '156688084', + 'uploader_url': 'https://www.youku.com/profile/index/?uid=UNjI2NzUyMzM2', 'tags': list, }, }, { - 'url': 'http://video.tudou.com/v/XMjE4ODI3OTg2MA==.html', - 'only_matching': True, + 'url': 'https://v.youku.com/v_show/id_XNTE1MzczOTg4MA==.html', + 'info_dict': { + 'id': 'XNTE1MzczOTg4MA', + 'ext': 'mp4', + 'title': '国产超A特工片', + 'duration': 362.97, + 'thumbnail': r're:^https?://.*', + 'uploader': '陈晓娟说历史', + 'uploader_id': '1640913339', + 'uploader_url': 'https://www.youku.com/profile/index/?uid=UNjU2MzY1MzM1Ng==', + 'tags': list, + }, }] @staticmethod @@ -151,7 +161,7 @@ class YoukuIE(InfoExtractor): # request basic data basic_data_params = { 'vid': video_id, - 'ccode': '0532', + 'ccode': '0524', 'client_ip': '192.168.1.1', 'utid': cna, 'client_ts': time.time() / 1000, From 074b2fae9076221faaa8697381428131ad968dc9 Mon Sep 17 00:00:00 2001 From: lkw123 <2020393267@qq.com> Date: Thu, 29 Dec 2022 15:38:49 +0800 Subject: [PATCH 1848/2552] [extractor/kankanews] Add extractor (#5729) Authored by: synthpop123 --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/kankanews.py | 48 +++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 yt_dlp/extractor/kankanews.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index ea1d0a2df..672eb9596 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -831,6 +831,7 @@ from .jwplatform import JWPlatformIE from .kakao import KakaoIE from .kaltura import KalturaIE from .kanal2 import Kanal2IE +from 
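
# Illustrative sketch (not from the yt-dlp source): the request signature that
# the KankaNewsIE code below computes. The constant salt and the MD5-of-an-MD5
# scheme come straight from the extractor; the parameter values here are made
# up for the demonstration.
import hashlib
import urllib.parse

params = {'nonce': 'abcd1234', 'omsid': '4485057', 'platform': 'pc',
          'timestamp': 1667894400, 'version': '1.0'}
inner = hashlib.md5(
    (urllib.parse.urlencode(params) + '&28c8edde3d61a0411511d3b1866f0636').encode()).hexdigest()
params['sign'] = hashlib.md5(inner.encode()).hexdigest()  # 32-char hex digest
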
.kankanews import KankaNewsIE
 from .karaoketv import KaraoketvIE
 from .karrierevideos import KarriereVideosIE
 from .keezmovies import KeezMoviesIE
diff --git a/yt_dlp/extractor/kankanews.py b/yt_dlp/extractor/kankanews.py
new file mode 100644
index 000000000..46e239bd6
--- /dev/null
+++ b/yt_dlp/extractor/kankanews.py
@@ -0,0 +1,48 @@
+import time
+import random
+import string
+import hashlib
+import urllib.parse
+
+from .common import InfoExtractor
+
+
+class KankaNewsIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?kankanews\.com/a/\d+\-\d+\-\d+/(?P<id>\d+)\.shtml'
+    _TESTS = [{
+        'url': 'https://www.kankanews.com/a/2022-11-08/00310276054.shtml?appid=1088227',
+        'md5': '05e126513c74b1258d657452a6f4eef9',
+        'info_dict': {
+            'id': '4485057',
+            'url': 'http://mediaplay.kksmg.com/2022/11/08/h264_450k_mp4_1a388ad771e0e4cc28b0da44d245054e_ncm.mp4',
+            'ext': 'mp4',
+            'title': '视频|第23个中国记者节,我们在进博切蛋糕',
+            'thumbnail': r're:^https?://.*\.jpg*',
+        }
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        video_id = self._search_regex(r'omsid\s*=\s*"(\d+)"', webpage, 'video id')
+
+        params = {
+            'nonce': ''.join(random.choices(string.ascii_lowercase + string.digits, k=8)),
+            'omsid': video_id,
+            'platform': 'pc',
+            'timestamp': int(time.time()),
+            'version': '1.0',
+        }
+        params['sign'] = hashlib.md5((hashlib.md5((
+            urllib.parse.urlencode(params) + '&28c8edde3d61a0411511d3b1866f0636'
+        ).encode()).hexdigest()).encode()).hexdigest()
+
+        meta = self._download_json('https://api-app.kankanews.com/kankan/pc/getvideo',
+                                   video_id, query=params)['result']['video']
+
+        return {
+            'id': video_id,
+            'url': meta['videourl'],
+            'title': self._search_regex(r'g\.title\s*=\s*"([^"]+)"', webpage, 'title'),
+            'thumbnail': meta.get('titlepic'),
+        }

From 6b71d186dda5c71b8ff2ec665cbda6f9d4ffb06e Mon Sep 17 00:00:00 2001
From: monnef <1975567+mnn@users.noreply.github.com>
Date: Thu, 29 Dec 2022 08:47:23 +0100
Subject: [PATCH 1849/2552] [extractor/curiositystream] Fix auth (#5730)

Authored by: mnn
---
 yt_dlp/extractor/curiositystream.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/curiositystream.py b/yt_dlp/extractor/curiositystream.py
index 26cf24fbb..941cf4e79 100644
--- a/yt_dlp/extractor/curiositystream.py
+++ b/yt_dlp/extractor/curiositystream.py
@@ -1,4 +1,5 @@
 import re
+import urllib.parse
 
 from .common import InfoExtractor
 from ..compat import compat_str
@@ -23,7 +24,7 @@ class CuriosityStreamBaseIE(InfoExtractor):
         auth_cookie = self._get_cookies('https://curiositystream.com').get('auth_token')
         if auth_cookie:
             self.write_debug('Obtained auth_token cookie')
-            self._auth_token = auth_cookie.value
+            self._auth_token = urllib.parse.unquote(auth_cookie.value)
         if self._auth_token:
             headers['X-Auth-Token'] = self._auth_token
         result = self._download_json(
@@ -54,8 +55,11 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
             'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
             'channel': 'Curiosity Stream',
             'categories': ['Technology', 'Interview'],
-            'average_rating': 96.79,
+            'average_rating': float,
             'series_id': '2',
+            'thumbnail': r're:https://img.curiositystream.com/.+\.jpg',
+            'tags': [],
+            'duration': 158
         },
         'params': {
             # m3u8 download

From 9fcd8ad1f21377f8cf784c35ebc758743227666e Mon Sep 17 00:00:00 2001
From: JChris246 <43832407+JChris246@users.noreply.github.com>
Date: Thu, 29 Dec 2022 04:08:22 -0400
Subject: [PATCH 1850/2552] 
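
# Illustrative sketch (not from the yt-dlp source): why the CuriosityStream
# fix above pipes the cookie through urllib.parse.unquote(). The site appears
# to store the auth token percent-encoded in the `auth_token` cookie, so the
# raw cookie value is no longer usable as an X-Auth-Token header. The token
# value below is made up.
import urllib.parse

raw_cookie_value = 'abc123%3A%2F%2Fsession'  # hypothetical percent-encoded token
headers = {'X-Auth-Token': urllib.parse.unquote(raw_cookie_value)}  # 'abc123://session'
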
[extractor/spankbang] Fix extractor (#5791)

Authored by: JChris246
Closes #5731
---
 yt_dlp/extractor/spankbang.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/yt_dlp/extractor/spankbang.py b/yt_dlp/extractor/spankbang.py
index f242d334c..43da34a32 100644
--- a/yt_dlp/extractor/spankbang.py
+++ b/yt_dlp/extractor/spankbang.py
@@ -177,7 +177,6 @@ class SpankBangPlaylistIE(InfoExtractor):
     def _real_extract(self, url):
         mobj = self._match_valid_url(url)
         playlist_id = mobj.group('id')
-        display_id = mobj.group('display_id')
 
         webpage = self._download_webpage(
            url, playlist_id, headers={'Cookie': 'country=US; mobile=on'})
@@ -186,11 +185,11 @@
             urljoin(url, mobj.group('path')),
             ie=SpankBangIE.ie_key(), video_id=mobj.group('id'))
             for mobj in re.finditer(
-                r'<a[^>]+\bhref=(["\'])(?P<path>/?[\da-z]+-(?P<id>[\da-z]+)/playlist/%s(?:(?!\1).)*)\1'
-                % re.escape(display_id), webpage)]
+                r'<a[^>]+\bhref=(["\'])(?P<path>/?[\da-z]+-(?P<id>[\da-z]+)/playlist/[^"\'](?:(?!\1).)*)\1',
+                webpage)]
 
         title = self._html_search_regex(
-            r'
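
# Illustrative sketch (not from the yt-dlp source): the quoting trick used in
# the finditer() pattern above. `(["\'])` captures whichever quote character
# opens the href attribute, and the tempered token `(?:(?!\1).)*` consumes
# text only while the matching close quote has not been reached, so single-
# and double-quoted attributes both work. The sample markup is made up:
import re

sample = '<a href="/8a32d2-playlistid/playlist/my-favs">playlist</a>'
mobj = re.search(r'<a[^>]+\bhref=(["\'])(?P<path>(?:(?!\1).)*)\1', sample)
print(mobj.group('path'))  # -> /8a32d2-playlistid/playlist/my-favs
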
    ', webpage, 'title')
+
+        lecture_urls = []
+        for lecture_url in re.findall(r'(?i)href="/w/(.+)(?[0-9]+)
-                    (?:/(?P<unlisted_hash>[\da-f]{10}))?
-                    /?(?:[?&].*)?(?:[#].*)?$
-                '''
+                    https?://
+                        (?:
+                            (?:
+                                www|
+                                player
+                            )
+                            \.
+                        )?
+                        vimeo\.com/
+                        (?:
+                            (?P<u>user)|
+                            (?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
+                            (?:.*?/)??
+                            (?P<q>
+                                (?:
+                                    play_redirect_hls|
+                                    moogaloop\.swf)\?clip_id=
+                            )?
+                            (?:videos?/)?
+                        )
+                        (?P<id>[0-9]+)
+                        (?(u)
+                            /(?!videos|likes)[^/?#]+/?|
+                            (?(q)|/(?P<unlisted_hash>[\da-f]{10}))?
+                        )
+                        (?:(?(q)[&]|(?(u)|/?)[?]).*?)?(?:[#].*)?$
+                    '''
     IE_NAME = 'vimeo'
     _EMBED_REGEX = [
         # iframe
@@ -705,7 +711,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
         'params': {
             'skip_download': True,
         },
-    }
+    },
+    {
+        # user playlist alias -> https://vimeo.com/258705797
+        'url': 'https://vimeo.com/user26785108/newspiritualguide',
+        'only_matching': True,
+    },
    # https://gettingthingsdone.com/workflowmap/
    # vimeo embed with check-password page protected by Referer header
 ]

diff --git a/yt_dlp/extractor/xhamster.py b/yt_dlp/extractor/xhamster.py
index 59eececb6..7af6c8f03 100644
--- a/yt_dlp/extractor/xhamster.py
+++ b/yt_dlp/extractor/xhamster.py
@@ -21,7 +21,7 @@ from ..utils import (
 
 
 class XHamsterIE(InfoExtractor):
-    _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com)'
+    _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com|xhvid\.com)'
     _VALID_URL = r'''(?x)
                     https?://
                         (?:.+?\.)?%s/
@@ -120,6 +120,9 @@ class XHamsterIE(InfoExtractor):
     }, {
         'url': 'https://xhday.com/videos/strapless-threesome-xhh7yVf',
         'only_matching': True,
+    }, {
+        'url': 'https://xhvid.com/videos/lk-mm-xhc6wn6',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -422,6 +425,9 @@ class XHamsterUserIE(InfoExtractor):
     }, {
         'url': 'https://xhday.com/users/mobhunter',
         'only_matching': True,
+    }, {
+        'url': 'https://xhvid.com/users/pelushe21',
+        'only_matching': True,
     }]
 
     def _entries(self, user_id):

diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 9eb9495a0..994239897 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -3149,14 +3149,28 @@ def urlencode_postdata(*args, **kargs):
     return urllib.parse.urlencode(*args, **kargs).encode('ascii')
 
 
+def update_url(url, *, query_update=None, **kwargs):
+    """Replace URL components specified by kwargs
+    @param url           str or parse url tuple
+    @param query_update  update query
+    @returns             str
+    """
+    if isinstance(url, str):
+        if not kwargs and not query_update:
+            return url
+        else:
+            url = urllib.parse.urlparse(url)
+    if query_update:
+        assert 'query' not in kwargs, 'query_update and query cannot be specified at the same time'
+        kwargs['query'] = urllib.parse.urlencode({
+            **urllib.parse.parse_qs(url.query),
+            **query_update
+        }, True)
+    return urllib.parse.urlunparse(url._replace(**kwargs))
+
+
 def update_url_query(url, query):
-    if not query:
-        return url
-    parsed_url = urllib.parse.urlparse(url)
-    qs = urllib.parse.parse_qs(parsed_url.query)
-    qs.update(query)
-    return urllib.parse.urlunparse(parsed_url._replace(
-        query=urllib.parse.urlencode(qs, True)))
+    return update_url(url, query_update=query)
 
 
 def update_Request(req, url=None, data=None, headers=None, query=None):

From a0a7c0154252900b7b154898744b698624d92b2a Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Fri, 17 Feb 2023 17:22:03 +0530
Subject: [PATCH 2003/2552] Release 2023.02.17

---
 CONTRIBUTORS      |  24 +++++
 Changelog.md      | 253 ++++++++++++++++++++++++++++++++++++++++++++++
 Collaborators.md  |   8 ++
 supportedsites.md |  33 ++++--
 4 files changed, 312 insertions(+), 6 
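
# Illustrative sketch (not from the yt-dlp source): what the new
# utils.update_url() helper above enables. Existing query parameters are
# parsed, merged with `query_update` and re-encoded, while any other component
# of the parsed URL can be swapped via keyword arguments; update_url_query()
# is now just a thin wrapper over it. The URL below is made up.
from yt_dlp.utils import update_url

url = 'https://example.com/watch?v=abc&t=10'
print(update_url(url, query_update={'t': '42'}))   # https://example.com/watch?v=abc&t=42
print(update_url(url, scheme='http', query=''))    # http://example.com/watch
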
deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index e3b95e2f3..10fb5775b 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -381,3 +381,27 @@ gschizas JC-Chung mzhou OndrejBakan +ab4cbef +aionescu +amra +ByteDream +carusocr +chexxor +felixonmars +FrankZ85 +FriedrichRehren +gregsadetsky +LeoniePhiline +LowSuggestion912 +Matumo +OIRNOIR +OMEGARAZER +oxamun +pmitchell86 +qbnu +qulaz +rebane2001 +road-master +rohieb +sdht0 +seproDev diff --git a/Changelog.md b/Changelog.md index e4cc7fd30..36856e016 100644 --- a/Changelog.md +++ b/Changelog.md @@ -10,6 +10,259 @@ * Dispatch the workflow https://github.com/yt-dlp/yt-dlp/actions/workflows/build.yml on master --> +# 2023.02.17 + +* Merge youtube-dl: Upto [commit/2dd6c6e](https://github.com/ytdl-org/youtube-dl/commit/2dd6c6e) +* Fix `--concat-playlist` +* Imply `--no-progress` when `--print` +* Improve default subtitle language selection by [sdht0](https://github.com/sdht0) +* Make `title` completely non-fatal +* Sanitize formats before sorting by [pukkandan](https://github.com/pukkandan) +* Support module level `__bool__` and `property` +* [dependencies] Standardize `Cryptodome` imports +* [hls] Allow extractors to provide AES key by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly) +* [ExtractAudio] Handle outtmpl without ext by [carusocr](https://github.com/carusocr) +* [extractor/common] Fix `_search_nuxt_data` by [LowSuggestion912](https://github.com/LowSuggestion912) +* [extractor/generic] Avoid catastrophic backtracking in KVS regex by [bashonly](https://github.com/bashonly) +* [jsinterp] Support `if` statements +* [plugins] Fix zip search paths +* [utils] `traverse_obj`: Various improvements by [Grub4K](https://github.com/Grub4K) +* [utils] `traverse_obj`: Fix more bugs +* [utils] `traverse_obj`: Fix several behavioral problems by [Grub4K](https://github.com/Grub4K) +* [utils] Don't use Content-length with encoding by [felixonmars](https://github.com/felixonmars) +* [utils] Fix `time_seconds` to use the provided TZ by [Grub4K](https://github.com/Grub4K), [Lesmiscore](https://github.com/Lesmiscore) +* [utils] Fix race condition in `make_dir` by [aionescu](https://github.com/aionescu) +* [utils] Use local kernel32 for file locking on Windows by [Grub4K](https://github.com/Grub4K) +* [compat_utils] Improve `passthrough_module` +* [compat_utils] Simplify `EnhancedModule` +* [build] Update pyinstaller +* [pyinst] Fix for pyinstaller 5.8 +* [devscripts] Provide `pyinstaller` hooks +* [devscripts/pyinstaller] Analyze sub-modules of `Cryptodome` +* [cleanup] Misc fixes and cleanup +* [extractor/anchorfm] Add episode extractor by [HobbyistDev](https://github.com/HobbyistDev), [bashonly](https://github.com/bashonly) +* [extractor/boxcast] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/ebay] Add extractor by [JChris246](https://github.com/JChris246) +* [extractor/hypergryph] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [bashonly](https://github.com/bashonly) +* [extractor/NZOnScreen] Add extractor by [gregsadetsky](https://github.com/gregsadetsky), [pukkandan](https://github.com/pukkandan) +* [extractor/rozhlas] Add extractor RozhlasVltavaIE by [amra](https://github.com/amra) +* [extractor/tempo] Add IVXPlayer extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/txxx] Add extractors by [chio0hai](https://github.com/chio0hai) +* [extractor/vocaroo] Add extractor by [SuperSonicHub1](https://github.com/SuperSonicHub1), [qbnu](https://github.com/qbnu) +* 
[extractor/wrestleuniverse] Add extractors by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly) +* [extractor/yappy] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* **[extractor/youtube] Fix `uploader_id` extraction** by [bashonly](https://github.com/bashonly) +* [extractor/youtube] Add hyperpipe instances by [Generator](https://github.com/Generator) +* [extractor/youtube] Handle `consent.youtube` +* [extractor/youtube] Support `/live/` URL +* [extractor/youtube] Update invidious and piped instances by [rohieb](https://github.com/rohieb) +* [extractor/91porn] Fix title and comment extraction by [pmitchell86](https://github.com/pmitchell86) +* [extractor/AbemaTV] Cache user token whenever appropriate by [Lesmiscore](https://github.com/Lesmiscore) +* [extractor/bfmtv] Support `rmc` prefix by [carusocr](https://github.com/carusocr) +* [extractor/biliintl] Add intro and ending chapters by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/clyp] Support `wav` by [qulaz](https://github.com/qulaz) +* [extractor/crunchyroll] Add intro chapter by [ByteDream](https://github.com/ByteDream) +* [extractor/crunchyroll] Better message for premium videos +* [extractor/crunchyroll] Fix incorrect premium-only error by [Grub4K](https://github.com/Grub4K) +* [extractor/DouyuTV] Use new API by [hatienl0i261299](https://github.com/hatienl0i261299) +* [extractor/embedly] Embedded links may be for other extractors +* [extractor/freesound] Workaround invalid URL in webpage by [rebane2001](https://github.com/rebane2001) +* [extractor/GoPlay] Use new API by [jeroenj](https://github.com/jeroenj) +* [extractor/Hidive] Fix subtitles and age-restriction by [chexxor](https://github.com/chexxor) +* [extractor/huya] Support HD streams by [felixonmars](https://github.com/felixonmars) +* [extractor/moviepilot] Fix extractor by [panatexxa](https://github.com/panatexxa) +* [extractor/nbc] Fix `NBC` and `NBCStations` extractors by [bashonly](https://github.com/bashonly) +* [extractor/nbc] Fix XML parsing by [bashonly](https://github.com/bashonly) +* [extractor/nebula] Remove broken cookie support by [hheimbuerger](https://github.com/hheimbuerger) +* [extractor/nfl] Add `NFLPlus` extractors by [bashonly](https://github.com/bashonly) +* [extractor/niconico] Add support for like history by [Matumo](https://github.com/Matumo), [pukkandan](https://github.com/pukkandan) +* [extractor/nitter] Update instance list by [OIRNOIR](https://github.com/OIRNOIR) +* [extractor/npo] Fix extractor and add HD support by [seproDev](https://github.com/seproDev) +* [extractor/odkmedia] Add `OnDemandChinaEpisodeIE` by [HobbyistDev](https://github.com/HobbyistDev), [pukkandan](https://github.com/pukkandan) +* [extractor/pornez] Handle relative URLs in iframe by [JChris246](https://github.com/JChris246) +* [extractor/radiko] Fix format sorting for Time Free by [road-master](https://github.com/road-master) +* [extractor/rcs] Fix extractors by [nixxo](https://github.com/nixxo), [pukkandan](https://github.com/pukkandan) +* [extractor/reddit] Support user posts by [OMEGARAZER](https://github.com/OMEGARAZER) +* [extractor/rumble] Fix format sorting by [pukkandan](https://github.com/pukkandan) +* [extractor/servus] Rewrite extractor by [Ashish0804](https://github.com/Ashish0804), [FrankZ85](https://github.com/FrankZ85), [StefanLobbenmeier](https://github.com/StefanLobbenmeier) +* [extractor/slideslive] Fix slides and chapters/duration by [bashonly](https://github.com/bashonly) +* 
[extractor/SportDeutschland] Fix extractor by [FriedrichRehren](https://github.com/FriedrichRehren) +* [extractor/Stripchat] Fix extractor by [JChris246](https://github.com/JChris246), [bashonly](https://github.com/bashonly) +* [extractor/tnaflix] Fix extractor by [bashonly](https://github.com/bashonly), [oxamun](https://github.com/oxamun) +* [extractor/tvp] Support `stream.tvp.pl` by [selfisekai](https://github.com/selfisekai) +* [extractor/twitter] Fix `--no-playlist` and add media `view_count` when using GraphQL by [Grub4K](https://github.com/Grub4K) +* [extractor/twitter] Fix graphql extraction on some tweets by [selfisekai](https://github.com/selfisekai) +* [extractor/vimeo] Fix `playerConfig` extraction by [LeoniePhiline](https://github.com/LeoniePhiline), [bashonly](https://github.com/bashonly) +* [extractor/viu] Add `ViuOTTIndonesiaIE` extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/vk] Fix playlists for new API by [the-marenga](https://github.com/the-marenga) +* [extractor/vlive] Replace with `VLiveWebArchiveIE` by [seproDev](https://github.com/seproDev) +* [extractor/ximalaya] Update album `_VALID_URL` by [carusocr](https://github.com/carusocr) +* [extractor/zdf] Use android API endpoint for UHD downloads by [seproDev](https://github.com/seproDev) +* [extractor/drtv] Fix bug in [ab4cbef](https://github.com/yt-dlp/yt-dlp/commit/ab4cbef) by [bashonly](https://github.com/bashonly) + + +### 2023.02.17 + +#### Core changes +### Core changes +- [Bugfix for 39f32f1715c0dffb7626dda7307db6388bb7abaa](https://github.com/yt-dlp/yt-dlp/commit/9ebac35577e61c3d25fafc959655fa3ab04ca7ef) by [pukkandan](https://github.com/pukkandan) +- [Bugfix for 39f32f1715c0dffb7626dda7307db6388bb7abaa](https://github.com/yt-dlp/yt-dlp/commit/c154302c588c3d4362cec4fc5545e7e5d2bcf7a3) by [pukkandan](https://github.com/pukkandan) +- [Fix `--concat-playlist`](https://github.com/yt-dlp/yt-dlp/commit/59d7de0da545944c48a82fc2937b996d7cd8cc9c) by [pukkandan](https://github.com/pukkandan) +- [Imply `--no-progress` when `--print`](https://github.com/yt-dlp/yt-dlp/commit/5712943b764ba819ef479524c32700228603817a) by [pukkandan](https://github.com/pukkandan) +- [Improve default subtitle language selection](https://github.com/yt-dlp/yt-dlp/commit/376aa24b1541e2bfb23337c0ae9bafa5bb3787f1) ([#6240](https://github.com/yt-dlp/yt-dlp/issues/6240)) by [sdht0](https://github.com/sdht0) +- [Make `title` completely non-fatal](https://github.com/yt-dlp/yt-dlp/commit/7aefd19afed357c80743405ec2ace2148cba42e3) by [pukkandan](https://github.com/pukkandan) +- [Sanitize formats before sorting](https://github.com/yt-dlp/yt-dlp/commit/39f32f1715c0dffb7626dda7307db6388bb7abaa) by [pukkandan](https://github.com/pukkandan) +- [Support module level `__bool__` and `property`](https://github.com/yt-dlp/yt-dlp/commit/754c84e2e416cf6609dd0e4632b4985a08d34043) by [pukkandan](https://github.com/pukkandan) +- [Update to ytdl-commit-2dd6c6e](https://github.com/yt-dlp/yt-dlp/commit/48fde8ac4ccbaaea868f6378814dde395f649fbf) by [pukkandan](https://github.com/pukkandan) +- [extractor/douyutv]: [Use new API](https://github.com/yt-dlp/yt-dlp/commit/f14c2333481c63c24017a41ded7d8f36726504b7) ([#6074](https://github.com/yt-dlp/yt-dlp/issues/6074)) by [hatienl0i261299](https://github.com/hatienl0i261299) +- compat_utils + - [Improve `passthrough_module`](https://github.com/yt-dlp/yt-dlp/commit/88426d9446758c707fb511408f2d6f56de952db4) by [pukkandan](https://github.com/pukkandan) + - [Simplify 
`EnhancedModule`](https://github.com/yt-dlp/yt-dlp/commit/768a00178109508893488e53a0e720b117fbccf6) by [pukkandan](https://github.com/pukkandan) +- dependencies + - [Standardize `Cryptodome` imports](https://github.com/yt-dlp/yt-dlp/commit/f6a765ceb59c55aea06921880c1c87d1ff36e5de) by [pukkandan](https://github.com/pukkandan) +- jsinterp + - [Support `if` statements](https://github.com/yt-dlp/yt-dlp/commit/8b008d62544b82e24a0ba36c30e8e51855d93419) by [pukkandan](https://github.com/pukkandan) +- plugins + - [Fix zip search paths](https://github.com/yt-dlp/yt-dlp/commit/88d8928bf7630801865cf8728ae5c77234324b7b) by [pukkandan](https://github.com/pukkandan) +- utils + - [Don't use Content-length with encoding](https://github.com/yt-dlp/yt-dlp/commit/65e5c021e7c5f23ecbc6a982b72a02ac6cd6900d) ([#6176](https://github.com/yt-dlp/yt-dlp/issues/6176)) by [felixonmars](https://github.com/felixonmars) + - [Fix `time_seconds` to use the provided TZ](https://github.com/yt-dlp/yt-dlp/commit/83c4970e52839ce8761ec61bd19d549aed7d7920) ([#6118](https://github.com/yt-dlp/yt-dlp/issues/6118)) by [Grub4K](https://github.com/Grub4K), [Lesmiscore](https://github.com/Lesmiscore) + - [Fix race condition in `make_dir`](https://github.com/yt-dlp/yt-dlp/commit/b25d6cb96337d479bdcb41768356da414c3aa835) ([#6089](https://github.com/yt-dlp/yt-dlp/issues/6089)) by [aionescu](https://github.com/aionescu) + - [Use local kernel32 for file locking on Windows](https://github.com/yt-dlp/yt-dlp/commit/37e325b92ff9d784715ac0e5d1f7d96bf5f45ad9) by [Grub4K](https://github.com/Grub4K) + - traverse_obj + - [Fix more bugs](https://github.com/yt-dlp/yt-dlp/commit/6839ae1f6dde4c0442619e351b3f0442312ab4f9) by [pukkandan](https://github.com/pukkandan) + - [Fix several behavioral problems](https://github.com/yt-dlp/yt-dlp/commit/b1bde57bef878478e3503ab07190fd207914ade9) by [Grub4K](https://github.com/Grub4K) + - [Various improvements](https://github.com/yt-dlp/yt-dlp/commit/776995bc109c5cd1aa56b684fada2ce718a386ec) by [Grub4K](https://github.com/Grub4K) +### Extractor changes +- [Fix `_search_nuxt_data`](https://github.com/yt-dlp/yt-dlp/commit/b23167e7542c177f32b22b29857b637dc4aede69) ([#6062](https://github.com/yt-dlp/yt-dlp/issues/6062)) by [LowSuggestion912](https://github.com/LowSuggestion912) +- 91porn + - [Fix title and comment extraction](https://github.com/yt-dlp/yt-dlp/commit/c085cc2def9862ac8a7619ce8ea5dcc177325719) ([#5932](https://github.com/yt-dlp/yt-dlp/issues/5932)) by [pmitchell86](https://github.com/pmitchell86) +- abematv + - [Cache user token whenever appropriate](https://github.com/yt-dlp/yt-dlp/commit/a4f16832213d9e29beecf685d6cd09a2f0b48c87) ([#6216](https://github.com/yt-dlp/yt-dlp/issues/6216)) by [Lesmiscore](https://github.com/Lesmiscore) +- anchorfm + - [Add episode extractor](https://github.com/yt-dlp/yt-dlp/commit/a4ad59ff2ded208bf33f6fe07299a3449eadccdc) ([#6092](https://github.com/yt-dlp/yt-dlp/issues/6092)) by [bashonly](https://github.com/bashonly), [HobbyistDev](https://github.com/HobbyistDev) +- bfmtv + - [Support `rmc` prefix](https://github.com/yt-dlp/yt-dlp/commit/20266508dd6247dd3cf0e97b9b9f14c3afc046db) ([#6025](https://github.com/yt-dlp/yt-dlp/issues/6025)) by [carusocr](https://github.com/carusocr) +- biliintl + - [Add intro and ending chapters](https://github.com/yt-dlp/yt-dlp/commit/0ba87dd279d3565ed93c559cf7880ad61eb83af8) ([#6018](https://github.com/yt-dlp/yt-dlp/issues/6018)) by [HobbyistDev](https://github.com/HobbyistDev) +- boxcast + - [Add 
extractor](https://github.com/yt-dlp/yt-dlp/commit/9acca71237f42a4775008e51fe26e42f0a39c552) ([#5983](https://github.com/yt-dlp/yt-dlp/issues/5983)) by [HobbyistDev](https://github.com/HobbyistDev) +- clyp + - [Support `wav`](https://github.com/yt-dlp/yt-dlp/commit/cc13293c2819b5461be211a9729fd02bb1e2f476) ([#6102](https://github.com/yt-dlp/yt-dlp/issues/6102)) by [qulaz](https://github.com/qulaz) +- crunchyroll + - [Add intro chapter](https://github.com/yt-dlp/yt-dlp/commit/93abb7406b95793f6872d12979b91d5f336b4f43) ([#6023](https://github.com/yt-dlp/yt-dlp/issues/6023)) by [ByteDream](https://github.com/ByteDream) + - [Better message for premium videos](https://github.com/yt-dlp/yt-dlp/commit/44699d10dc8de9c6a338f4a8e5c63506ec4d2118) by [pukkandan](https://github.com/pukkandan) + - [Fix incorrect premium-only error](https://github.com/yt-dlp/yt-dlp/commit/c9d14bd22ab31e2a41f9f8061843668a06db583b) by [Grub4K](https://github.com/Grub4K) +- drtv + - [Fix bug in ab4cbef](https://github.com/yt-dlp/yt-dlp/commit/7481998b169b2a52049fc33bff82034d6563ead4) ([#6034](https://github.com/yt-dlp/yt-dlp/issues/6034)) by [bashonly](https://github.com/bashonly) +- ebay + - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/da880559a6ecbbf374cc9f3378e696b55b9599af) ([#6170](https://github.com/yt-dlp/yt-dlp/issues/6170)) by [JChris246](https://github.com/JChris246) +- embedly + - [Embedded links may be for other extractors](https://github.com/yt-dlp/yt-dlp/commit/87ebab0615b1bf9b14b478b055e7059d630b4833) by [pukkandan](https://github.com/pukkandan) +- freesound + - [Workaround invalid URL in webpage](https://github.com/yt-dlp/yt-dlp/commit/9cfdbcbf3f17be51f5b6bb9bb6d880b2f3d67362) ([#6147](https://github.com/yt-dlp/yt-dlp/issues/6147)) by [rebane2001](https://github.com/rebane2001) +- generic + - [Avoid catastrophic backtracking in KVS regex](https://github.com/yt-dlp/yt-dlp/commit/8aa0bd5d10627ece3c1815c01d02fb8bf22847a7) by [bashonly](https://github.com/bashonly) +- goplay + - [Use new API](https://github.com/yt-dlp/yt-dlp/commit/d27bde98832e3b7ffb39f3cf6346011b97bb3bc3) ([#6151](https://github.com/yt-dlp/yt-dlp/issues/6151)) by [jeroenj](https://github.com/jeroenj) +- hidive + - [Fix subtitles and age-restriction](https://github.com/yt-dlp/yt-dlp/commit/7708df8da05c94270b43e0630e4e20f6d2d62c55) ([#5828](https://github.com/yt-dlp/yt-dlp/issues/5828)) by [chexxor](https://github.com/chexxor) +- huya + - [Support HD streams](https://github.com/yt-dlp/yt-dlp/commit/fbbb5508ea98ed8709847f5ecced7d70ff05e0ee) ([#6172](https://github.com/yt-dlp/yt-dlp/issues/6172)) by [felixonmars](https://github.com/felixonmars) +- hypergryph + - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/31c279a2a2c2ef402a9e6dad9992b310d16439a6) ([#6094](https://github.com/yt-dlp/yt-dlp/issues/6094)) by [bashonly](https://github.com/bashonly), [HobbyistDev](https://github.com/HobbyistDev) +- moviepilot + - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/c62e64cf0122e52fa2175dd1b004ca6b8e1d82af) ([#5954](https://github.com/yt-dlp/yt-dlp/issues/5954)) by [panatexxa](https://github.com/panatexxa) +- nbc + - [Fix XML parsing](https://github.com/yt-dlp/yt-dlp/commit/176a068cde4f2d9dfa0336168caead0b1edcb8ac) by [bashonly](https://github.com/bashonly) + - [Fix `NBC` and `NBCStations` extractors](https://github.com/yt-dlp/yt-dlp/commit/cb73b8460c3ce6d37ab651a4e44bb23b10056154) ([#6033](https://github.com/yt-dlp/yt-dlp/issues/6033)) by [bashonly](https://github.com/bashonly) +- nebula + - [Remove broken cookie 
support](https://github.com/yt-dlp/yt-dlp/commit/d50ea3ce5abc3b0defc0e5d1e22b22ce9b01b07b) ([#5979](https://github.com/yt-dlp/yt-dlp/issues/5979)) by [hheimbuerger](https://github.com/hheimbuerger) +- nfl + - [Add `NFLPlus` extractors](https://github.com/yt-dlp/yt-dlp/commit/8b37c58f8b5494504acdb5ebe3f8bbd26230f725) ([#6222](https://github.com/yt-dlp/yt-dlp/issues/6222)) by [bashonly](https://github.com/bashonly) +- niconico + - [Add support for like history](https://github.com/yt-dlp/yt-dlp/commit/3b161265add30613bde2e46fca214fe94d09e651) ([#5705](https://github.com/yt-dlp/yt-dlp/issues/5705)) by [Matumo](https://github.com/Matumo), [pukkandan](https://github.com/pukkandan) +- nitter + - [Update instance list](https://github.com/yt-dlp/yt-dlp/commit/a9189510baadf0dccd2d4d363bc6f3a441128bb0) ([#6236](https://github.com/yt-dlp/yt-dlp/issues/6236)) by [OIRNOIR](https://github.com/OIRNOIR) +- npo + - [Fix extractor and add HD support](https://github.com/yt-dlp/yt-dlp/commit/cc2389c8ac72a514d4e002a0f6ca5a7d65c7eff0) ([#6155](https://github.com/yt-dlp/yt-dlp/issues/6155)) by [seproDev](https://github.com/seproDev) +- nzonscreen + - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/d3bb187f01e1e30db05e639fc23a2e1935d777fe) ([#6208](https://github.com/yt-dlp/yt-dlp/issues/6208)) by [gregsadetsky](https://github.com/gregsadetsky), [pukkandan](https://github.com/pukkandan) +- odkmedia + - [Add `OnDemandChinaEpisodeIE`](https://github.com/yt-dlp/yt-dlp/commit/10fd9e6ee833c88edf6c633f864f42843a708d32) ([#6116](https://github.com/yt-dlp/yt-dlp/issues/6116)) by [HobbyistDev](https://github.com/HobbyistDev), [pukkandan](https://github.com/pukkandan) +- pornez + - [Handle relative URLs in iframe](https://github.com/yt-dlp/yt-dlp/commit/f7efe6dc958eb0689cb9534ff0b4e592040be8df) ([#6171](https://github.com/yt-dlp/yt-dlp/issues/6171)) by [JChris246](https://github.com/JChris246) +- radiko + - [Fix format sorting for Time Free](https://github.com/yt-dlp/yt-dlp/commit/203a06f8554df6db07d8f20f465ecbfe8a14e591) ([#6159](https://github.com/yt-dlp/yt-dlp/issues/6159)) by [road-master](https://github.com/road-master) +- rcs + - [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/c6b657867ad68af6b930ed0aa11ec5d93ee187b7) ([#5700](https://github.com/yt-dlp/yt-dlp/issues/5700)) by [nixxo](https://github.com/nixxo), [pukkandan](https://github.com/pukkandan) +- reddit + - [Support user posts](https://github.com/yt-dlp/yt-dlp/commit/c77df98b1a477a020a57141464d10c0f4d0fdbc9) ([#6173](https://github.com/yt-dlp/yt-dlp/issues/6173)) by [OMEGARAZER](https://github.com/OMEGARAZER) +- rozhlas + - [Add extractor RozhlasVltavaIE](https://github.com/yt-dlp/yt-dlp/commit/355d781bed497cbcb254bf2a2737b83fa51c84ea) ([#5951](https://github.com/yt-dlp/yt-dlp/issues/5951)) by [amra](https://github.com/amra) +- rumble + - [Fix format sorting](https://github.com/yt-dlp/yt-dlp/commit/acacb57c7e173b93c6e0f0c43e61b9b2912719d8) by [pukkandan](https://github.com/pukkandan) +- servus + - [Rewrite extractor](https://github.com/yt-dlp/yt-dlp/commit/f40e32fb1ac67be5bdbc8e32a3c235abfc4be260) ([#6036](https://github.com/yt-dlp/yt-dlp/issues/6036)) by [Ashish0804](https://github.com/Ashish0804), [FrankZ85](https://github.com/FrankZ85), [StefanLobbenmeier](https://github.com/StefanLobbenmeier) +- slideslive + - [Fix slides and chapters/duration](https://github.com/yt-dlp/yt-dlp/commit/5ab3534d44231f7711398bc3cfc520e2efd09f50) ([#6024](https://github.com/yt-dlp/yt-dlp/issues/6024)) by [bashonly](https://github.com/bashonly) +- 
sportdeutschland + - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5e1a54f63e393c218a40949012ff0de0ce63cb15) ([#6041](https://github.com/yt-dlp/yt-dlp/issues/6041)) by [FriedrichRehren](https://github.com/FriedrichRehren) +- stripchat + - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/7d5f919bad07017f4b39b55725491b1e9717d47a) ([#5985](https://github.com/yt-dlp/yt-dlp/issues/5985)) by [bashonly](https://github.com/bashonly), [JChris246](https://github.com/JChris246) +- tempo + - [Add IVXPlayer extractor](https://github.com/yt-dlp/yt-dlp/commit/30031be974d210f451100339699ef03b0ddb5f10) ([#5837](https://github.com/yt-dlp/yt-dlp/issues/5837)) by [HobbyistDev](https://github.com/HobbyistDev) +- tnaflix + - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/989f47b6315541989bb507f26b431d9586430995) ([#6086](https://github.com/yt-dlp/yt-dlp/issues/6086)) by [bashonly](https://github.com/bashonly), [oxamun](https://github.com/oxamun) +- tvp + - [Support `stream.tvp.pl`](https://github.com/yt-dlp/yt-dlp/commit/a31d0fa6c315b1145d682361149003d98f1e3782) ([#6139](https://github.com/yt-dlp/yt-dlp/issues/6139)) by [selfisekai](https://github.com/selfisekai) +- twitter + - [Fix `--no-playlist` and add media `view_count` when using GraphQL](https://github.com/yt-dlp/yt-dlp/commit/b6795fd310f1dd61dddc9fd08e52fe485bdc8a3e) ([#6211](https://github.com/yt-dlp/yt-dlp/issues/6211)) by [Grub4K](https://github.com/Grub4K) + - [Fix graphql extraction on some tweets](https://github.com/yt-dlp/yt-dlp/commit/7543c9c99bcb116b085fdb1f41b84a0ead04c05d) ([#6075](https://github.com/yt-dlp/yt-dlp/issues/6075)) by [selfisekai](https://github.com/selfisekai) +- txxx + - [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/389896df85ed14eaf74f72531da6c4491d6b73b0) ([#5240](https://github.com/yt-dlp/yt-dlp/issues/5240)) by [chio0hai](https://github.com/chio0hai) +- vimeo + - [Fix `playerConfig` extraction](https://github.com/yt-dlp/yt-dlp/commit/c0cd13fb1c71b842c3d272d0273c03542b467766) ([#6203](https://github.com/yt-dlp/yt-dlp/issues/6203)) by [bashonly](https://github.com/bashonly), [LeoniePhiline](https://github.com/LeoniePhiline) +- viu + - [Add `ViuOTTIndonesiaIE` extractor](https://github.com/yt-dlp/yt-dlp/commit/72671a212d7c939329cb5d34335fa089dd3acbd3) ([#6099](https://github.com/yt-dlp/yt-dlp/issues/6099)) by [HobbyistDev](https://github.com/HobbyistDev) +- vk + - [Fix playlists for new API](https://github.com/yt-dlp/yt-dlp/commit/a9c685453f7019bee94170f936619c6db76c964e) ([#6122](https://github.com/yt-dlp/yt-dlp/issues/6122)) by [the-marenga](https://github.com/the-marenga) +- vlive + - [Replace with `VLiveWebArchiveIE`](https://github.com/yt-dlp/yt-dlp/commit/b3eaab7ca2e118d4db73dcb44afd9c8717db8b67) ([#6196](https://github.com/yt-dlp/yt-dlp/issues/6196)) by [seproDev](https://github.com/seproDev) +- vocaroo + - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/e4a8b1769e19755acba6d8f212208359905a3159) ([#6117](https://github.com/yt-dlp/yt-dlp/issues/6117)) by [qbnu](https://github.com/qbnu), [SuperSonicHub1](https://github.com/SuperSonicHub1) +- wrestleuniverse + - [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/e61acb40b2cb6ef45508d72235026d458c9d5dff) ([#6158](https://github.com/yt-dlp/yt-dlp/issues/6158)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) +- ximalaya + - [Update album `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/417cdaae08fc447c9d15c53a88e2e9a027cdbf0a) ([#6110](https://github.com/yt-dlp/yt-dlp/issues/6110)) by 
[carusocr](https://github.com/carusocr) +- yappy + - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/361630015535026712bdb67f804a15b65ff9ee7e) ([#6111](https://github.com/yt-dlp/yt-dlp/issues/6111)) by [HobbyistDev](https://github.com/HobbyistDev) +- youtube + - [Add hyperpipe instances](https://github.com/yt-dlp/yt-dlp/commit/78a78fa74dbc888d20f1b65e1382bf99131597d5) ([#6020](https://github.com/yt-dlp/yt-dlp/issues/6020)) by [Generator](https://github.com/Generator) + - [Fix `uploader_id` extraction](https://github.com/yt-dlp/yt-dlp/commit/149eb0bbf34fa8fdf8d1e2aa28e17479d099e26b) by [bashonly](https://github.com/bashonly) + - [Handle `consent.youtube`](https://github.com/yt-dlp/yt-dlp/commit/b032ff0f032512bd6fc70c9c1994d906eacc06cb) by [pukkandan](https://github.com/pukkandan) + - [Support `/live/` URL](https://github.com/yt-dlp/yt-dlp/commit/dad2210c0cb9cf03702a9511817ee5ec646d7bc8) by [pukkandan](https://github.com/pukkandan) + - [Update invidious and piped instances](https://github.com/yt-dlp/yt-dlp/commit/05799a48c7dec12b34c8bf951c8d2eceedda59f8) ([#6030](https://github.com/yt-dlp/yt-dlp/issues/6030)) by [rohieb](https://github.com/rohieb) + - [`uploader_id` includes `@` with handle](https://github.com/yt-dlp/yt-dlp/commit/c61cf091a54d3aa3c611722035ccde5ecfe981bb) by [bashonly](https://github.com/bashonly) +- zdf + - [Use android API endpoint for UHD downloads](https://github.com/yt-dlp/yt-dlp/commit/0fe87a8730638490415d630f48e61d264d89c358) ([#6150](https://github.com/yt-dlp/yt-dlp/issues/6150)) by [seproDev](https://github.com/seproDev) +### Downloader changes +- hls + - [Allow extractors to provide AES key](https://github.com/yt-dlp/yt-dlp/commit/7e68567e508168b345266c0c19812ad50a829eaa) ([#6158](https://github.com/yt-dlp/yt-dlp/issues/6158)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) +### Postprocessor changes +- extractaudio + - [Handle outtmpl without ext](https://github.com/yt-dlp/yt-dlp/commit/f737fb16d8234408c85bc189ccc926fea000515b) ([#6005](https://github.com/yt-dlp/yt-dlp/issues/6005)) by [carusocr](https://github.com/carusocr) +- pyinst + - [Fix for pyinstaller 5.8](https://github.com/yt-dlp/yt-dlp/commit/2e269bd998c61efaf7500907d114a56e5e83e65e) by [pukkandan](https://github.com/pukkandan) +### Misc. 
changes +- build + - [Update pyinstaller](https://github.com/yt-dlp/yt-dlp/commit/365b9006051ac7d735c20bb63c4907b758233048) by [pukkandan](https://github.com/pukkandan) +- cleanup + - Miscellaneous: [76c9c52](https://github.com/yt-dlp/yt-dlp/commit/76c9c523071150053df7b56956646b680b6a6e05) by [pukkandan](https://github.com/pukkandan) +- devscripts + - [Provide pyinstaller hooks](https://github.com/yt-dlp/yt-dlp/commit/acb1042a9ffa8769fe691beac1011d6da1fcf321) by [pukkandan](https://github.com/pukkandan) +- pyinstaller + - [Analyze sub-modules of `Cryptodome`](https://github.com/yt-dlp/yt-dlp/commit/b85faf6ffb700058e774e99c04304a7a9257cdd0) by [pukkandan](https://github.com/pukkandan) ### 2023.01.06 diff --git a/Collaborators.md b/Collaborators.md index fe2a7f4b4..83dfbe389 100644 --- a/Collaborators.md +++ b/Collaborators.md @@ -59,3 +59,11 @@ You can also find lists of all [contributors of yt-dlp](CONTRIBUTORS) and [autho * `--cookies-from-browser` support for Firefox containers * Added support for new websites Genius, Kick, NBCStations, Triller, VideoKen etc * Improved/fixed support for Anvato, Brightcove, Instagram, ParamountPlus, Reddit, SlidesLive, TikTok, Twitter, Vimeo etc + + +## [Grub4K](https://github.com/Grub4K) + +[![ko-fi](https://img.shields.io/badge/_-Ko--fi-red.svg?logo=kofi&labelColor=555555&style=for-the-badge)](https://ko-fi.com/Grub4K) [![gh-sponsor](https://img.shields.io/badge/_-Github-red.svg?logo=github&labelColor=555555&style=for-the-badge)](https://github.com/sponsors/Grub4K) + +* Rework internals like `traverse_obj`, various core refactors and bugs fixes +* Helped fix crunchyroll, Twitter, wrestleuniverse, wistia, slideslive etc diff --git a/supportedsites.md b/supportedsites.md index 5cef7ac90..b545ec540 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -63,14 +63,15 @@ - **AluraCourse**: [aluracourse] - **Amara** - **AmazonMiniTV** - - **amazonminitv:season**: Amazon MiniTV Series, "minitv:season:" prefix - - **amazonminitv:series** + - **amazonminitv:season**: Amazon MiniTV Season, "minitv:season:" prefix + - **amazonminitv:series**: Amazon MiniTV Series, "minitv:series:" prefix - **AmazonReviews** - **AmazonStore** - **AMCNetworks** - **AmericasTestKitchen** - **AmericasTestKitchenSeason** - **AmHistoryChannel** + - **AnchorFMEpisode** - **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **Angel** - **AnimalPlanet** @@ -177,6 +178,7 @@ - **BlackboardCollaborate** - **BleacherReport** - **BleacherReportCMS** + - **blerp** - **blogger.com** - **Bloomberg** - **BokeCC** @@ -184,6 +186,7 @@ - **BooyahClips** - **BostonGlobe** - **Box** + - **BoxCastVideo** - **Bpb**: Bundeszentrale für politische Bildung - **BR**: Bayerischer Rundfunk - **BravoTV** @@ -364,6 +367,7 @@ - **dw:article** - **EaglePlatform** - **EbaumsWorld** + - **Ebay** - **EchoMsk** - **egghead:course**: egghead.io course - **egghead:lesson**: egghead.io lesson @@ -595,6 +599,7 @@ - **ivi**: ivi.ru - **ivi:compilation**: ivi.ru compilations - **ivideon**: Ivideon TV + - **IVXPlayer** - **Iwara** - **iwara:playlist** - **iwara:user** @@ -626,6 +631,7 @@ - **KickVOD** - **KinjaEmbed** - **KinoPoisk** + - **Kommunetv** - **KompasVideo** - **KonserthusetPlay** - **Koo** @@ -773,6 +779,7 @@ - **Mofosex** - **MofosexEmbed** - **Mojvideo** + - **MonsterSirenHypergryphMusic** - **Morningstar**: morningstar.com - **Motherless** - **MotherlessGroup** @@ -878,6 +885,8 @@ - **NFHSNetwork** - **nfl.com** - **nfl.com:article** + - **nfl.com:​plus:episode** + - **nfl.com:​plus:replay** 
- **NhkForSchoolBangumi** - **NhkForSchoolProgramList** - **NhkForSchoolSubject**: Portal page for each school subjects, like Japanese (kokugo, 国語) or math (sansuu/suugaku or 算数・数学) @@ -890,7 +899,7 @@ - **nickelodeonru** - **nicknight** - **niconico**: [niconico] ニコニコ動画 - - **niconico:history**: NicoNico user history. Requires cookies. + - **niconico:history**: NicoNico user history or likes. Requires cookies. - **niconico:playlist** - **niconico:series** - **niconico:tag**: NicoNico video tag URLs @@ -940,6 +949,7 @@ - **NYTimesArticle** - **NYTimesCooking** - **nzherald** + - **NZOnScreen** - **NZZ** - **ocw.mit.edu** - **OdaTV** @@ -949,6 +959,7 @@ - **OktoberfestTV** - **OlympicsReplay** - **on24**: ON24 + - **OnDemandChinaEpisode** - **OnDemandKorea** - **OneFootball** - **OnePlacePodcast** @@ -1063,7 +1074,10 @@ - **Pornotube** - **PornoVoisines** - **PornoXO** + - **PornTop** - **PornTube** + - **Pr0gramm** + - **Pr0grammStatic** - **PrankCast** - **PremiershipRugby** - **PressTV** @@ -1115,6 +1129,8 @@ - **RaiSudtirol** - **RayWenderlich** - **RayWenderlichCourse** + - **RbgTum** + - **RbgTumCourse** - **RBMARadio** - **RCS** - **RCSEmbeds** @@ -1149,6 +1165,7 @@ - **RoosterTeethSeries**: [roosterteeth] - **RottenTomatoes** - **Rozhlas** + - **RozhlasVltava** - **RTBF**: [rtbf] - **RTDocumentry** - **RTDocumentryPlaylist** @@ -1485,6 +1502,7 @@ - **twitter:card** - **twitter:shortener** - **twitter:spaces** + - **Txxx** - **udemy**: [udemy] - **udemy:course**: [udemy] - **UDNEmbed**: 聯合影音 @@ -1572,14 +1590,13 @@ - **Viu** - **viu:ott**: [viu] - **viu:playlist** + - **ViuOTTIndonesia** - **Vivo**: vivo.sx - **vk**: [vk] VK - **vk:uservideos**: [vk] VK - User's Videos - **vk:wallpost**: [vk] - - **vlive**: [vlive] - - **vlive:channel**: [vlive] - - **vlive:post**: [vlive] - **vm.tiktok** + - **Vocaroo** - **Vodlocker** - **VODPl** - **VODPlatform** @@ -1628,6 +1645,7 @@ - **wdr:mobile**: (**Currently broken**) - **WDRElefant** - **WDRPage** + - **web.archive:vlive**: web.archive.org saved vlive videos - **web.archive:youtube**: web.archive.org saved youtube videos, "ytarchive:" prefix - **Webcamerapl** - **Webcaster** @@ -1653,6 +1671,8 @@ - **WorldStarHipHop** - **wppilot** - **wppilot:channels** + - **WrestleUniversePPV** + - **WrestleUniverseVOD** - **WSJ**: Wall Street Journal - **WSJArticle** - **WWE** @@ -1689,6 +1709,7 @@ - **YandexVideo** - **YandexVideoPreview** - **YapFiles** + - **Yappy** - **YesJapan** - **yinyuetai:video**: 音悦Tai - **YleAreena** From 41bd0dc4d71919dceeb84a3aab9c9934d46eee9f Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 17 Feb 2023 12:31:30 +0000 Subject: [PATCH 2004/2552] [version] update Created by: pukkandan :ci skip all :ci run dl --- .github/ISSUE_TEMPLATE/1_broken_site.yml | 8 ++++---- .github/ISSUE_TEMPLATE/2_site_support_request.yml | 8 ++++---- .github/ISSUE_TEMPLATE/3_site_feature_request.yml | 8 ++++---- .github/ISSUE_TEMPLATE/4_bug_report.yml | 8 ++++---- .github/ISSUE_TEMPLATE/5_feature_request.yml | 8 ++++---- .github/ISSUE_TEMPLATE/6_question.yml | 8 ++++---- yt_dlp/version.py | 4 ++-- 7 files changed, 26 insertions(+), 26 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml index 2237665e3..e1103fb84 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml @@ -18,7 +18,7 @@ body: options: - label: I'm reporting a broken site required: true - - label: I've verified that I'm running yt-dlp version **2023.01.06** ([update 
instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.02.17** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true @@ -62,7 +62,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.01.06 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.02.17 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -70,8 +70,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.01.06, Current version: 2023.01.06 - yt-dlp is up to date (2023.01.06) + Latest version: 2023.02.17, Current version: 2023.02.17 + yt-dlp is up to date (2023.02.17) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml index 0e2940d86..90d7294ac 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml @@ -18,7 +18,7 @@ body: options: - label: I'm reporting a new site support request required: true - - label: I've verified that I'm running yt-dlp version **2023.01.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.02.17** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true @@ -74,7 +74,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.01.06 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.02.17 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -82,8 +82,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.01.06, Current version: 2023.01.06 - yt-dlp is up to date (2023.01.06) + Latest version: 2023.02.17, Current version: 2023.02.17 + yt-dlp is up to date (2023.02.17) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml index 92501be2e..5b59852c7 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml @@ -18,7 +18,7 @@ body: options: - label: I'm requesting a site-specific feature required: true - - label: I've verified that I'm running yt-dlp 
version **2023.01.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.02.17** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true @@ -70,7 +70,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.01.06 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.02.17 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -78,8 +78,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.01.06, Current version: 2023.01.06 - yt-dlp is up to date (2023.01.06) + Latest version: 2023.02.17, Current version: 2023.02.17 + yt-dlp is up to date (2023.02.17) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml index bdfc0efb8..bd4695f87 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml @@ -18,7 +18,7 @@ body: options: - label: I'm reporting a bug unrelated to a specific site required: true - - label: I've verified that I'm running yt-dlp version **2023.01.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.02.17** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true @@ -55,7 +55,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.01.06 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.02.17 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -63,8 +63,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.01.06, Current version: 2023.01.06 - yt-dlp is up to date (2023.01.06) + Latest version: 2023.02.17, Current version: 2023.02.17 + yt-dlp is up to date (2023.02.17) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml index c9e3aba38..8c7f315e9 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/5_feature_request.yml @@ -20,7 +20,7 @@ body: required: true - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - - label: I've verified that 
I'm running yt-dlp version **2023.01.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.02.17** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates required: true @@ -51,7 +51,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.01.06 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.02.17 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -59,7 +59,7 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.01.06, Current version: 2023.01.06 - yt-dlp is up to date (2023.01.06) + Latest version: 2023.02.17, Current version: 2023.02.17 + yt-dlp is up to date (2023.02.17) render: shell diff --git a/.github/ISSUE_TEMPLATE/6_question.yml b/.github/ISSUE_TEMPLATE/6_question.yml index fe6a4ee3f..4a1344628 100644 --- a/.github/ISSUE_TEMPLATE/6_question.yml +++ b/.github/ISSUE_TEMPLATE/6_question.yml @@ -26,7 +26,7 @@ body: required: true - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - - label: I've verified that I'm running yt-dlp version **2023.01.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.02.17** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. 
DO NOT post duplicates required: true @@ -57,7 +57,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.01.06 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.02.17 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -65,7 +65,7 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.01.06, Current version: 2023.01.06 - yt-dlp is up to date (2023.01.06) + Latest version: 2023.02.17, Current version: 2023.02.17 + yt-dlp is up to date (2023.02.17) render: shell diff --git a/yt_dlp/version.py b/yt_dlp/version.py index f722ec665..3c92a85e1 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,8 +1,8 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2023.01.06' +__version__ = '2023.02.17' -RELEASE_GIT_HEAD = '6becd2508' +RELEASE_GIT_HEAD = 'a0a7c0154' VARIANT = None From 17ca19ab60a6a13eb8a629c51442b5248b0d8394 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 17 Feb 2023 18:38:05 +0530 Subject: [PATCH 2005/2552] [cleanup] Fix `Changelog` --- Changelog.md | 165 +-------------------------------------------------- 1 file changed, 1 insertion(+), 164 deletions(-) diff --git a/Changelog.md b/Changelog.md index 36856e016..8d3ac089c 100644 --- a/Changelog.md +++ b/Changelog.md @@ -10,7 +10,7 @@ * Dispatch the workflow https://github.com/yt-dlp/yt-dlp/actions/workflows/build.yml on master --> -# 2023.02.17 +### 2023.02.17 * Merge youtube-dl: Upto [commit/2dd6c6e](https://github.com/ytdl-org/youtube-dl/commit/2dd6c6e) * Fix `--concat-playlist` @@ -101,169 +101,6 @@ * [extractor/drtv] Fix bug in [ab4cbef](https://github.com/yt-dlp/yt-dlp/commit/ab4cbef) by [bashonly](https://github.com/bashonly) -### 2023.02.17 - -#### Core changes -### Core changes -- [Bugfix for 39f32f1715c0dffb7626dda7307db6388bb7abaa](https://github.com/yt-dlp/yt-dlp/commit/9ebac35577e61c3d25fafc959655fa3ab04ca7ef) by [pukkandan](https://github.com/pukkandan) -- [Bugfix for 39f32f1715c0dffb7626dda7307db6388bb7abaa](https://github.com/yt-dlp/yt-dlp/commit/c154302c588c3d4362cec4fc5545e7e5d2bcf7a3) by [pukkandan](https://github.com/pukkandan) -- [Fix `--concat-playlist`](https://github.com/yt-dlp/yt-dlp/commit/59d7de0da545944c48a82fc2937b996d7cd8cc9c) by [pukkandan](https://github.com/pukkandan) -- [Imply `--no-progress` when `--print`](https://github.com/yt-dlp/yt-dlp/commit/5712943b764ba819ef479524c32700228603817a) by [pukkandan](https://github.com/pukkandan) -- [Improve default subtitle language selection](https://github.com/yt-dlp/yt-dlp/commit/376aa24b1541e2bfb23337c0ae9bafa5bb3787f1) ([#6240](https://github.com/yt-dlp/yt-dlp/issues/6240)) by [sdht0](https://github.com/sdht0) -- [Make `title` completely non-fatal](https://github.com/yt-dlp/yt-dlp/commit/7aefd19afed357c80743405ec2ace2148cba42e3) by [pukkandan](https://github.com/pukkandan) -- [Sanitize formats before sorting](https://github.com/yt-dlp/yt-dlp/commit/39f32f1715c0dffb7626dda7307db6388bb7abaa) by [pukkandan](https://github.com/pukkandan) -- [Support module level `__bool__` and 
`property`](https://github.com/yt-dlp/yt-dlp/commit/754c84e2e416cf6609dd0e4632b4985a08d34043) by [pukkandan](https://github.com/pukkandan) -- [Update to ytdl-commit-2dd6c6e](https://github.com/yt-dlp/yt-dlp/commit/48fde8ac4ccbaaea868f6378814dde395f649fbf) by [pukkandan](https://github.com/pukkandan) -- [extractor/douyutv]: [Use new API](https://github.com/yt-dlp/yt-dlp/commit/f14c2333481c63c24017a41ded7d8f36726504b7) ([#6074](https://github.com/yt-dlp/yt-dlp/issues/6074)) by [hatienl0i261299](https://github.com/hatienl0i261299) -- compat_utils - - [Improve `passthrough_module`](https://github.com/yt-dlp/yt-dlp/commit/88426d9446758c707fb511408f2d6f56de952db4) by [pukkandan](https://github.com/pukkandan) - - [Simplify `EnhancedModule`](https://github.com/yt-dlp/yt-dlp/commit/768a00178109508893488e53a0e720b117fbccf6) by [pukkandan](https://github.com/pukkandan) -- dependencies - - [Standardize `Cryptodome` imports](https://github.com/yt-dlp/yt-dlp/commit/f6a765ceb59c55aea06921880c1c87d1ff36e5de) by [pukkandan](https://github.com/pukkandan) -- jsinterp - - [Support `if` statements](https://github.com/yt-dlp/yt-dlp/commit/8b008d62544b82e24a0ba36c30e8e51855d93419) by [pukkandan](https://github.com/pukkandan) -- plugins - - [Fix zip search paths](https://github.com/yt-dlp/yt-dlp/commit/88d8928bf7630801865cf8728ae5c77234324b7b) by [pukkandan](https://github.com/pukkandan) -- utils - - [Don't use Content-length with encoding](https://github.com/yt-dlp/yt-dlp/commit/65e5c021e7c5f23ecbc6a982b72a02ac6cd6900d) ([#6176](https://github.com/yt-dlp/yt-dlp/issues/6176)) by [felixonmars](https://github.com/felixonmars) - - [Fix `time_seconds` to use the provided TZ](https://github.com/yt-dlp/yt-dlp/commit/83c4970e52839ce8761ec61bd19d549aed7d7920) ([#6118](https://github.com/yt-dlp/yt-dlp/issues/6118)) by [Grub4K](https://github.com/Grub4K), [Lesmiscore](https://github.com/Lesmiscore) - - [Fix race condition in `make_dir`](https://github.com/yt-dlp/yt-dlp/commit/b25d6cb96337d479bdcb41768356da414c3aa835) ([#6089](https://github.com/yt-dlp/yt-dlp/issues/6089)) by [aionescu](https://github.com/aionescu) - - [Use local kernel32 for file locking on Windows](https://github.com/yt-dlp/yt-dlp/commit/37e325b92ff9d784715ac0e5d1f7d96bf5f45ad9) by [Grub4K](https://github.com/Grub4K) - - traverse_obj - - [Fix more bugs](https://github.com/yt-dlp/yt-dlp/commit/6839ae1f6dde4c0442619e351b3f0442312ab4f9) by [pukkandan](https://github.com/pukkandan) - - [Fix several behavioral problems](https://github.com/yt-dlp/yt-dlp/commit/b1bde57bef878478e3503ab07190fd207914ade9) by [Grub4K](https://github.com/Grub4K) - - [Various improvements](https://github.com/yt-dlp/yt-dlp/commit/776995bc109c5cd1aa56b684fada2ce718a386ec) by [Grub4K](https://github.com/Grub4K) -### Extractor changes -- [Fix `_search_nuxt_data`](https://github.com/yt-dlp/yt-dlp/commit/b23167e7542c177f32b22b29857b637dc4aede69) ([#6062](https://github.com/yt-dlp/yt-dlp/issues/6062)) by [LowSuggestion912](https://github.com/LowSuggestion912) -- 91porn - - [Fix title and comment extraction](https://github.com/yt-dlp/yt-dlp/commit/c085cc2def9862ac8a7619ce8ea5dcc177325719) ([#5932](https://github.com/yt-dlp/yt-dlp/issues/5932)) by [pmitchell86](https://github.com/pmitchell86) -- abematv - - [Cache user token whenever appropriate](https://github.com/yt-dlp/yt-dlp/commit/a4f16832213d9e29beecf685d6cd09a2f0b48c87) ([#6216](https://github.com/yt-dlp/yt-dlp/issues/6216)) by [Lesmiscore](https://github.com/Lesmiscore) -- anchorfm - - [Add episode 
extractor](https://github.com/yt-dlp/yt-dlp/commit/a4ad59ff2ded208bf33f6fe07299a3449eadccdc) ([#6092](https://github.com/yt-dlp/yt-dlp/issues/6092)) by [bashonly](https://github.com/bashonly), [HobbyistDev](https://github.com/HobbyistDev) -- bfmtv - - [Support `rmc` prefix](https://github.com/yt-dlp/yt-dlp/commit/20266508dd6247dd3cf0e97b9b9f14c3afc046db) ([#6025](https://github.com/yt-dlp/yt-dlp/issues/6025)) by [carusocr](https://github.com/carusocr) -- biliintl - - [Add intro and ending chapters](https://github.com/yt-dlp/yt-dlp/commit/0ba87dd279d3565ed93c559cf7880ad61eb83af8) ([#6018](https://github.com/yt-dlp/yt-dlp/issues/6018)) by [HobbyistDev](https://github.com/HobbyistDev) -- boxcast - - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/9acca71237f42a4775008e51fe26e42f0a39c552) ([#5983](https://github.com/yt-dlp/yt-dlp/issues/5983)) by [HobbyistDev](https://github.com/HobbyistDev) -- clyp - - [Support `wav`](https://github.com/yt-dlp/yt-dlp/commit/cc13293c2819b5461be211a9729fd02bb1e2f476) ([#6102](https://github.com/yt-dlp/yt-dlp/issues/6102)) by [qulaz](https://github.com/qulaz) -- crunchyroll - - [Add intro chapter](https://github.com/yt-dlp/yt-dlp/commit/93abb7406b95793f6872d12979b91d5f336b4f43) ([#6023](https://github.com/yt-dlp/yt-dlp/issues/6023)) by [ByteDream](https://github.com/ByteDream) - - [Better message for premium videos](https://github.com/yt-dlp/yt-dlp/commit/44699d10dc8de9c6a338f4a8e5c63506ec4d2118) by [pukkandan](https://github.com/pukkandan) - - [Fix incorrect premium-only error](https://github.com/yt-dlp/yt-dlp/commit/c9d14bd22ab31e2a41f9f8061843668a06db583b) by [Grub4K](https://github.com/Grub4K) -- drtv - - [Fix bug in ab4cbef](https://github.com/yt-dlp/yt-dlp/commit/7481998b169b2a52049fc33bff82034d6563ead4) ([#6034](https://github.com/yt-dlp/yt-dlp/issues/6034)) by [bashonly](https://github.com/bashonly) -- ebay - - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/da880559a6ecbbf374cc9f3378e696b55b9599af) ([#6170](https://github.com/yt-dlp/yt-dlp/issues/6170)) by [JChris246](https://github.com/JChris246) -- embedly - - [Embedded links may be for other extractors](https://github.com/yt-dlp/yt-dlp/commit/87ebab0615b1bf9b14b478b055e7059d630b4833) by [pukkandan](https://github.com/pukkandan) -- freesound - - [Workaround invalid URL in webpage](https://github.com/yt-dlp/yt-dlp/commit/9cfdbcbf3f17be51f5b6bb9bb6d880b2f3d67362) ([#6147](https://github.com/yt-dlp/yt-dlp/issues/6147)) by [rebane2001](https://github.com/rebane2001) -- generic - - [Avoid catastrophic backtracking in KVS regex](https://github.com/yt-dlp/yt-dlp/commit/8aa0bd5d10627ece3c1815c01d02fb8bf22847a7) by [bashonly](https://github.com/bashonly) -- goplay - - [Use new API](https://github.com/yt-dlp/yt-dlp/commit/d27bde98832e3b7ffb39f3cf6346011b97bb3bc3) ([#6151](https://github.com/yt-dlp/yt-dlp/issues/6151)) by [jeroenj](https://github.com/jeroenj) -- hidive - - [Fix subtitles and age-restriction](https://github.com/yt-dlp/yt-dlp/commit/7708df8da05c94270b43e0630e4e20f6d2d62c55) ([#5828](https://github.com/yt-dlp/yt-dlp/issues/5828)) by [chexxor](https://github.com/chexxor) -- huya - - [Support HD streams](https://github.com/yt-dlp/yt-dlp/commit/fbbb5508ea98ed8709847f5ecced7d70ff05e0ee) ([#6172](https://github.com/yt-dlp/yt-dlp/issues/6172)) by [felixonmars](https://github.com/felixonmars) -- hypergryph - - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/31c279a2a2c2ef402a9e6dad9992b310d16439a6) ([#6094](https://github.com/yt-dlp/yt-dlp/issues/6094)) by 
[bashonly](https://github.com/bashonly), [HobbyistDev](https://github.com/HobbyistDev) -- moviepilot - - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/c62e64cf0122e52fa2175dd1b004ca6b8e1d82af) ([#5954](https://github.com/yt-dlp/yt-dlp/issues/5954)) by [panatexxa](https://github.com/panatexxa) -- nbc - - [Fix XML parsing](https://github.com/yt-dlp/yt-dlp/commit/176a068cde4f2d9dfa0336168caead0b1edcb8ac) by [bashonly](https://github.com/bashonly) - - [Fix `NBC` and `NBCStations` extractors](https://github.com/yt-dlp/yt-dlp/commit/cb73b8460c3ce6d37ab651a4e44bb23b10056154) ([#6033](https://github.com/yt-dlp/yt-dlp/issues/6033)) by [bashonly](https://github.com/bashonly) -- nebula - - [Remove broken cookie support](https://github.com/yt-dlp/yt-dlp/commit/d50ea3ce5abc3b0defc0e5d1e22b22ce9b01b07b) ([#5979](https://github.com/yt-dlp/yt-dlp/issues/5979)) by [hheimbuerger](https://github.com/hheimbuerger) -- nfl - - [Add `NFLPlus` extractors](https://github.com/yt-dlp/yt-dlp/commit/8b37c58f8b5494504acdb5ebe3f8bbd26230f725) ([#6222](https://github.com/yt-dlp/yt-dlp/issues/6222)) by [bashonly](https://github.com/bashonly) -- niconico - - [Add support for like history](https://github.com/yt-dlp/yt-dlp/commit/3b161265add30613bde2e46fca214fe94d09e651) ([#5705](https://github.com/yt-dlp/yt-dlp/issues/5705)) by [Matumo](https://github.com/Matumo), [pukkandan](https://github.com/pukkandan) -- nitter - - [Update instance list](https://github.com/yt-dlp/yt-dlp/commit/a9189510baadf0dccd2d4d363bc6f3a441128bb0) ([#6236](https://github.com/yt-dlp/yt-dlp/issues/6236)) by [OIRNOIR](https://github.com/OIRNOIR) -- npo - - [Fix extractor and add HD support](https://github.com/yt-dlp/yt-dlp/commit/cc2389c8ac72a514d4e002a0f6ca5a7d65c7eff0) ([#6155](https://github.com/yt-dlp/yt-dlp/issues/6155)) by [seproDev](https://github.com/seproDev) -- nzonscreen - - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/d3bb187f01e1e30db05e639fc23a2e1935d777fe) ([#6208](https://github.com/yt-dlp/yt-dlp/issues/6208)) by [gregsadetsky](https://github.com/gregsadetsky), [pukkandan](https://github.com/pukkandan) -- odkmedia - - [Add `OnDemandChinaEpisodeIE`](https://github.com/yt-dlp/yt-dlp/commit/10fd9e6ee833c88edf6c633f864f42843a708d32) ([#6116](https://github.com/yt-dlp/yt-dlp/issues/6116)) by [HobbyistDev](https://github.com/HobbyistDev), [pukkandan](https://github.com/pukkandan) -- pornez - - [Handle relative URLs in iframe](https://github.com/yt-dlp/yt-dlp/commit/f7efe6dc958eb0689cb9534ff0b4e592040be8df) ([#6171](https://github.com/yt-dlp/yt-dlp/issues/6171)) by [JChris246](https://github.com/JChris246) -- radiko - - [Fix format sorting for Time Free](https://github.com/yt-dlp/yt-dlp/commit/203a06f8554df6db07d8f20f465ecbfe8a14e591) ([#6159](https://github.com/yt-dlp/yt-dlp/issues/6159)) by [road-master](https://github.com/road-master) -- rcs - - [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/c6b657867ad68af6b930ed0aa11ec5d93ee187b7) ([#5700](https://github.com/yt-dlp/yt-dlp/issues/5700)) by [nixxo](https://github.com/nixxo), [pukkandan](https://github.com/pukkandan) -- reddit - - [Support user posts](https://github.com/yt-dlp/yt-dlp/commit/c77df98b1a477a020a57141464d10c0f4d0fdbc9) ([#6173](https://github.com/yt-dlp/yt-dlp/issues/6173)) by [OMEGARAZER](https://github.com/OMEGARAZER) -- rozhlas - - [Add extractor RozhlasVltavaIE](https://github.com/yt-dlp/yt-dlp/commit/355d781bed497cbcb254bf2a2737b83fa51c84ea) ([#5951](https://github.com/yt-dlp/yt-dlp/issues/5951)) by [amra](https://github.com/amra) -- rumble - 
- [Fix format sorting](https://github.com/yt-dlp/yt-dlp/commit/acacb57c7e173b93c6e0f0c43e61b9b2912719d8) by [pukkandan](https://github.com/pukkandan) -- servus - - [Rewrite extractor](https://github.com/yt-dlp/yt-dlp/commit/f40e32fb1ac67be5bdbc8e32a3c235abfc4be260) ([#6036](https://github.com/yt-dlp/yt-dlp/issues/6036)) by [Ashish0804](https://github.com/Ashish0804), [FrankZ85](https://github.com/FrankZ85), [StefanLobbenmeier](https://github.com/StefanLobbenmeier) -- slideslive - - [Fix slides and chapters/duration](https://github.com/yt-dlp/yt-dlp/commit/5ab3534d44231f7711398bc3cfc520e2efd09f50) ([#6024](https://github.com/yt-dlp/yt-dlp/issues/6024)) by [bashonly](https://github.com/bashonly) -- sportdeutschland - - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5e1a54f63e393c218a40949012ff0de0ce63cb15) ([#6041](https://github.com/yt-dlp/yt-dlp/issues/6041)) by [FriedrichRehren](https://github.com/FriedrichRehren) -- stripchat - - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/7d5f919bad07017f4b39b55725491b1e9717d47a) ([#5985](https://github.com/yt-dlp/yt-dlp/issues/5985)) by [bashonly](https://github.com/bashonly), [JChris246](https://github.com/JChris246) -- tempo - - [Add IVXPlayer extractor](https://github.com/yt-dlp/yt-dlp/commit/30031be974d210f451100339699ef03b0ddb5f10) ([#5837](https://github.com/yt-dlp/yt-dlp/issues/5837)) by [HobbyistDev](https://github.com/HobbyistDev) -- tnaflix - - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/989f47b6315541989bb507f26b431d9586430995) ([#6086](https://github.com/yt-dlp/yt-dlp/issues/6086)) by [bashonly](https://github.com/bashonly), [oxamun](https://github.com/oxamun) -- tvp - - [Support `stream.tvp.pl`](https://github.com/yt-dlp/yt-dlp/commit/a31d0fa6c315b1145d682361149003d98f1e3782) ([#6139](https://github.com/yt-dlp/yt-dlp/issues/6139)) by [selfisekai](https://github.com/selfisekai) -- twitter - - [Fix `--no-playlist` and add media `view_count` when using GraphQL](https://github.com/yt-dlp/yt-dlp/commit/b6795fd310f1dd61dddc9fd08e52fe485bdc8a3e) ([#6211](https://github.com/yt-dlp/yt-dlp/issues/6211)) by [Grub4K](https://github.com/Grub4K) - - [Fix graphql extraction on some tweets](https://github.com/yt-dlp/yt-dlp/commit/7543c9c99bcb116b085fdb1f41b84a0ead04c05d) ([#6075](https://github.com/yt-dlp/yt-dlp/issues/6075)) by [selfisekai](https://github.com/selfisekai) -- txxx - - [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/389896df85ed14eaf74f72531da6c4491d6b73b0) ([#5240](https://github.com/yt-dlp/yt-dlp/issues/5240)) by [chio0hai](https://github.com/chio0hai) -- vimeo - - [Fix `playerConfig` extraction](https://github.com/yt-dlp/yt-dlp/commit/c0cd13fb1c71b842c3d272d0273c03542b467766) ([#6203](https://github.com/yt-dlp/yt-dlp/issues/6203)) by [bashonly](https://github.com/bashonly), [LeoniePhiline](https://github.com/LeoniePhiline) -- viu - - [Add `ViuOTTIndonesiaIE` extractor](https://github.com/yt-dlp/yt-dlp/commit/72671a212d7c939329cb5d34335fa089dd3acbd3) ([#6099](https://github.com/yt-dlp/yt-dlp/issues/6099)) by [HobbyistDev](https://github.com/HobbyistDev) -- vk - - [Fix playlists for new API](https://github.com/yt-dlp/yt-dlp/commit/a9c685453f7019bee94170f936619c6db76c964e) ([#6122](https://github.com/yt-dlp/yt-dlp/issues/6122)) by [the-marenga](https://github.com/the-marenga) -- vlive - - [Replace with `VLiveWebArchiveIE`](https://github.com/yt-dlp/yt-dlp/commit/b3eaab7ca2e118d4db73dcb44afd9c8717db8b67) ([#6196](https://github.com/yt-dlp/yt-dlp/issues/6196)) by 
[seproDev](https://github.com/seproDev) -- vocaroo - - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/e4a8b1769e19755acba6d8f212208359905a3159) ([#6117](https://github.com/yt-dlp/yt-dlp/issues/6117)) by [qbnu](https://github.com/qbnu), [SuperSonicHub1](https://github.com/SuperSonicHub1) -- wrestleuniverse - - [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/e61acb40b2cb6ef45508d72235026d458c9d5dff) ([#6158](https://github.com/yt-dlp/yt-dlp/issues/6158)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) -- ximalaya - - [Update album `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/417cdaae08fc447c9d15c53a88e2e9a027cdbf0a) ([#6110](https://github.com/yt-dlp/yt-dlp/issues/6110)) by [carusocr](https://github.com/carusocr) -- yappy - - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/361630015535026712bdb67f804a15b65ff9ee7e) ([#6111](https://github.com/yt-dlp/yt-dlp/issues/6111)) by [HobbyistDev](https://github.com/HobbyistDev) -- youtube - - [Add hyperpipe instances](https://github.com/yt-dlp/yt-dlp/commit/78a78fa74dbc888d20f1b65e1382bf99131597d5) ([#6020](https://github.com/yt-dlp/yt-dlp/issues/6020)) by [Generator](https://github.com/Generator) - - [Fix `uploader_id` extraction](https://github.com/yt-dlp/yt-dlp/commit/149eb0bbf34fa8fdf8d1e2aa28e17479d099e26b) by [bashonly](https://github.com/bashonly) - - [Handle `consent.youtube`](https://github.com/yt-dlp/yt-dlp/commit/b032ff0f032512bd6fc70c9c1994d906eacc06cb) by [pukkandan](https://github.com/pukkandan) - - [Support `/live/` URL](https://github.com/yt-dlp/yt-dlp/commit/dad2210c0cb9cf03702a9511817ee5ec646d7bc8) by [pukkandan](https://github.com/pukkandan) - - [Update invidious and piped instances](https://github.com/yt-dlp/yt-dlp/commit/05799a48c7dec12b34c8bf951c8d2eceedda59f8) ([#6030](https://github.com/yt-dlp/yt-dlp/issues/6030)) by [rohieb](https://github.com/rohieb) - - [`uploader_id` includes `@` with handle](https://github.com/yt-dlp/yt-dlp/commit/c61cf091a54d3aa3c611722035ccde5ecfe981bb) by [bashonly](https://github.com/bashonly) -- zdf - - [Use android API endpoint for UHD downloads](https://github.com/yt-dlp/yt-dlp/commit/0fe87a8730638490415d630f48e61d264d89c358) ([#6150](https://github.com/yt-dlp/yt-dlp/issues/6150)) by [seproDev](https://github.com/seproDev) -### Downloader changes -- hls - - [Allow extractors to provide AES key](https://github.com/yt-dlp/yt-dlp/commit/7e68567e508168b345266c0c19812ad50a829eaa) ([#6158](https://github.com/yt-dlp/yt-dlp/issues/6158)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) -### Postprocessor changes -- extractaudio - - [Handle outtmpl without ext](https://github.com/yt-dlp/yt-dlp/commit/f737fb16d8234408c85bc189ccc926fea000515b) ([#6005](https://github.com/yt-dlp/yt-dlp/issues/6005)) by [carusocr](https://github.com/carusocr) -- pyinst - - [Fix for pyinstaller 5.8](https://github.com/yt-dlp/yt-dlp/commit/2e269bd998c61efaf7500907d114a56e5e83e65e) by [pukkandan](https://github.com/pukkandan) -### Misc. 
changes -- build - - [Update pyinstaller](https://github.com/yt-dlp/yt-dlp/commit/365b9006051ac7d735c20bb63c4907b758233048) by [pukkandan](https://github.com/pukkandan) -- cleanup - - Miscellaneous: [76c9c52](https://github.com/yt-dlp/yt-dlp/commit/76c9c523071150053df7b56956646b680b6a6e05) by [pukkandan](https://github.com/pukkandan) -- devscripts - - [Provide pyinstaller hooks](https://github.com/yt-dlp/yt-dlp/commit/acb1042a9ffa8769fe691beac1011d6da1fcf321) by [pukkandan](https://github.com/pukkandan) -- pyinstaller - - [Analyze sub-modules of `Cryptodome`](https://github.com/yt-dlp/yt-dlp/commit/b85faf6ffb700058e774e99c04304a7a9257cdd0) by [pukkandan](https://github.com/pukkandan) - ### 2023.01.06 * Fix config locations by [Grub4k](https://github.com/Grub4k), [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan) From 18d295c9e0f95adc179eef345b7af64d6372db78 Mon Sep 17 00:00:00 2001 From: Zhong Lufan Date: Fri, 17 Feb 2023 21:11:16 +0800 Subject: [PATCH 2006/2552] [extractor/tencent] Add more formats and info (#5950) Authored by: Hill-98 --- yt_dlp/extractor/tencent.py | 88 ++++++++++++++++++++++++++----------- 1 file changed, 63 insertions(+), 25 deletions(-) diff --git a/yt_dlp/extractor/tencent.py b/yt_dlp/extractor/tencent.py index 44cae0472..42a2175b0 100644 --- a/yt_dlp/extractor/tencent.py +++ b/yt_dlp/extractor/tencent.py @@ -8,6 +8,7 @@ from .common import InfoExtractor from ..aes import aes_cbc_encrypt_bytes from ..utils import ( ExtractorError, + float_or_none, determine_ext, int_or_none, js_to_json, @@ -19,6 +20,16 @@ from ..utils import ( class TencentBaseIE(InfoExtractor): """Subclasses must set _API_URL, _APP_VERSION, _PLATFORM, _HOST, _REFERER""" + def _check_api_response(self, api_response): + msg = api_response.get('msg') + if api_response.get('code') != '0.0' and msg is not None: + if msg in ( + '您所在区域暂无此内容版权(如设置VPN请关闭后重试)', + 'This content is not available in your area due to copyright restrictions. Please choose other videos.' 
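+                # the two strings above are the service's geo-block notice,
+                # in its Chinese and English variants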
+ ): + self.raise_geo_restricted() + raise ExtractorError(f'Tencent said: {msg}') + def _get_ckey(self, video_id, url, guid): ua = self.get_param('http_headers')['User-Agent'] @@ -47,6 +58,11 @@ class TencentBaseIE(InfoExtractor): 'sphttps': '1', # Enable HTTPS 'otype': 'json', 'spwm': '1', + 'hevclv': '28', # Enable HEVC + 'drm': '40', # Enable DRM + # For HDR + 'spvideo': '4', + 'spsfrhdr': '100', # For SHD 'host': self._HOST, 'referer': self._REFERER, @@ -63,7 +79,6 @@ class TencentBaseIE(InfoExtractor): def _extract_video_formats_and_subtitles(self, api_response, video_id): video_response = api_response['vl']['vi'][0] - video_width, video_height = video_response.get('vw'), video_response.get('vh') formats, subtitles = [], {} for video_format in video_response['ul']['ui']: @@ -71,47 +86,61 @@ class TencentBaseIE(InfoExtractor): fmts, subs = self._extract_m3u8_formats_and_subtitles( video_format['url'] + traverse_obj(video_format, ('hls', 'pt'), default=''), video_id, 'mp4', fatal=False) - for f in fmts: - f.update({'width': video_width, 'height': video_height}) formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) else: formats.append({ 'url': f'{video_format["url"]}{video_response["fn"]}?vkey={video_response["fvkey"]}', - 'width': video_width, - 'height': video_height, 'ext': 'mp4', }) + identifier = video_response.get('br') + format_response = traverse_obj( + api_response, ('fl', 'fi', lambda _, v: v['br'] == identifier), + expected_type=dict, get_all=False) or {} + common_info = { + 'width': video_response.get('vw'), + 'height': video_response.get('vh'), + 'abr': float_or_none(format_response.get('audiobandwidth'), scale=1000), + 'vbr': float_or_none(format_response.get('bandwidth'), scale=1000), + 'fps': format_response.get('vfps'), + 'format': format_response.get('sname'), + 'format_id': format_response.get('name'), + 'format_note': format_response.get('resolution'), + 'dynamic_range': {'hdr10': 'hdr10'}.get(format_response.get('name'), 'sdr'), + 'has_drm': format_response.get('drm', 0) != 0, + } + for f in formats: + f.update(common_info) + return formats, subtitles - def _extract_video_native_subtitles(self, api_response, subtitles_format): + def _extract_video_native_subtitles(self, api_response): subtitles = {} for subtitle in traverse_obj(api_response, ('sfl', 'fi')) or (): subtitles.setdefault(subtitle['lang'].lower(), []).append({ 'url': subtitle['url'], - 'ext': subtitles_format, + 'ext': 'srt' if subtitle.get('captionType') == 1 else 'vtt', 'protocol': 'm3u8_native' if determine_ext(subtitle['url']) == 'm3u8' else 'http', }) return subtitles def _extract_all_video_formats_and_subtitles(self, url, video_id, series_id): - formats, subtitles = [], {} - for video_format, subtitle_format, video_quality in ( - # '': 480p, 'shd': 720p, 'fhd': 1080p - ('mp4', 'srt', ''), ('hls', 'vtt', 'shd'), ('hls', 'vtt', 'fhd')): - api_response = self._get_video_api_response( - url, video_id, series_id, subtitle_format, video_format, video_quality) - - if api_response.get('em') != 0 and api_response.get('exem') != 0: - if '您所在区域暂无此内容版权' in api_response.get('msg'): - self.raise_geo_restricted() - raise ExtractorError(f'Tencent said: {api_response.get("msg")}') + api_responses = [self._get_video_api_response(url, video_id, series_id, 'srt', 'hls', 'hd')] + self._check_api_response(api_responses[0]) + qualities = traverse_obj(api_responses, (0, 'fl', 'fi', ..., 'name')) or ('shd', 'fhd') + for q in qualities: + if q not in ('ld', 'sd', 'hd'): + 
api_responses.append(self._get_video_api_response( + url, video_id, series_id, 'vtt', 'hls', q)) + self._check_api_response(api_responses[-1]) + formats, subtitles = [], {} + for api_response in api_responses: fmts, subs = self._extract_video_formats_and_subtitles(api_response, video_id) - native_subtitles = self._extract_video_native_subtitles(api_response, subtitle_format) + native_subtitles = self._extract_video_native_subtitles(api_response) formats.extend(fmts) self._merge_subtitles(subs, native_subtitles, target=subtitles) @@ -120,7 +149,7 @@ class TencentBaseIE(InfoExtractor): def _get_clean_title(self, title): return re.sub( - r'\s*[_\-]\s*(?:Watch online|腾讯视频|(?:高清)?1080P在线观看平台).*?$', + r'\s*[_\-]\s*(?:Watch online|Watch HD Video Online|WeTV|腾讯视频|(?:高清)?1080P在线观看平台).*?$', '', title or '').strip() or None @@ -147,27 +176,29 @@ class VQQVideoIE(VQQBaseIE): _TESTS = [{ 'url': 'https://v.qq.com/x/page/q326831cny0.html', - 'md5': '826ef93682df09e3deac4a6e6e8cdb6e', + 'md5': '84568b3722e15e9cd023b5594558c4a7', 'info_dict': { 'id': 'q326831cny0', 'ext': 'mp4', 'title': '我是选手:雷霆裂阵,终极时刻', 'description': 'md5:e7ed70be89244017dac2a835a10aeb1e', 'thumbnail': r're:^https?://[^?#]+q326831cny0', + 'format_id': r're:^shd', }, }, { 'url': 'https://v.qq.com/x/page/o3013za7cse.html', - 'md5': 'b91cbbeada22ef8cc4b06df53e36fa21', + 'md5': 'cc431c4f9114a55643893c2c8ebf5592', 'info_dict': { 'id': 'o3013za7cse', 'ext': 'mp4', 'title': '欧阳娜娜VLOG', 'description': 'md5:29fe847497a98e04a8c3826e499edd2e', 'thumbnail': r're:^https?://[^?#]+o3013za7cse', + 'format_id': r're:^shd', }, }, { 'url': 'https://v.qq.com/x/cover/7ce5noezvafma27/a00269ix3l8.html', - 'md5': '71459c5375c617c265a22f083facce67', + 'md5': '87968df6238a65d2478f19c25adf850b', 'info_dict': { 'id': 'a00269ix3l8', 'ext': 'mp4', @@ -175,10 +206,11 @@ class VQQVideoIE(VQQBaseIE): 'description': 'md5:8cae3534327315b3872fbef5e51b5c5b', 'thumbnail': r're:^https?://[^?#]+7ce5noezvafma27', 'series': '鸡毛飞上天', + 'format_id': r're:^shd', }, }, { 'url': 'https://v.qq.com/x/cover/mzc00200p29k31e/s0043cwsgj0.html', - 'md5': '96b9fd4a189fdd4078c111f21d7ac1bc', + 'md5': 'fadd10bf88aec3420f06f19ee1d24c5b', 'info_dict': { 'id': 's0043cwsgj0', 'ext': 'mp4', @@ -186,6 +218,7 @@ class VQQVideoIE(VQQBaseIE): 'description': 'md5:1d8c3a0b8729ae3827fa5b2d3ebd5213', 'thumbnail': r're:^https?://[^?#]+s0043cwsgj0', 'series': '青年理工工作者生活研究所', + 'format_id': r're:^shd', }, }, { # Geo-restricted to China @@ -319,6 +352,7 @@ class WeTvEpisodeIE(WeTvBaseIE): 'episode': 'Episode 1', 'episode_number': 1, 'duration': 2835, + 'format_id': r're:^shd', }, }, { 'url': 'https://wetv.vip/en/play/u37kgfnfzs73kiu/p0039b9nvik', @@ -333,6 +367,7 @@ class WeTvEpisodeIE(WeTvBaseIE): 'episode': 'Episode 1', 'episode_number': 1, 'duration': 2454, + 'format_id': r're:^shd', }, }, { 'url': 'https://wetv.vip/en/play/lcxgwod5hapghvw-WeTV-PICK-A-BOO/i0042y00lxp-Zhao-Lusi-Describes-The-First-Experiences-She-Had-In-Who-Rules-The-World-%7C-WeTV-PICK-A-BOO', @@ -342,11 +377,12 @@ class WeTvEpisodeIE(WeTvBaseIE): 'ext': 'mp4', 'title': 'md5:f7a0857dbe5fbbe2e7ad630b92b54e6a', 'description': 'md5:76260cb9cdc0ef76826d7ca9d92fadfa', - 'thumbnail': r're:^https?://[^?#]+lcxgwod5hapghvw', + 'thumbnail': r're:^https?://[^?#]+i0042y00lxp', 'series': 'WeTV PICK-A-BOO', 'episode': 'Episode 0', 'episode_number': 0, 'duration': 442, + 'format_id': r're:^shd', }, }] @@ -406,6 +442,7 @@ class IflixEpisodeIE(IflixBaseIE): 'episode': 'Episode 1', 'episode_number': 1, 'duration': 2639, + 'format_id': r're:^shd', }, }, { 
'url': 'https://www.iflix.com/en/play/fvvrcc3ra9lbtt1-Take-My-Brother-Away/i0029sd3gm1-EP1%EF%BC%9ATake-My-Brother-Away', @@ -420,6 +457,7 @@ class IflixEpisodeIE(IflixBaseIE): 'episode': 'Episode 1', 'episode_number': 1, 'duration': 228, + 'format_id': r're:^shd', }, }] From da8e2912b165005f76779a115a071cd6132ceedf Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Thu, 23 Feb 2023 04:18:45 +0100 Subject: [PATCH 2007/2552] [utils] `Popen`: Shim undocumented `text_mode` property Fixes #6317 Authored by: Grub4K --- yt_dlp/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 994239897..4fe718bf0 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -879,6 +879,7 @@ class Popen(subprocess.Popen): env = os.environ.copy() self._fix_pyinstaller_ld_path(env) + self.__text_mode = kwargs.get('encoding') or kwargs.get('errors') or text or kwargs.get('universal_newlines') if text is True: kwargs['universal_newlines'] = True # For 3.6 compatibility kwargs.setdefault('encoding', 'utf-8') @@ -900,7 +901,7 @@ class Popen(subprocess.Popen): @classmethod def run(cls, *args, timeout=None, **kwargs): with cls(*args, **kwargs) as proc: - default = '' if proc.text_mode else b'' + default = '' if proc.__text_mode else b'' stdout, stderr = proc.communicate_or_kill(timeout=timeout) return stdout or default, stderr or default, proc.returncode From cc09083636ce21e58ff74f45eac2dbda507462b0 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 24 Feb 2023 10:39:43 +0530 Subject: [PATCH 2008/2552] [utils] `LenientJSONDecoder`: Parse unclosed objects --- yt_dlp/utils.py | 38 ++++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 4fe718bf0..9ff096433 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -593,21 +593,43 @@ def clean_html(html): class LenientJSONDecoder(json.JSONDecoder): - def __init__(self, *args, transform_source=None, ignore_extra=False, **kwargs): + # TODO: Write tests + def __init__(self, *args, transform_source=None, ignore_extra=False, close_objects=0, **kwargs): self.transform_source, self.ignore_extra = transform_source, ignore_extra + self._close_attempts = 2 * close_objects super().__init__(*args, **kwargs) + @staticmethod + def _close_object(err): + doc = err.doc[:err.pos] + # We need to add comma first to get the correct error message + if err.msg.startswith('Expecting \',\''): + return doc + ',' + elif not doc.endswith(','): + return + + if err.msg.startswith('Expecting property name'): + return doc[:-1] + '}' + elif err.msg.startswith('Expecting value'): + return doc[:-1] + ']' + def decode(self, s): if self.transform_source: s = self.transform_source(s) - try: - if self.ignore_extra: - return self.raw_decode(s.lstrip())[0] - return super().decode(s) - except json.JSONDecodeError as e: - if e.pos is not None: + for attempt in range(self._close_attempts + 1): + try: + if self.ignore_extra: + return self.raw_decode(s.lstrip())[0] + return super().decode(s) + except json.JSONDecodeError as e: + if e.pos is None: + raise + elif attempt < self._close_attempts: + s = self._close_object(e) + if s is not None: + continue raise type(e)(f'{e.msg} in {s[e.pos-10:e.pos+10]!r}', s, e.pos) - raise + assert False, 'Too many attempts to decode JSON' def sanitize_open(filename, open_mode): From 43a3eaf96393b712d60cbcf5c6cb1e90ed7f42f5 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 26 Feb 2023 10:16:30 +0530 Subject: [PATCH 2009/2552] [extractor] Fix DRM 
detection in m3u8 Fixes https://github.com/ytdl-org/youtube-dl/issues/31693#issuecomment-1445202857 --- yt_dlp/extractor/common.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index ebacc87bc..86bef173f 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -2063,6 +2063,7 @@ class InfoExtractor: 'protocol': entry_protocol, 'preference': preference, 'quality': quality, + 'has_drm': has_drm, 'vcodec': 'none' if media_type == 'AUDIO' else None, } for idx in _extract_m3u8_playlist_indices(manifest_url)) @@ -2122,6 +2123,7 @@ class InfoExtractor: 'protocol': entry_protocol, 'preference': preference, 'quality': quality, + 'has_drm': has_drm, } resolution = last_stream_inf.get('RESOLUTION') if resolution: From 8e9fe43cd393e69fa49b3d842aa3180c1d105b8f Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 26 Feb 2023 10:27:04 +0530 Subject: [PATCH 2010/2552] [extractor/generic] Handle basic-auth when checking redirects Closes #6352 --- yt_dlp/extractor/generic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 55e55d524..d76ef3e31 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -15,6 +15,7 @@ from ..utils import ( UnsupportedError, determine_ext, dict_get, + extract_basic_auth, format_field, int_or_none, is_html, @@ -2372,9 +2373,8 @@ class GenericIE(InfoExtractor): **smuggled_data.get('http_headers', {}) }) new_url = full_response.geturl() - if new_url == urllib.parse.urlparse(url)._replace(scheme='https').geturl(): - url = new_url - elif url != new_url: + url = urllib.parse.urlparse(url)._replace(scheme=urllib.parse.urlparse(new_url).scheme).geturl() + if new_url != extract_basic_auth(url)[0]: self.report_following_redirect(new_url) if force_videoid: new_url = smuggle_url(new_url, {'force_videoid': force_videoid}) From 4d248e29d20d983ededab0b03d4fe69dff9eb4ed Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 28 Feb 2023 23:09:20 +0530 Subject: [PATCH 2011/2552] [extractor/GoogleDrive] Fix some audio Only those with source url, but no confirmation page --- yt_dlp/extractor/googledrive.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/googledrive.py b/yt_dlp/extractor/googledrive.py index e027ea7c4..9e2ccde00 100644 --- a/yt_dlp/extractor/googledrive.py +++ b/yt_dlp/extractor/googledrive.py @@ -3,8 +3,8 @@ import re from .common import InfoExtractor from ..compat import compat_parse_qs from ..utils import ( - determine_ext, ExtractorError, + determine_ext, get_element_by_class, int_or_none, lowercase_escape, @@ -163,15 +163,13 @@ class GoogleDriveIE(InfoExtractor): video_id = self._match_id(url) video_info = compat_parse_qs(self._download_webpage( 'https://drive.google.com/get_video_info', - video_id, query={'docid': video_id})) + video_id, 'Downloading video webpage', query={'docid': video_id})) def get_value(key): return try_get(video_info, lambda x: x[key][0]) reason = get_value('reason') title = get_value('title') - if not title and reason: - raise ExtractorError(reason, expected=True) formats = [] fmt_stream_map = (get_value('fmt_stream_map') or '').split(',') @@ -216,6 +214,11 @@ class GoogleDriveIE(InfoExtractor): urlh = request_source_file(source_url, 'source') if urlh: def add_source_format(urlh): + nonlocal title + if not title: + title = self._search_regex( + r'\bfilename="([^"]+)"', urlh.headers.get('Content-Disposition'), + 'title', 
default=None) formats.append({ # Use redirect URLs as download URLs in order to calculate # correct cookies in _calc_cookies. @@ -251,7 +254,10 @@ class GoogleDriveIE(InfoExtractor): or 'unable to extract confirmation code') if not formats and reason: - self.raise_no_formats(reason, expected=True) + if title: + self.raise_no_formats(reason, expected=True) + else: + raise ExtractorError(reason, expected=True) hl = get_value('hl') subtitles_id = None From 5038f6d713303e0967d002216e7a88652401c22a Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 28 Feb 2023 23:03:44 +0530 Subject: [PATCH 2012/2552] [extractor/youtube] Construct dash formats with `range` query Closes #6369 --- yt_dlp/extractor/youtube.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index be82bc689..0227a1f83 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3776,10 +3776,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if no_video: dct['abr'] = tbr if no_audio or no_video: - dct['downloader_options'] = { - # Youtube throttles chunks >~10M - 'http_chunk_size': 10485760, - } + CHUNK_SIZE = 10 << 20 + dct.update({ + 'request_data': b'x', + 'protocol': 'http_dash_segments', + 'fragments': [{ + 'url': update_url_query(dct['url'], { + 'range': f'{range_start}-{min(range_start + CHUNK_SIZE - 1, dct["filesize"])}' + }) + } for range_start in range(0, dct['filesize'], CHUNK_SIZE)] + } if dct['filesize'] else { + 'downloader_options': {'http_chunk_size': CHUNK_SIZE} # No longer useful? + }) + if dct.get('ext'): dct['container'] = dct['ext'] + '_dash' From b059188383eee4fa336ef728dda3ff4bb7335625 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 28 Feb 2023 22:32:20 +0530 Subject: [PATCH 2013/2552] [plugins] Don't look in `.egg` directories Closes #6306 --- yt_dlp/plugins.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/plugins.py b/yt_dlp/plugins.py index 6eecdb4d0..6422c7a51 100644 --- a/yt_dlp/plugins.py +++ b/yt_dlp/plugins.py @@ -88,7 +88,7 @@ class PluginFinder(importlib.abc.MetaPathFinder): candidate = path / parts if candidate.is_dir(): yield candidate - elif path.suffix in ('.zip', '.egg', '.whl'): + elif path.suffix in ('.zip', '.egg', '.whl') and path.is_file(): if parts in dirs_in_zip(path): yield candidate From 65f6e807804d2af5e00f2aecd72bfc43af19324a Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 28 Feb 2023 23:10:54 +0530 Subject: [PATCH 2014/2552] [dependencies] Simplify `Cryptodome` Closes #6292, closes #6272, closes #6338 --- test/test_aes.py | 4 +-- yt_dlp/__pyinstaller/hook-yt_dlp.py | 28 +---------------- yt_dlp/aes.py | 6 ++-- yt_dlp/compat/_legacy.py | 2 +- yt_dlp/compat/compat_utils.py | 2 +- yt_dlp/dependencies/Cryptodome.py | 48 +++++++++++++++++++---------- yt_dlp/dependencies/__init__.py | 2 +- yt_dlp/downloader/hls.py | 2 +- yt_dlp/extractor/bilibili.py | 6 ++-- yt_dlp/extractor/ivi.py | 8 ++--- yt_dlp/extractor/wrestleuniverse.py | 6 ++-- 11 files changed, 51 insertions(+), 63 deletions(-) diff --git a/test/test_aes.py b/test/test_aes.py index 18f15fecb..a26abfd7d 100644 --- a/test/test_aes.py +++ b/test/test_aes.py @@ -48,7 +48,7 @@ class TestAES(unittest.TestCase): data = b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\x27\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd' decrypted = intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist(data), self.key, self.iv)) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) - if Cryptodome: + if 
Cryptodome.AES: decrypted = aes_cbc_decrypt_bytes(data, intlist_to_bytes(self.key), intlist_to_bytes(self.iv)) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) @@ -78,7 +78,7 @@ class TestAES(unittest.TestCase): decrypted = intlist_to_bytes(aes_gcm_decrypt_and_verify( bytes_to_intlist(data), self.key, bytes_to_intlist(authentication_tag), self.iv[:12])) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) - if Cryptodome: + if Cryptodome.AES: decrypted = aes_gcm_decrypt_and_verify_bytes( data, intlist_to_bytes(self.key), authentication_tag, intlist_to_bytes(self.iv[:12])) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) diff --git a/yt_dlp/__pyinstaller/hook-yt_dlp.py b/yt_dlp/__pyinstaller/hook-yt_dlp.py index 057cfef2f..63dcdffe0 100644 --- a/yt_dlp/__pyinstaller/hook-yt_dlp.py +++ b/yt_dlp/__pyinstaller/hook-yt_dlp.py @@ -1,30 +1,8 @@ -import ast -import os import sys -from pathlib import Path from PyInstaller.utils.hooks import collect_submodules -def find_attribute_accesses(node, name, path=()): - if isinstance(node, ast.Attribute): - path = [*path, node.attr] - if isinstance(node.value, ast.Name) and node.value.id == name: - yield path[::-1] - for child in ast.iter_child_nodes(node): - yield from find_attribute_accesses(child, name, path) - - -def collect_used_submodules(name, level): - for dirpath, _, filenames in os.walk(Path(__file__).parent.parent): - for filename in filenames: - if not filename.endswith('.py'): - continue - with open(Path(dirpath) / filename, encoding='utf8') as f: - for submodule in find_attribute_accesses(ast.parse(f.read()), name): - yield '.'.join(submodule[:level]) - - def pycryptodome_module(): try: import Cryptodome # noqa: F401 @@ -41,12 +19,8 @@ def pycryptodome_module(): def get_hidden_imports(): yield 'yt_dlp.compat._legacy' + yield pycryptodome_module() yield from collect_submodules('websockets') - - crypto = pycryptodome_module() - for sm in set(collect_used_submodules('Cryptodome', 2)): - yield f'{crypto}.{sm}' - # These are auto-detected, but explicitly add them just in case yield from ('mutagen', 'brotli', 'certifi') diff --git a/yt_dlp/aes.py b/yt_dlp/aes.py index deff0a2b3..b3a383cd9 100644 --- a/yt_dlp/aes.py +++ b/yt_dlp/aes.py @@ -5,14 +5,14 @@ from .compat import compat_ord from .dependencies import Cryptodome from .utils import bytes_to_intlist, intlist_to_bytes -if Cryptodome: +if Cryptodome.AES: def aes_cbc_decrypt_bytes(data, key, iv): """ Decrypt bytes with AES-CBC using pycryptodome """ - return Cryptodome.Cipher.AES.new(key, Cryptodome.Cipher.AES.MODE_CBC, iv).decrypt(data) + return Cryptodome.AES.new(key, Cryptodome.AES.MODE_CBC, iv).decrypt(data) def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce): """ Decrypt bytes with AES-GCM using pycryptodome """ - return Cryptodome.Cipher.AES.new(key, Cryptodome.Cipher.AES.MODE_GCM, nonce).decrypt_and_verify(data, tag) + return Cryptodome.AES.new(key, Cryptodome.AES.MODE_GCM, nonce).decrypt_and_verify(data, tag) else: def aes_cbc_decrypt_bytes(data, key, iv): diff --git a/yt_dlp/compat/_legacy.py b/yt_dlp/compat/_legacy.py index 84d749209..83bf869a8 100644 --- a/yt_dlp/compat/_legacy.py +++ b/yt_dlp/compat/_legacy.py @@ -32,9 +32,9 @@ from re import match as compat_Match # noqa: F401 from . 
import compat_expanduser, compat_HTMLParseError, compat_realpath from .compat_utils import passthrough_module -from ..dependencies import Cryptodome_AES as compat_pycrypto_AES # noqa: F401 from ..dependencies import brotli as compat_brotli # noqa: F401 from ..dependencies import websockets as compat_websockets # noqa: F401 +from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401 passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode')) diff --git a/yt_dlp/compat/compat_utils.py b/yt_dlp/compat/compat_utils.py index 8956b3bf1..3ca46d270 100644 --- a/yt_dlp/compat/compat_utils.py +++ b/yt_dlp/compat/compat_utils.py @@ -48,7 +48,7 @@ def passthrough_module(parent, child, allowed_attributes=(..., ), *, callback=la """Passthrough parent module into a child module, creating the parent if necessary""" def __getattr__(attr): if _is_package(parent): - with contextlib.suppress(ImportError): + with contextlib.suppress(ModuleNotFoundError): return importlib.import_module(f'.{attr}', parent.__name__) ret = from_child(attr) diff --git a/yt_dlp/dependencies/Cryptodome.py b/yt_dlp/dependencies/Cryptodome.py index 2adc51374..a50bce4d4 100644 --- a/yt_dlp/dependencies/Cryptodome.py +++ b/yt_dlp/dependencies/Cryptodome.py @@ -1,8 +1,5 @@ import types -from ..compat import functools -from ..compat.compat_utils import passthrough_module - try: import Cryptodome as _parent except ImportError: @@ -12,19 +9,36 @@ except ImportError: _parent = types.ModuleType('no_Cryptodome') __bool__ = lambda: False -passthrough_module(__name__, _parent, (..., '__version__')) -del passthrough_module +__version__ = '' +AES = PKCS1_v1_5 = Blowfish = PKCS1_OAEP = SHA1 = CMAC = RSA = None +try: + if _parent.__name__ == 'Cryptodome': + from Cryptodome import __version__ + from Cryptodome.Cipher import AES + from Cryptodome.Cipher import PKCS1_v1_5 + from Cryptodome.Cipher import Blowfish + from Cryptodome.Cipher import PKCS1_OAEP + from Cryptodome.Hash import SHA1 + from Cryptodome.Hash import CMAC + from Cryptodome.PublicKey import RSA + elif _parent.__name__ == 'Crypto': + from Crypto import __version__ + from Crypto.Cipher import AES + from Crypto.Cipher import PKCS1_v1_5 + from Crypto.Cipher import Blowfish + from Crypto.Cipher import PKCS1_OAEP + from Crypto.Hash import SHA1 + from Crypto.Hash import CMAC + from Crypto.PublicKey import RSA +except ImportError: + __version__ = f'broken {__version__}'.strip() -@property -@functools.cache -def _yt_dlp__identifier(): - if _parent.__name__ == 'Crypto': - from Crypto.Cipher import AES - try: - # In pycrypto, mode defaults to ECB. See: - # https://www.pycryptodome.org/en/latest/src/vs_pycrypto.html#:~:text=not%20have%20ECB%20as%20default%20mode - AES.new(b'abcdefghijklmnop') - except TypeError: - return 'pycrypto' - return _parent.__name__ +_yt_dlp__identifier = _parent.__name__ +if AES and _yt_dlp__identifier == 'Crypto': + try: + # In pycrypto, mode defaults to ECB. 
See: + # https://www.pycryptodome.org/en/latest/src/vs_pycrypto.html#:~:text=not%20have%20ECB%20as%20default%20mode + AES.new(b'abcdefghijklmnop') + except TypeError: + _yt_dlp__identifier = 'pycrypto' diff --git a/yt_dlp/dependencies/__init__.py b/yt_dlp/dependencies/__init__.py index c2214e6db..6e7d29c5c 100644 --- a/yt_dlp/dependencies/__init__.py +++ b/yt_dlp/dependencies/__init__.py @@ -73,7 +73,7 @@ available_dependencies = {k: v for k, v in all_dependencies.items() if v} # Deprecated -Cryptodome_AES = Cryptodome.Cipher.AES if Cryptodome else None +Cryptodome_AES = Cryptodome.AES __all__ = [ diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index 29d6f6241..f2868dc52 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -70,7 +70,7 @@ class HlsFD(FragmentFD): can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None if can_download: has_ffmpeg = FFmpegFD.available() - no_crypto = not Cryptodome and '#EXT-X-KEY:METHOD=AES-128' in s + no_crypto = not Cryptodome.AES and '#EXT-X-KEY:METHOD=AES-128' in s if no_crypto and has_ffmpeg: can_download, message = False, 'The stream has AES-128 encryption and pycryptodomex is not available' elif no_crypto: diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index f4180633a..2252840b3 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -894,15 +894,15 @@ class BiliIntlBaseIE(InfoExtractor): } def _perform_login(self, username, password): - if not Cryptodome: + if not Cryptodome.RSA: raise ExtractorError('pycryptodomex not found. Please install', expected=True) key_data = self._download_json( 'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None, note='Downloading login key', errnote='Unable to download login key')['data'] - public_key = Cryptodome.PublicKey.RSA.importKey(key_data['key']) - password_hash = Cryptodome.Cipher.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8')) + public_key = Cryptodome.RSA.importKey(key_data['key']) + password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8')) login_post = self._download_json( 'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({ 'username': username, diff --git a/yt_dlp/extractor/ivi.py b/yt_dlp/extractor/ivi.py index 96220bea9..fa5ceec95 100644 --- a/yt_dlp/extractor/ivi.py +++ b/yt_dlp/extractor/ivi.py @@ -91,7 +91,7 @@ class IviIE(InfoExtractor): for site in (353, 183): content_data = (data % site).encode() if site == 353: - if not Cryptodome: + if not Cryptodome.CMAC: continue timestamp = (self._download_json( @@ -105,8 +105,8 @@ class IviIE(InfoExtractor): query = { 'ts': timestamp, - 'sign': Cryptodome.Hash.CMAC.new(self._LIGHT_KEY, timestamp.encode() + content_data, - Cryptodome.Cipher.Blowfish).hexdigest(), + 'sign': Cryptodome.CMAC.new(self._LIGHT_KEY, timestamp.encode() + content_data, + Cryptodome.Blowfish).hexdigest(), } else: query = {} @@ -126,7 +126,7 @@ class IviIE(InfoExtractor): extractor_msg = 'Video %s does not exist' elif site == 353: continue - elif not Cryptodome: + elif not Cryptodome.CMAC: raise ExtractorError('pycryptodomex not found. 
Please install', expected=True) elif message: extractor_msg += ': ' + message diff --git a/yt_dlp/extractor/wrestleuniverse.py b/yt_dlp/extractor/wrestleuniverse.py index 78e7c83ab..5c6dec2c4 100644 --- a/yt_dlp/extractor/wrestleuniverse.py +++ b/yt_dlp/extractor/wrestleuniverse.py @@ -50,10 +50,10 @@ class WrestleUniverseBaseIE(InfoExtractor): data=data, headers=headers, query=query, fatal=fatal) def _call_encrypted_api(self, video_id, param='', msg='API', data={}, query={}, fatal=True): - if not Cryptodome: + if not Cryptodome.RSA: raise ExtractorError('pycryptodomex not found. Please install', expected=True) - private_key = Cryptodome.PublicKey.RSA.generate(2048) - cipher = Cryptodome.Cipher.PKCS1_OAEP.new(private_key, hashAlgo=Cryptodome.Hash.SHA1) + private_key = Cryptodome.RSA.generate(2048) + cipher = Cryptodome.PKCS1_OAEP.new(private_key, hashAlgo=Cryptodome.SHA1) def decrypt(data): if not data: From f34804b2f920f62a6e893a14a9e2a2144b14dd23 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 28 Feb 2023 23:34:43 +0530 Subject: [PATCH 2015/2552] [extractor/youtube] Fix 5038f6d713303e0967d002216e7a88652401c22a * [fragment] Fix `request_data` * [youtube] Don't use POST for now. It may be easier to break in future Authored by: bashonly, coletdjnz --- yt_dlp/downloader/fragment.py | 3 ++- yt_dlp/extractor/common.py | 1 + yt_dlp/extractor/youtube.py | 1 - 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index 039cb1492..377f138b7 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -466,7 +466,8 @@ class FragmentFD(FileDownloader): for retry in RetryManager(self.params.get('fragment_retries'), error_callback): try: ctx['fragment_count'] = fragment.get('fragment_count') - if not self._download_fragment(ctx, fragment['url'], info_dict, headers): + if not self._download_fragment( + ctx, fragment['url'], info_dict, headers, info_dict.get('request_data')): return except (urllib.error.HTTPError, http.client.IncompleteRead) as err: retry.error = err diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 86bef173f..98efe0e9d 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -132,6 +132,7 @@ class InfoExtractor: is parsed from a string (in case of fragmented media) for MSS - URL of the ISM manifest. 
+ * request_data Data to send in POST request to the URL * manifest_url The URL of the manifest file in case of fragmented media: diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 0227a1f83..f5ffce775 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3778,7 +3778,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if no_audio or no_video: CHUNK_SIZE = 10 << 20 dct.update({ - 'request_data': b'x', 'protocol': 'http_dash_segments', 'fragments': [{ 'url': update_url_query(dct['url'], { From 31e183557fcd1b937582f9429f29207c1261f501 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 28 Feb 2023 23:50:34 +0530 Subject: [PATCH 2016/2552] [extractor/youtube] Extract channel `view_count` when `/about` tab is passed --- yt_dlp/extractor/youtube.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index f5ffce775..d1696349a 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -4905,6 +4905,10 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): info['view_count'] = self._get_count(playlist_stats, 1) if info['view_count'] is None: # 0 is allowed info['view_count'] = self._get_count(playlist_header_renderer, 'viewCountText') + if info['view_count'] is None: + info['view_count'] = self._get_count(data, ( + 'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., 'tabRenderer', 'content', 'sectionListRenderer', + 'contents', ..., 'itemSectionRenderer', 'contents', ..., 'channelAboutFullMetadataRenderer', 'viewCountText')) info['playlist_count'] = self._get_count(playlist_stats, 0) if info['playlist_count'] is None: # 0 is allowed @@ -6124,6 +6128,23 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): } }], 'params': {'extract_flat': True}, + }, { + 'url': 'https://www.youtube.com/@3blue1brown/about', + 'info_dict': { + 'id': 'UCYO_jab_esuFRV4b17AJtAw', + 'tags': ['Mathematics'], + 'title': '3Blue1Brown - About', + 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw', + 'channel_follower_count': int, + 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw', + 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw', + 'channel': '3Blue1Brown', + 'uploader': '3Blue1Brown', + 'view_count': int, + 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw', + 'description': 'md5:e1384e8a133307dd10edee76e875d62f', + }, + 'playlist_count': 0, }] @classmethod From 5b28cef72db3b531680d89c121631c73ae05354f Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 28 Feb 2023 23:31:02 +0530 Subject: [PATCH 2017/2552] [cleanup] Misc --- .github/ISSUE_TEMPLATE/1_broken_site.yml | 2 + .../ISSUE_TEMPLATE/2_site_support_request.yml | 2 + .../ISSUE_TEMPLATE/3_site_feature_request.yml | 2 + .github/ISSUE_TEMPLATE/4_bug_report.yml | 2 + .github/ISSUE_TEMPLATE/5_feature_request.yml | 2 + .github/ISSUE_TEMPLATE/6_question.yml | 2 + CONTRIBUTING.md | 2 +- Changelog.md | 4 +- README.md | 1 + devscripts/make_issue_template.py | 2 + supportedsites.md | 366 +++++++++--------- yt_dlp/YoutubeDL.py | 2 +- yt_dlp/dependencies/Cryptodome.py | 18 +- yt_dlp/downloader/fragment.py | 2 +- yt_dlp/extractor/common.py | 2 +- yt_dlp/extractor/youtube.py | 4 +- 16 files changed, 212 insertions(+), 203 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml index e1103fb84..48e8890c5 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml @@ -50,6 +50,8 @@ body: options: - 
label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) required: true + - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead" + required: false - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below required: true - type: textarea diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml index 90d7294ac..d43d62f03 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml @@ -62,6 +62,8 @@ body: options: - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) required: true + - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead" + required: false - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below required: true - type: textarea diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml index 5b59852c7..352b47242 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml @@ -58,6 +58,8 @@ body: options: - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) required: true + - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead" + required: false - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below required: true - type: textarea diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml index bd4695f87..7588b8ed8 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml @@ -43,6 +43,8 @@ body: options: - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) required: true + - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead" + required: false - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below required: true - type: textarea diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml index 8c7f315e9..fdda50b7b 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/5_feature_request.yml @@ -40,6 +40,8 @@ body: label: Provide verbose output that clearly demonstrates the problem options: - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) + - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead" + required: false - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below - type: textarea id: log diff --git a/.github/ISSUE_TEMPLATE/6_question.yml b/.github/ISSUE_TEMPLATE/6_question.yml index 4a1344628..56ce74654 100644 --- a/.github/ISSUE_TEMPLATE/6_question.yml +++ b/.github/ISSUE_TEMPLATE/6_question.yml @@ -46,6 +46,8 @@ body: label: Provide verbose output that clearly demonstrates the problem options: - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) + - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead" + required: false - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below - type: textarea id: log diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 551db674e..ae2c45423 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -127,7 +127,7 @@ While these steps won't necessarily ensure that 
no misuse of the account takes p ### Is the website primarily used for piracy? -We follow [youtube-dl's policy](https://github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free) to not support services that is primarily used for infringing copyright. Additionally, it has been decided to not to support porn sites that specialize in deep fake. We also cannot support any service that serves only [DRM protected content](https://en.wikipedia.org/wiki/Digital_rights_management). +We follow [youtube-dl's policy](https://github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free) to not support services that is primarily used for infringing copyright. Additionally, it has been decided to not to support porn sites that specialize in fakes. We also cannot support any service that serves only [DRM protected content](https://en.wikipedia.org/wiki/Digital_rights_management). diff --git a/Changelog.md b/Changelog.md index 8d3ac089c..24bc8a2e2 100644 --- a/Changelog.md +++ b/Changelog.md @@ -50,8 +50,8 @@ * [extractor/txxx] Add extractors by [chio0hai](https://github.com/chio0hai) * [extractor/vocaroo] Add extractor by [SuperSonicHub1](https://github.com/SuperSonicHub1), [qbnu](https://github.com/qbnu) * [extractor/wrestleuniverse] Add extractors by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly) -* [extractor/yappy] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) -* **[extractor/youtube] Fix `uploader_id` extraction** by [bashonly](https://github.com/bashonly) +* [extractor/yappy] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [dirkf](https://github.com/dirkf) +* [extractor/youtube] **Fix `uploader_id` extraction** by [bashonly](https://github.com/bashonly) * [extractor/youtube] Add hyperpipe instances by [Generator](https://github.com/Generator) * [extractor/youtube] Handle `consent.youtube` * [extractor/youtube] Support `/live/` URL diff --git a/README.md b/README.md index 9b91775bc..3d3db933a 100644 --- a/README.md +++ b/README.md @@ -130,6 +130,7 @@ Features marked with a **\*** have been back-ported to youtube-dl Some of yt-dlp's default options are different from that of youtube-dl and youtube-dlc: +* yt-dlp supports only [Python 3.7+](## "Windows 7"), and *may* remove support for more versions as they [become EOL](https://devguide.python.org/versions/#python-release-cycle); while [youtube-dl still supports Python 2.6+ and 3.2+](https://github.com/ytdl-org/youtube-dl/issues/30568#issue-1118238743) * The options `--auto-number` (`-A`), `--title` (`-t`) and `--literal` (`-l`), no longer work. See [removed options](#Removed) for details * `avconv` is not supported as an alternative to `ffmpeg` * yt-dlp stores config files in slightly different locations to youtube-dl. 
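As an aside on the Python support bullet above, a minimal sketch of the kind of runtime check that policy implies (the thresholds are illustrative; the actual logic lives in `YoutubeDL.py` and uses its own warning helpers):

```python
import sys

MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 8)  # illustrative thresholds

current = sys.version_info[:2]
if current < MIN_SUPPORTED:
    print('Python %d.%d is no longer supported!' % current)
elif current < MIN_RECOMMENDED:
    print('Support for Python %d.%d has been deprecated' % current)
```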
See [CONFIGURATION](#configuration) for a list of correct locations diff --git a/devscripts/make_issue_template.py b/devscripts/make_issue_template.py index 1ee00f2b8..39b95c8da 100644 --- a/devscripts/make_issue_template.py +++ b/devscripts/make_issue_template.py @@ -24,6 +24,8 @@ VERBOSE_TMPL = ''' options: - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) required: true + - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead" + required: false - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below required: true - type: textarea diff --git a/supportedsites.md b/supportedsites.md index b545ec540..d7ac6dce5 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -28,14 +28,14 @@ - **abcnews:video** - **abcotvs**: ABC Owned Television Stations - **abcotvs:clips** - - **AbemaTV**: [abematv] + - **AbemaTV**: [*abematv*](## "netrc machine") - **AbemaTVTitle** - **AcademicEarth:Course** - **acast** - **acast:channel** - **AcFunBangumi** - **AcFunVideo** - - **ADN**: [animationdigitalnetwork] Animation Digital Network + - **ADN**: [*animationdigitalnetwork*](## "netrc machine") Animation Digital Network - **AdobeConnect** - **adobetv** - **adobetv:channel** @@ -47,8 +47,8 @@ - **aenetworks:collection** - **aenetworks:show** - **AeonCo** - - **afreecatv**: [afreecatv] afreecatv.com - - **afreecatv:live**: [afreecatv] afreecatv.com + - **afreecatv**: [*afreecatv*](## "netrc machine") afreecatv.com + - **afreecatv:live**: [*afreecatv*](## "netrc machine") afreecatv.com - **afreecatv:user** - **AirMozilla** - **AirTV** @@ -59,8 +59,8 @@ - **AlphaPorno** - **Alsace20TV** - **Alsace20TVEmbed** - - **Alura**: [alura] - - **AluraCourse**: [aluracourse] + - **Alura**: [*alura*](## "netrc machine") + - **AluraCourse**: [*aluracourse*](## "netrc machine") - **Amara** - **AmazonMiniTV** - **amazonminitv:season**: Amazon MiniTV Season, "minitv:season:" prefix @@ -100,7 +100,7 @@ - **ArteTVPlaylist** - **AsianCrush** - **AsianCrushPlaylist** - - **AtresPlayer**: [atresplayer] + - **AtresPlayer**: [*atresplayer*](## "netrc machine") - **AtScaleConfEvent** - **ATTTechChannel** - **ATVAt** @@ -128,15 +128,15 @@ - **Bandcamp:user** - **Bandcamp:weekly** - **BannedVideo** - - **bbc**: [bbc] BBC - - **bbc.co.uk**: [bbc] BBC iPlayer + - **bbc**: [*bbc*](## "netrc machine") BBC + - **bbc.co.uk**: [*bbc*](## "netrc machine") BBC iPlayer - **bbc.co.uk:article**: BBC articles - **bbc.co.uk:​iplayer:episodes** - **bbc.co.uk:​iplayer:group** - **bbc.co.uk:playlist** - - **BBVTV**: [bbvtv] - - **BBVTVLive**: [bbvtv] - - **BBVTVRecordings**: [bbvtv] + - **BBVTV**: [*bbvtv*](## "netrc machine") + - **BBVTVLive**: [*bbvtv*](## "netrc machine") + - **BBVTVRecordings**: [*bbvtv*](## "netrc machine") - **BeatBumpPlaylist** - **BeatBumpVideo** - **Beatport** @@ -165,8 +165,8 @@ - **BilibiliSpaceAudio** - **BilibiliSpacePlaylist** - **BilibiliSpaceVideo** - - **BiliIntl**: [biliintl] - - **biliIntl:series**: [biliintl] + - **BiliIntl**: [*biliintl*](## "netrc machine") + - **biliIntl:series**: [*biliintl*](## "netrc machine") - **BiliLive** - **BioBioChileTV** - **Biography** @@ -232,7 +232,7 @@ - **cbssports:embed** - **CCMA** - **CCTV**: 央视网 - - **CDA**: [cdapl] + - **CDA**: [*cdapl*](## "netrc machine") - **Cellebrite** - **CeskaTelevize** - **CGTN** @@ -286,8 +286,8 @@ - **CrooksAndLiars** - **CrowdBunker** - **CrowdBunkerChannel** - - **crunchyroll**: [crunchyroll] - - **crunchyroll:playlist**: [crunchyroll] + - **crunchyroll**: 
[*crunchyroll*](## "netrc machine") + - **crunchyroll:playlist**: [*crunchyroll*](## "netrc machine") - **CSpan**: C-SPAN - **CSpanCongress** - **CtsNews**: 華視新聞 @@ -295,18 +295,18 @@ - **CTVNews** - **cu.ntv.co.jp**: Nippon Television Network - **CultureUnplugged** - - **curiositystream**: [curiositystream] - - **curiositystream:collections**: [curiositystream] - - **curiositystream:series**: [curiositystream] + - **curiositystream**: [*curiositystream*](## "netrc machine") + - **curiositystream:collections**: [*curiositystream*](## "netrc machine") + - **curiositystream:series**: [*curiositystream*](## "netrc machine") - **CWTV** - - **Cybrary**: [cybrary] - - **CybraryCourse**: [cybrary] + - **Cybrary**: [*cybrary*](## "netrc machine") + - **CybraryCourse**: [*cybrary*](## "netrc machine") - **Daftsex** - **DagelijkseKost**: dagelijksekost.een.be - **DailyMail** - - **dailymotion**: [dailymotion] - - **dailymotion:playlist**: [dailymotion] - - **dailymotion:user**: [dailymotion] + - **dailymotion**: [*dailymotion*](## "netrc machine") + - **dailymotion:playlist**: [*dailymotion*](## "netrc machine") + - **dailymotion:user**: [*dailymotion*](## "netrc machine") - **DailyWire** - **DailyWirePodcast** - **damtomo:record** @@ -328,7 +328,7 @@ - **DeuxMNews** - **DHM**: Filmarchiv - Deutsches Historisches Museum - **Digg** - - **DigitalConcertHall**: [digitalconcerthall] DigitalConcertHall extractor + - **DigitalConcertHall**: [*digitalconcerthall*](## "netrc machine") DigitalConcertHall extractor - **DigitallySpeaking** - **Digiteka** - **Discovery** @@ -351,7 +351,7 @@ - **DRBonanza** - **Drooble** - **Dropbox** - - **Dropout**: [dropout] + - **Dropout**: [*dropout*](## "netrc machine") - **DropoutSeason** - **DrTuber** - **drtv** @@ -373,9 +373,9 @@ - **egghead:lesson**: egghead.io lesson - **ehftv** - **eHow** - - **EinsUndEinsTV**: [1und1tv] - - **EinsUndEinsTVLive**: [1und1tv] - - **EinsUndEinsTVRecordings**: [1und1tv] + - **EinsUndEinsTV**: [*1und1tv*](## "netrc machine") + - **EinsUndEinsTVLive**: [*1und1tv*](## "netrc machine") + - **EinsUndEinsTVRecordings**: [*1und1tv*](## "netrc machine") - **Einthusan** - **eitb.tv** - **EllenTube** @@ -390,7 +390,7 @@ - **EpiconSeries** - **Epoch** - **Eporner** - - **EroProfile**: [eroprofile] + - **EroProfile**: [*eroprofile*](## "netrc machine") - **EroProfile:album** - **ertflix**: ERTFLIX videos - **ertflix:codename**: ERTFLIX videos by codename @@ -405,20 +405,20 @@ - **EuropeanTour** - **Eurosport** - **EUScreen** - - **EWETV**: [ewetv] - - **EWETVLive**: [ewetv] - - **EWETVRecordings**: [ewetv] + - **EWETV**: [*ewetv*](## "netrc machine") + - **EWETVLive**: [*ewetv*](## "netrc machine") + - **EWETVRecordings**: [*ewetv*](## "netrc machine") - **ExpoTV** - **Expressen** - **ExtremeTube** - **EyedoTV** - - **facebook**: [facebook] + - **facebook**: [*facebook*](## "netrc machine") - **facebook:reel** - **FacebookPluginsVideo** - - **fancode:live**: [fancode] - - **fancode:vod**: [fancode] + - **fancode:live**: [*fancode*](## "netrc machine") + - **fancode:vod**: [*fancode*](## "netrc machine") - **faz.net** - - **fc2**: [fc2] + - **fc2**: [*fc2*](## "netrc machine") - **fc2:embed** - **fc2:live** - **Fczenit** @@ -452,20 +452,20 @@ - **freespeech.org** - **freetv:series** - **FreeTvMovies** - - **FrontendMasters**: [frontendmasters] - - **FrontendMastersCourse**: [frontendmasters] - - **FrontendMastersLesson**: [frontendmasters] + - **FrontendMasters**: [*frontendmasters*](## "netrc machine") + - **FrontendMastersCourse**: 
[*frontendmasters*](## "netrc machine") + - **FrontendMastersLesson**: [*frontendmasters*](## "netrc machine") - **FujiTVFODPlus7** - - **Funimation**: [funimation] - - **funimation:page**: [funimation] - - **funimation:show**: [funimation] + - **Funimation**: [*funimation*](## "netrc machine") + - **funimation:page**: [*funimation*](## "netrc machine") + - **funimation:show**: [*funimation*](## "netrc machine") - **Funk** - **Fusion** - **Fux** - **FuyinTV** - **Gab** - **GabTV** - - **Gaia**: [gaia] + - **Gaia**: [*gaia*](## "netrc machine") - **GameInformer** - **GameJolt** - **GameJoltCommunity** @@ -477,9 +477,9 @@ - **GameStar** - **Gaskrank** - **Gazeta** - - **GDCVault**: [gdcvault] + - **GDCVault**: [*gdcvault*](## "netrc machine") - **GediDigital** - - **gem.cbc.ca**: [cbcgem] + - **gem.cbc.ca**: [*cbcgem*](## "netrc machine") - **gem.cbc.ca:live** - **gem.cbc.ca:playlist** - **Genius** @@ -489,11 +489,11 @@ - **Gfycat** - **GiantBomb** - **Giga** - - **GlattvisionTV**: [glattvisiontv] - - **GlattvisionTVLive**: [glattvisiontv] - - **GlattvisionTVRecordings**: [glattvisiontv] + - **GlattvisionTV**: [*glattvisiontv*](## "netrc machine") + - **GlattvisionTVLive**: [*glattvisiontv*](## "netrc machine") + - **GlattvisionTVRecordings**: [*glattvisiontv*](## "netrc machine") - **Glide**: Glide mobile video messages (glide.me) - - **Globo**: [globo] + - **Globo**: [*globo*](## "netrc machine") - **GloboArticle** - **glomex**: Glomex videos - **glomex:embed**: Glomex embedded videos @@ -507,7 +507,7 @@ - **google:​podcasts:feed** - **GoogleDrive** - **GoogleDrive:Folder** - - **GoPlay**: [goplay] + - **GoPlay**: [*goplay*](## "netrc machine") - **GoPro** - **Goshgay** - **GoToStage** @@ -527,7 +527,7 @@ - **hgtv.com:show** - **HGTVDe** - **HGTVUsa** - - **HiDive**: [hidive] + - **HiDive**: [*hidive*](## "netrc machine") - **HistoricFilms** - **history:player** - **history:topic**: History.com Topic @@ -544,8 +544,8 @@ - **Howcast** - **HowStuffWorks** - **hrfernsehen** - - **HRTi**: [hrti] - - **HRTiPlaylist**: [hrti] + - **HRTi**: [*hrti*](## "netrc machine") + - **HRTiPlaylist**: [*hrti*](## "netrc machine") - **HSEProduct** - **HSEShow** - **html5** @@ -575,19 +575,19 @@ - **Inc** - **IndavideoEmbed** - **InfoQ** - - **Instagram**: [instagram] - - **instagram:story**: [instagram] - - **instagram:tag**: [instagram] Instagram hashtag search URLs - - **instagram:user**: [instagram] Instagram user profile + - **Instagram**: [*instagram*](## "netrc machine") + - **instagram:story**: [*instagram*](## "netrc machine") + - **instagram:tag**: [*instagram*](## "netrc machine") Instagram hashtag search URLs + - **instagram:user**: [*instagram*](## "netrc machine") Instagram user profile - **InstagramIOS**: IOS instagram:// URL - **Internazionale** - **InternetVideoArchive** - **InvestigationDiscovery** - - **IPrima**: [iprima] + - **IPrima**: [*iprima*](## "netrc machine") - **IPrimaCNN** - **iq.com**: International version of iQiyi - **iq.com:album** - - **iqiyi**: [iqiyi] 爱奇艺 + - **iqiyi**: [*iqiyi*](## "netrc machine") 爱奇艺 - **IslamChannel** - **IslamChannelSeries** - **IsraelNationalNews** @@ -660,9 +660,9 @@ - **LcpPlay** - **Le**: 乐视网 - **Lecture2Go** - - **Lecturio**: [lecturio] - - **LecturioCourse**: [lecturio] - - **LecturioDeCourse**: [lecturio] + - **Lecturio**: [*lecturio*](## "netrc machine") + - **LecturioCourse**: [*lecturio*](## "netrc machine") + - **LecturioDeCourse**: [*lecturio*](## "netrc machine") - **LEGO** - **Lemonde** - **Lenta** @@ -678,10 +678,10 @@ - 
**limelight:channel_list** - **LineLive** - **LineLiveChannel** - - **LinkedIn**: [linkedin] - - **linkedin:learning**: [linkedin] - - **linkedin:​learning:course**: [linkedin] - - **LinuxAcademy**: [linuxacademy] + - **LinkedIn**: [*linkedin*](## "netrc machine") + - **linkedin:learning**: [*linkedin*](## "netrc machine") + - **linkedin:​learning:course**: [*linkedin*](## "netrc machine") + - **LinuxAcademy**: [*linuxacademy*](## "netrc machine") - **Liputan6** - **ListenNotes** - **LiTV** @@ -696,8 +696,8 @@ - **LoveHomePorn** - **LRTStream** - **LRTVOD** - - **lynda**: [lynda] lynda.com videos - - **lynda:course**: [lynda] lynda.com online courses + - **lynda**: [*lynda*](## "netrc machine") lynda.com videos + - **lynda:course**: [*lynda*](## "netrc machine") lynda.com online courses - **m6** - **MagentaMusik360** - **mailru**: Видео@Mail.Ru @@ -767,13 +767,13 @@ - **mixcloud:user** - **MLB** - **MLBArticle** - - **MLBTV**: [mlb] + - **MLBTV**: [*mlb*](## "netrc machine") - **MLBVideo** - **MLSSoccer** - **Mnet** - - **MNetTV**: [mnettv] - - **MNetTVLive**: [mnettv] - - **MNetTVRecordings**: [mnettv] + - **MNetTV**: [*mnettv*](## "netrc machine") + - **MNetTVLive**: [*mnettv*](## "netrc machine") + - **MNetTVRecordings**: [*mnettv*](## "netrc machine") - **MochaVideo** - **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net - **Mofosex** @@ -852,9 +852,9 @@ - **ndr:embed** - **ndr:​embed:base** - **NDTV** - - **Nebula**: [watchnebula] - - **nebula:channel**: [watchnebula] - - **nebula:subscriptions**: [watchnebula] + - **Nebula**: [*watchnebula*](## "netrc machine") + - **nebula:channel**: [*watchnebula*](## "netrc machine") + - **nebula:subscriptions**: [*watchnebula*](## "netrc machine") - **NerdCubedFeed** - **netease:album**: 网易云音乐 - 专辑 - **netease:djradio**: 网易云音乐 - 电台 @@ -863,9 +863,9 @@ - **netease:program**: 网易云音乐 - 电台节目 - **netease:singer**: 网易云音乐 - 歌手 - **netease:song**: 网易云音乐 - - **NetPlusTV**: [netplus] - - **NetPlusTVLive**: [netplus] - - **NetPlusTVRecordings**: [netplus] + - **NetPlusTV**: [*netplus*](## "netrc machine") + - **NetPlusTVLive**: [*netplus*](## "netrc machine") + - **NetPlusTVRecordings**: [*netplus*](## "netrc machine") - **Netverse** - **NetversePlaylist** - **NetverseSearch**: "netsearch:" prefix @@ -898,7 +898,7 @@ - **nickelodeon:br** - **nickelodeonru** - **nicknight** - - **niconico**: [niconico] ニコニコ動画 + - **niconico**: [*niconico*](## "netrc machine") ニコニコ動画 - **niconico:history**: NicoNico user history or likes. Requires cookies. 
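The `[*name*](## "netrc machine")` links being introduced throughout this list name the `.netrc` machine entry each extractor reads its credentials from. A minimal sketch of that lookup using Python's standard `netrc` module (the machine name and file contents are illustrative):

```python
import netrc

# ~/.netrc entry (illustrative):
#   machine niconico login user@example.com password hunter2
entry = netrc.netrc().authenticators('niconico')  # machine name from the list
if entry:
    login, _account, password = entry
    print(f'would log in as {login}')
```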
- **niconico:playlist** - **niconico:series** @@ -911,7 +911,7 @@ - **Nitter** - **njoy**: N-JOY - **njoy:embed** - - **NJPWWorld**: [njpwworld] 新日本プロレスワールド + - **NJPWWorld**: [*njpwworld*](## "netrc machine") 新日本プロレスワールド - **NobelPrize** - **NoicePodcast** - **NonkTube** @@ -980,11 +980,11 @@ - **orf:iptv**: iptv.ORF.at - **orf:radio** - **orf:tvthek**: ORF TVthek - - **OsnatelTV**: [osnateltv] - - **OsnatelTVLive**: [osnateltv] - - **OsnatelTVRecordings**: [osnateltv] + - **OsnatelTV**: [*osnateltv*](## "netrc machine") + - **OsnatelTVLive**: [*osnateltv*](## "netrc machine") + - **OsnatelTVRecordings**: [*osnateltv*](## "netrc machine") - **OutsideTV** - - **PacktPub**: [packtpub] + - **PacktPub**: [*packtpub*](## "netrc machine") - **PacktPubCourse** - **PalcoMP3:artist** - **PalcoMP3:song** @@ -1007,7 +1007,7 @@ - **peer.tv** - **PeerTube** - **PeerTube:Playlist** - - **peloton**: [peloton] + - **peloton**: [*peloton*](## "netrc machine") - **peloton:live**: Peloton Live - **People** - **PerformGroup** @@ -1016,7 +1016,7 @@ - **PhilharmonieDeParis**: Philharmonie de Paris - **phoenix.de** - **Photobucket** - - **Piapro**: [piapro] + - **Piapro**: [*piapro*](## "netrc machine") - **Picarto** - **PicartoVod** - **Piksel** @@ -1027,11 +1027,11 @@ - **pixiv:​sketch:user** - **Pladform** - **PlanetMarathi** - - **Platzi**: [platzi] - - **PlatziCourse**: [platzi] + - **Platzi**: [*platzi*](## "netrc machine") + - **PlatziCourse**: [*platzi*](## "netrc machine") - **play.fm** - **player.sky.it** - - **PlayPlusTV**: [playplustv] + - **PlayPlusTV**: [*playplustv*](## "netrc machine") - **PlayStuff** - **PlaysTV** - **PlaySuisse** @@ -1039,7 +1039,7 @@ - **Playvid** - **PlayVids** - **Playwire** - - **pluralsight**: [pluralsight] + - **pluralsight**: [*pluralsight*](## "netrc machine") - **pluralsight:course** - **PlutoTV** - **PodbayFM** @@ -1048,8 +1048,8 @@ - **podomatic** - **Pokemon** - **PokemonWatch** - - **PokerGo**: [pokergo] - - **PokerGoCollection**: [pokergo] + - **PokerGo**: [*pokergo*](## "netrc machine") + - **PokerGoCollection**: [*pokergo*](## "netrc machine") - **PolsatGo** - **PolskieRadio** - **polskieradio:audition** @@ -1066,11 +1066,11 @@ - **Pornez** - **PornFlip** - **PornHd** - - **PornHub**: [pornhub] PornHub and Thumbzilla - - **PornHubPagedVideoList**: [pornhub] - - **PornHubPlaylist**: [pornhub] - - **PornHubUser**: [pornhub] - - **PornHubUserVideosUpload**: [pornhub] + - **PornHub**: [*pornhub*](## "netrc machine") PornHub and Thumbzilla + - **PornHubPagedVideoList**: [*pornhub*](## "netrc machine") + - **PornHubPlaylist**: [*pornhub*](## "netrc machine") + - **PornHubUser**: [*pornhub*](## "netrc machine") + - **PornHubUserVideosUpload**: [*pornhub*](## "netrc machine") - **Pornotube** - **PornoVoisines** - **PornoXO** @@ -1098,9 +1098,9 @@ - **qqmusic:playlist**: QQ音乐 - 歌单 - **qqmusic:singer**: QQ音乐 - 歌手 - **qqmusic:toplist**: QQ音乐 - 排行榜 - - **QuantumTV**: [quantumtv] - - **QuantumTVLive**: [quantumtv] - - **QuantumTVRecordings**: [quantumtv] + - **QuantumTV**: [*quantumtv*](## "netrc machine") + - **QuantumTVLive**: [*quantumtv*](## "netrc machine") + - **QuantumTVRecordings**: [*quantumtv*](## "netrc machine") - **Qub** - **R7** - **R7Article** @@ -1157,16 +1157,16 @@ - **RICE** - **RMCDecouverte** - **RockstarGames** - - **Rokfin**: [rokfin] + - **Rokfin**: [*rokfin*](## "netrc machine") - **rokfin:channel**: Rokfin Channels - **rokfin:search**: Rokfin Search; "rkfnsearch:" prefix - **rokfin:stack**: Rokfin Stacks - - **RoosterTeeth**: [roosterteeth] - - 
**RoosterTeethSeries**: [roosterteeth] + - **RoosterTeeth**: [*roosterteeth*](## "netrc machine") + - **RoosterTeethSeries**: [*roosterteeth*](## "netrc machine") - **RottenTomatoes** - **Rozhlas** - **RozhlasVltava** - - **RTBF**: [rtbf] + - **RTBF**: [*rtbf*](## "netrc machine") - **RTDocumentry** - **RTDocumentryPlaylist** - **rte**: Raidió Teilifís Éireann TV @@ -1208,16 +1208,16 @@ - **Ruutu** - **Ruv** - **ruv.is:spila** - - **safari**: [safari] safaribooksonline.com online video - - **safari:api**: [safari] - - **safari:course**: [safari] safaribooksonline.com online courses + - **safari**: [*safari*](## "netrc machine") safaribooksonline.com online video + - **safari:api**: [*safari*](## "netrc machine") + - **safari:course**: [*safari*](## "netrc machine") safaribooksonline.com online courses - **Saitosan** - - **SAKTV**: [saktv] - - **SAKTVLive**: [saktv] - - **SAKTVRecordings**: [saktv] - - **SaltTV**: [salttv] - - **SaltTVLive**: [salttv] - - **SaltTVRecordings**: [salttv] + - **SAKTV**: [*saktv*](## "netrc machine") + - **SAKTVLive**: [*saktv*](## "netrc machine") + - **SAKTVRecordings**: [*saktv*](## "netrc machine") + - **SaltTV**: [*salttv*](## "netrc machine") + - **SaltTVLive**: [*salttv*](## "netrc machine") + - **SaltTVRecordings**: [*salttv*](## "netrc machine") - **SampleFocus** - **Sangiin**: 参議院インターネット審議中継 (archive) - **Sapo**: SAPO Vídeos @@ -1233,8 +1233,8 @@ - **ScrippsNetworks** - **scrippsnetworks:watch** - **Scrolller** - - **SCTE**: [scte] - - **SCTECourse**: [scte] + - **SCTE**: [*scte*](## "netrc machine") + - **SCTECourse**: [*scte*](## "netrc machine") - **Seeker** - **SenateGov** - **SenateISVP** @@ -1243,7 +1243,7 @@ - **Sexu** - **SeznamZpravy** - **SeznamZpravyArticle** - - **Shahid**: [shahid] + - **Shahid**: [*shahid*](## "netrc machine") - **ShahidShow** - **Shared**: shared.sx - **ShareVideosEmbed** @@ -1273,16 +1273,16 @@ - **Smotrim** - **Snotr** - **Sohu** - - **SonyLIV**: [sonyliv] + - **SonyLIV**: [*sonyliv*](## "netrc machine") - **SonyLIVSeries** - - **soundcloud**: [soundcloud] - - **soundcloud:playlist**: [soundcloud] - - **soundcloud:related**: [soundcloud] - - **soundcloud:search**: [soundcloud] Soundcloud search; "scsearch:" prefix - - **soundcloud:set**: [soundcloud] - - **soundcloud:trackstation**: [soundcloud] - - **soundcloud:user**: [soundcloud] - - **soundcloud:​user:permalink**: [soundcloud] + - **soundcloud**: [*soundcloud*](## "netrc machine") + - **soundcloud:playlist**: [*soundcloud*](## "netrc machine") + - **soundcloud:related**: [*soundcloud*](## "netrc machine") + - **soundcloud:search**: [*soundcloud*](## "netrc machine") Soundcloud search; "scsearch:" prefix + - **soundcloud:set**: [*soundcloud*](## "netrc machine") + - **soundcloud:trackstation**: [*soundcloud*](## "netrc machine") + - **soundcloud:user**: [*soundcloud*](## "netrc machine") + - **soundcloud:​user:permalink**: [*soundcloud*](## "netrc machine") - **SoundcloudEmbed** - **soundgasm** - **soundgasm:profile** @@ -1349,13 +1349,13 @@ - **Tass** - **TBS** - **TDSLifeway** - - **Teachable**: [teachable] - - **TeachableCourse**: [teachable] + - **Teachable**: [*teachable*](## "netrc machine") + - **TeachableCourse**: [*teachable*](## "netrc machine") - **teachertube**: teachertube.com videos - **teachertube:​user:collection**: teachertube.com user and collection videos - **TeachingChannel** - **Teamcoco** - - **TeamTreeHouse**: [teamtreehouse] + - **TeamTreeHouse**: [*teamtreehouse*](## "netrc machine") - **TechTalks** - **techtv.mit.edu** - **TedEmbed** @@ 
-1378,8 +1378,8 @@ - **TeleTask** - **Telewebion** - **Tempo** - - **TennisTV**: [tennistv] - - **TenPlay**: [10play] + - **TennisTV**: [*tennistv*](## "netrc machine") + - **TenPlay**: [*10play*](## "netrc machine") - **TF1** - **TFO** - **TheHoleTv** @@ -1417,13 +1417,13 @@ - **tokfm:audition** - **tokfm:podcast** - **ToonGoggles** - - **tou.tv**: [toutv] + - **tou.tv**: [*toutv*](## "netrc machine") - **Toypics**: Toypics video - **ToypicsUser**: Toypics user profile - **TrailerAddict**: (**Currently broken**) - **TravelChannel** - - **Triller**: [triller] - - **TrillerUser**: [triller] + - **Triller**: [*triller*](## "netrc machine") + - **TrillerUser**: [*triller*](## "netrc machine") - **Trilulilu** - **Trovo** - **TrovoChannelClip**: All Clips of a trovo.live channel; "trovoclip:" prefix @@ -1435,11 +1435,11 @@ - **Truth** - **TruTV** - **Tube8** - - **TubeTuGraz**: [tubetugraz] tube.tugraz.at - - **TubeTuGrazSeries**: [tubetugraz] - - **TubiTv**: [tubitv] + - **TubeTuGraz**: [*tubetugraz*](## "netrc machine") tube.tugraz.at + - **TubeTuGrazSeries**: [*tubetugraz*](## "netrc machine") + - **TubiTv**: [*tubitv*](## "netrc machine") - **TubiTvShow** - - **Tumblr**: [tumblr] + - **Tumblr**: [*tumblr*](## "netrc machine") - **tunein:clip** - **tunein:program** - **tunein:station** @@ -1489,13 +1489,13 @@ - **TwitCasting** - **TwitCastingLive** - **TwitCastingUser** - - **twitch:clips**: [twitch] - - **twitch:stream**: [twitch] - - **twitch:vod**: [twitch] - - **TwitchCollection**: [twitch] - - **TwitchVideos**: [twitch] - - **TwitchVideosClips**: [twitch] - - **TwitchVideosCollections**: [twitch] + - **twitch:clips**: [*twitch*](## "netrc machine") + - **twitch:stream**: [*twitch*](## "netrc machine") + - **twitch:vod**: [*twitch*](## "netrc machine") + - **TwitchCollection**: [*twitch*](## "netrc machine") + - **TwitchVideos**: [*twitch*](## "netrc machine") + - **TwitchVideosClips**: [*twitch*](## "netrc machine") + - **TwitchVideosCollections**: [*twitch*](## "netrc machine") - **twitter** - **twitter:amplify** - **twitter:broadcast** @@ -1503,11 +1503,11 @@ - **twitter:shortener** - **twitter:spaces** - **Txxx** - - **udemy**: [udemy] - - **udemy:course**: [udemy] + - **udemy**: [*udemy*](## "netrc machine") + - **udemy:course**: [*udemy*](## "netrc machine") - **UDNEmbed**: 聯合影音 - - **UFCArabia**: [ufcarabia] - - **UFCTV**: [ufctv] + - **UFCArabia**: [*ufcarabia*](## "netrc machine") + - **UFCTV**: [*ufctv*](## "netrc machine") - **ukcolumn** - **UKTVPlay** - **umg:de**: Universal Music Deutschland @@ -1537,7 +1537,7 @@ - **VevoPlaylist** - **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet - **vh1.com** - - **vhx:embed**: [vimeo] + - **vhx:embed**: [*vimeo*](## "netrc machine") - **Viafree** - **vice** - **vice:article** @@ -1560,25 +1560,25 @@ - **videomore:season** - **videomore:video** - **VideoPress** - - **Vidio**: [vidio] - - **VidioLive**: [vidio] - - **VidioPremier**: [vidio] + - **Vidio**: [*vidio*](## "netrc machine") + - **VidioLive**: [*vidio*](## "netrc machine") + - **VidioPremier**: [*vidio*](## "netrc machine") - **VidLii** - **viewlift** - **viewlift:embed** - **Viidea** - - **viki**: [viki] - - **viki:channel**: [viki] - - **vimeo**: [vimeo] - - **vimeo:album**: [vimeo] - - **vimeo:channel**: [vimeo] - - **vimeo:group**: [vimeo] - - **vimeo:likes**: [vimeo] Vimeo user likes - - **vimeo:ondemand**: [vimeo] - - **vimeo:pro**: [vimeo] - - **vimeo:review**: [vimeo] Review pages on vimeo - - **vimeo:user**: [vimeo] - - **vimeo:watchlater**: [vimeo] Vimeo watch 
later list, ":vimeowatchlater" keyword (requires authentication) + - **viki**: [*viki*](## "netrc machine") + - **viki:channel**: [*viki*](## "netrc machine") + - **vimeo**: [*vimeo*](## "netrc machine") + - **vimeo:album**: [*vimeo*](## "netrc machine") + - **vimeo:channel**: [*vimeo*](## "netrc machine") + - **vimeo:group**: [*vimeo*](## "netrc machine") + - **vimeo:likes**: [*vimeo*](## "netrc machine") Vimeo user likes + - **vimeo:ondemand**: [*vimeo*](## "netrc machine") + - **vimeo:pro**: [*vimeo*](## "netrc machine") + - **vimeo:review**: [*vimeo*](## "netrc machine") Review pages on vimeo + - **vimeo:user**: [*vimeo*](## "netrc machine") + - **vimeo:watchlater**: [*vimeo*](## "netrc machine") Vimeo watch later list, ":vimeowatchlater" keyword (requires authentication) - **Vimm:recording** - **Vimm:stream** - **ViMP** @@ -1588,13 +1588,13 @@ - **vine:user** - **Viqeo** - **Viu** - - **viu:ott**: [viu] + - **viu:ott**: [*viu*](## "netrc machine") - **viu:playlist** - **ViuOTTIndonesia** - **Vivo**: vivo.sx - - **vk**: [vk] VK - - **vk:uservideos**: [vk] VK - User's Videos - - **vk:wallpost**: [vk] + - **vk**: [*vk*](## "netrc machine") VK + - **vk:uservideos**: [*vk*](## "netrc machine") VK - User's Videos + - **vk:wallpost**: [*vk*](## "netrc machine") - **vm.tiktok** - **Vocaroo** - **Vodlocker** @@ -1613,14 +1613,14 @@ - **vqq:video** - **Vrak** - **VRT**: VRT NWS, Flanders News, Flandern Info and Sporza - - **VrtNU**: [vrtnu] VrtNU.be - - **vrv**: [vrv] + - **VrtNU**: [*vrtnu*](## "netrc machine") VrtNU.be + - **vrv**: [*vrv*](## "netrc machine") - **vrv:series** - **VShare** - **VTM** - - **VTXTV**: [vtxtv] - - **VTXTVLive**: [vtxtv] - - **VTXTVRecordings**: [vtxtv] + - **VTXTV**: [*vtxtv*](## "netrc machine") + - **VTXTVLive**: [*vtxtv*](## "netrc machine") + - **VTXTVRecordings**: [*vtxtv*](## "netrc machine") - **VuClip** - **Vupload** - **VVVVID** @@ -1629,9 +1629,9 @@ - **Vzaar** - **Wakanim** - **Walla** - - **WalyTV**: [walytv] - - **WalyTVLive**: [walytv] - - **WalyTVRecordings**: [walytv] + - **WalyTV**: [*walytv*](## "netrc machine") + - **WalyTVLive**: [*walytv*](## "netrc machine") + - **WalyTVRecordings**: [*walytv*](## "netrc machine") - **wasdtv:clip** - **wasdtv:record** - **wasdtv:stream** @@ -1743,13 +1743,13 @@ - **YoutubeLivestreamEmbed**: YouTube livestream embeds - **YoutubeYtBe**: youtu.be - **Zapiks** - - **Zattoo**: [zattoo] - - **ZattooLive**: [zattoo] - - **ZattooMovies**: [zattoo] - - **ZattooRecordings**: [zattoo] + - **Zattoo**: [*zattoo*](## "netrc machine") + - **ZattooLive**: [*zattoo*](## "netrc machine") + - **ZattooMovies**: [*zattoo*](## "netrc machine") + - **ZattooRecordings**: [*zattoo*](## "netrc machine") - **ZDF** - **ZDFChannel** - - **Zee5**: [zee5] + - **Zee5**: [*zee5*](## "netrc machine") - **zee5:series** - **ZeeNews** - **ZenYandex** diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index d6c5ce769..00846cd7e 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -614,7 +614,7 @@ class YoutubeDL: '\n You will no longer receive updates on this version') if current_version < MIN_SUPPORTED: msg = 'Python version %d.%d is no longer supported' - self.deprecation_warning( + self.deprecated_feature( f'{msg}! 
Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED)) if self.params.get('allow_unplayable_formats'): diff --git a/yt_dlp/dependencies/Cryptodome.py b/yt_dlp/dependencies/Cryptodome.py index a50bce4d4..74ab6575c 100644 --- a/yt_dlp/dependencies/Cryptodome.py +++ b/yt_dlp/dependencies/Cryptodome.py @@ -14,22 +14,14 @@ AES = PKCS1_v1_5 = Blowfish = PKCS1_OAEP = SHA1 = CMAC = RSA = None try: if _parent.__name__ == 'Cryptodome': from Cryptodome import __version__ - from Cryptodome.Cipher import AES - from Cryptodome.Cipher import PKCS1_v1_5 - from Cryptodome.Cipher import Blowfish - from Cryptodome.Cipher import PKCS1_OAEP - from Cryptodome.Hash import SHA1 - from Cryptodome.Hash import CMAC + from Cryptodome.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5 + from Cryptodome.Hash import CMAC, SHA1 from Cryptodome.PublicKey import RSA elif _parent.__name__ == 'Crypto': from Crypto import __version__ - from Crypto.Cipher import AES - from Crypto.Cipher import PKCS1_v1_5 - from Crypto.Cipher import Blowfish - from Crypto.Cipher import PKCS1_OAEP - from Crypto.Hash import SHA1 - from Crypto.Hash import CMAC - from Crypto.PublicKey import RSA + from Crypto.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5 # noqa: F401 + from Crypto.Hash import CMAC, SHA1 # noqa: F401 + from Crypto.PublicKey import RSA # noqa: F401 except ImportError: __version__ = f'broken {__version__}'.strip() diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index 377f138b7..3dc638f52 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -497,7 +497,7 @@ class FragmentFD(FileDownloader): download_fragment(fragment, ctx_copy) return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized') - self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome') + self.report_warning('The download speed shown is only of one thread. This is a known issue') with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool: try: for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments): diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 98efe0e9d..8ad63b411 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -3527,7 +3527,7 @@ class InfoExtractor: desc = '' if cls._NETRC_MACHINE: if markdown: - desc += f' [{cls._NETRC_MACHINE}]' + desc += f' [*{cls._NETRC_MACHINE}*](## "netrc machine")' else: desc += f' [{cls._NETRC_MACHINE}]' if cls.IE_DESC is False: diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index d1696349a..44e932293 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -956,7 +956,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): @staticmethod def is_music_url(url): - return re.match(r'https?://music\.youtube\.com/', url) is not None + return re.match(r'(https?://)?music\.youtube\.com/', url) is not None def _extract_video(self, renderer): video_id = renderer.get('videoId') @@ -6211,6 +6211,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): original_tab_id, display_id = tab[1:], f'{item_id}{tab}' if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts: url = f'{pre}/videos{post}' + if smuggled_data.get('is_music_url'): + self.report_warning(f'YouTube Music is not directly supported. 
Redirecting to {url}') # Handle both video/playlist URLs qs = parse_qs(url) From 7f51861b1820c37b157a239b1fe30628d907c034 Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Wed, 1 Mar 2023 07:56:53 +0000 Subject: [PATCH 2018/2552] [extractor/youtube] Detect and break on looping comments (#6301) Fixes https://github.com/yt-dlp/yt-dlp/issues/6290 Authored by: coletdjnz --- yt_dlp/extractor/youtube.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 44e932293..b02e0153a 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3341,6 +3341,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): comment = self._extract_comment(comment_renderer, parent) if not comment: continue + # Sometimes YouTube may break and give us infinite looping comments. + # See: https://github.com/yt-dlp/yt-dlp/issues/6290 + if comment['id'] in tracker['seen_comment_ids']: + self.report_warning('Detected YouTube comments looping. Stopping comment extraction as we probably cannot get any more.') + yield + else: + tracker['seen_comment_ids'].add(comment['id']) tracker['running_total'] += 1 tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1 @@ -3365,7 +3372,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): est_total=0, current_page_thread=0, total_parent_comments=0, - total_reply_comments=0) + total_reply_comments=0, + seen_comment_ids=set()) # TODO: Deprecated # YouTube comments have a max depth of 2 From b38cae49e6f4849c8ee2a774bdc3c1c647ae5f0e Mon Sep 17 00:00:00 2001 From: bashonly Date: Wed, 1 Mar 2023 06:38:02 -0600 Subject: [PATCH 2019/2552] [extractor/generic] Detect manifest links via extension Authored by: bashonly --- yt_dlp/extractor/generic.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index d76ef3e31..49aa5a1f5 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -2393,14 +2393,15 @@ class GenericIE(InfoExtractor): self.report_detected('direct video link') headers = smuggled_data.get('http_headers', {}) format_id = str(m.group('format_id')) + ext = determine_ext(url) subtitles = {} - if format_id.endswith('mpegurl'): + if format_id.endswith('mpegurl') or ext == 'm3u8': formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4', headers=headers) info_dict.update(self._fragment_query(url)) - elif format_id.endswith('mpd') or format_id.endswith('dash+xml'): + elif format_id.endswith('mpd') or format_id.endswith('dash+xml') or ext == 'mpd': formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id, headers=headers) info_dict.update(self._fragment_query(url)) - elif format_id == 'f4m': + elif format_id == 'f4m' or ext == 'f4m': formats = self._extract_f4m_formats(url, video_id, headers=headers) else: formats = [{ From 9fddc12ab022a31754e0eaa358fc4e1dfa974587 Mon Sep 17 00:00:00 2001 From: std-move <26625259+std-move@users.noreply.github.com> Date: Thu, 2 Mar 2023 19:33:33 +0100 Subject: [PATCH 2020/2552] [extractor/iprima] Fix extractor (#6291) Authored by: std-move Closes #6187 --- yt_dlp/extractor/iprima.py | 41 +++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/yt_dlp/extractor/iprima.py b/yt_dlp/extractor/iprima.py index 181820542..e58e9c2ee 100644 --- a/yt_dlp/extractor/iprima.py +++ b/yt_dlp/extractor/iprima.py @@ -7,7 +7,8 @@ from ..utils import ( js_to_json, urlencode_postdata, 
 ExtractorError,
-    parse_qs
+    parse_qs,
+    traverse_obj
 )
 
 
@@ -15,8 +16,7 @@ class IPrimaIE(InfoExtractor):
     _VALID_URL = r'https?://(?!cnn)(?:[^/]+)\.iprima\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
     _GEO_BYPASS = False
     _NETRC_MACHINE = 'iprima'
-    _LOGIN_URL = 'https://auth.iprima.cz/oauth2/login'
-    _TOKEN_URL = 'https://auth.iprima.cz/oauth2/token'
+    _AUTH_ROOT = 'https://auth.iprima.cz'
     access_token = None
 
     _TESTS = [{
@@ -67,7 +67,7 @@ class IPrimaIE(InfoExtractor):
             return
 
         login_page = self._download_webpage(
-            self._LOGIN_URL, None, note='Downloading login page',
+            f'{self._AUTH_ROOT}/oauth2/login', None, note='Downloading login page',
             errnote='Downloading login page failed')
 
         login_form = self._hidden_inputs(login_page)
@@ -76,11 +76,20 @@
             '_email': username,
             '_password': password})
 
-        _, login_handle = self._download_webpage_handle(
-            self._LOGIN_URL, None, data=urlencode_postdata(login_form),
+        profile_select_html, login_handle = self._download_webpage_handle(
+            f'{self._AUTH_ROOT}/oauth2/login', None, data=urlencode_postdata(login_form),
             note='Logging in')
 
-        code = parse_qs(login_handle.geturl()).get('code')[0]
+        # a profile may need to be selected first, even when there is only a single one
+        if '/profile-select' in login_handle.geturl():
+            profile_id = self._search_regex(
+                r'data-identifier\s*=\s*["\']?(\w+)', profile_select_html, 'profile id')
+
+            login_handle = self._request_webpage(
+                f'{self._AUTH_ROOT}/user/profile-select-perform/{profile_id}', None,
+                query={'continueUrl': '/user/login?redirect_uri=/user/'}, note='Selecting profile')
+
+        code = traverse_obj(login_handle.geturl(), ({parse_qs}, 'code', 0))
         if not code:
             raise ExtractorError('Login failed', expected=True)
 
@@ -89,10 +98,10 @@
             'client_id': 'prima_sso',
             'grant_type': 'authorization_code',
             'code': code,
-            'redirect_uri': 'https://auth.iprima.cz/sso/auth-check'}
+            'redirect_uri': f'{self._AUTH_ROOT}/sso/auth-check'}
 
         token_data = self._download_json(
-            self._TOKEN_URL, None,
+            f'{self._AUTH_ROOT}/oauth2/token', None,
            note='Downloading token', errnote='Downloading token failed',
             data=urlencode_postdata(token_request_data))
 
@@ -115,14 +124,22 @@
 
         webpage = self._download_webpage(url, video_id)
 
-        title = self._html_search_meta(
+        title = self._html_extract_title(webpage) or self._html_search_meta(
             ['og:title', 'twitter:title'],
             webpage, 'title', default=None)
 
         video_id = self._search_regex((
             r'productId\s*=\s*([\'"])(?P<id>p\d+)\1',
-            r'pproduct_id\s*=\s*([\'"])(?P<id>p\d+)\1'),
-            webpage, 'real id', group='id')
+            r'pproduct_id\s*=\s*([\'"])(?P<id>p\d+)\1',
+        ), webpage, 'real id', group='id', default=None)
+
+        if not video_id:
+            nuxt_data = self._search_nuxt_data(webpage, video_id, traverse='data')
+            video_id = traverse_obj(
+                nuxt_data, (..., 'content', 'additionals', 'videoPlayId', {str}), get_all=False)
+
+        if not video_id:
+            self.raise_no_formats('Unable to extract video ID from webpage')
 
         metadata = self._download_json(
             f'https://api.play-backend.iprima.cz/api/v1//products/id-{video_id}/play',

From 77d6d136468d0c23c8e79bc937898747804f585a Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Fri, 3 Mar 2023 03:34:56 -0600
Subject: [PATCH 2021/2552] [extractor/ntvru] Extract HLS and DASH formats (#6403)

Closes #5915

Authored by: bashonly
---
 yt_dlp/extractor/ntvru.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/yt_dlp/extractor/ntvru.py b/yt_dlp/extractor/ntvru.py
index 8d5877daa..91b7724eb 100644
--- 
a/yt_dlp/extractor/ntvru.py +++ b/yt_dlp/extractor/ntvru.py @@ -21,6 +21,7 @@ class NTVRuIE(InfoExtractor): 'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины', 'thumbnail': r're:^http://.*\.jpg', 'duration': 136, + 'view_count': int, }, }, { 'url': 'http://www.ntv.ru/video/novosti/750370/', @@ -32,6 +33,7 @@ class NTVRuIE(InfoExtractor): 'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход', 'thumbnail': r're:^http://.*\.jpg', 'duration': 172, + 'view_count': int, }, }, { 'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416', @@ -43,6 +45,7 @@ class NTVRuIE(InfoExtractor): 'description': '«Сегодня». 21 марта 2014 года. 16:00', 'thumbnail': r're:^http://.*\.jpg', 'duration': 1496, + 'view_count': int, }, }, { 'url': 'https://www.ntv.ru/kino/Koma_film/m70281/o336036/video/', @@ -54,6 +57,7 @@ class NTVRuIE(InfoExtractor): 'description': 'Остросюжетный фильм «Кома»', 'thumbnail': r're:^http://.*\.jpg', 'duration': 5592, + 'view_count': int, }, }, { 'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/', @@ -65,6 +69,7 @@ class NTVRuIE(InfoExtractor): 'description': '«Дело врачей»: «Деревце жизни»', 'thumbnail': r're:^http://.*\.jpg', 'duration': 2590, + 'view_count': int, }, }, { # Schemeless file URL @@ -115,6 +120,14 @@ class NTVRuIE(InfoExtractor): 'url': file_, 'filesize': int_or_none(xpath_text(video, './%ssize' % format_id)), }) + hls_manifest = xpath_text(video, './playback/hls') + if hls_manifest: + formats.extend(self._extract_m3u8_formats( + hls_manifest, video_id, m3u8_id='hls', fatal=False)) + dash_manifest = xpath_text(video, './playback/dash') + if dash_manifest: + formats.extend(self._extract_mpd_formats( + dash_manifest, video_id, mpd_id='dash', fatal=False)) return { 'id': xpath_text(video, './id'), From 2d5a8c5db2bd4ff1c2e45e00cd890a10f8ffca9e Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 3 Mar 2023 03:37:23 -0600 Subject: [PATCH 2022/2552] [extractor/mediastream] Improve WinSports support (#6401) Closes #6360 Authored by: bashonly --- yt_dlp/extractor/mediastream.py | 41 +++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/yt_dlp/extractor/mediastream.py b/yt_dlp/extractor/mediastream.py index 4d3949527..e8d427a31 100644 --- a/yt_dlp/extractor/mediastream.py +++ b/yt_dlp/extractor/mediastream.py @@ -1,7 +1,13 @@ import re from .common import InfoExtractor -from ..utils import clean_html, get_element_html_by_class +from ..utils import ( + remove_end, + str_or_none, + strip_or_none, + traverse_obj, + urljoin, +) class MediaStreamIE(InfoExtractor): @@ -117,39 +123,56 @@ class MediaStreamIE(InfoExtractor): class WinSportsVideoIE(InfoExtractor): - _VALID_URL = r'https?://www\.winsports\.co/videos/(?P[\w-]+)-(?P\d+)' + _VALID_URL = r'https?://www\.winsports\.co/videos/(?P[\w-]+)' _TESTS = [{ 'url': 'https://www.winsports.co/videos/siempre-castellanos-gran-atajada-del-portero-cardenal-para-evitar-la-caida-de-su-arco-60536', 'info_dict': { 'id': '62dc8357162c4b0821fcfb3c', - 'display_id': 'siempre-castellanos-gran-atajada-del-portero-cardenal-para-evitar-la-caida-de-su-arco', + 'display_id': 'siempre-castellanos-gran-atajada-del-portero-cardenal-para-evitar-la-caida-de-su-arco-60536', 'title': '¡Siempre Castellanos! 
Gran atajada del portero \'cardenal\' para evitar la caída de su arco', 'description': 'md5:eb811b2b2882bdc59431732c06b905f2', 'thumbnail': r're:^https?://[^?#]+62dc8357162c4b0821fcfb3c', 'ext': 'mp4', }, + 'params': {'skip_download': 'm3u8'}, }, { 'url': 'https://www.winsports.co/videos/observa-aqui-los-goles-del-empate-entre-tolima-y-nacional-60548', 'info_dict': { 'id': '62dcb875ef12a5526790b552', - 'display_id': 'observa-aqui-los-goles-del-empate-entre-tolima-y-nacional', + 'display_id': 'observa-aqui-los-goles-del-empate-entre-tolima-y-nacional-60548', 'title': 'Observa aquí los goles del empate entre Tolima y Nacional', 'description': 'md5:b19402ba6e46558b93fd24b873eea9c9', 'thumbnail': r're:^https?://[^?#]+62dcb875ef12a5526790b552', 'ext': 'mp4', }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.winsports.co/videos/equidad-vuelve-defender-su-arco-de-remates-de-junior', + 'info_dict': { + 'id': '63fa7eca72f1741ad3a4d515', + 'display_id': 'equidad-vuelve-defender-su-arco-de-remates-de-junior', + 'title': '⚽ Equidad vuelve a defender su arco de remates de Junior', + 'description': 'Remate de Sierra', + 'thumbnail': r're:^https?://[^?#]+63fa7eca72f1741ad3a4d515', + 'ext': 'mp4', + }, + 'params': {'skip_download': 'm3u8'}, }] def _real_extract(self, url): - display_id, video_id = self._match_valid_url(url).group('display_id', 'id') + display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - + json_ld = self._search_json_ld(webpage, display_id, expected_type='VideoObject', default={}) media_setting_json = self._search_json( r']+data-drupal-selector="drupal-settings-json">', webpage, 'drupal-setting-json', display_id) - mediastream_id = media_setting_json['settings']['mediastream_formatter'][video_id]['mediastream_id'] + mediastream_id = traverse_obj( + media_setting_json, ('settings', 'mediastream_formatter', ..., 'mediastream_id', {str_or_none}), + get_all=False) or json_ld.get('url') + if not mediastream_id: + self.raise_no_formats('No MediaStream embed found in webpage') return self.url_result( - f'https://mdstrm.com/embed/{mediastream_id}', MediaStreamIE, video_id, url_transparent=True, - display_id=display_id, video_title=clean_html(get_element_html_by_class('title-news', webpage))) + urljoin('https://mdstrm.com/embed/', mediastream_id), MediaStreamIE, display_id, url_transparent=True, + display_id=display_id, video_title=strip_or_none(remove_end(json_ld.get('title'), '| Win Sports'))) From 40d77d89027cd0e0ce31d22aec81db3e1d433900 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 3 Mar 2023 03:42:54 -0600 Subject: [PATCH 2023/2552] [extractor/yle_areena] Extract non-Kaltura videos (#6402) Closes #6066 Authored by: bashonly --- yt_dlp/extractor/yle_areena.py | 37 ++++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/yle_areena.py b/yt_dlp/extractor/yle_areena.py index 98d3b1949..c5b45f0cb 100644 --- a/yt_dlp/extractor/yle_areena.py +++ b/yt_dlp/extractor/yle_areena.py @@ -61,7 +61,22 @@ class YleAreenaIE(InfoExtractor): 'age_limit': 0, 'webpage_url': 'https://areena.yle.fi/1-2158940' } - } + }, + { + 'url': 'https://areena.yle.fi/1-64829589', + 'info_dict': { + 'id': '1-64829589', + 'ext': 'mp4', + 'title': 'HKO & Mälkki & Tanner', + 'description': 'md5:b4f1b1af2c6569b33f75179a86eea156', + 'series': 'Helsingin kaupunginorkesterin konsertteja', + 'thumbnail': r're:^https?://.+\.jpg$', + 'release_date': '20230120', + }, + 'params': { + 
'skip_download': 'm3u8', + }, + }, ] def _real_extract(self, url): @@ -91,12 +106,22 @@ class YleAreenaIE(InfoExtractor): 'name': sub.get('kind'), }) + kaltura_id = traverse_obj(video_data, ('data', 'ongoing_ondemand', 'kaltura', 'id'), expected_type=str) + if kaltura_id: + info_dict = { + '_type': 'url_transparent', + 'url': smuggle_url(f'kaltura:1955031:{kaltura_id}', {'source_url': url}), + 'ie_key': KalturaIE.ie_key(), + } + else: + info_dict = { + 'id': video_id, + 'formats': self._extract_m3u8_formats( + video_data['data']['ongoing_ondemand']['manifest_url'], video_id, 'mp4', m3u8_id='hls'), + } + return { - '_type': 'url_transparent', - 'url': smuggle_url( - f'kaltura:1955031:{video_data["data"]["ongoing_ondemand"]["kaltura"]["id"]}', - {'source_url': url}), - 'ie_key': KalturaIE.ie_key(), + **info_dict, 'title': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'title', 'fin'), expected_type=str) or episode or info.get('title')), 'description': description, From 9acf1ee25f7ad3920ede574a9de95b8c18626af4 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 3 Mar 2023 16:48:54 +0530 Subject: [PATCH 2024/2552] [jsinterp] Handle `Date` at epoch 0 Closes #6400 --- test/test_youtube_signature.py | 4 ++++ yt_dlp/jsinterp.py | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 3203538bb..336e80291 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -66,6 +66,10 @@ _SIG_TESTS = [ ] _NSIG_TESTS = [ + ( + 'https://www.youtube.com/s/player/7862ca1f/player_ias.vflset/en_US/base.js', + 'X_LCxVDjAavgE5t', 'yxJ1dM6iz5ogUg', + ), ( 'https://www.youtube.com/s/player/9216d1f7/player_ias.vflset/en_US/base.js', 'SLp9F5bwjAdhE9F-', 'gWnb9IK2DJ8Q1w', diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py index c2d056aa1..31ab204d7 100644 --- a/yt_dlp/jsinterp.py +++ b/yt_dlp/jsinterp.py @@ -355,11 +355,11 @@ class JSInterpreter: obj = expr[4:] if obj.startswith('Date('): left, right = self._separate_at_paren(obj[4:]) - expr = unified_timestamp( + date = unified_timestamp( self.interpret_expression(left, local_vars, allow_recursion), False) - if not expr: + if date is None: raise self.Exception(f'Failed to parse date {left!r}', expr) - expr = self._dump(int(expr * 1000), local_vars) + right + expr = self._dump(int(date * 1000), local_vars) + right else: raise self.Exception(f'Unsupported object {obj}', expr) From d400e261cf029a3f20d364113b14de973be75404 Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Fri, 3 Mar 2023 22:31:41 +0530 Subject: [PATCH 2025/2552] [devscripts] Script to generate changelog (#6220) Authored by: Grub4K --- README.md | 7 +- devscripts/changelog_override.json | 1 + devscripts/changelog_override.schema.json | 96 +++++ devscripts/make_changelog.py | 491 ++++++++++++++++++++++ 4 files changed, 593 insertions(+), 2 deletions(-) create mode 100644 devscripts/changelog_override.json create mode 100644 devscripts/changelog_override.schema.json create mode 100644 devscripts/make_changelog.py diff --git a/README.md b/README.md index 3d3db933a..ddd71eeeb 100644 --- a/README.md +++ b/README.md @@ -311,10 +311,13 @@ If you wish to build it anyway, install Python and py2exe, and then simply run ` ### Related scripts -* **`devscripts/update-version.py [revision]`** - Update the version number based on current date -* **`devscripts/set-variant.py variant [-M update_message]`** - Set the build variant of the executable +* **`devscripts/update-version.py`** - Update the 
version number based on current date. +* **`devscripts/set-variant.py`** - Set the build variant of the executable. +* **`devscripts/make_changelog.py`** - Create a markdown changelog using short commit messages and update `CONTRIBUTORS` file. * **`devscripts/make_lazy_extractors.py`** - Create lazy extractors. Running this before building the binaries (any variant) will improve their startup performance. Set the environment variable `YTDLP_NO_LAZY_EXTRACTORS=1` if you wish to forcefully disable lazy extractor loading. +Note: See their `--help` for more info. + You can also fork the project on GitHub and run your fork's [build workflow](.github/workflows/build.yml) to automatically build a full release # USAGE AND OPTIONS diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json new file mode 100644 index 000000000..0967ef424 --- /dev/null +++ b/devscripts/changelog_override.json @@ -0,0 +1 @@ +{} diff --git a/devscripts/changelog_override.schema.json b/devscripts/changelog_override.schema.json new file mode 100644 index 000000000..9bd747b70 --- /dev/null +++ b/devscripts/changelog_override.schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft/2020-12/schema", + "type": "array", + "uniqueItems": true, + "items": { + "type": "object", + "oneOf": [ + { + "type": "object", + "properties": { + "action": { + "enum": [ + "add" + ] + }, + "when": { + "type": "string", + "pattern": "^([0-9a-f]{40}|\\d{4}\\.\\d{2}\\.\\d{2})$" + }, + "hash": { + "type": "string", + "pattern": "^[0-9a-f]{40}$" + }, + "short": { + "type": "string" + }, + "authors": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "action", + "short" + ] + }, + { + "type": "object", + "properties": { + "action": { + "enum": [ + "remove" + ] + }, + "when": { + "type": "string", + "pattern": "^([0-9a-f]{40}|\\d{4}\\.\\d{2}\\.\\d{2})$" + }, + "hash": { + "type": "string", + "pattern": "^[0-9a-f]{40}$" + } + }, + "required": [ + "action", + "hash" + ] + }, + { + "type": "object", + "properties": { + "action": { + "enum": [ + "change" + ] + }, + "when": { + "type": "string", + "pattern": "^([0-9a-f]{40}|\\d{4}\\.\\d{2}\\.\\d{2})$" + }, + "hash": { + "type": "string", + "pattern": "^[0-9a-f]{40}$" + }, + "short": { + "type": "string" + }, + "authors": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "action", + "hash", + "short", + "authors" + ] + } + ] + } +} diff --git a/devscripts/make_changelog.py b/devscripts/make_changelog.py new file mode 100644 index 000000000..b66181b53 --- /dev/null +++ b/devscripts/make_changelog.py @@ -0,0 +1,491 @@ +from __future__ import annotations + +import enum +import itertools +import json +import logging +import re +import subprocess +import sys +from collections import defaultdict +from dataclasses import dataclass +from functools import lru_cache +from pathlib import Path + +BASE_URL = 'https://github.com' +LOCATION_PATH = Path(__file__).parent + +logger = logging.getLogger(__name__) + + +class CommitGroup(enum.Enum): + UPSTREAM = None + PRIORITY = 'Important' + CORE = 'Core' + EXTRACTOR = 'Extractor' + DOWNLOADER = 'Downloader' + POSTPROCESSOR = 'Postprocessor' + MISC = 'Misc.' 
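The override schema added above can be exercised with any JSON Schema validator; a quick sketch using the third-party `jsonschema` package (the changelog script itself does not depend on it, and the override entry is illustrative):

```python
import json

import jsonschema  # third-party; assumed installed for this sketch

with open('devscripts/changelog_override.schema.json') as f:
    schema = json.load(f)

overrides = [{'action': 'remove', 'hash': '0' * 40}]  # illustrative entry
jsonschema.validate(overrides, schema)  # raises ValidationError if malformed
```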
+ + @classmethod + @lru_cache + def commit_lookup(cls): + return { + name: group + for group, names in { + cls.PRIORITY: {''}, + cls.UPSTREAM: {'upstream'}, + cls.CORE: { + 'aes', + 'cache', + 'compat_utils', + 'compat', + 'cookies', + 'core', + 'dependencies', + 'jsinterp', + 'outtmpl', + 'plugins', + 'update', + 'utils', + }, + cls.MISC: { + 'build', + 'cleanup', + 'devscripts', + 'docs', + 'misc', + 'test', + }, + cls.EXTRACTOR: {'extractor', 'extractors'}, + cls.DOWNLOADER: {'downloader'}, + cls.POSTPROCESSOR: {'postprocessor'}, + }.items() + for name in names + } + + @classmethod + def get(cls, value): + result = cls.commit_lookup().get(value) + if result: + logger.debug(f'Mapped {value!r} => {result.name}') + return result + + +@dataclass +class Commit: + hash: str | None + short: str + authors: list[str] + + def __str__(self): + result = f'{self.short!r}' + + if self.hash: + result += f' ({self.hash[:7]})' + + if self.authors: + authors = ', '.join(self.authors) + result += f' by {authors}' + + return result + + +@dataclass +class CommitInfo: + details: str | None + sub_details: tuple[str, ...] + message: str + issues: list[str] + commit: Commit + fixes: list[Commit] + + def key(self): + return ((self.details or '').lower(), self.sub_details, self.message) + + +class Changelog: + MISC_RE = re.compile(r'(?:^|\b)(?:lint(?:ing)?|misc|format(?:ting)?|fixes)(?:\b|$)', re.IGNORECASE) + + def __init__(self, groups, repo): + self._groups = groups + self._repo = repo + + def __str__(self): + return '\n'.join(self._format_groups(self._groups)).replace('\t', ' ') + + def _format_groups(self, groups): + for item in CommitGroup: + group = groups[item] + if group: + yield self.format_module(item.value, group) + + def format_module(self, name, group): + result = f'\n#### {name} changes\n' if name else '\n' + return result + '\n'.join(self._format_group(group)) + + def _format_group(self, group): + sorted_group = sorted(group, key=CommitInfo.key) + detail_groups = itertools.groupby(sorted_group, lambda item: (item.details or '').lower()) + for details, items in detail_groups: + if not details: + indent = '' + else: + yield f'- {details}' + indent = '\t' + + if details == 'cleanup': + items, cleanup_misc_items = self._filter_cleanup_misc_items(items) + + sub_detail_groups = itertools.groupby(items, lambda item: item.sub_details) + for sub_details, entries in sub_detail_groups: + if not sub_details: + for entry in entries: + yield f'{indent}- {self.format_single_change(entry)}' + continue + + prefix = f'{indent}- {", ".join(sub_details)}' + entries = list(entries) + if len(entries) == 1: + yield f'{prefix}: {self.format_single_change(entries[0])}' + continue + + yield prefix + for entry in entries: + yield f'{indent}\t- {self.format_single_change(entry)}' + + if details == 'cleanup' and cleanup_misc_items: + yield from self._format_cleanup_misc_sub_group(cleanup_misc_items) + + def _filter_cleanup_misc_items(self, items): + cleanup_misc_items = defaultdict(list) + non_misc_items = [] + for item in items: + if self.MISC_RE.search(item.message): + cleanup_misc_items[tuple(item.commit.authors)].append(item) + else: + non_misc_items.append(item) + + return non_misc_items, cleanup_misc_items + + def _format_cleanup_misc_sub_group(self, group): + prefix = '\t- Miscellaneous' + if len(group) == 1: + yield f'{prefix}: {next(self._format_cleanup_misc_items(group))}' + return + + yield prefix + for message in self._format_cleanup_misc_items(group): + yield f'\t\t- {message}' + + def 
_format_cleanup_misc_items(self, group): + for authors, infos in group.items(): + message = ', '.join( + self._format_message_link(None, info.commit.hash) + for info in sorted(infos, key=lambda item: item.commit.hash or '')) + yield f'{message} by {self._format_authors(authors)}' + + def format_single_change(self, info): + message = self._format_message_link(info.message, info.commit.hash) + if info.issues: + message = f'{message} ({self._format_issues(info.issues)})' + + if info.commit.authors: + message = f'{message} by {self._format_authors(info.commit.authors)}' + + if info.fixes: + fix_message = ', '.join(f'{self._format_message_link(None, fix.hash)}' for fix in info.fixes) + + authors = sorted({author for fix in info.fixes for author in fix.authors}, key=str.casefold) + if authors != info.commit.authors: + fix_message = f'{fix_message} by {self._format_authors(authors)}' + + message = f'{message} (With fixes in {fix_message})' + + return message + + def _format_message_link(self, message, hash): + assert message or hash, 'Improperly defined commit message or override' + message = message if message else hash[:7] + return f'[{message}]({self.repo_url}/commit/{hash})' if hash else message + + def _format_issues(self, issues): + return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues) + + @staticmethod + def _format_authors(authors): + return ', '.join(f'[{author}]({BASE_URL}/{author})' for author in authors) + + @property + def repo_url(self): + return f'{BASE_URL}/{self._repo}' + + +class CommitRange: + COMMAND = 'git' + COMMIT_SEPARATOR = '-----' + + AUTHOR_INDICATOR_RE = re.compile(r'Authored by:? ', re.IGNORECASE) + MESSAGE_RE = re.compile(r''' + (?:\[ + (?P[^\]\/:,]+) + (?:/(?P
    [^\]:,]+))? + (?:[:,](?P[^\]]+))? + \]\ )? + (?:`?(?P[^:`]+)`?: )? + (?P.+?) + (?:\ \((?P\#\d+(?:,\ \#\d+)*)\))? + ''', re.VERBOSE | re.DOTALL) + EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE) + FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+for)?|Revert)\s+([\da-f]{40})') + UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)') + + def __init__(self, start, end, default_author=None) -> None: + self._start = start + self._end = end + self._commits, self._fixes = self._get_commits_and_fixes(default_author) + self._commits_added = [] + + @classmethod + def from_single(cls, commitish='HEAD', default_author=None): + start_commitish = cls.get_prev_tag(commitish) + end_commitish = cls.get_next_tag(commitish) + if start_commitish == end_commitish: + start_commitish = cls.get_prev_tag(f'{commitish}~') + logger.info(f'Determined range from {commitish!r}: {start_commitish}..{end_commitish}') + return cls(start_commitish, end_commitish, default_author) + + @classmethod + def get_prev_tag(cls, commitish): + command = [cls.COMMAND, 'describe', '--tags', '--abbrev=0', '--exclude=*[^0-9.]*', commitish] + return subprocess.check_output(command, text=True).strip() + + @classmethod + def get_next_tag(cls, commitish): + result = subprocess.run( + [cls.COMMAND, 'describe', '--contains', '--abbrev=0', commitish], + stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True) + if result.returncode: + return 'HEAD' + + return result.stdout.partition('~')[0].strip() + + def __iter__(self): + return iter(itertools.chain(self._commits.values(), self._commits_added)) + + def __len__(self): + return len(self._commits) + len(self._commits_added) + + def __contains__(self, commit): + if isinstance(commit, Commit): + if not commit.hash: + return False + commit = commit.hash + + return commit in self._commits + + def _is_ancestor(self, commitish): + return bool(subprocess.call( + [self.COMMAND, 'merge-base', '--is-ancestor', commitish, self._start])) + + def _get_commits_and_fixes(self, default_author): + result = subprocess.check_output([ + self.COMMAND, 'log', f'--format=%H%n%s%n%b%n{self.COMMIT_SEPARATOR}', + f'{self._start}..{self._end}'], text=True) + + commits = {} + fixes = defaultdict(list) + lines = iter(result.splitlines(False)) + for line in lines: + commit_hash = line + short = next(lines) + skip = short.startswith('Release ') or short == '[version] update' + + authors = [default_author] if default_author else [] + for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR): + match = self.AUTHOR_INDICATOR_RE.match(line) + if match: + authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold) + + commit = Commit(commit_hash, short, authors) + if skip: + logger.debug(f'Skipped commit: {commit}') + continue + + fix_match = self.FIXES_RE.search(commit.short) + if fix_match: + commitish = fix_match.group(1) + fixes[commitish].append(commit) + + commits[commit.hash] = commit + + for commitish, fix_commits in fixes.items(): + if commitish in commits: + hashes = ', '.join(commit.hash[:7] for commit in fix_commits) + logger.info(f'Found fix(es) for {commitish[:7]}: {hashes}') + for fix_commit in fix_commits: + del commits[fix_commit.hash] + else: + logger.debug(f'Commit with fixes not in changes: {commitish[:7]}') + + return commits, fixes + + def apply_overrides(self, overrides): + for override in overrides: + when = override.get('when') + if when and when not in self and when != self._start: + logger.debug(f'Ignored {when!r}, not in commits 
{self._start!r}') + continue + + override_hash = override.get('hash') + if override['action'] == 'add': + commit = Commit(override.get('hash'), override['short'], override.get('authors') or []) + logger.info(f'ADD {commit}') + self._commits_added.append(commit) + + elif override['action'] == 'remove': + if override_hash in self._commits: + logger.info(f'REMOVE {self._commits[override_hash]}') + del self._commits[override_hash] + + elif override['action'] == 'change': + if override_hash not in self._commits: + continue + commit = Commit(override_hash, override['short'], override['authors']) + logger.info(f'CHANGE {self._commits[commit.hash]} -> {commit}') + self._commits[commit.hash] = commit + + self._commits = {key: value for key, value in reversed(self._commits.items())} + + def groups(self): + groups = defaultdict(list) + for commit in self: + upstream_re = self.UPSTREAM_MERGE_RE.match(commit.short) + if upstream_re: + commit.short = f'[upstream] Merge up to youtube-dl {upstream_re.group(1)}' + + match = self.MESSAGE_RE.fullmatch(commit.short) + if not match: + logger.error(f'Error parsing short commit message: {commit.short!r}') + continue + + prefix, details, sub_details, sub_details_alt, message, issues = match.groups() + group = None + if prefix: + if prefix == 'priority': + prefix, _, details = (details or '').partition('/') + logger.debug(f'Priority: {message!r}') + group = CommitGroup.PRIORITY + + if not details and prefix: + if prefix not in ('core', 'downloader', 'extractor', 'misc', 'postprocessor', 'upstream'): + logger.debug(f'Replaced details with {prefix!r}') + details = prefix or None + + if details == 'common': + details = None + + if details: + details = details.strip() + + else: + group = CommitGroup.CORE + + sub_details = f'{sub_details or ""},{sub_details_alt or ""}'.lower().replace(':', ',') + sub_details = tuple(filter(None, map(str.strip, sub_details.split(',')))) + + issues = [issue.strip()[1:] for issue in issues.split(',')] if issues else [] + + if not group: + group = CommitGroup.get(prefix.lower()) + if not group: + if self.EXTRACTOR_INDICATOR_RE.search(commit.short): + group = CommitGroup.EXTRACTOR + else: + group = CommitGroup.POSTPROCESSOR + logger.warning(f'Failed to map {commit.short!r}, selected {group.name}') + + commit_info = CommitInfo( + details, sub_details, message.strip(), + issues, commit, self._fixes[commit.hash]) + logger.debug(f'Resolved {commit.short!r} to {commit_info!r}') + groups[group].append(commit_info) + + return groups + + +def get_new_contributors(contributors_path, commits): + contributors = set() + if contributors_path.exists(): + with contributors_path.open() as file: + for line in filter(None, map(str.strip, file)): + author, _, _ = line.partition(' (') + authors = author.split('/') + contributors.update(map(str.casefold, authors)) + + new_contributors = set() + for commit in commits: + for author in commit.authors: + author_folded = author.casefold() + if author_folded not in contributors: + contributors.add(author_folded) + new_contributors.add(author) + + return sorted(new_contributors, key=str.casefold) + + +if __name__ == '__main__': + import argparse + + parser = argparse.ArgumentParser( + description='Create a changelog markdown from a git commit range') + parser.add_argument( + 'commitish', default='HEAD', nargs='?', + help='The commitish to create the range from (default: %(default)s)') + parser.add_argument( + '-v', '--verbosity', action='count', default=0, + help='increase verbosity (can be used twice)') + 
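The `groups()` method above keys everything off the short commit message convention `[prefix/details] message (#issue, ...)`. A simplified, self-contained approximation of that parsing (not the exact `MESSAGE_RE`, which additionally handles sub-details and backtick-quoted prefixes):

```python
# Simplified stand-in for MESSAGE_RE; shows the convention, not the full pattern.
import re

SIMPLE_MESSAGE_RE = re.compile(
    r'(?:\[(?P<prefix>[^]/:,]+)(?:/(?P<details>[^]:,]+))?\] )?'
    r'(?P<message>.+?)'
    r'(?: \((?P<issues>#\d+(?:, #\d+)*)\))?$')

match = SIMPLE_MESSAGE_RE.match('[extractor/youtube] Fix signature extraction (#123)')
print(match.groupdict())
# {'prefix': 'extractor', 'details': 'youtube',
#  'message': 'Fix signature extraction', 'issues': '#123'}
```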
parser.add_argument( + '-c', '--contributors', action='store_true', + help='update CONTRIBUTORS file (default: %(default)s)') + parser.add_argument( + '--contributors-path', type=Path, default=LOCATION_PATH.parent / 'CONTRIBUTORS', + help='path to the CONTRIBUTORS file') + parser.add_argument( + '--no-override', action='store_true', + help='skip override json in commit generation (default: %(default)s)') + parser.add_argument( + '--override-path', type=Path, default=LOCATION_PATH / 'changelog_override.json', + help='path to the changelog_override.json file') + parser.add_argument( + '--default-author', default='pukkandan', + help='the author to use without a author indicator (default: %(default)s)') + parser.add_argument( + '--repo', default='yt-dlp/yt-dlp', + help='the github repository to use for the operations (default: %(default)s)') + args = parser.parse_args() + + logging.basicConfig( + datefmt='%Y-%m-%d %H-%M-%S', format='{asctime} | {levelname:<8} | {message}', + level=logging.WARNING - 10 * args.verbosity, style='{', stream=sys.stderr) + + commits = CommitRange.from_single(args.commitish, args.default_author) + + if not args.no_override: + if args.override_path.exists(): + with args.override_path.open() as file: + overrides = json.load(file) + commits.apply_overrides(overrides) + else: + logger.warning(f'File {args.override_path.as_posix()} does not exist') + + logger.info(f'Loaded {len(commits)} commits') + + new_contributors = get_new_contributors(args.contributors_path, commits) + if new_contributors: + if args.contributors: + with args.contributors_path.open('a') as file: + file.writelines(f'{contributor}\n' for contributor in new_contributors) + logger.info(f'New contributors: {", ".join(new_contributors)}') + + print(Changelog(commits.groups(), args.repo)) From 29cb20bd563c02671b31dd840139e93dd37150a1 Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Fri, 3 Mar 2023 22:33:12 +0530 Subject: [PATCH 2026/2552] [build] Automated builds and nightly releases (#6220) Closes #1839 Authored by: Grub4K, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- .github/workflows/build.yml | 555 ++++++++++++-------------- .github/workflows/publish.yml | 80 ++++ .github/workflows/release-nightly.yml | 49 +++ .github/workflows/release.yml | 125 ++++++ Changelog.md | 8 +- README.md | 8 +- devscripts/make_readme.py | 22 +- devscripts/update-version.py | 46 ++- yt_dlp/YoutubeDL.py | 6 +- 9 files changed, 559 insertions(+), 340 deletions(-) create mode 100644 .github/workflows/publish.yml create mode 100644 .github/workflows/release-nightly.yml create mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6041376a4..2183903ea 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,393 +1,338 @@ -name: Build -on: workflow_dispatch +name: Build Artifacts +on: + workflow_call: + inputs: + version: + required: true + type: string + channel: + required: false + default: stable + type: string + unix: + default: true + type: boolean + linux_arm: + default: true + type: boolean + macos: + default: true + type: boolean + macos_legacy: + default: true + type: boolean + windows: + default: true + type: boolean + windows32: + default: true + type: boolean + meta_files: + default: true + type: boolean + + workflow_dispatch: + inputs: + version: + description: Version tag (YYYY.MM.DD[.REV]) + required: true + type: string + channel: + description: Update channel (stable/nightly) + required: 
true + default: stable + type: string + unix: + description: yt-dlp, yt-dlp.tar.gz, yt-dlp_linux, yt-dlp_linux.zip + default: true + type: boolean + linux_arm: + description: yt-dlp_linux_aarch64, yt-dlp_linux_armv7l + default: true + type: boolean + macos: + description: yt-dlp_macos, yt-dlp_macos.zip + default: true + type: boolean + macos_legacy: + description: yt-dlp_macos_legacy + default: true + type: boolean + windows: + description: yt-dlp.exe, yt-dlp_min.exe, yt-dlp_win.zip + default: true + type: boolean + windows32: + description: yt-dlp_x86.exe + default: true + type: boolean + meta_files: + description: SHA2-256SUMS, SHA2-512SUMS, _update_spec + default: true + type: boolean + permissions: contents: read jobs: - prepare: - permissions: - contents: write # for push_release - runs-on: ubuntu-latest - outputs: - version_suffix: ${{ steps.version_suffix.outputs.version_suffix }} - ytdlp_version: ${{ steps.bump_version.outputs.ytdlp_version }} - head_sha: ${{ steps.push_release.outputs.head_sha }} - steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - uses: actions/setup-python@v4 - with: - python-version: '3.10' - - - name: Set version suffix - id: version_suffix - env: - PUSH_VERSION_COMMIT: ${{ secrets.PUSH_VERSION_COMMIT }} - if: "env.PUSH_VERSION_COMMIT == ''" - run: echo "version_suffix=$(date -u +"%H%M%S")" >> "$GITHUB_OUTPUT" - - name: Bump version - id: bump_version - run: | - python devscripts/update-version.py ${{ steps.version_suffix.outputs.version_suffix }} - make issuetemplates - - - name: Push to release - id: push_release - run: | - git config --global user.name github-actions - git config --global user.email github-actions@example.com - git add -u - git commit -m "[version] update" -m "Created by: ${{ github.event.sender.login }}" -m ":ci skip all :ci run dl" - git push origin --force ${{ github.event.ref }}:release - echo "head_sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT" - - name: Update master - env: - PUSH_VERSION_COMMIT: ${{ secrets.PUSH_VERSION_COMMIT }} - if: "env.PUSH_VERSION_COMMIT != ''" - run: git push origin ${{ github.event.ref }} - - - build_unix: - needs: prepare + unix: + if: inputs.unix runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - with: - python-version: '3.10' - - uses: conda-incubator/setup-miniconda@v2 - with: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.10" + - uses: conda-incubator/setup-miniconda@v2 + with: miniforge-variant: Mambaforge use-mamba: true channels: conda-forge auto-update-conda: true - activate-environment: '' + activate-environment: "" auto-activate-base: false - - name: Install Requirements - run: | + - name: Install Requirements + run: | sudo apt-get -y install zip pandoc man sed - python -m pip install -U pip setuptools wheel twine + python -m pip install -U pip setuptools wheel python -m pip install -U Pyinstaller -r requirements.txt reqs=$(mktemp) echo -e 'python=3.10.*\npyinstaller' >$reqs sed 's/^brotli.*/brotli-python/' >$reqs mamba create -n build --file $reqs - - name: Prepare - run: | - python devscripts/update-version.py ${{ needs.prepare.outputs.version_suffix }} + - name: Prepare + run: | + python devscripts/update-version.py -c ${{ inputs.channel }} ${{ inputs.version }} python devscripts/make_lazy_extractors.py - - name: Build Unix platform-independent binary - run: | + - name: Build Unix platform-independent binary + run: | make all tar - - name: Build Unix standalone binary - shell: bash -l {0} 
- run: | + - name: Build Unix standalone binary + shell: bash -l {0} + run: | unset LD_LIBRARY_PATH # Harmful; set by setup-python conda activate build python pyinst.py --onedir (cd ./dist/yt-dlp_linux && zip -r ../yt-dlp_linux.zip .) python pyinst.py - - - name: Upload artifacts - uses: actions/upload-artifact@v3 - with: - path: | - yt-dlp - yt-dlp.tar.gz - dist/yt-dlp_linux - dist/yt-dlp_linux.zip - - - name: Build and publish on PyPi - env: - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} - if: "env.TWINE_PASSWORD != ''" - run: | - rm -rf dist/* - python devscripts/set-variant.py pip -M "You installed yt-dlp with pip or using the wheel from PyPi; Use that to update" - python setup.py sdist bdist_wheel - twine upload dist/* - - - name: Install SSH private key for Homebrew - env: - BREW_TOKEN: ${{ secrets.BREW_TOKEN }} - if: "env.BREW_TOKEN != ''" - uses: yt-dlp/ssh-agent@v0.5.3 - with: - ssh-private-key: ${{ env.BREW_TOKEN }} - - name: Update Homebrew Formulae - env: - BREW_TOKEN: ${{ secrets.BREW_TOKEN }} - if: "env.BREW_TOKEN != ''" - run: | - git clone git@github.com:yt-dlp/homebrew-taps taps/ - python devscripts/update-formulae.py taps/Formula/yt-dlp.rb "${{ needs.prepare.outputs.ytdlp_version }}" - git -C taps/ config user.name github-actions - git -C taps/ config user.email github-actions@example.com - git -C taps/ commit -am 'yt-dlp: ${{ needs.prepare.outputs.ytdlp_version }}' - git -C taps/ push - - - build_linux_arm: + mv ./dist/yt-dlp_linux ./yt-dlp_linux + mv ./dist/yt-dlp_linux.zip ./yt-dlp_linux.zip + + - name: Upload artifacts + uses: actions/upload-artifact@v3 + with: + path: | + yt-dlp + yt-dlp.tar.gz + yt-dlp_linux + yt-dlp_linux.zip + + linux_arm: + if: inputs.linux_arm permissions: - packages: write # for Creating cache + contents: read + packages: write # for creating cache runs-on: ubuntu-latest - needs: prepare strategy: matrix: architecture: - - armv7 - - aarch64 + - armv7 + - aarch64 steps: - - uses: actions/checkout@v3 - with: - path: ./repo - - name: Virtualized Install, Prepare & Build - uses: yt-dlp/run-on-arch-action@v2 - with: - githubToken: ${{ github.token }} # To cache image - arch: ${{ matrix.architecture }} - distro: ubuntu18.04 # Standalone executable should be built on minimum supported OS - dockerRunArgs: --volume "${PWD}/repo:/repo" - install: | # Installing Python 3.10 from the Deadsnakes repo raises errors - apt update - apt -y install zlib1g-dev python3.8 python3.8-dev python3.8-distutils python3-pip - python3.8 -m pip install -U pip setuptools wheel - # Cannot access requirements.txt from the repo directory at this stage - python3.8 -m pip install -U Pyinstaller mutagen pycryptodomex websockets brotli certifi - - run: | - cd repo - python3.8 -m pip install -U Pyinstaller -r requirements.txt # Cached version may be out of date - python3.8 devscripts/update-version.py ${{ needs.prepare.outputs.version_suffix }} - python3.8 devscripts/make_lazy_extractors.py - python3.8 pyinst.py - - - name: Upload artifacts - uses: actions/upload-artifact@v3 - with: - path: | # run-on-arch-action designates armv7l as armv7 - repo/dist/yt-dlp_linux_${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }} - - - build_macos: + - uses: actions/checkout@v3 + with: + path: ./repo + - name: Virtualized Install, Prepare & Build + uses: yt-dlp/run-on-arch-action@v2 + with: + # Ref: https://github.com/uraimo/run-on-arch-action/issues/55 + env: | + GITHUB_WORKFLOW: build + githubToken: ${{ github.token }} # To cache image + arch: ${{ 
matrix.architecture }} + distro: ubuntu18.04 # Standalone executable should be built on minimum supported OS + dockerRunArgs: --volume "${PWD}/repo:/repo" + install: | # Installing Python 3.10 from the Deadsnakes repo raises errors + apt update + apt -y install zlib1g-dev python3.8 python3.8-dev python3.8-distutils python3-pip + python3.8 -m pip install -U pip setuptools wheel + # Cannot access requirements.txt from the repo directory at this stage + python3.8 -m pip install -U Pyinstaller mutagen pycryptodomex websockets brotli certifi + + run: | + cd repo + python3.8 -m pip install -U Pyinstaller -r requirements.txt # Cached version may be out of date + python3.8 devscripts/update-version.py -c ${{ inputs.channel }} ${{ inputs.version }} + python3.8 devscripts/make_lazy_extractors.py + python3.8 pyinst.py + + - name: Upload artifacts + uses: actions/upload-artifact@v3 + with: + path: | # run-on-arch-action designates armv7l as armv7 + repo/dist/yt-dlp_linux_${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }} + + macos: + if: inputs.macos runs-on: macos-11 - needs: prepare steps: - - uses: actions/checkout@v3 - # NB: In order to create a universal2 application, the version of python3 in /usr/bin has to be used - - name: Install Requirements - run: | + - uses: actions/checkout@v3 + # NB: In order to create a universal2 application, the version of python3 in /usr/bin has to be used + - name: Install Requirements + run: | brew install coreutils /usr/bin/python3 -m pip install -U --user pip Pyinstaller -r requirements.txt - - name: Prepare - run: | - /usr/bin/python3 devscripts/update-version.py ${{ needs.prepare.outputs.version_suffix }} + - name: Prepare + run: | + /usr/bin/python3 devscripts/update-version.py -c ${{ inputs.channel }} ${{ inputs.version }} /usr/bin/python3 devscripts/make_lazy_extractors.py - - name: Build - run: | + - name: Build + run: | /usr/bin/python3 pyinst.py --target-architecture universal2 --onedir (cd ./dist/yt-dlp_macos && zip -r ../yt-dlp_macos.zip .) /usr/bin/python3 pyinst.py --target-architecture universal2 - - name: Upload artifacts - uses: actions/upload-artifact@v3 - with: - path: | - dist/yt-dlp_macos - dist/yt-dlp_macos.zip + - name: Upload artifacts + uses: actions/upload-artifact@v3 + with: + path: | + dist/yt-dlp_macos + dist/yt-dlp_macos.zip - - build_macos_legacy: + macos_legacy: + if: inputs.macos_legacy runs-on: macos-latest - needs: prepare steps: - - uses: actions/checkout@v3 - - name: Install Python - # We need the official Python, because the GA ones only support newer macOS versions - env: - PYTHON_VERSION: 3.10.5 - MACOSX_DEPLOYMENT_TARGET: 10.9 # Used up by the Python build tools - run: | + - uses: actions/checkout@v3 + - name: Install Python + # We need the official Python, because the GA ones only support newer macOS versions + env: + PYTHON_VERSION: 3.10.5 + MACOSX_DEPLOYMENT_TARGET: 10.9 # Used up by the Python build tools + run: | # Hack to get the latest patch version. 
Uncomment if needed #brew install python@3.10 #export PYTHON_VERSION=$( $(brew --prefix)/opt/python@3.10/bin/python3 --version | cut -d ' ' -f 2 ) curl https://www.python.org/ftp/python/${PYTHON_VERSION}/python-${PYTHON_VERSION}-macos11.pkg -o "python.pkg" sudo installer -pkg python.pkg -target / python3 --version - - name: Install Requirements - run: | + - name: Install Requirements + run: | brew install coreutils python3 -m pip install -U --user pip Pyinstaller -r requirements.txt - - name: Prepare - run: | - python3 devscripts/update-version.py ${{ needs.prepare.outputs.version_suffix }} + - name: Prepare + run: | + python3 devscripts/update-version.py -c ${{ inputs.channel }} ${{ inputs.version }} python3 devscripts/make_lazy_extractors.py - - name: Build - run: | + - name: Build + run: | python3 pyinst.py mv dist/yt-dlp_macos dist/yt-dlp_macos_legacy - - name: Upload artifacts - uses: actions/upload-artifact@v3 - with: - path: | - dist/yt-dlp_macos_legacy - + - name: Upload artifacts + uses: actions/upload-artifact@v3 + with: + path: | + dist/yt-dlp_macos_legacy - build_windows: + windows: + if: inputs.windows runs-on: windows-latest - needs: prepare steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - with: # 3.8 is used for Win7 support - python-version: '3.8' - - name: Install Requirements - run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: # 3.8 is used for Win7 support + python-version: "3.8" + - name: Install Requirements + run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds python -m pip install -U pip setuptools wheel py2exe pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl" -r requirements.txt - - name: Prepare - run: | - python devscripts/update-version.py ${{ needs.prepare.outputs.version_suffix }} + - name: Prepare + run: | + python devscripts/update-version.py -c ${{ inputs.channel }} ${{ inputs.version }} python devscripts/make_lazy_extractors.py - - name: Build - run: | + - name: Build + run: | python setup.py py2exe Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe python pyinst.py python pyinst.py --onedir Compress-Archive -Path ./dist/yt-dlp/* -DestinationPath ./dist/yt-dlp_win.zip - - name: Upload artifacts - uses: actions/upload-artifact@v3 - with: - path: | - dist/yt-dlp.exe - dist/yt-dlp_min.exe - dist/yt-dlp_win.zip - + - name: Upload artifacts + uses: actions/upload-artifact@v3 + with: + path: | + dist/yt-dlp.exe + dist/yt-dlp_min.exe + dist/yt-dlp_win.zip - build_windows32: + windows32: + if: inputs.windows32 runs-on: windows-latest - needs: prepare steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - with: # 3.7 is used for Vista support. See https://github.com/yt-dlp/yt-dlp/issues/390 - python-version: '3.7' - architecture: 'x86' - - name: Install Requirements - run: | + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: # 3.7 is used for Vista support. 
See https://github.com/yt-dlp/yt-dlp/issues/390 + python-version: "3.7" + architecture: "x86" + - name: Install Requirements + run: | python -m pip install -U pip setuptools wheel pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-5.8.0-py3-none-any.whl" -r requirements.txt - - name: Prepare - run: | - python devscripts/update-version.py ${{ needs.prepare.outputs.version_suffix }} + - name: Prepare + run: | + python devscripts/update-version.py -c ${{ inputs.channel }} ${{ inputs.version }} python devscripts/make_lazy_extractors.py - - name: Build - run: | + - name: Build + run: | python pyinst.py - - name: Upload artifacts - uses: actions/upload-artifact@v3 - with: - path: | - dist/yt-dlp_x86.exe - - - publish_release: - permissions: - contents: write # for action-gh-release + - name: Upload artifacts + uses: actions/upload-artifact@v3 + with: + path: | + dist/yt-dlp_x86.exe + + meta_files: + if: inputs.meta_files && always() + needs: + - unix + - linux_arm + - macos + - macos_legacy + - windows + - windows32 runs-on: ubuntu-latest - needs: [prepare, build_unix, build_linux_arm, build_windows, build_windows32, build_macos, build_macos_legacy] - steps: - - uses: actions/checkout@v3 - - uses: actions/download-artifact@v3 - - - name: Get Changelog - run: | - changelog=$(grep -oPz '(?s)(?<=### ${{ needs.prepare.outputs.ytdlp_version }}\n{2}).+?(?=\n{2,3}###)' Changelog.md) || true - echo "changelog<> $GITHUB_ENV - echo "$changelog" >> $GITHUB_ENV - echo "EOF" >> $GITHUB_ENV - - name: Make Update spec - run: | - echo "# This file is used for regulating self-update" >> _update_spec - echo "lock 2022.07.18 .+ Python 3.6" >> _update_spec - - name: Make SHA2-SUMS files - run: | - sha256sum artifact/yt-dlp | awk '{print $1 " yt-dlp"}' >> SHA2-256SUMS - sha256sum artifact/yt-dlp.tar.gz | awk '{print $1 " yt-dlp.tar.gz"}' >> SHA2-256SUMS - sha256sum artifact/yt-dlp.exe | awk '{print $1 " yt-dlp.exe"}' >> SHA2-256SUMS - sha256sum artifact/yt-dlp_win.zip | awk '{print $1 " yt-dlp_win.zip"}' >> SHA2-256SUMS - sha256sum artifact/yt-dlp_min.exe | awk '{print $1 " yt-dlp_min.exe"}' >> SHA2-256SUMS - sha256sum artifact/yt-dlp_x86.exe | awk '{print $1 " yt-dlp_x86.exe"}' >> SHA2-256SUMS - sha256sum artifact/yt-dlp_macos | awk '{print $1 " yt-dlp_macos"}' >> SHA2-256SUMS - sha256sum artifact/yt-dlp_macos.zip | awk '{print $1 " yt-dlp_macos.zip"}' >> SHA2-256SUMS - sha256sum artifact/yt-dlp_macos_legacy | awk '{print $1 " yt-dlp_macos_legacy"}' >> SHA2-256SUMS - sha256sum artifact/yt-dlp_linux_armv7l | awk '{print $1 " yt-dlp_linux_armv7l"}' >> SHA2-256SUMS - sha256sum artifact/yt-dlp_linux_aarch64 | awk '{print $1 " yt-dlp_linux_aarch64"}' >> SHA2-256SUMS - sha256sum artifact/dist/yt-dlp_linux | awk '{print $1 " yt-dlp_linux"}' >> SHA2-256SUMS - sha256sum artifact/dist/yt-dlp_linux.zip | awk '{print $1 " yt-dlp_linux.zip"}' >> SHA2-256SUMS - sha512sum artifact/yt-dlp | awk '{print $1 " yt-dlp"}' >> SHA2-512SUMS - sha512sum artifact/yt-dlp.tar.gz | awk '{print $1 " yt-dlp.tar.gz"}' >> SHA2-512SUMS - sha512sum artifact/yt-dlp.exe | awk '{print $1 " yt-dlp.exe"}' >> SHA2-512SUMS - sha512sum artifact/yt-dlp_win.zip | awk '{print $1 " yt-dlp_win.zip"}' >> SHA2-512SUMS - sha512sum artifact/yt-dlp_min.exe | awk '{print $1 " yt-dlp_min.exe"}' >> SHA2-512SUMS - sha512sum artifact/yt-dlp_x86.exe | awk '{print $1 " yt-dlp_x86.exe"}' >> SHA2-512SUMS - sha512sum artifact/yt-dlp_macos | awk '{print $1 " yt-dlp_macos"}' >> SHA2-512SUMS - sha512sum artifact/yt-dlp_macos.zip | awk '{print $1 " 
yt-dlp_macos.zip"}' >> SHA2-512SUMS - sha512sum artifact/yt-dlp_macos_legacy | awk '{print $1 " yt-dlp_macos_legacy"}' >> SHA2-512SUMS - sha512sum artifact/yt-dlp_linux_armv7l | awk '{print $1 " yt-dlp_linux_armv7l"}' >> SHA2-512SUMS - sha512sum artifact/yt-dlp_linux_aarch64 | awk '{print $1 " yt-dlp_linux_aarch64"}' >> SHA2-512SUMS - sha512sum artifact/dist/yt-dlp_linux | awk '{print $1 " yt-dlp_linux"}' >> SHA2-512SUMS - sha512sum artifact/dist/yt-dlp_linux.zip | awk '{print $1 " yt-dlp_linux.zip"}' >> SHA2-512SUMS - - - name: Publish Release - uses: yt-dlp/action-gh-release@v1 - with: - tag_name: ${{ needs.prepare.outputs.ytdlp_version }} - name: yt-dlp ${{ needs.prepare.outputs.ytdlp_version }} - target_commitish: ${{ needs.prepare.outputs.head_sha }} - body: | - #### [A description of the various files]((https://github.com/yt-dlp/yt-dlp#release-files)) are in the README + - uses: actions/download-artifact@v3 - --- -
    - <details open><summary><h3>Changelog</h3></summary>
    - <p>
    -
    - ${{ env.changelog }} + + - name: Make SHA2-SUMS files + run: | + cd ./artifact/ + sha256sum * > ../SHA2-256SUMS + sha512sum * > ../SHA2-512SUMS
    -
    - </p>
    - </details>
    - files: | - SHA2-256SUMS - SHA2-512SUMS - artifact/yt-dlp - artifact/yt-dlp.tar.gz - artifact/yt-dlp.exe - artifact/yt-dlp_win.zip - artifact/yt-dlp_min.exe - artifact/yt-dlp_x86.exe - artifact/yt-dlp_macos - artifact/yt-dlp_macos.zip - artifact/yt-dlp_macos_legacy - artifact/yt-dlp_linux_armv7l - artifact/yt-dlp_linux_aarch64 - artifact/dist/yt-dlp_linux - artifact/dist/yt-dlp_linux.zip - _update_spec + - name: Make Update spec + run: | + cat >> _update_spec << EOF + # This file is used for regulating self-update + lock 2022.08.18.36 .+ Python 3.6 + EOF + + - name: Upload artifacts + uses: actions/upload-artifact@v3 + with: + path: | + SHA*SUMS* + _update_spec diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 000000000..42e66a29c --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,80 @@ +name: Publish +on: + workflow_call: + inputs: + nightly: + default: false + required: false + type: boolean + version: + required: true + type: string + target_commitish: + required: true + type: string + secrets: + ARCHIVE_REPO_TOKEN: + required: false + +permissions: + contents: write + +jobs: + publish: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - uses: actions/download-artifact@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.10" + + - name: Generate release notes + run: | + cat >> ./RELEASE_NOTES << EOF + #### A description of the various files are in the [README](https://github.com/yt-dlp/yt-dlp#release-files) + --- +
    + <details><summary><h3>Changelog</h3></summary>
    + $(python ./devscripts/make_changelog.py -vv)
    + </details>
    + EOF + echo "**This is an automated nightly pre-release build**" >> ./PRERELEASE_NOTES + cat ./RELEASE_NOTES >> ./PRERELEASE_NOTES + echo "Generated from: https://github.com/${{ github.repository }}/commit/${{ inputs.target_commitish }}" >> ./ARCHIVE_NOTES + cat ./RELEASE_NOTES >> ./ARCHIVE_NOTES + + - name: Archive nightly release + env: + GH_TOKEN: ${{ secrets.ARCHIVE_REPO_TOKEN }} + GH_REPO: ${{ vars.ARCHIVE_REPO }} + if: | + inputs.nightly && env.GH_TOKEN != '' && env.GH_REPO != '' + run: | + gh release create \ + --notes-file ARCHIVE_NOTES \ + --title "Build ${{ inputs.version }}" \ + ${{ inputs.version }} \ + artifact/* + + - name: Prune old nightly release + if: inputs.nightly + env: + GH_TOKEN: ${{ github.token }} + run: | + gh release delete --yes --cleanup-tag "nightly" || true + git tag --delete "nightly" || true + sleep 5 # Enough time to cover deletion race condition + + - name: Publish release${{ inputs.nightly && ' (nightly)' || '' }} + env: + GH_TOKEN: ${{ github.token }} + run: | + gh release create \ + --notes-file ${{ inputs.nightly && 'PRE' || '' }}RELEASE_NOTES \ + --target ${{ inputs.target_commitish }} \ + --title "yt-dlp ${{ inputs.nightly && 'nightly ' || '' }}${{ inputs.version }}" \ + ${{ inputs.nightly && '--prerelease "nightly"' || inputs.version }} \ + artifact/* diff --git a/.github/workflows/release-nightly.yml b/.github/workflows/release-nightly.yml new file mode 100644 index 000000000..ec079b8d0 --- /dev/null +++ b/.github/workflows/release-nightly.yml @@ -0,0 +1,49 @@ +name: Release (nightly) +on: + push: + branches: + - master + paths: + - "**.py" + - "!yt_dlp/version.py" +concurrency: + group: release-nightly + cancel-in-progress: true +permissions: + contents: read + +jobs: + prepare: + if: vars.BUILD_NIGHTLY != '' + runs-on: ubuntu-latest + outputs: + version: ${{ steps.get_version.outputs.version }} + + steps: + - uses: actions/checkout@v3 + - name: Get version + id: get_version + run: | + python devscripts/update-version.py "$(date -u +"%H%M%S")" | grep -Po "version=\d+(\.\d+){3}" >> "$GITHUB_OUTPUT" + + build: + needs: prepare + uses: ./.github/workflows/build.yml + with: + version: ${{ needs.prepare.outputs.version }} + channel: nightly + permissions: + contents: read + packages: write # For package cache + + publish: + needs: [prepare, build] + uses: ./.github/workflows/publish.yml + secrets: + ARCHIVE_REPO_TOKEN: ${{ secrets.ARCHIVE_REPO_TOKEN }} + permissions: + contents: write + with: + nightly: true + version: ${{ needs.prepare.outputs.version }} + target_commitish: ${{ github.sha }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 000000000..c97cd1f4a --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,125 @@ +name: Release +on: workflow_dispatch +permissions: + contents: read + +jobs: + prepare: + permissions: + contents: write + runs-on: ubuntu-latest + outputs: + version: ${{ steps.update_version.outputs.version }} + head_sha: ${{ steps.push_release.outputs.head_sha }} + + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - uses: actions/setup-python@v4 + with: + python-version: "3.10" + + - name: Update version + id: update_version + run: | + python devscripts/update-version.py ${{ vars.PUSH_VERSION_COMMIT == '' && '"$(date -u +"%H%M%S")"' || '' }} | \ + grep -Po "version=\d+\.\d+\.\d+(\.\d+)?" 
>> "$GITHUB_OUTPUT" + + - name: Update documentation + run: | + make doc + sed '/### /Q' Changelog.md >> ./CHANGELOG + echo '### ${{ steps.update_version.outputs.version }}' >> ./CHANGELOG + python ./devscripts/make_changelog.py -vv -c >> ./CHANGELOG + echo >> ./CHANGELOG + grep -Poz '(?s)### \d+\.\d+\.\d+.+' 'Changelog.md' | head -n -1 >> ./CHANGELOG + cat ./CHANGELOG > Changelog.md + + - name: Push to release + id: push_release + run: | + git config --global user.name github-actions + git config --global user.email github-actions@example.com + git add -u + git commit -m "Release ${{ steps.update_version.outputs.version }}" \ + -m "Created by: ${{ github.event.sender.login }}" -m ":ci skip all :ci run dl" + git push origin --force ${{ github.event.ref }}:release + echo "head_sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT" + + - name: Update master + if: vars.PUSH_VERSION_COMMIT != '' + run: git push origin ${{ github.event.ref }} + + publish_pypi_homebrew: + needs: prepare + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.10" + + - name: Install Requirements + run: | + python -m pip install -U pip setuptools wheel twine + python -m pip install -U -r requirements.txt + + - name: Prepare + run: | + python devscripts/update-version.py ${{ needs.prepare.outputs.version }} + python devscripts/make_lazy_extractors.py + + - name: Build and publish on PyPI + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} + if: env.TWINE_PASSWORD != '' + run: | + rm -rf dist/* + python devscripts/set-variant.py pip -M "You installed yt-dlp with pip or using the wheel from PyPi; Use that to update" + python setup.py sdist bdist_wheel + twine upload dist/* + + - name: Checkout Homebrew repository + env: + BREW_TOKEN: ${{ secrets.BREW_TOKEN }} + PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} + if: env.BREW_TOKEN != '' && env.PYPI_TOKEN != '' + uses: actions/checkout@v3 + with: + repository: yt-dlp/homebrew-taps + path: taps + ssh-key: ${{ secrets.BREW_TOKEN }} + + - name: Update Homebrew Formulae + env: + BREW_TOKEN: ${{ secrets.BREW_TOKEN }} + PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} + if: env.BREW_TOKEN != '' && env.PYPI_TOKEN != '' + run: | + python devscripts/update-formulae.py taps/Formula/yt-dlp.rb "${{ needs.prepare.outputs.version }}" + git -C taps/ config user.name github-actions + git -C taps/ config user.email github-actions@example.com + git -C taps/ commit -am 'yt-dlp: ${{ needs.prepare.outputs.version }}' + git -C taps/ push + + build: + needs: prepare + uses: ./.github/workflows/build.yml + with: + version: ${{ needs.prepare.outputs.version }} + permissions: + contents: read + packages: write # For package cache + + publish: + needs: [prepare, build] + uses: ./.github/workflows/publish.yml + permissions: + contents: write + with: + version: ${{ needs.prepare.outputs.version }} + target_commitish: ${{ needs.prepare.outputs.head_sha }} diff --git a/Changelog.md b/Changelog.md index 24bc8a2e2..60bd99f72 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,13 +1,7 @@ # Changelog ### 2023.02.17 diff --git a/README.md b/README.md index ddd71eeeb..e6e95b147 100644 --- a/README.md +++ b/README.md @@ -318,7 +318,8 @@ If you wish to build it anyway, install Python and py2exe, and then simply run ` Note: See their `--help` for more info. 
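The `grep -Po "version=..."` capture in the workflows above assumes that `devscripts/update-version.py` prints a `version=` line derived from the current UTC date, in `YYYY.MM.DD[.REV]` form. A minimal sketch of that assumed behavior (not the actual script):

```python
# Assumed behavior of devscripts/update-version.py, reduced to its essence:
# derive a date-based version and emit a "version=..." line for the workflow.
from datetime import datetime, timezone

def compute_version(revision=''):
    version = datetime.now(timezone.utc).strftime('%Y.%m.%d')
    return f'{version}.{revision}' if revision else version

# Nightly builds pass $(date -u +"%H%M%S") as the revision, e.g.
# "2023.03.03.223312"; a plain release omits it.
print(f'version={compute_version("223312")}')
```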
-You can also fork the project on GitHub and run your fork's [build workflow](.github/workflows/build.yml) to automatically build a full release +### Forking the project +If you fork the project on GitHub, you can run your fork's [build workflow](.github/workflows/build.yml) to automatically build the selected version(s) as artifacts. Alternatively, you can run the [release workflow](.github/workflows/release.yml) or enable the [nightly workflow](.github/workflows/release-nightly.yml) to create full (pre-)releases. # USAGE AND OPTIONS @@ -460,9 +461,8 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi --date DATE Download only videos uploaded on this date. The date can be "YYYYMMDD" or in the format [now|today|yesterday][-N[day|week|month|year]]. - E.g. "--date today-2weeks" downloads - only videos uploaded on the same day two - weeks ago + E.g. "--date today-2weeks" downloads only + videos uploaded on the same day two weeks ago --datebefore DATE Download only videos uploaded on or before this date. The date formats accepted is the same as --date diff --git a/devscripts/make_readme.py b/devscripts/make_readme.py index fad993a19..2270b31d3 100755 --- a/devscripts/make_readme.py +++ b/devscripts/make_readme.py @@ -45,33 +45,43 @@ switch_col_width = len(re.search(r'(?m)^\s{5,}', options).group()) delim = f'\n{" " * switch_col_width}' PATCHES = ( - ( # Standardize update message + ( # Standardize `--update` message r'(?m)^( -U, --update\s+).+(\n \s.+)*$', r'\1Update this program to the latest version', ), - ( # Headings + ( # Headings r'(?m)^ (\w.+\n)( (?=\w))?', r'## \1' ), - ( # Do not split URLs + ( # Fixup `--date` formatting + rf'(?m)( --date DATE.+({delim}[^\[]+)*)\[.+({delim}.+)*$', + (rf'\1[now|today|yesterday][-N[day|week|month|year]].{delim}' + f'E.g. "--date today-2weeks" downloads only{delim}' + 'videos uploaded on the same day two weeks ago'), + ), + ( # Do not split URLs rf'({delim[:-1]})? (?P

    ([^<]+)\s+playlist\s*<', webpage, 'playlist title', + r'([^<]+)\s+playlist\s*<', webpage, 'playlist title', fatal=False) return self.playlist_result(entries, playlist_id, title) From 153e88a75151a51cc2a2fbf02d62f66fc09b29d9 Mon Sep 17 00:00:00 2001 From: HobbyistDev <105957301+HobbyistDev@users.noreply.github.com> Date: Thu, 29 Dec 2022 17:12:07 +0900 Subject: [PATCH 1851/2552] [extractor/netverse] Add `NetverseSearch` extractor (#5838) Authored by: HobbyistDev --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/netverse.py | 30 +++++++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 672eb9596..1b76d8264 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1160,6 +1160,7 @@ from .neteasemusic import ( from .netverse import ( NetverseIE, NetversePlaylistIE, + NetverseSearchIE, ) from .newgrounds import ( NewgroundsIE, diff --git a/yt_dlp/extractor/netverse.py b/yt_dlp/extractor/netverse.py index 3c4fd92eb..398198a1b 100644 --- a/yt_dlp/extractor/netverse.py +++ b/yt_dlp/extractor/netverse.py @@ -1,6 +1,6 @@ import itertools -from .common import InfoExtractor +from .common import InfoExtractor, SearchInfoExtractor from .dailymotion import DailymotionIE from ..utils import smuggle_url, traverse_obj @@ -251,3 +251,31 @@ class NetversePlaylistIE(NetverseBaseIE): self.parse_playlist(playlist_data['response'], playlist_id), traverse_obj(playlist_data, ('response', 'webseries_info', 'slug')), traverse_obj(playlist_data, ('response', 'webseries_info', 'title'))) + + +class NetverseSearchIE(SearchInfoExtractor): + _SEARCH_KEY = 'netsearch' + + _TESTS = [{ + 'url': 'netsearch10:tetangga', + 'info_dict': { + 'id': 'tetangga', + 'title': 'tetangga', + }, + 'playlist_count': 10, + }] + + def _search_results(self, query): + last_page = None + for i in itertools.count(1): + search_data = self._download_json( + 'https://api.netverse.id/search/elastic/search', query, + query={'q': query, 'page': i}, note=f'Downloading page {i}') + + videos = traverse_obj(search_data, ('response', 'data', ...)) + for video in videos: + yield self.url_result(f'https://netverse.id/video/{video["slug"]}', NetverseIE) + + last_page = last_page or traverse_obj(search_data, ('response', 'lastpage')) + if not videos or i >= (last_page or 0): + break From 9a9006ba20f1f9f34183e1bde098c75502a018f8 Mon Sep 17 00:00:00 2001 From: Sam Date: Thu, 29 Dec 2022 06:15:38 -0500 Subject: [PATCH 1852/2552] [extractor/twitcasting] Fix videos with password (#5894) Closes #5888 Authored by: bashonly, Spicadox --- yt_dlp/extractor/twitcasting.py | 34 +++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/yt_dlp/extractor/twitcasting.py b/yt_dlp/extractor/twitcasting.py index 735cb0bb0..2548dae04 100644 --- a/yt_dlp/extractor/twitcasting.py +++ b/yt_dlp/extractor/twitcasting.py @@ -38,7 +38,7 @@ class TwitCastingIE(InfoExtractor): 'description': 'Twitter Oficial da cantora brasileira Ivete Sangalo.', 'thumbnail': r're:^https?://.*\.jpg$', 'upload_date': '20110822', - 'timestamp': 1314010824, + 'timestamp': 1313978424, 'duration': 32, 'view_count': int, }, @@ -52,10 +52,10 @@ class TwitCastingIE(InfoExtractor): 'ext': 'mp4', 'title': 'Live playing something #3689740', 'uploader_id': 'mttbernardini', - 'description': 'Salve, io sono Matto (ma con la e). 
Questa è la mia presentazione, in quanto sono letteralmente matto (nel senso di strano), con qualcosa in più.', + 'description': 'md5:1dc7efa2f1ab932fcd119265cebeec69', 'thumbnail': r're:^https?://.*\.jpg$', - 'upload_date': '20120212', - 'timestamp': 1329028024, + 'upload_date': '20120211', + 'timestamp': 1328995624, 'duration': 681, 'view_count': int, }, @@ -64,15 +64,22 @@ class TwitCastingIE(InfoExtractor): 'videopassword': 'abc', }, }, { - 'note': 'archive is split in 2 parts', 'url': 'https://twitcasting.tv/loft_heaven/movie/685979292', 'info_dict': { 'id': '685979292', 'ext': 'mp4', - 'title': '南波一海のhear_here “ナタリー望月哲さんに聞く編集と「渋谷系狂騒曲」”', - 'duration': 6964.599334, + 'title': '【無料配信】南波一海のhear/here “ナタリー望月哲さんに聞く編集と「渋谷系狂騒曲」”', + 'uploader_id': 'loft_heaven', + 'description': 'md5:3a0c7b53019df987ce545c935538bacf', + 'upload_date': '20210604', + 'timestamp': 1622802114, + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 6964, + 'view_count': int, + }, + 'params': { + 'skip_download': True, }, - 'playlist_mincount': 2, }] def _parse_data_movie_playlist(self, dmp, video_id): @@ -88,15 +95,18 @@ class TwitCastingIE(InfoExtractor): def _real_extract(self, url): uploader_id, video_id = self._match_valid_url(url).groups() + webpage, urlh = self._download_webpage_handle(url, video_id) video_password = self.get_param('videopassword') request_data = None if video_password: request_data = urlencode_postdata({ 'password': video_password, + **self._hidden_inputs(webpage), }, encoding='utf-8') - webpage, urlh = self._download_webpage_handle( - url, video_id, data=request_data, - headers={'Origin': 'https://twitcasting.tv'}) + webpage, urlh = self._download_webpage_handle( + url, video_id, data=request_data, + headers={'Origin': 'https://twitcasting.tv'}, + note='Trying video password') if urlh.geturl() != url and request_data: webpage = self._download_webpage( urlh.geturl(), video_id, data=request_data, @@ -122,7 +132,7 @@ class TwitCastingIE(InfoExtractor): duration = (try_get(video_js_data, lambda x: sum(float_or_none(y.get('duration')) for y in x) / 1000) or parse_duration(clean_html(get_element_by_class('tw-player-duration-time', webpage)))) view_count = str_to_int(self._search_regex( - (r'Total\s*:\s*([\d,]+)\s*Views', r'総視聴者\s*:\s*([\d,]+)\s*]+datetime="([^"]+)"', webpage, 'datetime', None)) From 3d667e0047915c32f5df9fdd86a4223dc0e9ce8f Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 29 Dec 2022 12:03:03 +0000 Subject: [PATCH 1853/2552] [extractor/slideslive] Support embeds and slides (#5784) Authored by: bashonly, Grub4K, pukkandan --- yt_dlp/extractor/slideslive.py | 390 ++++++++++++++++++++++++++++++--- 1 file changed, 362 insertions(+), 28 deletions(-) diff --git a/yt_dlp/extractor/slideslive.py b/yt_dlp/extractor/slideslive.py index 86c26a8a2..4268bfeaf 100644 --- a/yt_dlp/extractor/slideslive.py +++ b/yt_dlp/extractor/slideslive.py @@ -1,16 +1,24 @@ +import re +import urllib.parse + from .common import InfoExtractor from ..utils import ( + ExtractorError, + int_or_none, + parse_qs, smuggle_url, traverse_obj, unified_timestamp, + update_url_query, url_or_none, + xpath_text, ) class SlidesLiveIE(InfoExtractor): - _VALID_URL = r'https?://slideslive\.com/(?P[0-9]+)' + _VALID_URL = r'https?://slideslive\.com/(?:embed/(?:presentation/)?)?(?P[0-9]+)' _TESTS = [{ - # service_name = yoda + # service_name = yoda, only XML slides info 'url': 'https://slideslive.com/38902413/gcc-ia16-backend', 'info_dict': { 'id': '38902413', @@ -19,12 +27,14 @@ 
class SlidesLiveIE(InfoExtractor): 'timestamp': 1648189972, 'upload_date': '20220325', 'thumbnail': r're:^https?://.*\.jpg', + 'thumbnails': 'count:42', + 'chapters': 'count:41', }, 'params': { 'skip_download': 'm3u8', }, }, { - # service_name = yoda + # service_name = yoda, /v7/ slides 'url': 'https://slideslive.com/38935785', 'info_dict': { 'id': '38935785', @@ -32,13 +42,15 @@ class SlidesLiveIE(InfoExtractor): 'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges', 'upload_date': '20211115', 'timestamp': 1636996003, - 'thumbnail': r're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'thumbnails': 'count:640', + 'chapters': 'count:639', }, 'params': { 'skip_download': 'm3u8', }, }, { - # service_name = yoda + # service_name = yoda, /v1/ slides 'url': 'https://slideslive.com/38973182/how-should-a-machine-learning-researcher-think-about-ai-ethics', 'info_dict': { 'id': '38973182', @@ -47,12 +59,14 @@ class SlidesLiveIE(InfoExtractor): 'upload_date': '20220201', 'thumbnail': r're:^https?://.*\.jpg', 'timestamp': 1643728135, + 'thumbnails': 'count:3', + 'chapters': 'count:2', }, 'params': { 'skip_download': 'm3u8', }, }, { - # service_name = youtube + # service_name = youtube, only XML slides info 'url': 'https://slideslive.com/38897546/special-metaprednaska-petra-ludwiga-hodnoty-pro-lepsi-spolecnost', 'md5': '8a79b5e3d700837f40bd2afca3c8fa01', 'info_dict': { @@ -76,26 +90,253 @@ class SlidesLiveIE(InfoExtractor): 'comment_count': int, 'channel_follower_count': int, 'age_limit': 0, - 'thumbnail': r're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.(?:jpg|webp)', + 'thumbnails': 'count:169', 'playable_in_embed': True, 'availability': 'unlisted', 'tags': [], 'categories': ['People & Blogs'], + 'chapters': 'count:168', + }, + }, { + # embed-only presentation, only XML slides info + 'url': 'https://slideslive.com/embed/presentation/38925850', + 'info_dict': { + 'id': '38925850', + 'ext': 'mp4', + 'title': 'Towards a Deep Network Architecture for Structured Smoothness', + 'thumbnail': r're:^https?://.*\.jpg', + 'thumbnails': 'count:8', + 'timestamp': 1629671508, + 'upload_date': '20210822', + 'chapters': 'count:7', + }, + 'params': { + 'skip_download': 'm3u8', }, }, { - # service_name = youtube + # embed-only presentation, only JSON slides info, /v5/ slides (.png) + 'url': 'https://slideslive.com/38979920/', + 'info_dict': { + 'id': '38979920', + 'ext': 'mp4', + 'title': 'MoReL: Multi-omics Relational Learning', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'thumbnails': 'count:7', + 'timestamp': 1654714970, + 'upload_date': '20220608', + 'chapters': 'count:6', + }, + 'params': { + 'skip_download': 'm3u8', + }, + }, { + # /v2/ slides (.jpg) + 'url': 'https://slideslive.com/38954074', + 'info_dict': { + 'id': '38954074', + 'ext': 'mp4', + 'title': 'Decentralized Attribution of Generative Models', + 'thumbnail': r're:^https?://.*\.jpg', + 'thumbnails': 'count:16', + 'timestamp': 1622806321, + 'upload_date': '20210604', + 'chapters': 'count:15', + }, + 'params': { + 'skip_download': 'm3u8', + }, + }, { + # /v4/ slides (.png) + 'url': 'https://slideslive.com/38979570/', + 'info_dict': { + 'id': '38979570', + 'ext': 'mp4', + 'title': 'Efficient Active Search for Combinatorial Optimization Problems', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'thumbnails': 'count:9', + 'timestamp': 1654714896, + 'upload_date': '20220608', + 'chapters': 'count:8', + }, + 'params': { + 'skip_download': 'm3u8', + }, + }, { + # /v10/ slides + 'url': 
'https://slideslive.com/embed/presentation/38979880?embed_parent_url=https%3A%2F%2Fedit.videoken.com%2F', + 'info_dict': { + 'id': '38979880', + 'ext': 'mp4', + 'title': 'The Representation Power of Neural Networks', + 'timestamp': 1654714962, + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'thumbnails': 'count:22', + 'upload_date': '20220608', + 'chapters': 'count:21', + }, + 'params': { + 'skip_download': 'm3u8', + }, + }, { + # /v7/ slides, 2 video slides + 'url': 'https://slideslive.com/embed/presentation/38979682?embed_container_origin=https%3A%2F%2Fedit.videoken.com', + 'playlist_count': 3, + 'info_dict': { + 'id': '38979682-playlist', + 'title': 'LoRA: Low-Rank Adaptation of Large Language Models', + }, + 'playlist': [{ + 'info_dict': { + 'id': '38979682', + 'ext': 'mp4', + 'title': 'LoRA: Low-Rank Adaptation of Large Language Models', + 'timestamp': 1654714920, + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'thumbnails': 'count:30', + 'upload_date': '20220608', + 'chapters': 'count:31', + }, + }, { + 'info_dict': { + 'id': '38979682-021', + 'ext': 'mp4', + 'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 021', + 'duration': 3, + 'timestamp': 1654714920, + 'upload_date': '20220608', + }, + }, { + 'info_dict': { + 'id': '38979682-024', + 'ext': 'mp4', + 'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 024', + 'duration': 4, + 'timestamp': 1654714920, + 'upload_date': '20220608', + }, + }], + 'params': { + 'skip_download': 'm3u8', + }, + }, { + # /v6/ slides, 1 video slide, edit.videoken.com embed + 'url': 'https://slideslive.com/38979481/', + 'playlist_count': 2, + 'info_dict': { + 'id': '38979481-playlist', + 'title': 'How to Train Your MAML to Excel in Few-Shot Classification', + }, + 'playlist': [{ + 'info_dict': { + 'id': '38979481', + 'ext': 'mp4', + 'title': 'How to Train Your MAML to Excel in Few-Shot Classification', + 'timestamp': 1654714877, + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'thumbnails': 'count:43', + 'upload_date': '20220608', + 'chapters': 'count:43', + }, + }, { + 'info_dict': { + 'id': '38979481-013', + 'ext': 'mp4', + 'title': 'How to Train Your MAML to Excel in Few-Shot Classification - Slide 013', + 'duration': 3, + 'timestamp': 1654714877, + 'upload_date': '20220608', + }, + }], + 'params': { + 'skip_download': 'm3u8', + }, + }, { + # /v3/ slides, .jpg and .png, service_name = youtube + 'url': 'https://slideslive.com/embed/38932460/', + 'info_dict': { + 'id': 'RTPdrgkyTiE', + 'display_id': '38932460', + 'ext': 'mp4', + 'title': 'Active Learning for Hierarchical Multi-Label Classification', + 'description': 'Watch full version of this video at https://slideslive.com/38932460.', + 'channel': 'SlidesLive Videos - A', + 'channel_id': 'UC62SdArr41t_-_fX40QCLRw', + 'channel_url': 'https://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw', + 'uploader': 'SlidesLive Videos - A', + 'uploader_id': 'UC62SdArr41t_-_fX40QCLRw', + 'uploader_url': 'http://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw', + 'upload_date': '20200903', + 'timestamp': 1602599092, + 'duration': 942, + 'age_limit': 0, + 'live_status': 'not_live', + 'playable_in_embed': True, + 'availability': 'unlisted', + 'categories': ['People & Blogs'], + 'tags': [], + 'channel_follower_count': int, + 'like_count': int, + 'view_count': int, + 'thumbnail': r're:^https?://.*\.(?:jpg|png|webp)', + 'thumbnails': 'count:21', + 'chapters': 'count:20', + }, + 'params': { + 'skip_download': 'm3u8', + }, + }, { + # service_name = yoda 'url': 
'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend', 'only_matching': True, }, { - # service_name = url + # dead link, service_name = url 'url': 'https://slideslive.com/38922070/learning-transferable-skills-1', 'only_matching': True, }, { - # service_name = vimeo + # dead link, service_name = vimeo 'url': 'https://slideslive.com/38921896/retrospectives-a-venue-for-selfreflection-in-ml-research-3', 'only_matching': True, }] + _WEBPAGE_TESTS = [{ + # only XML slides info + 'url': 'https://iclr.cc/virtual_2020/poster_Hklr204Fvr.html', + 'info_dict': { + 'id': '38925850', + 'ext': 'mp4', + 'title': 'Towards a Deep Network Architecture for Structured Smoothness', + 'thumbnail': r're:^https?://.*\.jpg', + 'thumbnails': 'count:8', + 'timestamp': 1629671508, + 'upload_date': '20210822', + 'chapters': 'count:7', + }, + 'params': { + 'skip_download': 'm3u8', + }, + }] + + @classmethod + def _extract_embed_urls(cls, url, webpage): + # Reference: https://slideslive.com/embed_presentation.js + for embed_id in re.findall(r'(?s)new\s+SlidesLiveEmbed\s*\([^)]+\bpresentationId:\s*["\'](\d+)["\']', webpage): + url_parsed = urllib.parse.urlparse(url) + origin = f'{url_parsed.scheme}://{url_parsed.netloc}' + yield update_url_query( + f'https://slideslive.com/embed/presentation/{embed_id}', { + 'embed_parent_url': url, + 'embed_container_origin': origin, + }) + + def _download_embed_webpage_handle(self, video_id, headers): + return self._download_webpage_handle( + f'https://slideslive.com/embed/presentation/{video_id}', video_id, + headers=headers, query=traverse_obj(headers, { + 'embed_parent_url': 'Referer', + 'embed_container_origin': 'Origin', + })) + def _extract_custom_m3u8_info(self, m3u8_data): m3u8_dict = {} @@ -108,6 +349,8 @@ class SlidesLiveIE(InfoExtractor): 'VOD-VIDEO-ID': 'service_id', 'VOD-VIDEO-SERVERS': 'video_servers', 'VOD-SUBTITLES': 'subtitles', + 'VOD-SLIDES-JSON-URL': 'slides_json_url', + 'VOD-SLIDES-XML-URL': 'slides_xml_url', } for line in m3u8_data.splitlines(): @@ -126,9 +369,33 @@ class SlidesLiveIE(InfoExtractor): return m3u8_dict + def _extract_formats(self, cdn_hostname, path, video_id): + formats = [] + formats.extend(self._extract_m3u8_formats( + f'https://{cdn_hostname}/{path}/master.m3u8', + video_id, 'mp4', m3u8_id='hls', fatal=False, live=True)) + formats.extend(self._extract_mpd_formats( + f'https://{cdn_hostname}/{path}/master.mpd', + video_id, mpd_id='dash', fatal=False)) + return formats + def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage, urlh = self._download_embed_webpage_handle( + video_id, headers=traverse_obj(parse_qs(url), { + 'Referer': ('embed_parent_url', -1), + 'Origin': ('embed_container_origin', -1)})) + redirect_url = urlh.geturl() + if 'domain_not_allowed' in redirect_url: + domain = traverse_obj(parse_qs(redirect_url), ('allowed_domains[]', ...), get_all=False) + if not domain: + raise ExtractorError( + 'This is an embed-only presentation. 
Try passing --referer', expected=True) + webpage, _ = self._download_embed_webpage_handle(video_id, headers={ + 'Referer': f'https://{domain}/', + 'Origin': f'https://{domain}', + }) + player_token = self._search_regex(r'data-player-token="([^"]+)"', webpage, 'player token') player_data = self._download_webpage( f'https://ben.slideslive.com/player/{video_id}', video_id, @@ -139,6 +406,50 @@ class SlidesLiveIE(InfoExtractor): assert service_name in ('url', 'yoda', 'vimeo', 'youtube') service_id = player_info['service_id'] + slides_info_url = None + slides, slides_info = [], [] + if player_info.get('slides_json_url'): + slides_info_url = player_info['slides_json_url'] + slides = traverse_obj(self._download_json( + slides_info_url, video_id, fatal=False, + note='Downloading slides JSON', errnote=False), 'slides', expected_type=list) or [] + for slide_id, slide in enumerate(slides, start=1): + slides_info.append(( + slide_id, traverse_obj(slide, ('image', 'name')), + int_or_none(slide.get('time'), scale=1000))) + + if not slides and player_info.get('slides_xml_url'): + slides_info_url = player_info['slides_xml_url'] + slides = self._download_xml( + slides_info_url, video_id, fatal=False, + note='Downloading slides XML', errnote='Failed to download slides info') + for slide_id, slide in enumerate(slides.findall('./slide'), start=1): + slides_info.append(( + slide_id, xpath_text(slide, './slideName', 'name'), + int_or_none(xpath_text(slide, './timeSec', 'time')))) + + slides_version = int(self._search_regex( + r'https?://slides\.slideslive\.com/\d+/v(\d+)/\w+\.(?:json|xml)', + slides_info_url, 'slides version', default=0)) + if slides_version < 4: + slide_url_template = 'https://cdn.slideslive.com/data/presentations/%s/slides/big/%s.jpg' + else: + slide_url_template = 'https://slides.slideslive.com/%s/slides/original/%s.png' + + chapters, thumbnails = [], [] + if url_or_none(player_info.get('thumbnail')): + thumbnails.append({'id': 'cover', 'url': player_info['thumbnail']}) + for slide_id, slide_path, start_time in slides_info: + if slide_path: + thumbnails.append({ + 'id': f'{slide_id:03d}', + 'url': slide_url_template % (video_id, slide_path), + }) + chapters.append({ + 'title': f'Slide {slide_id:03d}', + 'start_time': start_time, + }) + subtitles = {} for sub in traverse_obj(player_info, ('subtitles', ...), expected_type=dict): webvtt_url = url_or_none(sub.get('webvtt_url')) @@ -154,25 +465,15 @@ class SlidesLiveIE(InfoExtractor): 'title': player_info.get('title') or self._html_search_meta('title', webpage, default=''), 'timestamp': unified_timestamp(player_info.get('timestamp')), 'is_live': player_info.get('playlist_type') != 'vod', - 'thumbnail': url_or_none(player_info.get('thumbnail')), + 'thumbnails': thumbnails, + 'chapters': chapters, 'subtitles': subtitles, } - if service_name in ('url', 'yoda'): - if service_name == 'url': - info['url'] = service_id - else: - cdn_hostname = player_info['video_servers'][0] - formats = [] - formats.extend(self._extract_m3u8_formats( - f'https://{cdn_hostname}/{service_id}/master.m3u8', - video_id, 'mp4', m3u8_id='hls', fatal=False, live=True)) - formats.extend(self._extract_mpd_formats( - f'https://{cdn_hostname}/{service_id}/master.mpd', - video_id, mpd_id='dash', fatal=False)) - info.update({ - 'formats': formats, - }) + if service_name == 'url': + info['url'] = service_id + elif service_name == 'yoda': + info['formats'] = self._extract_formats(player_info['video_servers'][0], service_id, video_id) else: info.update({ '_type': 'url_transparent', @@ 
-185,4 +486,37 @@ class SlidesLiveIE(InfoExtractor): f'https://player.vimeo.com/video/{service_id}', {'http_headers': {'Referer': url}}) - return info + video_slides = traverse_obj(slides, (..., 'video', 'id')) + if not video_slides: + return info + + def entries(): + yield info + + service_data = self._download_json( + f'https://ben.slideslive.com/player/{video_id}/slides_video_service_data', + video_id, fatal=False, query={ + 'player_token': player_token, + 'videos': ','.join(video_slides), + }, note='Downloading video slides info', errnote='Failed to download video slides info') or {} + + for slide_id, slide in enumerate(slides, 1): + if not traverse_obj(slide, ('video', 'service')) == 'yoda': + continue + video_path = traverse_obj(slide, ('video', 'id')) + cdn_hostname = traverse_obj(service_data, ( + video_path, 'video_servers', ...), get_all=False) + if not cdn_hostname or not video_path: + continue + formats = self._extract_formats(cdn_hostname, video_path, video_id) + if not formats: + continue + yield { + 'id': f'{video_id}-{slide_id:03d}', + 'title': f'{info["title"]} - Slide {slide_id:03d}', + 'timestamp': info['timestamp'], + 'duration': int_or_none(traverse_obj(slide, ('video', 'duration_ms')), scale=1000), + 'formats': formats, + } + + return self.playlist_result(entries(), f'{video_id}-playlist', info['title']) From 4b183d49620e564219c01714ca8639199f6b1cc0 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 29 Dec 2022 14:29:08 +0000 Subject: [PATCH 1854/2552] [extractor/videoken] Add extractors (#5824) Closes #5818 Authored by: bashonly --- yt_dlp/extractor/_extractors.py | 7 + yt_dlp/extractor/videoken.py | 336 ++++++++++++++++++++++++++++++++ 2 files changed, 343 insertions(+) create mode 100644 yt_dlp/extractor/videoken.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 1b76d8264..e51228aff 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2097,6 +2097,13 @@ from .videocampus_sachsen import ( ) from .videodetective import VideoDetectiveIE from .videofyme import VideofyMeIE +from .videoken import ( + VideoKenIE, + VideoKenPlayerIE, + VideoKenPlaylistIE, + VideoKenCategoryIE, + VideoKenTopicIE, +) from .videomore import ( VideomoreIE, VideomoreVideoIE, diff --git a/yt_dlp/extractor/videoken.py b/yt_dlp/extractor/videoken.py new file mode 100644 index 000000000..560b41a6d --- /dev/null +++ b/yt_dlp/extractor/videoken.py @@ -0,0 +1,336 @@ +import base64 +import functools +import math +import re +import time +import urllib.parse + +from .common import InfoExtractor +from .slideslive import SlidesLiveIE +from ..utils import ( + ExtractorError, + InAdvancePagedList, + int_or_none, + traverse_obj, + update_url_query, + url_or_none, +) + + +class VideoKenBaseIE(InfoExtractor): + _ORGANIZATIONS = { + 'videos.icts.res.in': 'icts', + 'videos.cncf.io': 'cncf', + 'videos.neurips.cc': 'neurips', + } + _BASE_URL_RE = rf'https?://(?P{"|".join(map(re.escape, _ORGANIZATIONS))})/' + + _PAGE_SIZE = 12 + + def _get_org_id_and_api_key(self, org, video_id): + details = self._download_json( + f'https://analytics.videoken.com/api/videolake/{org}/details', video_id, + note='Downloading organization ID and API key', headers={ + 'Accept': 'application/json', + }) + return details['id'], details['apikey'] + + def _create_slideslive_url(self, video_url, video_id, referer): + if not video_url and not video_id: + return + elif not video_url or 'embed/sign-in' in video_url: + video_url = 
f'https://slideslive.com/embed/{video_id.lstrip("slideslive-")}' + if url_or_none(referer): + return update_url_query(video_url, { + 'embed_parent_url': referer, + 'embed_container_origin': f'https://{urllib.parse.urlparse(referer).netloc}', + }) + return video_url + + def _extract_videos(self, videos, url): + for video in traverse_obj(videos, (('videos', 'results'), ...)): + video_id = traverse_obj(video, 'youtube_id', 'videoid') + if not video_id: + continue + ie_key = None + if traverse_obj(video, 'type', 'source') == 'youtube': + video_url = video_id + ie_key = 'Youtube' + else: + video_url = traverse_obj(video, 'embed_url', 'embeddableurl') + if urllib.parse.urlparse(video_url).netloc == 'slideslive.com': + ie_key = SlidesLiveIE + video_url = self._create_slideslive_url(video_url, video_id, url) + if not video_url: + continue + yield self.url_result(video_url, ie_key, video_id) + + +class VideoKenIE(VideoKenBaseIE): + _VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'(?:(?:topic|category)/[^/#?]+/)?video/(?P[\w-]+)' + _TESTS = [{ + # neurips -> videoken -> slideslive + 'url': 'https://videos.neurips.cc/video/slideslive-38922815', + 'info_dict': { + 'id': '38922815', + 'ext': 'mp4', + 'title': 'Efficient Processing of Deep Neural Network: from Algorithms to Hardware Architectures', + 'timestamp': 1630939331, + 'upload_date': '20210906', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'thumbnails': 'count:330', + 'chapters': 'count:329', + }, + 'params': { + 'skip_download': 'm3u8', + }, + 'expected_warnings': ['Failed to download VideoKen API JSON'], + }, { + # neurips -> videoken -> slideslive -> youtube + 'url': 'https://videos.neurips.cc/topic/machine%20learning/video/slideslive-38923348', + 'info_dict': { + 'id': '2Xa_dt78rJE', + 'ext': 'mp4', + 'display_id': '38923348', + 'title': 'Machine Education', + 'description': 'Watch full version of this video at https://slideslive.com/38923348.', + 'channel': 'SlidesLive Videos - G2', + 'channel_id': 'UCOExahQQ588Da8Nft_Ltb9w', + 'channel_url': 'https://www.youtube.com/channel/UCOExahQQ588Da8Nft_Ltb9w', + 'uploader': 'SlidesLive Videos - G2', + 'uploader_id': 'UCOExahQQ588Da8Nft_Ltb9w', + 'uploader_url': 'http://www.youtube.com/channel/UCOExahQQ588Da8Nft_Ltb9w', + 'duration': 2504, + 'timestamp': 1618922125, + 'upload_date': '20200131', + 'age_limit': 0, + 'channel_follower_count': int, + 'view_count': int, + 'availability': 'unlisted', + 'live_status': 'not_live', + 'playable_in_embed': True, + 'categories': ['People & Blogs'], + 'tags': [], + 'thumbnail': r're:^https?://.*\.(?:jpg|webp)', + 'thumbnails': 'count:78', + 'chapters': 'count:77', + }, + 'params': { + 'skip_download': 'm3u8', + }, + 'expected_warnings': ['Failed to download VideoKen API JSON'], + }, { + # icts -> videoken -> youtube + 'url': 'https://videos.icts.res.in/topic/random%20variable/video/zysIsojYdvc', + 'info_dict': { + 'id': 'zysIsojYdvc', + 'ext': 'mp4', + 'title': 'Small-worlds, complex networks and random graphs (Lecture 3) by Remco van der Hofstad', + 'description': 'md5:87433069d79719eeadc1962cc2ace00b', + 'channel': 'International Centre for Theoretical Sciences', + 'channel_id': 'UCO3xnVTHzB7l-nc8mABUJIQ', + 'channel_url': 'https://www.youtube.com/channel/UCO3xnVTHzB7l-nc8mABUJIQ', + 'uploader': 'International Centre for Theoretical Sciences', + 'uploader_id': 'ICTStalks', + 'uploader_url': 'http://www.youtube.com/user/ICTStalks', + 'duration': 3372, + 'upload_date': '20191004', + 'age_limit': 0, + 'live_status': 'not_live', + 'availability': 'public', + 
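# --- Editor's aside (standalone sketch, Python 3.9+): what _create_slideslive_url
# above assembles for a "slideslive-" prefixed id. Note that the patch's
# str.lstrip('slideslive-') strips a *character set* rather than a literal prefix;
# it works here only because the remaining id is numeric. removeprefix states the
# intent directly. The referer value below is illustrative.
import urllib.parse

referer = 'https://videos.neurips.cc/video/slideslive-38922815'
video_id = 'slideslive-38922815'
base = f'https://slideslive.com/embed/{video_id.removeprefix("slideslive-")}'
print(base + '?' + urllib.parse.urlencode({
    'embed_parent_url': referer,
    'embed_container_origin': f'https://{urllib.parse.urlparse(referer).netloc}',
}))
# --- end editor's aside ---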
'playable_in_embed': True,
+            'channel_follower_count': int,
+            'like_count': int,
+            'view_count': int,
+            'categories': ['Science & Technology'],
+            'tags': [],
+            'thumbnail': r're:^https?://.*\.(?:jpg|webp)',
+            'thumbnails': 'count:42',
+            'chapters': 'count:20',
+        },
+        'params': {
+            'skip_download': 'm3u8',
+        },
+    }, {
+        'url': 'https://videos.cncf.io/category/478/video/IL4nxbmUIX8',
+        'only_matching': True,
+    }, {
+        'url': 'https://videos.cncf.io/topic/kubernetes/video/YAM2d7yTrrI',
+        'only_matching': True,
+    }, {
+        'url': 'https://videos.icts.res.in/video/d7HuP_abpKU',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        hostname, video_id = self._match_valid_url(url).group('host', 'id')
+        org_id, _ = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], video_id)
+        details = self._download_json(
+            'https://analytics.videoken.com/api/videoinfo_private', video_id, query={
+                'videoid': video_id,
+                'org_id': org_id,
+            }, headers={'Accept': 'application/json'}, note='Downloading VideoKen API JSON',
+            errnote='Failed to download VideoKen API JSON', fatal=False)
+        if details:
+            return next(self._extract_videos({'videos': [details]}, url))
+        # fallback for API error 400 response
+        elif video_id.startswith('slideslive-'):
+            return self.url_result(
+                self._create_slideslive_url(None, video_id, url), SlidesLiveIE, video_id)
+        elif re.match(r'^[\w-]{11}$', video_id):
+            return self.url_result(video_id, 'Youtube', video_id)
+        else:
+            raise ExtractorError('Unable to extract without VideoKen API response')
+
+
+class VideoKenPlayerIE(VideoKenBaseIE):
+    _VALID_URL = r'https?://player\.videoken\.com/embed/slideslive-(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://player.videoken.com/embed/slideslive-38968434',
+        'info_dict': {
+            'id': '38968434',
+            'ext': 'mp4',
+            'title': 'Deep Learning with Label Differential Privacy',
+            'timestamp': 1643377020,
+            'upload_date': '20220128',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+            'thumbnails': 'count:30',
+            'chapters': 'count:29',
+        },
+        'params': {
+            'skip_download': 'm3u8',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        return self.url_result(
+            self._create_slideslive_url(None, video_id, url), SlidesLiveIE, video_id)
+
+
+class VideoKenPlaylistIE(VideoKenBaseIE):
+    _VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'(?:category/\d+/)?playlist/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://videos.icts.res.in/category/1822/playlist/381',
+        'playlist_mincount': 117,
+        'info_dict': {
+            'id': '381',
+            'title': 'Cosmology - The Next Decade',
+        },
+    }]
+
+    def _real_extract(self, url):
+        hostname, playlist_id = self._match_valid_url(url).group('host', 'id')
+        org_id, _ = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], playlist_id)
+        videos = self._download_json(
+            f'https://analytics.videoken.com/api/{org_id}/playlistitems/{playlist_id}/',
+            playlist_id, headers={'Accept': 'application/json'}, note='Downloading API JSON')
+        return self.playlist_result(self._extract_videos(videos, url), playlist_id, videos.get('title'))
+
+
+class VideoKenCategoryIE(VideoKenBaseIE):
+    _VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'category/(?P<id>\d+)/?(?:$|[?#])'
+    _TESTS = [{
+        'url': 'https://videos.icts.res.in/category/1822/',
+        'playlist_mincount': 500,
+        'info_dict': {
+            'id': '1822',
+            'title': 'Programs',
+        },
+    }, {
+        'url': 'https://videos.neurips.cc/category/350/',
+        'playlist_mincount': 34,
+        'info_dict': {
+            'id': '350',
+            'title': 'NeurIPS 2018',
+        },
+    }, {
+        'url': 'https://videos.cncf.io/category/479/',
+        'playlist_mincount': 328,
+
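# --- Editor's aside: the category/topic extractors in this file page through
# results with a fixed page size and compute the page count up front from the
# first API response, which is why InAdvancePagedList is used rather than an
# on-demand pager. Illustrative numbers:
import math

PAGE_SIZE = 12                # VideoKenBaseIE._PAGE_SIZE
records_total = 328           # e.g. int(category_info['recordsTotal'])
total_pages = math.ceil(records_total / PAGE_SIZE)
assert total_pages == 28      # _entries() is then invoked once per page index
# --- end editor's aside ---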
'info_dict': { + 'id': '479', + 'title': 'KubeCon + CloudNativeCon Europe\'19', + }, + }] + + def _get_category_page(self, category_id, org_id, page=1, note=None): + return self._download_json( + f'https://analytics.videoken.com/api/videolake/{org_id}/category_videos', category_id, + fatal=False, note=note if note else f'Downloading category page {page}', + query={ + 'category_id': category_id, + 'page_number': page, + 'length': self._PAGE_SIZE, + }, headers={'Accept': 'application/json'}) or {} + + def _entries(self, category_id, org_id, url, page): + videos = self._get_category_page(category_id, org_id, page + 1) + yield from self._extract_videos(videos, url) + + def _real_extract(self, url): + hostname, category_id = self._match_valid_url(url).group('host', 'id') + org_id, _ = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], category_id) + category_info = self._get_category_page(category_id, org_id, note='Downloading category info') + category = category_info['category_name'] + total_pages = math.ceil(int(category_info['recordsTotal']) / self._PAGE_SIZE) + return self.playlist_result(InAdvancePagedList( + functools.partial(self._entries, category_id, org_id, url), + total_pages, self._PAGE_SIZE), category_id, category) + + +class VideoKenTopicIE(VideoKenBaseIE): + _VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'topic/(?P[^/#?]+)/?(?:$|[?#])' + _TESTS = [{ + 'url': 'https://videos.neurips.cc/topic/machine%20learning/', + 'playlist_mincount': 500, + 'info_dict': { + 'id': 'machine_learning', + 'title': 'machine learning', + }, + }, { + 'url': 'https://videos.icts.res.in/topic/gravitational%20waves/', + 'playlist_mincount': 77, + 'info_dict': { + 'id': 'gravitational_waves', + 'title': 'gravitational waves' + }, + }, { + 'url': 'https://videos.cncf.io/topic/prometheus/', + 'playlist_mincount': 134, + 'info_dict': { + 'id': 'prometheus', + 'title': 'prometheus', + }, + }] + + def _get_topic_page(self, topic, org_id, search_id, api_key, page=1, note=None): + return self._download_json( + 'https://es.videoken.com/api/v1.0/get_results', topic, fatal=False, query={ + 'orgid': org_id, + 'size': self._PAGE_SIZE, + 'query': topic, + 'page': page, + 'sort': 'upload_desc', + 'filter': 'all', + 'token': api_key, + 'is_topic': 'true', + 'category': '', + 'searchid': search_id, + }, headers={'Accept': 'application/json'}, + note=note if note else f'Downloading topic page {page}') or {} + + def _entries(self, topic, org_id, search_id, api_key, url, page): + videos = self._get_topic_page(topic, org_id, search_id, api_key, page + 1) + yield from self._extract_videos(videos, url) + + def _real_extract(self, url): + hostname, topic_id = self._match_valid_url(url).group('host', 'id') + topic = urllib.parse.unquote(topic_id) + topic_id = topic.replace(' ', '_') + org_id, api_key = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], topic) + search_id = base64.b64encode(f':{topic}:{int(time.time())}:transient'.encode()).decode() + total_pages = int_or_none(self._get_topic_page( + topic, org_id, search_id, api_key, note='Downloading topic info')['total_no_of_pages']) + return self.playlist_result(InAdvancePagedList( + functools.partial(self._entries, topic, org_id, search_id, api_key, url), + total_pages, self._PAGE_SIZE), topic_id, topic) From 53006b35ea8b26ff31a96a423ddaa3304d0a124e Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 29 Dec 2022 15:04:09 +0000 Subject: [PATCH 1855/2552] [extractor/amazon] Add `AmazonReviews` extractor (#5857) Closes 
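# --- Editor's aside on VideoKenTopicIE above: the opaque searchid it sends with
# every page is a base64 encoding of a colon-separated transient token. Standalone
# reproduction (the topic value is illustrative):
import base64
import time

topic = 'machine learning'
search_id = base64.b64encode(f':{topic}:{int(time.time())}:transient'.encode()).decode()
print(search_id)  # passed as ?searchid= on each get_results page request
# --- end editor's aside ---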
#5766 Authored by: bashonly --- yt_dlp/extractor/_extractors.py | 5 +- yt_dlp/extractor/amazon.py | 116 ++++++++++++++++++++++++++++++-- 2 files changed, 113 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index e51228aff..4fed24c35 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -87,7 +87,10 @@ from .alura import ( AluraCourseIE ) from .amcnetworks import AMCNetworksIE -from .amazon import AmazonStoreIE +from .amazon import ( + AmazonStoreIE, + AmazonReviewsIE, +) from .amazonminitv import ( AmazonMiniTVIE, AmazonMiniTVSeasonIE, diff --git a/yt_dlp/extractor/amazon.py b/yt_dlp/extractor/amazon.py index 4d3170683..a03f983e0 100644 --- a/yt_dlp/extractor/amazon.py +++ b/yt_dlp/extractor/amazon.py @@ -1,5 +1,17 @@ +import re + from .common import InfoExtractor -from ..utils import ExtractorError, int_or_none +from ..utils import ( + ExtractorError, + clean_html, + float_or_none, + get_element_by_attribute, + get_element_by_class, + int_or_none, + js_to_json, + traverse_obj, + url_or_none, +) class AmazonStoreIE(InfoExtractor): @@ -9,7 +21,7 @@ class AmazonStoreIE(InfoExtractor): 'url': 'https://www.amazon.co.uk/dp/B098XNCHLD/', 'info_dict': { 'id': 'B098XNCHLD', - 'title': 'md5:dae240564cbb2642170c02f7f0d7e472', + 'title': str, }, 'playlist_mincount': 1, 'playlist': [{ @@ -20,28 +32,32 @@ class AmazonStoreIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 34, }, - }] + }], + 'expected_warnings': ['Unable to extract data'], }, { 'url': 'https://www.amazon.in/Sony-WH-1000XM4-Cancelling-Headphones-Bluetooth/dp/B0863TXGM3', 'info_dict': { 'id': 'B0863TXGM3', - 'title': 'md5:d1d3352428f8f015706c84b31e132169', + 'title': str, }, 'playlist_mincount': 4, + 'expected_warnings': ['Unable to extract data'], }, { 'url': 'https://www.amazon.com/dp/B0845NXCXF/', 'info_dict': { 'id': 'B0845NXCXF', - 'title': 'md5:f3fa12779bf62ddb6a6ec86a360a858e', + 'title': str, }, 'playlist-mincount': 1, + 'expected_warnings': ['Unable to extract data'], }, { 'url': 'https://www.amazon.es/Samsung-Smartphone-s-AMOLED-Quad-c%C3%A1mara-espa%C3%B1ola/dp/B08WX337PQ', 'info_dict': { 'id': 'B08WX337PQ', - 'title': 'md5:f3fa12779bf62ddb6a6ec86a360a858e', + 'title': str, }, 'playlist_mincount': 1, + 'expected_warnings': ['Unable to extract data'], }] def _real_extract(self, url): @@ -52,7 +68,7 @@ class AmazonStoreIE(InfoExtractor): try: data_json = self._search_json( r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', id, - transform_source=lambda x: x.replace(R'\\u', R'\u')) + transform_source=js_to_json) except ExtractorError as e: retry.error = e @@ -66,3 +82,89 @@ class AmazonStoreIE(InfoExtractor): 'width': int_or_none(video.get('videoWidth')), } for video in (data_json.get('videos') or []) if video.get('isVideo') and video.get('url')] return self.playlist_result(entries, playlist_id=id, playlist_title=data_json.get('title')) + + +class AmazonReviewsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?amazon\.(?:[a-z]{2,3})(?:\.[a-z]{2})?/gp/customer-reviews/(?P[^/&#$?]+)' + _TESTS = [{ + 'url': 'https://www.amazon.com/gp/customer-reviews/R10VE9VUSY19L3/ref=cm_cr_arp_d_rvw_ttl', + 'info_dict': { + 'id': 'R10VE9VUSY19L3', + 'ext': 'mp4', + 'title': 'Get squad #Suspicious', + 'description': 'md5:7012695052f440a1e064e402d87e0afb', + 'uploader': 'Kimberly Cronkright', + 'average_rating': 1.0, + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'expected_warnings': ['Review body was not found in webpage'], + 
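# --- Editor's aside (fabricated sample markup): _real_extract below looks for two
# shapes of video reference inside the review body -- an HLS manifest URL in a
# data-video-url attribute, or a progressive MP4 in a hidden <input class="video-url">.
# The MP4 case, using the same pattern as the extractor:
import re

review_body = ('<div data-hook="review-body">'
               '<input type="hidden" value="https://example.com/review.mp4" class="video-url">'
               '</div>')
mp4_url = re.search(r'<input[^>]+\bvalue="([^"]+)"[^>]+\bclass="video-url"', review_body)
assert mp4_url.group(1) == 'https://example.com/review.mp4'
# --- end editor's aside ---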
}, { + 'url': 'https://www.amazon.com/gp/customer-reviews/R10VE9VUSY19L3/ref=cm_cr_arp_d_rvw_ttl?language=es_US', + 'info_dict': { + 'id': 'R10VE9VUSY19L3', + 'ext': 'mp4', + 'title': 'Get squad #Suspicious', + 'description': 'md5:7012695052f440a1e064e402d87e0afb', + 'uploader': 'Kimberly Cronkright', + 'average_rating': 1.0, + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'expected_warnings': ['Review body was not found in webpage'], + }, { + 'url': 'https://www.amazon.in/gp/customer-reviews/RV1CO8JN5VGXV/', + 'info_dict': { + 'id': 'RV1CO8JN5VGXV', + 'ext': 'mp4', + 'title': 'Not sure about its durability', + 'description': 'md5:1a252c106357f0a3109ebf37d2e87494', + 'uploader': 'Shoaib Gulzar', + 'average_rating': 2.0, + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'expected_warnings': ['Review body was not found in webpage'], + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + for retry in self.RetryManager(): + webpage = self._download_webpage(url, video_id) + review_body = get_element_by_attribute('data-hook', 'review-body', webpage) + if not review_body: + retry.error = ExtractorError('Review body was not found in webpage', expected=True) + + formats, subtitles = [], {} + + manifest_url = self._search_regex( + r'data-video-url="([^"]+)"', review_body, 'm3u8 url', default=None) + if url_or_none(manifest_url): + fmts, subtitles = self._extract_m3u8_formats_and_subtitles( + manifest_url, video_id, 'mp4', fatal=False) + formats.extend(fmts) + + video_url = self._search_regex( + r']+\bvalue="([^"]+)"[^>]+\bclass="video-url"', review_body, 'mp4 url', default=None) + if url_or_none(video_url): + formats.append({ + 'url': video_url, + 'ext': 'mp4', + 'format_id': 'http-mp4', + }) + + if not formats: + self.raise_no_formats('No video found for this customer review', expected=True) + + return { + 'id': video_id, + 'title': (clean_html(get_element_by_attribute('data-hook', 'review-title', webpage)) + or self._html_extract_title(webpage)), + 'description': clean_html(traverse_obj(re.findall( + r'(.+?)', review_body), -1)), + 'uploader': clean_html(get_element_by_class('a-profile-name', webpage)), + 'average_rating': float_or_none(clean_html(get_element_by_attribute( + 'data-hook', 'review-star-rating', webpage) or '').partition(' ')[0]), + 'thumbnail': self._search_regex( + r'data-thumbnail-url="([^"]+)"', review_body, 'thumbnail', default=None), + 'formats': formats, + 'subtitles': subtitles, + } From 2647c933b8ed22f95dd8e9866c4db031867a1bc8 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 29 Dec 2022 16:32:54 +0000 Subject: [PATCH 1856/2552] [extractor/wistia] Improve extension detection (#5415) Closes #5053 Authored by: bashonly, Grub4k, pukkandan --- yt_dlp/extractor/wistia.py | 41 ++++++++----- yt_dlp/utils.py | 120 +++++++++++++++++++++++-------------- 2 files changed, 103 insertions(+), 58 deletions(-) diff --git a/yt_dlp/extractor/wistia.py b/yt_dlp/extractor/wistia.py index 38dcc2f5b..884fa4b5f 100644 --- a/yt_dlp/extractor/wistia.py +++ b/yt_dlp/extractor/wistia.py @@ -6,12 +6,15 @@ from base64 import b64decode from .common import InfoExtractor from ..utils import ( ExtractorError, + HEADRequest, + determine_ext, float_or_none, int_or_none, parse_qs, traverse_obj, try_get, update_url_query, + urlhandle_detect_ext, ) @@ -34,6 +37,16 @@ class WistiaBaseIE(InfoExtractor): return embed_config + def _get_real_ext(self, url): + ext = determine_ext(url, default_ext='bin') + if ext == 'bin': + urlh = self._request_webpage( 
+ HEADRequest(url), None, note='Checking media extension', + errnote='HEAD request returned error', fatal=False) + if urlh: + ext = urlhandle_detect_ext(urlh, default='bin') + return 'mp4' if ext == 'mov' else ext + def _extract_media(self, embed_config): data = embed_config['media'] video_id = data['hashedId'] @@ -51,13 +64,13 @@ class WistiaBaseIE(InfoExtractor): continue elif atype in ('still', 'still_image'): thumbnails.append({ - 'url': aurl, + 'url': aurl.replace('.bin', f'.{self._get_real_ext(aurl)}'), 'width': int_or_none(a.get('width')), 'height': int_or_none(a.get('height')), 'filesize': int_or_none(a.get('size')), }) else: - aext = a.get('ext') + aext = a.get('ext') or self._get_real_ext(aurl) display_name = a.get('display_name') format_id = atype if atype and atype.endswith('_video') and display_name: @@ -169,26 +182,26 @@ class WistiaIE(WistiaBaseIE): 'md5': '10c1ce9c4dde638202513ed17a3767bd', 'info_dict': { 'id': 'a6ndpko1wg', - 'ext': 'bin', + 'ext': 'mp4', 'title': 'Episode 2: Boxed Water\'s retention is thirsty', 'upload_date': '20210324', 'description': 'md5:da5994c2c2d254833b412469d9666b7a', 'duration': 966.0, 'timestamp': 1616614369, - 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/53dc60239348dc9b9fba3755173ea4c2.bin', + 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/53dc60239348dc9b9fba3755173ea4c2.png', } }, { 'url': 'wistia:5vd7p4bct5', 'md5': 'b9676d24bf30945d97060638fbfe77f0', 'info_dict': { 'id': '5vd7p4bct5', - 'ext': 'bin', + 'ext': 'mp4', 'title': 'md5:eaa9f64c4efd7b5f098b9b6118597679', 'description': 'md5:a9bea0315f0616aa5df2dc413ddcdd0f', 'upload_date': '20220915', 'timestamp': 1663258727, 'duration': 623.019, - 'thumbnail': r're:https?://embed(?:-ssl)?.wistia.com/.+\.(?:jpg|bin)$', + 'thumbnail': r're:https?://embed(?:-ssl)?.wistia.com/.+\.jpg$', }, }, { 'url': 'wistia:sh7fpupwlt', @@ -208,25 +221,25 @@ class WistiaIE(WistiaBaseIE): 'url': 'https://www.weidert.com/blog/wistia-channels-video-marketing-tool', 'info_dict': { 'id': 'cqwukac3z1', - 'ext': 'bin', + 'ext': 'mp4', 'title': 'How Wistia Channels Can Help Capture Inbound Value From Your Video Content', 'duration': 158.125, 'timestamp': 1618974400, 'description': 'md5:27abc99a758573560be72600ef95cece', 'upload_date': '20210421', - 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/6c551820ae950cdee2306d6cbe9ef742.bin', + 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/6c551820ae950cdee2306d6cbe9ef742.jpg', } }, { 'url': 'https://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson', 'md5': 'b9676d24bf30945d97060638fbfe77f0', 'info_dict': { 'id': '5vd7p4bct5', - 'ext': 'bin', + 'ext': 'mp4', 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england', 'upload_date': '20220915', 'timestamp': 1663258727, 'duration': 623.019, - 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/83e6ec693e2c05a0ce65809cbaead86a.bin', + 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/83e6ec693e2c05a0ce65809cbaead86a.jpg', 'description': 'a Paywall Videos video', }, }] @@ -302,9 +315,9 @@ class WistiaChannelIE(WistiaBaseIE): 'url': 'https://fast.wistia.net/embed/channel/3802iirk0l?wchannelid=3802iirk0l&wmediaid=sp5dqjzw3n', 'info_dict': { 'id': 'sp5dqjzw3n', - 'ext': 'bin', + 'ext': 'mp4', 'title': 'The Roof S2: The Modern CRO', - 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/dadfa9233eaa505d5e0c85c23ff70741.bin', + 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/dadfa9233eaa505d5e0c85c23ff70741.png', 
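# --- Editor's aside: a minimal stand-alone illustration of the ".bin" repair that
# _get_real_ext above performs, using only the standard library. The real helper
# goes through yt-dlp's HEADRequest/urlhandle_detect_ext and additionally checks
# the x-amz-meta-name header; this sketch only models the Content-Type path.
import mimetypes
import urllib.request

def sniff_ext(url, default='bin'):
    ext = url.rpartition('.')[2]
    if ext == 'bin':  # Wistia serves some assets under an opaque .bin suffix
        req = urllib.request.Request(url, method='HEAD')
        with urllib.request.urlopen(req) as resp:
            guess = mimetypes.guess_extension(resp.headers.get_content_type())
        ext = (guess or f'.{default}').lstrip('.')
    return 'mp4' if ext == 'mov' else ext  # same mov -> mp4 normalisation as the patch
# --- end editor's aside ---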
'duration': 86.487, 'description': 'A sales leader on The Roof? Man, they really must be letting anyone up here this season.\n', 'timestamp': 1619790290, @@ -334,12 +347,12 @@ class WistiaChannelIE(WistiaBaseIE): 'info_dict': { 'id': 'pz0m0l0if3', 'title': 'A Framework for Improving Product Team Performance', - 'ext': 'bin', + 'ext': 'mp4', 'timestamp': 1653935275, 'upload_date': '20220530', 'description': 'Learn how to help your company improve and achieve your product related goals.', 'duration': 1854.39, - 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/12fd19e56413d9d6f04e2185c16a6f8854e25226.bin', + 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/12fd19e56413d9d6f04e2185c16a6f8854e25226.png', }, 'params': {'noplaylist': True, 'skip_download': True}, }] diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 65408bf19..3947dcf2e 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -3480,67 +3480,93 @@ def error_to_str(err): return f'{type(err).__name__}: {err}' -def mimetype2ext(mt): - if mt is None: +def mimetype2ext(mt, default=NO_DEFAULT): + if not isinstance(mt, str): + if default is not NO_DEFAULT: + return default return None - mt, _, params = mt.partition(';') - mt = mt.strip() - - FULL_MAP = { - 'audio/mp4': 'm4a', - # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as - # it's the most popular one - 'audio/mpeg': 'mp3', - 'audio/x-wav': 'wav', - 'audio/wav': 'wav', - 'audio/wave': 'wav', - } - - ext = FULL_MAP.get(mt) - if ext is not None: - return ext - - SUBTYPE_MAP = { + MAP = { + # video '3gpp': '3gp', - 'smptett+xml': 'tt', - 'ttaf+xml': 'dfxp', - 'ttml+xml': 'ttml', + 'mp2t': 'ts', + 'mp4': 'mp4', + 'mpeg': 'mpeg', + 'mpegurl': 'm3u8', + 'quicktime': 'mov', + 'webm': 'webm', + 'vp9': 'vp9', 'x-flv': 'flv', + 'x-m4v': 'm4v', + 'x-matroska': 'mkv', + 'x-mng': 'mng', 'x-mp4-fragmented': 'mp4', - 'x-ms-sami': 'sami', + 'x-ms-asf': 'asf', 'x-ms-wmv': 'wmv', - 'mpegurl': 'm3u8', - 'x-mpegurl': 'm3u8', - 'vnd.apple.mpegurl': 'm3u8', + 'x-msvideo': 'avi', + + # application (streaming playlists) 'dash+xml': 'mpd', 'f4m+xml': 'f4m', 'hds+xml': 'f4m', + 'vnd.apple.mpegurl': 'm3u8', 'vnd.ms-sstr+xml': 'ism', - 'quicktime': 'mov', - 'mp2t': 'ts', + 'x-mpegurl': 'm3u8', + + # audio + 'audio/mp4': 'm4a', + # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. 
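# --- Editor's aside: the rewritten mimetype2ext in this hunk resolves in three
# steps -- the full "type/subtype", then the bare subtype, then the "+suffix"
# tail -- before falling back to `default`. A miniature model of that lookup order:
MINI_MAP = {'audio/mpeg': 'mp3', 'dash+xml': 'mpd', 'json': 'json'}

def mini_mimetype2ext(mt, default=None):
    mimetype = mt.partition(';')[0].strip().lower()
    subtype = mimetype.rpartition('/')[2]
    for key in (mimetype, subtype, subtype.rsplit('+')[-1]):
        if key in MINI_MAP:
            return MINI_MAP[key]
    return default if default is not None else subtype.replace('+', '.')

assert mini_mimetype2ext('audio/mpeg; charset=utf-8') == 'mp3'
assert mini_mimetype2ext('application/dash+xml') == 'mpd'
assert mini_mimetype2ext('application/activity+json') == 'json'
# --- end editor's aside ---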
+ # Using .mp3 as it's the most popular one + 'audio/mpeg': 'mp3', + 'audio/webm': 'weba', + 'audio/x-matroska': 'mka', + 'audio/x-mpegurl': 'm3u', + 'midi': 'mid', + 'ogg': 'ogg', + 'wav': 'wav', + 'wave': 'wav', + 'x-aac': 'aac', + 'x-flac': 'flac', + 'x-m4a': 'm4a', + 'x-realaudio': 'ra', 'x-wav': 'wav', - 'filmstrip+json': 'fs', - 'svg+xml': 'svg', - } - _, _, subtype = mt.rpartition('/') - ext = SUBTYPE_MAP.get(subtype.lower()) - if ext is not None: - return ext + # image + 'avif': 'avif', + 'bmp': 'bmp', + 'gif': 'gif', + 'jpeg': 'jpg', + 'png': 'png', + 'svg+xml': 'svg', + 'tiff': 'tif', + 'vnd.wap.wbmp': 'wbmp', + 'webp': 'webp', + 'x-icon': 'ico', + 'x-jng': 'jng', + 'x-ms-bmp': 'bmp', + + # caption + 'filmstrip+json': 'fs', + 'smptett+xml': 'tt', + 'ttaf+xml': 'dfxp', + 'ttml+xml': 'ttml', + 'x-ms-sami': 'sami', - SUFFIX_MAP = { + # misc + 'gzip': 'gz', 'json': 'json', 'xml': 'xml', 'zip': 'zip', - 'gzip': 'gz', } - _, _, suffix = subtype.partition('+') - ext = SUFFIX_MAP.get(suffix) - if ext is not None: - return ext + mimetype = mt.partition(';')[0].strip().lower() + _, _, subtype = mimetype.rpartition('/') + ext = traverse_obj(MAP, mimetype, subtype, subtype.rsplit('+')[-1]) + if ext: + return ext + elif default is not NO_DEFAULT: + return default return subtype.replace('+', '.') @@ -3634,7 +3660,7 @@ def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None): return 'mkv' if allow_mkv else preferences[-1] -def urlhandle_detect_ext(url_handle): +def urlhandle_detect_ext(url_handle, default=NO_DEFAULT): getheader = url_handle.headers.get cd = getheader('Content-Disposition') @@ -3645,7 +3671,13 @@ def urlhandle_detect_ext(url_handle): if e: return e - return mimetype2ext(getheader('Content-Type')) + meta_ext = getheader('x-amz-meta-name') + if meta_ext: + e = meta_ext.rpartition('.')[2] + if e: + return e + + return mimetype2ext(getheader('Content-Type'), default=default) def encode_data_uri(data, mime_type): From c1edb853b0a0cc69ea08337c0c5aee669b26d3d2 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 29 Dec 2022 17:31:01 +0000 Subject: [PATCH 1857/2552] [extractor/kick] Add extractor (#5736) Closes #5722 Authored by: bashonly --- yt_dlp/extractor/_extractors.py | 4 + yt_dlp/extractor/kick.py | 127 ++++++++++++++++++++++++++++++++ 2 files changed, 131 insertions(+) create mode 100644 yt_dlp/extractor/kick.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 4fed24c35..a2b92b85a 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -844,6 +844,10 @@ from .khanacademy import ( KhanAcademyIE, KhanAcademyUnitIE, ) +from .kick import ( + KickIE, + KickVODIE, +) from .kicker import KickerIE from .kickstarter import KickStarterIE from .kinja import KinjaEmbedIE diff --git a/yt_dlp/extractor/kick.py b/yt_dlp/extractor/kick.py new file mode 100644 index 000000000..a79ffb7a9 --- /dev/null +++ b/yt_dlp/extractor/kick.py @@ -0,0 +1,127 @@ +from .common import InfoExtractor + +from ..utils import ( + HEADRequest, + UserNotLive, + float_or_none, + merge_dicts, + str_or_none, + traverse_obj, + unified_timestamp, + url_or_none, +) + + +class KickBaseIE(InfoExtractor): + def _real_initialize(self): + self._request_webpage(HEADRequest('https://kick.com/'), None, 'Setting up session') + xsrf_token = self._get_cookies('https://kick.com/').get('XSRF-TOKEN') + if not xsrf_token: + self.write_debug('kick.com did not set XSRF-TOKEN cookie') + KickBaseIE._API_HEADERS = { + 
'Authorization': f'Bearer {xsrf_token.value}',
+            'X-XSRF-TOKEN': xsrf_token.value,
+        } if xsrf_token else {}
+
+    def _call_api(self, path, display_id, note='Downloading API JSON', headers={}, **kwargs):
+        return self._download_json(
+            f'https://kick.com/api/v1/{path}', display_id, note=note,
+            headers=merge_dicts(headers, self._API_HEADERS), **kwargs)
+
+
+class KickIE(KickBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?kick\.com/(?!(?:video|categories|search|auth)(?:[/?#]|$))(?P<id>[\w_]+)'
+    _TESTS = [{
+        'url': 'https://kick.com/yuppy',
+        'info_dict': {
+            'id': '6cde1-kickrp-joe-flemmingskick-info-heremust-knowmust-see21',
+            'ext': 'mp4',
+            'title': str,
+            'description': str,
+            'channel': 'yuppy',
+            'channel_id': '33538',
+            'uploader': 'Yuppy',
+            'uploader_id': '33793',
+            'upload_date': str,
+            'live_status': 'is_live',
+            'timestamp': int,
+            'thumbnail': r're:^https?://.*\.jpg',
+            'categories': list,
+        },
+        'skip': 'livestream',
+    }, {
+        'url': 'https://kick.com/kmack710',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        channel = self._match_id(url)
+        response = self._call_api(f'channels/{channel}', channel)
+        if not traverse_obj(response, 'livestream', expected_type=dict):
+            raise UserNotLive(video_id=channel)
+
+        return {
+            'id': str(traverse_obj(
+                response, ('livestream', ('slug', 'id')), get_all=False, default=channel)),
+            'formats': self._extract_m3u8_formats(
+                response['playback_url'], channel, 'mp4', live=True),
+            'title': traverse_obj(
+                response, ('livestream', ('session_title', 'slug')), get_all=False, default=''),
+            'description': traverse_obj(response, ('user', 'bio')),
+            'channel': channel,
+            'channel_id': str_or_none(traverse_obj(response, 'id', ('livestream', 'channel_id'))),
+            'uploader': traverse_obj(response, 'name', ('user', 'username')),
+            'uploader_id': str_or_none(traverse_obj(response, 'user_id', ('user', 'id'))),
+            'is_live': True,
+            'timestamp': unified_timestamp(traverse_obj(response, ('livestream', 'created_at'))),
+            'thumbnail': traverse_obj(
+                response, ('livestream', 'thumbnail', 'url'), expected_type=url_or_none),
+            'categories': traverse_obj(response, ('recent_categories', ..., 'name')),
+        }
+
+
+class KickVODIE(KickBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?kick\.com/video/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
+    _TESTS = [{
+        'url': 'https://kick.com/video/54244b5e-050a-4df4-a013-b2433dafbe35',
+        'md5': '73691206a6a49db25c5aa1588e6538fc',
+        'info_dict': {
+            'id': '54244b5e-050a-4df4-a013-b2433dafbe35',
+            'ext': 'mp4',
+            'title': 'Making 710-carBoosting. Kinda No Pixel inspired.
!guilded - !links', + 'description': 'md5:a0d3546bf7955d0a8252ffe0fd6f518f', + 'channel': 'kmack710', + 'channel_id': '16278', + 'uploader': 'Kmack710', + 'uploader_id': '16412', + 'upload_date': '20221206', + 'timestamp': 1670318289, + 'duration': 40104.0, + 'thumbnail': r're:^https?://.*\.jpg', + 'categories': ['Grand Theft Auto V'], + }, + 'params': { + 'skip_download': 'm3u8', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + response = self._call_api(f'video/{video_id}', video_id) + + return { + 'id': video_id, + 'formats': self._extract_m3u8_formats(response['source'], video_id, 'mp4'), + 'title': traverse_obj( + response, ('livestream', ('session_title', 'slug')), get_all=False, default=''), + 'description': traverse_obj(response, ('livestream', 'channel', 'user', 'bio')), + 'channel': traverse_obj(response, ('livestream', 'channel', 'slug')), + 'channel_id': str_or_none(traverse_obj(response, ('livestream', 'channel', 'id'))), + 'uploader': traverse_obj(response, ('livestream', 'channel', 'user', 'username')), + 'uploader_id': str_or_none(traverse_obj(response, ('livestream', 'channel', 'user_id'))), + 'timestamp': unified_timestamp(response.get('created_at')), + 'duration': float_or_none(traverse_obj(response, ('livestream', 'duration')), scale=1000), + 'thumbnail': traverse_obj( + response, ('livestream', 'thumbnail'), expected_type=url_or_none), + 'categories': traverse_obj(response, ('livestream', 'categories', ..., 'name')), + } From ca2f6e14e65f0faf92cabff8b7e5b4760363c52e Mon Sep 17 00:00:00 2001 From: Lesmiscore Date: Fri, 30 Dec 2022 03:01:22 +0900 Subject: [PATCH 1858/2552] [extractor/BiliLive] Fix extractor - Remove unnecessary group in `_VALID_URL` - This extractor always returns livestreams --- yt_dlp/extractor/bilibili.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 616a54960..37711c138 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -1034,7 +1034,7 @@ class BiliIntlSeriesIE(BiliIntlBaseIE): class BiliLiveIE(InfoExtractor): - _VALID_URL = r'https?://live.bilibili.com/(blanc/)?(?P\d+)' + _VALID_URL = r'https?://live.bilibili.com/(?:blanc/)?(?P\d+)' _TESTS = [{ 'url': 'https://live.bilibili.com/196', @@ -1114,6 +1114,7 @@ class BiliLiveIE(InfoExtractor): 'thumbnail': room_data.get('user_cover'), 'timestamp': stream_data.get('live_time'), 'formats': formats, + 'is_live': True, 'http_headers': { 'Referer': url, }, From e107c2b8cf8d6f3506d07bc64fc243682ee49b1e Mon Sep 17 00:00:00 2001 From: nosoop Date: Thu, 29 Dec 2022 10:46:43 -0800 Subject: [PATCH 1859/2552] [extractor/soundcloud] Support user permalink (#5842) Closes #5841 Authored by: nosoop --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/soundcloud.py | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index a2b92b85a..352de83ca 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1710,6 +1710,7 @@ from .soundcloud import ( SoundcloudSetIE, SoundcloudRelatedIE, SoundcloudUserIE, + SoundcloudUserPermalinkIE, SoundcloudTrackStationIE, SoundcloudPlaylistIE, SoundcloudSearchIE, diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index 4879d48c8..979f23f44 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -782,6 +782,27 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE): '%s (%s)' 
% (user['username'], resource.capitalize()))
 
 
+class SoundcloudUserPermalinkIE(SoundcloudPagedPlaylistBaseIE):
+    _VALID_URL = r'https?://api\.soundcloud\.com/users/(?P<id>\d+)'
+    IE_NAME = 'soundcloud:user:permalink'
+    _TESTS = [{
+        'url': 'https://api.soundcloud.com/users/30909869',
+        'info_dict': {
+            'id': '30909869',
+            'title': 'neilcic',
+        },
+        'playlist_mincount': 23,
+    }]
+
+    def _real_extract(self, url):
+        user_id = self._match_id(url)
+        user = self._download_json(
+            self._resolv_url(url), user_id, 'Downloading user info', headers=self._HEADERS)
+
+        return self._extract_playlist(
+            f'{self._API_V2_BASE}stream/users/{user["id"]}', str(user['id']), user.get('username'))
+
+
 class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
     _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/stations/track/[^/]+/(?P<id>[^/?#&]+)'
     IE_NAME = 'soundcloud:trackstation'

From efa944f4bc892321a0d01dcddb210405761ecada Mon Sep 17 00:00:00 2001
From: Anant Murmu
Date: Fri, 30 Dec 2022 08:13:49 +0530
Subject: [PATCH 1860/2552] [cleanup] Use `random.choices` (#5800)

Authored by: freezboltz
---
 yt_dlp/YoutubeDL.py              |  2 +-
 yt_dlp/extractor/adn.py          |  2 +-
 yt_dlp/extractor/discovery.py    |  2 +-
 yt_dlp/extractor/funimation.py   |  2 +-
 yt_dlp/extractor/linuxacademy.py |  5 ++---
 yt_dlp/extractor/tencent.py      |  4 ++--
 yt_dlp/extractor/tiktok.py       | 10 +++++-----
 yt_dlp/extractor/videa.py        |  2 +-
 yt_dlp/extractor/viu.py          |  2 +-
 yt_dlp/extractor/vrv.py          |  2 +-
 yt_dlp/extractor/youku.py        |  4 ++--
 11 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index abb0ddfe5..17f37a643 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -1068,7 +1068,7 @@ class YoutubeDL:
         # correspondingly that is not what we want since we need to keep
         # '%%' intact for template dict substitution step. Working around
         # with boundary-alike separator hack.
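# --- Editor's note on this cleanup patch: random.choices(population, k=n) draws
# n items uniformly *with replacement* in a single call, which is exactly what
# each replaced ''.join(random.choice(...) for _ in range(n)) loop did by hand:
import random
import string

population = string.digits + string.ascii_lowercase
guid = ''.join(random.choices(population, k=16))
assert len(guid) == 16 and set(guid) <= set(population)
# --- end editor's note ---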
- sep = ''.join([random.choice(ascii_letters) for _ in range(32)]) + sep = ''.join(random.choices(ascii_letters, k=32)) outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$') # outtmpl should be expand_path'ed before template dict substitution diff --git a/yt_dlp/extractor/adn.py b/yt_dlp/extractor/adn.py index e0c18c877..f1f55e87f 100644 --- a/yt_dlp/extractor/adn.py +++ b/yt_dlp/extractor/adn.py @@ -168,7 +168,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' }, data=b'')['token'] links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link') - self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)]) + self._K = ''.join(random.choices('0123456789abcdef', k=16)) message = bytes_to_intlist(json.dumps({ 'k': self._K, 't': token, diff --git a/yt_dlp/extractor/discovery.py b/yt_dlp/extractor/discovery.py index fd3fc8fb0..e6e109d5c 100644 --- a/yt_dlp/extractor/discovery.py +++ b/yt_dlp/extractor/discovery.py @@ -78,7 +78,7 @@ class DiscoveryIE(DiscoveryGoBaseIE): 'Downloading token JSON metadata', query={ 'authRel': 'authorization', 'client_id': '3020a40c2356a645b4b4', - 'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]), + 'nonce': ''.join(random.choices(string.ascii_letters, k=32)), 'redirectUri': 'https://www.discovery.com/', })['access_token'] diff --git a/yt_dlp/extractor/funimation.py b/yt_dlp/extractor/funimation.py index 18363c1b9..47c316664 100644 --- a/yt_dlp/extractor/funimation.py +++ b/yt_dlp/extractor/funimation.py @@ -210,7 +210,7 @@ class FunimationIE(FunimationBaseIE): page = self._download_json( 'https://www.funimation.com/api/showexperience/%s/' % experience_id, display_id, headers=headers, expected_status=403, query={ - 'pinst_id': ''.join([random.choice(string.digits + string.ascii_letters) for _ in range(8)]), + 'pinst_id': ''.join(random.choices(string.digits + string.ascii_letters, k=8)), }, note=f'Downloading {format_name} JSON') sources = page.get('items') or [] if not sources: diff --git a/yt_dlp/extractor/linuxacademy.py b/yt_dlp/extractor/linuxacademy.py index a570248b7..7bb64e17c 100644 --- a/yt_dlp/extractor/linuxacademy.py +++ b/yt_dlp/extractor/linuxacademy.py @@ -75,9 +75,8 @@ class LinuxAcademyIE(InfoExtractor): def _perform_login(self, username, password): def random_string(): - return ''.join([ - random.choice('0123456789ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz-._~') - for _ in range(32)]) + return ''.join(random.choices( + '0123456789ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz-._~', k=32)) webpage, urlh = self._download_webpage_handle( self._AUTHORIZE_URL, None, 'Downloading authorize page', query={ diff --git a/yt_dlp/extractor/tencent.py b/yt_dlp/extractor/tencent.py index ff8bf991e..44cae0472 100644 --- a/yt_dlp/extractor/tencent.py +++ b/yt_dlp/extractor/tencent.py @@ -32,7 +32,7 @@ class TencentBaseIE(InfoExtractor): padding_mode='whitespace').hex().upper() def _get_video_api_response(self, video_url, video_id, series_id, subtitle_format, video_format, video_quality): - guid = ''.join([random.choice(string.digits + string.ascii_lowercase) for _ in range(16)]) + guid = ''.join(random.choices(string.digits + string.ascii_lowercase, k=16)) ckey = self._get_ckey(video_id, video_url, guid) query = { 'vid': video_id, @@ -55,7 +55,7 @@ class TencentBaseIE(InfoExtractor): 'platform': self._PLATFORM, # For VQQ 'guid': guid, - 'flowid': ''.join(random.choice(string.digits + string.ascii_lowercase) for _ in range(32)), + 'flowid': 
''.join(random.choices(string.digits + string.ascii_lowercase, k=32)), } return self._search_json(r'QZOutputJson=', self._download_webpage( diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 2dd4510cc..709d944dc 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -49,7 +49,7 @@ class TikTokBaseIE(InfoExtractor): def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True, note='Downloading API JSON', errnote='Unable to download API page'): - self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choice('0123456789abcdef') for _ in range(160))) + self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choices('0123456789abcdef', k=160))) webpage_cookies = self._get_cookies(self._WEBPAGE_HOST) if webpage_cookies.get('sid_tt'): self._set_cookie(self._API_HOSTNAME, 'sid_tt', webpage_cookies['sid_tt'].value) @@ -68,8 +68,8 @@ class TikTokBaseIE(InfoExtractor): 'build_number': app_version, 'manifest_version_code': manifest_app_version, 'update_version_code': manifest_app_version, - 'openudid': ''.join(random.choice('0123456789abcdef') for _ in range(16)), - 'uuid': ''.join([random.choice(string.digits) for _ in range(16)]), + 'openudid': ''.join(random.choices('0123456789abcdef', k=16)), + 'uuid': ''.join(random.choices(string.digits, k=16)), '_rticket': int(time.time() * 1000), 'ts': int(time.time()), 'device_brand': 'Google', @@ -638,7 +638,7 @@ class TikTokUserIE(TikTokBaseIE): 'max_cursor': 0, 'min_cursor': 0, 'retry_type': 'no_retry', - 'device_id': ''.join(random.choice(string.digits) for _ in range(19)), # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api. + 'device_id': ''.join(random.choices(string.digits, k=19)), # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api. 
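# --- Editor's aside: the device_id above must remain a 19-digit numeric string
# (some TikTok endpoints reject anything else), e.g.:
#     import random, string
#     device_id = ''.join(random.choices(string.digits, k=19))
#     assert len(device_id) == 19 and device_id.isdigit()
# --- end editor's aside ---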
} for page in itertools.count(1): @@ -686,7 +686,7 @@ class TikTokBaseListIE(TikTokBaseIE): # XXX: Conventionally, base classes shoul 'cursor': 0, 'count': 20, 'type': 5, - 'device_id': ''.join(random.choice(string.digits) for i in range(19)) + 'device_id': ''.join(random.choices(string.digits, k=19)) } for page in itertools.count(1): diff --git a/yt_dlp/extractor/videa.py b/yt_dlp/extractor/videa.py index 52fa8fcec..59ae933b0 100644 --- a/yt_dlp/extractor/videa.py +++ b/yt_dlp/extractor/videa.py @@ -119,7 +119,7 @@ class VideaIE(InfoExtractor): result += s[i - (self._STATIC_SECRET.index(l[i]) - 31)] query = parse_qs(player_url) - random_seed = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8)) + random_seed = ''.join(random.choices(string.ascii_letters + string.digits, k=8)) query['_s'] = random_seed query['_t'] = result[:16] diff --git a/yt_dlp/extractor/viu.py b/yt_dlp/extractor/viu.py index 19d48234e..dd4cad7ba 100644 --- a/yt_dlp/extractor/viu.py +++ b/yt_dlp/extractor/viu.py @@ -251,7 +251,7 @@ class ViuOTTIE(InfoExtractor): return self._user_token def _get_token(self, country_code, video_id): - rand = ''.join(random.choice('0123456789') for _ in range(10)) + rand = ''.join(random.choices('0123456789', k=10)) return self._download_json( f'https://api-gateway-global.viu.com/api/auth/token?v={rand}000', video_id, headers={'Content-Type': 'application/json'}, note='Getting bearer token', diff --git a/yt_dlp/extractor/vrv.py b/yt_dlp/extractor/vrv.py index 89fa7affc..ad9dc568a 100644 --- a/yt_dlp/extractor/vrv.py +++ b/yt_dlp/extractor/vrv.py @@ -30,7 +30,7 @@ class VRVBaseIE(InfoExtractor): base_url = self._API_DOMAIN + '/core/' + path query = [ ('oauth_consumer_key', self._API_PARAMS['oAuthKey']), - ('oauth_nonce', ''.join([random.choice(string.ascii_letters) for _ in range(32)])), + ('oauth_nonce', ''.join(random.choices(string.ascii_letters, k=32))), ('oauth_signature_method', 'HMAC-SHA1'), ('oauth_timestamp', int(time.time())), ] diff --git a/yt_dlp/extractor/youku.py b/yt_dlp/extractor/youku.py index ab59200d7..404f196f4 100644 --- a/yt_dlp/extractor/youku.py +++ b/yt_dlp/extractor/youku.py @@ -129,8 +129,8 @@ class YoukuIE(InfoExtractor): @staticmethod def get_ysuid(): - return '%d%s' % (int(time.time()), ''.join([ - random.choice(string.ascii_letters) for i in range(3)])) + return '%d%s' % (int(time.time()), ''.join( + random.choices(string.ascii_letters, k=3))) def get_format_name(self, fm): _dict = { From 4455918e7f090ace0b0c2537bbfd364956eb66cb Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 30 Dec 2022 10:12:13 +0530 Subject: [PATCH 1861/2552] [extractor/stv] Detect DRM Closes #5320 --- yt_dlp/extractor/stv.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yt_dlp/extractor/stv.py b/yt_dlp/extractor/stv.py index c879fb52e..8b3e63538 100644 --- a/yt_dlp/extractor/stv.py +++ b/yt_dlp/extractor/stv.py @@ -73,6 +73,8 @@ class STVPlayerIE(InfoExtractor): }) programme = result.get('programme') or {} + if programme.get('drmEnabled'): + self.report_drm(video_id) return { '_type': 'url_transparent', From 119e40ef64b25f66a39246e87ce6c143cd34276d Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 30 Dec 2022 11:15:41 +0530 Subject: [PATCH 1862/2552] Add pre-processor stage `video` Related: #456, #5808 --- README.md | 44 +++++++++++++++++++------------------ yt_dlp/YoutubeDL.py | 17 +++++++++------ yt_dlp/options.py | 53 +++++++++++++++++++++------------------------ yt_dlp/utils.py | 2 +- 4 files changed, 59 insertions(+), 57 deletions(-) diff --git 
a/README.md b/README.md index 440ed1934..d31fedb00 100644 --- a/README.md +++ b/README.md @@ -725,7 +725,7 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi screen, optionally prefixed with when to print it, separated by a ":". Supported values of "WHEN" are the same as that of - --use-postprocessor, and "video" (default). + --use-postprocessor (default: video). Implies --quiet. Implies --simulate unless --no-simulate or later stages of WHEN are used. This option can be used multiple times @@ -979,18 +979,18 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi --ffmpeg-location PATH Location of the ffmpeg binary; either the path to the binary or its containing directory --exec [WHEN:]CMD Execute a command, optionally prefixed with - when to execute it (after_move if - unspecified), separated by a ":". Supported - values of "WHEN" are the same as that of - --use-postprocessor. Same syntax as the - output template can be used to pass any - field as arguments to the command. After - download, an additional field "filepath" - that contains the final path of the - downloaded file is also available, and if no - fields are passed, %(filepath)q is appended - to the end of the command. This option can - be used multiple times + when to execute it, separated by a ":". + Supported values of "WHEN" are the same as + that of --use-postprocessor (default: + after_move). Same syntax as the output + template can be used to pass any field as + arguments to the command. After download, an + additional field "filepath" that contains + the final path of the downloaded file is + also available, and if no fields are passed, + %(filepath)q is appended to the end of the + command. This option can be used multiple + times --no-exec Remove any previously defined --exec --convert-subs FORMAT Convert the subtitles to another format (currently supported: ass, lrc, srt, vtt) @@ -1028,14 +1028,16 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi postprocessor is invoked. It can be one of "pre_process" (after video extraction), "after_filter" (after video passes filter), - "before_dl" (before each video download), - "post_process" (after each video download; - default), "after_move" (after moving video - file to it's final locations), "after_video" - (after downloading and processing all - formats of a video), or "playlist" (at end - of playlist). This option can be used - multiple times to add different postprocessors + "video" (after --format; before + --print/--output), "before_dl" (before each + video download), "post_process" (after each + video download; default), "after_move" + (after moving video file to it's final + locations), "after_video" (after downloading + and processing all formats of a video), or + "playlist" (at end of playlist). 
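(Editor's note -- the resulting stage order, as defined by POSTPROCESS_WHEN in
yt_dlp/utils.py later in this patch:

    ('pre_process', 'after_filter', 'video', 'before_dl',
     'post_process', 'after_move', 'after_video', 'playlist')

so the new "video" stage runs after format selection but before --print/--output
are resolved.)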
This option + can be used multiple times to add different + postprocessors ## SponsorBlock Options: Make chapter entries for, or remove various segments (sponsor, diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 17f37a643..505732327 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2977,6 +2977,16 @@ class YoutubeDL: # Does nothing under normal operation - for backward compatibility of process_info self.post_extract(info_dict) + + def replace_info_dict(new_info): + nonlocal info_dict + if new_info == info_dict: + return + info_dict.clear() + info_dict.update(new_info) + + new_info, _ = self.pre_process(info_dict, 'video') + replace_info_dict(new_info) self._num_downloads += 1 # info_dict['_filename'] needs to be set for backward compatibility @@ -3090,13 +3100,6 @@ class YoutubeDL: for link_type, should_write in write_links.items()): return - def replace_info_dict(new_info): - nonlocal info_dict - if new_info == info_dict: - return - info_dict.clear() - info_dict.update(new_info) - new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move) replace_info_dict(new_info) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index bc574b885..096a50249 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -277,6 +277,20 @@ def create_parser(): out_dict[key] = out_dict.get(key, []) + [val] if append else val setattr(parser.values, option.dest, out_dict) + def when_prefix(default): + return { + 'default': {}, + 'type': 'str', + 'action': 'callback', + 'callback': _dict_from_options_callback, + 'callback_kwargs': { + 'allowed_keys': '|'.join(map(re.escape, POSTPROCESS_WHEN)), + 'default_key': default, + 'multiple_keys': False, + 'append': True, + }, + } + parser = _YoutubeDLOptionParser() alias_group = optparse.OptionGroup(parser, 'Aliases') Formatter = string.Formatter() @@ -1086,28 +1100,16 @@ def create_parser(): help='Do not download the video but write all related files (Alias: --no-download)') verbosity.add_option( '-O', '--print', - metavar='[WHEN:]TEMPLATE', dest='forceprint', default={}, type='str', - action='callback', callback=_dict_from_options_callback, - callback_kwargs={ - 'allowed_keys': 'video|' + '|'.join(map(re.escape, POSTPROCESS_WHEN)), - 'default_key': 'video', - 'multiple_keys': False, - 'append': True, - }, help=( + metavar='[WHEN:]TEMPLATE', dest='forceprint', **when_prefix('video'), + help=( 'Field name or output template to print to screen, optionally prefixed with when to print it, separated by a ":". ' - 'Supported values of "WHEN" are the same as that of --use-postprocessor, and "video" (default). ' + 'Supported values of "WHEN" are the same as that of --use-postprocessor (default: video). ' 'Implies --quiet. Implies --simulate unless --no-simulate or later stages of WHEN are used. ' 'This option can be used multiple times')) verbosity.add_option( '--print-to-file', - metavar='[WHEN:]TEMPLATE FILE', dest='print_to_file', default={}, type='str', nargs=2, - action='callback', callback=_dict_from_options_callback, - callback_kwargs={ - 'allowed_keys': 'video|' + '|'.join(map(re.escape, POSTPROCESS_WHEN)), - 'default_key': 'video', - 'multiple_keys': False, - 'append': True, - }, help=( + metavar='[WHEN:]TEMPLATE FILE', dest='print_to_file', nargs=2, **when_prefix('video'), + help=( 'Append given template to the file. The values of WHEN and TEMPLATE are same as that of --print. ' 'FILE uses the same syntax as the output template. 
This option can be used multiple times')) verbosity.add_option( @@ -1629,16 +1631,10 @@ def create_parser(): help='Location of the ffmpeg binary; either the path to the binary or its containing directory') postproc.add_option( '--exec', - metavar='[WHEN:]CMD', dest='exec_cmd', default={}, type='str', - action='callback', callback=_dict_from_options_callback, - callback_kwargs={ - 'allowed_keys': '|'.join(map(re.escape, POSTPROCESS_WHEN)), - 'default_key': 'after_move', - 'multiple_keys': False, - 'append': True, - }, help=( - 'Execute a command, optionally prefixed with when to execute it (after_move if unspecified), separated by a ":". ' - 'Supported values of "WHEN" are the same as that of --use-postprocessor. ' + metavar='[WHEN:]CMD', dest='exec_cmd', **when_prefix('after_move'), + help=( + 'Execute a command, optionally prefixed with when to execute it, separated by a ":". ' + 'Supported values of "WHEN" are the same as that of --use-postprocessor (default: after_move). ' 'Same syntax as the output template can be used to pass any field as arguments to the command. ' 'After download, an additional field "filepath" that contains the final path of the downloaded file ' 'is also available, and if no fields are passed, %(filepath)q is appended to the end of the command. ' @@ -1714,7 +1710,8 @@ def create_parser(): 'ARGS are a semicolon ";" delimited list of NAME=VALUE. ' 'The "when" argument determines when the postprocessor is invoked. ' 'It can be one of "pre_process" (after video extraction), "after_filter" (after video passes filter), ' - '"before_dl" (before each video download), "post_process" (after each video download; default), ' + '"video" (after --format; before --print/--output), "before_dl" (before each video download), ' + '"post_process" (after each video download; default), ' '"after_move" (after moving video file to it\'s final locations), ' '"after_video" (after downloading and processing all formats of a video), ' 'or "playlist" (at end of playlist). ' diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 3947dcf2e..43b5fda1d 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -3395,7 +3395,7 @@ def qualities(quality_ids): return q -POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'before_dl', 'post_process', 'after_move', 'after_video', 'playlist') +POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'video', 'before_dl', 'post_process', 'after_move', 'after_video', 'playlist') DEFAULT_OUTTMPL = { From fe74d5b592438c669f5717b34504f27c34ca9904 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 30 Dec 2022 11:01:14 +0530 Subject: [PATCH 1863/2552] Let `--parse/replace-in-metadata` run at any post-processing stage Closes #5808, #456 --- README.md | 13 +++++++++---- yt_dlp/__init__.py | 14 ++++++++------ yt_dlp/options.py | 12 +++++++----- 3 files changed, 24 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index d31fedb00..500f92387 100644 --- a/README.md +++ b/README.md @@ -952,13 +952,18 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi mkv/mka video files --no-embed-info-json Do not embed the infojson as an attachment to the video file - --parse-metadata FROM:TO Parse additional metadata like title/artist + --parse-metadata [WHEN:]FROM:TO + Parse additional metadata like title/artist from other fields; see "MODIFYING METADATA" - for details - --replace-in-metadata FIELDS REGEX REPLACE + for details. 
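(For example — the stage and field choice here are purely illustrative: --parse-metadata "post_process:%(title)s:%(meta_title)s" copies the final title into meta_title after each download.)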
Supported values of "WHEN" are + the same as that of --use-postprocessor + (default: pre_process) + --replace-in-metadata [WHEN:]FIELDS REGEX REPLACE Replace text in a metadata field using the given regex. This option can be used - multiple times + multiple times. Supported values of "WHEN" + are the same as that of --use-postprocessor + (default: pre_process) --xattrs Write metadata to the video file's xattrs (using dublin core and xdg standards) --concat-playlist POLICY Concatenate videos in a playlist. One of diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 202f102ba..3490816c4 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -386,10 +386,12 @@ def validate_options(opts): raise ValueError(f'{cmd} is invalid; {err}') yield action - parse_metadata = opts.parse_metadata or [] if opts.metafromtitle is not None: - parse_metadata.append('title:%s' % opts.metafromtitle) - opts.parse_metadata = list(itertools.chain(*map(metadataparser_actions, parse_metadata))) + opts.parse_metadata.setdefault('pre_process', []).append('title:%s' % opts.metafromtitle) + opts.parse_metadata = { + k: list(itertools.chain(*map(metadataparser_actions, v))) + for k, v in opts.parse_metadata.items() + } # Other options if opts.playlist_items is not None: @@ -561,11 +563,11 @@ def validate_options(opts): def get_postprocessors(opts): yield from opts.add_postprocessors - if opts.parse_metadata: + for when, actions in opts.parse_metadata.items(): yield { 'key': 'MetadataParser', - 'actions': opts.parse_metadata, - 'when': 'pre_process' + 'actions': actions, + 'when': when } sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove if sponsorblock_query: diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 096a50249..ed83cb763 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1586,14 +1586,16 @@ def create_parser(): help=optparse.SUPPRESS_HELP) postproc.add_option( '--parse-metadata', - metavar='FROM:TO', dest='parse_metadata', action='append', + metavar='[WHEN:]FROM:TO', dest='parse_metadata', **when_prefix('pre_process'), help=( - 'Parse additional metadata like title/artist from other fields; ' - 'see "MODIFYING METADATA" for details')) + 'Parse additional metadata like title/artist from other fields; see "MODIFYING METADATA" for details. ' + 'Supported values of "WHEN" are the same as that of --use-postprocessor (default: pre_process)')) postproc.add_option( '--replace-in-metadata', - dest='parse_metadata', metavar='FIELDS REGEX REPLACE', action='append', nargs=3, - help='Replace text in a metadata field using the given regex. This option can be used multiple times') + dest='parse_metadata', metavar='[WHEN:]FIELDS REGEX REPLACE', nargs=3, **when_prefix('pre_process'), + help=( + 'Replace text in a metadata field using the given regex. This option can be used multiple times. 
' + 'Supported values of "WHEN" are the same as that of --use-postprocessor (default: pre_process)')) postproc.add_option( '--xattrs', '--xattr', action='store_true', dest='xattrs', default=False, From d5f043d127cac1e8ec8a6eacde04ad1133600a16 Mon Sep 17 00:00:00 2001 From: ChillingPepper <90042155+ChillingPepper@users.noreply.github.com> Date: Fri, 30 Dec 2022 07:38:38 +0100 Subject: [PATCH 1864/2552] [utils] js_to_json: Fix bug in f55523c (#5771) Authored by: ChillingPepper, pukkandan --- test/test_utils.py | 79 ++++++++++++++++++++++++++++++++++++++++++++++ yt_dlp/utils.py | 8 ++++- 2 files changed, 86 insertions(+), 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index 49ab3796b..82ae77ea2 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -954,6 +954,85 @@ class TestUtil(unittest.TestCase): ) self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0') + def test_js_to_json_vars_strings(self): + self.assertDictEqual( + json.loads(js_to_json( + '''{ + 'null': a, + 'nullStr': b, + 'true': c, + 'trueStr': d, + 'false': e, + 'falseStr': f, + 'unresolvedVar': g, + }''', + { + 'a': 'null', + 'b': '"null"', + 'c': 'true', + 'd': '"true"', + 'e': 'false', + 'f': '"false"', + 'g': 'var', + } + )), + { + 'null': None, + 'nullStr': 'null', + 'true': True, + 'trueStr': 'true', + 'false': False, + 'falseStr': 'false', + 'unresolvedVar': 'var' + } + ) + + self.assertDictEqual( + json.loads(js_to_json( + '''{ + 'int': a, + 'intStr': b, + 'float': c, + 'floatStr': d, + }''', + { + 'a': '123', + 'b': '"123"', + 'c': '1.23', + 'd': '"1.23"', + } + )), + { + 'int': 123, + 'intStr': '123', + 'float': 1.23, + 'floatStr': '1.23', + } + ) + + self.assertDictEqual( + json.loads(js_to_json( + '''{ + 'object': a, + 'objectStr': b, + 'array': c, + 'arrayStr': d, + }''', + { + 'a': '{}', + 'b': '"{}"', + 'c': '[]', + 'd': '"[]"', + } + )), + { + 'object': {}, + 'objectStr': '{}', + 'array': [], + 'arrayStr': '[]', + } + ) + def test_js_to_json_realworld(self): inp = '''{ 'clip':{'provider':'pseudo'} diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 43b5fda1d..64c83a77a 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -3360,7 +3360,13 @@ def js_to_json(code, vars={}, *, strict=False): return f'"{i}":' if v.endswith(':') else str(i) if v in vars: - return json.dumps(vars[v]) + try: + if not strict: + json.loads(vars[v]) + except json.decoder.JSONDecodeError: + return json.dumps(vars[v]) + else: + return vars[v] if not strict: return f'"{v}"' From f74371a97d67237e055612006602934b910b1275 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 30 Dec 2022 11:57:33 +0530 Subject: [PATCH 1865/2552] [extractor/bilibili] Fix `--no-playlist` for anthology Closes #5797 --- yt_dlp/extractor/bilibili.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 37711c138..92620f697 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -303,7 +303,8 @@ class BiliBiliIE(BilibiliBaseIE): getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}') if is_anthology: - title += f' p{part_id:02d} {traverse_obj(page_list_json, ((part_id or 1) - 1, "part")) or ""}' + part_id = part_id or 1 + title += f' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}' aid = video_data.get('aid') old_video_id = format_field(aid, None, f'%s_part{part_id or 1}') From ec54bd43f374cee429d67078ac61b75e66afb3fa Mon Sep 17 00:00:00 2001 From: 
pukkandan Date: Fri, 30 Dec 2022 14:07:11 +0530 Subject: [PATCH 1866/2552] Fix bug in writing playlist info-json Closes #4889 --- yt_dlp/YoutubeDL.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 505732327..db6bfded8 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1862,11 +1862,10 @@ class YoutubeDL: self.to_screen('[download] Downloading item %s of %s' % ( self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS))) - extra.update({ + entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({ 'playlist_index': playlist_index, 'playlist_autonumber': i + 1, - }) - entry_result = self.__process_iterable_entry(entry, download, extra) + }, extra)) if not entry_result: failures += 1 if failures >= max_failures: From fbb73833067ba742459729809679a62f34b3e41e Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 30 Dec 2022 15:30:56 +0530 Subject: [PATCH 1867/2552] Add `weba` to known extensions --- test/test_utils.py | 2 ++ yt_dlp/utils.py | 10 +++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 82ae77ea2..3d5a6ea6b 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1953,6 +1953,8 @@ Line 1 vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['m4a']), 'mkv') self.assertEqual(get_compatible_ext( vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['webm']), 'webm') + self.assertEqual(get_compatible_ext( + vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['weba']), 'webm') self.assertEqual(get_compatible_ext( vcodecs=['h264'], acodecs=['mp4a'], vexts=['mov'], aexts=['m4a']), 'mp4') diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 64c83a77a..ee5340cd2 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -3656,7 +3656,7 @@ def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None): COMPATIBLE_EXTS = ( {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma', 'mov'}, - {'webm'}, + {'webm', 'weba'}, ) for ext in preferences or vexts: current_exts = {ext, *vexts, *aexts} @@ -5962,7 +5962,7 @@ MEDIA_EXTENSIONS = Namespace( common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'), video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'), common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'), - audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma'), + audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma', 'weba'), thumbnails=('jpg', 'png', 'webp'), storyboards=('mhtml', ), subtitles=('srt', 'vtt', 'ass', 'lrc'), @@ -6094,9 +6094,9 @@ class FormatSorter: 'vext': {'type': 'ordered', 'field': 'video_ext', 'order': ('mp4', 'mov', 'webm', 'flv', '', 'none'), 'order_free': ('webm', 'mp4', 'mov', 'flv', '', 'none')}, - 'aext': {'type': 'ordered', 'field': 'audio_ext', - 'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'webm', '', 'none'), - 'order_free': ('ogg', 'opus', 'webm', 'mp3', 'm4a', 'aac', '', 'none')}, + 'aext': {'type': 'ordered', 'regex': True, 'field': 'audio_ext', + 'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'web[am]', '', 'none'), + 'order_free': ('ogg', 'opus', 'web[am]', 'mp3', 'm4a', 'aac', '', 'none')}, 'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000}, 'aud_or_vid': {'visible': False, 'forced': True, 'type': 'multiple', 'field': ('vcodec', 'acodec'), From 
9bb856998b0d5a0ad58268f0ba8d784fb9d934e3 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 30 Dec 2022 15:32:33 +0530 Subject: [PATCH 1868/2552] [extractor/youtube] Extract DRC formats --- yt_dlp/extractor/youtube.py | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 9dde34fb0..506bd1e19 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2544,6 +2544,35 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'tags': [], }, 'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'}, + }, { + 'note': 'Audio formats with Dynamic Range Compression', + 'url': 'https://www.youtube.com/watch?v=Tq92D6wQ1mg', + 'info_dict': { + 'id': 'Tq92D6wQ1mg', + 'ext': 'weba', + 'title': '[MMD] Adios - EVERGLOW [+Motion DL]', + 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ', + 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ', + 'channel_follower_count': int, + 'description': 'md5:17eccca93a786d51bc67646756894066', + 'upload_date': '20191228', + 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ', + 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'], + 'playable_in_embed': True, + 'like_count': int, + 'categories': ['Entertainment'], + 'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg', + 'age_limit': 18, + 'channel': 'Projekt Melody', + 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ', + 'view_count': int, + 'availability': 'needs_auth', + 'comment_count': int, + 'live_status': 'not_live', + 'uploader': 'Projekt Melody', + 'duration': 106, + }, + 'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'}, } ] @@ -3553,7 +3582,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): itag = str_or_none(fmt.get('itag')) audio_track = fmt.get('audioTrack') or {} - stream_id = '%s.%s' % (itag or '', audio_track.get('id', '')) + stream_id = (itag, audio_track.get('id'), fmt.get('isDrc')) if stream_id in stream_ids: continue @@ -3634,11 +3663,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): dct = { 'asr': int_or_none(fmt.get('audioSampleRate')), 'filesize': int_or_none(fmt.get('contentLength')), - 'format_id': itag, + 'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}', 'format_note': join_nonempty( '%s%s' % (audio_track.get('displayName') or '', ' (default)' if language_preference > 0 else ''), fmt.get('qualityLabel') or quality.replace('audio_quality_', ''), + 'DRC' if fmt.get('isDrc') else None, try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()), try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()), throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '), @@ -3647,7 +3677,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'fps': int_or_none(fmt.get('fps')) or None, 'audio_channels': fmt.get('audioChannels'), 'height': height, - 'quality': q(quality), + 'quality': q(quality) - bool(fmt.get('isDrc')) / 2, 'has_drm': bool(fmt.get('drmFamilies')), 'tbr': tbr, 'url': fmt_url, From 8d1ddb0805c7c56bd03a5c0837c55602473d213f Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 31 Dec 2022 09:45:12 +0530 Subject: [PATCH 1869/2552] [extractor/udemy] Fix lectures that have no URL and detect DRM Closes #5662 --- yt_dlp/extractor/udemy.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/udemy.py b/yt_dlp/extractor/udemy.py index 8b99c59cf..329e5da2d 
100644 --- a/yt_dlp/extractor/udemy.py +++ b/yt_dlp/extractor/udemy.py @@ -11,8 +11,10 @@ from ..utils import ( int_or_none, js_to_json, sanitized_Request, + smuggle_url, try_get, unescapeHTML, + unsmuggle_url, url_or_none, urlencode_postdata, ) @@ -106,7 +108,7 @@ class UdemyIE(InfoExtractor): % (course_id, lecture_id), lecture_id, 'Downloading lecture JSON', query={ 'fields[lecture]': 'title,description,view_html,asset', - 'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,captions,data', + 'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,captions,data,course_is_drmed', }) def _handle_error(self, response): @@ -199,16 +201,19 @@ class UdemyIE(InfoExtractor): def _real_extract(self, url): lecture_id = self._match_id(url) + course_id = unsmuggle_url(url, {})[1].get('course_id') - webpage = self._download_webpage(url, lecture_id) - - course_id, _ = self._extract_course_info(webpage, lecture_id) + webpage = None + if not course_id: + webpage = self._download_webpage(url, lecture_id) + course_id, _ = self._extract_course_info(webpage, lecture_id) try: lecture = self._download_lecture(course_id, lecture_id) except ExtractorError as e: # Error could possibly mean we are not enrolled in the course if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + webpage = webpage or self._download_webpage(url, lecture_id) self._enroll_course(url, webpage, course_id) lecture = self._download_lecture(course_id, lecture_id) else: @@ -391,6 +396,9 @@ class UdemyIE(InfoExtractor): if f.get('url'): formats.append(f) + if not formats and asset.get('course_is_drmed'): + self.report_drm(video_id) + return { 'id': video_id, 'title': title, @@ -449,7 +457,9 @@ class UdemyCourseIE(UdemyIE): # XXX: Do not subclass from concrete IE if lecture_id: entry = { '_type': 'url_transparent', - 'url': 'https://www.udemy.com/%s/learn/v4/t/lecture/%s' % (course_path, entry['id']), + 'url': smuggle_url( + f'https://www.udemy.com/{course_path}/learn/v4/t/lecture/{entry["id"]}', + {'course_id': course_id}), 'title': entry.get('title'), 'ie_key': UdemyIE.ie_key(), } From a0e526ed4d042c88771cd5669ceb4413d2b8c47f Mon Sep 17 00:00:00 2001 From: Stel Abrego Date: Fri, 30 Dec 2022 20:58:33 -0800 Subject: [PATCH 1870/2552] [extractor/bandcamp] Add `album_artist` (#5537) Closes #5536 Authored by: stelcodes --- yt_dlp/extractor/bandcamp.py | 48 +++++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/bandcamp.py b/yt_dlp/extractor/bandcamp.py index de81e0de7..e89b3a69b 100644 --- a/yt_dlp/extractor/bandcamp.py +++ b/yt_dlp/extractor/bandcamp.py @@ -29,11 +29,18 @@ class BandcampIE(InfoExtractor): 'info_dict': { 'id': '1812978515', 'ext': 'mp3', - 'title': "youtube-dl \"'/\\ä↭ - youtube-dl \"'/\\ä↭ - youtube-dl test song \"'/\\ä↭", + 'title': 'youtube-dl "\'/\\ä↭ - youtube-dl "\'/\\ä↭ - youtube-dl test song "\'/\\ä↭', 'duration': 9.8485, - 'uploader': 'youtube-dl "\'/\\ä↭', + 'uploader': 'youtube-dl "\'/\\ä↭', 'upload_date': '20121129', 'timestamp': 1354224127, + 'track': 'youtube-dl "\'/\\ä↭ - youtube-dl test song "\'/\\ä↭', + 'album_artist': 'youtube-dl "\'/\\ä↭', + 'track_id': '1812978515', + 'artist': 'youtube-dl "\'/\\ä↭', + 'uploader_url': 'https://youtube-dl.bandcamp.com', + 'uploader_id': 'youtube-dl', + 'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg', }, '_skip': 'There is a limit of 200 free downloads / month for the test song' }, { @@ -41,7 +48,8 @@ class BandcampIE(InfoExtractor): 'url': 
'http://benprunty.bandcamp.com/track/lanius-battle', 'info_dict': { 'id': '2650410135', - 'ext': 'aiff', + 'ext': 'm4a', + 'acodec': r're:[fa]lac', 'title': 'Ben Prunty - Lanius (Battle)', 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'Ben Prunty', @@ -54,7 +62,10 @@ class BandcampIE(InfoExtractor): 'track_number': 1, 'track_id': '2650410135', 'artist': 'Ben Prunty', + 'album_artist': 'Ben Prunty', 'album': 'FTL: Advanced Edition Soundtrack', + 'uploader_url': 'https://benprunty.bandcamp.com', + 'uploader_id': 'benprunty', }, }, { # no free download, mp3 128 @@ -75,7 +86,34 @@ class BandcampIE(InfoExtractor): 'track_number': 5, 'track_id': '2584466013', 'artist': 'Mastodon', + 'album_artist': 'Mastodon', 'album': 'Call of the Mastodon', + 'uploader_url': 'https://relapsealumni.bandcamp.com', + 'uploader_id': 'relapsealumni', + }, + }, { + # track from compilation album (artist/album_artist difference) + 'url': 'https://diskotopia.bandcamp.com/track/safehouse', + 'md5': '19c5337bca1428afa54129f86a2f6a69', + 'info_dict': { + 'id': '1978174799', + 'ext': 'mp3', + 'title': 'submerse - submerse - Safehouse', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'submerse', + 'timestamp': 1480779297, + 'upload_date': '20161203', + 'release_timestamp': 1481068800, + 'release_date': '20161207', + 'duration': 154.066, + 'track': 'submerse - Safehouse', + 'track_number': 3, + 'track_id': '1978174799', + 'artist': 'submerse', + 'album_artist': 'Diskotopia', + 'album': 'DSK F/W 2016-2017 Free Compilation', + 'uploader_url': 'https://diskotopia.bandcamp.com', + 'uploader_id': 'diskotopia', }, }] @@ -121,6 +159,9 @@ class BandcampIE(InfoExtractor): embed = self._extract_data_attr(webpage, title, 'embed', False) current = tralbum.get('current') or {} artist = embed.get('artist') or current.get('artist') or tralbum.get('artist') + album_artist = self._html_search_regex( + r'
<h3 class="albumTitle">
    [\S\s]*?by\s*\s*\s*([^>]+?)\s*', + webpage, 'album artist', fatal=False) timestamp = unified_timestamp( current.get('publish_date') or tralbum.get('album_publish_date')) @@ -205,6 +246,7 @@ class BandcampIE(InfoExtractor): 'track_id': track_id, 'artist': artist, 'album': embed.get('album_title'), + 'album_artist': album_artist, 'formats': formats, } From 2fb0f858686c46abc50a0e253245afe750746775 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 31 Dec 2022 11:02:24 +0530 Subject: [PATCH 1871/2552] [update] Workaround #5632 --- yt_dlp/update.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/yt_dlp/update.py b/yt_dlp/update.py index ac3e28057..a3a731aef 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -15,7 +15,6 @@ from .utils import ( Popen, cached_method, deprecation_warning, - remove_end, shell_quote, system_identifier, traverse_obj, @@ -43,7 +42,8 @@ def _get_variant_and_executable_path(): # Ref: https://en.wikipedia.org/wiki/Uname#Examples if machine[1:] in ('x86', 'x86_64', 'amd64', 'i386', 'i686'): machine = '_x86' if platform.architecture()[0][:2] == '32' else '' - return f'{remove_end(sys.platform, "32")}{machine}_exe', path + # NB: https://github.com/yt-dlp/yt-dlp/issues/5632 + return f'{sys.platform}{machine}_exe', path path = os.path.dirname(__file__) if isinstance(__loader__, zipimporter): @@ -74,8 +74,8 @@ def current_git_head(): _FILE_SUFFIXES = { 'zip': '', 'py2exe': '_min.exe', - 'win_exe': '.exe', - 'win_x86_exe': '_x86.exe', + 'win32_exe': '.exe', + 'win32_x86_exe': '_x86.exe', 'darwin_exe': '_macos', 'darwin_legacy_exe': '_macos_legacy', 'linux_exe': '_linux', From 8e40b9d1ec132ae1bcac50b3ee520ece46ac9c55 Mon Sep 17 00:00:00 2001 From: Matthew Date: Sun, 1 Jan 2023 04:29:22 +0000 Subject: [PATCH 1872/2552] Improve plugin architecture (#5553) to make plugins easier to develop and use: * Plugins are now loaded as namespace packages. * Plugins can be loaded in any distribution of yt-dlp (binary, pip, source, etc.). * Plugin packages can be installed and managed via pip, or dropped into any of the documented locations. * Users do not need to edit any code files to install plugins. * Backwards-compatible with previous plugin architecture. As a side-effect, yt-dlp will now search in a few more locations for config files. 
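For illustration, a minimal extractor plugin under the new layout is a single file inside the namespace package (all names below are hypothetical):

    # yt_dlp_plugins/extractor/myplugin.py
    from yt_dlp.extractor.common import InfoExtractor

    class MyPluginIE(InfoExtractor):
        _VALID_URL = r'myplugin:(?P<id>\w+)'

        def _real_extract(self, url):
            video_id = self._match_id(url)
            # Hypothetical direct media URL, purely for demonstration
            return {'id': video_id, 'title': video_id,
                    'url': f'https://example.com/{video_id}.mp4'}

Dropping that file into any of the documented locations (for example ~/.config/yt-dlp/plugins/mypkg/yt_dlp_plugins/extractor/) is enough for MyPluginIE to be picked up automatically.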
Closes https://github.com/yt-dlp/yt-dlp/issues/1389 Authored by: flashdagger, coletdjnz, pukkandan, Grub4K Co-authored-by: Marcel Co-authored-by: pukkandan Co-authored-by: Simon Sawicki --- .gitignore | 8 +- README.md | 66 ++++++- devscripts/make_lazy_extractors.py | 4 + test/test_plugins.py | 73 ++++++++ .../yt_dlp_plugins/extractor/_ignore.py | 5 + .../yt_dlp_plugins/extractor/ignore.py | 12 ++ .../yt_dlp_plugins/extractor/normal.py | 9 + .../yt_dlp_plugins/postprocessor/normal.py | 5 + .../yt_dlp_plugins/extractor/zipped.py | 5 + .../yt_dlp_plugins/postprocessor/zipped.py | 5 + yt_dlp/YoutubeDL.py | 15 +- yt_dlp/extractor/extractors.py | 4 +- yt_dlp/options.py | 91 +++++----- yt_dlp/plugins.py | 171 ++++++++++++++++++ yt_dlp/postprocessor/__init__.py | 5 +- yt_dlp/utils.py | 55 ++++-- ytdlp_plugins/extractor/__init__.py | 4 - ytdlp_plugins/extractor/sample.py | 14 -- ytdlp_plugins/postprocessor/__init__.py | 4 - ytdlp_plugins/postprocessor/sample.py | 26 --- 20 files changed, 455 insertions(+), 126 deletions(-) create mode 100644 test/test_plugins.py create mode 100644 test/testdata/yt_dlp_plugins/extractor/_ignore.py create mode 100644 test/testdata/yt_dlp_plugins/extractor/ignore.py create mode 100644 test/testdata/yt_dlp_plugins/extractor/normal.py create mode 100644 test/testdata/yt_dlp_plugins/postprocessor/normal.py create mode 100644 test/testdata/zipped_plugins/yt_dlp_plugins/extractor/zipped.py create mode 100644 test/testdata/zipped_plugins/yt_dlp_plugins/postprocessor/zipped.py create mode 100644 yt_dlp/plugins.py delete mode 100644 ytdlp_plugins/extractor/__init__.py delete mode 100644 ytdlp_plugins/extractor/sample.py delete mode 100644 ytdlp_plugins/postprocessor/__init__.py delete mode 100644 ytdlp_plugins/postprocessor/sample.py diff --git a/.gitignore b/.gitignore index 00d74057f..ef4d11616 100644 --- a/.gitignore +++ b/.gitignore @@ -120,9 +120,5 @@ yt-dlp.zip */extractor/lazy_extractors.py # Plugins -ytdlp_plugins/extractor/* -!ytdlp_plugins/extractor/__init__.py -!ytdlp_plugins/extractor/sample.py -ytdlp_plugins/postprocessor/* -!ytdlp_plugins/postprocessor/__init__.py -!ytdlp_plugins/postprocessor/sample.py +ytdlp_plugins/* +yt-dlp-plugins/* diff --git a/README.md b/README.md index 500f92387..4294090dc 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,8 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t * [Modifying metadata examples](#modifying-metadata-examples) * [EXTRACTOR ARGUMENTS](#extractor-arguments) * [PLUGINS](#plugins) + * [Installing Plugins](#installing-plugins) + * [Developing Plugins](#developing-plugins) * [EMBEDDING YT-DLP](#embedding-yt-dlp) * [Embedding examples](#embedding-examples) * [DEPRECATED OPTIONS](#deprecated-options) @@ -1110,15 +1112,20 @@ You can configure yt-dlp by placing any supported command line option to a confi * If `-P` is not given, the current directory is searched 1. **User Configuration**: * `${XDG_CONFIG_HOME}/yt-dlp/config` (recommended on Linux/macOS) + * `${XDG_CONFIG_HOME}/yt-dlp/config.txt` * `${XDG_CONFIG_HOME}/yt-dlp.conf` * `${APPDATA}/yt-dlp/config` (recommended on Windows) * `${APPDATA}/yt-dlp/config.txt` * `~/yt-dlp.conf` * `~/yt-dlp.conf.txt` + * `~/.yt-dlp/config` + * `~/.yt-dlp/config.txt` See also: [Notes about environment variables](#notes-about-environment-variables) 1. **System Configuration**: * `/etc/yt-dlp.conf` + * `/etc/yt-dlp/config` + * `/etc/yt-dlp/config.txt` E.g. 
with the following configuration file yt-dlp will always extract the audio, not copy the mtime, use a proxy and save all videos under `YouTube` directory in your home directory: ``` @@ -1789,19 +1796,68 @@ NOTE: These options may be changed/removed in the future without concern for bac # PLUGINS -Plugins are loaded from `/ytdlp_plugins//__init__.py`; where `` is the directory of the binary (`/yt-dlp`), or the root directory of the module if you are running directly from source-code (`/yt_dlp/__main__.py`). Plugins are currently not supported for the `pip` version +Note that **all** plugins are imported even if not invoked, and that **there are no checks** performed on plugin code. **Use plugins at your own risk and only if you trust the code!** -Plugins can be of ``s `extractor` or `postprocessor`. Extractor plugins do not need to be enabled from the CLI and are automatically invoked when the input URL is suitable for it. Postprocessor plugins can be invoked using `--use-postprocessor NAME`. +Plugins can be of ``s `extractor` or `postprocessor`. +- Extractor plugins do not need to be enabled from the CLI and are automatically invoked when the input URL is suitable for it. +- Extractor plugins take priority over builtin extractors. +- Postprocessor plugins can be invoked using `--use-postprocessor NAME`. -See [ytdlp_plugins](ytdlp_plugins) for example plugins. -Note that **all** plugins are imported even if not invoked, and that **there are no checks** performed on plugin code. Use plugins at your own risk and only if you trust the code +Plugins are loaded from the namespace packages `yt_dlp_plugins.extractor` and `yt_dlp_plugins.postprocessor`. -If you are a plugin author, add [ytdlp-plugins](https://github.com/topics/ytdlp-plugins) as a topic to your repository for discoverability +In other words, the file structure on the disk looks something like: + + yt_dlp_plugins/ + extractor/ + myplugin.py + postprocessor/ + myplugin.py + +yt-dlp looks for these `yt_dlp_plugins` namespace folders in many locations (see below) and loads in plugins from **all** of them. See the [wiki for some known plugins](https://github.com/yt-dlp/yt-dlp/wiki/Plugins) +## Installing Plugins + +Plugins can be installed using various methods and locations. + +1. **Configuration directories**: + Plugin packages (containing a `yt_dlp_plugins` namespace folder) can be dropped into the following standard [configuration locations](#configuration): + * **User Plugins** + * `${XDG_CONFIG_HOME}/yt-dlp/plugins//yt_dlp_plugins/` (recommended on Linux/macOS) + * `${XDG_CONFIG_HOME}/yt-dlp-plugins//yt_dlp_plugins/` + * `${APPDATA}/yt-dlp/plugins//yt_dlp_plugins/` (recommended on Windows) + * `~/.yt-dlp/plugins//yt_dlp_plugins/` + * `~/yt-dlp-plugins//yt_dlp_plugins/` + * **System Plugins** + * `/etc/yt-dlp/plugins//yt_dlp_plugins/` + * `/etc/yt-dlp-plugins//yt_dlp_plugins/` +2. **Executable location**: Plugin packages can similarly be installed in a `yt-dlp-plugins` directory under the executable location: + * Binary: where `/yt-dlp.exe`, `/yt-dlp-plugins//yt_dlp_plugins/` + * Source: where `/yt_dlp/__main__.py`, `/yt-dlp-plugins//yt_dlp_plugins/` + +3. **pip and other locations in `PYTHONPATH`** + * Plugin packages can be installed and managed using `pip`. See [ytdlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for an example. + * Note: plugin files between plugin packages installed with pip must have unique filenames + * Any path in `PYTHONPATH` is searched in for the `yt_dlp_plugins` namespace folder. 
+ * Note: This does not apply for Pyinstaller/py2exe builds. + + +.zip, .egg and .whl archives containing a `yt_dlp_plugins` namespace folder in their root are also supported. These can be placed in the same locations `yt_dlp_plugins` namespace folders can be found. +- e.g. `${XDG_CONFIG_HOME}/yt-dlp/plugins/mypluginpkg.zip` where `mypluginpkg.zip` contains `yt_dlp_plugins//myplugin.py` + +Run yt-dlp with `--verbose`/`-v` to check if the plugin has been loaded. + +## Developing Plugins + +See [ytdlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for a sample plugin package with instructions on how to set up an environment for plugin development. + +All public classes with a name ending in `IE` are imported from each file. This respects underscore prefix (e.g. `_MyBasePluginIE` is private) and `__all__`. Modules can similarly be excluded by prefixing the module name with an underscore (e.g. `_myplugin.py`) + +If you are a plugin author, add [yt-dlp-plugins](https://github.com/topics/yt-dlp-plugins) as a topic to your repository for discoverability +See the [Developer Instructions](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions) on how to write and test an extractor. # EMBEDDING YT-DLP diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py index c502bdf89..d74ea202f 100644 --- a/devscripts/make_lazy_extractors.py +++ b/devscripts/make_lazy_extractors.py @@ -40,8 +40,12 @@ def main(): _ALL_CLASSES = get_all_ies() # Must be before import + import yt_dlp.plugins from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor + # Filter out plugins + _ALL_CLASSES = [cls for cls in _ALL_CLASSES if not cls.__module__.startswith(f'{yt_dlp.plugins.PACKAGE_NAME}.')] + DummyInfoExtractor = type('InfoExtractor', (InfoExtractor,), {'IE_NAME': NO_ATTR}) module_src = '\n'.join(( MODULE_TEMPLATE, diff --git a/test/test_plugins.py b/test/test_plugins.py new file mode 100644 index 000000000..6cde579e1 --- /dev/null +++ b/test/test_plugins.py @@ -0,0 +1,73 @@ +import importlib +import os +import shutil +import sys +import unittest +from pathlib import Path + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +TEST_DATA_DIR = Path(os.path.dirname(os.path.abspath(__file__)), 'testdata') +sys.path.append(str(TEST_DATA_DIR)) +importlib.invalidate_caches() + +from yt_dlp.plugins import PACKAGE_NAME, directories, load_plugins + + +class TestPlugins(unittest.TestCase): + + TEST_PLUGIN_DIR = TEST_DATA_DIR / PACKAGE_NAME + + def test_directories_containing_plugins(self): + self.assertIn(self.TEST_PLUGIN_DIR, map(Path, directories())) + + def test_extractor_classes(self): + for module_name in tuple(sys.modules): + if module_name.startswith(f'{PACKAGE_NAME}.extractor'): + del sys.modules[module_name] + plugins_ie = load_plugins('extractor', 'IE') + + self.assertIn(f'{PACKAGE_NAME}.extractor.normal', sys.modules.keys()) + self.assertIn('NormalPluginIE', plugins_ie.keys()) + + # don't load modules with underscore prefix + self.assertFalse( + f'{PACKAGE_NAME}.extractor._ignore' in sys.modules.keys(), + 'loaded module beginning with underscore') + self.assertNotIn('IgnorePluginIE', plugins_ie.keys()) + + # Don't load extractors with underscore prefix + self.assertNotIn('_IgnoreUnderscorePluginIE', plugins_ie.keys()) + + # Don't load extractors not specified in __all__ (if supplied) + self.assertNotIn('IgnoreNotInAllPluginIE', plugins_ie.keys()) + self.assertIn('InAllPluginIE', plugins_ie.keys()) + + def 
test_postprocessor_classes(self): + plugins_pp = load_plugins('postprocessor', 'PP') + self.assertIn('NormalPluginPP', plugins_pp.keys()) + + def test_importing_zipped_module(self): + zip_path = TEST_DATA_DIR / 'zipped_plugins.zip' + shutil.make_archive(str(zip_path)[:-4], 'zip', str(zip_path)[:-4]) + sys.path.append(str(zip_path)) # add zip to search paths + importlib.invalidate_caches() # reset the import caches + + try: + for plugin_type in ('extractor', 'postprocessor'): + package = importlib.import_module(f'{PACKAGE_NAME}.{plugin_type}') + self.assertIn(zip_path / PACKAGE_NAME / plugin_type, map(Path, package.__path__)) + + plugins_ie = load_plugins('extractor', 'IE') + self.assertIn('ZippedPluginIE', plugins_ie.keys()) + + plugins_pp = load_plugins('postprocessor', 'PP') + self.assertIn('ZippedPluginPP', plugins_pp.keys()) + + finally: + sys.path.remove(str(zip_path)) + os.remove(zip_path) + importlib.invalidate_caches() # reset the import caches + + +if __name__ == '__main__': + unittest.main() diff --git a/test/testdata/yt_dlp_plugins/extractor/_ignore.py b/test/testdata/yt_dlp_plugins/extractor/_ignore.py new file mode 100644 index 000000000..57faf75bb --- /dev/null +++ b/test/testdata/yt_dlp_plugins/extractor/_ignore.py @@ -0,0 +1,5 @@ +from yt_dlp.extractor.common import InfoExtractor + + +class IgnorePluginIE(InfoExtractor): + pass diff --git a/test/testdata/yt_dlp_plugins/extractor/ignore.py b/test/testdata/yt_dlp_plugins/extractor/ignore.py new file mode 100644 index 000000000..816a16aa2 --- /dev/null +++ b/test/testdata/yt_dlp_plugins/extractor/ignore.py @@ -0,0 +1,12 @@ +from yt_dlp.extractor.common import InfoExtractor + + +class IgnoreNotInAllPluginIE(InfoExtractor): + pass + + +class InAllPluginIE(InfoExtractor): + pass + + +__all__ = ['InAllPluginIE'] diff --git a/test/testdata/yt_dlp_plugins/extractor/normal.py b/test/testdata/yt_dlp_plugins/extractor/normal.py new file mode 100644 index 000000000..b09009bdc --- /dev/null +++ b/test/testdata/yt_dlp_plugins/extractor/normal.py @@ -0,0 +1,9 @@ +from yt_dlp.extractor.common import InfoExtractor + + +class NormalPluginIE(InfoExtractor): + pass + + +class _IgnoreUnderscorePluginIE(InfoExtractor): + pass diff --git a/test/testdata/yt_dlp_plugins/postprocessor/normal.py b/test/testdata/yt_dlp_plugins/postprocessor/normal.py new file mode 100644 index 000000000..315b85a48 --- /dev/null +++ b/test/testdata/yt_dlp_plugins/postprocessor/normal.py @@ -0,0 +1,5 @@ +from yt_dlp.postprocessor.common import PostProcessor + + +class NormalPluginPP(PostProcessor): + pass diff --git a/test/testdata/zipped_plugins/yt_dlp_plugins/extractor/zipped.py b/test/testdata/zipped_plugins/yt_dlp_plugins/extractor/zipped.py new file mode 100644 index 000000000..01542e0d8 --- /dev/null +++ b/test/testdata/zipped_plugins/yt_dlp_plugins/extractor/zipped.py @@ -0,0 +1,5 @@ +from yt_dlp.extractor.common import InfoExtractor + + +class ZippedPluginIE(InfoExtractor): + pass diff --git a/test/testdata/zipped_plugins/yt_dlp_plugins/postprocessor/zipped.py b/test/testdata/zipped_plugins/yt_dlp_plugins/postprocessor/zipped.py new file mode 100644 index 000000000..223822bd6 --- /dev/null +++ b/test/testdata/zipped_plugins/yt_dlp_plugins/postprocessor/zipped.py @@ -0,0 +1,5 @@ +from yt_dlp.postprocessor.common import PostProcessor + + +class ZippedPluginPP(PostProcessor): + pass diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index db6bfded8..9ef56a46b 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -32,6 +32,7 @@ from .extractor import 
gen_extractor_classes, get_info_extractor from .extractor.common import UnsupportedURLIE from .extractor.openload import PhantomJSwrapper from .minicurses import format_text +from .plugins import directories as plugin_directories from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors from .postprocessor import ( EmbedThumbnailPP, @@ -3773,10 +3774,6 @@ class YoutubeDL: write_debug('Lazy loading extractors is forcibly disabled') else: write_debug('Lazy loading extractors is disabled') - if plugin_extractors or plugin_postprocessors: - write_debug('Plugins: %s' % [ - '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}') - for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())]) if self.params['compat_opts']: write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts'])) @@ -3810,6 +3807,16 @@ class YoutubeDL: proxy_map.update(handler.proxies) write_debug(f'Proxy map: {proxy_map}') + for plugin_type, plugins in {'Extractor': plugin_extractors, 'Post-Processor': plugin_postprocessors}.items(): + if not plugins: + continue + write_debug(f'{plugin_type} Plugins: %s' % (', '.join(sorted(('%s%s' % ( + klass.__name__, '' if klass.__name__ == name else f' as {name}') + for name, klass in plugins.items()))))) + plugin_dirs = plugin_directories() + if plugin_dirs: + write_debug(f'Plugin directories: {plugin_dirs}') + # Not implemented if False and self.params.get('call_home'): ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode() diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 610e02f90..beda02917 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1,10 +1,10 @@ import contextlib import os -from ..utils import load_plugins +from ..plugins import load_plugins # NB: Must be before other imports so that plugins can be correctly injected -_PLUGIN_CLASSES = load_plugins('extractor', 'IE', {}) +_PLUGIN_CLASSES = load_plugins('extractor', 'IE') _LAZY_LOADER = False if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'): diff --git a/yt_dlp/options.py b/yt_dlp/options.py index ed83cb763..be4695cbb 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -29,6 +29,8 @@ from .utils import ( expand_path, format_field, get_executable_path, + get_system_config_dirs, + get_user_config_dirs, join_nonempty, orderedSet_from_options, remove_end, @@ -42,62 +44,67 @@ def parseOpts(overrideArguments=None, ignore_config_files='if_override'): if ignore_config_files == 'if_override': ignore_config_files = overrideArguments is not None - def _readUserConf(package_name, default=[]): - # .config + def _load_from_config_dirs(config_dirs): + for config_dir in config_dirs: + conf_file_path = os.path.join(config_dir, 'config') + conf = Config.read_file(conf_file_path, default=None) + if conf is None: + conf_file_path += '.txt' + conf = Config.read_file(conf_file_path, default=None) + if conf is not None: + return conf, conf_file_path + return None, None + + def _read_user_conf(package_name, default=None): + # .config/package_name.conf xdg_config_home = os.getenv('XDG_CONFIG_HOME') or compat_expanduser('~/.config') - userConfFile = os.path.join(xdg_config_home, package_name, 'config') - if not os.path.isfile(userConfFile): - userConfFile = os.path.join(xdg_config_home, '%s.conf' % package_name) - userConf = Config.read_file(userConfFile, default=None) - if userConf is not None: - return userConf, userConfFile + user_conf_file = os.path.join(xdg_config_home, '%s.conf' % 
package_name) + user_conf = Config.read_file(user_conf_file, default=None) + if user_conf is not None: + return user_conf, user_conf_file - # appdata - appdata_dir = os.getenv('appdata') - if appdata_dir: - userConfFile = os.path.join(appdata_dir, package_name, 'config') - userConf = Config.read_file(userConfFile, default=None) - if userConf is None: - userConfFile += '.txt' - userConf = Config.read_file(userConfFile, default=None) - if userConf is not None: - return userConf, userConfFile + # home (~/package_name.conf or ~/package_name.conf.txt) + user_conf_file = os.path.join(compat_expanduser('~'), '%s.conf' % package_name) + user_conf = Config.read_file(user_conf_file, default=None) + if user_conf is None: + user_conf_file += '.txt' + user_conf = Config.read_file(user_conf_file, default=None) + if user_conf is not None: + return user_conf, user_conf_file - # home - userConfFile = os.path.join(compat_expanduser('~'), '%s.conf' % package_name) - userConf = Config.read_file(userConfFile, default=None) - if userConf is None: - userConfFile += '.txt' - userConf = Config.read_file(userConfFile, default=None) - if userConf is not None: - return userConf, userConfFile + # Package config directories (e.g. ~/.config/package_name/package_name.txt) + user_conf, user_conf_file = _load_from_config_dirs(get_user_config_dirs(package_name)) + if user_conf is not None: + return user_conf, user_conf_file + return default if default is not None else [], None - return default, None + def _read_system_conf(package_name, default=None): + system_conf, system_conf_file = _load_from_config_dirs(get_system_config_dirs(package_name)) + if system_conf is not None: + return system_conf, system_conf_file + return default if default is not None else [], None - def add_config(label, path, user=False): + def add_config(label, path=None, func=None): """ Adds config and returns whether to continue """ if root.parse_known_args()[0].ignoreconfig: return False - # Multiple package names can be given here - # E.g. 
('yt-dlp', 'youtube-dlc', 'youtube-dl') will look for - # the configuration file of any of these three packages - for package in ('yt-dlp',): - if user: - args, current_path = _readUserConf(package, default=None) - else: - current_path = os.path.join(path, '%s.conf' % package) - args = Config.read_file(current_path, default=None) - if args is not None: - root.append_config(args, current_path, label=label) - return True + elif func: + assert path is None + args, current_path = func('yt-dlp') + else: + current_path = os.path.join(path, 'yt-dlp.conf') + args = Config.read_file(current_path, default=None) + if args is not None: + root.append_config(args, current_path, label=label) + return True return True def load_configs(): yield not ignore_config_files yield add_config('Portable', get_executable_path()) yield add_config('Home', expand_path(root.parse_known_args()[0].paths.get('home', '')).strip()) - yield add_config('User', None, user=True) - yield add_config('System', '/etc') + yield add_config('User', func=_read_user_conf) + yield add_config('System', func=_read_system_conf) opts = optparse.Values({'verbose': True, 'print_help': False}) try: diff --git a/yt_dlp/plugins.py b/yt_dlp/plugins.py new file mode 100644 index 000000000..7d2226d0f --- /dev/null +++ b/yt_dlp/plugins.py @@ -0,0 +1,171 @@ +import contextlib +import importlib +import importlib.abc +import importlib.machinery +import importlib.util +import inspect +import itertools +import os +import pkgutil +import sys +import traceback +import zipimport +from pathlib import Path +from zipfile import ZipFile + +from .compat import functools # isort: split +from .compat import compat_expanduser +from .utils import ( + get_executable_path, + get_system_config_dirs, + get_user_config_dirs, + write_string, +) + +PACKAGE_NAME = 'yt_dlp_plugins' +COMPAT_PACKAGE_NAME = 'ytdlp_plugins' + + +class PluginLoader(importlib.abc.Loader): + """Dummy loader for virtual namespace packages""" + + def exec_module(self, module): + return None + + +@functools.cache +def dirs_in_zip(archive): + with ZipFile(archive) as zip: + return set(itertools.chain.from_iterable( + Path(file).parents for file in zip.namelist())) + + +class PluginFinder(importlib.abc.MetaPathFinder): + """ + This class provides one or multiple namespace packages. 
+ It searches in sys.path and yt-dlp config folders for + the existing subdirectories from which the modules can be imported + """ + + def __init__(self, *packages): + self._zip_content_cache = {} + self.packages = set(itertools.chain.from_iterable( + itertools.accumulate(name.split('.'), lambda a, b: '.'.join((a, b))) + for name in packages)) + + def search_locations(self, fullname): + candidate_locations = [] + + def _get_package_paths(*root_paths, containing_folder='plugins'): + for config_dir in map(Path, root_paths): + plugin_dir = config_dir / containing_folder + if not plugin_dir.is_dir(): + continue + yield from plugin_dir.iterdir() + + # Load from yt-dlp config folders + candidate_locations.extend(_get_package_paths( + *get_user_config_dirs('yt-dlp'), *get_system_config_dirs('yt-dlp'), + containing_folder='plugins')) + + # Load from yt-dlp-plugins folders + candidate_locations.extend(_get_package_paths( + get_executable_path(), + compat_expanduser('~'), + '/etc', + os.getenv('XDG_CONFIG_HOME') or compat_expanduser('~/.config'), + containing_folder='yt-dlp-plugins')) + + candidate_locations.extend(map(Path, sys.path)) # PYTHONPATH + + parts = Path(*fullname.split('.')) + locations = set() + for path in dict.fromkeys(candidate_locations): + candidate = path / parts + if candidate.is_dir(): + locations.add(str(candidate)) + elif path.name and any(path.with_suffix(suffix).is_file() for suffix in {'.zip', '.egg', '.whl'}): + with contextlib.suppress(FileNotFoundError): + if parts in dirs_in_zip(path): + locations.add(str(candidate)) + return locations + + def find_spec(self, fullname, path=None, target=None): + if fullname not in self.packages: + return None + + search_locations = self.search_locations(fullname) + if not search_locations: + return None + + spec = importlib.machinery.ModuleSpec(fullname, PluginLoader(), is_package=True) + spec.submodule_search_locations = search_locations + return spec + + def invalidate_caches(self): + dirs_in_zip.cache_clear() + for package in self.packages: + if package in sys.modules: + del sys.modules[package] + + +def directories(): + spec = importlib.util.find_spec(PACKAGE_NAME) + return spec.submodule_search_locations if spec else [] + + +def iter_modules(subpackage): + fullname = f'{PACKAGE_NAME}.{subpackage}' + with contextlib.suppress(ModuleNotFoundError): + pkg = importlib.import_module(fullname) + yield from pkgutil.iter_modules(path=pkg.__path__, prefix=f'{fullname}.') + + +def load_module(module, module_name, suffix): + return inspect.getmembers(module, lambda obj: ( + inspect.isclass(obj) + and obj.__name__.endswith(suffix) + and obj.__module__.startswith(module_name) + and not obj.__name__.startswith('_') + and obj.__name__ in getattr(module, '__all__', [obj.__name__]))) + + +def load_plugins(name, suffix): + classes = {} + + for finder, module_name, _ in iter_modules(name): + if any(x.startswith('_') for x in module_name.split('.')): + continue + try: + if sys.version_info < (3, 10) and isinstance(finder, zipimport.zipimporter): + # zipimporter.load_module() is deprecated in 3.10 and removed in 3.12 + # The exec_module branch below is the replacement for >= 3.10 + # See: https://docs.python.org/3/library/zipimport.html#zipimport.zipimporter.exec_module + module = finder.load_module(module_name) + else: + spec = finder.find_spec(module_name) + module = importlib.util.module_from_spec(spec) + sys.modules[module_name] = module + spec.loader.exec_module(module) + except Exception: + write_string(f'Error while importing module 
{module_name!r}\n{traceback.format_exc(limit=-1)}') + continue + classes.update(load_module(module, module_name, suffix)) + + # Compat: old plugin system using __init__.py + # Note: plugins imported this way do not show up in directories() + # nor are considered part of the yt_dlp_plugins namespace package + with contextlib.suppress(FileNotFoundError): + spec = importlib.util.spec_from_file_location( + name, Path(get_executable_path(), COMPAT_PACKAGE_NAME, name, '__init__.py')) + plugins = importlib.util.module_from_spec(spec) + sys.modules[spec.name] = plugins + spec.loader.exec_module(plugins) + classes.update(load_module(plugins, spec.name, suffix)) + + return classes + + +sys.meta_path.insert(0, PluginFinder(f'{PACKAGE_NAME}.extractor', f'{PACKAGE_NAME}.postprocessor')) + +__all__ = ['directories', 'load_plugins', 'PACKAGE_NAME', 'COMPAT_PACKAGE_NAME'] diff --git a/yt_dlp/postprocessor/__init__.py b/yt_dlp/postprocessor/__init__.py index f168be46a..bfe9df733 100644 --- a/yt_dlp/postprocessor/__init__.py +++ b/yt_dlp/postprocessor/__init__.py @@ -33,14 +33,15 @@ from .movefilesafterdownload import MoveFilesAfterDownloadPP from .sponskrub import SponSkrubPP from .sponsorblock import SponsorBlockPP from .xattrpp import XAttrMetadataPP -from ..utils import load_plugins +from ..plugins import load_plugins -_PLUGIN_CLASSES = load_plugins('postprocessor', 'PP', globals()) +_PLUGIN_CLASSES = load_plugins('postprocessor', 'PP') def get_postprocessor(key): return globals()[key + 'PP'] +globals().update(_PLUGIN_CLASSES) __all__ = [name for name in globals().keys() if name.endswith('PP')] __all__.extend(('PostProcessor', 'FFmpegPostProcessor')) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index ee5340cd2..32da598d0 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -18,7 +18,6 @@ import html.entities import html.parser import http.client import http.cookiejar -import importlib.util import inspect import io import itertools @@ -5372,22 +5371,37 @@ def get_executable_path(): return os.path.dirname(os.path.abspath(_get_variant_and_executable_path()[1])) -def load_plugins(name, suffix, namespace): - classes = {} - with contextlib.suppress(FileNotFoundError): - plugins_spec = importlib.util.spec_from_file_location( - name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py')) - plugins = importlib.util.module_from_spec(plugins_spec) - sys.modules[plugins_spec.name] = plugins - plugins_spec.loader.exec_module(plugins) - for name in dir(plugins): - if name in namespace: - continue - if not name.endswith(suffix): - continue - klass = getattr(plugins, name) - classes[name] = namespace[name] = klass - return classes +def get_user_config_dirs(package_name): + locations = set() + + # .config (e.g. 
~/.config/package_name) + xdg_config_home = os.getenv('XDG_CONFIG_HOME') or compat_expanduser('~/.config') + config_dir = os.path.join(xdg_config_home, package_name) + if os.path.isdir(config_dir): + locations.add(config_dir) + + # appdata (%APPDATA%/package_name) + appdata_dir = os.getenv('appdata') + if appdata_dir: + config_dir = os.path.join(appdata_dir, package_name) + if os.path.isdir(config_dir): + locations.add(config_dir) + + # home (~/.package_name) + user_config_directory = os.path.join(compat_expanduser('~'), '.%s' % package_name) + if os.path.isdir(user_config_directory): + locations.add(user_config_directory) + + return locations + + +def get_system_config_dirs(package_name): + locations = set() + # /etc/package_name + system_config_directory = os.path.join('/etc', package_name) + if os.path.isdir(system_config_directory): + locations.add(system_config_directory) + return locations def traverse_obj( @@ -6367,3 +6381,10 @@ class FormatSorter: # Deprecated has_certifi = bool(certifi) has_websockets = bool(websockets) + + +def load_plugins(name, suffix, namespace): + from .plugins import load_plugins + ret = load_plugins(name, suffix) + namespace.update(ret) + return ret diff --git a/ytdlp_plugins/extractor/__init__.py b/ytdlp_plugins/extractor/__init__.py deleted file mode 100644 index 3045a590b..000000000 --- a/ytdlp_plugins/extractor/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# flake8: noqa: F401 - -# ℹ️ The imported name must end in "IE" -from .sample import SamplePluginIE diff --git a/ytdlp_plugins/extractor/sample.py b/ytdlp_plugins/extractor/sample.py deleted file mode 100644 index a8bc455eb..000000000 --- a/ytdlp_plugins/extractor/sample.py +++ /dev/null @@ -1,14 +0,0 @@ -# ⚠ Don't use relative imports -from yt_dlp.extractor.common import InfoExtractor - - -# ℹ️ Instructions on making extractors can be found at: -# 🔗 https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#adding-support-for-a-new-site - -class SamplePluginIE(InfoExtractor): - _WORKING = False - IE_DESC = False - _VALID_URL = r'^sampleplugin:' - - def _real_extract(self, url): - self.to_screen('URL "%s" successfully captured' % url) diff --git a/ytdlp_plugins/postprocessor/__init__.py b/ytdlp_plugins/postprocessor/__init__.py deleted file mode 100644 index 61099abbc..000000000 --- a/ytdlp_plugins/postprocessor/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# flake8: noqa: F401 - -# ℹ️ The imported name must end in "PP" and is the name to be used in --use-postprocessor -from .sample import SamplePluginPP diff --git a/ytdlp_plugins/postprocessor/sample.py b/ytdlp_plugins/postprocessor/sample.py deleted file mode 100644 index 4563e1c11..000000000 --- a/ytdlp_plugins/postprocessor/sample.py +++ /dev/null @@ -1,26 +0,0 @@ -# ⚠ Don't use relative imports -from yt_dlp.postprocessor.common import PostProcessor - - -# ℹ️ See the docstring of yt_dlp.postprocessor.common.PostProcessor -class SamplePluginPP(PostProcessor): - def __init__(self, downloader=None, **kwargs): - # ⚠ Only kwargs can be passed from the CLI, and all argument values will be string - # Also, "downloader", "when" and "key" are reserved names - super().__init__(downloader) - self._kwargs = kwargs - - # ℹ️ See docstring of yt_dlp.postprocessor.common.PostProcessor.run - def run(self, info): - if info.get('_type', 'video') != 'video': # PP was called for playlist - self.to_screen(f'Post-processing playlist {info.get("id")!r} with {self._kwargs}') - elif info.get('filepath'): # PP was called after download (default) - filepath = 
info.get('filepath') - self.to_screen(f'Post-processed {filepath!r} with {self._kwargs}') - elif info.get('requested_downloads'): # PP was called after_video - filepaths = [f.get('filepath') for f in info.get('requested_downloads')] - self.to_screen(f'Post-processed {filepaths!r} with {self._kwargs}') - else: # PP was called before actual download - filepath = info.get('_filename') - self.to_screen(f'Pre-processed {filepath!r} with {self._kwargs}') - return [], info # return list_of_files_to_delete, info_dict From 3e01ce744a981d8f19ae77ec695005e7000f4703 Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Sun, 1 Jan 2023 18:40:26 +1300 Subject: [PATCH 1873/2552] [extractor/generic] Use `Accept-Encoding: identity` for initial request The existing comment seems to imply this was the desired behavior from the beginning. Partial fix for https://github.com/yt-dlp/yt-dlp/issues/5855, https://github.com/yt-dlp/yt-dlp/issues/5851, https://github.com/yt-dlp/yt-dlp/issues/4748 --- yt_dlp/extractor/generic.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 2281c71f3..ffc279023 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -2154,6 +2154,21 @@ class GenericIE(InfoExtractor): 'age_limit': 0, 'direct': True, } + }, { + 'note': 'server returns data in brotli compression by default if `accept-encoding: *` is specified.', + 'url': 'https://www.extra.cz/cauky-lidi-70-dil-babis-predstavil-pohadky-prymulanek-nebo-andrejovy-nove-saty-ac867', + 'info_dict': { + 'id': 'cauky-lidi-70-dil-babis-predstavil-pohadky-prymulanek-nebo-andrejovy-nove-saty-ac867', + 'ext': 'mp4', + 'title': 'čauky lidi 70 finall', + 'description': 'čauky lidi 70 finall', + 'thumbnail': 'h', + 'upload_date': '20220606', + 'timestamp': 1654513791, + 'duration': 318.0, + 'direct': True, + 'age_limit': 0, + } } ] @@ -2312,7 +2327,7 @@ class GenericIE(InfoExtractor): # It may probably better to solve this by checking Content-Type for application/octet-stream # after a HEAD request, but not sure if we can rely on this. full_response = self._request_webpage(url, video_id, headers={ - 'Accept-Encoding': '*', + 'Accept-Encoding': 'identity', **smuggled_data.get('http_headers', {}) }) new_url = full_response.geturl() From 1cdda3299810b86206853a22e680758eadcc4e05 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 1 Jan 2023 14:11:14 +0530 Subject: [PATCH 1874/2552] [utils] `get_exe_version`: Detect broken executables Authored by: dirkf, pukkandan Closes #5561 --- yt_dlp/utils.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 32da598d0..5af176b36 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2720,8 +2720,10 @@ def _get_exe_version_output(exe, args): # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers # SIGTTOU if yt-dlp is run in the background. 
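# (SIGTTOU suspends a backgrounded process that tries to change terminal settings; giving ffmpeg a pipe for stdin avoids that suspension.)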
# See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656 - stdout, _, _ = Popen.run([encodeArgument(exe)] + args, text=True, - stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + stdout, _, ret = Popen.run([encodeArgument(exe)] + args, text=True, + stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + if ret: + return None except OSError: return False return stdout @@ -2739,11 +2741,15 @@ def detect_exe_version(output, version_re=None, unrecognized='present'): def get_exe_version(exe, args=['--version'], - version_re=None, unrecognized='present'): + version_re=None, unrecognized=('present', 'broken')): """ Returns the version of the specified executable, or False if the executable is not present """ + unrecognized = variadic(unrecognized) + assert len(unrecognized) in (1, 2) out = _get_exe_version_output(exe, args) - return detect_exe_version(out, version_re, unrecognized) if out else False + if out is None: + return unrecognized[-1] + return out and detect_exe_version(out, version_re, unrecognized[0]) def frange(start=0, stop=None, step=1): From 88fb9425775da7f92d24e8b5f3009cafb56e94d6 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 1 Jan 2023 13:32:05 +0530 Subject: [PATCH 1875/2552] Add message when there are no subtitles/thumbnails Closes #5551 --- yt_dlp/YoutubeDL.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 9ef56a46b..866d069b7 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3930,7 +3930,7 @@ class YoutubeDL: elif not self.params.get('overwrites', True) and os.path.exists(descfn): self.to_screen(f'[info] {label.title()} description is already present') elif ie_result.get('description') is None: - self.report_warning(f'There\'s no {label} description to write') + self.to_screen(f'[info] There\'s no {label} description to write') return False else: try: @@ -3946,15 +3946,18 @@ class YoutubeDL: ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error''' ret = [] subtitles = info_dict.get('requested_subtitles') - if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')): + if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')): # subtitles download errors are already managed as troubles in relevant IE # that way it will silently go on when used with unsupporting IE return ret - + elif not subtitles: + self.to_screen('[info] There\'s no subtitles for the requested languages') + return ret sub_filename_base = self.prepare_filename(info_dict, 'subtitle') if not sub_filename_base: self.to_screen('[info] Skipping writing video subtitles') return ret + for sub_lang, sub_info in subtitles.items(): sub_format = sub_info['ext'] sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext')) @@ -4001,6 +4004,9 @@ class YoutubeDL: thumbnails, ret = [], [] if write_all or self.params.get('writethumbnail', False): thumbnails = info_dict.get('thumbnails') or [] + if not thumbnails: + self.to_screen(f'[info] There\'s no {label} thumbnails to download') + return ret multiple = write_all and len(thumbnails) > 1 if thumb_filename_base is None: From 2a06bb4eb671eb306a2687ef0a4f853b936f05e0 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 1 Jan 2023 13:42:43 +0530 Subject: [PATCH 1876/2552] Add `--compat-options 2021,2022` Use these to guard against future compat changes. 
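A hedged sketch of how such nested aliases can expand recursively is below; the real handling lives in the option-parsing callbacks in options.py, so this only models the behaviour and is not the actual code.

ALIASES = {
    '2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'],
    '2022': [],  # reserved for future compat changes
}

def expand(opts):
    for opt in opts:
        if opt in ALIASES:
            yield from expand(ALIASES[opt])
        else:
            yield opt

assert list(expand(['2021'])) == [
    'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date']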
This allows devs to change defaults and make other potentially breaking changes more easily. If you need everything to work exactly as-is, put this in your config --- README.md | 2 ++ yt_dlp/options.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/README.md b/README.md index 4294090dc..f6bf1175e 100644 --- a/README.md +++ b/README.md @@ -159,6 +159,8 @@ For ease of use, a few more compat options are available: * `--compat-options all`: Use all compat options (Do NOT use) * `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams` * `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect` +* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date` +* `--compat-options 2022`: Currently does nothing. Use this to enable all future compat options # INSTALLATION diff --git a/yt_dlp/options.py b/yt_dlp/options.py index be4695cbb..e9766c02d 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -470,6 +470,8 @@ def create_parser(): }, 'aliases': { 'youtube-dl': ['all', '-multistreams'], 'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat'], + '2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'], + '2022': [], } }, help=( 'Options that can help keep compatibility with youtube-dl or youtube-dlc ' From 78d25e0b7c2b45597e193c0decb33f4f248502a9 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 1 Jan 2023 14:10:51 +0530 Subject: [PATCH 1877/2552] [extractor/embedly] Handle vimeo embeds Closes #3360 --- yt_dlp/extractor/embedly.py | 62 +++++++++++++++++++++++++++++++++++-- 1 file changed, 59 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/embedly.py b/yt_dlp/extractor/embedly.py index 483d018bb..db5ef055e 100644 --- a/yt_dlp/extractor/embedly.py +++ b/yt_dlp/extractor/embedly.py @@ -1,13 +1,63 @@ import re import urllib.parse + from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote +from .youtube import YoutubeTabIE +from ..utils import parse_qs, smuggle_url, traverse_obj class EmbedlyIE(InfoExtractor): - _VALID_URL = r'https?://(?:www|cdn\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?url=(?P[^#&]+)' + _VALID_URL = r'https?://(?:www|cdn\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?(?:src|url)=(?:[^#&]+)' _TESTS = [{ 'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1', + 'info_dict': { + 'id': 'UUGLim4T2loE5rwCMdpCIPVg', + 'modified_date': '20221225', + 'view_count': int, + 'uploader_url': 'https://www.youtube.com/@TraciHinesMusic', + 'channel_id': 'UCGLim4T2loE5rwCMdpCIPVg', + 'uploader': 'TraciJHines', + 'channel_url': 'https://www.youtube.com/@TraciHinesMusic', + 'channel': 'TraciJHines', + 'availability': 'public', + 'uploader_id': 'UCGLim4T2loE5rwCMdpCIPVg', + 'description': '', + 'tags': [], + 'title': 'Uploads from TraciJHines', + }, + 'playlist_mincount': 10, + }, { + 'url': 
'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1', + 'params': {'noplaylist': True}, + 'info_dict': { + 'id': 'SU4fj_aEMVw', + 'ext': 'mp4', + 'title': 'I\'m on Patreon!', + 'age_limit': 0, + 'categories': ['Entertainment'], + 'thumbnail': 'https://i.ytimg.com/vi_webp/SU4fj_aEMVw/maxresdefault.webp', + 'live_status': 'not_live', + 'playable_in_embed': True, + 'channel': 'TraciJHines', + 'uploader_id': 'TraciJHines', + 'channel_url': 'https://www.youtube.com/channel/UCGLim4T2loE5rwCMdpCIPVg', + 'uploader_url': 'http://www.youtube.com/user/TraciJHines', + 'upload_date': '20150211', + 'duration': 282, + 'availability': 'public', + 'channel_follower_count': int, + 'tags': 'count:39', + 'view_count': int, + 'comment_count': int, + 'channel_id': 'UCGLim4T2loE5rwCMdpCIPVg', + 'like_count': int, + 'uploader': 'TraciJHines', + 'description': 'md5:8af6425f50bd46fbf29f3db0fc3a8364', + 'chapters': list, + + }, + }, { + 'url': 'https://cdn.embedly.com/widgets/media.html?src=https://player.vimeo.com/video/1234567?h=abcdefgh', 'only_matching': True, }] @@ -21,4 +71,10 @@ class EmbedlyIE(InfoExtractor): yield urllib.parse.unquote(mobj.group('url')) def _real_extract(self, url): - return self.url_result(compat_urllib_parse_unquote(self._match_id(url))) + qs = parse_qs(url) + src = urllib.parse.unquote(traverse_obj(qs, ('url', 0)) or '') + if src and YoutubeTabIE.suitable(src): + return self.url_result(src, YoutubeTabIE) + return self.url_result(smuggle_url( + urllib.parse.unquote(traverse_obj(qs, ('src', 0), ('url', 0))), + {'http_headers': {'Referer': url}})) From 26fdfc3704a278acada27cc420d67c6d3f71423b Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 1 Jan 2023 14:39:58 +0530 Subject: [PATCH 1878/2552] [extractor/biliintl:series] Make partial download of series faster --- yt_dlp/extractor/bilibili.py | 51 +++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 92620f697..3274a427d 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -20,9 +20,11 @@ from ..utils import ( parse_count, parse_qs, qualities, + smuggle_url, srt_subtitles_timecode, str_or_none, traverse_obj, + unsmuggle_url, url_or_none, urlencode_postdata, ) @@ -881,16 +883,12 @@ class BiliIntlBaseIE(InfoExtractor): return formats - def _extract_video_info(self, video_data, *, ep_id=None, aid=None): + def _parse_video_metadata(self, video_data): return { - 'id': ep_id or aid, 'title': video_data.get('title_display') or video_data.get('title'), 'thumbnail': video_data.get('cover'), 'episode_number': int_or_none(self._search_regex( r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)), - 'formats': self._get_formats(ep_id=ep_id, aid=aid), - 'subtitles': self._get_subtitles(ep_id=ep_id, aid=aid), - 'extractor_key': BiliIntlIE.ie_key(), } def _perform_login(self, username, password): @@ -975,9 +973,16 @@ class BiliIntlIE(BiliIntlBaseIE): 'only_matching': True, }] - def _real_extract(self, url): - season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid') - video_id = ep_id or aid + def _make_url(video_id, series_id=None): + if 
series_id: + return f'https://www.bilibili.tv/en/play/{series_id}/{video_id}' + return f'https://www.bilibili.tv/en/video/{video_id}' + + def _extract_video_metadata(self, url, video_id, season_id): + url, smuggled_data = unsmuggle_url(url, {}) + if smuggled_data.get('title'): + return smuggled_data + webpage = self._download_webpage(url, video_id) # Bstation layout initial_data = ( @@ -989,13 +994,26 @@ class BiliIntlIE(BiliIntlBaseIE): if season_id and not video_data: # Non-Bstation layout, read through episode list season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id) - video_data = traverse_obj(season_json, - ('sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == ep_id), - expected_type=dict, get_all=False) - return self._extract_video_info(video_data or {}, ep_id=ep_id, aid=aid) + video_data = traverse_obj(season_json, ( + 'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id + ), expected_type=dict, get_all=False) + + return self._parse_video_metadata(video_data) + + def _real_extract(self, url): + season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid') + video_id = ep_id or aid + + return { + 'id': video_id, + **self._extract_video_metadata(url, video_id, season_id), + 'formats': self._get_formats(ep_id=ep_id, aid=aid), + 'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid), + } class BiliIntlSeriesIE(BiliIntlBaseIE): + IE_NAME = 'biliintl:series' _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?play/(?P\d+)/?(?:[?#]|$)' _TESTS = [{ 'url': 'https://www.bilibili.tv/en/play/34613', @@ -1021,9 +1039,12 @@ class BiliIntlSeriesIE(BiliIntlBaseIE): def _entries(self, series_id): series_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id) - for episode in traverse_obj(series_json, ('sections', ..., 'episodes', ...), expected_type=dict, default=[]): - episode_id = str(episode.get('episode_id')) - yield self._extract_video_info(episode, ep_id=episode_id) + for episode in traverse_obj(series_json, ('sections', ..., 'episodes', ...), expected_type=dict): + episode_id = str(episode['episode_id']) + yield self.url_result(smuggle_url( + BiliIntlIE._make_url(episode_id, series_id), + self._parse_video_metadata(episode) + ), BiliIntlIE, episode_id) def _real_extract(self, url): series_id = self._match_id(url) From 193fb150b76c4aaf41fb2c98b073e7e1f8a108f0 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 1 Jan 2023 17:01:48 +0530 Subject: [PATCH 1879/2552] Fix bug in 119e40ef64b25f66a39246e87ce6c143cd34276d --- yt_dlp/YoutubeDL.py | 3 ++- yt_dlp/__init__.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 866d069b7..8ce71a2dc 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3460,7 +3460,8 @@ class YoutubeDL: return infodict def run_all_pps(self, key, info, *, additional_pps=None): - self._forceprint(key, info) + if key != 'video': + self._forceprint(key, info) for pp in (additional_pps or []) + self._pps[key]: info = self.run_pp(pp, info) return info diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 3490816c4..9cb132410 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -703,7 +703,7 @@ def parse_options(argv=None): postprocessors = list(get_postprocessors(opts)) - print_only = bool(opts.forceprint) and all(k not in opts.forceprint for k in POSTPROCESS_WHEN[2:]) + print_only = bool(opts.forceprint) and all(k not 
in opts.forceprint for k in POSTPROCESS_WHEN[3:]) any_getting = any(getattr(opts, k) for k in ( 'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename', 'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl' From 8c53322cda75394a8d551dde20b2529ee5ad6e89 Mon Sep 17 00:00:00 2001 From: Lesmiscore Date: Mon, 2 Jan 2023 02:16:25 +0900 Subject: [PATCH 1880/2552] [downloader/aria2c] Native progress for aria2c via RPC (#3724) Authored by: Lesmiscore, pukkandan Closes #2038 --- README.md | 3 +- yt_dlp/downloader/external.py | 109 ++++++++++++++++++++++++++++++++-- yt_dlp/options.py | 6 +- yt_dlp/utils.py | 9 +++ 4 files changed, 119 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index f6bf1175e..83e69a236 100644 --- a/README.md +++ b/README.md @@ -153,6 +153,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu * When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the separate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this * `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi` * yt-dlp's sanitization of invalid characters in filenames is different/smarter than in youtube-dl. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior +* yt-dlp tries to parse the external downloader outputs into the standard progress output if possible (Currently implemented: `aria2c`). You can use `--compat-options no-external-downloader-progress` to get the downloader output as-is For ease of use, a few more compat options are available: @@ -160,7 +161,7 @@ For ease of use, a few more compat options are available: * `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams` * `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect` * `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date` -* `--compat-options 2022`: Currently does nothing. Use this to enable all future compat options +* `--compat-options 2022`: Same as `--compat-options no-external-downloader-progress`. 
Use this to enable all future compat options # INSTALLATION diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 575138371..569839f6f 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -1,9 +1,11 @@ import enum +import json import os.path import re import subprocess import sys import time +import uuid from .fragment import FragmentFD from ..compat import functools @@ -20,8 +22,10 @@ from ..utils import ( determine_ext, encodeArgument, encodeFilename, + find_available_port, handle_youtubedl_headers, remove_end, + sanitized_Request, traverse_obj, ) @@ -60,7 +64,6 @@ class ExternalFD(FragmentFD): } if filename != '-': fsize = os.path.getsize(encodeFilename(tmpfilename)) - self.to_screen(f'\r[{self.get_basename()}] Downloaded {fsize} bytes') self.try_rename(tmpfilename, filename) status.update({ 'downloaded_bytes': fsize, @@ -129,8 +132,7 @@ class ExternalFD(FragmentFD): self._debug_cmd(cmd) if 'fragments' not in info_dict: - _, stderr, returncode = Popen.run( - cmd, text=True, stderr=subprocess.PIPE if self._CAPTURE_STDERR else None) + _, stderr, returncode = self._call_process(cmd, info_dict) if returncode and stderr: self.to_stderr(stderr) return returncode @@ -140,7 +142,7 @@ class ExternalFD(FragmentFD): retry_manager = RetryManager(self.params.get('fragment_retries'), self.report_retry, frag_index=None, fatal=not skip_unavailable_fragments) for retry in retry_manager: - _, stderr, returncode = Popen.run(cmd, text=True, stderr=subprocess.PIPE) + _, stderr, returncode = self._call_process(cmd, info_dict) if not returncode: break # TODO: Decide whether to retry based on error code @@ -172,6 +174,9 @@ class ExternalFD(FragmentFD): self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename)) return 0 + def _call_process(self, cmd, info_dict): + return Popen.run(cmd, text=True, stderr=subprocess.PIPE) + class CurlFD(ExternalFD): AVAILABLE_OPT = '-V' @@ -256,6 +261,14 @@ class Aria2cFD(ExternalFD): def _aria2c_filename(fn): return fn if os.path.isabs(fn) else f'.{os.path.sep}{fn}' + def _call_downloader(self, tmpfilename, info_dict): + if 'no-external-downloader-progress' not in self.params.get('compat_opts', []): + info_dict['__rpc'] = { + 'port': find_available_port() or 19190, + 'secret': str(uuid.uuid4()), + } + return super()._call_downloader(tmpfilename, info_dict) + def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '-c', '--console-log-level=warn', '--summary-interval=0', '--download-result=hide', @@ -276,6 +289,12 @@ class Aria2cFD(ExternalFD): cmd += self._bool_option('--show-console-readout', 'noprogress', 'false', 'true', '=') cmd += self._configuration_args() + if '__rpc' in info_dict: + cmd += [ + '--enable-rpc', + f'--rpc-listen-port={info_dict["__rpc"]["port"]}', + f'--rpc-secret={info_dict["__rpc"]["secret"]}'] + # aria2c strips out spaces from the beginning/end of filenames and paths. # We work around this issue by adding a "./" to the beginning of the # filename and relative path, and adding a "/" at the end of the path. 
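The hunk below drives progress reporting through aria2c's documented JSON-RPC interface. For orientation, a minimal standalone client for that interface could look like the following; the port and secret are illustrative values that must match the --rpc-listen-port/--rpc-secret flags passed above.

import json
import urllib.request

def aria2c_rpc(port, secret, method, params=()):
    payload = json.dumps({
        'jsonrpc': '2.0',
        'id': 'probe',  # any unique string
        'method': method,
        'params': [f'token:{secret}', *params],
    }).encode()
    req = urllib.request.Request(
        f'http://localhost:{port}/jsonrpc', data=payload,
        headers={'Content-Type': 'application/json'})
    with urllib.request.urlopen(req) as resp:
        return json.load(resp)['result']

# e.g. poll progress of all active downloads:
# for dl in aria2c_rpc(19190, 'SECRET', 'aria2.tellActive'):
#     print(dl['completedLength'], '/', dl['totalLength'])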
@@ -304,6 +323,88 @@ class Aria2cFD(ExternalFD): cmd += ['--', info_dict['url']] return cmd + def aria2c_rpc(self, rpc_port, rpc_secret, method, params=()): + # Does not actually need to be UUID, just unique + sanitycheck = str(uuid.uuid4()) + d = json.dumps({ + 'jsonrpc': '2.0', + 'id': sanitycheck, + 'method': method, + 'params': [f'token:{rpc_secret}', *params], + }).encode('utf-8') + request = sanitized_Request( + f'http://localhost:{rpc_port}/jsonrpc', + data=d, headers={ + 'Content-Type': 'application/json', + 'Content-Length': f'{len(d)}', + 'Ytdl-request-proxy': '__noproxy__', + }) + with self.ydl.urlopen(request) as r: + resp = json.load(r) + assert resp.get('id') == sanitycheck, 'Something went wrong with RPC server' + return resp['result'] + + def _call_process(self, cmd, info_dict): + if '__rpc' not in info_dict: + return super()._call_process(cmd, info_dict) + + send_rpc = functools.partial(self.aria2c_rpc, info_dict['__rpc']['port'], info_dict['__rpc']['secret']) + started = time.time() + + fragmented = 'fragments' in info_dict + frag_count = len(info_dict['fragments']) if fragmented else 1 + status = { + 'filename': info_dict.get('_filename'), + 'status': 'downloading', + 'elapsed': 0, + 'downloaded_bytes': 0, + 'fragment_count': frag_count if fragmented else None, + 'fragment_index': 0 if fragmented else None, + } + self._hook_progress(status, info_dict) + + def get_stat(key, *obj, average=False): + val = tuple(filter(None, map(float, traverse_obj(obj, (..., ..., key))))) or [0] + return sum(val) / (len(val) if average else 1) + + with Popen(cmd, text=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE) as p: + # Add a small sleep so that RPC client can receive response, + # or the connection stalls infinitely + time.sleep(0.2) + retval = p.poll() + while retval is None: + # We don't use tellStatus as we won't know the GID without reading stdout + # Ref: https://aria2.github.io/manual/en/html/aria2c.html#aria2.tellActive + active = send_rpc('aria2.tellActive') + completed = send_rpc('aria2.tellStopped', [0, frag_count]) + + downloaded = get_stat('totalLength', completed) + get_stat('completedLength', active) + speed = get_stat('downloadSpeed', active) + total = frag_count * get_stat('totalLength', active, completed, average=True) + if total < downloaded: + total = None + + status.update({ + 'downloaded_bytes': int(downloaded), + 'speed': speed, + 'total_bytes': None if fragmented else total, + 'total_bytes_estimate': total, + 'eta': (total - downloaded) / (speed or 1), + 'fragment_index': min(frag_count, len(completed) + 1) if fragmented else None, + 'elapsed': time.time() - started + }) + self._hook_progress(status, info_dict) + + if not active and len(completed) >= frag_count: + send_rpc('aria2.shutdown') + retval = p.wait() + break + + time.sleep(0.1) + retval = p.poll() + + return '', p.stderr.read(), retval + class HttpieFD(ExternalFD): AVAILABLE_OPT = '--version' diff --git a/yt_dlp/options.py b/yt_dlp/options.py index e9766c02d..5bbb292de 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -464,14 +464,14 @@ def create_parser(): 'allowed_values': { 'filename', 'filename-sanitization', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles', 'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge', - 'no-attach-info-json', 'embed-metadata', 'embed-thumbnail-atomicparsley', - 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi', + 'no-attach-info-json', 'embed-thumbnail-atomicparsley', 
'no-external-downloader-progress', + 'embed-metadata', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi', 'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date', }, 'aliases': { 'youtube-dl': ['all', '-multistreams'], 'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat'], '2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'], - '2022': [], + '2022': ['no-external-downloader-progress'], } }, help=( 'Options that can help keep compatibility with youtube-dl or youtube-dlc ' diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 5af176b36..45a7e6eaa 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -5243,6 +5243,15 @@ def random_birthday(year_field, month_field, day_field): } +def find_available_port(interface=''): + try: + with socket.socket() as sock: + sock.bind((interface, 0)) + return sock.getsockname()[1] + except OSError: + return None + + # Templates for internet shortcut files, which are plain text files. DOT_URL_LINK_TEMPLATE = '''\ [InternetShortcut] From e756f45ba0648f972be71ce328419a623e381028 Mon Sep 17 00:00:00 2001 From: Matthew Date: Mon, 2 Jan 2023 04:55:11 +0000 Subject: [PATCH 1881/2552] Improve handling for overriding extractors with plugins (#5916) * Extractors replaced with plugin extractors now show in debug output * Better testcase handling * Added documentation Authored by: coletdjnz, pukkandan --- README.md | 9 ++++++--- yt_dlp/YoutubeDL.py | 22 +++++++++++++++------- yt_dlp/extractor/common.py | 13 +++++++++++-- yt_dlp/extractor/extractors.py | 2 ++ yt_dlp/extractor/testurl.py | 11 ++++++----- 5 files changed, 40 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 83e69a236..c4bd6ef0c 100644 --- a/README.md +++ b/README.md @@ -1841,7 +1841,7 @@ Plugins can be installed using various methods and locations. * Source: where `/yt_dlp/__main__.py`, `/yt-dlp-plugins//yt_dlp_plugins/` 3. **pip and other locations in `PYTHONPATH`** - * Plugin packages can be installed and managed using `pip`. See [ytdlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for an example. + * Plugin packages can be installed and managed using `pip`. See [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for an example. * Note: plugin files between plugin packages installed with pip must have unique filenames * Any path in `PYTHONPATH` is searched in for the `yt_dlp_plugins` namespace folder. * Note: This does not apply for Pyinstaller/py2exe builds. @@ -1854,9 +1854,12 @@ Run yt-dlp with `--verbose`/`-v` to check if the plugin has been loaded. ## Developing Plugins -See [ytdlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for a sample plugin package with instructions on how to set up an environment for plugin development. +See [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for a sample plugin package with instructions on how to set up an environment for plugin development. -All public classes with a name ending in `IE` are imported from each file. This respects underscore prefix (e.g. `_MyBasePluginIE` is private) and `__all__`. Modules can similarly be excluded by prefixing the module name with an underscore (e.g. `_myplugin.py`) +All public classes with a name ending in `IE`/`PP` are imported from each file for extractors and postprocessors repectively. This respects underscore prefix (e.g. `_MyBasePluginIE` is private) and `__all__`. 
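For example, a hedged sketch of an override plugin using the `plugin_name` mechanism described below might look like this; the module path, class name and the tweak itself are all illustrative:

# yt_dlp_plugins/extractor/myfix.py (hypothetical)
from yt_dlp.extractor.youtube import YoutubeIE

class _MyFixYoutubeIE(YoutubeIE, plugin_name='myfix'):
    # The underscore prefix keeps this subclass from also being
    # registered as a separate extractor in its own right.
    def _real_extract(self, url):
        info = super()._real_extract(url)
        info['title'] = (info.get('title') or '').strip()
        return info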
Modules can similarly be excluded by prefixing the module name with an underscore (e.g. `_myplugin.py`) + +To replace an existing extractor with a subclass of one, set the `plugin_name` class keyword argument (e.g. `MyPluginIE(ABuiltInIE, plugin_name='myplugin')` will replace `ABuiltInIE` with `MyPluginIE`). +Due to the mechanics behind this, you should exclude the subclass extractor from being imported separately by making it private using one of the methods described above. If you are a plugin author, add [yt-dlp-plugins](https://github.com/topics/yt-dlp-plugins) as a topic to your repository for discoverability diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 8ce71a2dc..e7b469059 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -33,7 +33,7 @@ from .extractor.common import UnsupportedURLIE from .extractor.openload import PhantomJSwrapper from .minicurses import format_text from .plugins import directories as plugin_directories -from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors +from .postprocessor import _PLUGIN_CLASSES as plugin_pps from .postprocessor import ( EmbedThumbnailPP, FFmpegFixupDuplicateMoovPP, @@ -3730,7 +3730,10 @@ class YoutubeDL: # These imports can be slow. So import them only as needed from .extractor.extractors import _LAZY_LOADER - from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors + from .extractor.extractors import ( + _PLUGIN_CLASSES as plugin_ies, + _PLUGIN_OVERRIDES as plugin_ie_overrides + ) def get_encoding(stream): ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)) @@ -3808,12 +3811,17 @@ class YoutubeDL: proxy_map.update(handler.proxies) write_debug(f'Proxy map: {proxy_map}') - for plugin_type, plugins in {'Extractor': plugin_extractors, 'Post-Processor': plugin_postprocessors}.items(): - if not plugins: - continue - write_debug(f'{plugin_type} Plugins: %s' % (', '.join(sorted(('%s%s' % ( + for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items(): + display_list = ['%s%s' % ( klass.__name__, '' if klass.__name__ == name else f' as {name}') - for name, klass in plugins.items()))))) + for name, klass in plugins.items()] + if plugin_type == 'Extractor': + display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})' + for parent, plugins in plugin_ie_overrides.items()) + if not display_list: + continue + write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}') + plugin_dirs = plugin_directories() if plugin_dirs: write_debug(f'Plugin directories: {plugin_dirs}') diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 9031f3c11..f48b97a6b 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -3442,13 +3442,17 @@ class InfoExtractor: continue t['name'] = cls.ie_key() yield t + if getattr(cls, '__wrapped__', None): + yield from cls.__wrapped__.get_testcases(include_onlymatching) @classmethod def get_webpage_testcases(cls): tests = vars(cls).get('_WEBPAGE_TESTS', []) for t in tests: t['name'] = cls.ie_key() - return tests + yield t + if getattr(cls, '__wrapped__', None): + yield from cls.__wrapped__.get_webpage_testcases() @classproperty(cache=True) def age_limit(cls): @@ -3710,10 +3714,12 @@ class InfoExtractor: if plugin_name: mro = inspect.getmro(cls) super_class = cls.__wrapped__ = mro[mro.index(cls) + 1] - cls.IE_NAME, cls.ie_key = f'{super_class.IE_NAME}+{plugin_name}', super_class.ie_key + cls.PLUGIN_NAME, cls.ie_key = plugin_name, super_class.ie_key + 
cls.IE_NAME = f'{super_class.IE_NAME}+{plugin_name}' while getattr(super_class, '__wrapped__', None): super_class = super_class.__wrapped__ setattr(sys.modules[super_class.__module__], super_class.__name__, cls) + _PLUGIN_OVERRIDES[super_class].append(cls) return super().__init_subclass__(**kwargs) @@ -3770,3 +3776,6 @@ class UnsupportedURLIE(InfoExtractor): def _real_extract(self, url): raise UnsupportedError(url) + + +_PLUGIN_OVERRIDES = collections.defaultdict(list) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index beda02917..baa69d242 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -24,3 +24,5 @@ if not _LAZY_LOADER: globals().update(_PLUGIN_CLASSES) _ALL_CLASSES[:0] = _PLUGIN_CLASSES.values() + +from .common import _PLUGIN_OVERRIDES # noqa: F401 diff --git a/yt_dlp/extractor/testurl.py b/yt_dlp/extractor/testurl.py index dccca1004..0da01aa53 100644 --- a/yt_dlp/extractor/testurl.py +++ b/yt_dlp/extractor/testurl.py @@ -23,11 +23,12 @@ class TestURLIE(InfoExtractor): if len(matching_extractors) == 0: raise ExtractorError(f'No extractors matching {extractor_id!r} found', expected=True) elif len(matching_extractors) > 1: - try: # Check for exact match - extractor = next( - ie for ie in matching_extractors - if ie.IE_NAME.lower() == extractor_id.lower()) - except StopIteration: + extractor = next(( # Check for exact match + ie for ie in matching_extractors if ie.IE_NAME.lower() == extractor_id.lower() + ), None) or next(( # Check for exact match without plugin suffix + ie for ie in matching_extractors if ie.IE_NAME.split('+')[0].lower() == extractor_id.lower() + ), None) + if not extractor: raise ExtractorError( 'Found multiple matching extractors: %s' % ' '.join(ie.IE_NAME for ie in matching_extractors), expected=True) From b23b503e22ff577d23920e877ee73da478bb4c6f Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 2 Jan 2023 05:44:54 +0000 Subject: [PATCH 1882/2552] [extractor/odnoklassniki] Extract subtitles (#5920) Closes #5744 Authored by: bashonly --- yt_dlp/extractor/odnoklassniki.py | 33 +++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/yt_dlp/extractor/odnoklassniki.py b/yt_dlp/extractor/odnoklassniki.py index 4f325f087..4b73eed37 100644 --- a/yt_dlp/extractor/odnoklassniki.py +++ b/yt_dlp/extractor/odnoklassniki.py @@ -11,6 +11,7 @@ from ..utils import ( int_or_none, qualities, smuggle_url, + traverse_obj, unescapeHTML, unified_strdate, unsmuggle_url, @@ -153,6 +154,26 @@ class OdnoklassnikiIE(InfoExtractor): 'title': 'Быковское крещение', 'duration': 3038.181, }, + 'skip': 'HTTP Error 400', + }, { + 'note': 'subtitles', + 'url': 'https://ok.ru/video/4249587550747', + 'info_dict': { + 'id': '4249587550747', + 'ext': 'mp4', + 'title': 'Small Country An African Childhood (2020) (1080p) +subtitle', + 'uploader': 'Sunflower Movies', + 'uploader_id': '595802161179', + 'upload_date': '20220816', + 'duration': 6728, + 'age_limit': 0, + 'thumbnail': r're:^https?://i\.mycdn\.me/videoPreview\?.+', + 'like_count': int, + 'subtitles': dict, + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452', 'only_matching': True, @@ -202,6 +223,7 @@ class OdnoklassnikiIE(InfoExtractor): 'like_count': 0, 'duration': 10444, }, + 'skip': 'Site no longer embeds', }] @classmethod @@ -294,6 +316,16 @@ class OdnoklassnikiIE(InfoExtractor): like_count = int_or_none(metadata.get('likeCount')) + subtitles = {} + for 
sub in traverse_obj(metadata, ('movie', 'subtitleTracks', ...), expected_type=dict): + sub_url = sub.get('url') + if not sub_url: + continue + subtitles.setdefault(sub.get('language') or 'en', []).append({ + 'url': sub_url, + 'ext': 'vtt', + }) + info = { 'id': video_id, 'title': title, @@ -305,6 +337,7 @@ class OdnoklassnikiIE(InfoExtractor): 'like_count': like_count, 'age_limit': age_limit, 'start_time': start_time, + 'subtitles': subtitles, } # pladform From 13f930abc0c91d8e50336488e4c55defe97aa588 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 2 Jan 2023 05:46:06 +0000 Subject: [PATCH 1883/2552] [extractor/fifa] Fix Preplay extraction (#5921) Closes #5839 Authored by: dirkf --- yt_dlp/extractor/fifa.py | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/yt_dlp/extractor/fifa.py b/yt_dlp/extractor/fifa.py index dc00edcb3..8b4db3a8a 100644 --- a/yt_dlp/extractor/fifa.py +++ b/yt_dlp/extractor/fifa.py @@ -17,8 +17,10 @@ class FifaIE(InfoExtractor): 'description': 'md5:f4520d0ee80529c8ba4134a7d692ff8b', 'ext': 'mp4', 'categories': ['FIFA Tournaments'], - 'thumbnail': 'https://digitalhub.fifa.com/transform/fa6f0b3e-a2e9-4cf7-9f32-53c57bcb7360/2006_Final_ITA_FRA', + 'thumbnail': 'https://digitalhub.fifa.com/transform/135e2656-3a51-407b-8810-6c34bec5b59b/FMR_2006_Italy_France_Final_Hero', 'duration': 8165, + 'release_timestamp': 1152403200, + 'release_date': '20060709', }, 'params': {'skip_download': 'm3u8'}, }, { @@ -54,7 +56,7 @@ class FifaIE(InfoExtractor): webpage = self._download_webpage(url, video_id) preconnect_link = self._search_regex( - r']+rel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"', webpage, 'Preconnect Link') + r']+\brel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"', webpage, 'Preconnect Link') video_details = self._download_json( f'{preconnect_link}/sections/videoDetails/{video_id}', video_id, 'Downloading Video Details', fatal=False) @@ -62,22 +64,9 @@ class FifaIE(InfoExtractor): preplay_parameters = self._download_json( f'{preconnect_link}/videoPlayerData/{video_id}', video_id, 'Downloading Preplay Parameters')['preplayParameters'] - cid = preplay_parameters['contentId'] content_data = self._download_json( - f'https://content.uplynk.com/preplay/{cid}/multiple.json', video_id, 'Downloading Content Data', query={ - 'v': preplay_parameters['preplayAPIVersion'], - 'tc': preplay_parameters['tokenCheckAlgorithmVersion'], - 'rn': preplay_parameters['randomNumber'], - 'exp': preplay_parameters['tokenExpirationDate'], - 'ct': preplay_parameters['contentType'], - 'cid': cid, - 'mbtracks': preplay_parameters['tracksAssetNumber'], - 'ad': preplay_parameters['adConfiguration'], - 'ad.preroll': int(preplay_parameters['adPreroll']), - 'ad.cmsid': preplay_parameters['adCMSSourceId'], - 'ad.vid': preplay_parameters['adSourceVideoID'], - 'sig': preplay_parameters['signature'], - }) + 'https://content.uplynk.com/preplay/{contentId}/multiple.json?{queryStr}&sig={signature}'.format(**preplay_parameters), + video_id, 'Downloading Content Data') formats, subtitles = self._extract_m3u8_formats_and_subtitles(content_data['playURL'], video_id) From d7f98714696a4c9691ed28fb9b63395b9227646a Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 2 Jan 2023 05:50:37 +0000 Subject: [PATCH 1884/2552] [extractor/iqiyi] Fix `Iq` JS regex (#5922) Closes #5702 Authored by: bashonly --- yt_dlp/extractor/iqiyi.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 
deletions(-) diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py index dbc688fb9..eba89f787 100644 --- a/yt_dlp/extractor/iqiyi.py +++ b/yt_dlp/extractor/iqiyi.py @@ -527,11 +527,14 @@ class IqIE(InfoExtractor): webpack_js_url = self._proto_relative_url(self._search_regex( r').*?\.setup\s*\((?P[^)]+)\)', + r'''(?s)jwplayer\s*\(\s*(?P'|")(?!(?P=q)).+(?P=q)\s*\)(?!).*?\.\s*setup\s*\(\s*(?P(?:\([^)]*\)|[^)])+)\s*\)''', webpage) if mobj: try: @@ -3237,19 +3243,20 @@ class InfoExtractor: def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): - # JWPlayer backward compatibility: flattened playlists - # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96 - if 'playlist' not in jwplayer_data: - jwplayer_data = {'playlist': [jwplayer_data]} - entries = [] + if not isinstance(jwplayer_data, dict): + return entries - # JWPlayer backward compatibility: single playlist item + playlist_items = jwplayer_data.get('playlist') + # JWPlayer backward compatibility: single playlist item/flattened playlists # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10 - if not isinstance(jwplayer_data['playlist'], list): - jwplayer_data['playlist'] = [jwplayer_data['playlist']] + # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96 + if not isinstance(playlist_items, list): + playlist_items = (playlist_items or jwplayer_data, ) - for video_data in jwplayer_data['playlist']: + for video_data in playlist_items: + if not isinstance(video_data, dict): + continue # JWPlayer backward compatibility: flattened sources # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35 if 'sources' not in video_data: @@ -3287,6 +3294,13 @@ class InfoExtractor: 'timestamp': int_or_none(video_data.get('pubdate')), 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')), 'subtitles': subtitles, + 'alt_title': clean_html(video_data.get('subtitle')), # attributes used e.g. by Tele5 ... 
+ 'genre': clean_html(video_data.get('genre')), + 'channel': clean_html(dict_get(video_data, ('category', 'channel'))), + 'season_number': int_or_none(video_data.get('season')), + 'episode_number': int_or_none(video_data.get('episode')), + 'release_year': int_or_none(video_data.get('releasedate')), + 'age_limit': int_or_none(video_data.get('age_restriction')), } # https://github.com/jwplayer/jwplayer/blob/master/src/js/utils/validator.js#L32 if len(formats) == 1 and re.search(r'^(?:http|//).*(?:youtube\.com|youtu\.be)/.+', formats[0]['url']): @@ -3304,7 +3318,7 @@ class InfoExtractor: def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None, m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): - urls = [] + urls = set() formats = [] for source in jwplayer_sources_data: if not isinstance(source, dict): @@ -3313,14 +3327,14 @@ class InfoExtractor: base_url, self._proto_relative_url(source.get('file'))) if not source_url or source_url in urls: continue - urls.append(source_url) + urls.add(source_url) source_type = source.get('type') or '' ext = mimetype2ext(source_type) or determine_ext(source_url) - if source_type == 'hls' or ext == 'm3u8': + if source_type == 'hls' or ext == 'm3u8' or 'format=m3u8-aapl' in source_url: formats.extend(self._extract_m3u8_formats( source_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id=m3u8_id, fatal=False)) - elif source_type == 'dash' or ext == 'mpd': + elif source_type == 'dash' or ext == 'mpd' or 'format=mpd-time-csf' in source_url: formats.extend(self._extract_mpd_formats( source_url, video_id, mpd_id=mpd_id, fatal=False)) elif ext == 'smil': @@ -3335,13 +3349,12 @@ class InfoExtractor: 'ext': ext, }) else: + format_id = str_or_none(source.get('label')) height = int_or_none(source.get('height')) - if height is None: + if height is None and format_id: # Often no height is provided but there is a label in # format like "1080p", "720p SD", or 1080. 
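For reference, the parse_resolution() fallback introduced below behaves like this on typical labels (expected results shown as comments; None where no height can be inferred):

from yt_dlp.utils import parse_resolution

parse_resolution('1080p').get('height')    # 1080
parse_resolution('720p SD').get('height')  # 720
parse_resolution('640x360').get('height')  # 360
parse_resolution('Auto').get('height')     # None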
- height = int_or_none(self._search_regex( - r'^(\d{3,4})[pP]?(?:\b|$)', str(source.get('label') or ''), - 'height', default=None)) + height = parse_resolution(format_id).get('height') a_format = { 'url': source_url, 'width': int_or_none(source.get('width')), @@ -3349,6 +3362,7 @@ class InfoExtractor: 'tbr': int_or_none(source.get('bitrate'), scale=1000), 'filesize': int_or_none(source.get('filesize')), 'ext': ext, + 'format_id': format_id } if source_url.startswith('rtmp'): a_format['ext'] = 'flv' diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index ffc279023..14d492f07 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -32,6 +32,7 @@ from ..utils import ( unified_timestamp, unsmuggle_url, url_or_none, + urljoin, variadic, xpath_attr, xpath_text, @@ -1867,11 +1868,13 @@ class GenericIE(InfoExtractor): 'display_id': 'kelis-4th-of-july', 'ext': 'mp4', 'title': 'Kelis - 4th Of July', - 'thumbnail': 'https://kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg', + 'description': 'Kelis - 4th Of July', + 'thumbnail': r're:https://(?:www\.)?kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg', }, 'params': { 'skip_download': True, }, + 'expected_warnings': ['Untested major version'], }, { # KVS Player 'url': 'https://www.kvs-demo.com/embed/105/', @@ -1880,35 +1883,12 @@ class GenericIE(InfoExtractor): 'display_id': 'kelis-4th-of-july', 'ext': 'mp4', 'title': 'Kelis - 4th Of July / Embed Player', - 'thumbnail': 'https://kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg', + 'thumbnail': r're:https://(?:www\.)?kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg', }, 'params': { 'skip_download': True, }, }, { - # KVS Player - 'url': 'https://thisvid.com/videos/french-boy-pantsed/', - 'md5': '3397979512c682f6b85b3b04989df224', - 'info_dict': { - 'id': '2400174', - 'display_id': 'french-boy-pantsed', - 'ext': 'mp4', - 'title': 'French Boy Pantsed - ThisVid.com', - 'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg', - } - }, { - # KVS Player - 'url': 'https://thisvid.com/embed/2400174/', - 'md5': '3397979512c682f6b85b3b04989df224', - 'info_dict': { - 'id': '2400174', - 'display_id': 'french-boy-pantsed', - 'ext': 'mp4', - 'title': 'French Boy Pantsed - ThisVid.com', - 'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg', - } - }, { - # KVS Player 'url': 'https://youix.com/video/leningrad-zoj/', 'md5': '94f96ba95706dc3880812b27b7d8a2b8', 'info_dict': { @@ -1916,8 +1896,8 @@ class GenericIE(InfoExtractor): 'display_id': 'leningrad-zoj', 'ext': 'mp4', 'title': 'Клип: Ленинград - ЗОЖ скачать, смотреть онлайн | Youix.com', - 'thumbnail': 'https://youix.com/contents/videos_screenshots/18000/18485/preview_480x320_youix_com.mp4.jpg', - } + 'thumbnail': r're:https://youix.com/contents/videos_screenshots/18000/18485/preview(?:_480x320_youix_com.mp4)?\.jpg', + }, }, { # KVS Player 'url': 'https://youix.com/embed/18485', @@ -1927,19 +1907,20 @@ class GenericIE(InfoExtractor): 'display_id': 'leningrad-zoj', 'ext': 'mp4', 'title': 'Ленинград - ЗОЖ', - 'thumbnail': 'https://youix.com/contents/videos_screenshots/18000/18485/preview_480x320_youix_com.mp4.jpg', - } + 'thumbnail': r're:https://youix.com/contents/videos_screenshots/18000/18485/preview(?:_480x320_youix_com.mp4)?\.jpg', + }, }, { # KVS Player 'url': 'https://bogmedia.org/videos/21217/40-nochey-40-nights-2016/', 'md5': '94166bdb26b4cb1fb9214319a629fc51', 'info_dict': { 'id': 
'21217', - 'display_id': '40-nochey-40-nights-2016', + 'display_id': '40-nochey-2016', 'ext': 'mp4', 'title': '40 ночей (2016) - BogMedia.org', + 'description': 'md5:4e6d7d622636eb7948275432eb256dc3', 'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg', - } + }, }, { # KVS Player (for sites that serve kt_player.js via non-https urls) @@ -1949,9 +1930,9 @@ class GenericIE(InfoExtractor): 'id': '389508', 'display_id': 'syren-de-mer-onlyfans-05-07-2020have-a-happy-safe-holiday5f014e68a220979bdb8cd-source', 'ext': 'mp4', - 'title': 'Syren De Mer onlyfans_05-07-2020Have_a_happy_safe_holiday5f014e68a220979bdb8cd_source / Embed плеер', - 'thumbnail': 'http://www.camhub.world/contents/videos_screenshots/389000/389508/preview.mp4.jpg', - } + 'title': 'Syren De Mer onlyfans_05-07-2020Have_a_happy_safe_holiday5f014e68a220979bdb8cd_source / Embed плеер', + 'thumbnail': r're:https?://www\.camhub\.world/contents/videos_screenshots/389000/389508/preview\.mp4\.jpg', + }, }, { # Reddit-hosted video that will redirect and be processed by RedditIE @@ -2169,7 +2150,20 @@ class GenericIE(InfoExtractor): 'direct': True, 'age_limit': 0, } - } + }, + { + 'url': 'https://shooshtime.com/videos/284002/just-out-of-the-shower-joi/', + 'md5': 'e2f0a4c329f7986280b7328e24036d60', + 'info_dict': { + 'id': '284002', + 'display_id': 'just-out-of-the-shower-joi', + 'ext': 'mp4', + 'title': 'Just Out Of The Shower JOI - Shooshtime', + 'thumbnail': 'https://i.shoosh.co/contents/videos_screenshots/284000/284002/preview.mp4.jpg', + 'height': 720, + 'age_limit': 18, + }, + }, ] def report_following_redirect(self, new_url): @@ -2235,43 +2229,87 @@ class GenericIE(InfoExtractor): 'entries': entries, } - def _kvs_getrealurl(self, video_url, license_code): + @classmethod + def _kvs_get_real_url(cls, video_url, license_code): if not video_url.startswith('function/0/'): return video_url # not obfuscated - url_path, _, url_query = video_url.partition('?') - urlparts = url_path.split('/')[2:] - license = self._kvs_getlicensetoken(license_code) - newmagic = urlparts[5][:32] + parsed = urllib.parse.urlparse(video_url[len('function/0/'):]) + license = cls._kvs_get_license_token(license_code) + urlparts = parsed.path.split('/') - for o in range(len(newmagic) - 1, -1, -1): - new = '' - l = (o + sum(int(n) for n in license[o:])) % 32 + HASH_LENGTH = 32 + hash = urlparts[3][:HASH_LENGTH] + indices = list(range(HASH_LENGTH)) - for i in range(0, len(newmagic)): - if i == o: - new += newmagic[l] - elif i == l: - new += newmagic[o] - else: - new += newmagic[i] - newmagic = new + # Swap indices of hash according to the destination calculated from the license token + accum = 0 + for src in reversed(range(HASH_LENGTH)): + accum += license[src] + dest = (src + accum) % HASH_LENGTH + indices[src], indices[dest] = indices[dest], indices[src] + + urlparts[3] = ''.join(hash[index] for index in indices) + urlparts[3][HASH_LENGTH:] + return urllib.parse.urlunparse(parsed._replace(path='/'.join(urlparts))) - urlparts[5] = newmagic + urlparts[5][32:] - return '/'.join(urlparts) + '?' 
+ url_query + @staticmethod + def _kvs_get_license_token(license): + license = license.replace('$', '') + license_values = [int(char) for char in license] - def _kvs_getlicensetoken(self, license): - modlicense = license.replace('$', '').replace('0', '1') - center = int(len(modlicense) / 2) + modlicense = license.replace('0', '1') + center = len(modlicense) // 2 fronthalf = int(modlicense[:center + 1]) backhalf = int(modlicense[center:]) + modlicense = str(4 * abs(fronthalf - backhalf))[:center + 1] + + return [ + (license_values[index + offset] + current) % 10 + for index, current in enumerate(map(int, modlicense)) + for offset in range(4) + ] + + def _extract_kvs(self, url, webpage, video_id): + flashvars = self._search_json( + r'(?s:]*>.*?var\s+flashvars\s*=)', + webpage, 'flashvars', video_id, transform_source=js_to_json) + + # extract the part after the last / as the display_id from the + # canonical URL. + display_id = self._search_regex( + r'(?:' + r'|)', + webpage, 'display_id', fatal=False) + title = self._html_search_regex(r'<(?:h1|title)>(?:Video: )?(.+?)', webpage, 'title') + + thumbnail = flashvars['preview_url'] + if thumbnail.startswith('//'): + protocol, _, _ = url.partition('/') + thumbnail = protocol + thumbnail + + url_keys = list(filter(re.compile(r'^video_(?:url|alt_url\d*)$').match, flashvars.keys())) + formats = [] + for key in url_keys: + if '/get_file/' not in flashvars[key]: + continue + format_id = flashvars.get(f'{key}_text', key) + formats.append({ + 'url': urljoin(url, self._kvs_get_real_url(flashvars[key], flashvars['license_code'])), + 'format_id': format_id, + 'ext': 'mp4', + **(parse_resolution(format_id) or parse_resolution(flashvars[key])), + 'http_headers': {'Referer': url}, + }) + if not formats[-1].get('height'): + formats[-1]['quality'] = 1 - modlicense = str(4 * abs(fronthalf - backhalf)) - retval = '' - for o in range(0, center + 1): - for i in range(1, 5): - retval += str((int(license[o + i]) + int(modlicense[o])) % 10) - return retval + return { + 'id': flashvars['video_id'], + 'display_id': display_id, + 'title': title, + 'thumbnail': thumbnail, + 'formats': formats, + } def _real_extract(self, url): if url.startswith('//'): @@ -2580,6 +2618,17 @@ class GenericIE(InfoExtractor): self.report_detected('video.js embed') return [{'formats': formats, 'subtitles': subtitles}] + # Look for generic KVS player (before json-ld bc of some urls that break otherwise) + found = self._search_regex(( + r']+?\bsrc\s*=\s*(["\'])https?://(?:\S+?/)+kt_player\.js\?v=(?P\d+(?:\.\d+)+)\1[^>]*>', + r'kt_player\s*\(\s*(["\'])(?:(?!\1)[\w\W])+\1\s*,\s*(["\'])https?://(?:\S+?/)+kt_player\.swf\?v=(?P\d+(?:\.\d+)+)\2\s*,', + ), webpage, 'KVS player', group='ver', default=False) + if found: + self.report_detected('KWS Player') + if found.split('.')[0] not in ('4', '5', '6'): + self.report_warning(f'Untested major version ({found}) in player engine - download may fail.') + return [self._extract_kvs(url, webpage, video_id)] + # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld(webpage, video_id, default={}) if json_ld.get('url') not in (url, None): @@ -2622,52 +2671,6 @@ class GenericIE(InfoExtractor): ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage)) if found: self.report_detected('JW Player embed') - if not found: - # Look for generic KVS player - found = re.search(r'', webpage) - flashvars = self._parse_json(flashvars.group(1), video_id, transform_source=js_to_json) - - # extract the part after the last / as the display_id from the - # canonical URL. 
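The descrambling performed by _kvs_get_real_url/_kvs_get_license_token above can also be exercised in isolation. A self-contained sketch of the same algorithm follows; the hash and license values would come from a page's flashvars, so any concrete inputs are illustrative.

HASH_LENGTH = 32

def license_token(license_code):
    license_code = license_code.replace('$', '')
    digits = [int(c) for c in license_code]
    mod = license_code.replace('0', '1')
    center = len(mod) // 2
    front, back = int(mod[:center + 1]), int(mod[center:])
    mod = str(4 * abs(front - back))[:center + 1]
    return [(digits[index + offset] + int(cur)) % 10
            for index, cur in enumerate(mod)
            for offset in range(4)]

def descramble_hash(scrambled, license_code):
    token = license_token(license_code)
    indices = list(range(HASH_LENGTH))
    accum = 0
    # Undo the swaps using the running sum of license-token digits
    for src in reversed(range(HASH_LENGTH)):
        accum += token[src]
        dest = (src + accum) % HASH_LENGTH
        indices[src], indices[dest] = indices[dest], indices[src]
    return ''.join(scrambled[i] for i in indices) + scrambled[HASH_LENGTH:]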
- display_id = self._search_regex( - r'(?:' - r'|)', - webpage, 'display_id', fatal=False - ) - title = self._html_search_regex(r'<(?:h1|title)>(?:Video: )?(.+?)', webpage, 'title') - - thumbnail = flashvars['preview_url'] - if thumbnail.startswith('//'): - protocol, _, _ = url.partition('/') - thumbnail = protocol + thumbnail - - url_keys = list(filter(re.compile(r'video_url|video_alt_url\d*').fullmatch, flashvars.keys())) - formats = [] - for key in url_keys: - if '/get_file/' not in flashvars[key]: - continue - format_id = flashvars.get(f'{key}_text', key) - formats.append({ - 'url': self._kvs_getrealurl(flashvars[key], flashvars['license_code']), - 'format_id': format_id, - 'ext': 'mp4', - **(parse_resolution(format_id) or parse_resolution(flashvars[key])) - }) - if not formats[-1].get('height'): - formats[-1]['quality'] = 1 - - return [{ - 'id': flashvars['video_id'], - 'display_id': display_id, - 'title': title, - 'thumbnail': thumbnail, - 'formats': formats, - }] if not found: # Broaden the search a little bit found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)) diff --git a/yt_dlp/extractor/peekvids.py b/yt_dlp/extractor/peekvids.py index 2d9b9a742..d1fc058b9 100644 --- a/yt_dlp/extractor/peekvids.py +++ b/yt_dlp/extractor/peekvids.py @@ -1,71 +1,128 @@ +import re + from .common import InfoExtractor +from ..utils import ( + ExtractorError, + get_element_by_class, + int_or_none, + merge_dicts, + url_or_none, +) + + +class PeekVidsBaseIE(InfoExtractor): + def _real_extract(self, url): + domain, video_id = self._match_valid_url(url).group('domain', 'id') + webpage = self._download_webpage(url, video_id, expected_status=429) + if '>Rate Limit Exceeded' in webpage: + raise ExtractorError( + f'You are suspected as a bot. Wait, or pass the captcha on the site and provide cookies. {self._login_hint()}', + video_id=video_id, expected=True) + + title = self._html_search_regex(r'(?s)]*>(.+?)
</h1>
    ', webpage, 'title') + + display_id = video_id + video_id = self._search_regex(r'(?s)]+\bdata-id\s*=\s*["\']?([\w-]+)', webpage, 'short video ID') + srcs = self._download_json( + f'https://www.{domain}/v-alt/{video_id}', video_id, + note='Downloading list of source files') + + formats = [] + for k, v in srcs.items(): + f_url = url_or_none(v) + if not f_url: + continue + + height = self._search_regex(r'^data-src(\d{3,})$', k, 'height', default=None) + if not height: + continue + + formats.append({ + 'url': f_url, + 'format_id': height, + 'height': int_or_none(height), + }) + + if not formats: + formats = [{'url': url} for url in srcs.values()] + info = self._search_json_ld(webpage, video_id, expected_type='VideoObject', default={}) + info.pop('url', None) -class PeekVidsIE(InfoExtractor): + # may not have found the thumbnail if it was in a list in the ld+json + info.setdefault('thumbnail', self._og_search_thumbnail(webpage)) + detail = (get_element_by_class('detail-video-block', webpage) + or get_element_by_class('detail-block', webpage) or '') + info['description'] = self._html_search_regex( + rf'(?s)(.+?)(?:{re.escape(info.get("description", ""))}\s*<|]*>\s*{re.escape(name)}\s*:\s*(.+?)', + html, name, default='') + return list(filter(None, re.split(r'\s+', l))) + + return merge_dicts({ + 'id': video_id, + 'display_id': display_id, + 'age_limit': 18, + 'formats': formats, + 'categories': cat_tags('Categories', detail), + 'tags': cat_tags('Tags', detail), + 'uploader': self._html_search_regex(r'[Uu]ploaded\s+by\s(.+?)"', webpage, 'uploader', default=None), + }, info) + + +class PeekVidsIE(PeekVidsBaseIE): _VALID_URL = r'''(?x) - https?://(?:www\.)?peekvids\.com/ + https?://(?:www\.)?(?Ppeekvids\.com)/ (?:(?:[^/?#]+/){2}|embed/?\?(?:[^#]*&)?v=) (?P[^/?&#]*) ''' _TESTS = [{ 'url': 'https://peekvids.com/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp/BSyLMbN0YCd', - 'md5': 'a00940646c428e232407e3e62f0e8ef5', + 'md5': '2ff6a357a9717dc9dc9894b51307e9a2', 'info_dict': { - 'id': 'BSyLMbN0YCd', - 'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp, SEXYhub', + 'id': '1262717', + 'display_id': 'BSyLMbN0YCd', + 'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp', 'ext': 'mp4', 'thumbnail': r're:^https?://.*\.jpg$', - 'description': 'Watch Dane Jones - Cute redhead with perfect tits with Mini Vamp (7 min), uploaded by SEXYhub.com', + 'description': 'md5:0a61df3620de26c0af8963b1a730cd69', 'timestamp': 1642579329, 'upload_date': '20220119', 'duration': 416, 'view_count': int, 'age_limit': 18, + 'uploader': 'SEXYhub.com', + 'categories': list, + 'tags': list, }, }] - _DOMAIN = 'www.peekvids.com' - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - short_video_id = self._html_search_regex(r'