From 2314b4d89fc111ddfcb25937210f1f1c2390cc4a Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 16 Sep 2022 16:37:38 +0530 Subject: [PATCH 001/515] Allow plugin extractors to replace the built-in ones This allows easier plugin chaining; e.g. - https://gist.github.com/pukkandan/24f13ff1ed385c5a390c1d7bd130d8f7 - https://gist.github.com/pukkandan/fcf5ca1785c80f64e471f0ee14f990fb --- yt_dlp/extractor/common.py | 13 +++++++++++++ yt_dlp/extractor/extractors.py | 7 +++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index dae952f6a..30042d61f 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -5,6 +5,7 @@ import hashlib import http.client import http.cookiejar import http.cookies +import inspect import itertools import json import math @@ -3900,6 +3901,18 @@ class InfoExtractor: """Only for compatibility with some older extractors""" return next(iter(cls._extract_embed_urls(None, webpage) or []), None) + @classmethod + def __init_subclass__(cls, *, plugin_name=None, **kwargs): + if plugin_name: + mro = inspect.getmro(cls) + super_class = cls.__wrapped__ = mro[mro.index(cls) + 1] + cls.IE_NAME, cls.ie_key = f'{super_class.IE_NAME}+{plugin_name}', super_class.ie_key + while getattr(super_class, '__wrapped__', None): + super_class = super_class.__wrapped__ + setattr(sys.modules[super_class.__module__], super_class.__name__, cls) + + return super().__init_subclass__(**kwargs) + class SearchInfoExtractor(InfoExtractor): """ diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 32818a024..610e02f90 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -3,6 +3,9 @@ import os from ..utils import load_plugins +# NB: Must be before other imports so that plugins can be correctly injected +_PLUGIN_CLASSES = load_plugins('extractor', 'IE', {}) + _LAZY_LOADER = False if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'): with contextlib.suppress(ImportError): @@ -19,5 +22,5 @@ if not _LAZY_LOADER: ] _ALL_CLASSES.append(GenericIE) # noqa: F405 -_PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals()) -_ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES +globals().update(_PLUGIN_CLASSES) +_ALL_CLASSES[:0] = _PLUGIN_CLASSES.values() From 2b9d02167fdf2fbe5bd8306144ab45027da263c1 Mon Sep 17 00:00:00 2001 From: Locke Date: Fri, 16 Sep 2022 23:59:02 +0800 Subject: [PATCH 002/515] [extractor/bilibili] Add space.bilibili extractors (#4468) Authored by: lockmatrix --- yt_dlp/extractor/_extractors.py | 4 +- yt_dlp/extractor/bilibili.py | 144 +++++++++++++++++++++++++------- 2 files changed, 119 insertions(+), 29 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index aedf063f6..6bf769a9e 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -190,7 +190,9 @@ from .bilibili import ( BilibiliAudioIE, BilibiliAudioAlbumIE, BiliBiliPlayerIE, - BilibiliChannelIE, + BilibiliSpaceVideoIE, + BilibiliSpaceAudioIE, + BilibiliSpacePlaylistIE, BiliIntlIE, BiliIntlSeriesIE, BiliLiveIE, diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 2c29bf3ce..2e03aee85 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -2,8 +2,8 @@ import base64 import hashlib import itertools import functools -import re import math +import re from .common import InfoExtractor, SearchInfoExtractor from ..compat import ( @@ -13,23 +13,24 @@ from ..compat import ( ) from ..utils import ( ExtractorError, + InAdvancePagedList, + OnDemandPagedList, filter_dict, - int_or_none, float_or_none, + int_or_none, mimetype2ext, + parse_count, parse_iso8601, qualities, - traverse_obj, - parse_count, smuggle_url, srt_subtitles_timecode, str_or_none, strip_jsonp, + traverse_obj, unified_timestamp, unsmuggle_url, urlencode_postdata, url_or_none, - OnDemandPagedList ) @@ -505,39 +506,126 @@ class BiliBiliBangumiIE(InfoExtractor): season_info.get('bangumi_title'), season_info.get('evaluate')) -class BilibiliChannelIE(InfoExtractor): - _VALID_URL = r'https?://space.bilibili\.com/(?P\d+)' - _API_URL = "https://api.bilibili.com/x/space/arc/search?mid=%s&pn=%d&jsonp=jsonp" +class BilibiliSpaceBaseIE(InfoExtractor): + def _extract_playlist(self, fetch_page, get_metadata, get_entries): + first_page = fetch_page(1) + metadata = get_metadata(first_page) + + paged_list = InAdvancePagedList( + lambda idx: get_entries(fetch_page(idx) if idx > 1 else first_page), + metadata['page_count'], metadata['page_size']) + + return metadata, paged_list + + +class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE): + _VALID_URL = r'https?://space\.bilibili\.com/(?P\d+)(?P