Improve handling for overriding extractors with plugins (#5916)

* Extractors replaced with plugin extractors now show in debug output
* Better testcase handling
* Added documentation
Authored by: coletdjnz, pukkandan
This commit is contained in:
Matthew 2023-01-02 04:55:11 +00:00 committed by GitHub
parent 8c53322cda
commit e756f45ba0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 40 additions and 17 deletions

View File

@ -1841,7 +1841,7 @@ ## Installing Plugins
* Source: where `<root-dir>/yt_dlp/__main__.py`, `<root-dir>/yt-dlp-plugins/<package name>/yt_dlp_plugins/`
3. **pip and other locations in `PYTHONPATH`**
* Plugin packages can be installed and managed using `pip`. See [ytdlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for an example.
* Plugin packages can be installed and managed using `pip`. See [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for an example.
* Note: plugin files between plugin packages installed with pip must have unique filenames
* Any path in `PYTHONPATH` is searched for the `yt_dlp_plugins` namespace folder.
* Note: This does not apply for Pyinstaller/py2exe builds.
@ -1854,9 +1854,12 @@ ## Installing Plugins
## Developing Plugins
See [ytdlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for a sample plugin package with instructions on how to set up an environment for plugin development.
See [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for a sample plugin package with instructions on how to set up an environment for plugin development.
All public classes with a name ending in `IE` are imported from each file. This respects underscore prefix (e.g. `_MyBasePluginIE` is private) and `__all__`. Modules can similarly be excluded by prefixing the module name with an underscore (e.g. `_myplugin.py`)
All public classes with a name ending in `IE`/`PP` are imported from each file for extractors and postprocessors respectively. This respects underscore prefix (e.g. `_MyBasePluginIE` is private) and `__all__`. Modules can similarly be excluded by prefixing the module name with an underscore (e.g. `_myplugin.py`)
To replace an existing extractor with a subclass of it, set the `plugin_name` class keyword argument (e.g. `class MyPluginIE(ABuiltInIE, plugin_name='myplugin')` will replace `ABuiltInIE` with `MyPluginIE`).
Because the subclass takes the place of the extractor it overrides, you should exclude the subclass extractor from being imported separately by making it private using one of the methods described above.
If you are a plugin author, add [yt-dlp-plugins](https://github.com/topics/yt-dlp-plugins) as a topic to your repository for discoverability.

View File

@ -33,7 +33,7 @@
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .plugins import directories as plugin_directories
from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
from .postprocessor import _PLUGIN_CLASSES as plugin_pps
from .postprocessor import (
EmbedThumbnailPP,
FFmpegFixupDuplicateMoovPP,
@ -3730,7 +3730,10 @@ def print_debug_header(self):
# These imports can be slow. So import them only as needed
from .extractor.extractors import _LAZY_LOADER
from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors
from .extractor.extractors import (
_PLUGIN_CLASSES as plugin_ies,
_PLUGIN_OVERRIDES as plugin_ie_overrides
)
def get_encoding(stream):
ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
@ -3808,12 +3811,17 @@ def get_encoding(stream):
proxy_map.update(handler.proxies)
write_debug(f'Proxy map: {proxy_map}')
for plugin_type, plugins in {'Extractor': plugin_extractors, 'Post-Processor': plugin_postprocessors}.items():
if not plugins:
continue
write_debug(f'{plugin_type} Plugins: %s' % (', '.join(sorted(('%s%s' % (
for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
display_list = ['%s%s' % (
klass.__name__, '' if klass.__name__ == name else f' as {name}')
for name, klass in plugins.items())))))
for name, klass in plugins.items()]
if plugin_type == 'Extractor':
display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
for parent, plugins in plugin_ie_overrides.items())
if not display_list:
continue
write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')
plugin_dirs = plugin_directories()
if plugin_dirs:
write_debug(f'Plugin directories: {plugin_dirs}')

View File

@ -3442,13 +3442,17 @@ def get_testcases(cls, include_onlymatching=False):
continue
t['name'] = cls.ie_key()
yield t
if getattr(cls, '__wrapped__', None):
yield from cls.__wrapped__.get_testcases(include_onlymatching)
@classmethod
def get_webpage_testcases(cls):
tests = vars(cls).get('_WEBPAGE_TESTS', [])
for t in tests:
t['name'] = cls.ie_key()
return tests
yield t
if getattr(cls, '__wrapped__', None):
yield from cls.__wrapped__.get_webpage_testcases()
@classproperty(cache=True)
def age_limit(cls):
@ -3710,10 +3714,12 @@ def __init_subclass__(cls, *, plugin_name=None, **kwargs):
if plugin_name:
mro = inspect.getmro(cls)
super_class = cls.__wrapped__ = mro[mro.index(cls) + 1]
cls.IE_NAME, cls.ie_key = f'{super_class.IE_NAME}+{plugin_name}', super_class.ie_key
cls.PLUGIN_NAME, cls.ie_key = plugin_name, super_class.ie_key
cls.IE_NAME = f'{super_class.IE_NAME}+{plugin_name}'
while getattr(super_class, '__wrapped__', None):
super_class = super_class.__wrapped__
setattr(sys.modules[super_class.__module__], super_class.__name__, cls)
_PLUGIN_OVERRIDES[super_class].append(cls)
return super().__init_subclass__(**kwargs)
@ -3770,3 +3776,6 @@ class UnsupportedURLIE(InfoExtractor):
def _real_extract(self, url):
raise UnsupportedError(url)
_PLUGIN_OVERRIDES = collections.defaultdict(list)

View File

@ -24,3 +24,5 @@
globals().update(_PLUGIN_CLASSES)
_ALL_CLASSES[:0] = _PLUGIN_CLASSES.values()
from .common import _PLUGIN_OVERRIDES # noqa: F401

View File

@ -23,11 +23,12 @@ def _real_extract(self, url):
if len(matching_extractors) == 0:
raise ExtractorError(f'No extractors matching {extractor_id!r} found', expected=True)
elif len(matching_extractors) > 1:
try: # Check for exact match
extractor = next(
ie for ie in matching_extractors
if ie.IE_NAME.lower() == extractor_id.lower())
except StopIteration:
extractor = next(( # Check for exact match
ie for ie in matching_extractors if ie.IE_NAME.lower() == extractor_id.lower()
), None) or next(( # Check for exact match without plugin suffix
ie for ie in matching_extractors if ie.IE_NAME.split('+')[0].lower() == extractor_id.lower()
), None)
if not extractor:
raise ExtractorError(
'Found multiple matching extractors: %s' % ' '.join(ie.IE_NAME for ie in matching_extractors),
expected=True)