From d6c2c2bc84f1434255be5c73baeb17d893d2c0d4 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 16 Jun 2024 19:01:46 -0500 Subject: [PATCH] [ie/sproutvideo] Add extractors (#10098) Closes #2933, Closes #8942 Authored by: bashonly, TheZ3ro Co-authored-by: thezero --- yt_dlp/extractor/_extractors.py | 4 + yt_dlp/extractor/patreon.py | 17 ++- yt_dlp/extractor/sproutvideo.py | 198 ++++++++++++++++++++++++++++++++ 3 files changed, 214 insertions(+), 5 deletions(-) create mode 100644 yt_dlp/extractor/sproutvideo.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 0f599c9db..c411efb5a 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1928,6 +1928,10 @@ ) from .springboardplatform import SpringboardPlatformIE from .sprout import SproutIE +from .sproutvideo import ( + SproutVideoIE, + VidsIoIE, +) from .srgssr import ( SRGSSRIE, SRGSSRPlayIE, diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py index 26ca84ab3..5dc46e317 100644 --- a/yt_dlp/extractor/patreon.py +++ b/yt_dlp/extractor/patreon.py @@ -2,6 +2,7 @@ import urllib.parse from .common import InfoExtractor +from .sproutvideo import VidsIoIE from .vimeo import VimeoIE from ..networking.exceptions import HTTPError from ..utils import ( @@ -12,6 +13,7 @@ int_or_none, mimetype2ext, parse_iso8601, + smuggle_url, str_or_none, traverse_obj, url_or_none, @@ -305,22 +307,27 @@ def _real_extract(self, url): 'channel_follower_count': ('attributes', 'patron_count', {int_or_none}), })) + # all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, Vimeo + headers = {'referer': 'https://patreon.com/'} + # handle Vimeo embeds if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo': v_url = urllib.parse.unquote(self._html_search_regex( r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)', traverse_obj(attributes, ('embed', 'html', {str})), 'vimeo url', fatal=False) or '') if url_or_none(v_url) and self._request_webpage( - v_url, video_id, 'Checking Vimeo embed URL', - headers={'Referer': 'https://patreon.com/'}, - fatal=False, errnote=False): + v_url, video_id, 'Checking Vimeo embed URL', headers=headers, fatal=False, errnote=False): entries.append(self.url_result( VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'), VimeoIE, url_transparent=True)) embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none})) - if embed_url and self._request_webpage(embed_url, video_id, 'Checking embed URL', fatal=False, errnote=False): - entries.append(self.url_result(embed_url)) + if embed_url and (urlh := self._request_webpage( + embed_url, video_id, 'Checking embed URL', headers=headers, + fatal=False, errnote=False, expected_status=403)): + # Password-protected vids.io embeds return 403 errors w/o --video-password or session cookie + if urlh.status != 403 or VidsIoIE.suitable(embed_url): + entries.append(self.url_result(smuggle_url(embed_url, headers))) post_file = traverse_obj(attributes, ('post_file', {dict})) if post_file: diff --git a/yt_dlp/extractor/sproutvideo.py b/yt_dlp/extractor/sproutvideo.py new file mode 100644 index 000000000..c0923594e --- /dev/null +++ b/yt_dlp/extractor/sproutvideo.py @@ -0,0 +1,198 @@ +import base64 +import urllib.parse + +from .common import InfoExtractor +from ..networking.exceptions import HTTPError +from ..utils import ( + ExtractorError, + int_or_none, + qualities, + remove_start, + smuggle_url, + unsmuggle_url, + update_url_query, + url_or_none, + urlencode_postdata, +) +from ..utils.traversal import traverse_obj + + +class SproutVideoIE(InfoExtractor): + _NO_SCHEME_RE = r'//videos\.sproutvideo\.com/embed/(?P[\da-f]+)/[\da-f]+' + _VALID_URL = rf'https?:{_NO_SCHEME_RE}' + _EMBED_REGEX = [rf'