diff --git a/yt_dlp/extractor/vrt.py b/yt_dlp/extractor/vrt.py index 3d26549a4..7e5e2ed1d 100644 --- a/yt_dlp/extractor/vrt.py +++ b/yt_dlp/extractor/vrt.py @@ -1,144 +1,177 @@ import functools import json -import time import urllib.parse +import urllib.request -from .gigya import GigyaBaseIE -from ..networking.exceptions import HTTPError +from .common import InfoExtractor from ..utils import ( - ExtractorError, clean_html, extract_attributes, float_or_none, get_element_by_class, get_element_html_by_class, int_or_none, - join_nonempty, - jwt_encode_hs256, make_archive_id, merge_dicts, parse_age_limit, parse_iso8601, + unified_strdate, str_or_none, strip_or_none, traverse_obj, url_or_none, - urlencode_postdata, ) -class VRTBaseIE(GigyaBaseIE): +class VRTBaseIE(InfoExtractor): _GEO_BYPASS = False - _PLAYER_INFO = { - 'platform': 'desktop', - 'app': { - 'type': 'browser', - 'name': 'Chrome', - }, - 'device': 'undefined (undefined)', - 'os': { - 'name': 'Windows', - 'version': 'x86_64' - }, - 'player': { - 'name': 'VRT web player', - 'version': '2.7.4-prod-2023-04-19T06:05:45' - } - } - # From https://player.vrt.be/vrtnws/js/main.js & https://player.vrt.be/ketnet/js/main.8cdb11341bcb79e4cd44.js - _JWT_KEY_ID = '0-0Fp51UZykfaiCJrfTE3+oMI8zvDteYfPtR+2n1R+z8w=' - _JWT_SIGNING_KEY = 'b5f500d55cb44715107249ccd8a5c0136cfb2788dbb71b90a4f142423bacaf38' # -dev - # player-stag.vrt.be key: d23987504521ae6fbf2716caca6700a24bb1579477b43c84e146b279de5ca595 - # player.vrt.be key: 2a9251d782700769fb856da5725daf38661874ca6f80ae7dc2b05ec1a81a24ae def _extract_formats_and_subtitles(self, data, video_id): if traverse_obj(data, 'drm'): self.report_drm(video_id) formats, subtitles = [], {} - for target in traverse_obj(data, ('targetUrls', lambda _, v: url_or_none(v['url']) and v['type'])): + for target in traverse_obj( + data, ('targetUrls', lambda _, v: url_or_none(v['url']) and v['type']) + ): format_type = target['type'].upper() format_url = target['url'] if format_type in ('HLS', 'HLS_AES'): fmts, subs = self._extract_m3u8_formats_and_subtitles( - format_url, video_id, 'mp4', m3u8_id=format_type, fatal=False) + format_url, video_id, 'mp4', m3u8_id=format_type, fatal=False + ) formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) elif format_type == 'HDS': - formats.extend(self._extract_f4m_formats( - format_url, video_id, f4m_id=format_type, fatal=False)) + formats.extend( + self._extract_f4m_formats( + format_url, video_id, f4m_id=format_type, fatal=False + ) + ) elif format_type == 'MPEG_DASH': fmts, subs = self._extract_mpd_formats_and_subtitles( - format_url, video_id, mpd_id=format_type, fatal=False) + format_url, video_id, mpd_id=format_type, fatal=False + ) formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) elif format_type == 'HSS': fmts, subs = self._extract_ism_formats_and_subtitles( - format_url, video_id, ism_id='mss', fatal=False) + format_url, video_id, ism_id='mss', fatal=False + ) formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) else: - formats.append({ - 'format_id': format_type, - 'url': format_url, - }) - - for sub in traverse_obj(data, ('subtitleUrls', lambda _, v: v['url'] and v['type'] == 'CLOSED')): + formats.append( + { + 'format_id': format_type, + 'url': format_url, + } + ) + + for sub in traverse_obj( + data, ('subtitleUrls', lambda _, v: v['url'] and v['type'] == 'CLOSED') + ): subtitles.setdefault('nl', []).append({'url': sub['url']}) return formats, subtitles def _call_api(self, video_id, client='null', id_token=None, version='v2'): - player_info = {'exp': (round(time.time(), 3) + 900), **self._PLAYER_INFO} - player_token = self._download_json( - 'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v2/tokens', - video_id, 'Downloading player token', headers={ - **self.geo_verification_headers(), - 'Content-Type': 'application/json', - }, data=json.dumps({ - 'identityToken': id_token or {}, - 'playerInfo': jwt_encode_hs256(player_info, self._JWT_SIGNING_KEY, headers={ - 'kid': self._JWT_KEY_ID - }).decode() - }, separators=(',', ':')).encode())['vrtPlayerToken'] + json_response = self._download_json( + f'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/{version}/tokens', + None, + 'Downloading player token', + 'Failed to download player token', + headers={'Content-Type': 'application/json'}, + data=json.dumps( + { + 'identityToken': id_token + or self._get_cookies('https://www.vrt.be') + .get('vrtnu-site_profile_vt') + .value + } + ).encode(), + ) + player_token = json_response['vrtPlayerToken'] return self._download_json( - f'https://media-services-public.vrt.be/media-aggregator/{version}/media-items/{video_id}', - video_id, 'Downloading API JSON', query={ + f'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/{version}/videos/{video_id}', + video_id, + 'Downloading API JSON', + 'Failed to download API JSON', + query={ 'vrtPlayerToken': player_token, 'client': client, - }, expected_status=400) + }, + ) -class VRTIE(VRTBaseIE): +class VRTLoginIE(VRTBaseIE): + _NETRC_MACHINE = 'vrtnu' + _authenticated = False + + def _perform_login(self, username, password): + self._request_webpage( + 'https://www.vrt.be/vrtnu/sso/login', + None, + note='Getting session cookies', + errnote='Failed to get session cookies', + ) + + self._download_json( + 'https://login.vrt.be/perform_login', + None, + data=json.dumps( + {'loginID': username, 'password': password, 'clientId': 'vrtnu-site'} + ).encode(), + headers={ + 'Content-Type': 'application/json', + 'Oidcxsrf': self._get_cookies('https://login.vrt.be') + .get('OIDCXSRF') + .value, + }, + note='Logging in', + errnote='Login failed', + ) + self._authenticated = True + return + + +class VRTIE(VRTLoginIE): IE_DESC = 'VRT NWS, Flanders News, Flandern Info and Sporza' _VALID_URL = r'https?://(?:www\.)?(?Pvrt\.be/vrtnws|sporza\.be)/[a-z]{2}/\d{4}/\d{2}/\d{2}/(?P[^/?&#]+)' - _TESTS = [{ - 'url': 'https://www.vrt.be/vrtnws/nl/2019/05/15/beelden-van-binnenkant-notre-dame-een-maand-na-de-brand/', - 'info_dict': { - 'id': 'pbs-pub-7855fc7b-1448-49bc-b073-316cb60caa71$vid-2ca50305-c38a-4762-9890-65cbd098b7bd', - 'ext': 'mp4', - 'title': 'Beelden van binnenkant Notre-Dame, één maand na de brand', - 'description': 'md5:6fd85f999b2d1841aa5568f4bf02c3ff', - 'duration': 31.2, - 'thumbnail': 'https://images.vrt.be/orig/2019/05/15/2d914d61-7710-11e9-abcc-02b7b76bf47f.jpg', + _TESTS = [ + { + 'url': 'https://www.vrt.be/vrtnws/nl/2019/05/15/beelden-van-binnenkant-notre-dame-een-maand-na-de-brand/', + 'info_dict': { + 'id': 'pbs-pub-7855fc7b-1448-49bc-b073-316cb60caa71$vid-2ca50305-c38a-4762-9890-65cbd098b7bd', + 'ext': 'mp4', + 'title': 'Beelden van binnenkant Notre-Dame, één maand na de brand', + 'description': 'md5:6fd85f999b2d1841aa5568f4bf02c3ff', + 'duration': 31.2, + 'thumbnail': 'https://images.vrt.be/orig/2019/05/15/2d914d61-7710-11e9-abcc-02b7b76bf47f.jpg', + }, + 'params': {'skip_download': 'm3u8'}, }, - 'params': {'skip_download': 'm3u8'}, - }, { - 'url': 'https://sporza.be/nl/2019/05/15/de-belgian-cats-zijn-klaar-voor-het-ek/', - 'info_dict': { - 'id': 'pbs-pub-f2c86a46-8138-413a-a4b9-a0015a16ce2c$vid-1f112b31-e58e-4379-908d-aca6d80f8818', - 'ext': 'mp4', - 'title': 'De Belgian Cats zijn klaar voor het EK', - 'description': 'Video: De Belgian Cats zijn klaar voor het EK mét Ann Wauters | basketbal, sport in het journaal', - 'duration': 115.17, - 'thumbnail': 'https://images.vrt.be/orig/2019/05/15/11c0dba3-770e-11e9-abcc-02b7b76bf47f.jpg', + { + 'url': 'https://sporza.be/nl/2019/05/15/de-belgian-cats-zijn-klaar-voor-het-ek/', + 'info_dict': { + 'id': 'pbs-pub-e1d6e4ec-cbf4-451e-9e87-d835bb65cd28$vid-2ad45eb6-9bc8-40d4-ad72-5f25c0f59d75', + 'ext': 'mp4', + 'title': 'De Belgian Cats zijn klaar voor het EK', + 'description': 'Video: De Belgian Cats zijn klaar voor het EK mét Ann Wauters | basketbal, sport in het journaal', + 'duration': 115.17, + 'thumbnail': 'https://images.vrt.be/orig/2019/05/15/11c0dba3-770e-11e9-abcc-02b7b76bf47f.jpg', + }, + 'params': {'skip_download': 'm3u8'}, }, - 'params': {'skip_download': 'm3u8'}, - }] - _CLIENT_MAP = { - 'vrt.be/vrtnws': 'vrtnieuws', - 'sporza.be': 'sporza', + ] + _APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy' + _CONTEXT_ID = 'R3595707040' + _REST_API_BASE_TOKEN = 'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v2' + _REST_API_BASE_VIDEO = 'https://media-services-public.vrt.be/media-aggregator/v2' + _HLS_ENTRY_PROTOCOLS_MAP = { + 'HLS': 'm3u8_native', + 'HLS_AES': 'm3u8_native', } def _real_extract(self, url): @@ -147,16 +180,22 @@ class VRTIE(VRTBaseIE): attrs = extract_attributes(get_element_html_by_class('vrtvideo', webpage) or '') asset_id = attrs.get('data-video-id') or attrs['data-videoid'] - publication_id = traverse_obj(attrs, 'data-publication-id', 'data-publicationid') + publication_id = traverse_obj( + attrs, 'data-publication-id', 'data-publicationid' + ) if publication_id: asset_id = f'{publication_id}${asset_id}' - client = traverse_obj(attrs, 'data-client-code', 'data-client') or self._CLIENT_MAP[site] + client = ( + traverse_obj(attrs, 'data-client-code', 'data-client') + or self._CLIENT_MAP[site] + ) data = self._call_api(asset_id, client) formats, subtitles = self._extract_formats_and_subtitles(data, asset_id) description = self._html_search_meta( - ['og:description', 'twitter:description', 'description'], webpage) + ['og:description', 'twitter:description', 'description'], webpage + ) if description == '…': description = None @@ -168,164 +207,155 @@ class VRTIE(VRTBaseIE): 'thumbnail': url_or_none(attrs.get('data-posterimage')), 'duration': float_or_none(attrs.get('data-duration'), 1000), '_old_archive_ids': [make_archive_id('Canvas', asset_id)], - **traverse_obj(data, { - 'title': ('title', {str}), - 'description': ('shortDescription', {str}), - 'duration': ('duration', {functools.partial(float_or_none, scale=1000)}), - 'thumbnail': ('posterImageUrl', {url_or_none}), - }), + **traverse_obj( + data, + { + 'title': ('title', {str}), + 'description': ('shortDescription', {str}), + 'duration': ( + 'duration', + {functools.partial(float_or_none, scale=1000)}, + ), + 'thumbnail': ('posterImageUrl', {url_or_none}), + }, + ), } -class VrtNUIE(VRTBaseIE): +class VrtNUIE(VRTLoginIE): IE_DESC = 'VRT MAX' - _VALID_URL = r'https?://(?:www\.)?vrt\.be/vrtnu/a-z/(?:[^/]+/){2}(?P[^/?#&]+)' - _TESTS = [{ - # CONTENT_IS_AGE_RESTRICTED - 'url': 'https://www.vrt.be/vrtnu/a-z/de-ideale-wereld/2023-vj/de-ideale-wereld-d20230116/', - 'info_dict': { - 'id': 'pbs-pub-855b00a8-6ce2-4032-ac4f-1fcf3ae78524$vid-d2243aa1-ec46-4e34-a55b-92568459906f', - 'ext': 'mp4', - 'title': 'Tom Waes', - 'description': 'Satirisch actualiteitenmagazine met Ella Leyers. Tom Waes is te gast.', - 'timestamp': 1673905125, - 'release_timestamp': 1673905125, - 'series': 'De ideale wereld', - 'season_id': '1672830988794', - 'episode': 'Aflevering 1', - 'episode_number': 1, - 'episode_id': '1672830988861', - 'display_id': 'de-ideale-wereld-d20230116', - 'channel': 'VRT', - 'duration': 1939.0, - 'thumbnail': 'https://images.vrt.be/orig/2023/01/10/1bb39cb3-9115-11ed-b07d-02b7b76bf47f.jpg', - 'release_date': '20230116', - 'upload_date': '20230116', - 'age_limit': 12, + _VALID_URL = ( + r'https?://(?:www\.)?vrt\.be/(vrtmax|vrtnu)/a-z/(?:[^/]+/){2}(?P[^/?#&]+)' + ) + _TESTS = [ + { + 'url': 'https://www.vrt.be/vrtmax/a-z/pano/trailer/pano-trailer-najaar-2023/', + 'info_dict': { + 'title': 'Pano - Nieuwe afleveringen vanaf 15 november - Trailer | VRT MAX', + 'description': 'Duidingsmagazine met indringende reportages over de grote thema\'s van deze tijd. Een gedreven team van reporters diept de beste nieuwsverhalen uit en zoekt het antwoord op actuele vragen. Bekijk de trailer met VRT MAX via de site of app.', + 'timestamp': 1699246800, + 'release_timestamp': 1699246800, + 'release_date': '20231106', + 'upload_date': '20231106', + 'series': 'Pano', + 'season': 'Trailer', + 'season_number': 2023, + 'season_id': '/vrtnu/a-z/pano/trailer/#tvseason', + 'episode_id': '3226122918145', + 'id': 'pbs-pub-5260ad6d-372c-46d3-a542-0e781fd5831a$vid-75fdb750-82f5-4157-8ea9-4485f303f20b', + 'channel': 'VRT', + 'duration': 37.16, + 'thumbnail': 'https://images.vrt.be/orig/2023/11/03/f570eb9b-7a4e-11ee-91d7-02b7b76bf47f.jpg', + 'ext': 'mp4', + }, }, - }, { - 'url': 'https://www.vrt.be/vrtnu/a-z/buurman--wat-doet-u-nu-/6/buurman--wat-doet-u-nu--s6-trailer/', - 'info_dict': { - 'id': 'pbs-pub-ad4050eb-d9e5-48c2-9ec8-b6c355032361$vid-0465537a-34a8-4617-8352-4d8d983b4eee', - 'ext': 'mp4', - 'title': 'Trailer seizoen 6 \'Buurman, wat doet u nu?\'', - 'description': 'md5:197424726c61384b4e5c519f16c0cf02', - 'timestamp': 1652940000, - 'release_timestamp': 1652940000, - 'series': 'Buurman, wat doet u nu?', - 'season': 'Seizoen 6', - 'season_number': 6, - 'season_id': '1652344200907', - 'episode': 'Aflevering 0', - 'episode_number': 0, - 'episode_id': '1652951873524', - 'display_id': 'buurman--wat-doet-u-nu--s6-trailer', - 'channel': 'VRT', - 'duration': 33.13, - 'thumbnail': 'https://images.vrt.be/orig/2022/05/23/3c234d21-da83-11ec-b07d-02b7b76bf47f.jpg', - 'release_date': '20220519', - 'upload_date': '20220519', + { + 'url': 'https://www.vrt.be/vrtnu/a-z/factcheckers/trailer/factcheckers-trailer-s4/', + 'info_dict': { + 'title': 'Factcheckers - Nieuwe afleveringen vanaf 15 november - Trailer | VRT MAX', + 'season_number': 2023, + 'description': 'Infotainmentprogramma waarin Thomas Vanderveken, Jan Van Looveren en Britt Van Marsenille checken wat er nu eigenlijk klopt van de tsunami aan berichten, beweringen en weetjes die we dagelijks over ons heen krijgen. Bekijk de trailer met VRT MAX via de site of app.', + 'timestamp': 1699160400, + 'release_timestamp': 1699160400, + 'release_date': '20231105', + 'upload_date': '20231105', + 'series': 'Factcheckers', + 'episode': '0', + 'episode_number': 0, + 'season': 'Trailer', + 'season_id': '/vrtnu/a-z/factcheckers/trailer/#tvseason', + 'episode_id': '3179360900145', + 'id': 'pbs-pub-aa9397e9-ec2b-45f9-9148-7ce71b690b45$vid-04c67438-4866-4f5c-8978-51d173c0074b', + 'channel': 'VRT', + 'duration': 33.08, + 'thumbnail': 'https://images.vrt.be/orig/2023/11/07/37d244f0-7d8a-11ee-91d7-02b7b76bf47f.jpg', + 'ext': 'mp4', + }, }, - 'params': {'skip_download': 'm3u8'}, - }] - _NETRC_MACHINE = 'vrtnu' - _authenticated = False + ] - def _perform_login(self, username, password): - auth_info = self._gigya_login({ - 'APIKey': '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy', - 'targetEnv': 'jssdk', - 'loginID': username, - 'password': password, - 'authMode': 'cookie', - }) - - if auth_info.get('errorDetails'): - raise ExtractorError(f'Unable to login. VrtNU said: {auth_info["errorDetails"]}', expected=True) - - # Sometimes authentication fails for no good reason, retry - for retry in self.RetryManager(): - if retry.attempt > 1: - self._sleep(1, None) - try: - self._request_webpage( - 'https://token.vrt.be/vrtnuinitlogin', None, note='Requesting XSRF Token', - errnote='Could not get XSRF Token', query={ - 'provider': 'site', - 'destination': 'https://www.vrt.be/vrtnu/', - }) - self._request_webpage( - 'https://login.vrt.be/perform_login', None, - note='Performing login', errnote='Login failed', - query={'client_id': 'vrtnu-site'}, data=urlencode_postdata({ - 'UID': auth_info['UID'], - 'UIDSignature': auth_info['UIDSignature'], - 'signatureTimestamp': auth_info['signatureTimestamp'], - '_csrf': self._get_cookies('https://login.vrt.be').get('OIDCXSRF').value, - })) - except ExtractorError as e: - if isinstance(e.cause, HTTPError) and e.cause.status == 401: - retry.error = e - continue - raise + _NETRC_MACHINE = 'vrtnu' - self._authenticated = True + _VIDEOPAGE_QUERY = 'query VideoPage($pageId: ID!) {\n page(id: $pageId) {\n ... on EpisodePage {\n id\n title\n seo {\n ...seoFragment\n __typename\n }\n ldjson\n episode {\n onTimeRaw\n ageRaw\n name\n episodeNumberRaw\n program {\n title\n __typename\n }\n watchAction {\n streamId\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n}\nfragment seoFragment on SeoProperties {\n __typename\n title\n description\n}' def _real_extract(self, url): display_id = self._match_id(url) parsed_url = urllib.parse.urlparse(url) - details = self._download_json( - f'{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path.rstrip("/")}.model.json', - display_id, 'Downloading asset JSON', 'Unable to download asset JSON')['details'] - - watch_info = traverse_obj(details, ( - 'actions', lambda _, v: v['type'] == 'watch-episode', {dict}), get_all=False) or {} - video_id = join_nonempty( - 'episodePublicationId', 'episodeVideoId', delim='$', from_dict=watch_info) - if '$' not in video_id: - raise ExtractorError('Unable to extract video ID') - - vrtnutoken = self._download_json( - 'https://token.vrt.be/refreshtoken', video_id, note='Retrieving vrtnutoken', - errnote='Token refresh failed')['vrtnutoken'] if self._authenticated else None - - video_info = self._call_api(video_id, 'vrtnu-web@PROD', vrtnutoken) - - if 'title' not in video_info: - code = video_info.get('code') - if code in ('AUTHENTICATION_REQUIRED', 'CONTENT_IS_AGE_RESTRICTED'): - self.raise_login_required(code, method='password') - elif code in ('INVALID_LOCATION', 'CONTENT_AVAILABLE_ONLY_IN_BE'): - self.raise_geo_restricted(countries=['BE']) - elif code == 'CONTENT_AVAILABLE_ONLY_FOR_BE_RESIDENTS_AND_EXPATS': - if not self._authenticated: - self.raise_login_required(code, method='password') - self.raise_geo_restricted(countries=['BE']) - raise ExtractorError(code, expected=True) - - formats, subtitles = self._extract_formats_and_subtitles(video_info, video_id) + + self._request_webpage( + 'https://www.vrt.be/vrtnu/sso/login', + None, + note='Getting tokens', + errnote='Failed to get tokens', + ) + + metadata = self._download_json( + 'https://www.vrt.be/vrtnu-api/graphql/v1', + display_id, + 'Downloading asset JSON', + 'Unable to download asset JSON', + headers={ + 'Content-Type': 'application/json', + 'Authorization': f'Bearer {self._get_cookies("https://www.vrt.be").get("vrtnu-site_profile_at").value}', + }, + data=json.dumps( + { + 'operationName': 'VideoPage', + 'query': self._VIDEOPAGE_QUERY, + 'variables': { + 'pageId': f'{parsed_url.path.rstrip("/")}.model.json' + }, + } + ).encode(), + )['data']['page'] + + video_id = metadata['episode']['watchAction']['streamId'] + try: + ld_json = json.loads(metadata['ldjson'][1]) + except Exception: + ld_json = {} + + streaming_info = self._call_api(video_id, client='vrtnu-web@PROD') + formats, subtitles = self._extract_formats_and_subtitles( + streaming_info, video_id + ) return { - **traverse_obj(details, { - 'title': 'title', - 'description': ('description', {clean_html}), - 'timestamp': ('data', 'episode', 'onTime', 'raw', {parse_iso8601}), - 'release_timestamp': ('data', 'episode', 'onTime', 'raw', {parse_iso8601}), - 'series': ('data', 'program', 'title'), - 'season': ('data', 'season', 'title', 'value'), - 'season_number': ('data', 'season', 'title', 'raw', {int_or_none}), - 'season_id': ('data', 'season', 'id', {str_or_none}), - 'episode': ('data', 'episode', 'number', 'value', {str_or_none}), - 'episode_number': ('data', 'episode', 'number', 'raw', {int_or_none}), - 'episode_id': ('data', 'episode', 'id', {str_or_none}), - 'age_limit': ('data', 'episode', 'age', 'raw', {parse_age_limit}), - }), + **traverse_obj( + metadata, + { + 'title': ('seo', 'title', {str_or_none}), + 'season_number': ( + 'episode', + 'onTimeRaw', + {lambda x: x[:4]}, + {int_or_none}, + ), + 'description': ('seo', 'description', {str_or_none}), + 'timestamp': ('episode', 'onTimeRaw', {parse_iso8601}), + 'release_timestamp': ('episode', 'onTimeRaw', {parse_iso8601}), + 'release_date': ('episode', 'onTimeRaw', {unified_strdate}), + 'upload_date': ('episode', 'onTimeRaw', {unified_strdate}), + 'series': ('episode', 'program', 'title'), + 'episode': ('episode', 'episodeNumberRaw', {str_or_none}), + 'episode_number': ('episode', 'episodeNumberRaw', {int_or_none}), + 'age_limit': ('episode', 'ageRaw', {parse_age_limit}), + 'display_id': ('episode', 'name', {parse_age_limit}), + }, + ), + **traverse_obj( + ld_json, + { + 'season': ('partOfSeason', 'name'), + 'season_id': ('partOfSeason', '@id'), + 'episode_id': ('@id', {str_or_none}), + }, + ), 'id': video_id, - 'display_id': display_id, 'channel': 'VRT', 'formats': formats, - 'duration': float_or_none(video_info.get('duration'), 1000), - 'thumbnail': url_or_none(video_info.get('posterImageUrl')), + 'duration': float_or_none(streaming_info.get('duration'), 1000), + 'thumbnail': url_or_none(streaming_info.get('posterImageUrl')), 'subtitles': subtitles, '_old_archive_ids': [make_archive_id('Canvas', video_id)], } @@ -333,26 +363,30 @@ class VrtNUIE(VRTBaseIE): class KetnetIE(VRTBaseIE): _VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?P(?:[^/]+/)*[^/?#&]+)' - _TESTS = [{ - 'url': 'https://www.ketnet.be/kijken/m/meisjes/6/meisjes-s6a5', - 'info_dict': { - 'id': 'pbs-pub-39f8351c-a0a0-43e6-8394-205d597d6162$vid-5e306921-a9aa-4fa9-9f39-5b82c8f1028e', - 'ext': 'mp4', - 'title': 'Meisjes', - 'episode': 'Reeks 6: Week 5', - 'season': 'Reeks 6', - 'series': 'Meisjes', - 'timestamp': 1685251800, - 'upload_date': '20230528', - }, - 'params': {'skip_download': 'm3u8'}, - }] + _TESTS = [ + { + 'url': 'https://www.ketnet.be/kijken/m/meisjes/6/meisjes-s6a5', + 'info_dict': { + 'id': 'pbs-pub-39f8351c-a0a0-43e6-8394-205d597d6162$vid-5e306921-a9aa-4fa9-9f39-5b82c8f1028e', + 'ext': 'mp4', + 'title': 'Meisjes', + 'episode': 'Reeks 6: Week 5', + 'season': 'Reeks 6', + 'series': 'Meisjes', + 'timestamp': 1685251800, + 'upload_date': '20230528', + }, + 'params': {'skip_download': 'm3u8'}, + } + ] def _real_extract(self, url): display_id = self._match_id(url) video = self._download_json( - 'https://senior-bff.ketnet.be/graphql', display_id, query={ + 'https://senior-bff.ketnet.be/graphql', + display_id, + query={ 'query': '''{ video(id: "content/ketnet/nl/%s.model.json") { description @@ -365,8 +399,10 @@ class KetnetIE(VRTBaseIE): subtitleVideodetail titleVideodetail } -}''' % display_id, - })['data']['video'] +}''' + % display_id, + }, + )['data']['video'] video_id = urllib.parse.unquote(video['mediaReference']) data = self._call_api(video_id, 'ketnet@PROD', version='v1') @@ -377,39 +413,45 @@ class KetnetIE(VRTBaseIE): 'formats': formats, 'subtitles': subtitles, '_old_archive_ids': [make_archive_id('Canvas', video_id)], - **traverse_obj(video, { - 'title': ('titleVideodetail', {str}), - 'description': ('description', {str}), - 'thumbnail': ('thumbnail', {url_or_none}), - 'timestamp': ('publicationDate', {parse_iso8601}), - 'series': ('programTitle', {str}), - 'season': ('seasonTitle', {str}), - 'episode': ('subtitleVideodetail', {str}), - 'episode_number': ('episodeNr', {int_or_none}), - }), + **traverse_obj( + video, + { + 'title': ('titleVideodetail', {str}), + 'description': ('description', {str}), + 'thumbnail': ('thumbnail', {url_or_none}), + 'timestamp': ('publicationDate', {parse_iso8601}), + 'series': ('programTitle', {str}), + 'season': ('seasonTitle', {str}), + 'episode': ('subtitleVideodetail', {str}), + 'episode_number': ('episodeNr', {int_or_none}), + }, + ), } class DagelijkseKostIE(VRTBaseIE): IE_DESC = 'dagelijksekost.een.be' _VALID_URL = r'https?://dagelijksekost\.een\.be/gerechten/(?P[^/?#&]+)' - _TESTS = [{ - 'url': 'https://dagelijksekost.een.be/gerechten/hachis-parmentier-met-witloof', - 'info_dict': { - 'id': 'md-ast-27a4d1ff-7d7b-425e-b84f-a4d227f592fa', - 'ext': 'mp4', - 'title': 'Hachis parmentier met witloof', - 'description': 'md5:9960478392d87f63567b5b117688cdc5', - 'display_id': 'hachis-parmentier-met-witloof', - }, - 'params': {'skip_download': 'm3u8'}, - }] + _TESTS = [ + { + 'url': 'https://dagelijksekost.een.be/gerechten/hachis-parmentier-met-witloof', + 'info_dict': { + 'id': 'md-ast-27a4d1ff-7d7b-425e-b84f-a4d227f592fa', + 'ext': 'mp4', + 'title': 'Hachis parmentier met witloof', + 'description': 'md5:9960478392d87f63567b5b117688cdc5', + 'display_id': 'hachis-parmentier-met-witloof', + }, + 'params': {'skip_download': 'm3u8'}, + } + ] def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) video_id = self._html_search_regex( - r'data-url=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'video id', group='id') + r'data-url=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'video id', group='id' + ) data = self._call_api(video_id, 'dako@prod', version='v1') formats, subtitles = self._extract_formats_and_subtitles(data, video_id) @@ -419,11 +461,14 @@ class DagelijkseKostIE(VRTBaseIE): 'formats': formats, 'subtitles': subtitles, 'display_id': display_id, - 'title': strip_or_none(get_element_by_class( - 'dish-metadata__title', webpage) or self._html_search_meta('twitter:title', webpage)), - 'description': clean_html(get_element_by_class( - 'dish-description', webpage)) or self._html_search_meta( - ['description', 'twitter:description', 'og:description'], webpage), + 'title': strip_or_none( + get_element_by_class('dish-metadata__title', webpage) + or self._html_search_meta('twitter:title', webpage) + ), + 'description': clean_html(get_element_by_class('dish-description', webpage)) + or self._html_search_meta( + ['description', 'twitter:description', 'og:description'], webpage + ), '_old_archive_ids': [make_archive_id('Canvas', video_id)], }