From 6f8649f21315990dc28bc1c239d693c9a44f3d6f Mon Sep 17 00:00:00 2001
From: JerryZhouSirui <1806430169@qq.com>
Date: Fri, 26 Apr 2024 16:12:35 -0400
Subject: [PATCH] Fix beatport and dplay extractor

beatport: tolerate optional whitespace before the semicolon that ends
window.Playables, and raise ExtractorError when the requested track is
missing instead of failing with StopIteration/KeyError.

dplay: add an opt-in api_version=3 code path (JSON POST to
playback/v3/videoPlaybackInfo plus Origin/Referer headers). The default
api_version=2 keeps the previous behaviour.

---
Review notes (this section is ignored by git-am):
- beatport: dropped the try/except re.error wrapper; with default='{}' the
  search never raised, so the handler was unreachable.
- beatport: kept the original format/thumbnail/return code; the rewrite
  tagged every non-mp3 preview as 96 kbps AAC and lost the empty-slug
  fallback.
- dplay: the v3 request now goes through self._download_json instead of
  requests.post, so failures surface as ExtractorError and reach the
  existing 403 handling; the new "import requests" is gone.
- dplay: the DRM check raises ExtractorError instead of using assert.
- dplay: api_version is only forwarded to the two hook methods for v3.
- Hunk headers and the diffstat were recomputed; the post-image blob ids on
  the "index" lines are carried over from the first submission and are stale.

 yt_dlp/extractor/beatport.py | 19 ++++++++-----
 yt_dlp/extractor/dplay.py    | 56 +++++++++++++++++++++++++++++++----
 2 files changed, 61 insertions(+), 14 deletions(-)

diff --git a/yt_dlp/extractor/beatport.py b/yt_dlp/extractor/beatport.py
index 0aecbd089..ee3ae1223 100644
--- a/yt_dlp/extractor/beatport.py
+++ b/yt_dlp/extractor/beatport.py
@@ -2,7 +2,7 @@ import re
 
 from .common import InfoExtractor
 from ..compat import compat_str
-from ..utils import int_or_none
+from ..utils import ExtractorError, int_or_none
 
 
 class BeatportIE(InfoExtractor):
@@ -45,19 +45,22 @@ class BeatportIE(InfoExtractor):
 
         playables = self._parse_json(
             self._search_regex(
-                r'window\.Playables\s*=\s*({.+?});', webpage,
+                r'window\.Playables\s*=\s*({.+?})\s*;', webpage,
                 'playables info', flags=re.DOTALL),
             track_id)
 
-        track = next(t for t in playables['tracks'] if t['id'] == int(track_id))
+        tracks = playables.get('tracks') or []
+        track = next((t for t in tracks if t.get('id') == int(track_id)), None)
+        if not track:
+            raise ExtractorError(f'No track with ID {track_id} found; the page structure may have changed')
 
-        title = ', '.join((a['name'] for a in track['artists'])) + ' - ' + track['name']
-        if track['mix']:
+        title = ', '.join(a['name'] for a in track['artists']) + ' - ' + track['name']
+        if track.get('mix'):
             title += ' (' + track['mix'] + ')'
 
         formats = []
-        for ext, info in track['preview'].items():
-            if not info['url']:
+        for ext, info in (track.get('preview') or {}).items():
+            if not info.get('url'):
                 continue
             fmt = {
                 'url': info['url'],
@@ -76,7 +79,7 @@ class BeatportIE(InfoExtractor):
             formats.append(fmt)
 
         images = []
-        for name, info in track['images'].items():
+        for name, info in (track.get('images') or {}).items():
             image_url = info.get('url')
             if name == 'dynamic' or not image_url:
                 continue
diff --git a/yt_dlp/extractor/dplay.py b/yt_dlp/extractor/dplay.py
index 363b4bec9..fdb38d5a7 100644
--- a/yt_dlp/extractor/dplay.py
+++ b/yt_dlp/extractor/dplay.py
@@ -1,6 +1,7 @@
 import json
+import urllib.parse
 import uuid
 
 from .common import InfoExtractor
 from ..networking.exceptions import HTTPError
 from ..utils import (
@@ -49,10 +50,42 @@ class DPlayBaseIE(InfoExtractor):
                 'This video is only available for registered users. You may want to use --cookies.', expected=True)
         raise ExtractorError(info['errors'][0]['detail'], expected=True)
 
-    def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
-        headers['Authorization'] = self._get_auth(disco_base, display_id, realm, False)
+    def _update_disco_api_headers(self, headers, disco_base, display_id, realm, api_version=2):
+        """Set the Authorization header in-place for the given Disco API version."""
+        # API v3 passes True as the fourth _get_auth() argument; v2 keeps the old False
+        headers['Authorization'] = self._get_auth(disco_base, display_id, realm, api_version == 3)
 
-    def _download_video_playback_info(self, disco_base, video_id, headers):
+    def _download_video_playback_info(self, disco_base, video_id, headers, api_version=2):
+        """
+        Fetch the Disco API playback info.
+
+        :param disco_base: The base URL, i.e. https://{region}{instance_number}-prod.disco-api.com/
+        :param video_id: The video ID; part of the v2 URL and used for log output.
+        :param headers: The headers to be used for the request.
+        :param api_version: 3 POSTs a JSON body to the v3 endpoint. Default is 2 to keep old behaviour.
+        :return: The streaming list: the v3 response list as-is, or one {'type', 'url'} dict
+            per entry of the v2 response mapping.
+        """
+        if api_version == 3:
+            streaming_list = self._download_json(
+                disco_base + 'playback/v3/videoPlaybackInfo', video_id,
+                headers={**headers, 'Content-Type': 'application/json'},
+                data=json.dumps({
+                    # deviceInfo is mandatory, some keys inside are optional!
+                    'deviceInfo': {
+                        'adBlocker': False,
+                        'drmSupported': False,
+                    },
+                    'videoId': str(video_id),
+                }).encode())['data']['attributes']['streaming']
+            # Raise instead of assert: asserts are stripped under -O, and a missing
+            # "protection" entry or an empty list must not crash the extractor
+            protection = (streaming_list[0].get('protection') or {}) if streaming_list else {}
+            if protection.get('drmEnabled'):
+                raise ExtractorError('This video is DRM protected', expected=True)
+            return streaming_list
+
+        # old behaviour
         streaming = self._download_json(
             disco_base + 'playback/videoPlaybackInfo/' + video_id,
             video_id, headers=headers)['data']['attributes']['streaming']
@@ -64,7 +97,7 @@ class DPlayBaseIE(InfoExtractor):
             })
         return streaming_list
 
-    def _get_disco_api_info(self, url, display_id, disco_host, realm, country, domain=''):
+    def _get_disco_api_info(self, url, display_id, disco_host, realm, country, domain='', api_version=2):
         country = self.get_param('geo_bypass_country') or country
         geo_countries = [country.upper()]
         self._initialize_geo_bypass({
@@ -74,7 +107,18 @@ class DPlayBaseIE(InfoExtractor):
         headers = {
             'Referer': url,
         }
-        self._update_disco_api_headers(headers, disco_base, display_id, realm)
+        # Overrides of the hooks called below may predate api_version (none are
+        # visible in this patch), so the argument is only forwarded for v3
+        hook_kwargs = {}
+        if api_version == 3:
+            # v3 sends scheme://host as Origin and the site root as Referer
+            origin = '{0.scheme}://{0.netloc}'.format(urllib.parse.urlsplit(url))
+            headers.update({
+                'Referer': origin + '/',
+                'Origin': origin,
+            })
+            hook_kwargs['api_version'] = api_version
+        self._update_disco_api_headers(headers, disco_base, display_id, realm, **hook_kwargs)
         try:
             video = self._download_json(
                 disco_base + 'content/videos/' + display_id, display_id,
@@ -97,7 +141,7 @@ class DPlayBaseIE(InfoExtractor):
         subtitles = {}
         try:
             streaming = self._download_video_playback_info(
-                disco_base, video_id, headers)
+                disco_base, video_id, headers, **hook_kwargs)
         except ExtractorError as e:
             if isinstance(e.cause, HTTPError) and e.cause.status == 403:
                 self._process_errors(e, geo_countries)