mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-17 15:25:49 +00:00
[brightcove] delegate all supported BrightcoveLegacyIE URLs to BrightcoveNewIE
closes #11523 closes #12842 closes #13912 closes #15669 closes #16303
This commit is contained in:
parent
d9d3098675
commit
f455a934e9
@ -2,7 +2,6 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
import struct
|
import struct
|
||||||
|
|
||||||
@ -11,14 +10,12 @@
|
|||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_str,
|
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
compat_xml_parse_error,
|
compat_xml_parse_error,
|
||||||
compat_HTTPError,
|
compat_HTTPError,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
@ -27,18 +24,19 @@
|
|||||||
js_to_json,
|
js_to_json,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
smuggle_url,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
clean_html,
|
clean_html,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
|
UnsupportedError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class BrightcoveLegacyIE(InfoExtractor):
|
class BrightcoveLegacyIE(InfoExtractor):
|
||||||
IE_NAME = 'brightcove:legacy'
|
IE_NAME = 'brightcove:legacy'
|
||||||
_VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
|
_VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
|
||||||
_FEDERATED_URL = 'http://c.brightcove.com/services/viewer/htmlFederated'
|
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
@ -55,7 +53,8 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
'timestamp': 1368213670,
|
'timestamp': 1368213670,
|
||||||
'upload_date': '20130510',
|
'upload_date': '20130510',
|
||||||
'uploader_id': '1589608506001',
|
'uploader_id': '1589608506001',
|
||||||
}
|
},
|
||||||
|
'skip': 'The player has been deactivated by the content owner',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# From http://medianetwork.oracle.com/video/player/1785452137001
|
# From http://medianetwork.oracle.com/video/player/1785452137001
|
||||||
@ -70,6 +69,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
'upload_date': '20120814',
|
'upload_date': '20120814',
|
||||||
'uploader_id': '1460825906',
|
'uploader_id': '1460825906',
|
||||||
},
|
},
|
||||||
|
'skip': 'video not playable',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/
|
# From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/
|
||||||
@ -79,7 +79,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'This Bracelet Acts as a Personal Thermostat',
|
'title': 'This Bracelet Acts as a Personal Thermostat',
|
||||||
'description': 'md5:547b78c64f4112766ccf4e151c20b6a0',
|
'description': 'md5:547b78c64f4112766ccf4e151c20b6a0',
|
||||||
'uploader': 'Mashable',
|
# 'uploader': 'Mashable',
|
||||||
'timestamp': 1382041798,
|
'timestamp': 1382041798,
|
||||||
'upload_date': '20131017',
|
'upload_date': '20131017',
|
||||||
'uploader_id': '1130468786001',
|
'uploader_id': '1130468786001',
|
||||||
@ -124,6 +124,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
'id': '3550319591001',
|
'id': '3550319591001',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 7,
|
'playlist_mincount': 7,
|
||||||
|
'skip': 'Unsupported URL',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# playlist with 'playlistTab' (https://github.com/ytdl-org/youtube-dl/issues/9965)
|
# playlist with 'playlistTab' (https://github.com/ytdl-org/youtube-dl/issues/9965)
|
||||||
@ -133,6 +134,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
'title': 'Lesson 08',
|
'title': 'Lesson 08',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 10,
|
'playlist_mincount': 10,
|
||||||
|
'skip': 'Unsupported URL',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# playerID inferred from bcpid
|
# playerID inferred from bcpid
|
||||||
@ -141,12 +143,6 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
'only_matching': True, # Tested in GenericIE
|
'only_matching': True, # Tested in GenericIE
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
FLV_VCODECS = {
|
|
||||||
1: 'SORENSON',
|
|
||||||
2: 'ON2',
|
|
||||||
3: 'H264',
|
|
||||||
4: 'VP8',
|
|
||||||
}
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _build_brighcove_url(cls, object_str):
|
def _build_brighcove_url(cls, object_str):
|
||||||
@ -238,7 +234,8 @@ def _build_brighcove_url_from_js(cls, object_js):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _make_brightcove_url(cls, params):
|
def _make_brightcove_url(cls, params):
|
||||||
return update_url_query(cls._FEDERATED_URL, params)
|
return update_url_query(
|
||||||
|
'http://c.brightcove.com/services/viewer/htmlFederated', params)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _extract_brightcove_url(cls, webpage):
|
def _extract_brightcove_url(cls, webpage):
|
||||||
@ -297,38 +294,12 @@ def _real_extract(self, url):
|
|||||||
videoPlayer = query.get('@videoPlayer')
|
videoPlayer = query.get('@videoPlayer')
|
||||||
if videoPlayer:
|
if videoPlayer:
|
||||||
# We set the original url as the default 'Referer' header
|
# We set the original url as the default 'Referer' header
|
||||||
referer = smuggled_data.get('Referer', url)
|
referer = query.get('linkBaseURL', [None])[0] or smuggled_data.get('Referer', url)
|
||||||
|
video_id = videoPlayer[0]
|
||||||
if 'playerID' not in query:
|
if 'playerID' not in query:
|
||||||
mobj = re.search(r'/bcpid(\d+)', url)
|
mobj = re.search(r'/bcpid(\d+)', url)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
query['playerID'] = [mobj.group(1)]
|
query['playerID'] = [mobj.group(1)]
|
||||||
return self._get_video_info(
|
|
||||||
videoPlayer[0], query, referer=referer)
|
|
||||||
elif 'playerKey' in query:
|
|
||||||
player_key = query['playerKey']
|
|
||||||
return self._get_playlist_info(player_key[0])
|
|
||||||
else:
|
|
||||||
raise ExtractorError(
|
|
||||||
'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
|
|
||||||
expected=True)
|
|
||||||
|
|
||||||
def _brightcove_new_url_result(self, publisher_id, video_id):
|
|
||||||
brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id)
|
|
||||||
return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
|
|
||||||
|
|
||||||
def _get_video_info(self, video_id, query, referer=None):
|
|
||||||
headers = {}
|
|
||||||
linkBase = query.get('linkBaseURL')
|
|
||||||
if linkBase is not None:
|
|
||||||
referer = linkBase[0]
|
|
||||||
if referer is not None:
|
|
||||||
headers['Referer'] = referer
|
|
||||||
webpage = self._download_webpage(self._FEDERATED_URL, video_id, headers=headers, query=query)
|
|
||||||
|
|
||||||
error_msg = self._html_search_regex(
|
|
||||||
r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
|
|
||||||
'error message', default=None)
|
|
||||||
if error_msg is not None:
|
|
||||||
publisher_id = query.get('publisherId')
|
publisher_id = query.get('publisherId')
|
||||||
if publisher_id and publisher_id[0].isdigit():
|
if publisher_id and publisher_id[0].isdigit():
|
||||||
publisher_id = publisher_id[0]
|
publisher_id = publisher_id[0]
|
||||||
@ -339,6 +310,9 @@ def _get_video_info(self, video_id, query, referer=None):
|
|||||||
else:
|
else:
|
||||||
player_id = query.get('playerID')
|
player_id = query.get('playerID')
|
||||||
if player_id and player_id[0].isdigit():
|
if player_id and player_id[0].isdigit():
|
||||||
|
headers = {}
|
||||||
|
if referer:
|
||||||
|
headers['Referer'] = referer
|
||||||
player_page = self._download_webpage(
|
player_page = self._download_webpage(
|
||||||
'http://link.brightcove.com/services/player/bcpid' + player_id[0],
|
'http://link.brightcove.com/services/player/bcpid' + player_id[0],
|
||||||
video_id, headers=headers, fatal=False)
|
video_id, headers=headers, fatal=False)
|
||||||
@ -349,136 +323,16 @@ def _get_video_info(self, video_id, query, referer=None):
|
|||||||
if player_key:
|
if player_key:
|
||||||
enc_pub_id = player_key.split(',')[1].replace('~', '=')
|
enc_pub_id = player_key.split(',')[1].replace('~', '=')
|
||||||
publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
|
publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
|
||||||
if publisher_id:
|
if publisher_id:
|
||||||
return self._brightcove_new_url_result(publisher_id, video_id)
|
brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id)
|
||||||
raise ExtractorError(
|
if referer:
|
||||||
'brightcove said: %s' % error_msg, expected=True)
|
brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer})
|
||||||
|
return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
|
||||||
self.report_extraction(video_id)
|
# TODO: figure out if it's possible to extract playlistId from playerKey
|
||||||
info = self._search_regex(r'var experienceJSON = ({.*});', webpage, 'json')
|
# elif 'playerKey' in query:
|
||||||
info = json.loads(info)['data']
|
# player_key = query['playerKey']
|
||||||
video_info = info['programmedContent']['videoPlayer']['mediaDTO']
|
# return self._get_playlist_info(player_key[0])
|
||||||
video_info['_youtubedl_adServerURL'] = info.get('adServerURL')
|
raise UnsupportedError(url)
|
||||||
|
|
||||||
return self._extract_video_info(video_info)
|
|
||||||
|
|
||||||
def _get_playlist_info(self, player_key):
|
|
||||||
info_url = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' % player_key
|
|
||||||
playlist_info = self._download_webpage(
|
|
||||||
info_url, player_key, 'Downloading playlist information')
|
|
||||||
|
|
||||||
json_data = json.loads(playlist_info)
|
|
||||||
if 'videoList' in json_data:
|
|
||||||
playlist_info = json_data['videoList']
|
|
||||||
playlist_dto = playlist_info['mediaCollectionDTO']
|
|
||||||
elif 'playlistTabs' in json_data:
|
|
||||||
playlist_info = json_data['playlistTabs']
|
|
||||||
playlist_dto = playlist_info['lineupListDTO']['playlistDTOs'][0]
|
|
||||||
else:
|
|
||||||
raise ExtractorError('Empty playlist')
|
|
||||||
|
|
||||||
videos = [self._extract_video_info(video_info) for video_info in playlist_dto['videoDTOs']]
|
|
||||||
|
|
||||||
return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'],
|
|
||||||
playlist_title=playlist_dto['displayName'])
|
|
||||||
|
|
||||||
def _extract_video_info(self, video_info):
|
|
||||||
video_id = compat_str(video_info['id'])
|
|
||||||
publisher_id = video_info.get('publisherId')
|
|
||||||
info = {
|
|
||||||
'id': video_id,
|
|
||||||
'title': video_info['displayName'].strip(),
|
|
||||||
'description': video_info.get('shortDescription'),
|
|
||||||
'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
|
|
||||||
'uploader': video_info.get('publisherName'),
|
|
||||||
'uploader_id': compat_str(publisher_id) if publisher_id else None,
|
|
||||||
'duration': float_or_none(video_info.get('length'), 1000),
|
|
||||||
'timestamp': int_or_none(video_info.get('creationDate'), 1000),
|
|
||||||
}
|
|
||||||
|
|
||||||
renditions = video_info.get('renditions', []) + video_info.get('IOSRenditions', [])
|
|
||||||
if renditions:
|
|
||||||
formats = []
|
|
||||||
for rend in renditions:
|
|
||||||
url = rend['defaultURL']
|
|
||||||
if not url:
|
|
||||||
continue
|
|
||||||
ext = None
|
|
||||||
if rend['remote']:
|
|
||||||
url_comp = compat_urllib_parse_urlparse(url)
|
|
||||||
if url_comp.path.endswith('.m3u8'):
|
|
||||||
formats.extend(
|
|
||||||
self._extract_m3u8_formats(
|
|
||||||
url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
|
||||||
continue
|
|
||||||
elif 'akamaihd.net' in url_comp.netloc:
|
|
||||||
# This type of renditions are served through
|
|
||||||
# akamaihd.net, but they don't use f4m manifests
|
|
||||||
url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB'
|
|
||||||
ext = 'flv'
|
|
||||||
if ext is None:
|
|
||||||
ext = determine_ext(url)
|
|
||||||
tbr = int_or_none(rend.get('encodingRate'), 1000)
|
|
||||||
a_format = {
|
|
||||||
'format_id': 'http%s' % ('-%s' % tbr if tbr else ''),
|
|
||||||
'url': url,
|
|
||||||
'ext': ext,
|
|
||||||
'filesize': int_or_none(rend.get('size')) or None,
|
|
||||||
'tbr': tbr,
|
|
||||||
}
|
|
||||||
if rend.get('audioOnly'):
|
|
||||||
a_format.update({
|
|
||||||
'vcodec': 'none',
|
|
||||||
})
|
|
||||||
else:
|
|
||||||
a_format.update({
|
|
||||||
'height': int_or_none(rend.get('frameHeight')),
|
|
||||||
'width': int_or_none(rend.get('frameWidth')),
|
|
||||||
'vcodec': rend.get('videoCodec'),
|
|
||||||
})
|
|
||||||
|
|
||||||
# m3u8 manifests with remote == false are media playlists
|
|
||||||
# Not calling _extract_m3u8_formats here to save network traffic
|
|
||||||
if ext == 'm3u8':
|
|
||||||
a_format.update({
|
|
||||||
'format_id': 'hls%s' % ('-%s' % tbr if tbr else ''),
|
|
||||||
'ext': 'mp4',
|
|
||||||
'protocol': 'm3u8_native',
|
|
||||||
})
|
|
||||||
|
|
||||||
formats.append(a_format)
|
|
||||||
self._sort_formats(formats)
|
|
||||||
info['formats'] = formats
|
|
||||||
elif video_info.get('FLVFullLengthURL') is not None:
|
|
||||||
info.update({
|
|
||||||
'url': video_info['FLVFullLengthURL'],
|
|
||||||
'vcodec': self.FLV_VCODECS.get(video_info.get('FLVFullCodec')),
|
|
||||||
'filesize': int_or_none(video_info.get('FLVFullSize')),
|
|
||||||
})
|
|
||||||
|
|
||||||
if self._downloader.params.get('include_ads', False):
|
|
||||||
adServerURL = video_info.get('_youtubedl_adServerURL')
|
|
||||||
if adServerURL:
|
|
||||||
ad_info = {
|
|
||||||
'_type': 'url',
|
|
||||||
'url': adServerURL,
|
|
||||||
}
|
|
||||||
if 'url' in info:
|
|
||||||
return {
|
|
||||||
'_type': 'playlist',
|
|
||||||
'title': info['title'],
|
|
||||||
'entries': [ad_info, info],
|
|
||||||
}
|
|
||||||
else:
|
|
||||||
return ad_info
|
|
||||||
|
|
||||||
if not info.get('url') and not info.get('formats'):
|
|
||||||
uploader_id = info.get('uploader_id')
|
|
||||||
if uploader_id:
|
|
||||||
info.update(self._brightcove_new_url_result(uploader_id, video_id))
|
|
||||||
else:
|
|
||||||
raise ExtractorError('Unable to extract video url for %s' % video_id)
|
|
||||||
return info
|
|
||||||
|
|
||||||
|
|
||||||
class BrightcoveNewIE(AdobePassIE):
|
class BrightcoveNewIE(AdobePassIE):
|
||||||
|
Loading…
Reference in New Issue
Block a user