Merge branch 'master' of github.com:rg3/youtube-dl

This commit is contained in:
Philipp Hagemeister 2015-02-20 23:20:14 +01:00
commit e14ced7918
4 changed files with 148 additions and 29 deletions

View File

@ -490,6 +490,7 @@
from .tunein import TuneInIE from .tunein import TuneInIE
from .turbo import TurboIE from .turbo import TurboIE
from .tutv import TutvIE from .tutv import TutvIE
from .tv4 import TV4IE
from .tvigle import TvigleIE from .tvigle import TvigleIE
from .tvp import TvpIE, TvpSeriesIE from .tvp import TvpIE, TvpSeriesIE
from .tvplay import TVPlayIE from .tvplay import TVPlayIE

View File

@ -1,40 +1,35 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json import json
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import remove_start from ..utils import (
remove_start,
int_or_none,
)
class BlinkxIE(InfoExtractor): class BlinkxIE(InfoExtractor):
_VALID_URL = r'^(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)' _VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
IE_NAME = 'blinkx' IE_NAME = 'blinkx'
_TEST = { _TEST = {
'url': 'http://www.blinkx.com/ce/8aQUy7GVFYgFzpKhT0oqsilwOGFRVXk3R1ZGWWdGenBLaFQwb3FzaWx3OGFRVXk3R1ZGWWdGenB', 'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ',
'md5': '2e9a07364af40163a908edbf10bb2492', 'md5': '337cf7a344663ec79bf93a526a2e06c7',
'info_dict': { 'info_dict': {
'id': '8aQUy7GV', 'id': 'Da0Gw3xc',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Police Car Rolls Away', 'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News',
'uploader': 'stupidvideos.com', 'uploader': 'IGN News',
'upload_date': '20131215', 'upload_date': '20150217',
'timestamp': 1387068000, 'timestamp': 1424215740,
'description': 'A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!', 'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.',
'duration': 14.886, 'duration': 47.743333,
'thumbnails': [{
'width': 100,
'height': 76,
'resolution': '100x76',
'url': 'http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg',
}],
}, },
} }
def _real_extract(self, rl): def _real_extract(self, url):
m = re.match(self._VALID_URL, rl) video_id = self._match_id(url)
video_id = m.group('id')
display_id = video_id[:8] display_id = video_id[:8]
api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' + api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' +
@ -60,18 +55,20 @@ def _real_extract(self, rl):
elif m['type'] in ('flv', 'mp4'): elif m['type'] in ('flv', 'mp4'):
vcodec = remove_start(m['vcodec'], 'ff') vcodec = remove_start(m['vcodec'], 'ff')
acodec = remove_start(m['acodec'], 'ff') acodec = remove_start(m['acodec'], 'ff')
tbr = (int(m['vbr']) + int(m['abr'])) // 1000 vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000)
abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000)
tbr = vbr + abr if vbr and abr else None
format_id = '%s-%sk-%s' % (vcodec, tbr, m['w']) format_id = '%s-%sk-%s' % (vcodec, tbr, m['w'])
formats.append({ formats.append({
'format_id': format_id, 'format_id': format_id,
'url': m['link'], 'url': m['link'],
'vcodec': vcodec, 'vcodec': vcodec,
'acodec': acodec, 'acodec': acodec,
'abr': int(m['abr']) // 1000, 'abr': abr,
'vbr': int(m['vbr']) // 1000, 'vbr': vbr,
'tbr': tbr, 'tbr': tbr,
'width': int(m['w']), 'width': int_or_none(m.get('w')),
'height': int(m['h']), 'height': int_or_none(m.get('h')),
}) })
self._sort_formats(formats) self._sort_formats(formats)

View File

@ -83,6 +83,22 @@ class TEDIE(SubtitlesInfoExtractor):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, {
# YouTube video
'url': 'http://www.ted.com/talks/jeffrey_kluger_the_sibling_bond',
'add_ie': ['Youtube'],
'info_dict': {
'id': 'aFBIPO-P7LM',
'ext': 'mp4',
'title': 'The hidden power of siblings: Jeff Kluger at TEDxAsheville',
'description': 'md5:3d7a4f50d95ca5dd67104e2a20f43fe1',
'uploader': 'TEDx Talks',
'uploader_id': 'TEDxTalks',
'upload_date': '20111216',
},
'params': {
'skip_download': True,
},
}] }]
_NATIVE_FORMATS = { _NATIVE_FORMATS = {
@ -132,11 +148,16 @@ def _talk_info(self, url, video_name):
talk_info = self._extract_info(webpage)['talks'][0] talk_info = self._extract_info(webpage)['talks'][0]
if talk_info.get('external') is not None: external = talk_info.get('external')
self.to_screen('Found video from %s' % talk_info['external']['service']) if external:
service = external['service']
self.to_screen('Found video from %s' % service)
ext_url = None
if service.lower() == 'youtube':
ext_url = external.get('code')
return { return {
'_type': 'url', '_type': 'url',
'url': talk_info['external']['uri'], 'url': ext_url or external['uri'],
} }
formats = [{ formats = [{

100
youtube_dl/extractor/tv4.py Normal file
View File

@ -0,0 +1,100 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
parse_iso8601,
)
class TV4IE(InfoExtractor):
    """Extractor for videos on tv4.se and tv4play.se.

    The numeric video id is taken from the URL (see ``_VALID_URL``) and is
    used to query two JSON endpoints: one describing the asset and one
    listing its playable sources.
    """

    IE_DESC = 'tv4.se and tv4play.se'
    # Verbose-mode pattern: whitespace inside the literal is ignored by the
    # regex engine.  The trailing empty alternative after ``sport/|`` lets a
    # bare ``tv4play.se/<id>`` URL match as well.
    _VALID_URL = r'''(?x)https?://(?:www\.)?
        (?:
            tv4\.se/(?:[^/]+)/klipp/(?:.*)-|
            tv4play\.se/
            (?:
                (?:program|barn)/(?:[^\?]+)\?video_id=|
                iframe/video/|
                film/|
                sport/|
            )
        )(?P<id>[0-9]+)'''
    _TESTS = [
        {
            'url': 'http://www.tv4.se/kalla-fakta/klipp/kalla-fakta-5-english-subtitles-2491650',
            'md5': '909d6454b87b10a25aa04c4bdd416a9b',
            'info_dict': {
                'id': '2491650',
                'ext': 'mp4',
                'title': 'Kalla Fakta 5 (english subtitles)',
                # Raw string: same value as before, without the invalid
                # ``\.`` escape sequence in a normal string literal.
                'thumbnail': r're:^https?://.*\.jpg$',
                'timestamp': int,
                'upload_date': '20131125',
            },
        },
        {
            'url': 'http://www.tv4play.se/iframe/video/3054113',
            'md5': '77f851c55139ffe0ebd41b6a5552489b',
            'info_dict': {
                'id': '3054113',
                'ext': 'mp4',
                'title': 'Så här jobbar ficktjuvarna - se avslöjande bilder',
                'thumbnail': r're:^https?://.*\.jpg$',
                'description': 'Unika bilder avslöjar hur turisternas fickor vittjas mitt på Stockholms central. Två experter på ficktjuvarna avslöjar knepen du ska se upp för.',
                'timestamp': int,
                'upload_date': '20150130',
            },
        },
        {
            'url': 'http://www.tv4play.se/sport/3060959',
            'only_matching': True,
        },
        {
            'url': 'http://www.tv4play.se/film/2378136',
            'only_matching': True,
        },
        {
            'url': 'http://www.tv4play.se/barn/looney-tunes?video_id=3062412',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return the info dict for the video referenced by ``url``.

        Downloads the asset description JSON and the sources JSON for the
        video id found in ``url`` and builds one format entry per source
        item.

        Raises:
            ExtractorError: if the asset is flagged as requiring a
                subscription.
        """
        video_id = self._match_id(url)

        info = self._download_json(
            'http://www.tv4play.se/player/assets/%s.json' % video_id,
            video_id, 'Downloading video info JSON')

        # If is_geo_restricted is true, it doesn't necessarily mean we can't
        # download it, so only warn and keep going.  Both flags are read
        # with .get() so that a response lacking them does not abort
        # extraction with a KeyError.
        if info.get('is_geo_restricted'):
            self.report_warning('This content might not be available in your country due to licensing restrictions.')
        if info.get('requires_subscription'):
            raise ExtractorError('This content requires subscription.', expected=True)

        sources_data = self._download_json(
            'https://prima.tv4play.se/api/web/asset/%s/play.json?protocol=http&videoFormat=MP4' % video_id,
            video_id, 'Downloading sources JSON')
        sources = sources_data['playback']

        # Tolerate a missing or null "items"/"item" entry.
        items = (sources.get('items') or {}).get('item') or []
        # NOTE(review): presumably the API can return a lone item as a plain
        # object instead of a one-element list; normalise so the loop below
        # never iterates over a dict's keys.  Confirm against live responses.
        if isinstance(items, dict):
            items = [items]

        formats = []
        for item in items:
            ext, bitrate = item['mediaFormat'], item['bitrate']
            formats.append({
                'format_id': '%s_%s' % (ext, bitrate),
                'tbr': bitrate,
                'ext': ext,
                'url': item['url'],
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': info['title'],
            'formats': formats,
            'description': info.get('description'),
            'timestamp': parse_iso8601(info.get('broadcast_date_time')),
            'duration': info.get('duration'),
            'thumbnail': info.get('image'),
            'is_live': sources.get('live'),
        }