[allocine] add extractor for allocine.fr (fixes #3189)

This commit is contained in:
Petr Půlpán 2014-07-05 14:42:26 +02:00
parent ba4133c9eb
commit 49cbe7c8e3
2 changed files with 90 additions and 0 deletions

View File

@ -3,6 +3,7 @@ from .addanime import AddAnimeIE
from .aftonbladet import AftonbladetIE
from .anitube import AnitubeIE
from .aol import AolIE
from .allocine import AllocineIE
from .aparat import AparatIE
from .appletrailers import AppleTrailersIE
from .archiveorg import ArchiveOrgIE

View File

@ -0,0 +1,89 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..utils import (
compat_str,
qualities,
determine_ext,
)
class AllocineIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?P<typ>article|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=)(?P<id>[0-9]+)(?:\.html)?'
_TESTS = [{
'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html',
'md5': '0c9fcf59a841f65635fa300ac43d8269',
'info_dict': {
'id': '19546517',
'ext': 'mp4',
'title': 'Astérix - Le Domaine des Dieux Teaser VF',
'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
'thumbnail': 're:http://.*\.jpg',
},
}, {
'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html',
'md5': 'd0cdce5d2b9522ce279fdfec07ff16e0',
'info_dict': {
'id': '19540403',
'ext': 'mp4',
'title': 'Planes 2 Bande-annonce VF',
'description': 'md5:c4b1f7bd682a91de6491ada267ec0f4d',
'thumbnail': 're:http://.*\.jpg',
},
}, {
'url': 'http://www.allocine.fr/film/fichefilm_gen_cfilm=181290.html',
'md5': '101250fb127ef9ca3d73186ff22a47ce',
'info_dict': {
'id': '19544709',
'ext': 'mp4',
'title': 'Dragons 2 - Bande annonce finale VF',
'description': 'md5:e74a4dc750894bac300ece46c7036490',
'thumbnail': 're:http://.*\.jpg',
},
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
typ = mobj.group('typ')
display_id = mobj.group('id')
webpage = self._download_webpage(url, display_id)
if typ == 'film':
video_id = self._search_regex(r'href="/video/player_gen_cmedia=([0-9]+).+"', webpage, 'video id')
else:
player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player')
player_data = json.loads(player)
video_id = compat_str(player_data['refMedia'])
xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id)
video = xml.find('.//AcVisionVideo').attrib
quality = qualities(['ld', 'md', 'hd'])
formats = []
for k, v in video.items():
if re.match(r'.+_path', k):
format_id = k.split('_')[0]
formats.append({
'format_id': format_id,
'quality': quality(format_id),
'url': v,
'ext': determine_ext(v),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': video['videoTitle'],
'thumbnail': self._og_search_thumbnail(webpage),
'formats': formats,
'description': self._og_search_description(webpage),
}