yt-dlp/youtube_dl/extractor/douyutv.py
Yen Chi Hsuan f4c68ba372
[douyu] Fix extraction and update _TESTS
They've switched from flv to hls

Closes #12301
2017-02-28 21:41:03 +08:00

109 lines
3.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
unescapeHTML,
)
class DouyuTVIE(InfoExtractor):
IE_DESC = '斗鱼'
_VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(?:[^/]+/)*(?P<id>[A-Za-z0-9]+)'
_TESTS = [{
'url': 'http://www.douyutv.com/iseven',
'info_dict': {
'id': '17732',
'display_id': 'iseven',
'ext': 'mp4',
'title': 're:^清晨醒脑T-ARA根本停不下来 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': r're:.*m7show@163\.com.*',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': '7师傅',
'is_live': True,
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://www.douyutv.com/85982',
'info_dict': {
'id': '85982',
'display_id': '85982',
'ext': 'mp4',
'title': 're:^小漠从零单排记——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': 'md5:746a2f7a253966a06755a912f0acc0d2',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'douyu小漠',
'is_live': True,
},
'params': {
'skip_download': True,
},
'skip': 'Room not found',
}, {
'url': 'http://www.douyutv.com/17732',
'info_dict': {
'id': '17732',
'display_id': '17732',
'ext': 'mp4',
'title': 're:^清晨醒脑T-ARA根本停不下来 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': r're:.*m7show@163\.com.*',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': '7师傅',
'is_live': True,
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://www.douyu.com/xiaocang',
'only_matching': True,
}, {
# \"room_id\"
'url': 'http://www.douyu.com/t/lpl',
'only_matching': True,
}]
# Decompile core.swf in webpage by ffdec "Search SWFs in memory". core.swf
# is encrypted originally, but ffdec can dump memory to get the decrypted one.
_API_KEY = 'A12Svb&%1UUmf@hC'
def _real_extract(self, url):
video_id = self._match_id(url)
if video_id.isdigit():
room_id = video_id
else:
page = self._download_webpage(url, video_id)
room_id = self._html_search_regex(
r'"room_id\\?"\s*:\s*(\d+),', page, 'room id')
room = self._download_json(
'http://m.douyu.com/html5/live?roomId=%s' % room_id, video_id,
note='Downloading room info')['data']
# 1 = live, 2 = offline
if room.get('show_status') == '2':
raise ExtractorError('Live stream is offline', expected=True)
formats = self._extract_m3u8_formats(
room['hls_url'], video_id, ext='mp4')
title = self._live_title(unescapeHTML(room['room_name']))
description = room.get('show_details')
thumbnail = room.get('room_src')
uploader = room.get('nickname')
return {
'id': room_id,
'display_id': video_id,
'formats': formats,
'title': title,
'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
'is_live': True,
}