From ce78943ae1be8cc24ec43f97dc67d34010ae08f7 Mon Sep 17 00:00:00 2001 From: "Sergey M." Date: Mon, 24 Feb 2014 23:30:09 +0700 Subject: [PATCH 1/4] [novamov] Generalize extractor --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/novamov.py | 42 ++++++++++++++++++-------------- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 6dccd5ae7..8eff3df41 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -161,7 +161,7 @@ from .nhl import NHLIE, NHLVideocenterIE from .niconico import NiconicoIE from .ninegag import NineGagIE from .normalboots import NormalbootsIE -from .novamov import NovamovIE +from .novamov import NovaMovIE from .nowness import NownessIE from .nowvideo import NowVideoIE from .ooyala import OoyalaIE diff --git a/youtube_dl/extractor/novamov.py b/youtube_dl/extractor/novamov.py index 22a382457..fd310e219 100644 --- a/youtube_dl/extractor/novamov.py +++ b/youtube_dl/extractor/novamov.py @@ -9,14 +9,25 @@ from ..utils import ( ) -class NovamovIE(InfoExtractor): - _VALID_URL = r'http://(?:(?:www\.)?novamov\.com/video/|(?:(?:embed|www)\.)novamov\.com/embed\.php\?(?:.*?&)?v=)(?P[a-z\d]{13})' +class NovaMovIE(InfoExtractor): + IE_NAME = 'novamov' + IE_DESC = 'NovaMov' + + _VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P[a-z\d]{13})' % {'host': 'novamov\.com'} + + _HOST = 'www.novamov.com' + + _FILE_DELETED_REGEX = r'This file no longer exists on our servers!' + _FILEKEY_REGEX = r'flashvars\.filekey="(?P[^"]+)";' + _TITLE_REGEX = r'(?s)
\s*

([^<]+)

' + _DESCRIPTION_REGEX = r'(?s)
\s*

[^<]+

([^<]+)

' _TEST = { 'url': 'http://www.novamov.com/video/4rurhn9x446jj', - 'file': '4rurhn9x446jj.flv', 'md5': '7205f346a52bbeba427603ba10d4b935', 'info_dict': { + 'id': '4rurhn9x446jj', + 'ext': 'flv', 'title': 'search engine optimization', 'description': 'search engine optimization is used to rank the web page in the google search engine' }, @@ -27,31 +38,26 @@ class NovamovIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('videoid') - page = self._download_webpage('http://www.novamov.com/video/%s' % video_id, - video_id, 'Downloading video page') + page = self._download_webpage( + 'http://%s/video/%s' % (self._HOST, video_id), video_id, 'Downloading video page') - if re.search(r'This file no longer exists on our servers!', page) is not None: + if re.search(self._FILE_DELETED_REGEX, page) is not None: raise ExtractorError(u'Video %s does not exist' % video_id, expected=True) - filekey = self._search_regex( - r'flashvars\.filekey="(?P[^"]+)";', page, 'filekey') + filekey = self._search_regex(self._FILEKEY_REGEX, page, 'filekey') - title = self._html_search_regex( - r'(?s)
\s*

([^<]+)

', - page, 'title', fatal=False) + title = self._html_search_regex(self._TITLE_REGEX, page, 'title', fatal=False) - description = self._html_search_regex( - r'(?s)
\s*

[^<]+

([^<]+)

', - page, 'description', fatal=False) + description = self._html_search_regex(self._DESCRIPTION_REGEX, page, 'description', default='', fatal=False) api_response = self._download_webpage( - 'http://www.novamov.com/api/player.api.php?key=%s&file=%s' % (filekey, video_id), - video_id, 'Downloading video api response') + 'http://%s/api/player.api.php?key=%s&file=%s' % (self._HOST, filekey, video_id), video_id, + 'Downloading video api response') response = compat_urlparse.parse_qs(api_response) if 'error_msg' in response: - raise ExtractorError('novamov returned error: %s' % response['error_msg'][0], expected=True) + raise ExtractorError('%s returned error: %s' % (self.IE_NAME, response['error_msg'][0]), expected=True) video_url = response['url'][0] @@ -60,4 +66,4 @@ class NovamovIE(InfoExtractor): 'url': video_url, 'title': title, 'description': description - } + } \ No newline at end of file From f1c9dfcc010611adf145f74d86047b7387b62025 Mon Sep 17 00:00:00 2001 From: "Sergey M." Date: Mon, 24 Feb 2014 23:30:58 +0700 Subject: [PATCH 2/4] [nowvideo] Rewrite based on novamov extractor --- youtube_dl/extractor/nowvideo.py | 60 +++++++++++--------------------- 1 file changed, 21 insertions(+), 39 deletions(-) diff --git a/youtube_dl/extractor/nowvideo.py b/youtube_dl/extractor/nowvideo.py index 168ca8b9f..dd665874d 100644 --- a/youtube_dl/extractor/nowvideo.py +++ b/youtube_dl/extractor/nowvideo.py @@ -1,46 +1,28 @@ -import re +from __future__ import unicode_literals -from .common import InfoExtractor -from ..utils import compat_urlparse +from .novamov import NovaMovIE -class NowVideoIE(InfoExtractor): - _VALID_URL = r'(?:https?://)?(?:www\.)?nowvideo\.(?:ch|sx)/video/(?P\w+)' - _TEST = { - u'url': u'http://www.nowvideo.ch/video/0mw0yow7b6dxa', - u'file': u'0mw0yow7b6dxa.flv', - u'md5': u'f8fbbc8add72bd95b7850c6a02fc8817', - u'info_dict': { - u"title": u"youtubedl test video _BaW_jenozKc.mp4" - } - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - - video_id = mobj.group('id') - webpage_url = 'http://www.nowvideo.ch/video/' + video_id - embed_url = 'http://embed.nowvideo.ch/embed.php?v=' + video_id - webpage = self._download_webpage(webpage_url, video_id) - embed_page = self._download_webpage(embed_url, video_id, - u'Downloading embed page') +class NowVideoIE(NovaMovIE): + IE_NAME = 'nowvideo' + IE_DESC = 'NowVideo' - self.report_extraction(video_id) + _VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P[a-z\d]{13})' % {'host': 'nowvideo\.(?:ch|sx|eu)'} - video_title = self._html_search_regex(r'

(.*)

', - webpage, u'video title') + _HOST = 'www.nowvideo.ch' - video_key = self._search_regex(r'var fkzd="(.*)";', - embed_page, u'video key') + _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<' + _FILEKEY_REGEX = r'var fkzd="([^"]+)";' + _TITLE_REGEX = r'

([^<]+)

' + _DESCRIPTION_REGEX = r'\s*

([^<]+)

' - api_call = "http://www.nowvideo.ch/api/player.api.php?file={0}&numOfErrors=0&cid=1&key={1}".format(video_id, video_key) - api_response = self._download_webpage(api_call, video_id, - u'Downloading API page') - video_url = compat_urlparse.parse_qs(api_response)[u'url'][0] - - return [{ - 'id': video_id, - 'url': video_url, - 'ext': 'flv', - 'title': video_title, - }] + _TEST = { + 'url': 'http://www.nowvideo.ch/video/0mw0yow7b6dxa', + 'md5': 'f8fbbc8add72bd95b7850c6a02fc8817', + 'info_dict': { + 'id': '0mw0yow7b6dxa', + 'ext': 'flv', + 'title': 'youtubedl test video _BaW_jenozKc.mp4', + 'description': 'Description', + } + } \ No newline at end of file From b1c6c32f785d2e0ec943da0bc88609bcd4409ae3 Mon Sep 17 00:00:00 2001 From: "Sergey M." Date: Mon, 24 Feb 2014 23:37:42 +0700 Subject: [PATCH 3/4] [generic] Add support for nowvideo embedded videos --- youtube_dl/extractor/generic.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 9a2e54d14..22d1b5daa 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -363,11 +363,17 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(mobj.group(1), 'Mpora') - # Look for embedded Novamov player + # Look for embedded NovaMov player mobj = re.search( r']+?src=(["\'])(?Phttp://(?:(?:embed|www)\.)?novamov\.com/embed\.php.+?)\1', webpage) if mobj is not None: - return self.url_result(mobj.group('url'), 'Novamov') + return self.url_result(mobj.group('url'), 'NovaMov') + + # Look for embedded NowVideo player + mobj = re.search( + r']+?src=(["\'])(?Phttp://(?:(?:embed|www)\.)?nowvideo\.(?:ch|sx|eu)/embed\.php.+?)\1', webpage) + if mobj is not None: + return self.url_result(mobj.group('url'), 'NowVideo') # Look for embedded Facebook player mobj = re.search( From 2bfe4ead4ba13e1fed69e70ef49cec9dfa7f84a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 24 Feb 2014 22:01:34 +0100 Subject: [PATCH 4/4] [veoh] Allow to download videos with age protection (fixes #2455) --- youtube_dl/extractor/veoh.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/youtube_dl/extractor/veoh.py b/youtube_dl/extractor/veoh.py index baa57f343..c90feefd2 100644 --- a/youtube_dl/extractor/veoh.py +++ b/youtube_dl/extractor/veoh.py @@ -4,6 +4,7 @@ import re import json from .common import InfoExtractor +from ..utils import compat_urllib_request class VeohIE(InfoExtractor): @@ -24,6 +25,13 @@ class VeohIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) + age_limit = 0 + if 'class="adultwarning-container"' in webpage: + self.report_age_confirmation() + age_limit = 18 + request = compat_urllib_request.Request(url) + request.add_header('Cookie', 'confirmedAdult=true') + webpage = self._download_webpage(request, video_id) m_youtube = re.search(r'http://www\.youtube\.com/v/(.*?)(\&|")', webpage) if m_youtube is not None: @@ -44,4 +52,5 @@ class VeohIE(InfoExtractor): 'thumbnail': info.get('highResImage') or info.get('medResImage'), 'description': info['description'], 'view_count': info['views'], + 'age_limit': age_limit, }