From b20d4f8626783ae61f5865a4d9aa3f460053c9a4 Mon Sep 17 00:00:00 2001 From: Kevin Ngo Date: Thu, 10 Nov 2011 01:04:33 -0800 Subject: [PATCH] changed spaces to tabs (by yt-dl standards), fixed bugs, but still won't download. need to figure out how the whole process works to integrate correctly --- youtube-dl | 93 +++++++++++++++++++++++++++++------------------------- 1 file changed, 50 insertions(+), 43 deletions(-) diff --git a/youtube-dl b/youtube-dl index 48616015d..6eafc30b1 100755 --- a/youtube-dl +++ b/youtube-dl @@ -3481,20 +3481,20 @@ class XVideosIE(InfoExtractor): self._downloader.trouble(u'\nERROR: unable to download ' + video_id) -class SoundcloudIE(InformationExtractor): +class SoundcloudIE(InfoExtractor): """Information extractor for soundcloud.com - To access the media, the uid of the song and a stream token - must be extracted from the page source and the script must make - a request to media.soundcloud.com/crossdomain.xml. Then - the media can be grabbed by requesting from an url composed - of the stream token and uid - """ + To access the media, the uid of the song and a stream token + must be extracted from the page source and the script must make + a request to media.soundcloud.com/crossdomain.xml. Then + the media can be grabbed by requesting from an url composed + of the stream token and uid + """ _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)' IE_NAME = u'soundcloud' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) def report_webpage(self, video_id): """Report information extraction.""" @@ -3504,8 +3504,8 @@ class SoundcloudIE(InformationExtractor): """Report information extraction.""" self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id)) - def _real_initialize(self): - return + def _real_initialize(self): + return def _real_extract(self, url): htmlParser = HTMLParser.HTMLParser() @@ -3515,10 +3515,10 @@ class SoundcloudIE(InformationExtractor): self._downloader.trouble(u'ERROR: invalid URL: %s' % url) return - # extract uploader (which is in the url) - uploader = mobj.group(3).decode('utf-8') - # extract simple title (uploader + slug of song title) - slug_title = mobj.group(4).decode('utf-8') + # extract uploader (which is in the url) + uploader = mobj.group(1).decode('utf-8') + # extract simple title (uploader + slug of song title) + slug_title = mobj.group(2).decode('utf-8') simple_title = uploader + '-' + slug_title self.report_webpage('%s/%s' % (uploader, slug_title)) @@ -3532,32 +3532,36 @@ class SoundcloudIE(InformationExtractor): self.report_extraction('%s/%s' % (uploader, slug_title)) - # extract uid and access token - mobj = re.search('"uid":"([\w\d]+?)".*?stream_token=([\w\d]+)', page) - if mobj: - video_id = match.group(1) - stream_token = match.group(2) - - # construct media url (with uid/token) to request song - mediaURL = "http://media.soundcloud.com/stream/%s?stream_token=%s" - mediaURL = mediaURL % (video_id, stream_token) - - # description - description = u'No description available' - mobj = re.search('track-description-value">

(.*?)

', page) - if mobj: - description = mobj.group(1) - - # upload date - mobj = re.search("pretty-date'>on ([\w]+ [\d]+, [\d]+ \d+:\d+)", page) - if mobj: - try: - upload_date = datetime.datetime.strptime(match.group(1), '%B %d, %Y %H:%M').strftime('%Y%m%d') - except: - pass - - try: - self._download.process_info({ + # extract uid and access token + mobj = re.search('"uid":"([\w\d]+?)".*?stream_token=([\w\d]+)', page) + if mobj: + video_id = match.group(1) + stream_token = match.group(2) + + # construct media url (with uid/token) to request song + mediaURL = "http://media.soundcloud.com/stream/%s?stream_token=%s" + mediaURL = mediaURL % (video_id, stream_token) + + # description + description = u'No description available' + mobj = re.search('track-description-value">

(.*?)

', page) + if mobj: + description = mobj.group(1) + + # upload date + mobj = re.search("pretty-date'>on ([\w]+ [\d]+, [\d]+ \d+:\d+)", page) + if mobj: + try: + upload_date = datetime.datetime.strptime(match.group(1), '%B %d, %Y %H:%M').strftime('%Y%m%d') + except: + pass + + # for soundcloud, a request must be made to a cross domain to establish + # needed cookies + request = urllib2.Request('http://media.soundcloud.com/crossdomain.xml', std_headers) + + try: + self._downloader.process_info({ 'id': video_id, 'url': video_url, 'uploader': uploader, @@ -3567,8 +3571,10 @@ class SoundcloudIE(InformationExtractor): 'ext': u'mp3', 'format': u'NA', 'player_url': None, - 'description': description - }) + 'description': description + }) + except UnavailableVideoError: + self._downloader.trouble(u'\nERROR: unable to download video') class PostProcessor(object): """Post Processor class. @@ -3966,6 +3972,7 @@ def gen_extractors(): EscapistIE(), CollegeHumorIE(), XVideosIE(), + SoundcloudIE(), GenericIE() ]