Use None on missing required info_dict fields

This commit is contained in:
Filippo Valsorda 2012-11-27 23:15:33 +01:00
parent 03c5b0fbd4
commit f462df021a
2 changed files with 40 additions and 39 deletions

View File

@ -327,6 +327,7 @@ def prepare_filename(self, info_dict):
template_dict = dict(info_dict) template_dict = dict(info_dict)
template_dict['epoch'] = unicode(int(time.time())) template_dict['epoch'] = unicode(int(time.time()))
template_dict['autonumber'] = unicode('%05d' % self._num_downloads) template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
template_dict = dict((key, u'NA' if val is None else val) for key, val in template_dict.items())
filename = self.params['outtmpl'] % template_dict filename = self.params['outtmpl'] % template_dict
return filename return filename
except (ValueError, KeyError), err: except (ValueError, KeyError), err:

View File

@ -380,7 +380,7 @@ def _real_extract(self, url):
video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0]) video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])
# upload date # upload date
upload_date = u'NA' upload_date = None
mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL) mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
if mobj is not None: if mobj is not None:
upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split()) upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
@ -631,7 +631,7 @@ def _real_extract(self, url):
'id': video_id.decode('utf-8'), 'id': video_id.decode('utf-8'),
'url': video_url.decode('utf-8'), 'url': video_url.decode('utf-8'),
'uploader': video_uploader.decode('utf-8'), 'uploader': video_uploader.decode('utf-8'),
'upload_date': u'NA', 'upload_date': None,
'title': video_title, 'title': video_title,
'ext': video_extension.decode('utf-8'), 'ext': video_extension.decode('utf-8'),
}] }]
@ -707,7 +707,7 @@ def _real_extract(self, url):
return return
video_title = unescapeHTML(mobj.group('title').decode('utf-8')) video_title = unescapeHTML(mobj.group('title').decode('utf-8'))
video_uploader = u'NA' video_uploader = None
mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>', webpage) mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>', webpage)
if mobj is None: if mobj is None:
# looking for official user # looking for official user
@ -719,7 +719,7 @@ def _real_extract(self, url):
else: else:
video_uploader = mobj.group(1) video_uploader = mobj.group(1)
video_upload_date = u'NA' video_upload_date = None
mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage) mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
if mobj is not None: if mobj is not None:
video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1) video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
@ -820,8 +820,8 @@ def _real_extract(self, url):
return [{ return [{
'id': video_id.decode('utf-8'), 'id': video_id.decode('utf-8'),
'url': video_url.decode('utf-8'), 'url': video_url.decode('utf-8'),
'uploader': u'NA', 'uploader': None,
'upload_date': u'NA', 'upload_date': None,
'title': video_title, 'title': video_title,
'ext': video_extension.decode('utf-8'), 'ext': video_extension.decode('utf-8'),
}] }]
@ -886,7 +886,7 @@ def _real_extract(self, url):
'id': video_id.decode('utf-8'), 'id': video_id.decode('utf-8'),
'url': video_url.decode('utf-8'), 'url': video_url.decode('utf-8'),
'uploader': video_uploader, 'uploader': video_uploader,
'upload_date': u'NA', 'upload_date': None,
'title': video_title, 'title': video_title,
'ext': video_extension.decode('utf-8'), 'ext': video_extension.decode('utf-8'),
}] }]
@ -1026,7 +1026,7 @@ def _real_extract(self, url, new_video=True):
'id': video_id.decode('utf-8'), 'id': video_id.decode('utf-8'),
'url': video_url, 'url': video_url,
'uploader': video_uploader, 'uploader': video_uploader,
'upload_date': u'NA', 'upload_date': None,
'title': video_title, 'title': video_title,
'ext': video_extension.decode('utf-8'), 'ext': video_extension.decode('utf-8'),
'thumbnail': video_thumbnail.decode('utf-8'), 'thumbnail': video_thumbnail.decode('utf-8'),
@ -1098,7 +1098,7 @@ def _real_extract(self, url, new_video=True):
else: video_description = '' else: video_description = ''
# Extract upload date # Extract upload date
video_upload_date = u'NA' video_upload_date = None
mobj = re.search(r'<span id="clip-date" style="display:none">[^:]*: (.*?)( \([^\(]*\))?</span>', webpage) mobj = re.search(r'<span id="clip-date" style="display:none">[^:]*: (.*?)( \([^\(]*\))?</span>', webpage)
if mobj is not None: if mobj is not None:
video_upload_date = mobj.group(1) video_upload_date = mobj.group(1)
@ -1286,7 +1286,7 @@ def _real_extract(self, url):
'id': video_id.decode('utf-8'), 'id': video_id.decode('utf-8'),
'url': video_url.decode('utf-8'), 'url': video_url.decode('utf-8'),
'uploader': video_uploader, 'uploader': video_uploader,
'upload_date': u'NA', 'upload_date': None,
'title': video_title, 'title': video_title,
'ext': video_extension.decode('utf-8'), 'ext': video_extension.decode('utf-8'),
}] }]
@ -1889,8 +1889,8 @@ def _real_extract(self, url):
return [{ return [{
'id': file_id.decode('utf-8'), 'id': file_id.decode('utf-8'),
'url': file_url.decode('utf-8'), 'url': file_url.decode('utf-8'),
'uploader': u'NA', 'uploader': None,
'upload_date': u'NA', 'upload_date': None,
'title': file_title, 'title': file_title,
'ext': file_extension.decode('utf-8'), 'ext': file_extension.decode('utf-8'),
}] }]
@ -2044,7 +2044,7 @@ def _real_extract(self, url):
video_thumbnail = video_info['thumbnail'] video_thumbnail = video_info['thumbnail']
# upload date # upload date
upload_date = u'NA' upload_date = None
if 'upload_date' in video_info: if 'upload_date' in video_info:
upload_time = video_info['upload_date'] upload_time = video_info['upload_date']
timetuple = email.utils.parsedate_tz(upload_time) timetuple = email.utils.parsedate_tz(upload_time)
@ -2142,8 +2142,8 @@ def _real_extract(self, url):
info = { info = {
'id': title, 'id': title,
'url': url, 'url': url,
'uploader': u'NA', 'uploader': None,
'upload_date': u'NA', 'upload_date': None,
'title': title, 'title': title,
'ext': ext, 'ext': ext,
'urlhandle': urlh 'urlhandle': urlh
@ -2244,8 +2244,8 @@ def _real_extract(self,url):
return [{ return [{
'id': video_id, 'id': video_id,
'url': video_url, 'url': video_url,
'uploader': u'NA', 'uploader': None,
'upload_date': u'NA', 'upload_date': None,
'title': video_title, 'title': video_title,
'ext': u'flv', 'ext': u'flv',
}] }]
@ -2501,7 +2501,7 @@ def _real_extract(self, url):
'id': videoId, 'id': videoId,
'url': videoUrl, 'url': videoUrl,
'uploader': showName, 'uploader': showName,
'upload_date': u'NA', 'upload_date': None,
'title': showName, 'title': showName,
'ext': 'flv', 'ext': 'flv',
'thumbnail': imgUrl, 'thumbnail': imgUrl,
@ -2550,8 +2550,8 @@ def _real_extract(self, url):
info = { info = {
'id': video_id, 'id': video_id,
'internal_id': internal_video_id, 'internal_id': internal_video_id,
'uploader': u'NA', 'uploader': None,
'upload_date': u'NA', 'upload_date': None,
} }
self.report_extraction(video_id) self.report_extraction(video_id)
@ -2636,8 +2636,8 @@ def _real_extract(self, url):
info = { info = {
'id': video_id, 'id': video_id,
'url': video_url, 'url': video_url,
'uploader': u'NA', 'uploader': None,
'upload_date': u'NA', 'upload_date': None,
'title': video_title, 'title': video_title,
'ext': 'flv', 'ext': 'flv',
'thumbnail': video_thumbnail, 'thumbnail': video_thumbnail,
@ -2717,7 +2717,7 @@ def _real_extract(self, url):
description = mobj.group(1) description = mobj.group(1)
# upload date # upload date
upload_date = u'NA' upload_date = None
mobj = re.search("pretty-date'>on ([\w]+ [\d]+, [\d]+ \d+:\d+)</abbr></h2>", webpage) mobj = re.search("pretty-date'>on ([\w]+ [\d]+, [\d]+ \d+:\d+)</abbr></h2>", webpage)
if mobj: if mobj:
try: try:
@ -2798,8 +2798,8 @@ def _real_extract(self, url):
info = { info = {
'id': video_id, 'id': video_id,
'url': video_url, 'url': video_url,
'uploader': u'NA', 'uploader': None,
'upload_date': u'NA', 'upload_date': None,
'title': video_title, 'title': video_title,
'ext': extension, # Extension is always(?) mp4, but seems to be flv 'ext': extension, # Extension is always(?) mp4, but seems to be flv
'thumbnail': None, 'thumbnail': None,
@ -2912,7 +2912,7 @@ def _real_extract(self, url):
'id': file_id.decode('utf-8'), 'id': file_id.decode('utf-8'),
'url': file_url.decode('utf-8'), 'url': file_url.decode('utf-8'),
'uploader': uploader.decode('utf-8'), 'uploader': uploader.decode('utf-8'),
'upload_date': u'NA', 'upload_date': None,
'title': json_data['name'], 'title': json_data['name'],
'ext': file_url.split('.')[-1].decode('utf-8'), 'ext': file_url.split('.')[-1].decode('utf-8'),
'format': (format_param is None and u'NA' or format_param.decode('utf-8')), 'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
@ -2946,8 +2946,8 @@ def _real_extract(self, url):
video = mobj.group('video') video = mobj.group('video')
info = { info = {
'id': course + '_' + video, 'id': course + '_' + video,
'uploader': u'NA', 'uploader': None,
'upload_date': u'NA', 'upload_date': None,
} }
self.report_extraction(info['id']) self.report_extraction(info['id'])
@ -2972,8 +2972,8 @@ def _real_extract(self, url):
info = { info = {
'id': course, 'id': course,
'type': 'playlist', 'type': 'playlist',
'uploader': u'NA', 'uploader': None,
'upload_date': u'NA', 'upload_date': None,
} }
self.report_download_webpage(info['id']) self.report_download_webpage(info['id'])
@ -3010,8 +3010,8 @@ def _real_extract(self, url):
info = { info = {
'id': 'Stanford OpenClassroom', 'id': 'Stanford OpenClassroom',
'type': 'playlist', 'type': 'playlist',
'uploader': u'NA', 'uploader': None,
'upload_date': u'NA', 'upload_date': None,
} }
self.report_download_webpage(info['id']) self.report_download_webpage(info['id'])
@ -3120,7 +3120,7 @@ def _real_extract(self, url):
'id': video_id, 'id': video_id,
'url': video_url, 'url': video_url,
'uploader': performer, 'uploader': performer,
'upload_date': u'NA', 'upload_date': None,
'title': video_title, 'title': video_title,
'ext': ext, 'ext': ext,
'format': format, 'format': format,
@ -3241,8 +3241,8 @@ def _real_extract(self, url):
info = { info = {
'id': '%s_part%02d' % (video_id, index), 'id': '%s_part%02d' % (video_id, index),
'url': download_url, 'url': download_url,
'uploader': u'NA', 'uploader': None,
'upload_date': u'NA', 'upload_date': None,
'title': video_title, 'title': video_title,
'ext': ext, 'ext': ext,
} }
@ -3305,8 +3305,8 @@ def _real_extract(self, url):
return [{ return [{
'id': video_id, 'id': video_id,
'url': video_url, 'url': video_url,
'uploader': u'NA', 'uploader': None,
'upload_date': u'NA', 'upload_date': None,
'title': video_title, 'title': video_title,
'ext': 'flv', 'ext': 'flv',
'thumbnail': video_thumbnail, 'thumbnail': video_thumbnail,
@ -3365,7 +3365,7 @@ def _real_extract(self, url):
return return
# Extract upload date # Extract upload date
upload_date = u'NA' upload_date = None
pattern = 'title="Timestamp">(.*?)</a>' pattern = 'title="Timestamp">(.*?)</a>'
mobj = re.search(pattern, webpage) mobj = re.search(pattern, webpage)
if mobj: if mobj:
@ -3376,7 +3376,7 @@ def _real_extract(self, url):
self.report_date(upload_date) self.report_date(upload_date)
# Extract uploader # Extract uploader
uploader = u'NA' uploader = None
pattern = r'rel\="author".*?>(.*?)</a>' pattern = r'rel\="author".*?>(.*?)</a>'
mobj = re.search(pattern, webpage) mobj = re.search(pattern, webpage)
if mobj: if mobj: