[dailymotion] Fix playlist extraction

The HTML code has changed, so make the video ID extraction more robust.
This commit is contained in:
Jaime Marquínez Ferrándiz 2013-10-04 14:07:29 +02:00
parent 46e28a84ca
commit c3fef636b5

View File

@ -10,6 +10,7 @@
compat_str, compat_str,
get_element_by_attribute, get_element_by_attribute,
get_element_by_id, get_element_by_id,
orderedSet,
ExtractorError, ExtractorError,
) )
@ -158,12 +159,12 @@ def _extract_entries(self, id):
id, u'Downloading page %s' % pagenum) id, u'Downloading page %s' % pagenum)
playlist_el = get_element_by_attribute(u'class', u'video_list', webpage) playlist_el = get_element_by_attribute(u'class', u'video_list', webpage)
video_ids.extend(re.findall(r'data-id="(.+?)" data-ext-id', playlist_el)) video_ids.extend(re.findall(r'data-id="(.+?)"', playlist_el))
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
break break
return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion') return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
for video_id in video_ids] for video_id in orderedSet(video_ids)]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)