[pornhub:playlistbase] Skip videos from drop-down menu for all playlists (closes #12819, closes #13902)

pull/8/head
Sergey M․ 7 years ago
parent b3c6515365
commit 475bcb225f
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

@@ -227,13 +227,20 @@ class PornHubIE(InfoExtractor):
class PornHubPlaylistBaseIE(InfoExtractor): class PornHubPlaylistBaseIE(InfoExtractor):
def _extract_entries(self, webpage): def _extract_entries(self, webpage):
# Only process container div with main playlist content skipping
# drop-down menu that uses similar pattern for videos (see
# https://github.com/rg3/youtube-dl/issues/11594).
container = self._search_regex(
r'(?s)(<div[^>]+class=["\']container.+)', webpage,
'container', default=webpage)
return [ return [
self.url_result( self.url_result(
'http://www.pornhub.com/%s' % video_url, 'http://www.pornhub.com/%s' % video_url,
PornHubIE.ie_key(), video_title=title) PornHubIE.ie_key(), video_title=title)
for video_url, title in orderedSet(re.findall( for video_url, title in orderedSet(re.findall(
r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"', r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
webpage)) container))
] ]
def _real_extract(self, url): def _real_extract(self, url):
@@ -241,14 +248,7 @@ class PornHubPlaylistBaseIE(InfoExtractor):
webpage = self._download_webpage(url, playlist_id) webpage = self._download_webpage(url, playlist_id)
# Only process container div with main playlist content skipping entries = self._extract_entries(webpage)
# drop-down menu that uses similar pattern for videos (see
# https://github.com/rg3/youtube-dl/issues/11594).
container = self._search_regex(
r'(?s)(<div[^>]+class=["\']container.+)', webpage,
'container', default=webpage)
entries = self._extract_entries(container)
playlist = self._parse_json( playlist = self._parse_json(
self._search_regex( self._search_regex(

Loading…
Cancel
Save