[facebook] Recognize #! URLs (Fixes #1988)

This commit is contained in:
Philipp Hagemeister 2013-12-16 21:10:06 +01:00
parent 7ebc9dee69
commit 8c8e3eec79
2 changed files with 7 additions and 3 deletions

View File

@@ -10,6 +10,7 @@
from test.helper import get_testcases from test.helper import get_testcases
from youtube_dl.extractor import ( from youtube_dl.extractor import (
FacebookIE,
gen_extractors, gen_extractors,
JustinTVIE, JustinTVIE,
YoutubeIE, YoutubeIE,
@@ -87,12 +88,15 @@ def test_youtube_extract(self):
assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc') assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
assertExtractId('BaW_jenozKc', 'BaW_jenozKc') assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
def test_facebook_matching(self):
self.assertTrue(FacebookIE.suitable(u'https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
def test_no_duplicates(self): def test_no_duplicates(self):
ies = gen_extractors() ies = gen_extractors()
for tc in get_testcases(): for tc in get_testcases():
url = tc['url'] url = tc['url']
for ie in ies: for ie in ies:
if type(ie).__name__ in ['GenericIE', tc['name'] + 'IE']: if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url)) self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url))
else: else:
self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url)) self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url))

View File

@@ -17,7 +17,7 @@
class FacebookIE(InfoExtractor): class FacebookIE(InfoExtractor):
"""Information Extractor for Facebook""" """Information Extractor for Facebook"""
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)' _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:[^#?]*#!/)?(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
_LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1' _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
_CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1' _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
_NETRC_MACHINE = 'facebook' _NETRC_MACHINE = 'facebook'