[facebook] Recognize #! URLs (Fixes #1988)

2024-11-15 18:12:58 +00:00 · 2013-12-16 21:10:06 +01:00 · 2013-12-16 21:10:06 +01:00 · 8c8e3eec79
commit 8c8e3eec79
parent 7ebc9dee69
2 changed files with 7 additions and 3 deletions
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -10,6 +10,7 @@
 from test.helper import get_testcases
 from youtube_dl.extractor import (
    FacebookIE,
    gen_extractors,
    JustinTVIE,
    YoutubeIE,
@@ -87,12 +88,15 @@ def test_youtube_extract(self):
        assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
        assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
    def test_facebook_matching(self):
        self.assertTrue(FacebookIE.suitable(u'https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
    def test_no_duplicates(self):
        ies = gen_extractors()
        for tc in get_testcases():
            url = tc['url']
            for ie in ies:
-                if type(ie).__name__ in ['GenericIE', tc['name'] + 'IE']:
+                if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
                    self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url))
                else:
                    self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url))
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -17,7 +17,7 @@
 class FacebookIE(InfoExtractor):
    """Information Extractor for Facebook"""
-    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
+    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:[^#?]*#!/)?(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
    _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
    _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
    _NETRC_MACHINE = 'facebook'