From 2f1983572659415354c88743130a303af8188caf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 11 Apr 2020 20:07:12 +0700 Subject: [PATCH] [thisoldhouse] Improve video id extraction (closes #24549) --- youtube_dl/extractor/thisoldhouse.py | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/thisoldhouse.py b/youtube_dl/extractor/thisoldhouse.py index 33269705f..a3d9b4017 100644 --- a/youtube_dl/extractor/thisoldhouse.py +++ b/youtube_dl/extractor/thisoldhouse.py @@ -19,20 +19,6 @@ class ThisOldHouseIE(InfoExtractor): 'params': { 'skip_download': True, }, - }, { - 'url': 'https://www.thisoldhouse.com/21083431/seaside-transformation-the-westerly-project', - 'note': 'test for updated video URL', - 'info_dict': { - 'id': '5e2b70e95216cc0001615120', - 'ext': 'mp4', - 'title': 'E12 | The Westerly Project | Seaside Transformation', - 'description': 'Kevin and Tommy take the tour with the homeowners and Jeff. Norm presents his pine coffee table. Jenn gives Tommy the garden tour. 
Everyone meets at the flagpole to raise the flags.', - 'timestamp': 1579755600, - 'upload_date': '20200123', - }, - 'params': { - 'skip_download': True, - }, }, { 'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins', 'only_matching': True, @@ -45,6 +31,10 @@ class ThisOldHouseIE(InfoExtractor): }, { 'url': 'https://www.thisoldhouse.com/21113884/s41-e13-paradise-lost', 'only_matching': True, + }, { + # iframe www.thisoldhouse.com + 'url': 'https://www.thisoldhouse.com/21083431/seaside-transformation-the-westerly-project', + 'only_matching': True, }] _ZYPE_TMPL = 'https://player.zype.com/embed/%s.html?api_key=hsOk_yMSPYNrT22e9pu8hihLXjaZf0JW5jsOWv4ZqyHJFvkJn6rtToHl09tbbsbe' @@ -52,6 +42,6 @@ def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) video_id = self._search_regex( - r'<iframe[^>]+src=[\'"](?:https?:)?//(?:www\.|)thisoldhouse(?:\.chorus\.build|\.com)/videos/zype/([0-9a-f]{24})', + r'<iframe[^>]+src=[\'"](?:https?:)?//(?:www\.)?thisoldhouse\.(?:chorus\.build|com)/videos/zype/([0-9a-f]{24})', webpage, 'video id') return self.url_result(self._ZYPE_TMPL % video_id, 'Zype', video_id)