From a7b899f87290c42a804c22d5e36dfd3a69fb671f Mon Sep 17 00:00:00 2001 From: nixxo Date: Tue, 27 Dec 2022 21:10:45 +0100 Subject: [PATCH 1/5] [extractor/common] Added ability to force CODECS for malformed m3u8 manifest - fixes #887 A+V stream getting recognized as only V - fixes #2918 and https://github.com/ytdl-org/youtube-dl/issues/27830 only V stream getting recognized as A+V - added tests in test_InfoExtractor with the 2 manifest from the issues --- test/test_InfoExtractor.py | 175 +++++++++++++++++- ...-d8a1-49c0-876c-f20a2d56d4c1-playlist.m3u8 | 12 ++ ...-d6f0-4e53-843e-23565b24cd82-playlist.m3u8 | 14 ++ yt_dlp/extractor/common.py | 17 +- 4 files changed, 209 insertions(+), 9 deletions(-) create mode 100644 test/testdata/m3u8/rai-383bca47-d8a1-49c0-876c-f20a2d56d4c1-playlist.m3u8 create mode 100644 test/testdata/m3u8/rai-livestream-48cc9aec-d6f0-4e53-843e-23565b24cd82-playlist.m3u8 diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 683ead315..fda4d3430 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -864,6 +864,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'height': 1080, 'vcodec': 'avc1.64002a', }], + {}, {} ), ( @@ -1031,14 +1032,182 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'ext': 'vtt', 'protocol': 'm3u8_native' }], - } + }, + {} + ), + ( + 'rai-383bca47-d8a1-49c0-876c-f20a2d56d4c1-playlist', + 'https://b70cb04c54ab478189e9d8ee45637b13.msvdn.net/ostr8/podcastcdn/teche_root/YT_ITALIA_TECHE_HD_multiaudio/13834457_,1200,1800,2400,3600,5000/playlist.m3u8?auth=daEaKdPbObBcsaIbabWdqd2adavcddNdsaK-bJQVtj-c0-GmsvrFu&aifp=V001', + [{ + 'format_id': 'aac-Audiodescrizione', + 'format_note': 'Audiodescrizione', + 'url': 'https://b70cb04c54ab478189e9d8ee45637b13.msvdn.net/ostr8/podcastcdn/teche_root/YT_ITALIA_TECHE_HD_multiaudio/13834457_,1200,1800,2400,3600,5000/chunklist_b192400_ao_slAudiodescrizione_t64QXVkaW9kZXNjcml6aW9uZV9hdWRpbw==.m3u8', + 'manifest_url': 'https://b70cb04c54ab478189e9d8ee45637b13.msvdn.net/ostr8/podcastcdn/teche_root/YT_ITALIA_TECHE_HD_multiaudio/13834457_,1200,1800,2400,3600,5000/playlist.m3u8?auth=daEaKdPbObBcsaIbabWdqd2adavcddNdsaK-bJQVtj-c0-GmsvrFu&aifp=V001', + 'language': 'Audiodescrizione', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'vcodec': 'none', + 'audio_ext': 'mp4', + 'video_ext': 'none', + }, { + 'format_id': 'aac-Italiano', + 'format_note': 'Italiano', + 'url': 'https://b70cb04c54ab478189e9d8ee45637b13.msvdn.net/ostr8/podcastcdn/teche_root/YT_ITALIA_TECHE_HD_multiaudio/13834457_,1200,1800,2400,3600,5000/chunklist_b192400_ao_slItaliano_t64SXRhbGlhbm9fYXVkaW8=.m3u8', + 'manifest_url': 'https://b70cb04c54ab478189e9d8ee45637b13.msvdn.net/ostr8/podcastcdn/teche_root/YT_ITALIA_TECHE_HD_multiaudio/13834457_,1200,1800,2400,3600,5000/playlist.m3u8?auth=daEaKdPbObBcsaIbabWdqd2adavcddNdsaK-bJQVtj-c0-GmsvrFu&aifp=V001', + 'language': 'Italiano', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'vcodec': 'none', + 'audio_ext': 'mp4', + 'video_ext': 'none', + }, { + 'format_id': '1800', + 'url': 'https://b70cb04c54ab478189e9d8ee45637b13.msvdn.net/ostr8/podcastcdn/teche_root/YT_ITALIA_TECHE_HD_multiaudio/13834457_,1200,1800,2400,3600,5000/chunklist_b1758000_vo_slita_t64MTgwMA==.m3u8', + 'manifest_url': 'https://b70cb04c54ab478189e9d8ee45637b13.msvdn.net/ostr8/podcastcdn/teche_root/YT_ITALIA_TECHE_HD_multiaudio/13834457_,1200,1800,2400,3600,5000/playlist.m3u8?auth=daEaKdPbObBcsaIbabWdqd2adavcddNdsaK-bJQVtj-c0-GmsvrFu&aifp=V001', + 'tbr': 1758.0, + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'width': 1024, + 'height': 576, + 'vcodec': 'avc1', + 'acodec': 'none', + 'video_ext': 'mp4', + 'audio_ext': 'none', + 'vbr': 1758.0, + }, { + 'format_id': '2400', + 'url': 'https://b70cb04c54ab478189e9d8ee45637b13.msvdn.net/ostr8/podcastcdn/teche_root/YT_ITALIA_TECHE_HD_multiaudio/13834457_,1200,1800,2400,3600,5000/chunklist_b2344000_vo_slita_t64MjQwMA==.m3u8', + 'manifest_url': 'https://b70cb04c54ab478189e9d8ee45637b13.msvdn.net/ostr8/podcastcdn/teche_root/YT_ITALIA_TECHE_HD_multiaudio/13834457_,1200,1800,2400,3600,5000/playlist.m3u8?auth=daEaKdPbObBcsaIbabWdqd2adavcddNdsaK-bJQVtj-c0-GmsvrFu&aifp=V001', + 'tbr': 2344.0, + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'width': 1280, + 'height': 720, + 'vcodec': 'avc1', + 'acodec': 'none', + 'video_ext': 'mp4', + 'audio_ext': 'none', + 'vbr': 2344.0, + }, { + 'format_id': '3600', + 'url': 'https://b70cb04c54ab478189e9d8ee45637b13.msvdn.net/ostr8/podcastcdn/teche_root/YT_ITALIA_TECHE_HD_multiaudio/13834457_,1200,1800,2400,3600,5000/chunklist_b3516000_vo_slita_t64MzYwMA==.m3u8', + 'manifest_url': 'https://b70cb04c54ab478189e9d8ee45637b13.msvdn.net/ostr8/podcastcdn/teche_root/YT_ITALIA_TECHE_HD_multiaudio/13834457_,1200,1800,2400,3600,5000/playlist.m3u8?auth=daEaKdPbObBcsaIbabWdqd2adavcddNdsaK-bJQVtj-c0-GmsvrFu&aifp=V001', + 'tbr': 3516.0, + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'width': 1440, + 'height': 810, + 'vcodec': 'avc1', + 'acodec': 'none', + 'video_ext': 'mp4', + 'audio_ext': 'none', + 'vbr': 3516.0, + }, { + 'format_id': '5000', + 'url': 'https://b70cb04c54ab478189e9d8ee45637b13.msvdn.net/ostr8/podcastcdn/teche_root/YT_ITALIA_TECHE_HD_multiaudio/13834457_,1200,1800,2400,3600,5000/chunklist_b5210000_vo_slita_t64NTAwMA==.m3u8', + 'manifest_url': 'https://b70cb04c54ab478189e9d8ee45637b13.msvdn.net/ostr8/podcastcdn/teche_root/YT_ITALIA_TECHE_HD_multiaudio/13834457_,1200,1800,2400,3600,5000/playlist.m3u8?auth=daEaKdPbObBcsaIbabWdqd2adavcddNdsaK-bJQVtj-c0-GmsvrFu&aifp=V001', + 'tbr': 5210.0, + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'width': 1920, + 'height': 1080, + 'vcodec': 'avc1', + 'acodec': 'none', + 'video_ext': 'mp4', + 'audio_ext': 'none', + 'vbr': 5210.0, + }], + {}, + {'acodec': 'mp4a', 'vcodec': 'avc1'}, + ), + ( + 'rai-livestream-48cc9aec-d6f0-4e53-843e-23565b24cd82-playlist', + 'https://streamcdng18-8e7439fdb1694c8da3a0fd63e4dda518.msvdn.net/raiuno1/hls/playlist_ma.m3u8?baseuri=%2Fraiuno1%2Fhls%2F&tstart=0&tend=1672352713&tof=86400&tk2=ecc6060eb5e4fc27a07f695c90fccfc9cb1ced2bac8f5d9b7442e884912b63c3', + [{ + 'format_id': 'aac-Audiodescrizione', + 'format_note': 'Audiodescrizione', + 'url': 'https://streamcdng18-8e7439fdb1694c8da3a0fd63e4dda518.msvdn.net/raiuno1/hls/desrai1_160/chunklist_ao.m3u8?baseuri=%2Fraiuno1%2Fhls%2F&tstart=0&tend=1672438656&tk2=2b4cb4fd233734fe415b44d8bdb422941028f6314c0aa7088d8fed35c72edf56', + 'manifest_url': 'https://streamcdng18-8e7439fdb1694c8da3a0fd63e4dda518.msvdn.net/raiuno1/hls/playlist_ma.m3u8?baseuri=%2Fraiuno1%2Fhls%2F&tstart=0&tend=1672352713&tof=86400&tk2=ecc6060eb5e4fc27a07f695c90fccfc9cb1ced2bac8f5d9b7442e884912b63c3', + 'language': 'des', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'vcodec': 'none', + 'audio_ext': 'mp4', + 'video_ext': 'none', + }, { + 'format_id': 'aac-Italiano', + 'format_note': 'Italiano', + 'url': 'https://streamcdng18-8e7439fdb1694c8da3a0fd63e4dda518.msvdn.net/raiuno1/hls/itarai1_160/chunklist_ao.m3u8?baseuri=%2Fraiuno1%2Fhls%2F&tstart=0&tend=1672438656&tk2=2b4cb4fd233734fe415b44d8bdb422941028f6314c0aa7088d8fed35c72edf56', + 'manifest_url': 'https://streamcdng18-8e7439fdb1694c8da3a0fd63e4dda518.msvdn.net/raiuno1/hls/playlist_ma.m3u8?baseuri=%2Fraiuno1%2Fhls%2F&tstart=0&tend=1672352713&tof=86400&tk2=ecc6060eb5e4fc27a07f695c90fccfc9cb1ced2bac8f5d9b7442e884912b63c3', + 'language': 'ita', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'vcodec': 'none', + 'audio_ext': 'mp4', + 'video_ext': 'none', + }, { + 'format_id': 'aac-Lingua Originale', + 'format_note': 'Lingua Originale', + 'url': 'https://streamcdng18-8e7439fdb1694c8da3a0fd63e4dda518.msvdn.net/raiuno1/hls/engrai1_160/chunklist_ao.m3u8?baseuri=%2Fraiuno1%2Fhls%2F&tstart=0&tend=1672438656&tk2=2b4cb4fd233734fe415b44d8bdb422941028f6314c0aa7088d8fed35c72edf56', + 'manifest_url': 'https://streamcdng18-8e7439fdb1694c8da3a0fd63e4dda518.msvdn.net/raiuno1/hls/playlist_ma.m3u8?baseuri=%2Fraiuno1%2Fhls%2F&tstart=0&tend=1672352713&tof=86400&tk2=ecc6060eb5e4fc27a07f695c90fccfc9cb1ced2bac8f5d9b7442e884912b63c3', + 'language': 'V.O', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'vcodec': 'none', + 'audio_ext': 'mp4', + 'video_ext': 'none', + }, { + 'format_id': '1365', + 'url': 'https://streamcdng18-8e7439fdb1694c8da3a0fd63e4dda518.msvdn.net/raiuno1/hls/rai1_1200/chunklist.m3u8?baseuri=%2Fraiuno1%2Fhls%2F&tstart=0&tend=1672438656&tk2=2b4cb4fd233734fe415b44d8bdb422941028f6314c0aa7088d8fed35c72edf56', + 'manifest_url': 'https://streamcdng18-8e7439fdb1694c8da3a0fd63e4dda518.msvdn.net/raiuno1/hls/playlist_ma.m3u8?baseuri=%2Fraiuno1%2Fhls%2F&tstart=0&tend=1672352713&tof=86400&tk2=ecc6060eb5e4fc27a07f695c90fccfc9cb1ced2bac8f5d9b7442e884912b63c3', + 'tbr': 1365.331, + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'width': 768, + 'height': 432, + 'vcodec': 'avc1.77.31', + 'acodec': 'mp4a.40.2', + 'video_ext': 'mp4', + 'audio_ext': 'none', + 'vbr': 1365.331, + }, { + 'format_id': '2137', + 'url': 'https://streamcdng18-8e7439fdb1694c8da3a0fd63e4dda518.msvdn.net/raiuno1/hls/rai1_1800/chunklist.m3u8?baseuri=%2Fraiuno1%2Fhls%2F&tstart=0&tend=1672438656&tk2=2b4cb4fd233734fe415b44d8bdb422941028f6314c0aa7088d8fed35c72edf56', + 'manifest_url': 'https://streamcdng18-8e7439fdb1694c8da3a0fd63e4dda518.msvdn.net/raiuno1/hls/playlist_ma.m3u8?baseuri=%2Fraiuno1%2Fhls%2F&tstart=0&tend=1672352713&tof=86400&tk2=ecc6060eb5e4fc27a07f695c90fccfc9cb1ced2bac8f5d9b7442e884912b63c3', + 'tbr': 2137.033, + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'width': 1024, + 'height': 576, + 'vcodec': 'avc1.77.31', + 'acodec': 'mp4a.40.2', + 'video_ext': 'mp4', + 'audio_ext': 'none', + 'vbr': 2137.033, + }, { + 'format_id': '2793', + 'url': 'https://streamcdng18-8e7439fdb1694c8da3a0fd63e4dda518.msvdn.net/raiuno1/hls/rai1_2400/chunklist.m3u8?baseuri=%2Fraiuno1%2Fhls%2F&tstart=0&tend=1672438656&tk2=2b4cb4fd233734fe415b44d8bdb422941028f6314c0aa7088d8fed35c72edf56', + 'manifest_url': 'https://streamcdng18-8e7439fdb1694c8da3a0fd63e4dda518.msvdn.net/raiuno1/hls/playlist_ma.m3u8?baseuri=%2Fraiuno1%2Fhls%2F&tstart=0&tend=1672352713&tof=86400&tk2=ecc6060eb5e4fc27a07f695c90fccfc9cb1ced2bac8f5d9b7442e884912b63c3', + 'tbr': 2793.078, + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'width': 1280, + 'height': 720, + 'vcodec': 'avc1.77.41', + 'acodec': 'mp4a.40.2', + 'video_ext': 'mp4', + 'audio_ext': 'none', + 'vbr': 2793.078, + }], + {}, + {'acodec': 'mp4a', 'vcodec': 'avc1'}, ), ] - for m3u8_file, m3u8_url, expected_formats, expected_subs in _TEST_CASES: + for m3u8_file, m3u8_url, expected_formats, expected_subs, codecs in _TEST_CASES: with open('./test/testdata/m3u8/%s.m3u8' % m3u8_file, encoding='utf-8') as f: formats, subs = self.ie._parse_m3u8_formats_and_subtitles( - f.read(), m3u8_url, ext='mp4') + f.read(), m3u8_url, ext='mp4', force_codecs=codecs) self.ie._sort_formats(formats) expect_value(self, formats, expected_formats, None) expect_value(self, subs, expected_subs, None) diff --git a/test/testdata/m3u8/rai-383bca47-d8a1-49c0-876c-f20a2d56d4c1-playlist.m3u8 b/test/testdata/m3u8/rai-383bca47-d8a1-49c0-876c-f20a2d56d4c1-playlist.m3u8 new file mode 100644 index 000000000..c70e644a1 --- /dev/null +++ b/test/testdata/m3u8/rai-383bca47-d8a1-49c0-876c-f20a2d56d4c1-playlist.m3u8 @@ -0,0 +1,12 @@ +#EXTM3U +#EXT-X-VERSION:3 +#EXT-X-STREAM-INF:BANDWIDTH=1758000,NAME="1800",RESOLUTION=1024x576,AUDIO="aac" +chunklist_b1758000_vo_slita_t64MTgwMA==.m3u8 +#EXT-X-STREAM-INF:BANDWIDTH=2344000,NAME="2400",RESOLUTION=1280x720,AUDIO="aac" +chunklist_b2344000_vo_slita_t64MjQwMA==.m3u8 +#EXT-X-STREAM-INF:BANDWIDTH=3516000,NAME="3600",RESOLUTION=1440x810,AUDIO="aac" +chunklist_b3516000_vo_slita_t64MzYwMA==.m3u8 +#EXT-X-STREAM-INF:BANDWIDTH=5210000,NAME="5000",RESOLUTION=1920x1080,AUDIO="aac" +chunklist_b5210000_vo_slita_t64NTAwMA==.m3u8 +#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aac",LANGUAGE="Italiano",NAME="Italiano",DEFAULT=YES,AUTOSELECT=YES,URI="chunklist_b192400_ao_slItaliano_t64SXRhbGlhbm9fYXVkaW8=.m3u8" +#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aac",LANGUAGE="Audiodescrizione",NAME="Audiodescrizione",DEFAULT=NO,AUTOSELECT=YES,URI="chunklist_b192400_ao_slAudiodescrizione_t64QXVkaW9kZXNjcml6aW9uZV9hdWRpbw==.m3u8" diff --git a/test/testdata/m3u8/rai-livestream-48cc9aec-d6f0-4e53-843e-23565b24cd82-playlist.m3u8 b/test/testdata/m3u8/rai-livestream-48cc9aec-d6f0-4e53-843e-23565b24cd82-playlist.m3u8 new file mode 100644 index 000000000..ad94c2a49 --- /dev/null +++ b/test/testdata/m3u8/rai-livestream-48cc9aec-d6f0-4e53-843e-23565b24cd82-playlist.m3u8 @@ -0,0 +1,14 @@ +#EXTM3U +#EXT-X-VERSION:3 +#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aac",LANGUAGE="ita",NAME="Italiano",DEFAULT=YES,AUTOSELECT=YES,URI="itarai1_160/chunklist_ao.m3u8?baseuri=%2Fraiuno1%2Fhls%2F&tstart=0&tend=1672438656&tk2=2b4cb4fd233734fe415b44d8bdb422941028f6314c0aa7088d8fed35c72edf56" +#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aac",LANGUAGE="V.O",NAME="Lingua Originale",DEFAULT=NO,AUTOSELECT=YES,URI="engrai1_160/chunklist_ao.m3u8?baseuri=%2Fraiuno1%2Fhls%2F&tstart=0&tend=1672438656&tk2=2b4cb4fd233734fe415b44d8bdb422941028f6314c0aa7088d8fed35c72edf56" +#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aac",LANGUAGE="des",NAME="Audiodescrizione",DEFAULT=NO,AUTOSELECT=YES,URI="desrai1_160/chunklist_ao.m3u8?baseuri=%2Fraiuno1%2Fhls%2F&tstart=0&tend=1672438656&tk2=2b4cb4fd233734fe415b44d8bdb422941028f6314c0aa7088d8fed35c72edf56" +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=2793078,CODECS="avc1.77.41,mp4a.40.2",RESOLUTION=1280x720,AUDIO="aac" +rai1_2400/chunklist.m3u8?baseuri=%2Fraiuno1%2Fhls%2F&tstart=0&tend=1672438656&tk2=2b4cb4fd233734fe415b44d8bdb422941028f6314c0aa7088d8fed35c72edf56 +#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=2793078,CODECS="avc1.77.42",URI="rai1_2400/chunklist.m3u8?baseuri=%2Fraiuno1%2Fhls%2F&tstart=0&tend=1672438656&tk2=2b4cb4fd233734fe415b44d8bdb422941028f6314c0aa7088d8fed35c72edf56" +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=2137033,CODECS="avc1.77.31,mp4a.40.2",RESOLUTION=1024x576,AUDIO="aac" +rai1_1800/chunklist.m3u8?baseuri=%2Fraiuno1%2Fhls%2F&tstart=0&tend=1672438656&tk2=2b4cb4fd233734fe415b44d8bdb422941028f6314c0aa7088d8fed35c72edf56 +#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=2137033,CODECS="avc1.77.42",URI="rai1_1800/chunklist.m3u8?baseuri=%2Fraiuno1%2Fhls%2F&tstart=0&tend=1672438656&tk2=2b4cb4fd233734fe415b44d8bdb422941028f6314c0aa7088d8fed35c72edf56" +#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1365331,CODECS="avc1.77.31,mp4a.40.2",RESOLUTION=768x432,AUDIO="aac" +rai1_1200/chunklist.m3u8?baseuri=%2Fraiuno1%2Fhls%2F&tstart=0&tend=1672438656&tk2=2b4cb4fd233734fe415b44d8bdb422941028f6314c0aa7088d8fed35c72edf56 +#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1365331,CODECS="avc1.77.31,mp4a.40.2",URI="rai1_1200/chunklist.m3u8?baseuri=%2Fraiuno1%2Fhls%2F&tstart=0&tend=1672438656&tk2=2b4cb4fd233734fe415b44d8bdb422941028f6314c0aa7088d8fed35c72edf56" diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 9031f3c11..5f0fd93b1 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1909,7 +1909,7 @@ class InfoExtractor: self, m3u8_url, video_id, ext=None, entry_protocol='m3u8_native', preference=None, quality=None, m3u8_id=None, note=None, errnote=None, fatal=True, live=False, data=None, headers={}, - query={}): + query={}, force_codecs={}): if self.get_param('ignore_no_formats_error'): fatal = False @@ -1938,13 +1938,13 @@ class InfoExtractor: m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol, preference=preference, quality=quality, m3u8_id=m3u8_id, note=note, errnote=errnote, fatal=fatal, live=live, data=data, - headers=headers, query=query, video_id=video_id) + headers=headers, query=query, video_id=video_id, force_codecs=force_codecs) def _parse_m3u8_formats_and_subtitles( self, m3u8_doc, m3u8_url=None, ext=None, entry_protocol='m3u8_native', preference=None, quality=None, m3u8_id=None, live=False, note=None, errnote=None, fatal=True, data=None, headers={}, query={}, - video_id=None): + video_id=None, force_codecs={}): formats, subtitles = [], {} has_drm = re.search('|'.join([ @@ -2125,7 +2125,6 @@ class InfoExtractor: 'abr': abr, }) codecs = parse_codecs(last_stream_inf.get('CODECS')) - f.update(codecs) audio_group_id = last_stream_inf.get('AUDIO') # As per [1, 4.3.4.1.1] any EXT-X-STREAM-INF tag which # references a rendition group MUST have a CODECS attribute. @@ -2136,12 +2135,18 @@ class InfoExtractor: # (with audio and video) format. So, for such cases we will # ignore references to rendition groups and treat them # as complete formats. - if audio_group_id and codecs and f.get('vcodec') != 'none': + + def xor(x, y): + return bool((x and not y) or (not x and y)) + + if audio_group_id and xor(codecs, force_codecs) and f.get('vcodec') != 'none' and force_codecs.get('vcodec') != 'none': audio_group = groups.get(audio_group_id) if audio_group and audio_group[0].get('URI'): # TODO: update acodec for audio only formats with # the same GROUP-ID - f['acodec'] = 'none' + codecs = codecs or force_codecs + codecs['acodec'] = 'none' + f.update(codecs) if not f.get('ext'): f['ext'] = 'm4a' if f.get('vcodec') == 'none' else 'mp4' formats.append(f) From 156a738f2c93698adf4b24fbe05523e5351a4799 Mon Sep 17 00:00:00 2001 From: nixxo Date: Tue, 3 Jan 2023 21:04:06 +0100 Subject: [PATCH 2/5] [extractor/common] improvements - implemented "update acodec for audio only formats with the same GROUP-ID" (removed TODO) - set stream type based on the manifest url - improved force_codecs behaviour --- yt_dlp/extractor/common.py | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 5f0fd93b1..1795fdda6 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -2125,6 +2125,8 @@ class InfoExtractor: 'abr': abr, }) codecs = parse_codecs(last_stream_inf.get('CODECS')) + for k, v in force_codecs.items(): + codecs.setdefault(k, v) audio_group_id = last_stream_inf.get('AUDIO') # As per [1, 4.3.4.1.1] any EXT-X-STREAM-INF tag which # references a rendition group MUST have a CODECS attribute. @@ -2136,16 +2138,32 @@ class InfoExtractor: # ignore references to rendition groups and treat them # as complete formats. - def xor(x, y): - return bool((x and not y) or (not x and y)) + # updates acodec for audio only formats with + # the same GROUP-ID + for f_id in [join_nonempty(m3u8_id, audio_group_id, ag.get('NAME')) for ag in groups.get(audio_group_id) or []]: + for fmt in formats: + if fmt['format_id'].startswith(f_id) and fmt.get('vcodec') == 'none': + fmt['acodec'] = codecs.get('acodec') + break - if audio_group_id and xor(codecs, force_codecs) and f.get('vcodec') != 'none' and force_codecs.get('vcodec') != 'none': + fixed = False + # Trying to set Video-only (and Audio-only???) streams based on the manifest url (be very strict to avoid issues). Eg: + # .../chunklist_b5210000_vo_... + # .../v9/prog_index.m3u8... + # .../index-f14-v1.m3u8?... + if re.search(r'chunklist\w*?_vo_|/v\d{1,2}/\w*?index\.m3u8|index-f\d{1,2}-v\d{1,2}\.m3u8', f['url']): + codecs['acodec'] = 'none' + fixed = True + # Audio only: very likely not necessary + elif re.search(r'chunklist\w*?_ao_|/a\d{1,2}/\w*?index\.m3u8|index-f\d{1,2}-a\d{1,2}\.m3u8', f['url']): + codecs['vcodec'] = 'none' + fixed = True + + if audio_group_id and codecs and not force_codecs and f.get('vcodec') != 'none' and not fixed: audio_group = groups.get(audio_group_id) if audio_group and audio_group[0].get('URI'): - # TODO: update acodec for audio only formats with - # the same GROUP-ID - codecs = codecs or force_codecs codecs['acodec'] = 'none' + f.update(codecs) if not f.get('ext'): f['ext'] = 'm4a' if f.get('vcodec') == 'none' else 'mp4' From d0e7a64e92c349bd2abfeda07e17a23479e2efd5 Mon Sep 17 00:00:00 2001 From: nixxo Date: Tue, 3 Jan 2023 21:25:53 +0100 Subject: [PATCH 3/5] fixed test_InfoExtractor --- test/test_InfoExtractor.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index fda4d3430..242a26020 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -624,24 +624,24 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'img_bipbop_adv_example_fmp4', 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', [{ - 'format_id': 'aud1-English', - 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a1/prog_index.m3u8', + 'format_id': 'aud2-English', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a2/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'language': 'en', 'ext': 'mp4', 'protocol': 'm3u8_native', 'audio_ext': 'mp4', }, { - 'format_id': 'aud2-English', - 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a2/prog_index.m3u8', + 'format_id': 'aud3-English', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a3/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'language': 'en', 'ext': 'mp4', 'protocol': 'm3u8_native', 'audio_ext': 'mp4', }, { - 'format_id': 'aud3-English', - 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a3/prog_index.m3u8', + 'format_id': 'aud1-English', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a1/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'language': 'en', 'ext': 'mp4', @@ -1047,6 +1047,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'ext': 'mp4', 'protocol': 'm3u8_native', 'vcodec': 'none', + 'acodec': 'mp4a', 'audio_ext': 'mp4', 'video_ext': 'none', }, { @@ -1058,6 +1059,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'ext': 'mp4', 'protocol': 'm3u8_native', 'vcodec': 'none', + 'acodec': 'mp4a', 'audio_ext': 'mp4', 'video_ext': 'none', }, { @@ -1132,6 +1134,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'ext': 'mp4', 'protocol': 'm3u8_native', 'vcodec': 'none', + 'acodec': 'mp4a.40.2', 'audio_ext': 'mp4', 'video_ext': 'none', }, { @@ -1143,6 +1146,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'ext': 'mp4', 'protocol': 'm3u8_native', 'vcodec': 'none', + 'acodec': 'mp4a.40.2', 'audio_ext': 'mp4', 'video_ext': 'none', }, { @@ -1154,6 +1158,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'ext': 'mp4', 'protocol': 'm3u8_native', 'vcodec': 'none', + 'acodec': 'mp4a.40.2', 'audio_ext': 'mp4', 'video_ext': 'none', }, { From 26a3171a6cb63f70fa2fdf3c77d6cff75e6d609c Mon Sep 17 00:00:00 2001 From: nixxo Date: Tue, 10 Jan 2023 22:52:17 +0100 Subject: [PATCH 4/5] removed Audio-only checks as not needed --- yt_dlp/extractor/common.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 1795fdda6..57fa5f0c3 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -2146,20 +2146,14 @@ class InfoExtractor: fmt['acodec'] = codecs.get('acodec') break - fixed = False - # Trying to set Video-only (and Audio-only???) streams based on the manifest url (be very strict to avoid issues). Eg: + # Trying to set Video-only streams based on the manifest url (be very strict to avoid issues). Eg: # .../chunklist_b5210000_vo_... # .../v9/prog_index.m3u8... # .../index-f14-v1.m3u8?... - if re.search(r'chunklist\w*?_vo_|/v\d{1,2}/\w*?index\.m3u8|index-f\d{1,2}-v\d{1,2}\.m3u8', f['url']): + if re.search(r'chunklist\w*?_vo_|/v\d{1,2}/\w*?index\.m3u8|index(?:-f\d{1,2})?-v\d{1,2}\.m3u8', f['url']): codecs['acodec'] = 'none' - fixed = True - # Audio only: very likely not necessary - elif re.search(r'chunklist\w*?_ao_|/a\d{1,2}/\w*?index\.m3u8|index-f\d{1,2}-a\d{1,2}\.m3u8', f['url']): - codecs['vcodec'] = 'none' - fixed = True - if audio_group_id and codecs and not force_codecs and f.get('vcodec') != 'none' and not fixed: + if audio_group_id and codecs and not force_codecs and f.get('vcodec') != 'none': audio_group = groups.get(audio_group_id) if audio_group and audio_group[0].get('URI'): codecs['acodec'] = 'none' From 9b00443198397ea922a5c7deb44d49522295b1db Mon Sep 17 00:00:00 2001 From: bashonly Date: Fri, 1 Mar 2024 10:35:49 -0600 Subject: [PATCH 5/5] [test:InfoExtractor] Fix `test_parse_m3u8_formats` tests --- test/test_InfoExtractor.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 80b2c27d6..b17ba46c9 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -1081,7 +1081,6 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'acodec': 'none', 'video_ext': 'mp4', 'audio_ext': 'none', - 'vbr': 2344.0, }, { 'format_id': '3600', 'url': 'https://b70cb04c54ab478189e9d8ee45637b13.msvdn.net/ostr8/podcastcdn/teche_root/YT_ITALIA_TECHE_HD_multiaudio/13834457_,1200,1800,2400,3600,5000/chunklist_b3516000_vo_slita_t64MzYwMA==.m3u8', @@ -1095,7 +1094,6 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'acodec': 'none', 'video_ext': 'mp4', 'audio_ext': 'none', - 'vbr': 3516.0, }, { 'format_id': '5000', 'url': 'https://b70cb04c54ab478189e9d8ee45637b13.msvdn.net/ostr8/podcastcdn/teche_root/YT_ITALIA_TECHE_HD_multiaudio/13834457_,1200,1800,2400,3600,5000/chunklist_b5210000_vo_slita_t64NTAwMA==.m3u8', @@ -1109,7 +1107,6 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'acodec': 'none', 'video_ext': 'mp4', 'audio_ext': 'none', - 'vbr': 5210.0, }], {}, {'acodec': 'mp4a', 'vcodec': 'avc1'}, @@ -1166,7 +1163,6 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'acodec': 'mp4a.40.2', 'video_ext': 'mp4', 'audio_ext': 'none', - 'vbr': 1365.331, }, { 'format_id': '2137', 'url': 'https://streamcdng18-8e7439fdb1694c8da3a0fd63e4dda518.msvdn.net/raiuno1/hls/rai1_1800/chunklist.m3u8?baseuri=%2Fraiuno1%2Fhls%2F&tstart=0&tend=1672438656&tk2=2b4cb4fd233734fe415b44d8bdb422941028f6314c0aa7088d8fed35c72edf56', @@ -1180,7 +1176,6 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'acodec': 'mp4a.40.2', 'video_ext': 'mp4', 'audio_ext': 'none', - 'vbr': 2137.033, }, { 'format_id': '2793', 'url': 'https://streamcdng18-8e7439fdb1694c8da3a0fd63e4dda518.msvdn.net/raiuno1/hls/rai1_2400/chunklist.m3u8?baseuri=%2Fraiuno1%2Fhls%2F&tstart=0&tend=1672438656&tk2=2b4cb4fd233734fe415b44d8bdb422941028f6314c0aa7088d8fed35c72edf56', @@ -1194,7 +1189,6 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'acodec': 'mp4a.40.2', 'video_ext': 'mp4', 'audio_ext': 'none', - 'vbr': 2793.078, }], {}, {'acodec': 'mp4a', 'vcodec': 'avc1'},