[ie/common] Support ranges in MPD

pull/8711/head
Sergey Nikolaev 6 months ago
parent fc2cc626f0
commit 484b39111d

@ -1205,6 +1205,20 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
'height': 1080, 'height': 1080,
}], }],
{}, {},
), (
# https://github.com/yt-dlp/yt-dlp/pull/8711
'urls_with_ranges',
'http://unknown/manifest.mpd', # mpd_url
None, # mpd_base_url
[{
'fragments': [
{'byte_range': {'start': 0, 'end': 200000}},
{'byte_range': {'start': 200001, 'end': 300000}},
{'byte_range': {'start': 300000, 'end': 400000}},
{'byte_range': None},
]
}],
{},
), ( ), (
# https://github.com/ytdl-org/youtube-dl/issues/20346 # https://github.com/ytdl-org/youtube-dl/issues/20346
# Media considered unfragmented even though it contains # Media considered unfragmented even though it contains

@ -0,0 +1,18 @@
<?xml version="1.0" ?>
<MPD maxSegmentDuration="PT0H0M10.000S" mediaPresentationDuration="PT0H4M1.728S" minBufferTime="PT1.500S" profiles="urn:mpeg:dash:profile:isoff-main:2011" type="static" xmlns="urn:mpeg:dash:schema:mpd:2011">
<Period duration="PT0H4M1.728S">
<AdaptationSet bitstreamSwitching="true" lang="und" maxHeight="1080" maxWidth="1920" par="16:9" segmentAlignment="true">
<ContentComponent contentType="video" id="1"/>
<Representation audioSamplingRate="44100" bandwidth="200000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="144" id="h264_aac_144p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="256">
<SegmentList duration="10000" timescale="1000">
<Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/0/432f65a0.m4s" range="0-999"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/0/432f65a0.m4s" mediaRange="1000-99999"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/0/432f65a0.m4s" mediaRange="100000-199999"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/0/432f65a0.m4s" mediaRange="200001-299999"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/1/432f65a0.m4s" mediaRange="300000-399999"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/2/432f65a0.m4s"/>
</SegmentList>
</Representation>
</AdaptationSet>
</Period>
</MPD>

@ -87,4 +87,5 @@ class DashSegmentsFD(FragmentFD):
'fragment_count': fragment.get('fragment_count'), 'fragment_count': fragment.get('fragment_count'),
'index': i, 'index': i,
'url': fragment_url, 'url': fragment_url,
'byte_range': fragment.get('byte_range'),
} }

@ -2614,10 +2614,20 @@ class InfoExtractor:
if segment_duration: if segment_duration:
ms_info['segment_duration'] = float(segment_duration) ms_info['segment_duration'] = float(segment_duration)
def parse_range(byte_range):
    """Parse an MPD byte-range attribute such as ``"1000-99999"``.

    The attribute gives the first and last byte positions, both
    inclusive. The result uses an exclusive end so that two adjacent
    ranges satisfy ``previous['end'] == next['start']``.

    Returns a dict ``{'start': int, 'end': int}``, or None when the
    value is missing, is not a string, does not consist of exactly two
    ``-``-separated parts, or has a part that is not an integer (for
    example an open-ended ``"1000-"`` or a suffix range ``"-500"``).
    """
    if not isinstance(byte_range, str):
        return None
    split_byte_range = byte_range.split('-')
    if len(split_byte_range) != 2:
        return None
    try:
        start, last = (int(part) for part in split_byte_range)
    except ValueError:
        # An empty or non-numeric bound cannot be represented as a
        # start/end pair; treat it like any other unusable range
        # instead of aborting manifest parsing.
        return None
    # Convert the inclusive last-byte position to an exclusive end.
    return {'start': start, 'end': last + 1}
def extract_Initialization(source): def extract_Initialization(source):
initialization = source.find(_add_ns('Initialization')) initialization = source.find(_add_ns('Initialization'))
if initialization is not None: if initialization is not None:
ms_info['initialization_url'] = initialization.attrib['sourceURL'] ms_info['initialization_url'] = initialization.attrib['sourceURL']
ms_info['initialization_byte_range'] = parse_range(initialization.attrib.get('range'))
segment_list = element.find(_add_ns('SegmentList')) segment_list = element.find(_add_ns('SegmentList'))
if segment_list is not None: if segment_list is not None:
@ -2625,7 +2635,10 @@ class InfoExtractor:
extract_Initialization(segment_list) extract_Initialization(segment_list)
segment_urls_e = segment_list.findall(_add_ns('SegmentURL')) segment_urls_e = segment_list.findall(_add_ns('SegmentURL'))
if segment_urls_e: if segment_urls_e:
ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e] ms_info['segments'] = [{
'url': segment.attrib['media'],
'byte_range': parse_range(segment.attrib.get('mediaRange')),
} for segment in segment_urls_e]
else: else:
segment_template = element.find(_add_ns('SegmentTemplate')) segment_template = element.find(_add_ns('SegmentTemplate'))
if segment_template is not None: if segment_template is not None:
@ -2780,7 +2793,7 @@ class InfoExtractor:
def location_key(location): def location_key(location):
return 'url' if re.match(r'^https?://', location) else 'path' return 'url' if re.match(r'^https?://', location) else 'path'
if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info: if 'segments' not in representation_ms_info and 'media' in representation_ms_info:
media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time')) media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
media_location_key = location_key(media_template) media_location_key = location_key(media_template)
@ -2832,38 +2845,63 @@ class InfoExtractor:
add_segment_url() add_segment_url()
segment_number += 1 segment_number += 1
segment_time += segment_d segment_time += segment_d
elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info: elif 'segments' in representation_ms_info and 's' in representation_ms_info:
# No media template, # No media template,
# e.g. https://www.youtube.com/watch?v=iXZV5uAYMJI # e.g. https://www.youtube.com/watch?v=iXZV5uAYMJI
# or any YouTube dashsegments video # or any YouTube dashsegments video
fragments = [] fragments = []
fragment = None
segment_index = 0 segment_index = 0
timescale = representation_ms_info['timescale'] timescale = representation_ms_info['timescale']
for s in representation_ms_info['s']: for s in representation_ms_info['s']:
duration = float_or_none(s['d'], timescale) duration = float_or_none(s['d'], timescale)
for r in range(s.get('r', 0) + 1): for r in range(s.get('r', 0) + 1):
segment_uri = representation_ms_info['segment_urls'][segment_index] segment = representation_ms_info['segments'][segment_index]
fragments.append({ segment_url = segment['url']
location_key(segment_uri): segment_uri, fragment_location_key = location_key(segment_url)
'duration': duration, if (fragment is not None and fragment.get(fragment_location_key) == segment_url
}) and fragment['byte_range'] is not None and segment['byte_range'] is not None
and fragment['byte_range']['end'] == segment['byte_range']['start']
and ((fragment['duration'] is not None and duration is not None)
or (fragment['duration'] is None and duration is None))):
fragment['byte_range']['end'] = segment['byte_range']['end']
if duration:
fragment['duration'] += duration
else:
fragment = {
fragment_location_key: segment_url,
'byte_range': segment['byte_range'],
'duration': duration,
}
fragments.append(fragment)
segment_index += 1 segment_index += 1
representation_ms_info['fragments'] = fragments representation_ms_info['fragments'] = fragments
elif 'segment_urls' in representation_ms_info: elif 'segments' in representation_ms_info:
# Segment URLs with no SegmentTimeline # Segment URLs with no SegmentTimeline
# E.g. https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091 # E.g. https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
# https://github.com/ytdl-org/youtube-dl/pull/14844 # https://github.com/ytdl-org/youtube-dl/pull/14844
fragments = [] fragments = []
fragment = None
segment_duration = float_or_none( segment_duration = float_or_none(
representation_ms_info['segment_duration'], representation_ms_info['segment_duration'],
representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None
for segment_url in representation_ms_info['segment_urls']: for segment in representation_ms_info['segments']:
fragment = { segment_url = segment['url']
location_key(segment_url): segment_url, fragment_location_key = location_key(segment_url)
} if (fragment is not None and fragment.get(fragment_location_key) == segment_url
if segment_duration: and fragment['byte_range'] is not None and segment['byte_range'] is not None
fragment['duration'] = segment_duration and fragment['byte_range']['end'] == segment['byte_range']['start']):
fragments.append(fragment) fragment['byte_range']['end'] = segment['byte_range']['end']
if segment_duration:
fragment['duration'] += segment_duration
else:
fragment = {
fragment_location_key: segment_url,
'byte_range': segment['byte_range'],
}
if segment_duration:
fragment['duration'] = segment_duration
fragments.append(fragment)
representation_ms_info['fragments'] = fragments representation_ms_info['fragments'] = fragments
# If there is a fragments key available then we correctly recognized fragmented media. # If there is a fragments key available then we correctly recognized fragmented media.
# Otherwise we will assume unfragmented media with direct access. Technically, such # Otherwise we will assume unfragmented media with direct access. Technically, such
@ -2881,7 +2919,19 @@ class InfoExtractor:
initialization_url = representation_ms_info['initialization_url'] initialization_url = representation_ms_info['initialization_url']
if not f.get('url'): if not f.get('url'):
f['url'] = initialization_url f['url'] = initialization_url
f['fragments'].append({location_key(initialization_url): initialization_url}) initialization_byte_range = representation_ms_info.get('initialization_byte_range')
fragments = representation_ms_info['fragments']
fragment = fragments[0] if len(fragments) > 0 else None
fragment_location_key = location_key(initialization_url)
if (fragment is not None and initialization_url == fragment.get(fragment_location_key)
and initialization_byte_range is not None and fragment['byte_range'] is not None
and initialization_byte_range['end'] == fragment['byte_range']['start']):
fragment['byte_range']['start'] = initialization_byte_range['start']
else:
f['fragments'].append({
fragment_location_key: initialization_url,
'byte_range': initialization_byte_range,
})
f['fragments'].extend(representation_ms_info['fragments']) f['fragments'].extend(representation_ms_info['fragments'])
if not period_duration: if not period_duration:
period_duration = try_get( period_duration = try_get(

Loading…
Cancel
Save