From 86e3b82261e8ebc6c6707c09544c9dfb8907c0fd Mon Sep 17 00:00:00 2001 From: sepro <4618135+seproDev@users.noreply.github.com> Date: Mon, 1 Apr 2024 01:17:24 +0200 Subject: [PATCH] [core] Fix `filesize_approx` calculation (#9560) Reverts 22e4dfacb61f62dfbb3eb41b31c7b69ba1059b80 Despite being documented as `Kbit/s`, the extractors/manifests were returning bitrates in SI units of kilobits/sec. Authored by: seproDev, pukkandan --- README.md | 16 ++++++++-------- devscripts/changelog_override.json | 4 ++++ yt_dlp/YoutubeDL.py | 8 ++++---- yt_dlp/extractor/common.py | 6 +++--- yt_dlp/extractor/youtube.py | 2 +- yt_dlp/utils/_utils.py | 11 +++++++++++ 6 files changed, 31 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index d0683a34a..014bf262e 100644 --- a/README.md +++ b/README.md @@ -1472,9 +1472,9 @@ The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, ` - `width`: Width of the video, if known - `height`: Height of the video, if known - `aspect_ratio`: Aspect ratio of the video, if known - - `tbr`: Average bitrate of audio and video in KBit/s - - `abr`: Average audio bitrate in KBit/s - - `vbr`: Average video bitrate in KBit/s + - `tbr`: Average bitrate of audio and video in [kbps](## "1000 bits/sec") + - `abr`: Average audio bitrate in [kbps](## "1000 bits/sec") + - `vbr`: Average video bitrate in [kbps](## "1000 bits/sec") - `asr`: Audio sampling rate in Hertz - `fps`: Frame rate - `audio_channels`: The number of audio channels @@ -1499,7 +1499,7 @@ Any string comparison may be prefixed with negation `!` in order to produce an o **Note**: None of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the website. Any other field made available by the extractor can also be used for filtering. -Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "bv[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. You can also use the filters with `all` to download all formats that satisfy the filter, e.g. `-f "all[vcodec=none]"` selects all audio-only formats. +Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "bv[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 kbps. You can also use the filters with `all` to download all formats that satisfy the filter, e.g. `-f "all[vcodec=none]"` selects all audio-only formats. Format selectors can also be grouped using parentheses; e.g. `-f "(mp4,webm)[height<480]"` will download the best pre-merged mp4 and webm formats with a height lower than 480. @@ -1531,10 +1531,10 @@ The available fields are: - `fps`: Framerate of video - `hdr`: The dynamic range of the video (`DV` > `HDR12` > `HDR10+` > `HDR10` > `HLG` > `SDR`) - `channels`: The number of audio channels - - `tbr`: Total average bitrate in KBit/s - - `vbr`: Average video bitrate in KBit/s - - `abr`: Average audio bitrate in KBit/s - - `br`: Average bitrate in KBit/s, `tbr`/`vbr`/`abr` + - `tbr`: Total average bitrate in [kbps](## "1000 bits/sec") + - `vbr`: Average video bitrate in [kbps](## "1000 bits/sec") + - `abr`: Average audio bitrate in [kbps](## "1000 bits/sec") + - `br`: Average bitrate in [kbps](## "1000 bits/sec"), `tbr`/`vbr`/`abr` - `asr`: Audio sample rate in Hz **Deprecation warning**: Many of these fields have (currently undocumented) aliases, that may be removed in a future version. It is recommended to use only the documented field names. diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json index 2a34ad071..eaa348cf2 100644 --- a/devscripts/changelog_override.json +++ b/devscripts/changelog_override.json @@ -126,5 +126,9 @@ "when": "4ce57d3b873c2887814cbec03d029533e82f7db5", "short": "[ie] Support multi-period MPD streams (#6654)", "authors": ["alard", "pukkandan"] + }, + { + "action": "remove", + "when": "22e4dfacb61f62dfbb3eb41b31c7b69ba1059b80" } ] diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 563667600..e83108619 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -146,6 +146,7 @@ from .utils import ( subtitles_filename, supports_terminal_sequences, system_identifier, + filesize_from_tbr, timetuple_from_msec, to_high_limit_path, traverse_obj, @@ -2826,9 +2827,8 @@ class YoutubeDL: format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2)) # For fragmented formats, "tbr" is often max bitrate and not average if (('manifest-filesize-approx' in self.params['compat_opts'] or not format.get('manifest_url')) - and info_dict.get('duration') and format.get('tbr') and not format.get('filesize') and not format.get('filesize_approx')): - format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8)) + format['filesize_approx'] = filesize_from_tbr(format.get('tbr'), info_dict.get('duration')) format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict), load_cookies=True) # Safeguard against old/insecure infojson when using --load-info-json @@ -3878,8 +3878,8 @@ class YoutubeDL: delim, ( format_field(f, 'filesize', ' \t%s', func=format_bytes) or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes) - or format_field(try_call(lambda: format_bytes(int(info_dict['duration'] * f['tbr'] * (1024 / 8)))), - None, self._format_out('~\t%s', self.Styles.SUPPRESS))), + or format_field(filesize_from_tbr(f.get('tbr'), info_dict.get('duration')), None, + self._format_out('~\t%s', self.Styles.SUPPRESS), func=format_bytes)), format_field(f, 'tbr', '\t%dk', func=round), shorten_protocol_name(f.get('protocol', '')), delim, diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index bd318a7f4..57bbf9bdf 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -171,12 +171,12 @@ class InfoExtractor: Automatically calculated from width and height * dynamic_range The dynamic range of the video. One of: "SDR" (None), "HDR10", "HDR10+, "HDR12", "HLG, "DV" - * tbr Average bitrate of audio and video in KBit/s - * abr Average audio bitrate in KBit/s + * tbr Average bitrate of audio and video in kbps (1000 bits/sec) + * abr Average audio bitrate in kbps (1000 bits/sec) * acodec Name of the audio codec in use * asr Audio sampling rate in Hertz * audio_channels Number of audio channels - * vbr Average video bitrate in KBit/s + * vbr Average video bitrate in kbps (1000 bits/sec) * fps Frame rate * vcodec Name of the video codec in use * container Name of the container format diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 31733aefb..b41191b7f 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3834,7 +3834,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): video_id=video_id, only_once=True) throttled = True - tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1024) + tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) language_preference = ( 10 if audio_track.get('audioIsDefault') and 10 else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10 diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 9efeb6a1c..648cf0abd 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -5415,6 +5415,17 @@ class FormatSorter: return tuple(self._calculate_field_preference(format, field) for field in self._order) +def filesize_from_tbr(tbr, duration): + """ + @param tbr: Total bitrate in kbps (1000 bits/sec) + @param duration: Duration in seconds + @returns Filesize in bytes + """ + if tbr is None or duration is None: + return None + return int(duration * tbr * (1000 / 8)) + + # XXX: Temporary class _YDLLogger: def __init__(self, ydl=None):