diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json index 52ddf0613..046060cb2 100644 --- a/devscripts/changelog_override.json +++ b/devscripts/changelog_override.json @@ -142,5 +142,10 @@ "when": "e3a3ed8a981d9395c4859b6ef56cd02bc3148db2", "short": "[cleanup:ie] No `from` stdlib imports in extractors", "authors": ["pukkandan"] + }, + { + "action": "add", + "when": "9590cc6b4768e190183d7d071a6c78170889116a", + "short": "[priority] Security: [[CVE-2024-22423](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2024-22423)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-hjq6-52gw-2g7p)\n - The shell escape function now properly escapes `%`, `\\` and `\\n`.\n - `utils.Popen` has been patched accordingly." } ] diff --git a/test/test_utils.py b/test/test_utils.py index 71febeefd..ddf0a7c24 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -2069,6 +2069,10 @@ Line 1 # Test escaping assert run_shell(['echo', 'test"&']) == '"test""&"\n' + assert run_shell(['echo', '%CMDCMDLINE:~-1%&']) == '"%CMDCMDLINE:~-1%&"\n' + assert run_shell(['echo', 'a\nb']) == '"a"\n"b"\n' + assert run_shell(['echo', '"']) == '""""\n' + assert run_shell(['echo', '\\']) == '\\\n' # Test if delayed expansion is disabled assert run_shell(['echo', '^!']) == '"^!"\n' assert run_shell('echo "^!"') == '"^!"\n' diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 35aba968f..9f730d038 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -25,7 +25,7 @@ import unicodedata from .cache import Cache from .compat import functools, urllib # isort: split -from .compat import compat_os_name, compat_shlex_quote, urllib_req_to_req +from .compat import compat_os_name, urllib_req_to_req from .cookies import LenientSimpleCookie, load_cookies from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name from .downloader.rtmp import rtmpdump_version @@ -102,7 +102,6 @@ from .utils import ( UserNotLive, YoutubeDLError, age_restricted, - args_to_str, bug_reports_message, date_from_str, deprecation_warning, @@ -141,6 +140,7 @@ from .utils import ( sanitize_filename, sanitize_path, sanitize_url, + shell_quote, str_or_none, strftime_or_none, subtitles_filename, @@ -823,7 +823,7 @@ class YoutubeDL: self.report_warning( 'Long argument string detected. ' 'Use -- to separate parameters and URLs, like this:\n%s' % - args_to_str(correct_argv)) + shell_quote(correct_argv)) def add_info_extractor(self, ie): """Add an InfoExtractor object to the end of the list.""" @@ -1355,7 +1355,7 @@ class YoutubeDL: value, fmt = escapeHTML(str(value)), str_fmt elif fmt[-1] == 'q': # quoted value = map(str, variadic(value) if '#' in flags else [value]) - value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt + value, fmt = shell_quote(value, shell=True), str_fmt elif fmt[-1] == 'B': # bytes value = f'%{str_fmt}'.encode() % str(value).encode() value, fmt = value.decode('utf-8', 'ignore'), 's' diff --git a/yt_dlp/compat/__init__.py b/yt_dlp/compat/__init__.py index 5ad5c70ec..d820adaf1 100644 --- a/yt_dlp/compat/__init__.py +++ b/yt_dlp/compat/__init__.py @@ -27,12 +27,9 @@ def compat_etree_fromstring(text): compat_os_name = os._name if os.name == 'java' else os.name -if compat_os_name == 'nt': - def compat_shlex_quote(s): - import re - return s if re.match(r'^[-_\w./]+$', s) else s.replace('"', '""').join('""') -else: - from shlex import quote as compat_shlex_quote # noqa: F401 +def compat_shlex_quote(s): + from ..utils import shell_quote + return shell_quote(s) def compat_ord(c): diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index dec514674..e3e80f3d3 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -50,7 +50,6 @@ from ..compat import ( compat_expanduser, compat_HTMLParseError, compat_os_name, - compat_shlex_quote, ) from ..dependencies import xattr @@ -836,9 +835,11 @@ class Popen(subprocess.Popen): if shell and compat_os_name == 'nt' and kwargs.get('executable') is None: if not isinstance(args, str): - args = ' '.join(compat_shlex_quote(a) for a in args) + args = shell_quote(args, shell=True) shell = False - args = f'{self.__comspec()} /Q /S /D /V:OFF /C "{args}"' + # Set variable for `cmd.exe` newline escaping (see `utils.shell_quote`) + env['='] = '"^\n\n"' + args = f'{self.__comspec()} /Q /S /D /V:OFF /E:ON /C "{args}"' super().__init__(args, *remaining, env=env, shell=shell, **kwargs, startupinfo=self._startupinfo) @@ -1637,15 +1638,38 @@ def get_filesystem_encoding(): return encoding if encoding is not None else 'utf-8' -def shell_quote(args): - quoted_args = [] - encoding = get_filesystem_encoding() - for a in args: - if isinstance(a, bytes): - # We may get a filename encoded with 'encodeFilename' - a = a.decode(encoding) - quoted_args.append(compat_shlex_quote(a)) - return ' '.join(quoted_args) +_WINDOWS_QUOTE_TRANS = str.maketrans({'"': '\\"', '\\': '\\\\'}) +_CMD_QUOTE_TRANS = str.maketrans({ + # Keep quotes balanced by replacing them with `""` instead of `\\"` + '"': '""', + # Requires a variable `=` containing `"^\n\n"` (set in `utils.Popen`) + # `=` should be unique since variables containing `=` cannot be set using cmd + '\n': '%=%', + # While we are only required to escape backslashes immediately before quotes, + # we instead escape all of 'em anyways to be consistent + '\\': '\\\\', + # Use zero length variable replacement so `%` doesn't get expanded + # `cd` is always set as long as extensions are enabled (`/E:ON` in `utils.Popen`) + '%': '%%cd:~,%', +}) + + +def shell_quote(args, *, shell=False): + args = list(variadic(args)) + if any(isinstance(item, bytes) for item in args): + deprecation_warning('Passing bytes to utils.shell_quote is deprecated') + encoding = get_filesystem_encoding() + for index, item in enumerate(args): + if isinstance(item, bytes): + args[index] = item.decode(encoding) + + if compat_os_name != 'nt': + return shlex.join(args) + + trans = _CMD_QUOTE_TRANS if shell else _WINDOWS_QUOTE_TRANS + return ' '.join( + s if re.fullmatch(r'[\w#$*\-+./:?@\\]+', s, re.ASCII) else s.translate(trans).join('""') + for s in args) def smuggle_url(url, data): @@ -2849,7 +2873,7 @@ def ytdl_is_updateable(): def args_to_str(args): # Get a short string representation for a subprocess command - return ' '.join(compat_shlex_quote(a) for a in args) + return shell_quote(args) def error_to_str(err):