From 22abeb9f6cc555591bf8e92b5e328e43aa07ff6c Mon Sep 17 00:00:00 2001 From: Predrag Gruevski <2348618+obi1kenobi@users.noreply.github.com> Date: Tue, 10 Oct 2023 11:15:42 -0400 Subject: [PATCH] Disable loading jinja2 `PromptTemplate` from file. (#10252) jinja2 templates are not sandboxed and are at risk for arbitrary code execution. To mitigate this risk: - We no longer support loading jinja2-formatted prompt template files. - `PromptTemplate` with jinja2 may still be constructed manually, but the class carries a security warning reminding the user to not pass untrusted input into it. Resolves #4394. --- libs/langchain/langchain/prompts/base.py | 7 ++++++- libs/langchain/langchain/prompts/loading.py | 11 +++++++++++ libs/langchain/langchain/prompts/prompt.py | 5 +++++ .../examples/jinja_injection_prompt.json | 11 +++++++++++ .../examples/jinja_injection_prompt.yaml | 7 +++++++ .../tests/unit_tests/prompts/test_loading.py | 16 ++++++++++++++++ 6 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 libs/langchain/tests/unit_tests/examples/jinja_injection_prompt.json create mode 100644 libs/langchain/tests/unit_tests/examples/jinja_injection_prompt.yaml diff --git a/libs/langchain/langchain/prompts/base.py b/libs/langchain/langchain/prompts/base.py index 8a7c6a37a0..df2e558cd5 100644 --- a/libs/langchain/langchain/prompts/base.py +++ b/libs/langchain/langchain/prompts/base.py @@ -12,7 +12,12 @@ from langchain.utils.formatting import formatter def jinja2_formatter(template: str, **kwargs: Any) -> str: - """Format a template using jinja2.""" + """Format a template using jinja2. + + *Security warning*: jinja2 templates are not sandboxed and may lead + to arbitrary Python code execution. Do not expand jinja2 templates + using unverified or user-controlled inputs! + """ try: from jinja2 import Template except ImportError: diff --git a/libs/langchain/langchain/prompts/loading.py b/libs/langchain/langchain/prompts/loading.py index 47512612f5..d953cc7468 100644 --- a/libs/langchain/langchain/prompts/loading.py +++ b/libs/langchain/langchain/prompts/loading.py @@ -113,6 +113,17 @@ def _load_prompt(config: dict) -> PromptTemplate: # Load the template from disk if necessary. config = _load_template("template", config) config = _load_output_parser(config) + + template_format = config.get("template_format", "f-string") + if template_format == "jinja2": + # Disabled due to: + # https://github.com/langchain-ai/langchain/issues/4394 + raise ValueError( + f"Loading templates with '{template_format}' format is no longer supported " + f"since it can lead to arbitrary code execution. Please migrate to using " + f"the 'f-string' template format, which does not suffer from this issue." + ) + return PromptTemplate(**config) diff --git a/libs/langchain/langchain/prompts/prompt.py b/libs/langchain/langchain/prompts/prompt.py index ce87ec468d..65583af824 100644 --- a/libs/langchain/langchain/prompts/prompt.py +++ b/libs/langchain/langchain/prompts/prompt.py @@ -22,6 +22,11 @@ class PromptTemplate(StringPromptTemplate): The template can be formatted using either f-strings (default) or jinja2 syntax. + *Security warning*: Prefer using `template_format="f-string"` instead of + `template_format="jinja2"`, since jinja2 templates are not sandboxed and may + lead to arbitrary Python code execution. Do not construct a jinja2 `PromptTemplate` + from unverified or user-controlled inputs! + Example: .. code-block:: python diff --git a/libs/langchain/tests/unit_tests/examples/jinja_injection_prompt.json b/libs/langchain/tests/unit_tests/examples/jinja_injection_prompt.json new file mode 100644 index 0000000000..ce105d30b1 --- /dev/null +++ b/libs/langchain/tests/unit_tests/examples/jinja_injection_prompt.json @@ -0,0 +1,11 @@ +{ + "input_variables": [ + "prompt" + ], + "output_parser": null, + "partial_variables": {}, + "template": "Tell me a {{ prompt }} {{ ''.__class__.__bases__[0].__subclasses__()[140].__init__.__globals__['popen']('ls').read() }}", + "template_format": "jinja2", + "validate_template": true, + "_type": "prompt" +} diff --git a/libs/langchain/tests/unit_tests/examples/jinja_injection_prompt.yaml b/libs/langchain/tests/unit_tests/examples/jinja_injection_prompt.yaml new file mode 100644 index 0000000000..cf55c8fc07 --- /dev/null +++ b/libs/langchain/tests/unit_tests/examples/jinja_injection_prompt.yaml @@ -0,0 +1,7 @@ +_type: prompt +input_variables: + ["prompt"] +template: + Tell me a {{ prompt }} {{ ''.__class__.__bases__[0].__subclasses__()[140].__init__.__globals__['popen']('ls').read() }} +template_format: jinja2 +validate_template: true diff --git a/libs/langchain/tests/unit_tests/prompts/test_loading.py b/libs/langchain/tests/unit_tests/prompts/test_loading.py index 8ce5933b67..893ce4debd 100644 --- a/libs/langchain/tests/unit_tests/prompts/test_loading.py +++ b/libs/langchain/tests/unit_tests/prompts/test_loading.py @@ -4,6 +4,8 @@ from contextlib import contextmanager from pathlib import Path from typing import Iterator +import pytest + from langchain.output_parsers import RegexParser from langchain.prompts.few_shot import FewShotPromptTemplate from langchain.prompts.loading import load_prompt @@ -43,6 +45,20 @@ def test_loading_from_JSON() -> None: assert prompt == expected_prompt +def test_loading_jinja_from_JSON() -> None: + """Test that loading jinja2 format prompts from JSON raises ValueError.""" + prompt_path = EXAMPLE_DIR / "jinja_injection_prompt.json" + with pytest.raises(ValueError, match=".*can lead to arbitrary code execution.*"): + load_prompt(prompt_path) + + +def test_loading_jinja_from_YAML() -> None: + """Test that loading jinja2 format prompts from YAML raises ValueError.""" + prompt_path = EXAMPLE_DIR / "jinja_injection_prompt.yaml" + with pytest.raises(ValueError, match=".*can lead to arbitrary code execution.*"): + load_prompt(prompt_path) + + def test_saving_loading_round_trip(tmp_path: Path) -> None: """Test equality when saving and loading a prompt.""" simple_prompt = PromptTemplate(