Add security notices on PAL and CPAL experimental chains. (#9938)

Clearly document that the PAL and CPAL techniques involve generating code, and that such code must be properly sandboxed and given appropriate narrowly-scoped credentials in order to ensure security. While our implementations include some mitigations, sandboxing Python and SQL is well known to be a very hard problem, and our mitigations are no replacement for proper sandboxing and permissions management. Sandboxing and permissions management must be implemented outside the scope of the Python process where this package's code runs, so their correct setup and administration must therefore be the responsibility of the user of this code.
1 year ago · b5cd1e0fed
parent f5faac8859
commit b5cd1e0fed
2 changed files with 42 additions and 2 deletions
--- a/libs/experimental/langchain_experimental/cpal/base.py
+++ b/libs/experimental/langchain_experimental/cpal/base.py
@@ -131,13 +131,34 @@ class InterventionChain(_BaseStoryElementChain):


 class QueryChain(_BaseStoryElementChain):
-    """Query the outcome table using SQL."""
+    """Query the outcome table using SQL.
+
+    *Security note*: This class implements an AI technique that generates SQL code.
+        If those SQL commands are executed, it's critical to ensure they use credentials
+        that are narrowly-scoped to only include the permissions this chain needs.
+        Failure to do so may result in data corruption or loss, since this chain may
+        attempt commands like `DROP TABLE` or `INSERT` if appropriately prompted.
+        The best way to guard against such negative outcomes is to (as appropriate)
+        limit the permissions granted to the credentials used with this chain.
+    """

    pydantic_model: ClassVar[Type[pydantic.BaseModel]] = QueryModel
    template: ClassVar[str] = query_template  # TODO: incl. table schema


 class CPALChain(_BaseStoryElementChain):
+    """Causal program-aided language (CPAL) chain implementation.
+
+    *Security note*: The building blocks of this class include the implementation
+        of an AI technique that generates SQL code. If those SQL commands
+        are executed, it's critical to ensure they use credentials that
+        are narrowly-scoped to only include the permissions this chain needs.
+        Failure to do so may result in data corruption or loss, since this chain may
+        attempt commands like `DROP TABLE` or `INSERT` if appropriately prompted.
+        The best way to guard against such negative outcomes is to (as appropriate)
+        limit the permissions granted to the credentials used with this chain.
+    """
+
    llm: BaseLanguageModel
    narrative_chain: Optional[NarrativeChain] = None
    causal_chain: Optional[CausalChain] = None
@@ -151,7 +172,17 @@ class CPALChain(_BaseStoryElementChain):
        llm: BaseLanguageModel,
        **kwargs: Any,
    ) -> CPALChain:
-        """instantiation depends on component chains"""
+        """instantiation depends on component chains
+
+        *Security note*: The building blocks of this class include the implementation
+            of an AI technique that generates SQL code. If those SQL commands
+            are executed, it's critical to ensure they use credentials that
+            are narrowly-scoped to only include the permissions this chain needs.
+            Failure to do so may result in data corruption or loss, since this chain may
+            attempt commands like `DROP TABLE` or `INSERT` if appropriately prompted.
+            The best way to guard against such negative outcomes is to (as appropriate)
+            limit the permissions granted to the credentials used with this chain.
+        """
        return cls(
            llm=llm,
            chain=LLMChain(
--- a/libs/experimental/langchain_experimental/pal_chain/base.py
+++ b/libs/experimental/langchain_experimental/pal_chain/base.py
@@ -90,6 +90,15 @@ class PALChain(Chain):
    This class implements the Program-Aided Language Models (PAL) for generating code
    solutions. PAL is a technique described in the paper "Program-Aided Language Models"
    (https://arxiv.org/pdf/2211.10435.pdf).
+
+    *Security note*: This class implements an AI technique that generates and evaluates
+        Python code, which can be dangerous and requires a specially sandboxed
+        environment to be safely used. While this class implements some basic guardrails
+        by limiting available locals/globals and by parsing and inspecting
+        the generated Python AST using `PALValidation`, those guardrails will not
+        deter sophisticated attackers and are not a replacement for a proper sandbox.
+        Do not use this class on untrusted inputs, with elevated permissions,
+        or without consulting your security team about proper sandboxing!
    """

    llm_chain: LLMChain