diff --git a/docs/modules/utils/examples/docker.ipynb b/docs/modules/utils/examples/docker.ipynb index e328dd39..50c57d71 100644 --- a/docs/modules/utils/examples/docker.ipynb +++ b/docs/modules/utils/examples/docker.ipynb @@ -1,77 +1,103 @@ { - "cells": [ - { - "cell_type": "code", - "metadata": { - "jukit_cell_id": "O4HPx3boF0" - }, - "source": [], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "markdown", - "metadata": { - "jukit_cell_id": "hqQkbPEwTJ" - }, - "source": [ - "# Using the DockerWrapper utility" - ] - }, - { - "cell_type": "code", - "metadata": { - "jukit_cell_id": "vCepuypaFH" - }, - "source": [ - "from langchain.utilities.docker import DockerWrapper, DockerSocket" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "metadata": { - "jukit_cell_id": "BtYVqy2YtO" - }, - "source": [ - "d = DockerWrapper()\n", - "query = \"\"\"\n", - "for i in $(seq 1 10)\n", - "do\n", - " echo $i\n", - "done\n", - "\"\"\"" - ], - "outputs": [], - "execution_count": null - }, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "jukit_cell_id": "O4HPx3boF0" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "jukit_cell_id": "hqQkbPEwTJ" + }, + "source": [ + "# Using the DockerWrapper utility" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "jukit_cell_id": "vCepuypaFH" + }, + "outputs": [], + "source": [ + "from langchain.utilities.docker import DockerWrapper, DockerSocket" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "jukit_cell_id": "BtYVqy2YtO" + }, + "outputs": [], + "source": [ + "d = DockerWrapper()\n", + "query = \"\"\"\n", + "for i in $(seq 1 10)\n", + "do\n", + " echo $i\n", + "done\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "jukit_cell_id": "ELWWm03ptQ" + }, + "outputs": [ { - "cell_type": "code", - "metadata": { - "jukit_cell_id": "ELWWm03ptQ" - }, - "source": [ - "print(d.exec_run(query, \"alpine\"))" - ], - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "[header] blocking IO\n{'stdout': '1\\n2\\n3\\n4\\n5\\n6\\n7\\n8\\n9\\n10\\n'}\n1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n\n" - } - ], - "execution_count": 1 - } - ], - "metadata": { - "anaconda-cloud": {}, - "kernelspec": { - "display_name": "python", - "language": "python", - "name": "python3" + "name": "stdout", + "output_type": "stream", + "text": [ + "[header] blocking IO\n", + "{'stdout': '1\\n2\\n3\\n4\\n5\\n6\\n7\\n8\\n9\\n10\\n'}\n", + "1\n", + "2\n", + "3\n", + "4\n", + "5\n", + "6\n", + "7\n", + "8\n", + "9\n", + "10\n", + "\n" + ] } + ], + "source": [ + "print(d.exec_run(query, \"alpine\"))" + ] + } + ], + "metadata": { + "anaconda-cloud": {}, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 4 -} \ No newline at end of file + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.11" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/langchain/utilities/docker.py b/langchain/utilities/docker/__init__.py similarity index 69% rename from langchain/utilities/docker.py rename to langchain/utilities/docker/__init__.py index 1b17023a..c10ef35c 100644 --- a/langchain/utilities/docker.py +++ b/langchain/utilities/docker/__init__.py @@ -1,6 +1,8 @@ """Wrapper for untrusted code exectuion on docker.""" # TODO: Validation: # - verify gVisor runtime (runsc) if available +# -TEST: spawned container: make sure it's ready ? before sending/reading commands +# - attach to running container # - pass arbitrary image names # - embed file payloads in the call to run (in LLMChain)? # - image selection helper @@ -8,39 +10,64 @@ import docker import struct -import time -import pandas as pd +import pandas as pd # type: ignore from docker.client import DockerClient # type: ignore -from docker.errors import APIError, ContainerError +from docker.errors import APIError, ContainerError # type: ignore +import logging from typing import Any, Dict from typing import Optional from pydantic import BaseModel, PrivateAttr, Extra, root_validator, validator +logger = logging.getLogger(__name__) + docker_images = { - "default": "alpine:{version}", - "python": "python:{version}", + 'default': 'alpine:{version}', + 'python': 'python:{version}', } SOCK_BUF_SIZE = 1024 +GVISOR_WARNING = """Warning: gVisor runtime not available for {docker_host}. + +Running untrusted code in a container without gVisor is not recommended. Docker +containers are not isolated. They can be abused to gain access to the host +system. To mitigate this risk, gVisor can be used to run the container in a +sandboxed environment. see: https://gvisor.dev/ for more info. +""" + +def gvisor_runtime_available(client: DockerClient) -> bool: + """Verify if gVisor runtime is available.""" + logger.debug("verifying availability of gVisor runtime...") + info = client.info() + if 'Runtimes' in info: + return 'runsc' in info['Runtimes'] + return False + +def _check_gvisor_runtime(): + client = docker.from_env() + docker_host = client.api.base_url + if not gvisor_runtime_available(docker.from_env()): + logger.warning(GVISOR_WARNING.format(docker_host=docker_host)) + +_check_gvisor_runtime() + class DockerSocket: """Wrapper around docker API's socket object. Can be used as a context manager.""" def __init__(self, socket): self.socket = socket - self.socket._sock.setblocking(False) + # self.socket._sock.setblocking(False) def __enter__(self): - print("context enter") return self def __exit__(self, exc_type, exc_value, traceback): - print("context exit") self.close() def close(self): + logger.debug("closing socket...") self.socket._sock.shutdown(2) # 2 = SHUT_RDWR self.socket._sock.close() self.socket.close() @@ -51,6 +78,15 @@ class DockerSocket: def recv(self): """Wrapper for socket.recv that does buffured read.""" + # NOTE: this is optional as a bonus + # TODO: Recv with TTY enabled + # + # When the TTY setting is enabled in POST /containers/create, the stream + # is not multiplexed. The data exchanged over the hijacked connection is + # simply the raw data from the process PTY and client's stdin. + + + # header := [8]byte{STREAM_TYPE, 0, 0, 0, SIZE1, SIZE2, SIZE3, SIZE4} # STREAM_TYPE can be: # @@ -70,7 +106,6 @@ class DockerSocket: # - Goto 1. chunks = [] - # strip the header # try: # self.socket._sock.recv(8) # except BlockingIOError as e: @@ -79,11 +114,15 @@ class DockerSocket: while True: header = b'' try: + # strip the header + # the first recv is blocking to wait for the container to start header = self.socket._sock.recv(8) except BlockingIOError: print("[header] blocking IO") break + self.socket._sock.setblocking(False) + if header == b'': break stream_type, size = struct.unpack("!BxxxI", header) @@ -112,7 +151,6 @@ class DockerSocket: - class DockerWrapper(BaseModel, extra=Extra.forbid): """Executes arbitrary payloads and returns the output.""" @@ -129,6 +167,7 @@ class DockerWrapper(BaseModel, extra=Extra.forbid): if self.from_env: self._docker_client = docker.from_env() + @property def client(self) -> DockerClient: """Docker client.""" @@ -142,7 +181,6 @@ class DockerWrapper(BaseModel, extra=Extra.forbid): @root_validator() def validate_all(cls, values: Dict) -> Dict: """Validate environment.""" - # print("root validator") return values def run(self, query: str, **kwargs: Any) -> str: @@ -154,7 +192,6 @@ class DockerWrapper(BaseModel, extra=Extra.forbid): """ try: image = kwargs.get("image", self.image) - del kwargs['image'] return self._docker_client.containers.run(image, query, remove=True, @@ -173,28 +210,36 @@ class DockerWrapper(BaseModel, extra=Extra.forbid): """Run arbitrary shell command inside a container. This is a lower level API that sends the input to the container's - stdin through a socket using Docker API. + stdin through a socket using Docker API. It effectively simulates a tty session. """ # it is necessary to open stdin to keep the container running after it's started # the attach_socket will hold the connection open until the container is stopped or # the socket is closed. + + # TODO!: use tty=True to be able to simulate a tty session. + + # NOTE: some images like python need to be launched with custom + # parameters to keep stdin open. For example python image needs to be + # started with the command `python3 -i` + + # TODO: handle both output mode for tty=True/False container = self._docker_client.containers.create(image, stdin_open=True) container.start() # input() # get underlying socket - _socket = container.attach_socket(params={'stdout': 1, 'stderr': 1, 'stdin': 1, 'stream': 1}) - output = None + _socket = container.attach_socket(params={'stdout': 1, 'stderr': 1, + 'stdin': 1, 'stream': 1}) - socket = DockerSocket(_socket) - socket.sendall(query.encode('utf-8')) - time.sleep(2) - #FIX: how to make sure that the container is done executing the command? - # input() + with DockerSocket(_socket) as socket: + # TEST: make sure the container is ready ? use a blocking call first + socket.sendall(query.encode('utf-8')) + + # read the output + output = None + output = socket.recv() + # print(output) - # read the output - output = socket.recv() - # print(output) container.kill() @@ -214,10 +259,3 @@ class DockerWrapper(BaseModel, extra=Extra.forbid): else: return payload['stdout'] - - -def _exec_run_stdin(input): - """Pipes the input data to a container. - - input should be an object with a `read` method that returns bytes. - """ diff --git a/langchain/utilities/docker/images.py b/langchain/utilities/docker/images.py new file mode 100644 index 00000000..eef0aa4e --- /dev/null +++ b/langchain/utilities/docker/images.py @@ -0,0 +1,57 @@ +"""Optimized parameters for commonly used docker images that can be used by +the docker wrapper utility to attach to.""" + +from enum import Enum +from typing import Optional, List +from pydantic import BaseModel, Extra, validator + + +class BaseImage(BaseModel, extra=Extra.forbid): + """Base docker image class.""" + tty: bool = False + stdin_open: bool = True + image: str + default_command: Optional[List[str]] = None + +class ShellTypes(str, Enum): + """Enum class for shell types.""" + bash = '/bin/bash' + sh = '/bin/sh' + zsh = '/bin/zsh' + + +class Shell(BaseImage): + """Shell image focused on running shell commands. + + A shell image can be crated by passing a shell alias such as `sh` or `bash` + or by passing the full path to the shell binary. + """ + image: str = 'alpine' + shell: str = ShellTypes.bash.value + + @validator('shell') + def validate_shell(cls, value: str) -> str: + """Validate shell type.""" + val = getattr(ShellTypes, value, None) + if val: + return val.value + # elif value in [v.value for v in list(ShellTypes.__members__.values())]: + # print(f"docker: overriding shell binary to: {value}") + return value + +# example using base image to construct python image +class Python(BaseImage): + """Python image class. + + The python image needs to be launced using the `python3 -i` command to keep + stdin open. + """ + image: str = 'python' + default_command: List[str] = ['python3', '-i'] + + def __setattr__(self, name, value): + if name == 'default_command': + raise AttributeError(f'running this image with {self.default_command}' + ' is necessary to keep stdin open.') + + super().__setattr__(name, value) diff --git a/tests/unit_tests/test_docker.py b/tests/unit_tests/test_docker.py index bd64a9ab..75ff8041 100644 --- a/tests/unit_tests/test_docker.py +++ b/tests/unit_tests/test_docker.py @@ -1,25 +1,48 @@ """Test the docker wrapper utility.""" import pytest -from langchain.utilities.docker import DockerWrapper +from langchain.utilities.docker import DockerWrapper, gvisor_runtime_available +from unittest.mock import MagicMock +import subprocess -def test_command_default_image() -> None: - """Test running a command with the default alpine image.""" - docker = DockerWrapper() - output = docker.run("cat /etc/os-release") - assert output.find(b"alpine") +def docker_installed() -> bool: + """Checks if docker is installed locally.""" + try: + subprocess.run(['which', 'docker',], check=True) + except subprocess.CalledProcessError: + return False -def test_inner_failing_command() -> None: - """Test inner command with non zero exit""" - docker = DockerWrapper() - output = docker.run("ls /inner-failing-command") - assert str(output).startswith("STDERR") + return True -def test_entrypoint_failure() -> None: - """Test inner command with non zero exit""" - docker = DockerWrapper() - output = docker.run("todo handle APIError") +@pytest.mark.skipif(not docker_installed(), reason="docker not installed") +class TestDockerUtility: + + def test_command_default_image(self) -> None: + """Test running a command with the default alpine image.""" + docker = DockerWrapper() + output = docker.run('cat /etc/os-release') + assert output.find(b'alpine') + + def test_inner_failing_command(self) -> None: + """Test inner command with non zero exit""" + docker = DockerWrapper() + output = docker.run('ls /inner-failing-command') + assert str(output).startswith("STDERR") + + def test_entrypoint_failure(self) -> None: + """Test inner command with non zero exit""" + docker = DockerWrapper() + output = docker.run('todo handle APIError') + assert output == 'ERROR' + + def test_check_gvisor_runtime(self) -> None: + """test gVisor runtime verification using a mock docker client""" + mock_client = MagicMock() + mock_client.info.return_value = {'Runtimes': {'runsc': {'path': 'runsc'}}} + assert gvisor_runtime_available(mock_client) + mock_client.info.return_value = {'Runtimes': {'runc': {'path': 'runc'}}} + assert not gvisor_runtime_available(mock_client)