docker: attach to container's stdin

- wip image helper for optimized params with common images
- gVisor runtime checker
- make tests skipped if docker installed
blob42 1 year ago
parent 3d42daa474
commit da2087c754

@ -1,77 +1,103 @@
{
"cells": [
{
"cell_type": "code",
"metadata": {
"jukit_cell_id": "O4HPx3boF0"
},
"source": [],
"outputs": [],
"execution_count": null
},
{
"cell_type": "markdown",
"metadata": {
"jukit_cell_id": "hqQkbPEwTJ"
},
"source": [
"# Using the DockerWrapper utility"
]
},
{
"cell_type": "code",
"metadata": {
"jukit_cell_id": "vCepuypaFH"
},
"source": [
"from langchain.utilities.docker import DockerWrapper, DockerSocket"
],
"outputs": [],
"execution_count": null
},
{
"cell_type": "code",
"metadata": {
"jukit_cell_id": "BtYVqy2YtO"
},
"source": [
"d = DockerWrapper()\n",
"query = \"\"\"\n",
"for i in $(seq 1 10)\n",
"do\n",
" echo $i\n",
"done\n",
"\"\"\""
],
"outputs": [],
"execution_count": null
},
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"jukit_cell_id": "O4HPx3boF0"
},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {
"jukit_cell_id": "hqQkbPEwTJ"
},
"source": [
"# Using the DockerWrapper utility"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"jukit_cell_id": "vCepuypaFH"
},
"outputs": [],
"source": [
"from langchain.utilities.docker import DockerWrapper, DockerSocket"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"jukit_cell_id": "BtYVqy2YtO"
},
"outputs": [],
"source": [
"d = DockerWrapper()\n",
"query = \"\"\"\n",
"for i in $(seq 1 10)\n",
"do\n",
" echo $i\n",
"done\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"jukit_cell_id": "ELWWm03ptQ"
},
"outputs": [
{
"cell_type": "code",
"metadata": {
"jukit_cell_id": "ELWWm03ptQ"
},
"source": [
"print(d.exec_run(query, \"alpine\"))"
],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "[header] blocking IO\n{'stdout': '1\\n2\\n3\\n4\\n5\\n6\\n7\\n8\\n9\\n10\\n'}\n1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n\n"
}
],
"execution_count": 1
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "python",
"language": "python",
"name": "python3"
"name": "stdout",
"output_type": "stream",
"text": [
"[header] blocking IO\n",
"{'stdout': '1\\n2\\n3\\n4\\n5\\n6\\n7\\n8\\n9\\n10\\n'}\n",
"1\n",
"2\n",
"3\n",
"4\n",
"5\n",
"6\n",
"7\n",
"8\n",
"9\n",
"10\n",
"\n"
]
}
],
"source": [
"print(d.exec_run(query, \"alpine\"))"
]
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"nbformat": 4,
"nbformat_minor": 4
}
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.11"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

@ -1,6 +1,8 @@
"""Wrapper for untrusted code exectuion on docker."""
# TODO: Validation:
# - verify gVisor runtime (runsc) if available
# -TEST: spawned container: make sure it's ready ? before sending/reading commands
# - attach to running container
# - pass arbitrary image names
# - embed file payloads in the call to run (in LLMChain)?
# - image selection helper
@ -8,39 +10,64 @@
import docker
import struct
import time
import pandas as pd
import pandas as pd # type: ignore
from docker.client import DockerClient # type: ignore
from docker.errors import APIError, ContainerError
from docker.errors import APIError, ContainerError # type: ignore
import logging
from typing import Any, Dict
from typing import Optional
from pydantic import BaseModel, PrivateAttr, Extra, root_validator, validator
logger = logging.getLogger(__name__)
docker_images = {
"default": "alpine:{version}",
"python": "python:{version}",
'default': 'alpine:{version}',
'python': 'python:{version}',
}
SOCK_BUF_SIZE = 1024
GVISOR_WARNING = """Warning: gVisor runtime not available for {docker_host}.
Running untrusted code in a container without gVisor is not recommended. Docker
containers are not isolated. They can be abused to gain access to the host
system. To mitigate this risk, gVisor can be used to run the container in a
sandboxed environment. see: https://gvisor.dev/ for more info.
"""
def gvisor_runtime_available(client: DockerClient) -> bool:
"""Verify if gVisor runtime is available."""
logger.debug("verifying availability of gVisor runtime...")
info = client.info()
if 'Runtimes' in info:
return 'runsc' in info['Runtimes']
return False
def _check_gvisor_runtime():
client = docker.from_env()
docker_host = client.api.base_url
if not gvisor_runtime_available(docker.from_env()):
logger.warning(GVISOR_WARNING.format(docker_host=docker_host))
_check_gvisor_runtime()
class DockerSocket:
"""Wrapper around docker API's socket object. Can be used as a context manager."""
def __init__(self, socket):
self.socket = socket
self.socket._sock.setblocking(False)
# self.socket._sock.setblocking(False)
def __enter__(self):
print("context enter")
return self
def __exit__(self, exc_type, exc_value, traceback):
print("context exit")
self.close()
def close(self):
logger.debug("closing socket...")
self.socket._sock.shutdown(2) # 2 = SHUT_RDWR
self.socket._sock.close()
self.socket.close()
@ -51,6 +78,15 @@ class DockerSocket:
def recv(self):
"""Wrapper for socket.recv that does buffured read."""
# NOTE: this is optional as a bonus
# TODO: Recv with TTY enabled
#
# When the TTY setting is enabled in POST /containers/create, the stream
# is not multiplexed. The data exchanged over the hijacked connection is
# simply the raw data from the process PTY and client's stdin.
# header := [8]byte{STREAM_TYPE, 0, 0, 0, SIZE1, SIZE2, SIZE3, SIZE4}
# STREAM_TYPE can be:
#
@ -70,7 +106,6 @@ class DockerSocket:
# - Goto 1.
chunks = []
# strip the header
# try:
# self.socket._sock.recv(8)
# except BlockingIOError as e:
@ -79,11 +114,15 @@ class DockerSocket:
while True:
header = b''
try:
# strip the header
# the first recv is blocking to wait for the container to start
header = self.socket._sock.recv(8)
except BlockingIOError:
print("[header] blocking IO")
break
self.socket._sock.setblocking(False)
if header == b'':
break
stream_type, size = struct.unpack("!BxxxI", header)
@ -112,7 +151,6 @@ class DockerSocket:
class DockerWrapper(BaseModel, extra=Extra.forbid):
"""Executes arbitrary payloads and returns the output."""
@ -129,6 +167,7 @@ class DockerWrapper(BaseModel, extra=Extra.forbid):
if self.from_env:
self._docker_client = docker.from_env()
@property
def client(self) -> DockerClient:
"""Docker client."""
@ -142,7 +181,6 @@ class DockerWrapper(BaseModel, extra=Extra.forbid):
@root_validator()
def validate_all(cls, values: Dict) -> Dict:
"""Validate environment."""
# print("root validator")
return values
def run(self, query: str, **kwargs: Any) -> str:
@ -154,7 +192,6 @@ class DockerWrapper(BaseModel, extra=Extra.forbid):
"""
try:
image = kwargs.get("image", self.image)
del kwargs['image']
return self._docker_client.containers.run(image,
query,
remove=True,
@ -173,28 +210,36 @@ class DockerWrapper(BaseModel, extra=Extra.forbid):
"""Run arbitrary shell command inside a container.
This is a lower level API that sends the input to the container's
stdin through a socket using Docker API.
stdin through a socket using Docker API. It effectively simulates a tty session.
"""
# it is necessary to open stdin to keep the container running after it's started
# the attach_socket will hold the connection open until the container is stopped or
# the socket is closed.
# TODO!: use tty=True to be able to simulate a tty session.
# NOTE: some images like python need to be launched with custom
# parameters to keep stdin open. For example python image needs to be
# started with the command `python3 -i`
# TODO: handle both output mode for tty=True/False
container = self._docker_client.containers.create(image, stdin_open=True)
container.start()
# input()
# get underlying socket
_socket = container.attach_socket(params={'stdout': 1, 'stderr': 1, 'stdin': 1, 'stream': 1})
output = None
_socket = container.attach_socket(params={'stdout': 1, 'stderr': 1,
'stdin': 1, 'stream': 1})
socket = DockerSocket(_socket)
socket.sendall(query.encode('utf-8'))
time.sleep(2)
#FIX: how to make sure that the container is done executing the command?
# input()
with DockerSocket(_socket) as socket:
# TEST: make sure the container is ready ? use a blocking call first
socket.sendall(query.encode('utf-8'))
# read the output
output = None
output = socket.recv()
# print(output)
# read the output
output = socket.recv()
# print(output)
container.kill()
@ -214,10 +259,3 @@ class DockerWrapper(BaseModel, extra=Extra.forbid):
else:
return payload['stdout']
def _exec_run_stdin(input):
"""Pipes the input data to a container.
input should be an object with a `read` method that returns bytes.
"""

@ -0,0 +1,57 @@
"""Optimized parameters for commonly used docker images that can be used by
the docker wrapper utility to attach to."""
from enum import Enum
from typing import Optional, List
from pydantic import BaseModel, Extra, validator
class BaseImage(BaseModel, extra=Extra.forbid):
"""Base docker image class."""
tty: bool = False
stdin_open: bool = True
image: str
default_command: Optional[List[str]] = None
class ShellTypes(str, Enum):
"""Enum class for shell types."""
bash = '/bin/bash'
sh = '/bin/sh'
zsh = '/bin/zsh'
class Shell(BaseImage):
"""Shell image focused on running shell commands.
A shell image can be crated by passing a shell alias such as `sh` or `bash`
or by passing the full path to the shell binary.
"""
image: str = 'alpine'
shell: str = ShellTypes.bash.value
@validator('shell')
def validate_shell(cls, value: str) -> str:
"""Validate shell type."""
val = getattr(ShellTypes, value, None)
if val:
return val.value
# elif value in [v.value for v in list(ShellTypes.__members__.values())]:
# print(f"docker: overriding shell binary to: {value}")
return value
# example using base image to construct python image
class Python(BaseImage):
"""Python image class.
The python image needs to be launced using the `python3 -i` command to keep
stdin open.
"""
image: str = 'python'
default_command: List[str] = ['python3', '-i']
def __setattr__(self, name, value):
if name == 'default_command':
raise AttributeError(f'running this image with {self.default_command}'
' is necessary to keep stdin open.')
super().__setattr__(name, value)

@ -1,25 +1,48 @@
"""Test the docker wrapper utility."""
import pytest
from langchain.utilities.docker import DockerWrapper
from langchain.utilities.docker import DockerWrapper, gvisor_runtime_available
from unittest.mock import MagicMock
import subprocess
def test_command_default_image() -> None:
"""Test running a command with the default alpine image."""
docker = DockerWrapper()
output = docker.run("cat /etc/os-release")
assert output.find(b"alpine")
def docker_installed() -> bool:
"""Checks if docker is installed locally."""
try:
subprocess.run(['which', 'docker',], check=True)
except subprocess.CalledProcessError:
return False
def test_inner_failing_command() -> None:
"""Test inner command with non zero exit"""
docker = DockerWrapper()
output = docker.run("ls /inner-failing-command")
assert str(output).startswith("STDERR")
return True
def test_entrypoint_failure() -> None:
"""Test inner command with non zero exit"""
docker = DockerWrapper()
output = docker.run("todo handle APIError")
@pytest.mark.skipif(not docker_installed(), reason="docker not installed")
class TestDockerUtility:
def test_command_default_image(self) -> None:
"""Test running a command with the default alpine image."""
docker = DockerWrapper()
output = docker.run('cat /etc/os-release')
assert output.find(b'alpine')
def test_inner_failing_command(self) -> None:
"""Test inner command with non zero exit"""
docker = DockerWrapper()
output = docker.run('ls /inner-failing-command')
assert str(output).startswith("STDERR")
def test_entrypoint_failure(self) -> None:
"""Test inner command with non zero exit"""
docker = DockerWrapper()
output = docker.run('todo handle APIError')
assert output == 'ERROR'
def test_check_gvisor_runtime(self) -> None:
"""test gVisor runtime verification using a mock docker client"""
mock_client = MagicMock()
mock_client.info.return_value = {'Runtimes': {'runsc': {'path': 'runsc'}}}
assert gvisor_runtime_available(mock_client)
mock_client.info.return_value = {'Runtimes': {'runc': {'path': 'runc'}}}
assert not gvisor_runtime_available(mock_client)

Loading…
Cancel
Save