From c30ef7dbc48a7fe58b6e43d76696148c3d46aa9b Mon Sep 17 00:00:00 2001 From: blob42 Date: Fri, 3 Mar 2023 21:59:22 +0100 Subject: [PATCH] drop network capabilities by default, example on using networking --- docs/modules/utils/examples/docker.ipynb | 298 +++++++++++++---------- langchain/utilities/docker/__init__.py | 74 +++++- langchain/utilities/docker/images.py | 1 + 3 files changed, 248 insertions(+), 125 deletions(-) diff --git a/docs/modules/utils/examples/docker.ipynb b/docs/modules/utils/examples/docker.ipynb index 168cb16b..fd273d2b 100644 --- a/docs/modules/utils/examples/docker.ipynb +++ b/docs/modules/utils/examples/docker.ipynb @@ -1,128 +1,180 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "jukit_cell_id": "hqQkbPEwTJ" - }, - "source": [ - "# Using the DockerWrapper utility" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "jukit_cell_id": "vCepuypaFH" - }, - "outputs": [], - "source": [ - "from langchain.utilities.docker import DockerWrapper" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "jukit_cell_id": "BtYVqy2YtO" - }, - "outputs": [], - "source": [ - "d = DockerWrapper(image='shell')" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "jukit_cell_id": "ELWWm03ptQ" - }, - "outputs": [ + "cells": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "1\n", - "2\n", - "3\n", - "4\n", - "5\n", - "6\n", - "7\n", - "8\n", - "9\n", - "10\n" - ] - } - ], - "source": [ - "query = \"\"\"\n", - "for i in $(seq 1 10)\n", - "do\n", - " echo $i\n", - "done\n", - "\"\"\"\n", - "print(d.exec_run(query))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "jukit_cell_id": "lGMqLz5sDo" - }, - "outputs": [], - "source": [ - "p = DockerWrapper(image='python')\n", - "\n", - "py_payload = \"\"\"\n", - "def hello_world():\n", - " return 'hello world'\n", - "\n", - "hello_world()\n", - "\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "jukit_cell_id": "X04Wd6zbrk" - }, - "outputs": [ + "cell_type": "code", + "metadata": { + "jukit_cell_id": "O4HPx3boF0" + }, + "source": [], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "metadata": { + "jukit_cell_id": "hqQkbPEwTJ" + }, + "source": [ + "# Using the DockerWrapper utility" + ] + }, + { + "cell_type": "code", + "metadata": { + "jukit_cell_id": "vCepuypaFH" + }, + "source": [ + "from langchain.utilities.docker import DockerWrapper" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "metadata": { + "jukit_cell_id": "BtYVqy2YtO" + }, + "source": [ + "d = DockerWrapper(image='shell')" + ], + "outputs": [], + "execution_count": null + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "'hello world'\n" - ] + "cell_type": "code", + "metadata": { + "jukit_cell_id": "ELWWm03ptQ" + }, + "source": [ + "query = \"\"\"\n", + "for i in $(seq 1 10)\n", + "do\n", + " echo $i\n", + "done\n", + "\"\"\"\n", + "print(d.exec_run(query))" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n" + } + ], + "execution_count": 1 + }, + { + "cell_type": "code", + "metadata": { + "jukit_cell_id": "lGMqLz5sDo" + }, + "source": [ + "p = DockerWrapper(image='python')\n", + "\n", + "py_payload = \"\"\"\n", + "def hello_world():\n", + " return 'hello world'\n", + "\n", + "hello_world()\n", + "\"\"\"" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "metadata": { + "jukit_cell_id": "X04Wd6zbrk" + }, + "source": [ + "print(p.exec_run(py_payload))" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "'hello world'\n" + } + ], + "execution_count": 2 + }, + { + "cell_type": "code", + "metadata": { + "jukit_cell_id": "lKOfuDoJGk" + }, + "source": [], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "metadata": { + "jukit_cell_id": "eSzXtDrpqU" + }, + "source": [ + "## Passing custom parameters\n", + "\n", + "By default containers are run with a safe set of parameters. You can pass any parameters\n", + "that are accepted by the docker python sdk to the run and exec commands.\n", + "\n", + "### Using networking" + ] + }, + { + "cell_type": "code", + "metadata": { + "jukit_cell_id": "eWFGCxD9pv" + }, + "source": [ + "# by default containers don't have access to the network\n", + "print(d.run('ping -c 1 google.com'))" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "STDERR: Command '/bin/sh -c 'ping -c 1 google.com'' in image 'alpine:latest' returned non-zero exit status 1: b\"ping: bad address 'google.com'\\n\"\n" + } + ], + "execution_count": 3 + }, + { + "cell_type": "code", + "metadata": { + "jukit_cell_id": "Z0YkpuXVyL" + }, + "source": [ + "# using the network parameter\n", + "print(d.run('ping -c 1 google.com', network='bridge'))" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "PING google.com (142.250.200.110): 56 data bytes\n64 bytes from 142.250.200.110: seq=0 ttl=42 time=13.695 ms\n\n--- google.com ping statistics ---\n1 packets transmitted, 1 packets received, 0% packet loss\nround-trip min/avg/max = 13.695/13.695/13.695 ms\n" + } + ], + "execution_count": 4 + }, + { + "cell_type": "code", + "metadata": { + "jukit_cell_id": "3rMWzzuLHq" + }, + "source": [], + "outputs": [], + "execution_count": null + } + ], + "metadata": { + "anaconda-cloud": {}, + "kernelspec": { + "display_name": "python", + "language": "python", + "name": "python3" } - ], - "source": [ - "print(p.exec_run(py_payload))" - ] - } - ], - "metadata": { - "anaconda-cloud": {}, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.11" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/langchain/utilities/docker/__init__.py b/langchain/utilities/docker/__init__.py index 0b0a41b0..8e3a2e79 100644 --- a/langchain/utilities/docker/__init__.py +++ b/langchain/utilities/docker/__init__.py @@ -1,8 +1,8 @@ """Wrapper for untrusted code exectuion on docker.""" # TODO: pass payload to contanier via filesystem -# TODO: attach to running container +# TEST: more tests for attach to running container # TODO: embed file payloads in the call to run (in LLMChain)? -# TODO: image selection helper +# TODO: [doc] image selection helper # TODO: LLMChain decorator ? import docker @@ -12,6 +12,7 @@ import shlex import os import io import tarfile +import time import pandas as pd # type: ignore from docker.client import DockerClient # type: ignore from docker.errors import APIError, ContainerError # type: ignore @@ -517,3 +518,72 @@ class DockerWrapper(BaseModel, extra=Extra.allow): return f"STDERR: {payload['stderr']}" else: return payload['stdout'].strip() + + + + # method that will copy the given payload to the container filesystem then + # invoke the command on the file and return the output + def run_file(self, payload: bytes, filename: str = None, + **kwargs: Any) -> str: + """Run arbitrary shell command inside an ephemeral container on the + specified input payload.""" + + + for arg in kwargs.keys(): + if arg in locals(): + del kwargs[arg] + + kwargs = {**self._params, **kwargs} + self._clean_kwargs(kwargs) + + kwargs['command'] = '/bin/sh' + + k_file_location = '/tmp/payload' + if filename is not None: + # store at /tmp/file_name + # strip all leading path components + file_loc = os.path.basename(filename) + k_file_location = f'/tmp/{file_loc}' + + # print(kwargs) + # return + + # create a container with the given payload + # container = self._docker_client.containers.create(**kwargs) + # container.start() + container = self._docker_client.containers.list()[0] + print(container.short_id) + + + # copy the payload to the container + try: + # put the data in tar archive at the path specified by k_file_location + archive = io.BytesIO() + with tarfile.TarFile(fileobj=archive, mode='w') as tar: + tarinfo = tarfile.TarInfo(name='test-archive') + tarinfo.size = len(payload) + tarinfo.mtime = time.time() + tar.addfile(tarinfo, io.BytesIO(payload)) + archive.seek(0) + + # store archive on local host at /tmp/test + # with open('/tmp/test', 'wb') as f: + # f.write(archive.read()) + + + container.put_archive(path='/', data=archive) + except APIError as e: + logger.error(f"Error: {e}") + return "ERROR" + + #execute the command + exit_code, out = container.exec_run(['sh', k_file_location]) + print(f"exit_code: {exit_code}") + print(f"out: {out}") + + + # try: + # container.kill() + # except APIError: + # pass + # container.remove(force=True) diff --git a/langchain/utilities/docker/images.py b/langchain/utilities/docker/images.py index 0c36c038..1490717c 100644 --- a/langchain/utilities/docker/images.py +++ b/langchain/utilities/docker/images.py @@ -14,6 +14,7 @@ class BaseImage(BaseModel, extra=Extra.forbid): tag: Optional[str] = 'latest' default_command: Optional[List[str]] = None stdin_command: Optional[List[str]] = None + network: str = 'none' def dict(self, *args, **kwargs): """Override the dict method to add the image name."""