@ -1,30 +1,26 @@
""" Wrapper for untrusted code exectuion on docker. """
# TODO: Validation:
# - verify gVisor runtime (runsc) if available
# -TEST: spawned container: make sure it's ready ? before sending/reading commands
# - attach to running container
# - pass arbitrary image names
# - embed file payloads in the call to run (in LLMChain)?
# - image selection helper
# - LLMChain decorator ?
#TODO: attach to running container
#TODO: pull images
#TODO: embed file payloads in the call to run (in LLMChain)?
#TODO: image selection helper
#TODO: LLMChain decorator ?
import docker
import struct
import socket
import shlex
from time import sleep
import pandas as pd # type: ignore
from docker . client import DockerClient # type: ignore
from docker . errors import APIError , ContainerError # type: ignore
import logging
from typing import Any , Dict
from typing import Optional
from pydantic import BaseModel , PrivateAttr , Extra , root_validator , validator
from . images import BaseImage , get_image_template , Python , Shell
logger = logging . getLogger ( __name__ )
from typing import Any , Dict , Optional , Union , Type
from pydantic import BaseModel , PrivateAttr , Extra , root_validator , validator , Field
docker_images = {
' default ' : ' alpine: {version} ' ,
' python ' : ' python: {version} ' ,
}
logger = logging . getLogger ( __name__ )
SOCK_BUF_SIZE = 1024
@ -55,9 +51,12 @@ _check_gvisor_runtime()
class DockerSocket :
""" Wrapper around docker API ' s socket object. Can be used as a context manager. """
_timeout : int = 10
def __init__ ( self , socket ) :
def __init__ ( self , socket , timeout : int = _timeout ) :
self . socket = socket
self . socket . _sock . settimeout ( timeout )
# self.socket._sock.setblocking(False)
def __enter__ ( self ) :
@ -75,7 +74,10 @@ class DockerSocket:
def sendall ( self , data : bytes ) - > None :
self . socket . _sock . sendall ( data )
def recv ( self ) :
def setblocking ( self , flag : bool ) - > None :
self . socket . _sock . setblocking ( flag )
def recv ( self ) - > Any :
""" Wrapper for socket.recv that does buffured read. """
# NOTE: this is optional as a bonus
@ -149,17 +151,40 @@ class DockerSocket:
return chunks
def _default_params ( ) - > Dict :
return {
# the only required parameter to be able to attach.
' stdin_open ' : True ,
}
def _get_command ( query : str , * * kwargs : Dict ) - > str :
""" Build an escaped command from a query string and keyword arguments. """
cmd = query
if ' default_command ' in kwargs :
cmd = shlex . join ( [ * kwargs . get ( ' default_command ' ) , query ] ) # type: ignore
class DockerWrapper ( BaseModel , extra = Extra . forbid ) :
""" Executes arbitrary payloads and returns the output. """
return cmd
_docker_client : DockerClient = PrivateAttr ( )
image : Optional [ str ] = " alpine "
class DockerWrapper ( BaseModel , extra = Extra . allow ) :
""" Executes arbitrary payloads and returns the output.
# use env by default when create docker client
from_env : Optional [ bool ] = True
Args :
image ( str | Type [ BaseImage ] ) : Docker image to use for execution . The
image can be a string or a subclass of images . BaseImage .
"""
_docker_client : DockerClient = PrivateAttr ( )
_params : Dict = Field ( default_factory = Shell ( ) . dict ( ) , skip = True )
image : Union [ str , Type [ BaseImage ] ] = Field ( default_factory = Shell , skip = True )
from_env : Optional [ bool ] = Field ( default = True , skip = True )
# @property
# def image_name(self) -> str:
# """The image name that will be used when creating a container."""
# return self._params.image
#
def __init__ ( self , * * kwargs ) :
""" Initialize docker client. """
super ( ) . __init__ ( * * kwargs )
@ -167,6 +192,13 @@ class DockerWrapper(BaseModel, extra=Extra.forbid):
if self . from_env :
self . _docker_client = docker . from_env ( )
# if not isinstance(self.image, str) and issubclass(self.image, BaseImage):
# self._params = {**self._params, **self.image().dict()}
#
# # if the user defined a custom image not pre registerd already we should
# # not use the custom command
# elif isinstance(self.image, str):
# self._params = {**_default_params(), **{'image': self.image}}
@property
def client ( self ) - > DockerClient :
@ -178,24 +210,87 @@ class DockerWrapper(BaseModel, extra=Extra.forbid):
""" Prints docker `info`. """
return self . _docker_client . info ( )
# @validator("image", pre=True, always=True)
# def validate_image(cls, value):
# if value is None:
# raise ValueError("image is required")
# if isinstance(value, str) :
# image = get_image(value)
# if isinstance(image, BaseImage):
# return image
# else:
# #set default params to base ones
# if issubclass(value, BaseImage):
# return value
# else:
# raise ValueError("image must be a string or a subclass of images.BaseImage")
@root_validator ( )
def validate_all ( cls , values : Dict ) - > Dict :
""" Validate environment. """
image = values . get ( " image " )
if image is None :
raise ValueError ( " image is required " )
if isinstance ( image , str ) :
# try to get image
_image = get_image_template ( image )
if isinstance ( _image , str ) :
# user wants a custom image, we should use default params
values [ " _params " ] = { * * _default_params ( ) , * * { ' image ' : image } }
else :
# user wants a pre registered image, we should use the image params
values [ " _params " ] = _image ( ) . dict ( )
# image is a BaseImage class
elif issubclass ( image . __class__ , BaseImage ) :
values [ " _params " ] = image . dict ( )
def field_filter ( x ) :
fields = cls . __fields__
if x [ 0 ] == ' _params ' :
return False
field = fields . get ( x [ 0 ] , None )
if not field :
return True
return not field . field_info . extra . get ( ' skip ' , False )
filtered_fields : Dict [ Any , Any ] = dict ( filter ( field_filter , values . items ( ) ) ) # type: ignore
values [ " _params " ] = { * * values [ " _params " ] ,
* * filtered_fields }
return values
def run ( self , query : str , * * kwargs : Any ) - > str :
""" Run arbitrary shell command inside a container.
This method will concatenate the registered default command with the provided
query .
Args :
query ( str ) : The command to run .
* * kwargs : Pass extra parameters to DockerClient . container . run .
"""
kwargs = { * * self . _params , * * kwargs }
args = {
' image ' : self . _params . get ( ' image ' ) ,
' command ' : query ,
}
del kwargs [ ' image ' ]
cmd = _get_command ( query , * * kwargs )
kwargs . pop ( ' default_command ' , None )
args [ ' command ' ] = cmd
# print(f"args: {args}")
# print(f"kwargs: {kwargs}")
# return
logger . debug ( f " running command { args [ ' command ' ] } " )
logger . debug ( f " with params { kwargs } " )
try :
image = kwargs . get ( " image " , self . image )
return self . _docker_client . containers . run ( image ,
query ,
result = self . _docker_client . containers . run ( * ( args . values ( ) ) ,
remove = True ,
* * kwargs )
return result . decode ( ' utf-8 ' ) . strip ( )
except ContainerError as e :
return f " STDERR: { e } "
@ -206,11 +301,15 @@ class DockerWrapper(BaseModel, extra=Extra.forbid):
def exec_run ( self , query : str , image : str ) - > str :
""" Run arbitrary shell command inside a container.
def exec_run ( self , query : str , * * kwargs : Any ) - > str :
""" Run arbitrary shell command inside an ephemeral container.
This will create a container , run the command , and then remove the
container . the input is sent to the container ' s stdin through a socket
using Docker API . It effectively simulates a tty session .
This is a lower level API that sends the input to the container ' s
stdin through a socket using Docker API . It effectively simulates a tty session .
Args:
* * kwargs : Pass extra parameters to DockerClient . container . exec_ru n.
"""
# it is necessary to open stdin to keep the container running after it's started
# the attach_socket will hold the connection open until the container is stopped or
@ -222,40 +321,65 @@ class DockerWrapper(BaseModel, extra=Extra.forbid):
# parameters to keep stdin open. For example python image needs to be
# started with the command `python3 -i`
kwargs = { * * self . _params , * * kwargs }
if ' default_command ' in kwargs :
kwargs [ ' command ' ] = shlex . join ( kwargs [ ' default_command ' ] )
del kwargs [ ' default_command ' ]
# cmd = _get_command(query, **kwargs)
# kwargs.pop('default_command', None)
# kwargs['command'] = cmd
# print(f"kwargs: {kwargs}")
# return
# TODO: handle both output mode for tty=True/False
container = self . _docker_client . containers . create ( image , stdin_open = True )
logger . debug ( f " running command { kwargs [ ' command ' ] } " )
logger . debug ( f " with params { kwargs } " )
container = self . _docker_client . containers . create ( * * kwargs )
container . start ( )
# input()
# get underlying socket
# important to set 'stream' or attach API does not work
_socket = container . attach_socket ( params = { ' stdout ' : 1 , ' stderr ' : 1 ,
' stdin ' : 1 , ' stream ' : 1 } )
with DockerSocket ( _socket ) as socket :
# TEST: make sure the container is ready ? use a blocking call first
socket . sendall ( query . encode ( ' utf-8 ' ) )
# input()
with DockerSocket ( _socket ) as _socket :
# flush the output buffer (if any prompt)
flush = _socket . recv ( )
_socket . setblocking ( True )
print ( flush )
# TEST: make sure the container is ready ? use a blocking first call
_socket . sendall ( query . encode ( ' utf-8 ' ) )
#FIX: is it possible to know if command is finished ?
sleep ( 0.5 ) #this should be available as a parameter
# read the output
output = None
output = socket . recv ( )
# print(output)
try :
output = _socket . recv ( )
except socket . timeout :
return " ERROR: timeout "
container . kill ( )
container . remove ( )
# output is stored in a list of tuples (stream_type, payload)
df = pd . DataFrame ( output , columns = [ ' stream_type ' , ' payload ' ] )
df [ ' payload ' ] = df [ ' payload ' ] . apply ( lambda x : x . decode ( ' utf-8 ' ) )
df [ ' stream_type ' ] = df [ ' stream_type ' ] . apply ( lambda x : ' stdout ' if x == 1 else ' stderr ' )
payload = df . groupby ( ' stream_type ' ) [ ' payload ' ] . apply ( ' ' . join ) . to_dict ( )
print ( payload )
logger . debug ( f " payload: { payload } " )
#HACK: better output handling when stderr is present
#NOTE: stderr might just contain the prompt
if ' stdout ' in payload and ' stderr ' in payload :
return f " STDOUT: \n { payload [ ' stdout ' ] } \n STDERR: { payload [ ' stderr ' ] } "
el if ' stderr ' in payload :
return f " STDOUT: \n { payload [ ' stdout ' ] } \n STDERR: \n { payload [ ' stderr ' ] } "
if ' stderr ' in payload and not ' stdout ' in payload :
return f " STDERR: { payload [ ' stderr ' ] } "
else :
return payload [ ' stdout ' ]