diff --git a/libs/langchain/langchain/utilities/arxiv.py b/libs/langchain/langchain/utilities/arxiv.py
index d958255abf..2ad42daba8 100644
--- a/libs/langchain/langchain/utilities/arxiv.py
+++ b/libs/langchain/langchain/utilities/arxiv.py
@@ -21,15 +21,30 @@ class ArxivAPIWrapper(BaseModel):
     It limits the Document content by doc_content_chars_max.
     Set doc_content_chars_max=None if you don't want to limit the content size.
 
-    Parameters:
+    Args:
         top_k_results: number of the top-scored document used for the arxiv tool
         ARXIV_MAX_QUERY_LENGTH: the cut limit on the query used for the arxiv tool.
         load_max_docs: a limit to the number of loaded documents
         load_all_available_meta:
-          if True: the `metadata` of the loaded Documents gets all available meta info
-            (see https://lukasschwab.me/arxiv.py/index.html#Result),
-          if False: the `metadata` gets only the most informative fields.
-
+            if True: the `metadata` of the loaded Documents contains all available
+            meta info (see https://lukasschwab.me/arxiv.py/index.html#Result),
+            if False: the `metadata` contains only the published date, title,
+            authors and summary.
+        doc_content_chars_max: an optional cut limit for the length of a document's
+            content
+
+    Example:
+        .. code-block:: python
+
+            from langchain.utilities.arxiv import ArxivAPIWrapper
+            arxiv = ArxivAPIWrapper(
+                top_k_results = 3,
+                ARXIV_MAX_QUERY_LENGTH = 300,
+                load_max_docs = 3,
+                load_all_available_meta = False,
+                doc_content_chars_max = 40000
+            )
+            arxiv.run("tree of thought llm")
     """
 
     arxiv_search: Any  #: :meta private:
@@ -62,11 +77,17 @@ class ArxivAPIWrapper(BaseModel):
 
     def run(self, query: str) -> str:
         """
-        Run Arxiv search and get the article meta information.
-        See https://lukasschwab.me/arxiv.py/index.html#Search
-        See https://lukasschwab.me/arxiv.py/index.html#Result
-        It uses only the most informative fields of article meta information.
-        """
+        Performs an arxiv search and returns a single string
+        with the publish date, title, authors, and summary
+        for each article separated by two newlines.
+
+        If an error occurs or no documents are found, error text
+        is returned instead. Wrapper for
+        https://lukasschwab.me/arxiv.py/index.html#Search
+
+        Args:
+            query: a plaintext search query
+        """  # noqa: E501
         try:
             results = self.arxiv_search(  # type: ignore
                 query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=self.top_k_results
@@ -74,7 +95,8 @@ class ArxivAPIWrapper(BaseModel):
         except self.arxiv_exceptions as ex:
             return f"Arxiv exception: {ex}"
         docs = [
-            f"Published: {result.updated.date()}\nTitle: {result.title}\n"
+            f"Published: {result.updated.date()}\n"
+            f"Title: {result.title}\n"
             f"Authors: {', '.join(a.name for a in result.authors)}\n"
             f"Summary: {result.summary}"
             for result in results
@@ -91,7 +113,12 @@ class ArxivAPIWrapper(BaseModel):
 
         Returns: a list of documents with the document.page_content in text format
 
-        """
+        Performs an arxiv search, downloads the top k results as PDFs, loads
+        them as Documents, and returns them in a List.
+
+        Args:
+            query: a plaintext search query
+        """  # noqa: E501
         try:
             import fitz
         except ImportError:
diff --git a/libs/langchain/langchain/utilities/awslambda.py b/libs/langchain/langchain/utilities/awslambda.py
index 292277bc45..64782905e3 100644
--- a/libs/langchain/langchain/utilities/awslambda.py
+++ b/libs/langchain/langchain/utilities/awslambda.py
@@ -7,19 +7,27 @@ from pydantic import BaseModel, Extra, root_validator
 
 class LambdaWrapper(BaseModel):
     """Wrapper for AWS Lambda SDK.
+    To use, you should have the ``boto3`` package installed
+    and a lambda function built from the AWS Console or
+    CLI. Set up your AWS credentials with ``aws configure``
 
-    Docs for using:
+    Example:
+        .. code-block:: bash
 
-    1. pip install boto3
-    2. Create a lambda function using the AWS Console or CLI
-    3. Run `aws configure` and enter your AWS credentials
+            pip install boto3
+
+            aws configure
 
     """
 
     lambda_client: Any  #: :meta private:
+    """The configured boto3 client"""
     function_name: Optional[str] = None
+    """The name of your lambda function"""
     awslambda_tool_name: Optional[str] = None
+    """If passing to an agent as a tool, the tool name"""
     awslambda_tool_description: Optional[str] = None
+    """If passing to an agent as a tool, the description"""
 
     class Config:
         """Configuration for this pydantic object."""
@@ -44,7 +52,15 @@ class LambdaWrapper(BaseModel):
         return values
 
     def run(self, query: str) -> str:
-        """Invoke Lambda function and parse result."""
+        """
+        Invokes the lambda function and returns the
+        result.
+
+        Args:
+            query: an input to be passed to the lambda
+                function as the ``body`` of a JSON
+                object.
+        """  # noqa: E501
         res = self.lambda_client.invoke(
             FunctionName=self.function_name,
             InvocationType="RequestResponse",
diff --git a/libs/langchain/langchain/utilities/bash.py b/libs/langchain/langchain/utilities/bash.py
index 8900d55779..bbca0a7ebb 100644
--- a/libs/langchain/langchain/utilities/bash.py
+++ b/libs/langchain/langchain/utilities/bash.py
@@ -11,23 +11,36 @@ if TYPE_CHECKING:
     import pexpect
 
 
-def _lazy_import_pexpect() -> pexpect:
-    """Import pexpect only when needed."""
-    if platform.system() == "Windows":
-        raise ValueError("Persistent bash processes are not yet supported on Windows.")
-    try:
-        import pexpect
-
-    except ImportError:
-        raise ImportError(
-            "pexpect required for persistent bash processes."
-            " To install, run `pip install pexpect`."
-        )
-    return pexpect
+class BashProcess:
+    """
+    Wrapper class for starting subprocesses.
+    Uses the python built-in subprocess.run()
+    Persistent processes are **not** available
+    on Windows systems, as pexpect makes use of
+    Unix pseudoterminals (ptys). MacOS and Linux
+    are okay.
+
+    Example:
+        .. code-block:: python
+
+            from langchain.utilities.bash import BashProcess
+            bash = BashProcess(
+                strip_newlines = False,
+                return_err_output = False,
+                persistent = False
+            )
+            bash.run("echo 'hello world'")
+    """
 
 
-class BashProcess:
-    """Executes bash commands and returns the output."""
+    strip_newlines: bool = False
+    """Whether or not to run .strip() on the output"""
+    return_err_output: bool = False
+    """Whether or not to return the output of a failed
+    command, or just the error message and stacktrace"""
+    persistent: bool = False
+    """Whether or not to spawn a persistent session
+    NOTE: Unavailable for Windows environments"""
 
     def __init__(
         self,
@@ -35,20 +48,47 @@ class BashProcess:
         return_err_output: bool = False,
         persistent: bool = False,
     ):
-        """Initialize with stripping newlines."""
+        """
+        Initializes with default settings
+        """
         self.strip_newlines = strip_newlines
         self.return_err_output = return_err_output
         self.prompt = ""
         self.process = None
         if persistent:
             self.prompt = str(uuid4())
-            self.process = self._initialize_persistent_process(self.prompt)
+            self.process = self._initialize_persistent_process(self, self.prompt)
+
+    @staticmethod
+    def _lazy_import_pexpect() -> pexpect:
+        """Import pexpect only when needed."""
+        if platform.system() == "Windows":
+            raise ValueError(
+                "Persistent bash processes are not yet supported on Windows."
+            )
+        try:
+            import pexpect
+
+        except ImportError:
+            raise ImportError(
+                "pexpect required for persistent bash processes."
+                " To install, run `pip install pexpect`."
+            )
+        return pexpect
 
     @staticmethod
-    def _initialize_persistent_process(prompt: str) -> pexpect.spawn:
+    def _initialize_persistent_process(self: BashProcess, prompt: str) -> pexpect.spawn:
         # Start bash in a clean environment
         # Doesn't work on windows
-        pexpect = _lazy_import_pexpect()
+        """
+        Initializes a persistent bash setting in a
+        clean environment.
+        NOTE: Unavailable on Windows
+
+        Args:
+            prompt(str): the unique prompt string for the session (not a command)
+        """  # noqa: E501
+        pexpect = self._lazy_import_pexpect()
         process = pexpect.spawn(
             "env", ["-i", "bash", "--norc", "--noprofile"], encoding="utf-8"
         )
@@ -59,7 +99,14 @@ class BashProcess:
         return process
 
     def run(self, commands: Union[str, List[str]]) -> str:
-        """Run commands and return final output."""
+        """
+        Run commands in either an existing persistent
+        subprocess or in a new subprocess environment.
+
+        Args:
+            commands(Union[str, List[str]]): a command or
+                list of commands to execute in the session
+        """  # noqa: E501
         if isinstance(commands, str):
             commands = [commands]
         commands = ";".join(commands)
@@ -71,7 +118,13 @@ class BashProcess:
         return self._run(commands)
 
     def _run(self, command: str) -> str:
-        """Run commands and return final output."""
+        """
+        Runs a command in a subprocess and returns
+        the output.
+
+        Args:
+            command: The command to run
+        """  # noqa: E501
         try:
             output = subprocess.run(
                 command,
@@ -89,14 +142,26 @@ class BashProcess:
         return output
 
     def process_output(self, output: str, command: str) -> str:
-        # Remove the command from the output using a regular expression
+        """
+        Uses regex to remove the command from the output
+
+        Args:
+            output: a process' output string
+            command: the executed command
+        """  # noqa: E501
         pattern = re.escape(command) + r"\s*\n"
         output = re.sub(pattern, "", output, count=1)
         return output.strip()
 
     def _run_persistent(self, command: str) -> str:
-        """Run commands and return final output."""
-        pexpect = _lazy_import_pexpect()
+        """
+        Runs commands in a persistent environment
+        and returns the output.
+
+        Args:
+            command: the command to execute
+        """  # noqa: E501
+        pexpect = self._lazy_import_pexpect()
         if self.process is None:
             raise ValueError("Process not initialized")
         self.process.sendline(command)