Updating docstrings in utilities (#8411)

Updating docstrings on utility packages @baskaryan
2024-11-04 06:00:26 +00:00 · 2023-08-01 07:34:53 +08:00 · 2023-08-01 07:34:53 +08:00 · 64d0a0fcc0
commit 64d0a0fcc0
parent bca0749a11
3 changed files with 149 additions and 41 deletions
--- a/libs/langchain/langchain/utilities/arxiv.py
+++ b/libs/langchain/langchain/utilities/arxiv.py
@ -21,15 +21,30 @@ class ArxivAPIWrapper(BaseModel):
    It limits the Document content by doc_content_chars_max.
    Set doc_content_chars_max=None if you don't want to limit the content size.

-    Parameters:
+    Args:
        top_k_results: number of the top-scored document used for the arxiv tool
        ARXIV_MAX_QUERY_LENGTH: the cut limit on the query used for the arxiv tool.
        load_max_docs: a limit to the number of loaded documents
        load_all_available_meta:
-          if True: the `metadata` of the loaded Documents gets all available meta info
-            (see https://lukasschwab.me/arxiv.py/index.html#Result),
-          if False: the `metadata` gets only the most informative fields.
+            if True: the `metadata` of the loaded Documents contains all available
+            meta info (see https://lukasschwab.me/arxiv.py/index.html#Result),
+            if False: the `metadata` contains only the published date, title,
+            authors and summary.
+        doc_content_chars_max: an optional cut limit for the length of a document's
+            content

+    Example:
+        .. code-block:: python
+
+            from langchain.utilities.arxiv import ArxivAPIWrapper
+            arxiv = ArxivAPIWrapper(
+                top_k_results = 3,
+                ARXIV_MAX_QUERY_LENGTH = 300,
+                load_max_docs = 3,
+                load_all_available_meta = False,
+                doc_content_chars_max = 40000
+            )
+            arxiv.run("tree of thought llm")
    """

    arxiv_search: Any  #: :meta private:
@ -62,11 +77,17 @@ class ArxivAPIWrapper(BaseModel):

    def run(self, query: str) -> str:
        """
-        Run Arxiv search and get the article meta information.
-        See https://lukasschwab.me/arxiv.py/index.html#Search
-        See https://lukasschwab.me/arxiv.py/index.html#Result
-        It uses only the most informative fields of article meta information.
-        """
+        Performs an arxiv search and returns a single string
+        with the publish date, title, authors, and summary
+        for each article separated by two newlines.
+
+        If an error occurs or no documents found, error text
+        is returned instead. Wrapper for
+        https://lukasschwab.me/arxiv.py/index.html#Search
+
+        Args:
+            query: a plaintext search query
+        """  # noqa: E501
        try:
            results = self.arxiv_search(  # type: ignore
                query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=self.top_k_results
@ -74,7 +95,8 @@ class ArxivAPIWrapper(BaseModel):
        except self.arxiv_exceptions as ex:
            return f"Arxiv exception: {ex}"
        docs = [
-            f"Published: {result.updated.date()}\nTitle: {result.title}\n"
+            f"Published: {result.updated.date()}\n"
+            f"Title: {result.title}\n"
            f"Authors: {', '.join(a.name for a in result.authors)}\n"
            f"Summary: {result.summary}"
            for result in results
@ -91,7 +113,12 @@ class ArxivAPIWrapper(BaseModel):

        Returns: a list of documents with the document.page_content in text format

-        """
+        Performs an arxiv search, downloads the top k results as PDFs, loads
+        them as Documents, and returns them in a List.
+
+        Args:
+            query: a plaintext search query
+        """  # noqa: E501
        try:
            import fitz
        except ImportError:
--- a/libs/langchain/langchain/utilities/awslambda.py
+++ b/libs/langchain/langchain/utilities/awslambda.py
@ -7,19 +7,27 @@ from pydantic import BaseModel, Extra, root_validator

 class LambdaWrapper(BaseModel):
    """Wrapper for AWS Lambda SDK.
+    To use, you should have the ``boto3`` package installed
+    and a lambda function built from the AWS Console or
+    CLI. Set up your AWS credentials with ``aws configure``

-    Docs for using:
+    Example:
+        .. code-block:: bash

-    1. pip install boto3
-    2. Create a lambda function using the AWS Console or CLI
-    3. Run `aws configure` and enter your AWS credentials
+            pip install boto3
+
+            aws configure

    """

    lambda_client: Any  #: :meta private:
+    """The configured boto3 client"""
    function_name: Optional[str] = None
+    """The name of your lambda function"""
    awslambda_tool_name: Optional[str] = None
+    """If passing to an agent as a tool, the tool name"""
    awslambda_tool_description: Optional[str] = None
+    """If passing to an agent as a tool, the description"""

    class Config:
        """Configuration for this pydantic object."""
@ -44,7 +52,15 @@ class LambdaWrapper(BaseModel):
        return values

    def run(self, query: str) -> str:
-        """Invoke Lambda function and parse result."""
+        """
+        Invokes the lambda function and returns the
+        result.
+
+        Args:
+            query: an input to be passed to the lambda
+                function as the ``body`` of a JSON
+                object.
+        """  # noqa: E501
        res = self.lambda_client.invoke(
            FunctionName=self.function_name,
            InvocationType="RequestResponse",
--- a/libs/langchain/langchain/utilities/bash.py
+++ b/libs/langchain/langchain/utilities/bash.py
@ -11,10 +11,61 @@ if TYPE_CHECKING:
    import pexpect


+class BashProcess:
+    """
+    Wrapper class for starting subprocesses.
+    Uses the python built-in subprocess.run()
+    Persistent processes are **not** available
+    on Windows systems, as pexpect makes use of
+    Unix pseudoterminals (ptys). MacOS and Linux
+    are okay.
+
+    Example:
+        .. code-block:: python
+
+            from langchain.utilities.bash import BashProcess
+            bash = BashProcess(
+                strip_newlines = False,
+                return_err_output = False,
+                persistent = False
+            )
+            bash.run('echo \'hello world\'')
+
+    """
+
+    strip_newlines: bool = False
+    """Whether or not to run .strip() on the output"""
+    return_err_output: bool = False
+    """Whether or not to return the output of a failed
+    command, or just the error message and stacktrace"""
+    persistent: bool = False
+    """Whether or not to spawn a persistent session
+    NOTE: Unavailable for Windows environments"""
+
+    def __init__(
+        self,
+        strip_newlines: bool = False,
+        return_err_output: bool = False,
+        persistent: bool = False,
+    ):
+        """
+        Initializes with default settings
+        """
+        self.strip_newlines = strip_newlines
+        self.return_err_output = return_err_output
+        self.prompt = ""
+        self.process = None
+        if persistent:
+            self.prompt = str(uuid4())
+            self.process = self._initialize_persistent_process(self, self.prompt)
+
+    @staticmethod
    def _lazy_import_pexpect() -> pexpect:
        """Import pexpect only when needed."""
        if platform.system() == "Windows":
-        raise ValueError("Persistent bash processes are not yet supported on Windows.")
+            raise ValueError(
+                "Persistent bash processes are not yet supported on Windows."
+            )
        try:
            import pexpect

@ -25,30 +76,19 @@ def _lazy_import_pexpect() -> pexpect:
            )
        return pexpect

-
-class BashProcess:
-    """Executes bash commands and returns the output."""
-
-    def __init__(
-        self,
-        strip_newlines: bool = False,
-        return_err_output: bool = False,
-        persistent: bool = False,
-    ):
-        """Initialize with stripping newlines."""
-        self.strip_newlines = strip_newlines
-        self.return_err_output = return_err_output
-        self.prompt = ""
-        self.process = None
-        if persistent:
-            self.prompt = str(uuid4())
-            self.process = self._initialize_persistent_process(self.prompt)
-
    @staticmethod
-    def _initialize_persistent_process(prompt: str) -> pexpect.spawn:
+    def _initialize_persistent_process(self: BashProcess, prompt: str) -> pexpect.spawn:
        # Start bash in a clean environment
        # Doesn't work on windows
-        pexpect = _lazy_import_pexpect()
+        """
+        Initializes a persistent bash setting in a
+        clean environment.
+        NOTE: Unavailable on Windows
+
+        Args:
+            prompt(str): the prompt string to use for the persistent process
+        """  # noqa: E501
+        pexpect = self._lazy_import_pexpect()
        process = pexpect.spawn(
            "env", ["-i", "bash", "--norc", "--noprofile"], encoding="utf-8"
        )
@ -59,7 +99,14 @@ class BashProcess:
        return process

    def run(self, commands: Union[str, List[str]]) -> str:
-        """Run commands and return final output."""
+        """
+        Run commands in either an existing persistent
+        subprocess or in a new subprocess environment.
+
+        Args:
+            commands(List[str]): a list of commands to
+                execute in the session
+        """  # noqa: E501
        if isinstance(commands, str):
            commands = [commands]
        commands = ";".join(commands)
@ -71,7 +118,13 @@ class BashProcess:
            return self._run(commands)

    def _run(self, command: str) -> str:
-        """Run commands and return final output."""
+        """
+        Runs a command in a subprocess and returns
+        the output.
+
+        Args:
+            command: The command to run
+        """  # noqa: E501
        try:
            output = subprocess.run(
                command,
@ -89,14 +142,26 @@ class BashProcess:
        return output

    def process_output(self, output: str, command: str) -> str:
-        # Remove the command from the output using a regular expression
+        """
+        Uses regex to remove the command from the output
+
+        Args:
+            output: a process' output string
+            command: the executed command
+        """  # noqa: E501
        pattern = re.escape(command) + r"\s*\n"
        output = re.sub(pattern, "", output, count=1)
        return output.strip()

    def _run_persistent(self, command: str) -> str:
-        """Run commands and return final output."""
-        pexpect = _lazy_import_pexpect()
+        """
+        Runs commands in a persistent environment
+        and returns the output.
+
+        Args:
+            command: the command to execute
+        """  # noqa: E501
+        pexpect = self._lazy_import_pexpect()
        if self.process is None:
            raise ValueError("Process not initialized")
        self.process.sendline(command)