Updating docstrings in utilities (#8411)

Updating docstrings on utility packages
 @baskaryan
This commit is contained in:
Gordon Clark 2023-08-01 07:34:53 +08:00 committed by GitHub
parent bca0749a11
commit 64d0a0fcc0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 149 additions and 41 deletions

View File

@ -21,15 +21,30 @@ class ArxivAPIWrapper(BaseModel):
It limits the Document content by doc_content_chars_max.
Set doc_content_chars_max=None if you don't want to limit the content size.
Parameters:
Args:
top_k_results: number of the top-scored document used for the arxiv tool
ARXIV_MAX_QUERY_LENGTH: the cut limit on the query used for the arxiv tool.
load_max_docs: a limit to the number of loaded documents
load_all_available_meta:
if True: the `metadata` of the loaded Documents gets all available meta info
(see https://lukasschwab.me/arxiv.py/index.html#Result),
if False: the `metadata` gets only the most informative fields.
if True: the `metadata` of the loaded Documents contains all available
meta info (see https://lukasschwab.me/arxiv.py/index.html#Result),
if False: the `metadata` contains only the published date, title,
authors and summary.
doc_content_chars_max: an optional cut limit for the length of a document's
content
Example:
.. code-block:: python
from langchain.utilities.arxiv import ArxivAPIWrapper
arxiv = ArxivAPIWrapper(
top_k_results = 3,
ARXIV_MAX_QUERY_LENGTH = 300,
load_max_docs = 3,
load_all_available_meta = False,
doc_content_chars_max = 40000
)
arxiv.run("tree of thought llm")
"""
arxiv_search: Any #: :meta private:
@ -62,11 +77,17 @@ class ArxivAPIWrapper(BaseModel):
def run(self, query: str) -> str:
"""
Run Arxiv search and get the article meta information.
See https://lukasschwab.me/arxiv.py/index.html#Search
See https://lukasschwab.me/arxiv.py/index.html#Result
It uses only the most informative fields of article meta information.
"""
Performs an arxiv search and returns a single string
with the publish date, title, authors, and summary
for each article separated by two newlines.
If an error occurs or no documents are found, error text
is returned instead. Wrapper for
https://lukasschwab.me/arxiv.py/index.html#Search
Args:
query: a plaintext search query
""" # noqa: E501
try:
results = self.arxiv_search( # type: ignore
query[: self.ARXIV_MAX_QUERY_LENGTH], max_results=self.top_k_results
@ -74,7 +95,8 @@ class ArxivAPIWrapper(BaseModel):
except self.arxiv_exceptions as ex:
return f"Arxiv exception: {ex}"
docs = [
f"Published: {result.updated.date()}\nTitle: {result.title}\n"
f"Published: {result.updated.date()}\n"
f"Title: {result.title}\n"
f"Authors: {', '.join(a.name for a in result.authors)}\n"
f"Summary: {result.summary}"
for result in results
@ -91,7 +113,12 @@ class ArxivAPIWrapper(BaseModel):
Returns: a list of documents with the document.page_content in text format
"""
Performs an arxiv search, downloads the top k results as PDFs, loads
them as Documents, and returns them in a List.
Args:
query: a plaintext search query
""" # noqa: E501
try:
import fitz
except ImportError:

View File

@ -7,19 +7,27 @@ from pydantic import BaseModel, Extra, root_validator
class LambdaWrapper(BaseModel):
"""Wrapper for AWS Lambda SDK.
To use, you should have the ``boto3`` package installed
and a lambda function built from the AWS Console or
CLI. Set up your AWS credentials with ``aws configure``
Docs for using:
Example:
.. code-block:: bash
1. pip install boto3
2. Create a lambda function using the AWS Console or CLI
3. Run `aws configure` and enter your AWS credentials
pip install boto3
aws configure
"""
lambda_client: Any #: :meta private:
"""The configured boto3 client"""
function_name: Optional[str] = None
"""The name of your lambda function"""
awslambda_tool_name: Optional[str] = None
"""If passing to an agent as a tool, the tool name"""
awslambda_tool_description: Optional[str] = None
"""If passing to an agent as a tool, the description"""
class Config:
"""Configuration for this pydantic object."""
@ -44,7 +52,15 @@ class LambdaWrapper(BaseModel):
return values
def run(self, query: str) -> str:
"""Invoke Lambda function and parse result."""
"""
Invokes the lambda function and returns the
result.
Args:
query: an input to be passed to the lambda
function as the ``body`` of a JSON
object.
""" # noqa: E501
res = self.lambda_client.invoke(
FunctionName=self.function_name,
InvocationType="RequestResponse",

View File

@ -11,10 +11,61 @@ if TYPE_CHECKING:
import pexpect
class BashProcess:
"""
Wrapper class for starting subprocesses.
Uses the python built-in subprocess.run()
Persistent processes are **not** available
on Windows systems, as pexpect makes use of
Unix pseudoterminals (ptys). MacOS and Linux
are okay.
Example:
.. code-block:: python
from langchain.utilities.bash import BashProcess
bash = BashProcess(
strip_newlines = False,
return_err_output = False,
persistent = False
)
bash.run('echo \'hello world\'')
"""
strip_newlines: bool = False
"""Whether or not to run .strip() on the output"""
return_err_output: bool = False
"""Whether or not to return the output of a failed
command, or just the error message and stacktrace"""
persistent: bool = False
"""Whether or not to spawn a persistent session
NOTE: Unavailable for Windows environments"""
def __init__(
self,
strip_newlines: bool = False,
return_err_output: bool = False,
persistent: bool = False,
):
"""
Initializes with default settings
"""
self.strip_newlines = strip_newlines
self.return_err_output = return_err_output
self.prompt = ""
self.process = None
if persistent:
self.prompt = str(uuid4())
self.process = self._initialize_persistent_process(self, self.prompt)
@staticmethod
def _lazy_import_pexpect() -> pexpect:
"""Import pexpect only when needed."""
if platform.system() == "Windows":
raise ValueError("Persistent bash processes are not yet supported on Windows.")
raise ValueError(
"Persistent bash processes are not yet supported on Windows."
)
try:
import pexpect
@ -25,30 +76,19 @@ def _lazy_import_pexpect() -> pexpect:
)
return pexpect
class BashProcess:
"""Executes bash commands and returns the output."""
def __init__(
self,
strip_newlines: bool = False,
return_err_output: bool = False,
persistent: bool = False,
):
"""Initialize with stripping newlines."""
self.strip_newlines = strip_newlines
self.return_err_output = return_err_output
self.prompt = ""
self.process = None
if persistent:
self.prompt = str(uuid4())
self.process = self._initialize_persistent_process(self.prompt)
@staticmethod
def _initialize_persistent_process(prompt: str) -> pexpect.spawn:
def _initialize_persistent_process(self: BashProcess, prompt: str) -> pexpect.spawn:
# Start bash in a clean environment
# Doesn't work on windows
pexpect = _lazy_import_pexpect()
"""
Initializes a persistent bash setting in a
clean environment.
NOTE: Unavailable on Windows
Args:
prompt(str): the prompt string for the persistent bash session
""" # noqa: E501
pexpect = self._lazy_import_pexpect()
process = pexpect.spawn(
"env", ["-i", "bash", "--norc", "--noprofile"], encoding="utf-8"
)
@ -59,7 +99,14 @@ class BashProcess:
return process
def run(self, commands: Union[str, List[str]]) -> str:
"""Run commands and return final output."""
"""
Run commands in either an existing persistent
subprocess or in a new subprocess environment.
Args:
commands(Union[str, List[str]]): a command or a list of commands to
execute in the session
""" # noqa: E501
if isinstance(commands, str):
commands = [commands]
commands = ";".join(commands)
@ -71,7 +118,13 @@ class BashProcess:
return self._run(commands)
def _run(self, command: str) -> str:
"""Run commands and return final output."""
"""
Runs a command in a subprocess and returns
the output.
Args:
command: The command to run
""" # noqa: E501
try:
output = subprocess.run(
command,
@ -89,14 +142,26 @@ class BashProcess:
return output
def process_output(self, output: str, command: str) -> str:
# Remove the command from the output using a regular expression
"""
Uses regex to remove the command from the output
Args:
output: a process' output string
command: the executed command
""" # noqa: E501
pattern = re.escape(command) + r"\s*\n"
output = re.sub(pattern, "", output, count=1)
return output.strip()
def _run_persistent(self, command: str) -> str:
"""Run commands and return final output."""
pexpect = _lazy_import_pexpect()
"""
Runs commands in a persistent environment
and returns the output.
Args:
command: the command to execute
""" # noqa: E501
pexpect = self._lazy_import_pexpect()
if self.process is None:
raise ValueError("Process not initialized")
self.process.sendline(command)