docstrings `experimental` (#7969)

- added/changed docstring for `experimental` - added/changed docstrings for different artifacts - @baskaryan
1 year ago · c580c81cca
parent 3eb4112a1f
commit c580c81cca
31 changed files with 133 additions and 30 deletions
--- a/libs/experimental/langchain_experimental/autonomous_agents/autogpt/output_parser.py
+++ b/libs/experimental/langchain_experimental/autonomous_agents/autogpt/output_parser.py
@ -7,14 +7,20 @@ from langchain.schema import BaseOutputParser


 class AutoGPTAction(NamedTuple):
+    """Action for AutoGPT."""
+
    name: str
+    """Name of the action."""
    args: Dict
+    """Arguments for the action."""


 class BaseAutoGPTOutputParser(BaseOutputParser):
+    """Base class for AutoGPT output parsers."""
+
    @abstractmethod
    def parse(self, text: str) -> AutoGPTAction:
-        """Return AutoGPTAction"""
+        """Parse text and return AutoGPTAction."""


 def preprocess_json_input(input_str: str) -> str:
@ -36,6 +42,8 @@ def preprocess_json_input(input_str: str) -> str:


 class AutoGPTOutputParser(BaseAutoGPTOutputParser):
+    """Output parser for AutoGPT."""
+
    def parse(self, text: str) -> AutoGPTAction:
        try:
            parsed = json.loads(text, strict=False)
--- a/libs/experimental/langchain_experimental/autonomous_agents/autogpt/prompt_generator.py
+++ b/libs/experimental/langchain_experimental/autonomous_agents/autogpt/prompt_generator.py
@ -123,7 +123,7 @@ class PromptGenerator:


 def get_prompt(tools: List[BaseTool]) -> str:
-    """This function generates a prompt string.
+    """Generate a prompt string.

    It includes various constraints, commands, resources, and performance evaluations.

--- a/libs/experimental/langchain_experimental/cpal/constants.py
+++ b/libs/experimental/langchain_experimental/cpal/constants.py
@ -2,6 +2,8 @@ from enum import Enum


 class Constant(Enum):
+    """Enum for constants used in the CPAL."""
+
    narrative_input = "narrative_input"
    chain_answer = "chain_answer"  # natural language answer
    chain_data = "chain_data"  # pydantic instance
--- a/libs/langchain/langchain/callbacks/openai_info.py
+++ b/libs/langchain/langchain/callbacks/openai_info.py
@ -55,6 +55,7 @@ def standardize_model_name(
 ) -> str:
    """
    Standardize the model name to a format that can be used in the OpenAI API.
+
    Args:
        model_name: Model name to standardize.
        is_completion: Whether the model is used for completion or not.
--- a/libs/langchain/langchain/chains/query_constructor/parser.py
+++ b/libs/langchain/langchain/chains/query_constructor/parser.py
@ -53,9 +53,7 @@ GRAMMAR = """

@v_args(inline=True)
 class QueryTransformer(Transformer):
-    """Transforms a query string into an IR representation
-    (intermediate representation).
-    """
+    """Transforms a query string into an intermediate representation."""

    def __init__(
        self,
--- a/libs/langchain/langchain/chat_models/base.py
+++ b/libs/langchain/langchain/chat_models/base.py
@ -33,11 +33,16 @@ def _get_verbosity() -> bool:


 class BaseChatModel(BaseLanguageModel, ABC):
+    """Base class for chat models."""
+
    cache: Optional[bool] = None
+    """Whether to cache the response."""
    verbose: bool = Field(default_factory=_get_verbosity)
    """Whether to print out response text."""
    callbacks: Callbacks = Field(default=None, exclude=True)
+    """Callbacks to add to the run trace."""
    callback_manager: Optional[BaseCallbackManager] = Field(default=None, exclude=True)
+    """Callback manager to add to the run trace."""
    tags: Optional[List[str]] = Field(default=None, exclude=True)
    """Tags to add to the run trace."""
    metadata: Optional[Dict[str, Any]] = Field(default=None, exclude=True)
@ -441,6 +446,8 @@ class BaseChatModel(BaseLanguageModel, ABC):


 class SimpleChatModel(BaseChatModel):
+    """Simple Chat Model."""
+
    def _generate(
        self,
        messages: List[BaseMessage],
--- a/libs/langchain/langchain/document_loaders/notiondb.py
+++ b/libs/langchain/langchain/document_loaders/notiondb.py
@ -15,6 +15,7 @@ BLOCK_URL = NOTION_BASE_URL + "/blocks/{block_id}/children"

 class NotionDBLoader(BaseLoader):
    """Notion DB Loader.
+
    Reads content from pages within a Notion Database.
    Args:
        integration_token (str): Notion integration token.
--- a/libs/langchain/langchain/document_loaders/parsers/grobid.py
+++ b/libs/langchain/langchain/document_loaders/parsers/grobid.py
@ -8,6 +8,8 @@ from langchain.document_loaders.blob_loaders import Blob


 class ServerUnavailableException(Exception):
+    """Exception raised when the GROBID server is unavailable."""
+
    pass


--- a/libs/langchain/langchain/document_loaders/rocksetdb.py
+++ b/libs/langchain/langchain/document_loaders/rocksetdb.py
@ -5,10 +5,13 @@ from langchain.schema import Document


 def default_joiner(docs: List[Tuple[str, Any]]) -> str:
+    """Default joiner for content columns."""
    return "\n".join([doc[1] for doc in docs])


 class ColumnNotFoundError(Exception):
+    """Column not found error."""
+
    def __init__(self, missing_key: str, query: str):
        super().__init__(f'Column "{missing_key}" not selected in query:\n{query}')

--- a/libs/langchain/langchain/evaluation/agents/trajectory_eval_chain.py
+++ b/libs/langchain/langchain/evaluation/agents/trajectory_eval_chain.py
@ -36,6 +36,8 @@ class TrajectoryEval(NamedTuple):


 class TrajectoryOutputParser(BaseOutputParser):
+    """Trajectory output parser."""
+
    @property
    def _type(self) -> str:
        return "agent_trajectory"
--- a/libs/langchain/langchain/experimental/autonomous_agents/autogpt/memory.py
+++ b/libs/langchain/langchain/experimental/autonomous_agents/autogpt/memory.py
@ -7,6 +7,8 @@ from langchain.vectorstores.base import VectorStoreRetriever


 class AutoGPTMemory(BaseChatMemory):
+    """Memory for AutoGPT."""
+
    retriever: VectorStoreRetriever = Field(exclude=True)
    """VectorStoreRetriever object to connect to."""

--- a/libs/langchain/langchain/experimental/autonomous_agents/baby_agi/task_creation.py
+++ b/libs/langchain/langchain/experimental/autonomous_agents/baby_agi/task_creation.py
@ -3,7 +3,7 @@ from langchain.schema.language_model import BaseLanguageModel


 class TaskCreationChain(LLMChain):
-    """Chain to generates tasks."""
+    """Chain generating tasks."""

    @classmethod
    def from_llm(cls, llm: BaseLanguageModel, verbose: bool = True) -> LLMChain:
--- a/libs/langchain/langchain/experimental/generative_agents/generative_agent.py
+++ b/libs/langchain/langchain/experimental/generative_agents/generative_agent.py
@ -11,11 +11,10 @@ from langchain.schema.language_model import BaseLanguageModel


 class GenerativeAgent(BaseModel):
-    """A character with memory and innate characteristics."""
+    """An Agent as a character with memory and innate characteristics."""

    name: str
    """The character's name."""
-
    age: Optional[int] = None
    """The optional age of the character."""
    traits: str = "N/A"
@ -29,13 +28,10 @@ class GenerativeAgent(BaseModel):
    verbose: bool = False
    summary: str = ""  #: :meta private:
    """Stateful self-summary generated via reflection on the character's memory."""
-
    summary_refresh_seconds: int = 3600  #: :meta private:
    """How frequently to re-generate the summary."""
-
    last_refreshed: datetime = Field(default_factory=datetime.now)  # : :meta private:
    """The last time the character's summary was regenerated."""
-
    daily_summaries: List[str] = Field(default_factory=list)  # : :meta private:
    """Summary of the events in the plan that the agent took."""

--- a/libs/langchain/langchain/experimental/generative_agents/memory.py
+++ b/libs/langchain/langchain/experimental/generative_agents/memory.py
@ -14,24 +14,21 @@ logger = logging.getLogger(__name__)


 class GenerativeAgentMemory(BaseMemory):
+    """Memory for the generative agent."""
+
    llm: BaseLanguageModel
    """The core language model."""
-
    memory_retriever: TimeWeightedVectorStoreRetriever
    """The retriever to fetch related memories."""
    verbose: bool = False
-
    reflection_threshold: Optional[float] = None
    """When aggregate_importance exceeds reflection_threshold, stop to reflect."""
-
    current_plan: List[str] = []
    """The current plan of the agent."""
-
    # A weight of 0.15 makes this less important than it
    # would be otherwise, relative to salience and time
    importance_weight: float = 0.15
    """How much weight to assign the memory importance."""
-
    aggregate_importance: float = 0.0  # : :meta private:
    """Track the sum of the 'importance' of recent memories.

--- a/libs/langchain/langchain/experimental/llms/jsonformer_decoder.py
+++ b/libs/langchain/langchain/experimental/llms/jsonformer_decoder.py
@ -18,7 +18,7 @@ def import_jsonformer() -> jsonformer:
    try:
        import jsonformer
    except ImportError:
-        raise ValueError(
+        raise ImportError(
            "Could not import jsonformer python package. "
            "Please install it with `pip install jsonformer`."
        )
@ -26,6 +26,11 @@ def import_jsonformer() -> jsonformer:


 class JsonFormer(HuggingFacePipeline):
+    """Jsonformer wrapped LLM using HuggingFace Pipeline API.
+
+    This pipeline is experimental and not yet stable.
+    """
+
    json_schema: dict = Field(..., description="The JSON Schema to complete.")
    max_new_tokens: int = Field(
        default=200, description="Maximum number of new tokens to generate."
--- a/libs/langchain/langchain/experimental/llms/rellm_decoder.py
+++ b/libs/langchain/langchain/experimental/llms/rellm_decoder.py
@ -24,7 +24,7 @@ def import_rellm() -> rellm:
    try:
        import rellm
    except ImportError:
-        raise ValueError(
+        raise ImportError(
            "Could not import rellm python package. "
            "Please install it with `pip install rellm`."
        )
@ -32,6 +32,8 @@ def import_rellm() -> rellm:


 class RELLM(HuggingFacePipeline):
+    """RELLM wrapped LLM using HuggingFace Pipeline API."""
+
    regex: RegexPattern = Field(..., description="The structured format to complete.")
    max_new_tokens: int = Field(
        default=200, description="Maximum number of new tokens to generate."
--- a/libs/langchain/langchain/experimental/plan_and_execute/agent_executor.py
+++ b/libs/langchain/langchain/experimental/plan_and_execute/agent_executor.py
@ -13,9 +13,14 @@ from langchain.experimental.plan_and_execute.schema import (


 class PlanAndExecute(Chain):
+    """Plan and execute a chain of steps."""
+
    planner: BasePlanner
+    """The planner to use."""
    executor: BaseExecutor
+    """The executor to use."""
    step_container: BaseStepContainer = Field(default_factory=ListStepContainer)
+    """The step container to use."""
    input_key: str = "input"
    output_key: str = "output"

--- a/libs/langchain/langchain/experimental/plan_and_execute/executors/base.py
+++ b/libs/langchain/langchain/experimental/plan_and_execute/executors/base.py
@ -9,6 +9,8 @@ from langchain.experimental.plan_and_execute.schema import StepResponse


 class BaseExecutor(BaseModel):
+    """Base executor."""
+
    @abstractmethod
    def step(
        self, inputs: dict, callbacks: Callbacks = None, **kwargs: Any
@ -19,11 +21,14 @@ class BaseExecutor(BaseModel):
    async def astep(
        self, inputs: dict, callbacks: Callbacks = None, **kwargs: Any
    ) -> StepResponse:
-        """Take step."""
+        """Take async step."""


 class ChainExecutor(BaseExecutor):
+    """Chain executor."""
+
    chain: Chain
+    """The chain to use."""

    def step(
        self, inputs: dict, callbacks: Callbacks = None, **kwargs: Any
--- a/libs/langchain/langchain/experimental/plan_and_execute/planners/base.py
+++ b/libs/langchain/langchain/experimental/plan_and_execute/planners/base.py
@ -9,6 +9,8 @@ from langchain.experimental.plan_and_execute.schema import Plan, PlanOutputParse


 class BasePlanner(BaseModel):
+    """Base planner."""
+
    @abstractmethod
    def plan(self, inputs: dict, callbacks: Callbacks = None, **kwargs: Any) -> Plan:
        """Given input, decide what to do."""
@ -17,13 +19,18 @@ class BasePlanner(BaseModel):
    async def aplan(
        self, inputs: dict, callbacks: Callbacks = None, **kwargs: Any
    ) -> Plan:
-        """Given input, decide what to do."""
+        """Given input, asynchronously decide what to do."""


 class LLMPlanner(BasePlanner):
+    """LLM planner."""
+
    llm_chain: LLMChain
+    """The LLM chain to use."""
    output_parser: PlanOutputParser
+    """The output parser to use."""
    stop: Optional[List] = None
+    """The stop list to use."""

    def plan(self, inputs: dict, callbacks: Callbacks = None, **kwargs: Any) -> Plan:
        """Given input, decide what to do."""
@ -33,7 +40,7 @@ class LLMPlanner(BasePlanner):
    async def aplan(
        self, inputs: dict, callbacks: Callbacks = None, **kwargs: Any
    ) -> Plan:
-        """Given input, decide what to do."""
+        """Given input, asynchronously decide what to do."""
        llm_response = await self.llm_chain.arun(
            **inputs, stop=self.stop, callbacks=callbacks
        )
--- a/libs/langchain/langchain/experimental/plan_and_execute/planners/chat_planner.py
+++ b/libs/langchain/langchain/experimental/plan_and_execute/planners/chat_planner.py
@ -24,6 +24,8 @@ SYSTEM_PROMPT = (


 class PlanningOutputParser(PlanOutputParser):
+    """Planning output parser."""
+
    def parse(self, text: str) -> Plan:
        steps = [Step(value=v) for v in re.split("\n\s*\d+\. ", text)[1:]]
        return Plan(steps=steps)
@ -34,6 +36,7 @@ def load_chat_planner(
 ) -> LLMPlanner:
    """
    Load a chat planner.
+
    Args:
        llm: Language model.
        system_prompt: System prompt.
--- a/libs/langchain/langchain/experimental/plan_and_execute/schema.py
+++ b/libs/langchain/langchain/experimental/plan_and_execute/schema.py
@ -7,18 +7,29 @@ from langchain.schema import BaseOutputParser


 class Step(BaseModel):
+    """Step."""
+
    value: str
+    """The value."""


 class Plan(BaseModel):
+    """Plan."""
+
    steps: List[Step]
+    """The steps."""


 class StepResponse(BaseModel):
+    """Step response."""
+
    response: str
+    """The response."""


 class BaseStepContainer(BaseModel):
+    """Base step container."""
+
    @abstractmethod
    def add_step(self, step: Step, step_response: StepResponse) -> None:
        """Add step and step response to the container."""
@ -29,7 +40,10 @@ class BaseStepContainer(BaseModel):


 class ListStepContainer(BaseStepContainer):
+    """List step container."""
+
    steps: List[Tuple[Step, StepResponse]] = Field(default_factory=list)
+    """The steps."""

    def add_step(self, step: Step, step_response: StepResponse) -> None:
        self.steps.append((step, step_response))
@ -42,6 +56,8 @@ class ListStepContainer(BaseStepContainer):


 class PlanOutputParser(BaseOutputParser):
+    """Plan output parser."""
+
    @abstractmethod
    def parse(self, text: str) -> Plan:
        """Parse into a plan."""
--- a/libs/langchain/langchain/text_splitter.py
+++ b/libs/langchain/langchain/text_splitter.py
@ -259,7 +259,7 @@ class TextSplitter(BaseDocumentTransformer, ABC):


 class CharacterTextSplitter(TextSplitter):
-    """Implementation of splitting text that looks at characters."""
+    """Splitting text that looks at characters."""

    def __init__(self, separator: str = "\n\n", **kwargs: Any) -> None:
        """Create a new TextSplitter."""
@ -290,7 +290,7 @@ class HeaderType(TypedDict):


 class MarkdownHeaderTextSplitter:
-    """Implementation of splitting markdown files based on specified headers."""
+    """Splitting markdown files based on specified headers."""

    def __init__(
        self, headers_to_split_on: List[Tuple[str, str]], return_each_line: bool = False
@ -443,7 +443,7 @@ class Tokenizer:


 def split_text_on_tokens(*, text: str, tokenizer: Tokenizer) -> List[str]:
-    """Split incoming text and return chunks."""
+    """Split incoming text and return chunks using tokenizer."""
    splits: List[str] = []
    input_ids = tokenizer.encode(text)
    start_idx = 0
@ -458,7 +458,7 @@ def split_text_on_tokens(*, text: str, tokenizer: Tokenizer) -> List[str]:


 class TokenTextSplitter(TextSplitter):
-    """Implementation of splitting text that looks at tokens."""
+    """Splitting text to tokens using model tokenizer."""

    def __init__(
        self,
@ -506,7 +506,7 @@ class TokenTextSplitter(TextSplitter):


 class SentenceTransformersTokenTextSplitter(TextSplitter):
-    """Implementation of splitting text that looks at tokens."""
+    """Splitting text to tokens using sentence model tokenizer."""

    def __init__(
        self,
@ -599,7 +599,7 @@ class Language(str, Enum):


 class RecursiveCharacterTextSplitter(TextSplitter):
-    """Implementation of splitting text that looks at characters.
+    """Splitting text by recursively looking at characters.

    Recursively tries to split by different characters to find one
    that works.
@ -1004,7 +1004,7 @@ class RecursiveCharacterTextSplitter(TextSplitter):


 class NLTKTextSplitter(TextSplitter):
-    """Implementation of splitting text that looks at sentences using NLTK."""
+    """Splitting text using NLTK package."""

    def __init__(self, separator: str = "\n\n", **kwargs: Any) -> None:
        """Initialize the NLTK splitter."""
@ -1027,7 +1027,7 @@ class NLTKTextSplitter(TextSplitter):


 class SpacyTextSplitter(TextSplitter):
-    """Implementation of splitting text that looks at sentences using Spacy.
+    """Splitting text using Spacy package.


    Per default, Spacy's `en_core_web_sm` model is used. For a faster, but
--- a/libs/langchain/langchain/utilities/brave_search.py
+++ b/libs/langchain/langchain/utilities/brave_search.py
@ -8,9 +8,14 @@ from langchain.schema import Document


 class BraveSearchWrapper(BaseModel):
+    """Wrapper around the Brave search engine."""
+
    api_key: str
+    """The API key to use for the Brave search engine."""
    search_kwargs: dict = Field(default_factory=dict)
+    """Additional keyword arguments to pass to the search request."""
    base_url = "https://api.search.brave.com/res/v1/web/search"
+    """The base URL for the Brave search engine."""

    def run(self, query: str) -> str:
        """Query the Brave search engine and return the results as a JSON string.
--- a/libs/langchain/langchain/utilities/dataforseo_api_search.py
+++ b/libs/langchain/langchain/utilities/dataforseo_api_search.py
@ -10,6 +10,8 @@ from langchain.utils import get_from_dict_or_env


 class DataForSeoAPIWrapper(BaseModel):
+    """Wrapper around the DataForSeo API."""
+
    class Config:
        """Configuration for this pydantic object."""

@ -25,13 +27,21 @@ class DataForSeoAPIWrapper(BaseModel):
            "se_type": "organic",
        }
    )
+    """Default parameters to use for the DataForSEO SERP API."""
    params: dict = Field(default={})
+    """Additional parameters to pass to the DataForSEO SERP API."""
    api_login: Optional[str] = None
+    """The API login to use for the DataForSEO SERP API."""
    api_password: Optional[str] = None
+    """The API password to use for the DataForSEO SERP API."""
    json_result_types: Optional[list] = None
+    """The JSON result types."""
    json_result_fields: Optional[list] = None
+    """The JSON result fields."""
    top_count: Optional[int] = None
+    """The number of top results to return."""
    aiosession: Optional[aiohttp.ClientSession] = None
+    """The aiohttp session to use for the DataForSEO SERP API."""

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
--- a/libs/langchain/langchain/utilities/python.py
+++ b/libs/langchain/langchain/utilities/python.py
@ -12,7 +12,7 @@ logger = logging.getLogger(__name__)

@functools.lru_cache(maxsize=None)
 def warn_once() -> None:
-    # Warn that the PythonREPL
+    """Warn once about the dangers of PythonREPL."""
    logger.warning("Python REPL can execute arbitrary code. Use with caution.")


--- a/libs/langchain/langchain/vectorstores/azuresearch.py
+++ b/libs/langchain/langchain/vectorstores/azuresearch.py
@ -166,6 +166,8 @@ def _get_search_client(


 class AzureSearch(VectorStore):
+    """Azure Cognitive Search vector store."""
+
    def __init__(
        self,
        azure_search_endpoint: str,
@ -481,9 +483,15 @@ class AzureSearch(VectorStore):


 class AzureSearchVectorStoreRetriever(BaseRetriever):
+    """Retriever that uses Azure Search to find similar documents."""
+
    vectorstore: AzureSearch
+    """Azure Search instance used to find similar documents."""
    search_type: str = "hybrid"
+    """Type of search to perform. Options are "similarity", "hybrid",
+    "semantic_hybrid"."""
    k: int = 4
+    """Number of documents to return."""

    class Config:
        """Configuration for this pydantic object."""
--- a/libs/langchain/langchain/vectorstores/base.py
+++ b/libs/langchain/langchain/vectorstores/base.py
@ -460,9 +460,14 @@ class VectorStore(ABC):


 class VectorStoreRetriever(BaseRetriever):
+    """Retriever class for VectorStore."""
+
    vectorstore: VectorStore
+    """VectorStore to use for retrieval."""
    search_type: str = "similarity"
+    """Type of search to perform. Defaults to "similarity"."""
    search_kwargs: dict = Field(default_factory=dict)
+    """Keyword arguments to pass to the search function."""
    allowed_search_types: ClassVar[Collection[str]] = (
        "similarity",
        "similarity_score_threshold",
--- a/libs/langchain/langchain/vectorstores/pgembedding.py
+++ b/libs/langchain/langchain/vectorstores/pgembedding.py
@ -94,6 +94,7 @@ class QueryResult:
 class PGEmbedding(VectorStore):
    """
    VectorStore implementation using Postgres and the pg_embedding extension.
+
    pg_embedding uses sequential scan by default. but you can create a HNSW index
    using the create_hnsw_index method.
    - `connection_string` is a postgres connection string.
--- a/libs/langchain/langchain/vectorstores/redis.py
+++ b/libs/langchain/langchain/vectorstores/redis.py
@ -612,10 +612,16 @@ class Redis(VectorStore):


 class RedisVectorStoreRetriever(VectorStoreRetriever):
+    """Retriever for Redis VectorStore."""
+
    vectorstore: Redis
+    """Redis VectorStore."""
    search_type: str = "similarity"
+    """Type of search to perform. Can be either 'similarity' or 'similarity_limit'."""
    k: int = 4
+    """Number of documents to return."""
    score_threshold: float = 0.4
+    """Score threshold for similarity_limit search."""

    class Config:
        """Configuration for this pydantic object."""
--- a/libs/langchain/langchain/vectorstores/utils.py
+++ b/libs/langchain/langchain/vectorstores/utils.py
@ -9,6 +9,9 @@ from langchain.utils.math import cosine_similarity


 class DistanceStrategy(str, Enum):
+    """Enumerator of the distance strategies for calculating distances
+    between vectors."""
+
    EUCLIDEAN_DISTANCE = "EUCLIDEAN_DISTANCE"
    MAX_INNER_PRODUCT = "MAX_INNER_PRODUCT"
    DOT_PRODUCT = "DOT_PRODUCT"
--- a/libs/langchain/langchain/vectorstores/vectara.py
+++ b/libs/langchain/langchain/vectorstores/vectara.py
@ -412,7 +412,10 @@ class Vectara(VectorStore):


 class VectaraRetriever(VectorStoreRetriever):
+    """Retriever class for Vectara."""
+
    vectorstore: Vectara
+    """Vectara vectorstore."""
    search_kwargs: dict = Field(
        default_factory=lambda: {
            "lambda_val": 0.025,