From 3f6bf852ea86da538bed8457c52d8fd0941e9a16 Mon Sep 17 00:00:00 2001 From: Leonid Ganeline Date: Fri, 23 Feb 2024 18:24:16 -0800 Subject: [PATCH] experimental: docstrings update (#18048) Added missed docstrings. Formatted docsctrings to the consistent format. --- .../autonomous_agents/autogpt/agent.py | 2 +- .../autogpt/prompt_generator.py | 2 +- .../hugginggpt/hugginggpt.py | 2 + .../hugginggpt/repsonse_generator.py | 4 ++ .../hugginggpt/task_executor.py | 4 +- .../hugginggpt/task_planner.py | 25 +++++++++- .../chat_models/llm_wrapper.py | 8 +++ .../amazon_comprehend_moderation.py | 7 ++- .../comprehend_moderation/base_moderation.py | 4 ++ .../base_moderation_callbacks.py | 2 + .../base_moderation_config.py | 8 +++ .../base_moderation_exceptions.py | 2 +- .../comprehend_moderation/pii.py | 2 + .../comprehend_moderation/prompt_safety.py | 2 + .../comprehend_moderation/toxicity.py | 2 + .../langchain_experimental/cpal/base.py | 2 +- .../langchain_experimental/cpal/models.py | 23 ++++++--- .../data_anonymizer/base.py | 7 +-- .../data_anonymizer/deanonymizer_mapping.py | 12 +++-- .../deanonymizer_matching_strategies.py | 22 ++++----- .../data_anonymizer/faker_presidio_mapping.py | 2 + .../data_anonymizer/presidio.py | 9 ++++ .../fallacy_removal/base.py | 7 +-- .../fallacy_removal/models.py | 2 +- .../generative_agents/generative_agent.py | 4 +- .../graph_transformers/diffbot.py | 10 ++-- .../langchain_experimental/llm_bash/bash.py | 4 +- .../langchain_experimental/llm_bash/prompt.py | 2 + .../llm_symbolic_math/base.py | 4 ++ .../llms/anthropic_functions.py | 4 ++ .../llms/jsonformer_decoder.py | 2 +- .../langchain_experimental/llms/llamaapi.py | 2 + .../llms/lmformatenforcer_decoder.py | 2 +- .../llms/ollama_functions.py | 2 + .../llms/rellm_decoder.py | 2 +- .../open_clip/open_clip.py | 2 + .../langchain_experimental/pal_chain/base.py | 4 +- .../plan_and_execute/schema.py | 2 +- .../hugging_face_identifier.py | 5 +- .../langchain_experimental/prompts/load.py | 3 +- .../recommenders/amazon_personalize.py | 5 +- .../recommenders/amazon_personalize_chain.py | 12 ++--- .../retrievers/vector_sql_database.py | 2 +- .../langchain_experimental/rl_chain/base.py | 49 +++++++++++++++---- .../rl_chain/metrics.py | 4 ++ .../rl_chain/model_repository.py | 2 + .../rl_chain/pick_best_chain.py | 14 ++++-- .../rl_chain/vw_logger.py | 2 + .../smart_llm/__init__.py | 21 +++++++- .../langchain_experimental/smart_llm/base.py | 5 +- .../langchain_experimental/sql/vector_sql.py | 9 ++-- .../synthetic_data/__init__.py | 4 +- .../tabular_synthetic_data/base.py | 2 +- .../langchain_experimental/text_splitter.py | 22 ++++++++- .../tools/python/tool.py | 7 ++- .../langchain_experimental/tot/__init__.py | 9 ++++ .../langchain_experimental/tot/base.py | 13 +---- .../langchain_experimental/tot/memory.py | 4 +- .../langchain_experimental/tot/prompts.py | 8 ++- .../langchain_experimental/tot/thought.py | 4 ++ .../tot/thought_generation.py | 4 +- 61 files changed, 316 insertions(+), 102 deletions(-) diff --git a/libs/experimental/langchain_experimental/autonomous_agents/autogpt/agent.py b/libs/experimental/langchain_experimental/autonomous_agents/autogpt/agent.py index 5fd94b0c91..84bfb4102d 100644 --- a/libs/experimental/langchain_experimental/autonomous_agents/autogpt/agent.py +++ b/libs/experimental/langchain_experimental/autonomous_agents/autogpt/agent.py @@ -26,7 +26,7 @@ from langchain_experimental.pydantic_v1 import ValidationError class AutoGPT: - """Agent class for interacting with Auto-GPT.""" + """Agent for interacting with AutoGPT.""" def __init__( self, diff --git a/libs/experimental/langchain_experimental/autonomous_agents/autogpt/prompt_generator.py b/libs/experimental/langchain_experimental/autonomous_agents/autogpt/prompt_generator.py index 81e93a7300..09431670d6 100644 --- a/libs/experimental/langchain_experimental/autonomous_agents/autogpt/prompt_generator.py +++ b/libs/experimental/langchain_experimental/autonomous_agents/autogpt/prompt_generator.py @@ -7,7 +7,7 @@ FINISH_NAME = "finish" class PromptGenerator: - """A class for generating custom prompt strings. + """Generator of custom prompt strings. Does this based on constraints, commands, resources, and performance evaluations. """ diff --git a/libs/experimental/langchain_experimental/autonomous_agents/hugginggpt/hugginggpt.py b/libs/experimental/langchain_experimental/autonomous_agents/hugginggpt/hugginggpt.py index 42c00c3dd1..35f1f1a3a8 100644 --- a/libs/experimental/langchain_experimental/autonomous_agents/hugginggpt/hugginggpt.py +++ b/libs/experimental/langchain_experimental/autonomous_agents/hugginggpt/hugginggpt.py @@ -15,6 +15,8 @@ from langchain_experimental.autonomous_agents.hugginggpt.task_planner import ( class HuggingGPT: + """Agent for interacting with HuggingGPT.""" + def __init__(self, llm: BaseLanguageModel, tools: List[BaseTool]): self.llm = llm self.tools = tools diff --git a/libs/experimental/langchain_experimental/autonomous_agents/hugginggpt/repsonse_generator.py b/libs/experimental/langchain_experimental/autonomous_agents/hugginggpt/repsonse_generator.py index 1df95270cf..e47419e808 100644 --- a/libs/experimental/langchain_experimental/autonomous_agents/hugginggpt/repsonse_generator.py +++ b/libs/experimental/langchain_experimental/autonomous_agents/hugginggpt/repsonse_generator.py @@ -25,6 +25,8 @@ class ResponseGenerationChain(LLMChain): class ResponseGenerator: + """Generates a response based on the input.""" + def __init__(self, llm_chain: LLMChain, stop: Optional[List] = None): self.llm_chain = llm_chain self.stop = stop @@ -36,6 +38,8 @@ class ResponseGenerator: def load_response_generator(llm: BaseLanguageModel) -> ResponseGenerator: + """Load the ResponseGenerator.""" + llm_chain = ResponseGenerationChain.from_llm(llm) return ResponseGenerator( llm_chain=llm_chain, diff --git a/libs/experimental/langchain_experimental/autonomous_agents/hugginggpt/task_executor.py b/libs/experimental/langchain_experimental/autonomous_agents/hugginggpt/task_executor.py index c6a9be93a5..62a4c95da9 100644 --- a/libs/experimental/langchain_experimental/autonomous_agents/hugginggpt/task_executor.py +++ b/libs/experimental/langchain_experimental/autonomous_agents/hugginggpt/task_executor.py @@ -9,6 +9,8 @@ from langchain_experimental.autonomous_agents.hugginggpt.task_planner import Pla class Task: + """Task to be executed.""" + def __init__(self, task: str, id: int, dep: List[int], args: Dict, tool: BaseTool): self.task = task self.id = id @@ -74,7 +76,7 @@ class Task: class TaskExecutor: - """Load tools to execute tasks.""" + """Load tools and execute tasks.""" def __init__(self, plan: Plan): self.plan = plan diff --git a/libs/experimental/langchain_experimental/autonomous_agents/hugginggpt/task_planner.py b/libs/experimental/langchain_experimental/autonomous_agents/hugginggpt/task_planner.py index 34aaab55ab..afda9ab035 100644 --- a/libs/experimental/langchain_experimental/autonomous_agents/hugginggpt/task_planner.py +++ b/libs/experimental/langchain_experimental/autonomous_agents/hugginggpt/task_planner.py @@ -76,6 +76,8 @@ class TaskPlaningChain(LLMChain): class Step: + """A step in the plan.""" + def __init__( self, task: str, id: int, dep: List[int], args: Dict[str, str], tool: BaseTool ): @@ -87,6 +89,8 @@ class Step: class Plan: + """A plan to execute.""" + def __init__(self, steps: List[Step]): self.steps = steps @@ -98,6 +102,8 @@ class Plan: class BasePlanner(BaseModel): + """Base class for a planner.""" + @abstractmethod def plan(self, inputs: dict, callbacks: Callbacks = None, **kwargs: Any) -> Plan: """Given input, decide what to do.""" @@ -106,11 +112,22 @@ class BasePlanner(BaseModel): async def aplan( self, inputs: dict, callbacks: Callbacks = None, **kwargs: Any ) -> Plan: - """Given input, decide what to do.""" + """Asynchronous Given input, decide what to do.""" class PlanningOutputParser(BaseModel): + """Parses the output of the planning stage.""" + def parse(self, text: str, hf_tools: List[BaseTool]) -> Plan: + """Parse the output of the planning stage. + + Args: + text: The output of the planning stage. + hf_tools: The tools available. + + Returns: + The plan. + """ steps = [] for v in json.loads(re.findall(r"\[.*\]", text)[0]): choose_tool = None @@ -124,6 +141,8 @@ class PlanningOutputParser(BaseModel): class TaskPlanner(BasePlanner): + """Planner for tasks.""" + llm_chain: LLMChain output_parser: PlanningOutputParser stop: Optional[List] = None @@ -139,7 +158,7 @@ class TaskPlanner(BasePlanner): async def aplan( self, inputs: dict, callbacks: Callbacks = None, **kwargs: Any ) -> Plan: - """Given input, decided what to do.""" + """Asynchronous Given input, decided what to do.""" inputs["hf_tools"] = [ f"{tool.name}: {tool.description}" for tool in inputs["hf_tools"] ] @@ -150,5 +169,7 @@ class TaskPlanner(BasePlanner): def load_chat_planner(llm: BaseLanguageModel) -> TaskPlanner: + """Load the chat planner.""" + llm_chain = TaskPlaningChain.from_llm(llm) return TaskPlanner(llm_chain=llm_chain, output_parser=PlanningOutputParser()) diff --git a/libs/experimental/langchain_experimental/chat_models/llm_wrapper.py b/libs/experimental/langchain_experimental/chat_models/llm_wrapper.py index d739cec69c..06aee3d9c5 100644 --- a/libs/experimental/langchain_experimental/chat_models/llm_wrapper.py +++ b/libs/experimental/langchain_experimental/chat_models/llm_wrapper.py @@ -24,6 +24,8 @@ If a question does not make any sense, or is not factually coherent, explain why class ChatWrapper(BaseChatModel): + """Wrapper for chat LLMs.""" + llm: LLM sys_beg: str sys_end: str @@ -130,6 +132,8 @@ class ChatWrapper(BaseChatModel): class Llama2Chat(ChatWrapper): + """Wrapper for Llama-2-chat model.""" + @property def _llm_type(self) -> str: return "llama-2-chat" @@ -145,6 +149,8 @@ class Llama2Chat(ChatWrapper): class Orca(ChatWrapper): + """Wrapper for Orca-style models.""" + @property def _llm_type(self) -> str: return "orca-style" @@ -158,6 +164,8 @@ class Orca(ChatWrapper): class Vicuna(ChatWrapper): + """Wrapper for Vicuna-style models.""" + @property def _llm_type(self) -> str: return "vicuna-style" diff --git a/libs/experimental/langchain_experimental/comprehend_moderation/amazon_comprehend_moderation.py b/libs/experimental/langchain_experimental/comprehend_moderation/amazon_comprehend_moderation.py index 100e061556..3298f2843f 100644 --- a/libs/experimental/langchain_experimental/comprehend_moderation/amazon_comprehend_moderation.py +++ b/libs/experimental/langchain_experimental/comprehend_moderation/amazon_comprehend_moderation.py @@ -14,7 +14,10 @@ from langchain_experimental.pydantic_v1 import root_validator class AmazonComprehendModerationChain(Chain): - """A subclass of Chain, designed to apply moderation to LLMs.""" + """Moderation Chain, based on `Amazon Comprehend` service. + + See more at https://aws.amazon.com/comprehend/ + """ output_key: str = "output" #: :meta private: """Key used to fetch/store the output in data containers. Defaults to `output`""" @@ -54,7 +57,7 @@ class AmazonComprehendModerationChain(Chain): @root_validator(pre=True) def create_client(cls, values: Dict[str, Any]) -> Dict[str, Any]: """ - Creates an Amazon Comprehend client + Creates an Amazon Comprehend client. Args: values (Dict[str, Any]): A dictionary containing configuration values. diff --git a/libs/experimental/langchain_experimental/comprehend_moderation/base_moderation.py b/libs/experimental/langchain_experimental/comprehend_moderation/base_moderation.py index 165962d473..cbeb6d52f2 100644 --- a/libs/experimental/langchain_experimental/comprehend_moderation/base_moderation.py +++ b/libs/experimental/langchain_experimental/comprehend_moderation/base_moderation.py @@ -13,6 +13,8 @@ from langchain_experimental.comprehend_moderation.toxicity import ComprehendToxi class BaseModeration: + """Base class for moderation.""" + def __init__( self, client: Any, @@ -109,6 +111,8 @@ class BaseModeration: self.run_manager.on_text(message) def moderate(self, prompt: Any) -> str: + """Moderate the input prompt.""" + from langchain_experimental.comprehend_moderation.base_moderation_config import ( # noqa: E501 ModerationPiiConfig, ModerationPromptSafetyConfig, diff --git a/libs/experimental/langchain_experimental/comprehend_moderation/base_moderation_callbacks.py b/libs/experimental/langchain_experimental/comprehend_moderation/base_moderation_callbacks.py index 260b3026b8..dd39c14608 100644 --- a/libs/experimental/langchain_experimental/comprehend_moderation/base_moderation_callbacks.py +++ b/libs/experimental/langchain_experimental/comprehend_moderation/base_moderation_callbacks.py @@ -2,6 +2,8 @@ from typing import Any, Callable, Dict class BaseModerationCallbackHandler: + """Base class for moderation callback handlers.""" + def __init__(self) -> None: if ( self._is_method_unchanged( diff --git a/libs/experimental/langchain_experimental/comprehend_moderation/base_moderation_config.py b/libs/experimental/langchain_experimental/comprehend_moderation/base_moderation_config.py index f20b5b289a..eaa371d99f 100644 --- a/libs/experimental/langchain_experimental/comprehend_moderation/base_moderation_config.py +++ b/libs/experimental/langchain_experimental/comprehend_moderation/base_moderation_config.py @@ -4,6 +4,8 @@ from pydantic import BaseModel class ModerationPiiConfig(BaseModel): + """Configuration for PII moderation filter.""" + threshold: float = 0.5 """Threshold for PII confidence score, defaults to 0.5 i.e. 50%""" @@ -21,6 +23,8 @@ class ModerationPiiConfig(BaseModel): class ModerationToxicityConfig(BaseModel): + """Configuration for Toxicity moderation filter.""" + threshold: float = 0.5 """Threshold for Toxic label confidence score, defaults to 0.5 i.e. 50%""" @@ -29,6 +33,8 @@ class ModerationToxicityConfig(BaseModel): class ModerationPromptSafetyConfig(BaseModel): + """Configuration for Prompt Safety moderation filter.""" + threshold: float = 0.5 """ Threshold for Prompt Safety classification @@ -37,6 +43,8 @@ class ModerationPromptSafetyConfig(BaseModel): class BaseModerationConfig(BaseModel): + """Base configuration settings for moderation.""" + filters: List[ Union[ ModerationPiiConfig, ModerationToxicityConfig, ModerationPromptSafetyConfig diff --git a/libs/experimental/langchain_experimental/comprehend_moderation/base_moderation_exceptions.py b/libs/experimental/langchain_experimental/comprehend_moderation/base_moderation_exceptions.py index c3a4e6f72f..52c08f6bd0 100644 --- a/libs/experimental/langchain_experimental/comprehend_moderation/base_moderation_exceptions.py +++ b/libs/experimental/langchain_experimental/comprehend_moderation/base_moderation_exceptions.py @@ -27,7 +27,7 @@ class ModerationToxicityError(Exception): class ModerationPromptSafetyError(Exception): - """Exception raised if Intention entities are detected. + """Exception raised if Unsafe prompts are detected. Attributes: message -- explanation of the error diff --git a/libs/experimental/langchain_experimental/comprehend_moderation/pii.py b/libs/experimental/langchain_experimental/comprehend_moderation/pii.py index 0d3e07ae64..88e29ee115 100644 --- a/libs/experimental/langchain_experimental/comprehend_moderation/pii.py +++ b/libs/experimental/langchain_experimental/comprehend_moderation/pii.py @@ -7,6 +7,8 @@ from langchain_experimental.comprehend_moderation.base_moderation_exceptions imp class ComprehendPII: + """Class to handle Personally Identifiable Information (PII) moderation.""" + def __init__( self, client: Any, diff --git a/libs/experimental/langchain_experimental/comprehend_moderation/prompt_safety.py b/libs/experimental/langchain_experimental/comprehend_moderation/prompt_safety.py index 77eca1a1af..3e1f764fbf 100644 --- a/libs/experimental/langchain_experimental/comprehend_moderation/prompt_safety.py +++ b/libs/experimental/langchain_experimental/comprehend_moderation/prompt_safety.py @@ -7,6 +7,8 @@ from langchain_experimental.comprehend_moderation.base_moderation_exceptions imp class ComprehendPromptSafety: + """Class to handle prompt safety moderation.""" + def __init__( self, client: Any, diff --git a/libs/experimental/langchain_experimental/comprehend_moderation/toxicity.py b/libs/experimental/langchain_experimental/comprehend_moderation/toxicity.py index c616e506e7..2e7af07b34 100644 --- a/libs/experimental/langchain_experimental/comprehend_moderation/toxicity.py +++ b/libs/experimental/langchain_experimental/comprehend_moderation/toxicity.py @@ -8,6 +8,8 @@ from langchain_experimental.comprehend_moderation.base_moderation_exceptions imp class ComprehendToxicity: + """Class to handle toxicity moderation.""" + def __init__( self, client: Any, diff --git a/libs/experimental/langchain_experimental/cpal/base.py b/libs/experimental/langchain_experimental/cpal/base.py index 4ee817f357..b72c9c5cd3 100644 --- a/libs/experimental/langchain_experimental/cpal/base.py +++ b/libs/experimental/langchain_experimental/cpal/base.py @@ -105,7 +105,7 @@ class _BaseStoryElementChain(Chain): class NarrativeChain(_BaseStoryElementChain): - """Decompose the narrative into its story elements + """Decompose the narrative into its story elements. - causal model - query diff --git a/libs/experimental/langchain_experimental/cpal/models.py b/libs/experimental/langchain_experimental/cpal/models.py index ca9b222876..6be95bc70f 100644 --- a/libs/experimental/langchain_experimental/cpal/models.py +++ b/libs/experimental/langchain_experimental/cpal/models.py @@ -17,7 +17,7 @@ from langchain_experimental.pydantic_v1 import ( class NarrativeModel(BaseModel): """ - Represent the narrative input as three story elements. + Narrative input as three story elements. """ story_outcome_question: str @@ -33,6 +33,8 @@ class NarrativeModel(BaseModel): class EntityModel(BaseModel): + """Entity in the story.""" + name: str = Field(description="entity name") code: str = Field(description="entity actions") value: float = Field(description="entity initial value") @@ -51,6 +53,8 @@ class EntityModel(BaseModel): class CausalModel(BaseModel): + """Casual data.""" + attribute: str = Field(description="name of the attribute to be calculated") entities: List[EntityModel] = Field(description="entities in the story") @@ -58,7 +62,8 @@ class CausalModel(BaseModel): class EntitySettingModel(BaseModel): - """ + """Entity initial conditions. + Initial conditions for an entity {"name": "bud", "attribute": "pet_count", "value": 12} @@ -75,7 +80,8 @@ class EntitySettingModel(BaseModel): class SystemSettingModel(BaseModel): - """ + """System initial conditions. + Initial global conditions for the system. {"parameter": "interest_rate", "value": .05} @@ -86,8 +92,7 @@ class SystemSettingModel(BaseModel): class InterventionModel(BaseModel): - """ - aka initial conditions + """Intervention data of the story aka initial conditions. >>> intervention.dict() { @@ -110,7 +115,9 @@ class InterventionModel(BaseModel): class QueryModel(BaseModel): - """translate a question about the story outcome into a programmatic expression""" + """Query data of the story. + + translate a question about the story outcome into a programmatic expression""" question: str = Field(alias=Constant.narrative_input.value) # input expression: str # output, part of llm completion @@ -119,11 +126,15 @@ class QueryModel(BaseModel): class ResultModel(BaseModel): + """Result of the story query.""" + question: str = Field(alias=Constant.narrative_input.value) # input _result_table: str = PrivateAttr() # result of the executed query class StoryModel(BaseModel): + """Story data.""" + causal_operations: Any = Field(required=True) intervention: Any = Field(required=True) query: Any = Field(required=True) diff --git a/libs/experimental/langchain_experimental/data_anonymizer/base.py b/libs/experimental/langchain_experimental/data_anonymizer/base.py index b4f75b4256..85282dd3a7 100644 --- a/libs/experimental/langchain_experimental/data_anonymizer/base.py +++ b/libs/experimental/langchain_experimental/data_anonymizer/base.py @@ -10,8 +10,8 @@ DEFAULT_DEANONYMIZER_MATCHING_STRATEGY = exact_matching_strategy class AnonymizerBase(ABC): - """ - Base abstract class for anonymizers. + """Base abstract class for anonymizers. + It is public and non-virtual because it allows wrapping the behavior for all methods in a base class. """ @@ -22,7 +22,8 @@ class AnonymizerBase(ABC): language: Optional[str] = None, allow_list: Optional[List[str]] = None, ) -> str: - """Anonymize text""" + """Anonymize text.""" + return self._anonymize(text, language, allow_list) @abstractmethod diff --git a/libs/experimental/langchain_experimental/data_anonymizer/deanonymizer_mapping.py b/libs/experimental/langchain_experimental/data_anonymizer/deanonymizer_mapping.py index 72d2350d9d..b19d654654 100644 --- a/libs/experimental/langchain_experimental/data_anonymizer/deanonymizer_mapping.py +++ b/libs/experimental/langchain_experimental/data_anonymizer/deanonymizer_mapping.py @@ -11,7 +11,7 @@ MappingDataType = Dict[str, Dict[str, str]] def format_duplicated_operator(operator_name: str, count: int) -> str: - """Format the operator name with the count""" + """Format the operator name with the count.""" clean_operator_name = re.sub(r"[<>]", "", operator_name) clean_operator_name = re.sub(r"_\d+$", "", clean_operator_name) @@ -24,17 +24,20 @@ def format_duplicated_operator(operator_name: str, count: int) -> str: @dataclass class DeanonymizerMapping: + """Deanonymizer mapping.""" + mapping: MappingDataType = field( default_factory=lambda: defaultdict(lambda: defaultdict(str)) ) @property def data(self) -> MappingDataType: - """Return the deanonymizer mapping""" + """Return the deanonymizer mapping.""" return {k: dict(v) for k, v in self.mapping.items()} def update(self, new_mapping: MappingDataType) -> None: - """Update the deanonymizer mapping with new values + """Update the deanonymizer mapping with new values. + Duplicated values will not be added If there are multiple entities of the same type, the mapping will include a count to differentiate them. For example, if there are @@ -67,7 +70,8 @@ def create_anonymizer_mapping( anonymizer_results: "EngineResult", is_reversed: bool = False, ) -> MappingDataType: - """Creates or updates the mapping used to anonymize and/or deanonymize text. + """Create or update the mapping used to anonymize and/or + deanonymize a text. This method exploits the results returned by the analysis and anonymization processes. diff --git a/libs/experimental/langchain_experimental/data_anonymizer/deanonymizer_matching_strategies.py b/libs/experimental/langchain_experimental/data_anonymizer/deanonymizer_matching_strategies.py index da43f95c97..11bb9aca4a 100644 --- a/libs/experimental/langchain_experimental/data_anonymizer/deanonymizer_matching_strategies.py +++ b/libs/experimental/langchain_experimental/data_anonymizer/deanonymizer_matching_strategies.py @@ -5,8 +5,8 @@ from langchain_experimental.data_anonymizer.deanonymizer_mapping import MappingD def exact_matching_strategy(text: str, deanonymizer_mapping: MappingDataType) -> str: - """ - Exact matching strategy for deanonymization. + """Exact matching strategy for deanonymization. + It replaces all the anonymized entities with the original ones. Args: @@ -23,8 +23,8 @@ def exact_matching_strategy(text: str, deanonymizer_mapping: MappingDataType) -> def case_insensitive_matching_strategy( text: str, deanonymizer_mapping: MappingDataType ) -> str: - """ - Case insensitive matching strategy for deanonymization. + """Case insensitive matching strategy for deanonymization. + It replaces all the anonymized entities with the original ones irrespective of their letter case. @@ -48,8 +48,8 @@ def case_insensitive_matching_strategy( def fuzzy_matching_strategy( text: str, deanonymizer_mapping: MappingDataType, max_l_dist: int = 3 ) -> str: - """ - Fuzzy matching strategy for deanonymization. + """Fuzzy matching strategy for deanonymization. + It uses fuzzy matching to find the position of the anonymized entity in the text. It replaces all the anonymized entities with the original ones. @@ -93,9 +93,9 @@ def fuzzy_matching_strategy( def combined_exact_fuzzy_matching_strategy( text: str, deanonymizer_mapping: MappingDataType, max_l_dist: int = 3 ) -> str: - """ - RECOMMENDED STRATEGY. - Combined exact and fuzzy matching strategy for deanonymization. + """Combined exact and fuzzy matching strategy for deanonymization. + + It is a RECOMMENDED STRATEGY. Args: text: text to deanonymize @@ -118,8 +118,8 @@ def ngram_fuzzy_matching_strategy( fuzzy_threshold: int = 85, use_variable_length: bool = True, ) -> str: - """ - N-gram fuzzy matching strategy for deanonymization. + """N-gram fuzzy matching strategy for deanonymization. + It replaces all the anonymized entities with the original ones. It uses fuzzy matching to find the position of the anonymized entity in the text. It generates n-grams of the same length as the anonymized entity from the text and diff --git a/libs/experimental/langchain_experimental/data_anonymizer/faker_presidio_mapping.py b/libs/experimental/langchain_experimental/data_anonymizer/faker_presidio_mapping.py index 18f117eff2..e06cccc55a 100644 --- a/libs/experimental/langchain_experimental/data_anonymizer/faker_presidio_mapping.py +++ b/libs/experimental/langchain_experimental/data_anonymizer/faker_presidio_mapping.py @@ -3,6 +3,8 @@ from typing import Callable, Dict, Optional def get_pseudoanonymizer_mapping(seed: Optional[int] = None) -> Dict[str, Callable]: + """Get a mapping of entities to pseudo anonymize them.""" + try: from faker import Faker except ImportError as e: diff --git a/libs/experimental/langchain_experimental/data_anonymizer/presidio.py b/libs/experimental/langchain_experimental/data_anonymizer/presidio.py index fbfad3703e..032c2a65f1 100644 --- a/libs/experimental/langchain_experimental/data_anonymizer/presidio.py +++ b/libs/experimental/langchain_experimental/data_anonymizer/presidio.py @@ -98,6 +98,11 @@ DEFAULT_LANGUAGES_CONFIG = { class PresidioAnonymizerBase(AnonymizerBase): + """Base Anonymizer using Microsoft Presidio. + + See more: https://microsoft.github.io/presidio/ + """ + def __init__( self, analyzed_fields: Optional[List[str]] = None, @@ -180,6 +185,8 @@ class PresidioAnonymizerBase(AnonymizerBase): class PresidioAnonymizer(PresidioAnonymizerBase): + """Anonymizer using Microsoft Presidio.""" + def _anonymize( self, text: str, @@ -258,6 +265,8 @@ class PresidioAnonymizer(PresidioAnonymizerBase): class PresidioReversibleAnonymizer(PresidioAnonymizerBase, ReversibleAnonymizerBase): + """Reversible Anonymizer using Microsoft Presidio.""" + def __init__( self, analyzed_fields: Optional[List[str]] = None, diff --git a/libs/experimental/langchain_experimental/fallacy_removal/base.py b/libs/experimental/langchain_experimental/fallacy_removal/base.py index fd196b525a..a09114295e 100644 --- a/libs/experimental/langchain_experimental/fallacy_removal/base.py +++ b/libs/experimental/langchain_experimental/fallacy_removal/base.py @@ -18,9 +18,10 @@ from langchain_experimental.fallacy_removal.prompts import ( class FallacyChain(Chain): - """Chain for applying logical fallacy evaluations, modeled after Constitutional AI \ - and in same format, but applying logical fallacies as generalized rules to remove \ - in output + """Chain for applying logical fallacy evaluations. + + It is modeled after Constitutional AI and in same format, but + applying logical fallacies as generalized rules to remove in output. Example: .. code-block:: python diff --git a/libs/experimental/langchain_experimental/fallacy_removal/models.py b/libs/experimental/langchain_experimental/fallacy_removal/models.py index b56652c637..78422b91b7 100644 --- a/libs/experimental/langchain_experimental/fallacy_removal/models.py +++ b/libs/experimental/langchain_experimental/fallacy_removal/models.py @@ -3,7 +3,7 @@ from langchain_experimental.pydantic_v1 import BaseModel class LogicalFallacy(BaseModel): - """Class for a logical fallacy.""" + """Logical fallacy.""" fallacy_critique_request: str fallacy_revision_request: str diff --git a/libs/experimental/langchain_experimental/generative_agents/generative_agent.py b/libs/experimental/langchain_experimental/generative_agents/generative_agent.py index 2aca901623..5069a0b7c4 100644 --- a/libs/experimental/langchain_experimental/generative_agents/generative_agent.py +++ b/libs/experimental/langchain_experimental/generative_agents/generative_agent.py @@ -11,7 +11,7 @@ from langchain_experimental.pydantic_v1 import BaseModel, Field class GenerativeAgent(BaseModel): - """An Agent as a character with memory and innate characteristics.""" + """Agent as a character with memory and innate characteristics.""" name: str """The character's name.""" @@ -48,6 +48,8 @@ class GenerativeAgent(BaseModel): return [re.sub(r"^\s*\d+\.\s*", "", line).strip() for line in lines] def chain(self, prompt: PromptTemplate) -> LLMChain: + """Create a chain with the same settings as the agent.""" + return LLMChain( llm=self.llm, prompt=prompt, verbose=self.verbose, memory=self.memory ) diff --git a/libs/experimental/langchain_experimental/graph_transformers/diffbot.py b/libs/experimental/langchain_experimental/graph_transformers/diffbot.py index 58c84094ed..0e8613b3f2 100644 --- a/libs/experimental/langchain_experimental/graph_transformers/diffbot.py +++ b/libs/experimental/langchain_experimental/graph_transformers/diffbot.py @@ -7,6 +7,8 @@ from langchain_core.documents import Document def format_property_key(s: str) -> str: + """Formats a string to be used as a property key.""" + words = s.split() if not words: return s @@ -16,8 +18,7 @@ def format_property_key(s: str) -> str: class NodesList: - """ - Manages a list of nodes with associated properties. + """List of nodes with associated properties. Attributes: nodes (Dict[Tuple, Any]): Stores nodes as keys and their properties as values. @@ -85,8 +86,7 @@ schema_mapping = [ class SimplifiedSchema: - """ - Provides functionality for working with a simplified schema mapping. + """Simplified schema mapping. Attributes: schema (Dict): A dictionary containing the mapping to simplified schema types. @@ -116,7 +116,7 @@ class SimplifiedSchema: class DiffbotGraphTransformer: - """Transforms documents into graph documents using Diffbot's NLP API. + """Transform documents into graph documents using Diffbot NLP API. A graph document transformation system takes a sequence of Documents and returns a sequence of Graph Documents. diff --git a/libs/experimental/langchain_experimental/llm_bash/bash.py b/libs/experimental/langchain_experimental/llm_bash/bash.py index 5a78f5dc97..6221e05e59 100644 --- a/libs/experimental/langchain_experimental/llm_bash/bash.py +++ b/libs/experimental/langchain_experimental/llm_bash/bash.py @@ -12,8 +12,8 @@ if TYPE_CHECKING: class BashProcess: - """ - Wrapper class for starting subprocesses. + """Wrapper for starting subprocesses. + Uses the python built-in subprocesses.run() Persistent processes are **not** available on Windows systems, as pexpect makes use of diff --git a/libs/experimental/langchain_experimental/llm_bash/prompt.py b/libs/experimental/langchain_experimental/llm_bash/prompt.py index 3ba55e8e06..f81d2fcd7f 100644 --- a/libs/experimental/langchain_experimental/llm_bash/prompt.py +++ b/libs/experimental/langchain_experimental/llm_bash/prompt.py @@ -31,6 +31,8 @@ class BashOutputParser(BaseOutputParser): """Parser for bash output.""" def parse(self, text: str) -> List[str]: + """Parse the output of a bash command.""" + if "```bash" in text: return self.get_code_blocks(text) else: diff --git a/libs/experimental/langchain_experimental/llm_symbolic_math/base.py b/libs/experimental/langchain_experimental/llm_symbolic_math/base.py index 7feda59ffd..fa5a29e5f8 100644 --- a/libs/experimental/langchain_experimental/llm_symbolic_math/base.py +++ b/libs/experimental/langchain_experimental/llm_symbolic_math/base.py @@ -20,6 +20,10 @@ from langchain_experimental.pydantic_v1 import Extra class LLMSymbolicMathChain(Chain): """Chain that interprets a prompt and executes python code to do symbolic math. + It is based on the sympy library and can be used to evaluate + mathematical expressions. + See https://www.sympy.org/ for more information. + Example: .. code-block:: python diff --git a/libs/experimental/langchain_experimental/llms/anthropic_functions.py b/libs/experimental/langchain_experimental/llms/anthropic_functions.py index 23f8d5e65c..1da852c4f0 100644 --- a/libs/experimental/langchain_experimental/llms/anthropic_functions.py +++ b/libs/experimental/langchain_experimental/llms/anthropic_functions.py @@ -41,6 +41,8 @@ for the weather in SF you would respond: class TagParser(HTMLParser): + """Parser for the tool tags.""" + def __init__(self) -> None: """A heavy-handed solution, but it's fast for prototyping. @@ -122,6 +124,8 @@ def _destrip(tool_input: Any) -> Any: class AnthropicFunctions(BaseChatModel): + """Chat model for interacting with Anthropic functions.""" + llm: BaseChatModel @root_validator(pre=True) diff --git a/libs/experimental/langchain_experimental/llms/jsonformer_decoder.py b/libs/experimental/langchain_experimental/llms/jsonformer_decoder.py index a562eaf00f..392cb602d6 100644 --- a/libs/experimental/langchain_experimental/llms/jsonformer_decoder.py +++ b/libs/experimental/langchain_experimental/llms/jsonformer_decoder.py @@ -14,7 +14,7 @@ if TYPE_CHECKING: def import_jsonformer() -> jsonformer: - """Lazily import jsonformer.""" + """Lazily import of the jsonformer package.""" try: import jsonformer except ImportError: diff --git a/libs/experimental/langchain_experimental/llms/llamaapi.py b/libs/experimental/langchain_experimental/llms/llamaapi.py index d2abf32862..29e841a658 100644 --- a/libs/experimental/langchain_experimental/llms/llamaapi.py +++ b/libs/experimental/langchain_experimental/llms/llamaapi.py @@ -76,6 +76,8 @@ def _convert_message_to_dict(message: BaseMessage) -> dict: class ChatLlamaAPI(BaseChatModel): + """Chat model using the Llama API.""" + client: Any #: :meta private: def _generate( diff --git a/libs/experimental/langchain_experimental/llms/lmformatenforcer_decoder.py b/libs/experimental/langchain_experimental/llms/lmformatenforcer_decoder.py index 4a7f9eff8a..ab899b5534 100644 --- a/libs/experimental/langchain_experimental/llms/lmformatenforcer_decoder.py +++ b/libs/experimental/langchain_experimental/llms/lmformatenforcer_decoder.py @@ -14,7 +14,7 @@ if TYPE_CHECKING: def import_lmformatenforcer() -> lmformatenforcer: - """Lazily import lmformatenforcer.""" + """Lazily import of the lmformatenforcer package.""" try: import lmformatenforcer except ImportError: diff --git a/libs/experimental/langchain_experimental/llms/ollama_functions.py b/libs/experimental/langchain_experimental/llms/ollama_functions.py index 4b9b743f66..16f858b229 100644 --- a/libs/experimental/langchain_experimental/llms/ollama_functions.py +++ b/libs/experimental/langchain_experimental/llms/ollama_functions.py @@ -42,6 +42,8 @@ DEFAULT_RESPONSE_FUNCTION = { class OllamaFunctions(BaseChatModel): + """Function chat model that uses Ollama API.""" + llm: ChatOllama tool_system_prompt_template: str diff --git a/libs/experimental/langchain_experimental/llms/rellm_decoder.py b/libs/experimental/langchain_experimental/llms/rellm_decoder.py index 02e66c1934..a04c09c2d4 100644 --- a/libs/experimental/langchain_experimental/llms/rellm_decoder.py +++ b/libs/experimental/langchain_experimental/llms/rellm_decoder.py @@ -20,7 +20,7 @@ else: def import_rellm() -> rellm: - """Lazily import rellm.""" + """Lazily import of the rellm package.""" try: import rellm except ImportError: diff --git a/libs/experimental/langchain_experimental/open_clip/open_clip.py b/libs/experimental/langchain_experimental/open_clip/open_clip.py index 6459522314..fe61c98b5f 100644 --- a/libs/experimental/langchain_experimental/open_clip/open_clip.py +++ b/libs/experimental/langchain_experimental/open_clip/open_clip.py @@ -5,6 +5,8 @@ from langchain_core.embeddings import Embeddings class OpenCLIPEmbeddings(BaseModel, Embeddings): + """OpenCLIP Embeddings model.""" + model: Any preprocess: Any tokenizer: Any diff --git a/libs/experimental/langchain_experimental/pal_chain/base.py b/libs/experimental/langchain_experimental/pal_chain/base.py index 58593076f4..d7b7136f65 100644 --- a/libs/experimental/langchain_experimental/pal_chain/base.py +++ b/libs/experimental/langchain_experimental/pal_chain/base.py @@ -34,6 +34,8 @@ COMMAND_EXECUTION_ATTRIBUTES = [ class PALValidation: + """Validation for PAL generated code.""" + SOLUTION_EXPRESSION_TYPE_FUNCTION = ast.FunctionDef SOLUTION_EXPRESSION_TYPE_VARIABLE = ast.Name @@ -95,7 +97,7 @@ class PALValidation: class PALChain(Chain): - """Implements Program-Aided Language Models (PAL). + """Chain that implements Program-Aided Language Models (PAL). This class implements the Program-Aided Language Models (PAL) for generating code solutions. PAL is a technique described in the paper "Program-Aided Language Models" diff --git a/libs/experimental/langchain_experimental/plan_and_execute/schema.py b/libs/experimental/langchain_experimental/plan_and_execute/schema.py index 2fc2660def..2c37f92c91 100644 --- a/libs/experimental/langchain_experimental/plan_and_execute/schema.py +++ b/libs/experimental/langchain_experimental/plan_and_execute/schema.py @@ -40,7 +40,7 @@ class BaseStepContainer(BaseModel): class ListStepContainer(BaseStepContainer): - """List step container.""" + """Container for List of steps.""" steps: List[Tuple[Step, StepResponse]] = Field(default_factory=list) """The steps.""" diff --git a/libs/experimental/langchain_experimental/prompt_injection_identifier/hugging_face_identifier.py b/libs/experimental/langchain_experimental/prompt_injection_identifier/hugging_face_identifier.py index bf28a0b402..4f830d2c7f 100644 --- a/libs/experimental/langchain_experimental/prompt_injection_identifier/hugging_face_identifier.py +++ b/libs/experimental/langchain_experimental/prompt_injection_identifier/hugging_face_identifier.py @@ -11,6 +11,8 @@ if TYPE_CHECKING: class PromptInjectionException(ValueError): + """Exception raised when prompt injection attack is detected.""" + def __init__( self, message: str = "Prompt injection attack detected", score: float = 1.0 ): @@ -48,7 +50,8 @@ def _model_default_factory( class HuggingFaceInjectionIdentifier(BaseTool): - """Tool that uses HF model to detect prompt injection attacks.""" + """Tool that uses HuggingFace Prompt Injection to + detect prompt injection attacks.""" name: str = "hugging_face_injection_identifier" description: str = ( diff --git a/libs/experimental/langchain_experimental/prompts/load.py b/libs/experimental/langchain_experimental/prompts/load.py index 51efdc98db..6cbe02edd6 100644 --- a/libs/experimental/langchain_experimental/prompts/load.py +++ b/libs/experimental/langchain_experimental/prompts/load.py @@ -10,7 +10,8 @@ from langchain_core.prompts import BasePromptTemplate def load_prompt(path: Union[str, Path]) -> BasePromptTemplate: - """Unified method for loading a prompt from LangChainHub or local fs.""" + """Unified method for loading a prompt from LangChainHub or local file system.""" + if hub_result := try_load_from_hub( path, _load_prompt_from_file, "prompts", {"py", "json", "yaml"} ): diff --git a/libs/experimental/langchain_experimental/recommenders/amazon_personalize.py b/libs/experimental/langchain_experimental/recommenders/amazon_personalize.py index b2300f0a19..9b73f2e087 100644 --- a/libs/experimental/langchain_experimental/recommenders/amazon_personalize.py +++ b/libs/experimental/langchain_experimental/recommenders/amazon_personalize.py @@ -84,7 +84,9 @@ class AmazonPersonalize: metadata_columns: Optional[Mapping[str, Sequence[str]]] = None, **kwargs: Any, ) -> Mapping[str, Any]: - """Get recommendations from Amazon Personalize: + """Get recommendations from Amazon Personalize service. + + See more details at: https://docs.aws.amazon.com/personalize/latest/dg/API_RS_GetRecommendations.html Args: @@ -151,6 +153,7 @@ class AmazonPersonalize: **kwargs: Any, ) -> Mapping[str, Any]: """Re-ranks a list of recommended items for the given user. + https://docs.aws.amazon.com/personalize/latest/dg/API_RS_GetPersonalizedRanking.html Args: diff --git a/libs/experimental/langchain_experimental/recommenders/amazon_personalize_chain.py b/libs/experimental/langchain_experimental/recommenders/amazon_personalize_chain.py index 4c187a8006..751da51520 100644 --- a/libs/experimental/langchain_experimental/recommenders/amazon_personalize_chain.py +++ b/libs/experimental/langchain_experimental/recommenders/amazon_personalize_chain.py @@ -39,12 +39,12 @@ RESULT_OUTPUT_KEY = "result" class AmazonPersonalizeChain(Chain): - """Amazon Personalize Chain for retrieving recommendations - from Amazon Personalize, and summarizing - the recommendations in natural language. - It will only return recommendations if return_direct=True. - Can also be used in sequential chains for working with - the output of Amazon Personalize. + """Chain for retrieving recommendations from Amazon Personalize, + and summarizing them. + + It only returns recommendations if return_direct=True. + It can also be used in sequential chains for working with + the output of Amazon Personalize. Example: .. code-block:: python diff --git a/libs/experimental/langchain_experimental/retrievers/vector_sql_database.py b/libs/experimental/langchain_experimental/retrievers/vector_sql_database.py index fdde2da327..5c75c1eaf2 100644 --- a/libs/experimental/langchain_experimental/retrievers/vector_sql_database.py +++ b/libs/experimental/langchain_experimental/retrievers/vector_sql_database.py @@ -13,7 +13,7 @@ from langchain_experimental.sql.vector_sql import VectorSQLDatabaseChain class VectorSQLDatabaseChainRetriever(BaseRetriever): - """Retriever that uses SQLDatabase as Retriever""" + """Retriever that uses Vector SQL Database.""" sql_db_chain: VectorSQLDatabaseChain """SQL Database Chain""" diff --git a/libs/experimental/langchain_experimental/rl_chain/base.py b/libs/experimental/langchain_experimental/rl_chain/base.py index facf977450..329a9dbafc 100644 --- a/libs/experimental/langchain_experimental/rl_chain/base.py +++ b/libs/experimental/langchain_experimental/rl_chain/base.py @@ -51,6 +51,8 @@ class _BasedOn: def BasedOn(anything: Any) -> _BasedOn: + """Wrap a value to indicate that it should be based on.""" + return _BasedOn(anything) @@ -65,6 +67,8 @@ class _ToSelectFrom: def ToSelectFrom(anything: Any) -> _ToSelectFrom: + """Wrap a value to indicate that it should be selected from.""" + if not isinstance(anything, list): raise ValueError("ToSelectFrom must be a list to select from") return _ToSelectFrom(anything) @@ -82,6 +86,8 @@ class _Embed: def Embed(anything: Any, keep: bool = False) -> Any: + """Wrap a value to indicate that it should be embedded.""" + if isinstance(anything, _ToSelectFrom): return ToSelectFrom(Embed(anything.value, keep=keep)) elif isinstance(anything, _BasedOn): @@ -96,6 +102,8 @@ def Embed(anything: Any, keep: bool = False) -> Any: def EmbedAndKeep(anything: Any) -> Any: + """Wrap a value to indicate that it should be embedded and kept.""" + return Embed(anything, keep=True) @@ -103,14 +111,19 @@ def EmbedAndKeep(anything: Any) -> Any: def stringify_embedding(embedding: List) -> str: + """Convert an embedding to a string.""" + return " ".join([f"{i}:{e}" for i, e in enumerate(embedding)]) def parse_lines(parser: "vw.TextFormatParser", input_str: str) -> List["vw.Example"]: + """Parse the input string into a list of examples.""" + return [parser.parse_line(line) for line in input_str.split("\n")] def get_based_on_and_to_select_from(inputs: Dict[str, Any]) -> Tuple[Dict, Dict]: + """Get the BasedOn and ToSelectFrom from the inputs.""" to_select_from = { k: inputs[k].value for k in inputs.keys() @@ -132,8 +145,9 @@ def get_based_on_and_to_select_from(inputs: Dict[str, Any]) -> Tuple[Dict, Dict] def prepare_inputs_for_autoembed(inputs: Dict[str, Any]) -> Dict[str, Any]: - """ - go over all the inputs and if something is either wrapped in _ToSelectFrom or _BasedOn, and if their inner values are not already _Embed, + """Prepare the inputs for auto embedding. + + Go over all the inputs and if something is either wrapped in _ToSelectFrom or _BasedOn, and if their inner values are not already _Embed, then wrap them in EmbedAndKeep while retaining their _ToSelectFrom or _BasedOn status """ # noqa: E501 @@ -149,6 +163,8 @@ def prepare_inputs_for_autoembed(inputs: Dict[str, Any]) -> Dict[str, Any]: class Selected(ABC): + """Abstract class to represent the selected item.""" + pass @@ -156,6 +172,8 @@ TSelected = TypeVar("TSelected", bound=Selected) class Event(Generic[TSelected], ABC): + """Abstract class to represent an event.""" + inputs: Dict[str, Any] selected: Optional[TSelected] @@ -168,6 +186,8 @@ TEvent = TypeVar("TEvent", bound=Event) class Policy(Generic[TEvent], ABC): + """Abstract class to represent a policy.""" + def __init__(self, **kwargs: Any): pass @@ -188,6 +208,8 @@ class Policy(Generic[TEvent], ABC): class VwPolicy(Policy): + """Vowpal Wabbit policy.""" + def __init__( self, model_repo: ModelRepository, @@ -229,6 +251,8 @@ class VwPolicy(Policy): class Embedder(Generic[TEvent], ABC): + """Abstract class to represent an embedder.""" + def __init__(self, *args: Any, **kwargs: Any): pass @@ -238,7 +262,7 @@ class Embedder(Generic[TEvent], ABC): class SelectionScorer(Generic[TEvent], ABC, BaseModel): - """Abstract method to grade the chosen selection or the response of the llm""" + """Abstract class to grade the chosen selection or the response of the llm.""" @abstractmethod def score_response( @@ -248,6 +272,8 @@ class SelectionScorer(Generic[TEvent], ABC, BaseModel): class AutoSelectionScorer(SelectionScorer[Event], BaseModel): + """Auto selection scorer.""" + llm_chain: LLMChain prompt: Union[BasePromptTemplate, None] = None scoring_criteria_template_str: Optional[str] = None @@ -308,8 +334,8 @@ class AutoSelectionScorer(SelectionScorer[Event], BaseModel): class RLChain(Chain, Generic[TEvent]): - """ - The `RLChain` class leverages the Vowpal Wabbit (VW) model as a learned policy for reinforcement learning. + """Chain that leverages the Vowpal Wabbit (VW) model as a learned policy + for reinforcement learning. Attributes: - llm_chain (Chain): Represents the underlying Language Model chain. @@ -547,7 +573,8 @@ class RLChain(Chain, Generic[TEvent]): def is_stringtype_instance(item: Any) -> bool: - """Helper function to check if an item is a string.""" + """Check if an item is a string.""" + return isinstance(item, str) or ( isinstance(item, _Embed) and isinstance(item.value, str) ) @@ -556,7 +583,8 @@ def is_stringtype_instance(item: Any) -> bool: def embed_string_type( item: Union[str, _Embed], model: Any, namespace: Optional[str] = None ) -> Dict[str, Union[str, List[str]]]: - """Helper function to embed a string or an _Embed object.""" + """Embed a string or an _Embed object.""" + keep_str = "" if isinstance(item, _Embed): encoded = stringify_embedding(model.encode(item.value)) @@ -576,7 +604,7 @@ def embed_string_type( def embed_dict_type(item: Dict, model: Any) -> Dict[str, Any]: - """Helper function to embed a dictionary item.""" + """Embed a dictionary item.""" inner_dict: Dict = {} for ns, embed_item in item.items(): if isinstance(embed_item, list): @@ -592,6 +620,8 @@ def embed_dict_type(item: Dict, model: Any) -> Dict[str, Any]: def embed_list_type( item: list, model: Any, namespace: Optional[str] = None ) -> List[Dict[str, Union[str, List[str]]]]: + """Embed a list item.""" + ret_list: List = [] for embed_item in item: if isinstance(embed_item, dict): @@ -614,7 +644,8 @@ def embed( namespace: Optional[str] = None, ) -> List[Dict[str, Union[str, List[str]]]]: """ - Embeds the actions or context using the SentenceTransformer model (or a model that has an `encode` function) + Embed the actions or context using the SentenceTransformer model + (or a model that has an `encode` function). Attributes: to_embed: (Union[Union(str, _Embed(str)), Dict, List[Union(str, _Embed(str))], List[Dict]], required) The text to be embedded, either a string, a list of strings or a dictionary or a list of dictionaries. diff --git a/libs/experimental/langchain_experimental/rl_chain/metrics.py b/libs/experimental/langchain_experimental/rl_chain/metrics.py index 4bd65da3ae..58663a4b15 100644 --- a/libs/experimental/langchain_experimental/rl_chain/metrics.py +++ b/libs/experimental/langchain_experimental/rl_chain/metrics.py @@ -6,6 +6,8 @@ if TYPE_CHECKING: class MetricsTrackerAverage: + """Metrics Tracker Average.""" + def __init__(self, step: int): self.history: List[Dict[str, Union[int, float]]] = [{"step": 0, "score": 0}] self.step: int = step @@ -33,6 +35,8 @@ class MetricsTrackerAverage: class MetricsTrackerRollingWindow: + """Metrics Tracker Rolling Window.""" + def __init__(self, window_size: int, step: int): self.history: List[Dict[str, Union[int, float]]] = [{"step": 0, "score": 0}] self.step: int = step diff --git a/libs/experimental/langchain_experimental/rl_chain/model_repository.py b/libs/experimental/langchain_experimental/rl_chain/model_repository.py index efe96cc0bf..ae5f33a0dc 100644 --- a/libs/experimental/langchain_experimental/rl_chain/model_repository.py +++ b/libs/experimental/langchain_experimental/rl_chain/model_repository.py @@ -13,6 +13,8 @@ logger = logging.getLogger(__name__) class ModelRepository: + """Model Repository.""" + def __init__( self, folder: Union[str, os.PathLike], diff --git a/libs/experimental/langchain_experimental/rl_chain/pick_best_chain.py b/libs/experimental/langchain_experimental/rl_chain/pick_best_chain.py index f9075dc565..c7fd5cc6b3 100644 --- a/libs/experimental/langchain_experimental/rl_chain/pick_best_chain.py +++ b/libs/experimental/langchain_experimental/rl_chain/pick_best_chain.py @@ -18,6 +18,8 @@ SENTINEL = object() class PickBestSelected(base.Selected): + """Selected class for PickBest chain.""" + index: Optional[int] probability: Optional[float] score: Optional[float] @@ -34,6 +36,8 @@ class PickBestSelected(base.Selected): class PickBestEvent(base.Event[PickBestSelected]): + """Event class for PickBest chain.""" + def __init__( self, inputs: Dict[str, Any], @@ -47,8 +51,8 @@ class PickBestEvent(base.Event[PickBestSelected]): class PickBestFeatureEmbedder(base.Embedder[PickBestEvent]): - """ - Text Embedder class that embeds the `BasedOn` and `ToSelectFrom` inputs into a format that can be used by the learning policy + """Embed the `BasedOn` and `ToSelectFrom` inputs into a format that can be used + by the learning policy. Attributes: model name (Any, optional): The type of embeddings to be used for feature representation. Defaults to BERT SentenceTransformer. @@ -225,6 +229,8 @@ class PickBestFeatureEmbedder(base.Embedder[PickBestEvent]): class PickBestRandomPolicy(base.Policy[PickBestEvent]): + """Random policy for PickBest chain.""" + def __init__(self, feature_embedder: base.Embedder, **kwargs: Any): self.feature_embedder = feature_embedder @@ -240,8 +246,8 @@ class PickBestRandomPolicy(base.Policy[PickBestEvent]): class PickBest(base.RLChain[PickBestEvent]): - """ - `PickBest` is a class designed to leverage the Vowpal Wabbit (VW) model for reinforcement learning with a context, with the goal of modifying the prompt before the LLM call. + """Chain that leverages the Vowpal Wabbit (VW) model for reinforcement learning + with a context, with the goal of modifying the prompt before the LLM call. Each invocation of the chain's `run()` method should be equipped with a set of potential actions (`ToSelectFrom`) and will result in the selection of a specific action based on the `BasedOn` input. This chosen action then informs the LLM (Language Model) prompt for the subsequent response generation. diff --git a/libs/experimental/langchain_experimental/rl_chain/vw_logger.py b/libs/experimental/langchain_experimental/rl_chain/vw_logger.py index e8d2e1541f..52685e56a3 100644 --- a/libs/experimental/langchain_experimental/rl_chain/vw_logger.py +++ b/libs/experimental/langchain_experimental/rl_chain/vw_logger.py @@ -4,6 +4,8 @@ from typing import Optional, Union class VwLogger: + """Vowpal Wabbit custom logger.""" + def __init__(self, path: Optional[Union[str, PathLike]]): self.path = Path(path) if path else None if self.path: diff --git a/libs/experimental/langchain_experimental/smart_llm/__init__.py b/libs/experimental/langchain_experimental/smart_llm/__init__.py index 925a05cec5..d1f83aab61 100644 --- a/libs/experimental/langchain_experimental/smart_llm/__init__.py +++ b/libs/experimental/langchain_experimental/smart_llm/__init__.py @@ -1,4 +1,23 @@ -"""Generalized implementation of SmartGPT (origin: https://youtu.be/wVzuvf9D9BU)""" +"""Chain for applying self-critique using the SmartGPT workflow. + +See details at https://youtu.be/wVzuvf9D9BU + +The workflow performs these 3 steps: +1. **Ideate**: Pass the user prompt to an ideation LLM n_ideas times, + each result is an "idea" +2. **Critique**: Pass the ideas to a critique LLM which looks for flaws in the ideas + & picks the best one +3. **Resolve**: Pass the critique to a resolver LLM which improves upon the best idea + & outputs only the (improved version of) the best output + +In total, the SmartGPT workflow will use n_ideas+2 LLM calls + +Note that SmartLLMChain will only improve results (compared to a basic LLMChain), +when the underlying models have the capability for reflection, which smaller models +often don't. + +Finally, a SmartLLMChain assumes that each underlying LLM outputs exactly 1 result. +""" from langchain_experimental.smart_llm.base import SmartLLMChain diff --git a/libs/experimental/langchain_experimental/smart_llm/base.py b/libs/experimental/langchain_experimental/smart_llm/base.py index d69fa20801..5cef9fc378 100644 --- a/libs/experimental/langchain_experimental/smart_llm/base.py +++ b/libs/experimental/langchain_experimental/smart_llm/base.py @@ -18,8 +18,9 @@ from langchain_experimental.pydantic_v1 import Extra, root_validator class SmartLLMChain(Chain): - """ - Generalized implementation of SmartGPT (origin: https://youtu.be/wVzuvf9D9BU) + """Chain for applying self-critique using the SmartGPT workflow. + + See details at https://youtu.be/wVzuvf9D9BU A SmartLLMChain is an LLMChain that instead of simply passing the prompt to the LLM performs these 3 steps: diff --git a/libs/experimental/langchain_experimental/sql/vector_sql.py b/libs/experimental/langchain_experimental/sql/vector_sql.py index 07125e1f82..7c28ec8861 100644 --- a/libs/experimental/langchain_experimental/sql/vector_sql.py +++ b/libs/experimental/langchain_experimental/sql/vector_sql.py @@ -19,7 +19,8 @@ from langchain_experimental.sql.base import INTERMEDIATE_STEPS_KEY, SQLDatabaseC class VectorSQLOutputParser(BaseOutputParser[str]): - """Output Parser for Vector SQL + """Output Parser for Vector SQL. + 1. finds for `NeuralArray()` and replace it with the embedding 2. finds for `DISTANCE()` and replace it with the distance name in backend SQL """ @@ -61,8 +62,8 @@ class VectorSQLOutputParser(BaseOutputParser[str]): class VectorSQLRetrieveAllOutputParser(VectorSQLOutputParser): - """Based on VectorSQLOutputParser - It also modify the SQL to get all columns + """Parser based on VectorSQLOutputParser. + It also modifies the SQL to get all columns. """ @property @@ -79,6 +80,8 @@ class VectorSQLRetrieveAllOutputParser(VectorSQLOutputParser): def get_result_from_sqldb(db: SQLDatabase, cmd: str) -> Sequence[Dict[str, Any]]: + """Get result from SQL Database.""" + result = db._execute(cmd, fetch="all") assert isinstance(result, Sequence) return result diff --git a/libs/experimental/langchain_experimental/synthetic_data/__init__.py b/libs/experimental/langchain_experimental/synthetic_data/__init__.py index e3325efb0d..c5762a341c 100644 --- a/libs/experimental/langchain_experimental/synthetic_data/__init__.py +++ b/libs/experimental/langchain_experimental/synthetic_data/__init__.py @@ -12,7 +12,7 @@ def create_data_generation_chain( llm: BaseLanguageModel, prompt: Optional[PromptTemplate] = None, ) -> Chain: - """Creates a chain that generates synthetic sentences with + """Create a chain that generates synthetic sentences with provided fields. Args: @@ -28,7 +28,7 @@ def create_data_generation_chain( class DatasetGenerator: - """Generates synthetic dataset with a given language model.""" + """Generate synthetic dataset with a given language model.""" def __init__( self, diff --git a/libs/experimental/langchain_experimental/tabular_synthetic_data/base.py b/libs/experimental/langchain_experimental/tabular_synthetic_data/base.py index 9d67060609..654bf991a7 100644 --- a/libs/experimental/langchain_experimental/tabular_synthetic_data/base.py +++ b/libs/experimental/langchain_experimental/tabular_synthetic_data/base.py @@ -9,7 +9,7 @@ from langchain_core.language_models import BaseLanguageModel class SyntheticDataGenerator(BaseModel): - """Generates synthetic data using the given LLM and few-shot template. + """Generate synthetic data using the given LLM and few-shot template. Utilizes the provided LLM to produce synthetic data based on the few-shot prompt template. diff --git a/libs/experimental/langchain_experimental/text_splitter.py b/libs/experimental/langchain_experimental/text_splitter.py index c032c33856..d65a80eba6 100644 --- a/libs/experimental/langchain_experimental/text_splitter.py +++ b/libs/experimental/langchain_experimental/text_splitter.py @@ -11,6 +11,16 @@ from langchain_core.embeddings import Embeddings def combine_sentences(sentences: List[dict], buffer_size: int = 1) -> List[dict]: + """Combine sentences based on buffer size. + + Args: + sentences: List of sentences to combine. + buffer_size: Number of sentences to combine. Defaults to 1. + + Returns: + List of sentences with combined sentences. + """ + # Go through each sentence dict for i in range(len(sentences)): # Create a string that will hold the sentences which are joined @@ -42,6 +52,14 @@ def combine_sentences(sentences: List[dict], buffer_size: int = 1) -> List[dict] def calculate_cosine_distances(sentences: List[dict]) -> Tuple[List[float], List[dict]]: + """Calculate cosine distances between sentences. + + Args: + sentences: List of sentences to calculate distances for. + + Returns: + Tuple of distances and sentences. + """ distances = [] for i in range(len(sentences) - 1): embedding_current = sentences[i]["combined_sentence_embedding"] @@ -66,12 +84,12 @@ def calculate_cosine_distances(sentences: List[dict]) -> Tuple[List[float], List class SemanticChunker(BaseDocumentTransformer): - """Splits the text based on semantic similarity. + """Split the text based on semantic similarity. Taken from Greg Kamradt's wonderful notebook: https://github.com/FullStackRetrieval-com/RetrievalTutorials/blob/main/5_Levels_Of_Text_Splitting.ipynb - All credit to him. + All credits to him. At a high level, this splits into sentences, then groups into groups of 3 sentences, and then merges one that are similar in the embedding space. diff --git a/libs/experimental/langchain_experimental/tools/python/tool.py b/libs/experimental/langchain_experimental/tools/python/tool.py index 6f1f6d72c0..2324063fd0 100644 --- a/libs/experimental/langchain_experimental/tools/python/tool.py +++ b/libs/experimental/langchain_experimental/tools/python/tool.py @@ -24,6 +24,7 @@ def _get_default_python_repl() -> PythonREPL: def sanitize_input(query: str) -> str: """Sanitize input to the python REPL. + Remove whitespace, backtick & python (if llm mistakes python console as terminal) Args: @@ -41,7 +42,7 @@ def sanitize_input(query: str) -> str: class PythonREPLTool(BaseTool): - """A tool for running python code in a REPL.""" + """Tool for running python code in a REPL.""" name: str = "Python_REPL" description: str = ( @@ -76,11 +77,13 @@ class PythonREPLTool(BaseTool): class PythonInputs(BaseModel): + """Python inputs.""" + query: str = Field(description="code snippet to run") class PythonAstREPLTool(BaseTool): - """A tool for running python code in a REPL.""" + """Tool for running python code in a REPL.""" name: str = "python_repl_ast" description: str = ( diff --git a/libs/experimental/langchain_experimental/tot/__init__.py b/libs/experimental/langchain_experimental/tot/__init__.py index ecb8d7f017..c20812ccd5 100644 --- a/libs/experimental/langchain_experimental/tot/__init__.py +++ b/libs/experimental/langchain_experimental/tot/__init__.py @@ -1,3 +1,12 @@ +"""Implementation of a Tree of Thought (ToT) chain based on the paper +"Large Language Model Guided Tree-of-Thought" + +https://arxiv.org/pdf/2305.08291.pdf + +The Tree of Thought (ToT) chain uses a tree structure to explore the space of +possible solutions to a problem. + +""" from langchain_experimental.tot.base import ToTChain from langchain_experimental.tot.checker import ToTChecker diff --git a/libs/experimental/langchain_experimental/tot/base.py b/libs/experimental/langchain_experimental/tot/base.py index 82f4f92aff..3c60b15cb3 100644 --- a/libs/experimental/langchain_experimental/tot/base.py +++ b/libs/experimental/langchain_experimental/tot/base.py @@ -1,14 +1,3 @@ -""" -This a Tree of Thought (ToT) chain based on the paper "Large Language Model -Guided Tree-of-Thought" - -https://arxiv.org/pdf/2305.08291.pdf - -The Tree of Thought (ToT) chain uses a tree structure to explore the space of -possible solutions to a problem. - -""" - from __future__ import annotations from textwrap import indent @@ -34,7 +23,7 @@ from langchain_experimental.tot.thought_generation import ( class ToTChain(Chain): """ - A Chain implementing the Tree of Thought (ToT). + Chain implementing the Tree of Thought (ToT). """ llm: BaseLanguageModel diff --git a/libs/experimental/langchain_experimental/tot/memory.py b/libs/experimental/langchain_experimental/tot/memory.py index eb886010f4..5c9de83823 100644 --- a/libs/experimental/langchain_experimental/tot/memory.py +++ b/libs/experimental/langchain_experimental/tot/memory.py @@ -7,7 +7,9 @@ from langchain_experimental.tot.thought import Thought class ToTDFSMemory: """ - Memory for the Tree of Thought (ToT) chain. Implemented as a stack of + Memory for the Tree of Thought (ToT) chain. + + It is implemented as a stack of thoughts. This allows for a depth first search (DFS) of the ToT. """ diff --git a/libs/experimental/langchain_experimental/tot/prompts.py b/libs/experimental/langchain_experimental/tot/prompts.py index a59a2be9e3..f54dbdc0d4 100644 --- a/libs/experimental/langchain_experimental/tot/prompts.py +++ b/libs/experimental/langchain_experimental/tot/prompts.py @@ -9,6 +9,8 @@ from langchain_experimental.tot.thought import ThoughtValidity def get_cot_prompt() -> PromptTemplate: + """Get the prompt for the Chain of Thought (CoT) chain.""" + return PromptTemplate( template_format="jinja2", input_variables=["problem_description", "thoughts"], @@ -36,7 +38,7 @@ def get_cot_prompt() -> PromptTemplate: class JSONListOutputParser(BaseOutputParser): - """Class to parse the output of a PROPOSE_PROMPT response.""" + """Parse the output of a PROPOSE_PROMPT response.""" @property def _type(self) -> str: @@ -53,6 +55,8 @@ class JSONListOutputParser(BaseOutputParser): def get_propose_prompt() -> PromptTemplate: + """Get the prompt for the PROPOSE_PROMPT chain.""" + return PromptTemplate( template_format="jinja2", input_variables=["problem_description", "thoughts", "n"], @@ -95,6 +99,8 @@ def get_propose_prompt() -> PromptTemplate: class CheckerOutputParser(BaseOutputParser): + """Parse and check the output of the language model.""" + def parse(self, text: str) -> ThoughtValidity: """Parse the output of the language model.""" text = text.upper() diff --git a/libs/experimental/langchain_experimental/tot/thought.py b/libs/experimental/langchain_experimental/tot/thought.py index 35bc2dfaff..88344e5167 100644 --- a/libs/experimental/langchain_experimental/tot/thought.py +++ b/libs/experimental/langchain_experimental/tot/thought.py @@ -7,12 +7,16 @@ from langchain_experimental.pydantic_v1 import BaseModel, Field class ThoughtValidity(Enum): + """Enum for the validity of a thought.""" + VALID_INTERMEDIATE = 0 VALID_FINAL = 1 INVALID = 2 class Thought(BaseModel): + """A thought in the ToT.""" + text: str validity: ThoughtValidity children: Set[Thought] = Field(default_factory=set) diff --git a/libs/experimental/langchain_experimental/tot/thought_generation.py b/libs/experimental/langchain_experimental/tot/thought_generation.py index 711b8937a7..f07f067648 100644 --- a/libs/experimental/langchain_experimental/tot/thought_generation.py +++ b/libs/experimental/langchain_experimental/tot/thought_generation.py @@ -39,7 +39,7 @@ class BaseThoughtGenerationStrategy(LLMChain): class SampleCoTStrategy(BaseThoughtGenerationStrategy): """ - Sample thoughts from a Chain-of-Thought (CoT) prompt. + Sample strategy from a Chain-of-Thought (CoT) prompt. This strategy works better when the thought space is rich, such as when each thought is a paragraph. Independent and identically distributed samples @@ -62,7 +62,7 @@ class SampleCoTStrategy(BaseThoughtGenerationStrategy): class ProposePromptStrategy(BaseThoughtGenerationStrategy): """ - Propose thoughts sequentially using a "propose prompt". + Strategy that is sequentially using a "propose prompt". This strategy works better when the thought space is more constrained, such as when each thought is just a word or a line. Proposing different thoughts