diff --git a/langchain/prompts/example_selector/base.py b/langchain/prompts/example_selector/base.py index 00b91cb428..ff2e099c81 100644 --- a/langchain/prompts/example_selector/base.py +++ b/langchain/prompts/example_selector/base.py @@ -1,13 +1,13 @@ """Interface for selecting examples to include in prompts.""" from abc import ABC, abstractmethod -from typing import Dict, List +from typing import Any, Dict, List class BaseExampleSelector(ABC): """Interface for selecting examples to include in prompts.""" @abstractmethod - def add_example(self, example: Dict[str, str]) -> None: + def add_example(self, example: Dict[str, str]) -> Any: """Add new example to store for a key.""" @abstractmethod diff --git a/langchain/prompts/example_selector/semantic_similarity.py b/langchain/prompts/example_selector/semantic_similarity.py index 52a9118cb7..384121c922 100644 --- a/langchain/prompts/example_selector/semantic_similarity.py +++ b/langchain/prompts/example_selector/semantic_similarity.py @@ -31,10 +31,11 @@ class SemanticSimilarityExampleSelector(BaseExampleSelector, BaseModel): extra = Extra.forbid arbitrary_types_allowed = True - def add_example(self, example: Dict[str, str]) -> None: + def add_example(self, example: Dict[str, str]) -> str: """Add new example to vectorstore.""" string_example = " ".join(sorted_values(example)) - self.vectorstore.add_texts([string_example], metadatas=[example]) + ids = self.vectorstore.add_texts([string_example], metadatas=[example]) + return ids[0] def select_examples(self, input_variables: Dict[str, str]) -> List[dict]: """Select which examples to use based on semantic similarity.""" diff --git a/langchain/vectorstores/base.py b/langchain/vectorstores/base.py index 429f82469b..d560d396bd 100644 --- a/langchain/vectorstores/base.py +++ b/langchain/vectorstores/base.py @@ -14,8 +14,16 @@ class VectorStore(ABC): @abstractmethod def add_texts( self, texts: Iterable[str], metadatas: Optional[List[dict]] = None - ) -> None: - """Run more texts through the embeddings and add to the vectorstore.""" + ) -> List[str]: + """Run more texts through the embeddings and add to the vectorstore. + + Args: + texts: Iterable of strings to add to the vectorstore. + metadatas: Optional list of metadatas associated with the texts. + + Returns: + List of ids from adding the texts into the vectorstore. + """ @abstractmethod def similarity_search(self, query: str, k: int = 4) -> List[Document]: diff --git a/langchain/vectorstores/elastic_vector_search.py b/langchain/vectorstores/elastic_vector_search.py index 20703f5791..61d858b57a 100644 --- a/langchain/vectorstores/elastic_vector_search.py +++ b/langchain/vectorstores/elastic_vector_search.py @@ -55,7 +55,7 @@ class ElasticVectorSearch(VectorStore): except ImportError: raise ValueError( "Could not import elasticsearch python package. " - "Please install it with `pip install elasticearch`." + "Please install it with `pip install elasticsearch`." ) self.embedding_function = embedding_function self.index_name = index_name @@ -69,29 +69,42 @@ class ElasticVectorSearch(VectorStore): def add_texts( self, texts: Iterable[str], metadatas: Optional[List[dict]] = None - ) -> None: - """Run more texts through the embeddings and add to the vectorstore.""" + ) -> List[str]: + """Run more texts through the embeddings and add to the vectorstore. + + Args: + texts: Iterable of strings to add to the vectorstore. + metadatas: Optional list of metadatas associated with the texts. + + Returns: + List of ids from adding the texts into the vectorstore. + """ try: from elasticsearch.helpers import bulk except ImportError: raise ValueError( "Could not import elasticsearch python package. " - "Please install it with `pip install elasticearch`." + "Please install it with `pip install elasticsearch`." ) requests = [] + ids = [] for i, text in enumerate(texts): metadata = metadatas[i] if metadatas else {} + _id = str(uuid.uuid4()) request = { "_op_type": "index", "_index": self.index_name, "vector": self.embedding_function(text), "text": text, "metadata": metadata, + "_id": _id, } + ids.append(_id) requests.append(request) bulk(self.client, requests) # TODO: add option not to refresh self.client.indices.refresh(index=self.index_name) + return ids def similarity_search(self, query: str, k: int = 4) -> List[Document]: """Return docs most similar to query. diff --git a/langchain/vectorstores/faiss.py b/langchain/vectorstores/faiss.py index 61ffdf530f..9f0ef01e37 100644 --- a/langchain/vectorstores/faiss.py +++ b/langchain/vectorstores/faiss.py @@ -41,8 +41,16 @@ class FAISS(VectorStore): def add_texts( self, texts: Iterable[str], metadatas: Optional[List[dict]] = None - ) -> None: - """Run more texts through the embeddings and add to the vectorstore.""" + ) -> List[str]: + """Run more texts through the embeddings and add to the vectorstore. + + Args: + texts: Iterable of strings to add to the vectorstore. + metadatas: Optional list of metadatas associated with the texts. + + Returns: + List of ids from adding the texts into the vectorstore. + """ if not isinstance(self.docstore, AddableMixin): raise ValueError( "If trying to add texts, the underlying docstore should support " @@ -66,6 +74,7 @@ class FAISS(VectorStore): self.docstore.add({_id: doc for _, _id, doc in full_info}) index_to_id = {index: _id for index, _id, _ in full_info} self.index_to_docstore_id.update(index_to_id) + return [_id for _, _id, _ in full_info] def similarity_search(self, query: str, k: int = 4) -> List[Document]: """Return docs most similar to query.