add return of ids (#254)

Not actually sure the desired return value of add_example in the example selector is actually general/good - what's the use case?
2 years ago · 2163d064f3
parent 8cba5b791a
commit 2163d064f3
5 changed files with 43 additions and 12 deletions
--- a/langchain/prompts/example_selector/base.py
+++ b/langchain/prompts/example_selector/base.py
@ -1,13 +1,13 @@
 """Interface for selecting examples to include in prompts."""
 from abc import ABC, abstractmethod
-from typing import Dict, List
+from typing import Any, Dict, List
 class BaseExampleSelector(ABC):
    """Interface for selecting examples to include in prompts."""
    @abstractmethod
-    def add_example(self, example: Dict[str, str]) -> None:
+    def add_example(self, example: Dict[str, str]) -> Any:
        """Add new example to store for a key."""
    @abstractmethod
--- a/langchain/prompts/example_selector/semantic_similarity.py
+++ b/langchain/prompts/example_selector/semantic_similarity.py
@ -31,10 +31,11 @@ class SemanticSimilarityExampleSelector(BaseExampleSelector, BaseModel):
        extra = Extra.forbid
        arbitrary_types_allowed = True
-    def add_example(self, example: Dict[str, str]) -> None:
+    def add_example(self, example: Dict[str, str]) -> str:
        """Add new example to vectorstore."""
        string_example = " ".join(sorted_values(example))
-        self.vectorstore.add_texts([string_example], metadatas=[example])
+        ids = self.vectorstore.add_texts([string_example], metadatas=[example])
        return ids[0]
    def select_examples(self, input_variables: Dict[str, str]) -> List[dict]:
        """Select which examples to use based on semantic similarity."""
--- a/langchain/vectorstores/base.py
+++ b/langchain/vectorstores/base.py
@ -14,8 +14,16 @@ class VectorStore(ABC):
    @abstractmethod
    def add_texts(
        self, texts: Iterable[str], metadatas: Optional[List[dict]] = None
-    ) -> None:
+    ) -> List[str]:
-        """Run more texts through the embeddings and add to the vectorstore."""
+        """Run more texts through the embeddings and add to the vectorstore.
        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
        Returns:
            List of ids from adding the texts into the vectorstore.
        """
    @abstractmethod
    def similarity_search(self, query: str, k: int = 4) -> List[Document]:
--- a/langchain/vectorstores/elastic_vector_search.py
+++ b/langchain/vectorstores/elastic_vector_search.py
@ -55,7 +55,7 @@ class ElasticVectorSearch(VectorStore):
        except ImportError:
            raise ValueError(
                "Could not import elasticsearch python package. "
-                "Please install it with `pip install elasticearch`."
+                "Please install it with `pip install elasticsearch`."
            )
        self.embedding_function = embedding_function
        self.index_name = index_name
@ -69,29 +69,42 @@ class ElasticVectorSearch(VectorStore):
    def add_texts(
        self, texts: Iterable[str], metadatas: Optional[List[dict]] = None
-    ) -> None:
+    ) -> List[str]:
-        """Run more texts through the embeddings and add to the vectorstore."""
+        """Run more texts through the embeddings and add to the vectorstore.
        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
        Returns:
            List of ids from adding the texts into the vectorstore.
        """
        try:
            from elasticsearch.helpers import bulk
        except ImportError:
            raise ValueError(
                "Could not import elasticsearch python package. "
-                "Please install it with `pip install elasticearch`."
+                "Please install it with `pip install elasticsearch`."
            )
        requests = []
        ids = []
        for i, text in enumerate(texts):
            metadata = metadatas[i] if metadatas else {}
            _id = str(uuid.uuid4())
            request = {
                "_op_type": "index",
                "_index": self.index_name,
                "vector": self.embedding_function(text),
                "text": text,
                "metadata": metadata,
                "_id": _id,
            }
            ids.append(_id)
            requests.append(request)
        bulk(self.client, requests)
        # TODO: add option not to refresh
        self.client.indices.refresh(index=self.index_name)
        return ids
    def similarity_search(self, query: str, k: int = 4) -> List[Document]:
        """Return docs most similar to query.
--- a/langchain/vectorstores/faiss.py
+++ b/langchain/vectorstores/faiss.py
@ -41,8 +41,16 @@ class FAISS(VectorStore):
    def add_texts(
        self, texts: Iterable[str], metadatas: Optional[List[dict]] = None
-    ) -> None:
+    ) -> List[str]:
-        """Run more texts through the embeddings and add to the vectorstore."""
+        """Run more texts through the embeddings and add to the vectorstore.
        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
        Returns:
            List of ids from adding the texts into the vectorstore.
        """
        if not isinstance(self.docstore, AddableMixin):
            raise ValueError(
                "If trying to add texts, the underlying docstore should support "
@ -66,6 +74,7 @@ class FAISS(VectorStore):
        self.docstore.add({_id: doc for _, _id, doc in full_info})
        index_to_id = {index: _id for index, _id, _ in full_info}
        self.index_to_docstore_id.update(index_to_id)
        return [_id for _, _id, _ in full_info]
    def similarity_search(self, query: str, k: int = 4) -> List[Document]:
        """Return docs most similar to query.