From 0c84ce10822c44e8af54f86de5f810abe67161a3 Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Mon, 20 Feb 2023 21:02:28 -0800 Subject: [PATCH] Harrison/add documents (#1197) Co-authored-by: OmriNach <32659330+OmriNach@users.noreply.github.com> --- langchain/vectorstores/base.py | 21 ++++++++++++++++++- langchain/vectorstores/chroma.py | 1 + .../vectorstores/elastic_vector_search.py | 5 ++++- langchain/vectorstores/faiss.py | 5 ++++- langchain/vectorstores/milvus.py | 1 + .../vectorstores/opensearch_vector_search.py | 1 + langchain/vectorstores/pinecone.py | 1 + langchain/vectorstores/qdrant.py | 5 ++++- langchain/vectorstores/weaviate.py | 5 ++++- 9 files changed, 40 insertions(+), 5 deletions(-) diff --git a/langchain/vectorstores/base.py b/langchain/vectorstores/base.py index c7e1a33a..e3b5241e 100644 --- a/langchain/vectorstores/base.py +++ b/langchain/vectorstores/base.py @@ -13,18 +13,37 @@ class VectorStore(ABC): @abstractmethod def add_texts( - self, texts: Iterable[str], metadatas: Optional[List[dict]] = None + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + **kwargs: Any, ) -> List[str]: """Run more texts through the embeddings and add to the vectorstore. Args: texts: Iterable of strings to add to the vectorstore. metadatas: Optional list of metadatas associated with the texts. + kwargs: vectorstore specific parameters Returns: List of ids from adding the texts into the vectorstore. """ + def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]: + """Run more documents through the embeddings and add to the vectorstore. + + Args: + documents (List[Document]): Documents to add to the vectorstore. + + + Returns: + List[str]: List of IDs of the added texts. 
+ """ + # TODO: Handle the case where the user doesn't provide ids on the Collection + texts = [doc.page_content for doc in documents] + metadatas = [doc.metadata for doc in documents] + return self.add_texts(texts, metadatas, **kwargs) + @abstractmethod def similarity_search( self, query: str, k: int = 4, **kwargs: Any diff --git a/langchain/vectorstores/chroma.py b/langchain/vectorstores/chroma.py index 2cae7f72..ae2c7ea6 100644 --- a/langchain/vectorstores/chroma.py +++ b/langchain/vectorstores/chroma.py @@ -76,6 +76,7 @@ class Chroma(VectorStore): texts: Iterable[str], metadatas: Optional[List[dict]] = None, ids: Optional[List[str]] = None, + **kwargs: Any, ) -> List[str]: """Run more texts through the embeddings and add to the vectorstore. diff --git a/langchain/vectorstores/elastic_vector_search.py b/langchain/vectorstores/elastic_vector_search.py index eb83225a..4f37800e 100644 --- a/langchain/vectorstores/elastic_vector_search.py +++ b/langchain/vectorstores/elastic_vector_search.py @@ -68,7 +68,10 @@ class ElasticVectorSearch(VectorStore): self.client = es_client def add_texts( - self, texts: Iterable[str], metadatas: Optional[List[dict]] = None + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + **kwargs: Any, ) -> List[str]: """Run more texts through the embeddings and add to the vectorstore. diff --git a/langchain/vectorstores/faiss.py b/langchain/vectorstores/faiss.py index df66c818..3b06ec38 100644 --- a/langchain/vectorstores/faiss.py +++ b/langchain/vectorstores/faiss.py @@ -56,7 +56,10 @@ class FAISS(VectorStore): self.index_to_docstore_id = index_to_docstore_id def add_texts( - self, texts: Iterable[str], metadatas: Optional[List[dict]] = None + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + **kwargs: Any, ) -> List[str]: """Run more texts through the embeddings and add to the vectorstore. 
diff --git a/langchain/vectorstores/milvus.py b/langchain/vectorstores/milvus.py index b21b5515..67510375 100644 --- a/langchain/vectorstores/milvus.py +++ b/langchain/vectorstores/milvus.py @@ -88,6 +88,7 @@ class Milvus(VectorStore): metadatas: Optional[List[dict]] = None, partition_name: Optional[str] = None, timeout: Optional[int] = None, + **kwargs: Any, ) -> List[str]: """Insert text data into Milvus. diff --git a/langchain/vectorstores/opensearch_vector_search.py b/langchain/vectorstores/opensearch_vector_search.py index 6e4d2545..9218ffaa 100644 --- a/langchain/vectorstores/opensearch_vector_search.py +++ b/langchain/vectorstores/opensearch_vector_search.py @@ -230,6 +230,7 @@ class OpenSearchVectorSearch(VectorStore): texts: Iterable[str], metadatas: Optional[List[dict]] = None, bulk_size: int = 500, + **kwargs: Any, ) -> List[str]: """Run more texts through the embeddings and add to the vectorstore. diff --git a/langchain/vectorstores/pinecone.py b/langchain/vectorstores/pinecone.py index ad82f682..25e26572 100644 --- a/langchain/vectorstores/pinecone.py +++ b/langchain/vectorstores/pinecone.py @@ -56,6 +56,7 @@ class Pinecone(VectorStore): metadatas: Optional[List[dict]] = None, ids: Optional[List[str]] = None, namespace: Optional[str] = None, + **kwargs: Any, ) -> List[str]: """Run more texts through the embeddings and add to the vectorstore. diff --git a/langchain/vectorstores/qdrant.py b/langchain/vectorstores/qdrant.py index c2af88d5..1b53601f 100644 --- a/langchain/vectorstores/qdrant.py +++ b/langchain/vectorstores/qdrant.py @@ -49,7 +49,10 @@ class Qdrant(VectorStore): self.embedding_function = embedding_function def add_texts( - self, texts: Iterable[str], metadatas: Optional[List[dict]] = None + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + **kwargs: Any, ) -> List[str]: """Run more texts through the embeddings and add to the vectorstore. 
diff --git a/langchain/vectorstores/weaviate.py b/langchain/vectorstores/weaviate.py index 1bd4d689..9302470b 100644 --- a/langchain/vectorstores/weaviate.py +++ b/langchain/vectorstores/weaviate.py @@ -51,7 +51,10 @@ class Weaviate(VectorStore): self._query_attrs.extend(attributes) def add_texts( - self, texts: Iterable[str], metadatas: Optional[List[dict]] = None + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + **kwargs: Any, ) -> List[str]: """Upload texts with metadata (properties) to Weaviate.""" from weaviate.util import get_valid_uuid