add return of ids (#254)

Not actually sure the desired return value of add_example on the example selector
is actually general/good - what's the use case?
pull/257/head^2
Harrison Chase 2 years ago committed by GitHub
parent 8cba5b791a
commit 2163d064f3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1,13 +1,13 @@
"""Interface for selecting examples to include in prompts.""" """Interface for selecting examples to include in prompts."""
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import Dict, List from typing import Any, Dict, List
class BaseExampleSelector(ABC): class BaseExampleSelector(ABC):
"""Interface for selecting examples to include in prompts.""" """Interface for selecting examples to include in prompts."""
@abstractmethod @abstractmethod
def add_example(self, example: Dict[str, str]) -> None: def add_example(self, example: Dict[str, str]) -> Any:
"""Add new example to store for a key.""" """Add new example to store for a key."""
@abstractmethod @abstractmethod

@ -31,10 +31,11 @@ class SemanticSimilarityExampleSelector(BaseExampleSelector, BaseModel):
extra = Extra.forbid extra = Extra.forbid
arbitrary_types_allowed = True arbitrary_types_allowed = True
def add_example(self, example: Dict[str, str]) -> None: def add_example(self, example: Dict[str, str]) -> str:
"""Add new example to vectorstore.""" """Add new example to vectorstore."""
string_example = " ".join(sorted_values(example)) string_example = " ".join(sorted_values(example))
self.vectorstore.add_texts([string_example], metadatas=[example]) ids = self.vectorstore.add_texts([string_example], metadatas=[example])
return ids[0]
def select_examples(self, input_variables: Dict[str, str]) -> List[dict]: def select_examples(self, input_variables: Dict[str, str]) -> List[dict]:
"""Select which examples to use based on semantic similarity.""" """Select which examples to use based on semantic similarity."""

@ -14,8 +14,16 @@ class VectorStore(ABC):
@abstractmethod @abstractmethod
def add_texts( def add_texts(
self, texts: Iterable[str], metadatas: Optional[List[dict]] = None self, texts: Iterable[str], metadatas: Optional[List[dict]] = None
) -> None: ) -> List[str]:
"""Run more texts through the embeddings and add to the vectorstore.""" """Run more texts through the embeddings and add to the vectorstore.
Args:
texts: Iterable of strings to add to the vectorstore.
metadatas: Optional list of metadatas associated with the texts.
Returns:
List of ids from adding the texts into the vectorstore.
"""
@abstractmethod @abstractmethod
def similarity_search(self, query: str, k: int = 4) -> List[Document]: def similarity_search(self, query: str, k: int = 4) -> List[Document]:

@ -55,7 +55,7 @@ class ElasticVectorSearch(VectorStore):
except ImportError: except ImportError:
raise ValueError( raise ValueError(
"Could not import elasticsearch python package. " "Could not import elasticsearch python package. "
"Please install it with `pip install elasticearch`." "Please install it with `pip install elasticsearch`."
) )
self.embedding_function = embedding_function self.embedding_function = embedding_function
self.index_name = index_name self.index_name = index_name
@ -69,29 +69,42 @@ class ElasticVectorSearch(VectorStore):
def add_texts( def add_texts(
self, texts: Iterable[str], metadatas: Optional[List[dict]] = None self, texts: Iterable[str], metadatas: Optional[List[dict]] = None
) -> None: ) -> List[str]:
"""Run more texts through the embeddings and add to the vectorstore.""" """Run more texts through the embeddings and add to the vectorstore.
Args:
texts: Iterable of strings to add to the vectorstore.
metadatas: Optional list of metadatas associated with the texts.
Returns:
List of ids from adding the texts into the vectorstore.
"""
try: try:
from elasticsearch.helpers import bulk from elasticsearch.helpers import bulk
except ImportError: except ImportError:
raise ValueError( raise ValueError(
"Could not import elasticsearch python package. " "Could not import elasticsearch python package. "
"Please install it with `pip install elasticearch`." "Please install it with `pip install elasticsearch`."
) )
requests = [] requests = []
ids = []
for i, text in enumerate(texts): for i, text in enumerate(texts):
metadata = metadatas[i] if metadatas else {} metadata = metadatas[i] if metadatas else {}
_id = str(uuid.uuid4())
request = { request = {
"_op_type": "index", "_op_type": "index",
"_index": self.index_name, "_index": self.index_name,
"vector": self.embedding_function(text), "vector": self.embedding_function(text),
"text": text, "text": text,
"metadata": metadata, "metadata": metadata,
"_id": _id,
} }
ids.append(_id)
requests.append(request) requests.append(request)
bulk(self.client, requests) bulk(self.client, requests)
# TODO: add option not to refresh # TODO: add option not to refresh
self.client.indices.refresh(index=self.index_name) self.client.indices.refresh(index=self.index_name)
return ids
def similarity_search(self, query: str, k: int = 4) -> List[Document]: def similarity_search(self, query: str, k: int = 4) -> List[Document]:
"""Return docs most similar to query. """Return docs most similar to query.

@ -41,8 +41,16 @@ class FAISS(VectorStore):
def add_texts( def add_texts(
self, texts: Iterable[str], metadatas: Optional[List[dict]] = None self, texts: Iterable[str], metadatas: Optional[List[dict]] = None
) -> None: ) -> List[str]:
"""Run more texts through the embeddings and add to the vectorstore.""" """Run more texts through the embeddings and add to the vectorstore.
Args:
texts: Iterable of strings to add to the vectorstore.
metadatas: Optional list of metadatas associated with the texts.
Returns:
List of ids from adding the texts into the vectorstore.
"""
if not isinstance(self.docstore, AddableMixin): if not isinstance(self.docstore, AddableMixin):
raise ValueError( raise ValueError(
"If trying to add texts, the underlying docstore should support " "If trying to add texts, the underlying docstore should support "
@ -66,6 +74,7 @@ class FAISS(VectorStore):
self.docstore.add({_id: doc for _, _id, doc in full_info}) self.docstore.add({_id: doc for _, _id, doc in full_info})
index_to_id = {index: _id for index, _id, _ in full_info} index_to_id = {index: _id for index, _id, _ in full_info}
self.index_to_docstore_id.update(index_to_id) self.index_to_docstore_id.update(index_to_id)
return [_id for _, _id, _ in full_info]
def similarity_search(self, query: str, k: int = 4) -> List[Document]: def similarity_search(self, query: str, k: int = 4) -> List[Document]:
"""Return docs most similar to query. """Return docs most similar to query.

Loading…
Cancel
Save