|
|
|
@ -40,7 +40,6 @@ class Pinecone(VectorStore):
|
|
|
|
|
index: Any,
|
|
|
|
|
embedding_function: Callable,
|
|
|
|
|
text_key: str,
|
|
|
|
|
namespace: Optional[str] = None,
|
|
|
|
|
):
|
|
|
|
|
"""Initialize with Pinecone client."""
|
|
|
|
|
try:
|
|
|
|
@ -58,14 +57,12 @@ class Pinecone(VectorStore):
|
|
|
|
|
self._index = index
|
|
|
|
|
self._embedding_function = embedding_function
|
|
|
|
|
self._text_key = text_key
|
|
|
|
|
self._namespace = namespace
|
|
|
|
|
|
|
|
|
|
def add_texts(
|
|
|
|
|
self,
|
|
|
|
|
texts: Iterable[str],
|
|
|
|
|
metadatas: Optional[List[dict]] = None,
|
|
|
|
|
ids: Optional[List[str]] = None,
|
|
|
|
|
namespace: Optional[str] = None,
|
|
|
|
|
batch_size: int = 32,
|
|
|
|
|
**kwargs: Any,
|
|
|
|
|
) -> List[str]:
|
|
|
|
@ -75,14 +72,11 @@ class Pinecone(VectorStore):
|
|
|
|
|
texts: Iterable of strings to add to the vectorstore.
|
|
|
|
|
metadatas: Optional list of metadatas associated with the texts.
|
|
|
|
|
ids: Optional list of ids to associate with the texts.
|
|
|
|
|
namespace: Optional pinecone namespace to add the texts to.
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
List of ids from adding the texts into the vectorstore.
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
if namespace is None:
|
|
|
|
|
namespace = self._namespace
|
|
|
|
|
# Embed and create the documents
|
|
|
|
|
docs = []
|
|
|
|
|
ids = ids or [str(uuid.uuid4()) for _ in texts]
|
|
|
|
@ -92,7 +86,7 @@ class Pinecone(VectorStore):
|
|
|
|
|
metadata[self._text_key] = text
|
|
|
|
|
docs.append((ids[i], embedding, metadata))
|
|
|
|
|
# upsert to Pinecone
|
|
|
|
|
self._index.upsert(vectors=docs, namespace=namespace, batch_size=batch_size)
|
|
|
|
|
self._index.upsert(vectors=docs, batch_size=batch_size)
|
|
|
|
|
return ids
|
|
|
|
|
|
|
|
|
|
def similarity_search_with_score(
|
|
|
|
@ -100,7 +94,6 @@ class Pinecone(VectorStore):
|
|
|
|
|
query: str,
|
|
|
|
|
k: int = 4,
|
|
|
|
|
filter: Optional[dict] = None,
|
|
|
|
|
namespace: Optional[str] = None,
|
|
|
|
|
) -> List[Tuple[Document, float]]:
|
|
|
|
|
"""Return pinecone documents most similar to query, along with scores.
|
|
|
|
|
|
|
|
|
@ -108,20 +101,16 @@ class Pinecone(VectorStore):
|
|
|
|
|
query: Text to look up documents similar to.
|
|
|
|
|
k: Number of Documents to return. Defaults to 4.
|
|
|
|
|
filter: Dictionary of argument(s) to filter on metadata
|
|
|
|
|
namespace: Namespace to search in. Default will search in '' namespace.
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
List of Documents most similar to the query and score for each
|
|
|
|
|
"""
|
|
|
|
|
if namespace is None:
|
|
|
|
|
namespace = self._namespace
|
|
|
|
|
query_obj = self._embedding_function(query)
|
|
|
|
|
docs = []
|
|
|
|
|
results = self._index.query(
|
|
|
|
|
[query_obj],
|
|
|
|
|
top_k=k,
|
|
|
|
|
include_metadata=True,
|
|
|
|
|
namespace=namespace,
|
|
|
|
|
filter=filter,
|
|
|
|
|
)
|
|
|
|
|
for res in results["matches"]:
|
|
|
|
@ -141,7 +130,6 @@ class Pinecone(VectorStore):
|
|
|
|
|
query: str,
|
|
|
|
|
k: int = 4,
|
|
|
|
|
filter: Optional[dict] = None,
|
|
|
|
|
namespace: Optional[str] = None,
|
|
|
|
|
**kwargs: Any,
|
|
|
|
|
) -> List[Document]:
|
|
|
|
|
"""Return pinecone documents most similar to query.
|
|
|
|
@ -150,13 +138,12 @@ class Pinecone(VectorStore):
|
|
|
|
|
query: Text to look up documents similar to.
|
|
|
|
|
k: Number of Documents to return. Defaults to 4.
|
|
|
|
|
filter: Dictionary of argument(s) to filter on metadata
|
|
|
|
|
namespace: Namespace to search in. Default will search in '' namespace.
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
List of Documents most similar to the query and score for each
|
|
|
|
|
"""
|
|
|
|
|
docs_and_scores = self.similarity_search_with_score(
|
|
|
|
|
query, k=k, filter=filter, namespace=namespace, **kwargs
|
|
|
|
|
query, k=k, filter=filter, **kwargs
|
|
|
|
|
)
|
|
|
|
|
return [doc for doc, _ in docs_and_scores]
|
|
|
|
|
|
|
|
|
@ -176,7 +163,6 @@ class Pinecone(VectorStore):
|
|
|
|
|
fetch_k: int = 20,
|
|
|
|
|
lambda_mult: float = 0.5,
|
|
|
|
|
filter: Optional[dict] = None,
|
|
|
|
|
namespace: Optional[str] = None,
|
|
|
|
|
**kwargs: Any,
|
|
|
|
|
) -> List[Document]:
|
|
|
|
|
"""Return docs selected using the maximal marginal relevance.
|
|
|
|
@ -195,14 +181,11 @@ class Pinecone(VectorStore):
|
|
|
|
|
Returns:
|
|
|
|
|
List of Documents selected by maximal marginal relevance.
|
|
|
|
|
"""
|
|
|
|
|
if namespace is None:
|
|
|
|
|
namespace = self._namespace
|
|
|
|
|
results = self._index.query(
|
|
|
|
|
[embedding],
|
|
|
|
|
top_k=fetch_k,
|
|
|
|
|
include_values=True,
|
|
|
|
|
include_metadata=True,
|
|
|
|
|
namespace=namespace,
|
|
|
|
|
filter=filter,
|
|
|
|
|
)
|
|
|
|
|
mmr_selected = maximal_marginal_relevance(
|
|
|
|
@ -224,7 +207,6 @@ class Pinecone(VectorStore):
|
|
|
|
|
fetch_k: int = 20,
|
|
|
|
|
lambda_mult: float = 0.5,
|
|
|
|
|
filter: Optional[dict] = None,
|
|
|
|
|
namespace: Optional[str] = None,
|
|
|
|
|
**kwargs: Any,
|
|
|
|
|
) -> List[Document]:
|
|
|
|
|
"""Return docs selected using the maximal marginal relevance.
|
|
|
|
@ -245,7 +227,7 @@ class Pinecone(VectorStore):
|
|
|
|
|
"""
|
|
|
|
|
embedding = self._embedding_function(query)
|
|
|
|
|
return self.max_marginal_relevance_search_by_vector(
|
|
|
|
|
embedding, k, fetch_k, lambda_mult, filter, namespace
|
|
|
|
|
embedding, k, fetch_k, lambda_mult, filter
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
@classmethod
|
|
|
|
@ -258,7 +240,6 @@ class Pinecone(VectorStore):
|
|
|
|
|
batch_size: int = 32,
|
|
|
|
|
text_key: str = "text",
|
|
|
|
|
index_name: Optional[str] = None,
|
|
|
|
|
namespace: Optional[str] = None,
|
|
|
|
|
**kwargs: Any,
|
|
|
|
|
) -> Pinecone:
|
|
|
|
|
"""Construct Pinecone wrapper from raw documents.
|
|
|
|
@ -331,8 +312,8 @@ class Pinecone(VectorStore):
|
|
|
|
|
to_upsert = zip(ids_batch, embeds, metadata)
|
|
|
|
|
|
|
|
|
|
# upsert to Pinecone
|
|
|
|
|
index.upsert(vectors=list(to_upsert), namespace=namespace)
|
|
|
|
|
return cls(index, embedding.embed_query, text_key, namespace)
|
|
|
|
|
index.upsert(vectors=list(to_upsert))
|
|
|
|
|
return cls(index, embedding.embed_query, text_key)
|
|
|
|
|
|
|
|
|
|
@classmethod
|
|
|
|
|
def from_existing_index(
|
|
|
|
@ -340,7 +321,6 @@ class Pinecone(VectorStore):
|
|
|
|
|
index_name: str,
|
|
|
|
|
embedding: Embeddings,
|
|
|
|
|
text_key: str = "text",
|
|
|
|
|
namespace: Optional[str] = None,
|
|
|
|
|
) -> Pinecone:
|
|
|
|
|
"""Load pinecone vectorstore from index name."""
|
|
|
|
|
try:
|
|
|
|
@ -350,38 +330,21 @@ class Pinecone(VectorStore):
|
|
|
|
|
"Could not import pinecone python package. "
|
|
|
|
|
"Please install it with `pip install pinecone-client`."
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
return cls(
|
|
|
|
|
pinecone.Index(index_name), embedding.embed_query, text_key, namespace
|
|
|
|
|
)
|
|
|
|
|
return cls(pinecone.Index(index_name), embedding.embed_query, text_key)
|
|
|
|
|
|
|
|
|
|
def delete(
|
|
|
|
|
self,
|
|
|
|
|
ids: Optional[List[str]] = None,
|
|
|
|
|
delete_all: Optional[bool] = None,
|
|
|
|
|
namespace: Optional[str] = None,
|
|
|
|
|
filter: Optional[dict] = None,
|
|
|
|
|
**kwargs: Any,
|
|
|
|
|
) -> None:
|
|
|
|
|
"""Delete by vector IDs or filter.
|
|
|
|
|
"""Delete by vector IDs
|
|
|
|
|
Args:
|
|
|
|
|
ids: List of ids to delete.
|
|
|
|
|
filter: Dictionary of conditions to filter vectors to delete.
|
|
|
|
|
"""
|
|
|
|
|
if ids is None:
|
|
|
|
|
raise ValueError("Ids must be provided.")
|
|
|
|
|
|
|
|
|
|
if namespace is None:
|
|
|
|
|
namespace = self._namespace
|
|
|
|
|
|
|
|
|
|
if delete_all:
|
|
|
|
|
self._index.delete(delete_all=True, namespace=namespace, **kwargs)
|
|
|
|
|
elif ids is not None:
|
|
|
|
|
chunk_size = 1000
|
|
|
|
|
for i in range(0, len(ids), chunk_size):
|
|
|
|
|
chunk = ids[i : i + chunk_size]
|
|
|
|
|
self._index.delete(ids=chunk, namespace=namespace, **kwargs)
|
|
|
|
|
elif filter is not None:
|
|
|
|
|
self._index.delete(filter=filter, namespace=namespace, **kwargs)
|
|
|
|
|
else:
|
|
|
|
|
raise ValueError("Either ids, delete_all, or filter must be provided.")
|
|
|
|
|
|
|
|
|
|
return None
|
|
|
|
|
chunk_size = 1000
|
|
|
|
|
for i in range(0, len(ids), chunk_size):
|
|
|
|
|
chunk = ids[i : i + chunk_size]
|
|
|
|
|
self._index.delete(ids=chunk, **kwargs)
|
|
|
|
|