|
|
|
@ -33,35 +33,6 @@ CVST = TypeVar("CVST", bound="Cassandra")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Cassandra(VectorStore):
|
|
|
|
|
"""Apache Cassandra(R) for vector-store workloads.
|
|
|
|
|
|
|
|
|
|
To use it, you need a recent installation of the `cassio` library
|
|
|
|
|
and a Cassandra cluster / Astra DB instance supporting vector capabilities.
|
|
|
|
|
|
|
|
|
|
Visit the cassio.org website for extensive quickstarts and code examples.
|
|
|
|
|
|
|
|
|
|
Example:
|
|
|
|
|
.. code-block:: python
|
|
|
|
|
|
|
|
|
|
from langchain_community.vectorstores import Cassandra
|
|
|
|
|
from langchain_community.embeddings.openai import OpenAIEmbeddings
|
|
|
|
|
|
|
|
|
|
embeddings = OpenAIEmbeddings()
|
|
|
|
|
session = ... # create your Cassandra session object
|
|
|
|
|
keyspace = 'my_keyspace' # the keyspace should exist already
|
|
|
|
|
table_name = 'my_vector_store'
|
|
|
|
|
vectorstore = Cassandra(embeddings, session, keyspace, table_name)
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
embedding: Embedding function to use.
|
|
|
|
|
session: Cassandra driver session. If not provided, it is resolved from cassio.
|
|
|
|
|
keyspace: Cassandra key space. If not provided, it is resolved from cassio.
|
|
|
|
|
table_name: Cassandra table (required).
|
|
|
|
|
ttl_seconds: Optional time-to-live for the added texts.
|
|
|
|
|
body_index_options: Optional options used to create the body index.
|
|
|
|
|
Eg. body_index_options = [cassio.table.cql.STANDARD_ANALYZER]
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
_embedding_dimension: Union[int, None]
|
|
|
|
|
|
|
|
|
|
def _get_embedding_dimension(self) -> int:
|
|
|
|
@ -89,6 +60,37 @@ class Cassandra(VectorStore):
|
|
|
|
|
body_index_options: Optional[List[Tuple[str, Any]]] = None,
|
|
|
|
|
setup_mode: SetupMode = SetupMode.SYNC,
|
|
|
|
|
) -> None:
|
|
|
|
|
"""Apache Cassandra(R) for vector-store workloads.
|
|
|
|
|
|
|
|
|
|
To use it, you need a recent installation of the `cassio` library
|
|
|
|
|
and a Cassandra cluster / Astra DB instance supporting vector capabilities.
|
|
|
|
|
|
|
|
|
|
Visit the cassio.org website for extensive quickstarts and code examples.
|
|
|
|
|
|
|
|
|
|
Example:
|
|
|
|
|
.. code-block:: python
|
|
|
|
|
|
|
|
|
|
from langchain_community.vectorstores import Cassandra
|
|
|
|
|
from langchain_openai import OpenAIEmbeddings
|
|
|
|
|
|
|
|
|
|
embeddings = OpenAIEmbeddings()
|
|
|
|
|
session = ... # create your Cassandra session object
|
|
|
|
|
keyspace = 'my_keyspace' # the keyspace should exist already
|
|
|
|
|
table_name = 'my_vector_store'
|
|
|
|
|
vectorstore = Cassandra(embeddings, session, keyspace, table_name)
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
embedding: Embedding function to use.
|
|
|
|
|
session: Cassandra driver session. If not provided, it is resolved from
|
|
|
|
|
cassio.
|
|
|
|
|
keyspace: Cassandra key space. If not provided, it is resolved from cassio.
|
|
|
|
|
table_name: Cassandra table (required).
|
|
|
|
|
ttl_seconds: Optional time-to-live for the added texts.
|
|
|
|
|
body_index_options: Optional options used to create the body index.
|
|
|
|
|
Eg. body_index_options = [cassio.table.cql.STANDARD_ANALYZER]
|
|
|
|
|
setup_mode: mode used to create the Cassandra table (SYNC,
|
|
|
|
|
ASYNC or OFF).
|
|
|
|
|
"""
|
|
|
|
|
try:
|
|
|
|
|
from cassio.table import MetadataVectorCassandraTable
|
|
|
|
|
except (ImportError, ModuleNotFoundError):
|
|
|
|
@ -164,9 +166,19 @@ class Cassandra(VectorStore):
|
|
|
|
|
await self.table.aclear()
|
|
|
|
|
|
|
|
|
|
def delete_by_document_id(self, document_id: str) -> None:
    """Delete the entry stored under a single document ID.

    The call is forwarded to the underlying table's ``delete`` method,
    using the document ID as the row ID.

    Args:
        document_id: ID of the document to delete.
    """
    backing_table = self.table
    outcome = backing_table.delete(row_id=document_id)
    return outcome
|
|
|
|
|
|
|
|
|
|
async def adelete_by_document_id(self, document_id: str) -> None:
    """Asynchronously delete the entry stored under a single document ID.

    The call is forwarded to the underlying table's ``adelete`` coroutine,
    using the document ID as the row ID.

    Args:
        document_id: ID of the document to delete.
    """
    backing_table = self.table
    outcome = await backing_table.adelete(row_id=document_id)
    return outcome
|
|
|
|
|
|
|
|
|
|
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]:
|
|
|
|
@ -370,8 +382,8 @@ class Cassandra(VectorStore):
|
|
|
|
|
"""Return docs most similar to embedding vector.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
embedding (str): Embedding to look up documents similar to.
|
|
|
|
|
k (int): Number of Documents to return. Defaults to 4.
|
|
|
|
|
embedding: Embedding to look up documents similar to.
|
|
|
|
|
k: Number of Documents to return. Defaults to 4.
|
|
|
|
|
filter: Filter on the metadata to apply.
|
|
|
|
|
body_search: Document textual search terms to apply.
|
|
|
|
|
Only supported by Astra DB at the moment.
|
|
|
|
@ -399,6 +411,17 @@ class Cassandra(VectorStore):
|
|
|
|
|
filter: Optional[Dict[str, str]] = None,
|
|
|
|
|
body_search: Optional[Union[str, List[str]]] = None,
|
|
|
|
|
) -> List[Tuple[Document, float, str]]:
|
|
|
|
|
"""Return docs most similar to query.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
query: Text to look up documents similar to.
|
|
|
|
|
k: Number of Documents to return. Defaults to 4.
|
|
|
|
|
filter: Filter on the metadata to apply.
|
|
|
|
|
body_search: Document textual search terms to apply.
|
|
|
|
|
Only supported by Astra DB at the moment.
|
|
|
|
|
Returns:
|
|
|
|
|
List of (Document, score, id), the most similar to the query vector.
|
|
|
|
|
"""
|
|
|
|
|
embedding_vector = self.embedding.embed_query(query)
|
|
|
|
|
return self.similarity_search_with_score_id_by_vector(
|
|
|
|
|
embedding=embedding_vector,
|
|
|
|
@ -414,6 +437,17 @@ class Cassandra(VectorStore):
|
|
|
|
|
filter: Optional[Dict[str, str]] = None,
|
|
|
|
|
body_search: Optional[Union[str, List[str]]] = None,
|
|
|
|
|
) -> List[Tuple[Document, float, str]]:
|
|
|
|
|
"""Return docs most similar to query.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
query: Text to look up documents similar to.
|
|
|
|
|
k: Number of Documents to return. Defaults to 4.
|
|
|
|
|
filter: Filter on the metadata to apply.
|
|
|
|
|
body_search: Document textual search terms to apply.
|
|
|
|
|
Only supported by Astra DB at the moment.
|
|
|
|
|
Returns:
|
|
|
|
|
List of (Document, score, id), the most similar to the query vector.
|
|
|
|
|
"""
|
|
|
|
|
embedding_vector = await self.embedding.aembed_query(query)
|
|
|
|
|
return await self.asimilarity_search_with_score_id_by_vector(
|
|
|
|
|
embedding=embedding_vector,
|
|
|
|
@ -461,8 +495,8 @@ class Cassandra(VectorStore):
|
|
|
|
|
"""Return docs most similar to embedding vector.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
embedding (str): Embedding to look up documents similar to.
|
|
|
|
|
k (int): Number of Documents to return. Defaults to 4.
|
|
|
|
|
embedding: Embedding to look up documents similar to.
|
|
|
|
|
k: Number of Documents to return. Defaults to 4.
|
|
|
|
|
filter: Filter on the metadata to apply.
|
|
|
|
|
body_search: Document textual search terms to apply.
|
|
|
|
|
Only supported by Astra DB at the moment.
|
|
|
|
@ -491,6 +525,17 @@ class Cassandra(VectorStore):
|
|
|
|
|
body_search: Optional[Union[str, List[str]]] = None,
|
|
|
|
|
**kwargs: Any,
|
|
|
|
|
) -> List[Document]:
|
|
|
|
|
"""Return docs most similar to query.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
query: Text to look up documents similar to.
|
|
|
|
|
k: Number of Documents to return. Defaults to 4.
|
|
|
|
|
filter: Filter on the metadata to apply.
|
|
|
|
|
body_search: Document textual search terms to apply.
|
|
|
|
|
Only supported by Astra DB at the moment.
|
|
|
|
|
Returns:
|
|
|
|
|
List of Document, the most similar to the query vector.
|
|
|
|
|
"""
|
|
|
|
|
embedding_vector = self.embedding.embed_query(query)
|
|
|
|
|
return self.similarity_search_by_vector(
|
|
|
|
|
embedding_vector,
|
|
|
|
@ -507,6 +552,17 @@ class Cassandra(VectorStore):
|
|
|
|
|
body_search: Optional[Union[str, List[str]]] = None,
|
|
|
|
|
**kwargs: Any,
|
|
|
|
|
) -> List[Document]:
|
|
|
|
|
"""Return docs most similar to query.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
query: Text to look up documents similar to.
|
|
|
|
|
k: Number of Documents to return. Defaults to 4.
|
|
|
|
|
filter: Filter on the metadata to apply.
|
|
|
|
|
body_search: Document textual search terms to apply.
|
|
|
|
|
Only supported by Astra DB at the moment.
|
|
|
|
|
Returns:
|
|
|
|
|
List of Document, the most similar to the query vector.
|
|
|
|
|
"""
|
|
|
|
|
embedding_vector = await self.embedding.aembed_query(query)
|
|
|
|
|
return await self.asimilarity_search_by_vector(
|
|
|
|
|
embedding_vector,
|
|
|
|
@ -523,6 +579,17 @@ class Cassandra(VectorStore):
|
|
|
|
|
body_search: Optional[Union[str, List[str]]] = None,
|
|
|
|
|
**kwargs: Any,
|
|
|
|
|
) -> List[Document]:
|
|
|
|
|
"""Return docs most similar to embedding vector.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
embedding: Embedding to look up documents similar to.
|
|
|
|
|
k: Number of Documents to return. Defaults to 4.
|
|
|
|
|
filter: Filter on the metadata to apply.
|
|
|
|
|
body_search: Document textual search terms to apply.
|
|
|
|
|
Only supported by Astra DB at the moment.
|
|
|
|
|
Returns:
|
|
|
|
|
List of Document, the most similar to the query vector.
|
|
|
|
|
"""
|
|
|
|
|
return [
|
|
|
|
|
doc
|
|
|
|
|
for doc, _ in self.similarity_search_with_score_by_vector(
|
|
|
|
@ -541,6 +608,17 @@ class Cassandra(VectorStore):
|
|
|
|
|
body_search: Optional[Union[str, List[str]]] = None,
|
|
|
|
|
**kwargs: Any,
|
|
|
|
|
) -> List[Document]:
|
|
|
|
|
"""Return docs most similar to embedding vector.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
embedding: Embedding to look up documents similar to.
|
|
|
|
|
k: Number of Documents to return. Defaults to 4.
|
|
|
|
|
filter: Filter on the metadata to apply.
|
|
|
|
|
body_search: Document textual search terms to apply.
|
|
|
|
|
Only supported by Astra DB at the moment.
|
|
|
|
|
Returns:
|
|
|
|
|
List of Document, the most similar to the query vector.
|
|
|
|
|
"""
|
|
|
|
|
return [
|
|
|
|
|
doc
|
|
|
|
|
for doc, _ in await self.asimilarity_search_with_score_by_vector(
|
|
|
|
@ -558,6 +636,17 @@ class Cassandra(VectorStore):
|
|
|
|
|
filter: Optional[Dict[str, str]] = None,
|
|
|
|
|
body_search: Optional[Union[str, List[str]]] = None,
|
|
|
|
|
) -> List[Tuple[Document, float]]:
|
|
|
|
|
"""Return docs most similar to query.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
query: Text to look up documents similar to.
|
|
|
|
|
k: Number of Documents to return. Defaults to 4.
|
|
|
|
|
filter: Filter on the metadata to apply.
|
|
|
|
|
body_search: Document textual search terms to apply.
|
|
|
|
|
Only supported by Astra DB at the moment.
|
|
|
|
|
Returns:
|
|
|
|
|
List of (Document, score), the most similar to the query vector.
|
|
|
|
|
"""
|
|
|
|
|
embedding_vector = self.embedding.embed_query(query)
|
|
|
|
|
return self.similarity_search_with_score_by_vector(
|
|
|
|
|
embedding_vector,
|
|
|
|
@ -573,6 +662,17 @@ class Cassandra(VectorStore):
|
|
|
|
|
filter: Optional[Dict[str, str]] = None,
|
|
|
|
|
body_search: Optional[Union[str, List[str]]] = None,
|
|
|
|
|
) -> List[Tuple[Document, float]]:
|
|
|
|
|
"""Return docs most similar to query.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
query: Text to look up documents similar to.
|
|
|
|
|
k: Number of Documents to return. Defaults to 4.
|
|
|
|
|
filter: Filter on the metadata to apply.
|
|
|
|
|
body_search: Document textual search terms to apply.
|
|
|
|
|
Only supported by Astra DB at the moment.
|
|
|
|
|
Returns:
|
|
|
|
|
List of (Document, score), the most similar to the query vector.
|
|
|
|
|
"""
|
|
|
|
|
embedding_vector = await self.embedding.aembed_query(query)
|
|
|
|
|
return await self.asimilarity_search_with_score_by_vector(
|
|
|
|
|
embedding_vector,
|
|
|
|
|