@ -34,6 +34,10 @@ if TYPE_CHECKING:
MetadataFilter = Union [ DictFilter , common_types . Filter ]
class QdrantException ( Exception ) :
""" Base class for all the Qdrant related exceptions """
class Qdrant ( VectorStore ) :
""" Wrapper around Qdrant vector database.
@ -552,6 +556,7 @@ class Qdrant(VectorStore):
wal_config : Optional [ common_types . WalConfigDiff ] = None ,
quantization_config : Optional [ common_types . QuantizationConfig ] = None ,
init_from : Optional [ common_types . InitFrom ] = None ,
force_recreate : bool = False ,
* * kwargs : Any ,
) - > Qdrant :
""" Construct Qdrant wrapper from a list of texts.
@ -636,6 +641,8 @@ class Qdrant(VectorStore):
Params for quantization , if None - quantization will be disabled
init_from :
Use data stored in another collection to initialize this collection
force_recreate :
Force recreating the collection
* * kwargs :
Additional arguments passed directly into REST client initialization
@ -663,7 +670,9 @@ class Qdrant(VectorStore):
" Please install it with `pip install qdrant-client`. "
)
from grpc import RpcError
from qdrant_client . http import models as rest
from qdrant_client . http . exceptions import UnexpectedResponse
# Just do a single quick embedding to get vector size
partial_embeddings = embedding . embed_documents ( texts [ : 1 ] )
@ -687,62 +696,98 @@ class Qdrant(VectorStore):
* * kwargs ,
)
vectors_config = rest . VectorParams (
size = vector_size ,
distance = rest . Distance [ distance_func ] ,
)
# If vector name was provided, we're going to use the named vectors feature
# with just a single vector.
if vector_name is not None :
vectors_config = { # type: ignore[assignment]
vector_name : vectors_config ,
}
try :
# Skip any validation in case of forced collection recreate.
if force_recreate :
raise ValueError
# Get the vector configuration of the existing collection and vector, if it
# was specified. If the old configuration does not match the current one,
# an exception is being thrown.
collection_info = client . get_collection ( collection_name = collection_name )
current_vector_config = collection_info . config . params . vectors
if isinstance ( current_vector_config , dict ) and vector_name is not None :
if vector_name not in current_vector_config :
raise QdrantException (
f " Existing Qdrant collection { collection_name } does not "
f " contain vector named { vector_name } . Did you mean one of the "
f " existing vectors: { ' , ' . join ( current_vector_config . keys ( ) ) } ? "
f " If you want to recreate the collection, set `force_recreate` "
f " parameter to `True`. "
)
current_vector_config = current_vector_config . get (
vector_name
) # type: ignore[assignment]
elif isinstance ( current_vector_config , dict ) and vector_name is None :
raise QdrantException (
f " Existing Qdrant collection { collection_name } uses named vectors. "
f " If you want to reuse it, please set `vector_name` to any of the "
f " existing named vectors: "
f " { ' , ' . join ( current_vector_config . keys ( ) ) } . " # noqa
f " If you want to recreate the collection, set `force_recreate` "
f " parameter to `True`. "
)
elif (
not isinstance ( current_vector_config , dict ) and vector_name is not None
) :
raise QdrantException (
f " Existing Qdrant collection { collection_name } doesn ' t use named "
f " vectors. If you want to reuse it, please set `vector_name` to "
f " `None`. If you want to recreate the collection, set "
f " `force_recreate` parameter to `True`. "
)
client . recreate_collection (
collection_name = collection_name ,
vectors_config = vectors_config ,
shard_number = shard_number ,
replication_factor = replication_factor ,
write_consistency_factor = write_consistency_factor ,
on_disk_payload = on_disk_payload ,
hnsw_config = hnsw_config ,
optimizers_config = optimizers_config ,
wal_config = wal_config ,
quantization_config = quantization_config ,
init_from = init_from ,
timeout = timeout , # type: ignore[arg-type]
)
# Check if the vector configuration has the same dimensionality.
if current_vector_config . size != vector_size : # type: ignore[union-attr]
raise QdrantException (
f " Existing Qdrant collection is configured for vectors with "
f " { current_vector_config . size } " # type: ignore[union-attr]
f " dimensions. Selected embeddings are { vector_size } -dimensional. "
f " If you want to recreate the collection, set `force_recreate` "
f " parameter to `True`. "
)
texts_iterator = iter ( texts )
metadatas_iterator = iter ( metadatas or [ ] )
ids_iterator = iter ( ids or [ uuid . uuid4 ( ) . hex for _ in iter ( texts ) ] )
while batch_texts := list ( islice ( texts_iterator , batch_size ) ) :
# Take the corresponding metadata and id for each text in a batch
batch_metadatas = list ( islice ( metadatas_iterator , batch_size ) ) or None
batch_ids = list ( islice ( ids_iterator , batch_size ) )
current_distance_func = (
current_vector_config . distance . name . upper ( ) # type: ignore[union-attr]
)
if current_distance_func != distance_func :
raise QdrantException (
f " Existing Qdrant collection is configured for "
f " { current_vector_config . distance } " # type: ignore[union-attr]
f " similarity. Please set `distance_func` parameter to "
f " ` { distance_func } ` if you want to reuse it. If you want to "
f " recreate the collection, set `force_recreate` parameter to "
f " `True`. "
)
except ( UnexpectedResponse , RpcError , ValueError ) :
vectors_config = rest . VectorParams (
size = vector_size ,
distance = rest . Distance [ distance_func ] ,
)
# Generate the embeddings for all the texts in a batch
batch_embeddings = embedding . embed_documents ( batch_texts )
# If vector name was provided, we're going to use the named vectors feature
# with just a single vector.
if vector_name is not None :
batch_embeddings = { # type: ignore[assignment]
vector_name : batch_embeddings
vectors_config = { # type: ignore[assignment]
vector_name : vectors_config,
}
points = rest . Batch . construct (
ids = batch_ids ,
vectors = batch_embeddings ,
payloads = cls . _build_payloads (
batch_texts ,
batch_metadatas ,
content_payload_key ,
metadata_payload_key ,
) ,
client . recreate_collection (
collection_name = collection_name ,
vectors_config = vectors_config ,
shard_number = shard_number ,
replication_factor = replication_factor ,
write_consistency_factor = write_consistency_factor ,
on_disk_payload = on_disk_payload ,
hnsw_config = hnsw_config ,
optimizers_config = optimizers_config ,
wal_config = wal_config ,
quantization_config = quantization_config ,
init_from = init_from ,
timeout = timeout , # type: ignore[arg-type]
)
client . upsert ( collection_name = collection_name , points = points )
return cls (
qdrant = cls (
client = client ,
collection_name = collection_name ,
embeddings = embedding ,
@ -751,6 +796,8 @@ class Qdrant(VectorStore):
distance_strategy = distance_func ,
vector_name = vector_name ,
)
qdrant . add_texts ( texts , metadatas , ids , batch_size )
return qdrant
@classmethod
def _build_payloads (