@ -3,7 +3,6 @@ from __future__ import annotations
import uuid
import uuid
import warnings
import warnings
from hashlib import md5
from itertools import islice
from itertools import islice
from operator import itemgetter
from operator import itemgetter
from typing import (
from typing import (
@ -14,6 +13,7 @@ from typing import (
Iterable ,
Iterable ,
List ,
List ,
Optional ,
Optional ,
Sequence ,
Tuple ,
Tuple ,
Type ,
Type ,
Union ,
Union ,
@ -109,57 +109,11 @@ class Qdrant(VectorStore):
self . _embeddings_function = embeddings
self . _embeddings_function = embeddings
self . embeddings = None
self . embeddings = None
def _embed_query ( self , query : str ) - > List [ float ] :
""" Embed query text.
Used to provide backward compatibility with ` embedding_function ` argument .
Args :
query : Query text .
Returns :
List of floats representing the query embedding .
"""
if self . embeddings is not None :
embedding = self . embeddings . embed_query ( query )
else :
if self . _embeddings_function is not None :
embedding = self . _embeddings_function ( query )
else :
raise ValueError ( " Neither of embeddings or embedding_function is set " )
return embedding . tolist ( ) if hasattr ( embedding , " tolist " ) else embedding
def _embed_texts ( self , texts : Iterable [ str ] ) - > List [ List [ float ] ] :
""" Embed search texts.
Used to provide backward compatibility with ` embedding_function ` argument .
Args :
texts : Iterable of texts to embed .
Returns :
List of floats representing the texts embedding .
"""
if self . embeddings is not None :
embeddings = self . embeddings . embed_documents ( list ( texts ) )
if hasattr ( embeddings , " tolist " ) :
embeddings = embeddings . tolist ( )
elif self . _embeddings_function is not None :
embeddings = [ ]
for text in texts :
embedding = self . _embeddings_function ( text )
if hasattr ( embeddings , " tolist " ) :
embedding = embedding . tolist ( )
embeddings . append ( embedding )
else :
raise ValueError ( " Neither of embeddings or embedding_function is set " )
return embeddings
def add_texts (
def add_texts (
self ,
self ,
texts : Iterable [ str ] ,
texts : Iterable [ str ] ,
metadatas : Optional [ List [ dict ] ] = None ,
metadatas : Optional [ List [ dict ] ] = None ,
ids : Optional [ Sequence [ str ] ] = None ,
batch_size : int = 64 ,
batch_size : int = 64 ,
* * kwargs : Any ,
* * kwargs : Any ,
) - > List [ str ] :
) - > List [ str ] :
@ -168,20 +122,26 @@ class Qdrant(VectorStore):
Args :
Args :
texts : Iterable of strings to add to the vectorstore .
texts : Iterable of strings to add to the vectorstore .
metadatas : Optional list of metadatas associated with the texts .
metadatas : Optional list of metadatas associated with the texts .
ids :
Optional list of ids to associate with the texts . Ids have to be
uuid - like strings .
batch_size :
How many vectors upload per - request .
Default : 64
Returns :
Returns :
List of ids from adding the texts into the vectorstore .
List of ids from adding the texts into the vectorstore .
"""
"""
from qdrant_client . http import models as rest
from qdrant_client . http import models as rest
ids = [ ]
added_ ids = [ ]
texts_iterator = iter ( texts )
texts_iterator = iter ( texts )
metadatas_iterator = iter ( metadatas or [ ] )
metadatas_iterator = iter ( metadatas or [ ] )
ids_iterator = iter ( ids or [ uuid . uuid4 ( ) . hex for _ in iter ( texts ) ] )
while batch_texts := list ( islice ( texts_iterator , batch_size ) ) :
while batch_texts := list ( islice ( texts_iterator , batch_size ) ) :
# Take the corresponding metadata for each text in a batch
# Take the corresponding metadata and id for each text in a batch
batch_metadatas = list ( islice ( metadatas_iterator , batch_size ) ) or None
batch_metadatas = list ( islice ( metadatas_iterator , batch_size ) ) or None
batch_ids = list ( islice ( ids_iterator , batch_size ) )
batch_ids = [ md5 ( text . encode ( " utf-8 " ) ) . hexdigest ( ) for text in batch_texts ]
self . client . upsert (
self . client . upsert (
collection_name = self . collection_name ,
collection_name = self . collection_name ,
@ -197,9 +157,9 @@ class Qdrant(VectorStore):
) ,
) ,
)
)
ids. extend ( batch_ids )
added_ ids. extend ( batch_ids )
return ids
return added_ ids
def similarity_search (
def similarity_search (
self ,
self ,
@ -313,6 +273,7 @@ class Qdrant(VectorStore):
texts : List [ str ] ,
texts : List [ str ] ,
embedding : Embeddings ,
embedding : Embeddings ,
metadatas : Optional [ List [ dict ] ] = None ,
metadatas : Optional [ List [ dict ] ] = None ,
ids : Optional [ Sequence [ str ] ] = None ,
location : Optional [ str ] = None ,
location : Optional [ str ] = None ,
url : Optional [ str ] = None ,
url : Optional [ str ] = None ,
port : Optional [ int ] = 6333 ,
port : Optional [ int ] = 6333 ,
@ -339,6 +300,9 @@ class Qdrant(VectorStore):
metadatas :
metadatas :
An optional list of metadata . If provided it has to be of the same
An optional list of metadata . If provided it has to be of the same
length as a list of texts .
length as a list of texts .
ids :
Optional list of ids to associate with the texts . Ids have to be
uuid - like strings .
location :
location :
If ` : memory : ` - use in - memory Qdrant instance .
If ` : memory : ` - use in - memory Qdrant instance .
If ` str ` - use it as a ` url ` parameter .
If ` str ` - use it as a ` url ` parameter .
@ -378,6 +342,9 @@ class Qdrant(VectorStore):
metadata_payload_key :
metadata_payload_key :
A payload key used to store the metadata of the document .
A payload key used to store the metadata of the document .
Default : " metadata "
Default : " metadata "
batch_size :
How many vectors upload per - request .
Default : 64
* * kwargs :
* * kwargs :
Additional arguments passed directly into REST client initialization
Additional arguments passed directly into REST client initialization
@ -439,9 +406,11 @@ class Qdrant(VectorStore):
texts_iterator = iter ( texts )
texts_iterator = iter ( texts )
metadatas_iterator = iter ( metadatas or [ ] )
metadatas_iterator = iter ( metadatas or [ ] )
ids_iterator = iter ( ids or [ uuid . uuid4 ( ) . hex for _ in iter ( texts ) ] )
while batch_texts := list ( islice ( texts_iterator , batch_size ) ) :
while batch_texts := list ( islice ( texts_iterator , batch_size ) ) :
# Take the corresponding metadata for each text in a batch
# Take the corresponding metadata and id for each text in a batch
batch_metadatas = list ( islice ( metadatas_iterator , batch_size ) ) or None
batch_metadatas = list ( islice ( metadatas_iterator , batch_size ) ) or None
batch_ids = list ( islice ( ids_iterator , batch_size ) )
# Generate the embeddings for all the texts in a batch
# Generate the embeddings for all the texts in a batch
batch_embeddings = embedding . embed_documents ( batch_texts )
batch_embeddings = embedding . embed_documents ( batch_texts )
@ -449,7 +418,7 @@ class Qdrant(VectorStore):
client . upsert (
client . upsert (
collection_name = collection_name ,
collection_name = collection_name ,
points = rest . Batch . construct (
points = rest . Batch . construct (
ids = [ md5 ( text . encode ( " utf-8 " ) ) . hexdigest ( ) for text in batch_texts ] ,
ids = batch_ids ,
vectors = batch_embeddings ,
vectors = batch_embeddings ,
payloads = cls . _build_payloads (
payloads = cls . _build_payloads (
batch_texts ,
batch_texts ,
@ -544,3 +513,50 @@ class Qdrant(VectorStore):
for condition in self . _build_condition ( key , value )
for condition in self . _build_condition ( key , value )
]
]
)
)
def _embed_query ( self , query : str ) - > List [ float ] :
""" Embed query text.
Used to provide backward compatibility with ` embedding_function ` argument .
Args :
query : Query text .
Returns :
List of floats representing the query embedding .
"""
if self . embeddings is not None :
embedding = self . embeddings . embed_query ( query )
else :
if self . _embeddings_function is not None :
embedding = self . _embeddings_function ( query )
else :
raise ValueError ( " Neither of embeddings or embedding_function is set " )
return embedding . tolist ( ) if hasattr ( embedding , " tolist " ) else embedding
def _embed_texts ( self , texts : Iterable [ str ] ) - > List [ List [ float ] ] :
""" Embed search texts.
Used to provide backward compatibility with ` embedding_function ` argument .
Args :
texts : Iterable of texts to embed .
Returns :
List of floats representing the texts embedding .
"""
if self . embeddings is not None :
embeddings = self . embeddings . embed_documents ( list ( texts ) )
if hasattr ( embeddings , " tolist " ) :
embeddings = embeddings . tolist ( )
elif self . _embeddings_function is not None :
embeddings = [ ]
for text in texts :
embedding = self . _embeddings_function ( text )
if hasattr ( embeddings , " tolist " ) :
embedding = embedding . tolist ( )
embeddings . append ( embedding )
else :
raise ValueError ( " Neither of embeddings or embedding_function is set " )
return embeddings