@ -175,6 +175,10 @@ class DocumentDBVectorSearch(VectorStore):
The maximum number of supported dimensions is 2000
The maximum number of supported dimensions is 2000
similarity : Similarity algorithm to use with the HNSW index .
similarity : Similarity algorithm to use with the HNSW index .
Possible options are :
- DocumentDBSimilarityType . COS ( cosine distance ) ,
- DocumentDBSimilarityType . EUC ( Euclidean distance ) , and
- DocumentDBSimilarityType . DOT ( dot product ) .
m : Specifies the max number of connections for an HNSW index .
m : Specifies the max number of connections for an HNSW index .
Large impact on memory consumption .
Large impact on memory consumption .
@ -183,10 +187,6 @@ class DocumentDBVectorSearch(VectorStore):
for constructing the graph for HNSW index . Higher values lead
for constructing the graph for HNSW index . Higher values lead
to more accurate results but slower indexing speed .
to more accurate results but slower indexing speed .
Possible options are :
- DocumentDBSimilarityType . COS ( cosine distance ) ,
- DocumentDBSimilarityType . EUC ( Euclidean distance ) , and
- DocumentDBSimilarityType . DOT ( dot product ) .
Returns :
Returns :
An object describing the created index
An object describing the created index
@ -309,7 +309,11 @@ class DocumentDBVectorSearch(VectorStore):
self . _collection . delete_one ( { " _id " : ObjectId ( document_id ) } )
self . _collection . delete_one ( { " _id " : ObjectId ( document_id ) } )
def _similarity_search_without_score (
def _similarity_search_without_score (
self , embeddings : List [ float ] , k : int = 4 , ef_search : int = 40
self ,
embeddings : List [ float ] ,
k : int = 4 ,
ef_search : int = 40 ,
filter : Optional [ Dict [ str , Any ] ] = None ,
) - > List [ Document ] :
) - > List [ Document ] :
""" Returns a list of documents.
""" Returns a list of documents.
@ -319,12 +323,13 @@ class DocumentDBVectorSearch(VectorStore):
ef_search : Specifies the size of the dynamic candidate list
ef_search : Specifies the size of the dynamic candidate list
that HNSW index uses during search . A higher value of
that HNSW index uses during search . A higher value of
efSearch provides better recall at cost of speed .
efSearch provides better recall at cost of speed .
filter ( Optional [ Dict [ str , Any ] ] ) : Filter by metadata . Defaults to None .
Returns :
Returns :
A list of documents closest to the query vector
A list of documents closest to the query vector
"""
"""
pipeline : List [ dict [ str , Any ] ] = [
pipeline : List [ dict [ str , Any ] ] = [
{
{
" $match " : filter ,
" $search " : {
" $search " : {
" vectorSearch " : {
" vectorSearch " : {
" vector " : embeddings ,
" vector " : embeddings ,
@ -333,7 +338,7 @@ class DocumentDBVectorSearch(VectorStore):
" k " : k ,
" k " : k ,
" efSearch " : ef_search ,
" efSearch " : ef_search ,
}
}
}
} ,
}
}
]
]
@ -352,10 +357,12 @@ class DocumentDBVectorSearch(VectorStore):
query : str ,
query : str ,
k : int = 4 ,
k : int = 4 ,
ef_search : int = 40 ,
ef_search : int = 40 ,
* ,
filter : Optional [ Dict [ str , Any ] ] = None ,
* * kwargs : Any ,
* * kwargs : Any ,
) - > List [ Document ] :
) - > List [ Document ] :
embeddings = self . _embedding . embed_query ( query )
embeddings = self . _embedding . embed_query ( query )
docs = self . _similarity_search_without_score (
docs = self . _similarity_search_without_score (
embeddings = embeddings , k = k , ef_search = ef_search
embeddings = embeddings , k = k , ef_search = ef_search , filter = filter
)
)
return [ doc for doc in docs ]
return [ doc for doc in docs ]