@ -1,6 +1,7 @@
""" Wrapper around SingleStore DB. """
""" Wrapper around SingleStore DB. """
from __future__ import annotations
from __future__ import annotations
import enum
import json
import json
from typing import (
from typing import (
Any ,
Any ,
@ -20,6 +21,19 @@ from langchain.embeddings.base import Embeddings
from langchain . vectorstores . base import VectorStore , VectorStoreRetriever
from langchain . vectorstores . base import VectorStore , VectorStoreRetriever
# Names of the vector distance functions that can be used to score rows
# against a query embedding. The selected member is formatted into the scoring
# query as the function applied to the vector field. The class also derives
# from ``str``, so every member is itself a string.
class DistanceStrategy ( str , enum . Enum ) :
    # Score rows by their Euclidean distance to the query vector.
    EUCLIDEAN_DISTANCE = " EUCLIDEAN_DISTANCE "
    # Score rows by their dot product with the query vector.
    DOT_PRODUCT = " DOT_PRODUCT "
# Strategy used when the caller does not pass ``distance_strategy``.
DEFAULT_DISTANCE_STRATEGY = DistanceStrategy . DOT_PRODUCT
# Sort keyword inserted after "ORDER BY __score" in the search query, keyed by
# strategy. DOT_PRODUCT sorts descending so the largest scores come first;
# EUCLIDEAN_DISTANCE adds no keyword and so relies on the database's default
# sort direction (presumably ascending, i.e. smallest distance first -- confirm
# against the database documentation).
ORDERING_DIRECTIVE : dict = {
    DistanceStrategy . EUCLIDEAN_DISTANCE : " " ,
    DistanceStrategy . DOT_PRODUCT : " DESC " ,
}
class SingleStoreDB ( VectorStore ) :
class SingleStoreDB ( VectorStore ) :
"""
"""
This class serves as a Pythonic interface to the SingleStore DB database .
This class serves as a Pythonic interface to the SingleStore DB database .
@ -45,6 +59,7 @@ class SingleStoreDB(VectorStore):
self ,
self ,
embedding : Embeddings ,
embedding : Embeddings ,
* ,
* ,
distance_strategy : DistanceStrategy = DEFAULT_DISTANCE_STRATEGY ,
table_name : str = " embeddings " ,
table_name : str = " embeddings " ,
content_field : str = " content " ,
content_field : str = " content " ,
metadata_field : str = " metadata " ,
metadata_field : str = " metadata " ,
@ -59,6 +74,18 @@ class SingleStoreDB(VectorStore):
Args :
Args :
embedding ( Embeddings ) : A text embedding model .
embedding ( Embeddings ) : A text embedding model .
distance_strategy ( DistanceStrategy , optional ) :
Determines the strategy employed for calculating
the distance between vectors in the embedding space .
Defaults to DOT_PRODUCT .
Available options are :
- DOT_PRODUCT : Computes the scalar product of two vectors .
This is the default behavior
- EUCLIDEAN_DISTANCE : Computes the Euclidean distance between
two vectors . This metric considers the geometric distance in
the vector space , and might be more suitable for embeddings
that rely on spatial relationships .
table_name ( str , optional ) : Specifies the name of the table in use .
table_name ( str , optional ) : Specifies the name of the table in use .
Defaults to " embeddings " .
Defaults to " embeddings " .
content_field ( str , optional ) : Specifies the field to store the content .
content_field ( str , optional ) : Specifies the field to store the content .
@ -137,6 +164,7 @@ class SingleStoreDB(VectorStore):
vectorstore = SingleStoreDB (
vectorstore = SingleStoreDB (
OpenAIEmbeddings ( ) ,
OpenAIEmbeddings ( ) ,
distance_strategy = DistanceStrategy . EUCLIDEAN_DISTANCE ,
host = " 127.0.0.1 " ,
host = " 127.0.0.1 " ,
port = 3306 ,
port = 3306 ,
user = " user " ,
user = " user " ,
@ -159,6 +187,7 @@ class SingleStoreDB(VectorStore):
"""
"""
self . embedding = embedding
self . embedding = embedding
self . distance_strategy = distance_strategy
self . table_name = table_name
self . table_name = table_name
self . content_field = content_field
self . content_field = content_field
self . metadata_field = metadata_field
self . metadata_field = metadata_field
@ -167,6 +196,17 @@ class SingleStoreDB(VectorStore):
""" Pass the rest of the kwargs to the connection. """
""" Pass the rest of the kwargs to the connection. """
self . connection_kwargs = kwargs
self . connection_kwargs = kwargs
""" Add program name and version to connection attributes. """
if " conn_attrs " not in self . connection_kwargs :
self . connection_kwargs [ " conn_attrs " ] = dict ( )
if " program_name " not in self . connection_kwargs [ " conn_attrs " ] :
self . connection_kwargs [ " conn_attrs " ] [
" program_name "
] = " langchain python sdk "
self . connection_kwargs [ " conn_attrs " ] [
" program_version "
] = " 0.0.205 " # the version of SingleStoreDB VectorStore implementation
""" Create connection pool. """
""" Create connection pool. """
self . connection_pool = QueuePool (
self . connection_pool = QueuePool (
self . _get_connection ,
self . _get_connection ,
@ -246,7 +286,7 @@ class SingleStoreDB(VectorStore):
return [ ]
return [ ]
def similarity_search (
def similarity_search (
self , query : str , k : int = 4 , * * kwargs : Any
self , query : str , k : int = 4 , filter : Optional [ dict ] = None , * * kwargs : Any
) - > List [ Document ] :
) - > List [ Document ] :
""" Returns the most similar indexed documents to the query text.
""" Returns the most similar indexed documents to the query text.
@ -255,21 +295,38 @@ class SingleStoreDB(VectorStore):
Args :
Args :
query ( str ) : The query text for which to find similar documents .
query ( str ) : The query text for which to find similar documents .
k ( int ) : The number of documents to return . Default is 4.
k ( int ) : The number of documents to return . Default is 4.
filter ( dict ) : A dictionary of metadata fields and values to filter by .
Returns :
Returns :
List [ Document ] : A list of documents that are most similar to the query text .
List [ Document ] : A list of documents that are most similar to the query text .
Examples :
. . code - block : : python
from langchain . vectorstores import SingleStoreDB
from langchain . embeddings import OpenAIEmbeddings
s2 = SingleStoreDB . from_documents (
docs ,
OpenAIEmbeddings ( ) ,
host = " username:password@localhost:3306/database "
)
s2 . similarity_search ( " query text " , 1 ,
{ " metadata_field " : " metadata_value " } )
"""
"""
docs_and_scores = self . similarity_search_with_score ( query , k = k )
docs_and_scores = self . similarity_search_with_score (
query = query , k = k , filter = filter
)
return [ doc for doc , _ in docs_and_scores ]
return [ doc for doc , _ in docs_and_scores ]
def similarity_search_with_score (
def similarity_search_with_score (
self , query : str , k : int = 4
self , query : str , k : int = 4 , filter : Optional [ dict ] = None
) - > List [ Tuple [ Document , float ] ] :
) - > List [ Tuple [ Document , float ] ] :
""" Return docs most similar to query. Uses cosine similarity.
""" Return docs most similar to query. Uses cosine similarity.
Args :
Args :
query : Text to look up documents similar to .
query : Text to look up documents similar to .
k : Number of Documents to return . Defaults to 4.
k : Number of Documents to return . Defaults to 4.
filter : A dictionary of metadata fields and values to filter by .
Defaults to None .
Returns :
Returns :
List of Documents most similar to the query and score for each
List of Documents most similar to the query and score for each
@ -278,21 +335,52 @@ class SingleStoreDB(VectorStore):
embedding = self . embedding . embed_query ( query )
embedding = self . embedding . embed_query ( query )
conn = self . connection_pool . connect ( )
conn = self . connection_pool . connect ( )
result = [ ]
result = [ ]
where_clause : str = " "
where_clause_values : List [ Any ] = [ ]
if filter :
where_clause = " WHERE "
arguments = [ ]
def build_where_clause(
    where_clause_values: List[Any],
    sub_filter: dict,
    prefix_args: Optional[List[str]] = None,
) -> None:
    """Flatten a (possibly nested) metadata filter into SQL predicates.

    Every non-dict leaf of ``sub_filter`` yields one predicate that is
    appended to the enclosing ``arguments`` list. The predicate compares the
    JSON value found under the leaf's key path in ``self.metadata_field``
    with a bound parameter. Nested dicts are walked recursively, extending
    the key path by one key per level.

    Args:
        where_clause_values: Output list, extended in place with the bound
            parameters for each predicate: first the keys of the path, then
            the JSON-encoded leaf value. The order matches the order of the
            ``%s`` placeholders in the generated predicate.
        sub_filter: The filter (or nested sub-filter) to translate.
        prefix_args: Keys leading from the filter root to ``sub_filter``.
            ``None`` (the default) means ``sub_filter`` is the root.
    """
    # A None default replaces the original mutable ``[]`` default, so no list
    # object is shared between calls.
    path = [] if prefix_args is None else prefix_args
    for key, value in sub_filter.items():
        if isinstance(value, dict):
            # Descend with a new, longer path; ``path`` itself is never mutated.
            build_where_clause(where_clause_values, value, path + [key])
        else:
            # One placeholder per key of the path (prefix keys plus this key);
            # key names are bound as parameters, never formatted into the SQL.
            arguments.append(
                " JSON_EXTRACT_JSON( {} , {} ) = %s ".format(
                    self.metadata_field,
                    " , ".join([" %s "] * (len(path) + 1)),
                )
            )
            where_clause_values += path + [key]
            # The leaf is JSON-encoded so that it is compared as JSON.
            where_clause_values.append(json.dumps(value))
build_where_clause ( where_clause_values , filter )
where_clause + = " AND " . join ( arguments )
try :
try :
cur = conn . cursor ( )
cur = conn . cursor ( )
try :
try :
cur . execute (
cur . execute (
""" SELECT {} , {} , DOT_PRODUCT( {} , JSON_ARRAY_PACK( %s )) as __score
""" SELECT {} , {} , {} ({} , JSON_ARRAY_PACK( %s )) as __score
FROM { } ORDER BY __score DESC LIMIT % s """ .format(
FROM { } { } ORDER BY __score { } LIMIT % s """ .format(
self . content_field ,
self . content_field ,
self . metadata_field ,
self . metadata_field ,
self . distance_strategy ,
self . vector_field ,
self . vector_field ,
self . table_name ,
self . table_name ,
where_clause ,
ORDERING_DIRECTIVE [ self . distance_strategy ] ,
) ,
) ,
(
( " [ {} ] " . format ( " , " . join ( map ( str , embedding ) ) ) , )
" [ {} ] " . format ( " , " . join ( map ( str , embedding ) ) ) ,
+ tuple ( where_clause_values )
k ,
+ ( k , ) ,
) ,
)
)
for row in cur . fetchall ( ) :
for row in cur . fetchall ( ) :
@ -310,6 +398,7 @@ class SingleStoreDB(VectorStore):
texts : List [ str ] ,
texts : List [ str ] ,
embedding : Embeddings ,
embedding : Embeddings ,
metadatas : Optional [ List [ dict ] ] = None ,
metadatas : Optional [ List [ dict ] ] = None ,
distance_strategy : DistanceStrategy = DEFAULT_DISTANCE_STRATEGY ,
table_name : str = " embeddings " ,
table_name : str = " embeddings " ,
content_field : str = " content " ,
content_field : str = " content " ,
metadata_field : str = " metadata " ,
metadata_field : str = " metadata " ,
@ -338,6 +427,7 @@ class SingleStoreDB(VectorStore):
instance = cls (
instance = cls (
embedding ,
embedding ,
distance_strategy = distance_strategy ,
table_name = table_name ,
table_name = table_name ,
content_field = content_field ,
content_field = content_field ,
metadata_field = metadata_field ,
metadata_field = metadata_field ,