@ -1,6 +1,7 @@
""" Wrapper around FAISS vector database. """
""" Wrapper around FAISS vector database. """
from __future__ import annotations
from __future__ import annotations
import math
import pickle
import pickle
import uuid
import uuid
from pathlib import Path
from pathlib import Path
@ -29,6 +30,20 @@ def dependable_faiss_import() -> Any:
return faiss
return faiss
def _default_relevance_score_fn ( score : float ) - > float :
""" Return a similarity score on a scale [0, 1]. """
# The 'correct' relevance function
# may differ depending on a few things, including:
# - the distance / similarity metric used by the VectorStore
# - the scale of your embeddings (OpenAI's are unit normed. Many others are not!)
# - embedding dimensionality
# - etc.
# This function converts the euclidean norm of normalized embeddings
# (0 is most similar, sqrt(2) most dissimilar)
# to a similarity function (0 to 1)
return 1.0 - score / math . sqrt ( 2 )
class FAISS ( VectorStore ) :
class FAISS ( VectorStore ) :
""" Wrapper around FAISS vector database.
""" Wrapper around FAISS vector database.
@ -48,12 +63,16 @@ class FAISS(VectorStore):
index : Any ,
index : Any ,
docstore : Docstore ,
docstore : Docstore ,
index_to_docstore_id : Dict [ int , str ] ,
index_to_docstore_id : Dict [ int , str ] ,
relevance_score_fn : Optional [
Callable [ [ float ] , float ]
] = _default_relevance_score_fn ,
) :
) :
""" Initialize with necessary components. """
""" Initialize with necessary components. """
self . embedding_function = embedding_function
self . embedding_function = embedding_function
self . index = index
self . index = index
self . docstore = docstore
self . docstore = docstore
self . index_to_docstore_id = index_to_docstore_id
self . index_to_docstore_id = index_to_docstore_id
self . relevance_score_fn = relevance_score_fn
def __add (
def __add (
self ,
self ,
@ -318,7 +337,7 @@ class FAISS(VectorStore):
docstore = InMemoryDocstore (
docstore = InMemoryDocstore (
{ index_to_id [ i ] : doc for i , doc in enumerate ( documents ) }
{ index_to_id [ i ] : doc for i , doc in enumerate ( documents ) }
)
)
return cls ( embedding . embed_query , index , docstore , index_to_id )
return cls ( embedding . embed_query , index , docstore , index_to_id , * * kwargs )
@classmethod
@classmethod
def from_texts (
def from_texts (
@ -346,7 +365,13 @@ class FAISS(VectorStore):
faiss = FAISS . from_texts ( texts , embeddings )
faiss = FAISS . from_texts ( texts , embeddings )
"""
"""
embeddings = embedding . embed_documents ( texts )
embeddings = embedding . embed_documents ( texts )
return cls . __from ( texts , embeddings , embedding , metadatas , * * kwargs )
return cls . __from (
texts ,
embeddings ,
embedding ,
metadatas ,
* * kwargs ,
)
@classmethod
@classmethod
def from_embeddings (
def from_embeddings (
@ -375,7 +400,13 @@ class FAISS(VectorStore):
"""
"""
texts = [ t [ 0 ] for t in text_embeddings ]
texts = [ t [ 0 ] for t in text_embeddings ]
embeddings = [ t [ 1 ] for t in text_embeddings ]
embeddings = [ t [ 1 ] for t in text_embeddings ]
return cls . __from ( texts , embeddings , embedding , metadatas , * * kwargs )
return cls . __from (
texts ,
embeddings ,
embedding ,
metadatas ,
* * kwargs ,
)
def save_local ( self , folder_path : str , index_name : str = " index " ) - > None :
def save_local ( self , folder_path : str , index_name : str = " index " ) - > None :
""" Save FAISS index, docstore, and index_to_docstore_id to disk.
""" Save FAISS index, docstore, and index_to_docstore_id to disk.
@ -421,3 +452,18 @@ class FAISS(VectorStore):
with open ( path / " {index_name} .pkl " . format ( index_name = index_name ) , " rb " ) as f :
with open ( path / " {index_name} .pkl " . format ( index_name = index_name ) , " rb " ) as f :
docstore , index_to_docstore_id = pickle . load ( f )
docstore , index_to_docstore_id = pickle . load ( f )
return cls ( embeddings . embed_query , index , docstore , index_to_docstore_id )
return cls ( embeddings . embed_query , index , docstore , index_to_docstore_id )
def _similarity_search_with_relevance_scores (
self ,
query : str ,
k : int = 4 ,
* * kwargs : Any ,
) - > List [ Tuple [ Document , float ] ] :
""" Return docs and their similarity scores on a scale from 0 to 1. """
if self . relevance_score_fn is None :
raise ValueError (
" normalize_score_fn must be provided to "
" FAISS constructor to normalize scores "
)
docs_and_scores = self . similarity_search_with_score ( query , k = k )
return [ ( doc , self . relevance_score_fn ( score ) ) for doc , score in docs_and_scores ]