2024-01-27 15:25:19 +00:00
|
|
|
from langchain_community.vectorstores import FAISS
|
2023-10-16 09:22:42 +00:00
|
|
|
from application.vectorstore.base import BaseVectorStore
|
2023-09-27 15:25:57 +00:00
|
|
|
from application.core.settings import settings
|
|
|
|
|
|
|
|
class FaissStore(BaseVectorStore):
    """Vector store that wraps a langchain ``FAISS`` index."""

    def __init__(self, path, embeddings_key, docs_init=None):
        """Create the store from documents or from an index saved on disk.

        Args:
            path: Stored on ``self.path`` and handed to ``FAISS.load_local``
                when no initial documents are supplied.
            embeddings_key: Passed to ``self._get_embeddings`` together with
                ``settings.EMBEDDINGS_NAME``.
            docs_init: Optional documents. When truthy, the index is built
                from them with ``FAISS.from_documents`` and nothing is
                loaded from ``path``.

        Raises:
            AttributeError, ValueError: Propagated from
                ``assert_embedding_dimensions``.
        """
        super().__init__()
        self.path = path
        embedder = self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key)

        if not docs_init:
            # Nothing to index from scratch: reuse the index saved at ``path``.
            index = FAISS.load_local(self.path, embedder)
        else:
            index = FAISS.from_documents(docs_init, embedder)
        self.docsearch = index

        # Validate the freshly created/loaded index against the embeddings.
        self.assert_embedding_dimensions(embedder)

    def search(self, *args, **kwargs):
        """Forward all arguments to the index's ``similarity_search``."""
        index = self.docsearch
        return index.similarity_search(*args, **kwargs)

    def add_texts(self, *args, **kwargs):
        """Forward all arguments to the index's ``add_texts``."""
        index = self.docsearch
        return index.add_texts(*args, **kwargs)

    def save_local(self, *args, **kwargs):
        """Forward all arguments to the index's ``save_local``."""
        index = self.docsearch
        return index.save_local(*args, **kwargs)

    def delete_index(self, *args, **kwargs):
        """Forward all arguments to the index's ``delete``."""
        index = self.docsearch
        return index.delete(*args, **kwargs)

    def assert_embedding_dimensions(self, embeddings):
        """Verify the index dimension equals the embedding dimension.

        The check only runs when ``settings.EMBEDDINGS_NAME`` is
        ``"huggingface_sentence-transformers/all-mpnet-base-v2"``; for any
        other setting the method returns without doing anything.

        Args:
            embeddings: Embeddings object whose ``client[1]`` exposes
                ``word_embedding_dimension``.

        Raises:
            AttributeError: If ``word_embedding_dimension`` cannot be read
                from ``embeddings.client[1]``.
            ValueError: If that dimension differs from
                ``self.docsearch.index.d``.
        """
        if settings.EMBEDDINGS_NAME != "huggingface_sentence-transformers/all-mpnet-base-v2":
            return

        try:
            model_dim = embeddings.client[1].word_embedding_dimension
        except AttributeError as err:
            raise AttributeError(
                "word_embedding_dimension not found in embeddings.client[1]"
            ) from err

        index_dim = self.docsearch.index.d
        if model_dim != index_dim:
            raise ValueError(
                f"word_embedding_dimension ({model_dim}) "
                f"!= docsearch_index_word_embedding_dimension ({index_dim})"
            )
|