From 9b6ae46e922e1f663ff8049929acaf092f5eadda Mon Sep 17 00:00:00 2001 From: asoderlind Date: Thu, 12 Oct 2023 20:24:25 +0200 Subject: [PATCH] raise more legible error if the embedding vector dims don't match --- application/vectorstore/faiss.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/application/vectorstore/faiss.py b/application/vectorstore/faiss.py index 217b045..c5af0e6 100644 --- a/application/vectorstore/faiss.py +++ b/application/vectorstore/faiss.py @@ -2,6 +2,7 @@ from application.vectorstore.base import BaseVectorStore from langchain.vectorstores import FAISS from application.core.settings import settings +HUGGINGFACE_MODEL_NAME = "huggingface_sentence-transformers/all-mpnet-base-v2" class FaissStore(BaseVectorStore): def __init__(self, path, embeddings_key, docs_init=None): @@ -12,9 +13,19 @@ class FaissStore(BaseVectorStore): docs_init, self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key) ) else: + embeddings = self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key) self.docsearch = FAISS.load_local( - self.path, self._get_embeddings(settings.EMBEDDINGS_NAME, settings.EMBEDDINGS_KEY) + self.path, embeddings ) + + # Check that the word_embedding_dimension of the index matches the word_embedding_dimension of the embeddings + if settings.EMBEDDINGS_NAME == HUGGINGFACE_MODEL_NAME: + try: + word_embedding_dimension = embeddings.client[1].word_embedding_dimension + except AttributeError as e: + raise AttributeError("word_embedding_dimension not found in embeddings.client[1]") from e + if word_embedding_dimension != self.docsearch.index.d: + raise ValueError("word_embedding_dimension != docsearch_index_word_embedding_dimension") def search(self, *args, **kwargs): return self.docsearch.similarity_search(*args, **kwargs)