From c5015d77e23b24b3b65d803271f1fa9018d53a05 Mon Sep 17 00:00:00 2001
From: Andrew White
Date: Mon, 20 Feb 2023 19:39:13 -0500
Subject: [PATCH] Allow k to be higher than doc size in max_marginal_relevance_search (#1187)

Fixes issue #1186. For some reason, #1117 didn't seem to fix it.
---
 langchain/vectorstores/faiss.py                    | 4 ++--
 tests/integration_tests/vectorstores/test_faiss.py | 4 ++++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/langchain/vectorstores/faiss.py b/langchain/vectorstores/faiss.py
index e4bbd8db..df66c818 100644
--- a/langchain/vectorstores/faiss.py
+++ b/langchain/vectorstores/faiss.py
@@ -188,10 +188,10 @@ class FAISS(VectorStore):
         selected_indices = [indices[0][i] for i in mmr_selected]
         docs = []
         for i in selected_indices:
-            _id = self.index_to_docstore_id[i]
-            if _id == -1:
+            if i == -1:
                 # This happens when not enough docs are returned.
                 continue
+            _id = self.index_to_docstore_id[i]
             doc = self.docstore.search(_id)
             if not isinstance(doc, Document):
                 raise ValueError(f"Could not find document for id {_id}, got {doc}")
diff --git a/tests/integration_tests/vectorstores/test_faiss.py b/tests/integration_tests/vectorstores/test_faiss.py
index 4cfe18ed..d1fc9c5e 100644
--- a/tests/integration_tests/vectorstores/test_faiss.py
+++ b/tests/integration_tests/vectorstores/test_faiss.py
@@ -44,6 +44,10 @@ def test_faiss_vector_sim() -> None:
     output = docsearch.similarity_search_by_vector(query_vec, k=1)
     assert output == [Document(page_content="foo")]
 
+    # make sure we can have k > docstore size
+    output = docsearch.max_marginal_relevance_search_by_vector(query_vec, k=10)
+    assert len(output) == len(texts)
+
 
 def test_faiss_with_metadatas() -> None:
     """Test end to end construction and search."""