Allow k to be higher than doc size in max_marginal_relevance_search (#1187)

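Fixes issue #1186. For some reason, #1117 didn't seem to fix it. This change checks the selected index `i` for -1 before looking up its docstore id, instead of comparing the looked-up `_id` to -1 afterwards.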
This commit is contained in:
Andrew White 2023-02-20 19:39:13 -05:00 committed by GitHub
parent 159c560c95
commit c5015d77e2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 6 additions and 2 deletions

View File

@@ -188,10 +188,10 @@ class FAISS(VectorStore):
selected_indices = [indices[0][i] for i in mmr_selected] selected_indices = [indices[0][i] for i in mmr_selected]
docs = [] docs = []
for i in selected_indices: for i in selected_indices:
_id = self.index_to_docstore_id[i] if i == -1:
if _id == -1:
# This happens when not enough docs are returned. # This happens when not enough docs are returned.
continue continue
_id = self.index_to_docstore_id[i]
doc = self.docstore.search(_id) doc = self.docstore.search(_id)
if not isinstance(doc, Document): if not isinstance(doc, Document):
raise ValueError(f"Could not find document for id {_id}, got {doc}") raise ValueError(f"Could not find document for id {_id}, got {doc}")

View File

@@ -44,6 +44,10 @@ def test_faiss_vector_sim() -> None:
output = docsearch.similarity_search_by_vector(query_vec, k=1) output = docsearch.similarity_search_by_vector(query_vec, k=1)
assert output == [Document(page_content="foo")] assert output == [Document(page_content="foo")]
# make sure we can have k > docstore size
output = docsearch.max_marginal_relevance_search_by_vector(query_vec, k=10)
assert len(output) == len(texts)
def test_faiss_with_metadatas() -> None: def test_faiss_with_metadatas() -> None:
"""Test end to end construction and search.""" """Test end to end construction and search."""