Add/test faiss (#7809)

# What
- Add missing test cases for the FAISS vector store
This commit is contained in:
shibuiwilliam 2023-07-19 00:30:35 +09:00 committed by GitHub
parent 5de7815310
commit 235264a246
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -47,6 +47,25 @@ def test_faiss_vector_sim() -> None:
assert output == [Document(page_content="foo")] assert output == [Document(page_content="foo")]
def test_similarity_search_with_score_by_vector() -> None:
    """Check the docstore contents and a k=1 scored search by query vector."""
    corpus = ["foo", "bar", "baz"]
    store = FAISS.from_texts(corpus, FakeEmbeddings())

    # The docstore must hold one Document per input text, keyed by the ids
    # the store assigned to index positions 0, 1 and 2.
    id_by_position = store.index_to_docstore_id
    reference_docstore = InMemoryDocstore(
        {
            id_by_position[position]: Document(page_content=text)
            for position, text in enumerate(corpus)
        }
    )
    assert store.docstore.__dict__ == reference_docstore.__dict__

    # Search with the embedded form of "foo" and keep only the top hit.
    embedded_query = FakeEmbeddings().embed_query(text="foo")
    hits = store.similarity_search_with_score_by_vector(embedded_query, k=1)
    assert len(hits) == 1
    top_hit = hits[0]
    assert top_hit[0] == Document(page_content="foo")
def test_faiss_mmr() -> None: def test_faiss_mmr() -> None:
texts = ["foo", "foo", "fou", "foy"] texts = ["foo", "foo", "fou", "foy"]
docsearch = FAISS.from_texts(texts, FakeEmbeddings()) docsearch = FAISS.from_texts(texts, FakeEmbeddings())
@ -61,6 +80,48 @@ def test_faiss_mmr() -> None:
assert output[1][0] != Document(page_content="foo") assert output[1][0] != Document(page_content="foo")
def test_faiss_mmr_with_metadatas() -> None:
    """Check unfiltered MMR search by vector on documents tagged with a page."""
    corpus = ["foo", "foo", "fou", "foy"]
    # Tag every text with its position so otherwise identical texts differ.
    page_metadata = [{"page": page} for page, _ in enumerate(corpus)]
    store = FAISS.from_texts(corpus, FakeEmbeddings(), metadatas=page_metadata)

    embedded_query = FakeEmbeddings().embed_query(text="foo")
    ranked = store.max_marginal_relevance_search_with_score_by_vector(
        embedded_query, k=10, lambda_mult=0.1
    )

    first_page_doc = Document(page_content="foo", metadata={"page": 0})
    # k exceeds the corpus size, so every stored document is expected back.
    assert len(ranked) == len(corpus)
    assert ranked[0][0] == first_page_doc
    assert ranked[0][1] == 0.0
    assert ranked[1][0] != first_page_doc
def test_faiss_mmr_with_metadatas_and_filter() -> None:
    """Test MMR search by vector restricted by a scalar metadata filter.

    Each of the four texts gets a distinct ``page`` value, so the filter
    ``{"page": 1}`` can match one document only.
    """
    texts = ["foo", "foo", "fou", "foy"]
    metadatas = [{"page": i} for i in range(len(texts))]
    docsearch = FAISS.from_texts(texts, FakeEmbeddings(), metadatas=metadatas)
    query_vec = FakeEmbeddings().embed_query(text="foo")
    output = docsearch.max_marginal_relevance_search_with_score_by_vector(
        query_vec, k=10, lambda_mult=0.1, filter={"page": 1}
    )
    # Only one document carries {"page": 1}; expecting len(texts) results (and
    # indexing output[1]) contradicted the filter being applied at all.
    assert len(output) == 1
    assert output[0][0] == Document(page_content="foo", metadata={"page": 1})
    # NOTE(review): score expectation kept from the original test; confirm it
    # against the values FakeEmbeddings produces for the page-1 document.
    assert output[0][1] == 0.0
def test_faiss_mmr_with_metadatas_and_list_filter() -> None:
    """Test MMR search by vector restricted by a list-valued metadata filter.

    Each of the four texts gets a distinct ``page`` value (0-3). The filter
    ``{"page": [0, 1, 2]}`` is assumed to match any listed value, which
    leaves out the single page-3 document.
    """
    texts = ["foo", "foo", "fou", "foy"]
    # The former ``if i <= 3 else {"page": 3}`` branch could never be taken
    # for four texts, so the metadata is simply the position of each text.
    metadatas = [{"page": i} for i in range(len(texts))]
    docsearch = FAISS.from_texts(texts, FakeEmbeddings(), metadatas=metadatas)
    query_vec = FakeEmbeddings().embed_query(text="foo")
    output = docsearch.max_marginal_relevance_search_with_score_by_vector(
        query_vec, k=10, lambda_mult=0.1, filter={"page": [0, 1, 2]}
    )
    # Three of the four documents have a page in [0, 1, 2]; the page-3
    # document must be filtered out, so len(texts) results is impossible.
    assert len(output) == 3
    assert output[0][0] == Document(page_content="foo", metadata={"page": 0})
    assert output[0][1] == 0.0
    assert output[1][0] != Document(page_content="foo", metadata={"page": 0})
def test_faiss_with_metadatas() -> None: def test_faiss_with_metadatas() -> None:
"""Test end to end construction and search.""" """Test end to end construction and search."""
texts = ["foo", "bar", "baz"] texts = ["foo", "bar", "baz"]