"""Test FAISS functionality.""" import datetime import math import tempfile from typing import Union import pytest from langchain_core.documents import Document from langchain_community.docstore.base import Docstore from langchain_community.docstore.in_memory import InMemoryDocstore from langchain_community.vectorstores.faiss import FAISS from langchain_community.vectorstores.utils import DistanceStrategy from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings _PAGE_CONTENT = """This is a page about LangChain. It is a really cool framework. What isn't there to love about langchain? Made in 2022.""" class FakeDocstore(Docstore): """Fake docstore for testing purposes.""" def search(self, search: str) -> Union[str, Document]: """Return the fake document.""" document = Document(page_content=_PAGE_CONTENT) return document @pytest.mark.requires("faiss") def test_faiss() -> None: """Test end to end construction and search.""" texts = ["foo", "bar", "baz"] docsearch = FAISS.from_texts(texts, FakeEmbeddings()) index_to_id = docsearch.index_to_docstore_id expected_docstore = InMemoryDocstore( { index_to_id[0]: Document(page_content="foo"), index_to_id[1]: Document(page_content="bar"), index_to_id[2]: Document(page_content="baz"), } ) assert docsearch.docstore.__dict__ == expected_docstore.__dict__ output = docsearch.similarity_search("foo", k=1) assert output == [Document(page_content="foo")] @pytest.mark.requires("faiss") async def test_faiss_afrom_texts() -> None: """Test end to end construction and search.""" texts = ["foo", "bar", "baz"] docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings()) index_to_id = docsearch.index_to_docstore_id expected_docstore = InMemoryDocstore( { index_to_id[0]: Document(page_content="foo"), index_to_id[1]: Document(page_content="bar"), index_to_id[2]: Document(page_content="baz"), } ) assert docsearch.docstore.__dict__ == expected_docstore.__dict__ output = await docsearch.asimilarity_search("foo", k=1) assert output == [Document(page_content="foo")] @pytest.mark.requires("faiss") def test_faiss_vector_sim() -> None: """Test vector similarity.""" texts = ["foo", "bar", "baz"] docsearch = FAISS.from_texts(texts, FakeEmbeddings()) index_to_id = docsearch.index_to_docstore_id expected_docstore = InMemoryDocstore( { index_to_id[0]: Document(page_content="foo"), index_to_id[1]: Document(page_content="bar"), index_to_id[2]: Document(page_content="baz"), } ) assert docsearch.docstore.__dict__ == expected_docstore.__dict__ query_vec = FakeEmbeddings().embed_query(text="foo") output = docsearch.similarity_search_by_vector(query_vec, k=1) assert output == [Document(page_content="foo")] @pytest.mark.requires("faiss") async def test_faiss_async_vector_sim() -> None: """Test vector similarity.""" texts = ["foo", "bar", "baz"] docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings()) index_to_id = docsearch.index_to_docstore_id expected_docstore = InMemoryDocstore( { index_to_id[0]: Document(page_content="foo"), index_to_id[1]: Document(page_content="bar"), index_to_id[2]: Document(page_content="baz"), } ) assert docsearch.docstore.__dict__ == expected_docstore.__dict__ query_vec = await FakeEmbeddings().aembed_query(text="foo") output = await docsearch.asimilarity_search_by_vector(query_vec, k=1) assert output == [Document(page_content="foo")] @pytest.mark.requires("faiss") def test_faiss_vector_sim_with_score_threshold() -> None: """Test vector similarity.""" texts = ["foo", "bar", "baz"] docsearch = FAISS.from_texts(texts, FakeEmbeddings()) index_to_id = docsearch.index_to_docstore_id expected_docstore = InMemoryDocstore( { index_to_id[0]: Document(page_content="foo"), index_to_id[1]: Document(page_content="bar"), index_to_id[2]: Document(page_content="baz"), } ) assert docsearch.docstore.__dict__ == expected_docstore.__dict__ query_vec = FakeEmbeddings().embed_query(text="foo") output = docsearch.similarity_search_by_vector(query_vec, k=2, score_threshold=0.2) assert output == [Document(page_content="foo")] @pytest.mark.requires("faiss") async def test_faiss_vector_async_sim_with_score_threshold() -> None: """Test vector similarity.""" texts = ["foo", "bar", "baz"] docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings()) index_to_id = docsearch.index_to_docstore_id expected_docstore = InMemoryDocstore( { index_to_id[0]: Document(page_content="foo"), index_to_id[1]: Document(page_content="bar"), index_to_id[2]: Document(page_content="baz"), } ) assert docsearch.docstore.__dict__ == expected_docstore.__dict__ query_vec = await FakeEmbeddings().aembed_query(text="foo") output = await docsearch.asimilarity_search_by_vector( query_vec, k=2, score_threshold=0.2 ) assert output == [Document(page_content="foo")] @pytest.mark.requires("faiss") def test_similarity_search_with_score_by_vector() -> None: """Test vector similarity with score by vector.""" texts = ["foo", "bar", "baz"] docsearch = FAISS.from_texts(texts, FakeEmbeddings()) index_to_id = docsearch.index_to_docstore_id expected_docstore = InMemoryDocstore( { index_to_id[0]: Document(page_content="foo"), index_to_id[1]: Document(page_content="bar"), index_to_id[2]: Document(page_content="baz"), } ) assert docsearch.docstore.__dict__ == expected_docstore.__dict__ query_vec = FakeEmbeddings().embed_query(text="foo") output = docsearch.similarity_search_with_score_by_vector(query_vec, k=1) assert len(output) == 1 assert output[0][0] == Document(page_content="foo") @pytest.mark.requires("faiss") async def test_similarity_async_search_with_score_by_vector() -> None: """Test vector similarity with score by vector.""" texts = ["foo", "bar", "baz"] docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings()) index_to_id = docsearch.index_to_docstore_id expected_docstore = InMemoryDocstore( { index_to_id[0]: Document(page_content="foo"), index_to_id[1]: Document(page_content="bar"), index_to_id[2]: Document(page_content="baz"), } ) assert docsearch.docstore.__dict__ == expected_docstore.__dict__ query_vec = await FakeEmbeddings().aembed_query(text="foo") output = await docsearch.asimilarity_search_with_score_by_vector(query_vec, k=1) assert len(output) == 1 assert output[0][0] == Document(page_content="foo") @pytest.mark.requires("faiss") def test_similarity_search_with_score_by_vector_with_score_threshold() -> None: """Test vector similarity with score by vector.""" texts = ["foo", "bar", "baz"] docsearch = FAISS.from_texts(texts, FakeEmbeddings()) index_to_id = docsearch.index_to_docstore_id expected_docstore = InMemoryDocstore( { index_to_id[0]: Document(page_content="foo"), index_to_id[1]: Document(page_content="bar"), index_to_id[2]: Document(page_content="baz"), } ) assert docsearch.docstore.__dict__ == expected_docstore.__dict__ query_vec = FakeEmbeddings().embed_query(text="foo") output = docsearch.similarity_search_with_score_by_vector( query_vec, k=2, score_threshold=0.2, ) assert len(output) == 1 assert output[0][0] == Document(page_content="foo") assert output[0][1] < 0.2 @pytest.mark.requires("faiss") async def test_sim_asearch_with_score_by_vector_with_score_threshold() -> None: """Test vector similarity with score by vector.""" texts = ["foo", "bar", "baz"] docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings()) index_to_id = docsearch.index_to_docstore_id expected_docstore = InMemoryDocstore( { index_to_id[0]: Document(page_content="foo"), index_to_id[1]: Document(page_content="bar"), index_to_id[2]: Document(page_content="baz"), } ) assert docsearch.docstore.__dict__ == expected_docstore.__dict__ query_vec = await FakeEmbeddings().aembed_query(text="foo") output = await docsearch.asimilarity_search_with_score_by_vector( query_vec, k=2, score_threshold=0.2, ) assert len(output) == 1 assert output[0][0] == Document(page_content="foo") assert output[0][1] < 0.2 @pytest.mark.requires("faiss") def test_faiss_mmr() -> None: texts = ["foo", "foo", "fou", "foy"] docsearch = FAISS.from_texts(texts, FakeEmbeddings()) query_vec = FakeEmbeddings().embed_query(text="foo") # make sure we can have k > docstore size output = docsearch.max_marginal_relevance_search_with_score_by_vector( query_vec, k=10, lambda_mult=0.1 ) assert len(output) == len(texts) assert output[0][0] == Document(page_content="foo") assert output[0][1] == 0.0 assert output[1][0] != Document(page_content="foo") @pytest.mark.requires("faiss") async def test_faiss_async_mmr() -> None: texts = ["foo", "foo", "fou", "foy"] docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings()) query_vec = await FakeEmbeddings().aembed_query(text="foo") # make sure we can have k > docstore size output = await docsearch.amax_marginal_relevance_search_with_score_by_vector( query_vec, k=10, lambda_mult=0.1 ) assert len(output) == len(texts) assert output[0][0] == Document(page_content="foo") assert output[0][1] == 0.0 assert output[1][0] != Document(page_content="foo") @pytest.mark.requires("faiss") def test_faiss_mmr_with_metadatas() -> None: texts = ["foo", "foo", "fou", "foy"] metadatas = [{"page": i} for i in range(len(texts))] docsearch = FAISS.from_texts(texts, FakeEmbeddings(), metadatas=metadatas) query_vec = FakeEmbeddings().embed_query(text="foo") output = docsearch.max_marginal_relevance_search_with_score_by_vector( query_vec, k=10, lambda_mult=0.1 ) assert len(output) == len(texts) assert output[0][0] == Document(page_content="foo", metadata={"page": 0}) assert output[0][1] == 0.0 assert output[1][0] != Document(page_content="foo", metadata={"page": 0}) @pytest.mark.requires("faiss") async def test_faiss_async_mmr_with_metadatas() -> None: texts = ["foo", "foo", "fou", "foy"] metadatas = [{"page": i} for i in range(len(texts))] docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings(), metadatas=metadatas) query_vec = await FakeEmbeddings().aembed_query(text="foo") output = await docsearch.amax_marginal_relevance_search_with_score_by_vector( query_vec, k=10, lambda_mult=0.1 ) assert len(output) == len(texts) assert output[0][0] == Document(page_content="foo", metadata={"page": 0}) assert output[0][1] == 0.0 assert output[1][0] != Document(page_content="foo", metadata={"page": 0}) @pytest.mark.requires("faiss") def test_faiss_mmr_with_metadatas_and_filter() -> None: texts = ["foo", "foo", "fou", "foy"] metadatas = [{"page": i} for i in range(len(texts))] docsearch = FAISS.from_texts(texts, FakeEmbeddings(), metadatas=metadatas) query_vec = FakeEmbeddings().embed_query(text="foo") output = docsearch.max_marginal_relevance_search_with_score_by_vector( query_vec, k=10, lambda_mult=0.1, filter={"page": 1} ) assert len(output) == 1 assert output[0][0] == Document(page_content="foo", metadata={"page": 1}) assert output[0][1] == 0.0 assert output == docsearch.max_marginal_relevance_search_with_score_by_vector( query_vec, k=10, lambda_mult=0.1, filter=lambda di: di["page"] == 1 ) @pytest.mark.requires("faiss") async def test_faiss_async_mmr_with_metadatas_and_filter() -> None: texts = ["foo", "foo", "fou", "foy"] metadatas = [{"page": i} for i in range(len(texts))] docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings(), metadatas=metadatas) query_vec = await FakeEmbeddings().aembed_query(text="foo") output = await docsearch.amax_marginal_relevance_search_with_score_by_vector( query_vec, k=10, lambda_mult=0.1, filter={"page": 1} ) assert len(output) == 1 assert output[0][0] == Document(page_content="foo", metadata={"page": 1}) assert output[0][1] == 0.0 assert ( output == await docsearch.amax_marginal_relevance_search_with_score_by_vector( query_vec, k=10, lambda_mult=0.1, filter=lambda di: di["page"] == 1 ) ) @pytest.mark.requires("faiss") def test_faiss_mmr_with_metadatas_and_list_filter() -> None: texts = ["foo", "foo", "fou", "foy"] metadatas = [{"page": i} if i <= 3 else {"page": 3} for i in range(len(texts))] docsearch = FAISS.from_texts(texts, FakeEmbeddings(), metadatas=metadatas) query_vec = FakeEmbeddings().embed_query(text="foo") output = docsearch.max_marginal_relevance_search_with_score_by_vector( query_vec, k=10, lambda_mult=0.1, filter={"page": [0, 1, 2]} ) assert len(output) == 3 assert output[0][0] == Document(page_content="foo", metadata={"page": 0}) assert output[0][1] == 0.0 assert output[1][0] != Document(page_content="foo", metadata={"page": 0}) assert output == docsearch.max_marginal_relevance_search_with_score_by_vector( query_vec, k=10, lambda_mult=0.1, filter=lambda di: di["page"] in [0, 1, 2] ) @pytest.mark.requires("faiss") async def test_faiss_async_mmr_with_metadatas_and_list_filter() -> None: texts = ["foo", "foo", "fou", "foy"] metadatas = [{"page": i} if i <= 3 else {"page": 3} for i in range(len(texts))] docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings(), metadatas=metadatas) query_vec = await FakeEmbeddings().aembed_query(text="foo") output = await docsearch.amax_marginal_relevance_search_with_score_by_vector( query_vec, k=10, lambda_mult=0.1, filter={"page": [0, 1, 2]} ) assert len(output) == 3 assert output[0][0] == Document(page_content="foo", metadata={"page": 0}) assert output[0][1] == 0.0 assert output[1][0] != Document(page_content="foo", metadata={"page": 0}) assert output == ( await docsearch.amax_marginal_relevance_search_with_score_by_vector( query_vec, k=10, lambda_mult=0.1, filter=lambda di: di["page"] in [0, 1, 2] ) ) @pytest.mark.requires("faiss") def test_faiss_with_metadatas() -> None: """Test end to end construction and search.""" texts = ["foo", "bar", "baz"] metadatas = [{"page": i} for i in range(len(texts))] docsearch = FAISS.from_texts(texts, FakeEmbeddings(), metadatas=metadatas) expected_docstore = InMemoryDocstore( { docsearch.index_to_docstore_id[0]: Document( page_content="foo", metadata={"page": 0} ), docsearch.index_to_docstore_id[1]: Document( page_content="bar", metadata={"page": 1} ), docsearch.index_to_docstore_id[2]: Document( page_content="baz", metadata={"page": 2} ), } ) assert docsearch.docstore.__dict__ == expected_docstore.__dict__ output = docsearch.similarity_search("foo", k=1) assert output == [Document(page_content="foo", metadata={"page": 0})] @pytest.mark.requires("faiss") async def test_faiss_async_with_metadatas() -> None: """Test end to end construction and search.""" texts = ["foo", "bar", "baz"] metadatas = [{"page": i} for i in range(len(texts))] docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings(), metadatas=metadatas) expected_docstore = InMemoryDocstore( { docsearch.index_to_docstore_id[0]: Document( page_content="foo", metadata={"page": 0} ), docsearch.index_to_docstore_id[1]: Document( page_content="bar", metadata={"page": 1} ), docsearch.index_to_docstore_id[2]: Document( page_content="baz", metadata={"page": 2} ), } ) assert docsearch.docstore.__dict__ == expected_docstore.__dict__ output = await docsearch.asimilarity_search("foo", k=1) assert output == [Document(page_content="foo", metadata={"page": 0})] @pytest.mark.requires("faiss") def test_faiss_with_metadatas_and_filter() -> None: texts = ["foo", "bar", "baz"] metadatas = [{"page": i} for i in range(len(texts))] docsearch = FAISS.from_texts(texts, FakeEmbeddings(), metadatas=metadatas) expected_docstore = InMemoryDocstore( { docsearch.index_to_docstore_id[0]: Document( page_content="foo", metadata={"page": 0} ), docsearch.index_to_docstore_id[1]: Document( page_content="bar", metadata={"page": 1} ), docsearch.index_to_docstore_id[2]: Document( page_content="baz", metadata={"page": 2} ), } ) assert docsearch.docstore.__dict__ == expected_docstore.__dict__ output = docsearch.similarity_search("foo", k=1, filter={"page": 1}) # make sure it returns the result that matches the filter. # Not the one who's text matches better. assert output == [Document(page_content="bar", metadata={"page": 1})] assert output == docsearch.similarity_search( "foo", k=1, filter=lambda di: di["page"] == 1 ) @pytest.mark.requires("faiss") async def test_faiss_async_with_metadatas_and_filter() -> None: texts = ["foo", "bar", "baz"] metadatas = [{"page": i} for i in range(len(texts))] docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings(), metadatas=metadatas) expected_docstore = InMemoryDocstore( { docsearch.index_to_docstore_id[0]: Document( page_content="foo", metadata={"page": 0} ), docsearch.index_to_docstore_id[1]: Document( page_content="bar", metadata={"page": 1} ), docsearch.index_to_docstore_id[2]: Document( page_content="baz", metadata={"page": 2} ), } ) assert docsearch.docstore.__dict__ == expected_docstore.__dict__ output = await docsearch.asimilarity_search("foo", k=1, filter={"page": 1}) # make sure it returns the result that matches the filter. # Not the one who's text matches better. assert output == [Document(page_content="bar", metadata={"page": 1})] assert output == await docsearch.asimilarity_search( "foo", k=1, filter=lambda di: di["page"] == 1 ) @pytest.mark.requires("faiss") def test_faiss_with_metadatas_and_list_filter() -> None: texts = ["foo", "bar", "baz", "foo", "qux"] metadatas = [{"page": i} if i <= 3 else {"page": 3} for i in range(len(texts))] docsearch = FAISS.from_texts(texts, FakeEmbeddings(), metadatas=metadatas) expected_docstore = InMemoryDocstore( { docsearch.index_to_docstore_id[0]: Document( page_content="foo", metadata={"page": 0} ), docsearch.index_to_docstore_id[1]: Document( page_content="bar", metadata={"page": 1} ), docsearch.index_to_docstore_id[2]: Document( page_content="baz", metadata={"page": 2} ), docsearch.index_to_docstore_id[3]: Document( page_content="foo", metadata={"page": 3} ), docsearch.index_to_docstore_id[4]: Document( page_content="qux", metadata={"page": 3} ), } ) assert docsearch.docstore.__dict__ == expected_docstore.__dict__ output = docsearch.similarity_search("foor", k=1, filter={"page": [0, 1, 2]}) assert output == [Document(page_content="foo", metadata={"page": 0})] assert output == docsearch.similarity_search( "foor", k=1, filter=lambda di: di["page"] in [0, 1, 2] ) @pytest.mark.requires("faiss") async def test_faiss_async_with_metadatas_and_list_filter() -> None: texts = ["foo", "bar", "baz", "foo", "qux"] metadatas = [{"page": i} if i <= 3 else {"page": 3} for i in range(len(texts))] docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings(), metadatas=metadatas) expected_docstore = InMemoryDocstore( { docsearch.index_to_docstore_id[0]: Document( page_content="foo", metadata={"page": 0} ), docsearch.index_to_docstore_id[1]: Document( page_content="bar", metadata={"page": 1} ), docsearch.index_to_docstore_id[2]: Document( page_content="baz", metadata={"page": 2} ), docsearch.index_to_docstore_id[3]: Document( page_content="foo", metadata={"page": 3} ), docsearch.index_to_docstore_id[4]: Document( page_content="qux", metadata={"page": 3} ), } ) assert docsearch.docstore.__dict__ == expected_docstore.__dict__ output = await docsearch.asimilarity_search("foor", k=1, filter={"page": [0, 1, 2]}) assert output == [Document(page_content="foo", metadata={"page": 0})] assert output == await docsearch.asimilarity_search( "foor", k=1, filter=lambda di: di["page"] in [0, 1, 2] ) @pytest.mark.requires("faiss") def test_faiss_search_not_found() -> None: """Test what happens when document is not found.""" texts = ["foo", "bar", "baz"] docsearch = FAISS.from_texts(texts, FakeEmbeddings()) # Get rid of the docstore to purposefully induce errors. docsearch.docstore = InMemoryDocstore({}) with pytest.raises(ValueError): docsearch.similarity_search("foo") @pytest.mark.requires("faiss") async def test_faiss_async_search_not_found() -> None: """Test what happens when document is not found.""" texts = ["foo", "bar", "baz"] docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings()) # Get rid of the docstore to purposefully induce errors. docsearch.docstore = InMemoryDocstore({}) with pytest.raises(ValueError): await docsearch.asimilarity_search("foo") @pytest.mark.requires("faiss") def test_faiss_add_texts() -> None: """Test end to end adding of texts.""" # Create initial doc store. texts = ["foo", "bar", "baz"] docsearch = FAISS.from_texts(texts, FakeEmbeddings()) # Test adding a similar document as before. docsearch.add_texts(["foo"]) output = docsearch.similarity_search("foo", k=2) assert output == [Document(page_content="foo"), Document(page_content="foo")] @pytest.mark.requires("faiss") async def test_faiss_async_add_texts() -> None: """Test end to end adding of texts.""" # Create initial doc store. texts = ["foo", "bar", "baz"] docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings()) # Test adding a similar document as before. await docsearch.aadd_texts(["foo"]) output = await docsearch.asimilarity_search("foo", k=2) assert output == [Document(page_content="foo"), Document(page_content="foo")] @pytest.mark.requires("faiss") def test_faiss_add_texts_not_supported() -> None: """Test adding of texts to a docstore that doesn't support it.""" docsearch = FAISS(FakeEmbeddings(), None, FakeDocstore(), {}) with pytest.raises(ValueError): docsearch.add_texts(["foo"]) @pytest.mark.requires("faiss") async def test_faiss_async_add_texts_not_supported() -> None: """Test adding of texts to a docstore that doesn't support it.""" docsearch = FAISS(FakeEmbeddings(), None, FakeDocstore(), {}) with pytest.raises(ValueError): await docsearch.aadd_texts(["foo"]) @pytest.mark.requires("faiss") def test_faiss_local_save_load() -> None: """Test end to end serialization.""" texts = ["foo", "bar", "baz"] docsearch = FAISS.from_texts(texts, FakeEmbeddings()) temp_timestamp = datetime.datetime.utcnow().strftime("%Y%m%d-%H%M%S") with tempfile.TemporaryDirectory(suffix="_" + temp_timestamp + "/") as temp_folder: docsearch.save_local(temp_folder) new_docsearch = FAISS.load_local( temp_folder, FakeEmbeddings(), allow_dangerous_deserialization=True ) assert new_docsearch.index is not None @pytest.mark.requires("faiss") async def test_faiss_async_local_save_load() -> None: """Test end to end serialization.""" texts = ["foo", "bar", "baz"] docsearch = await FAISS.afrom_texts(texts, FakeEmbeddings()) temp_timestamp = datetime.datetime.utcnow().strftime("%Y%m%d-%H%M%S") with tempfile.TemporaryDirectory(suffix="_" + temp_timestamp + "/") as temp_folder: docsearch.save_local(temp_folder) new_docsearch = FAISS.load_local( temp_folder, FakeEmbeddings(), allow_dangerous_deserialization=True ) assert new_docsearch.index is not None @pytest.mark.requires("faiss") def test_faiss_similarity_search_with_relevance_scores() -> None: """Test the similarity search with normalized similarities.""" texts = ["foo", "bar", "baz"] docsearch = FAISS.from_texts( texts, FakeEmbeddings(), relevance_score_fn=lambda score: 1.0 - score / math.sqrt(2), ) outputs = docsearch.similarity_search_with_relevance_scores("foo", k=1) output, score = outputs[0] assert output == Document(page_content="foo") assert score == 1.0 @pytest.mark.requires("faiss") async def test_faiss_async_similarity_search_with_relevance_scores() -> None: """Test the similarity search with normalized similarities.""" texts = ["foo", "bar", "baz"] docsearch = await FAISS.afrom_texts( texts, FakeEmbeddings(), relevance_score_fn=lambda score: 1.0 - score / math.sqrt(2), ) outputs = await docsearch.asimilarity_search_with_relevance_scores("foo", k=1) output, score = outputs[0] assert output == Document(page_content="foo") assert score == 1.0 @pytest.mark.requires("faiss") def test_faiss_similarity_search_with_relevance_scores_with_threshold() -> None: """Test the similarity search with normalized similarities with score threshold.""" texts = ["foo", "bar", "baz"] docsearch = FAISS.from_texts( texts, FakeEmbeddings(), relevance_score_fn=lambda score: 1.0 - score / math.sqrt(2), ) outputs = docsearch.similarity_search_with_relevance_scores( "foo", k=2, score_threshold=0.5 ) assert len(outputs) == 1 output, score = outputs[0] assert output == Document(page_content="foo") assert score == 1.0 @pytest.mark.requires("faiss") async def test_faiss_asimilarity_search_with_relevance_scores_with_threshold() -> None: """Test the similarity search with normalized similarities with score threshold.""" texts = ["foo", "bar", "baz"] docsearch = await FAISS.afrom_texts( texts, FakeEmbeddings(), relevance_score_fn=lambda score: 1.0 - score / math.sqrt(2), ) outputs = await docsearch.asimilarity_search_with_relevance_scores( "foo", k=2, score_threshold=0.5 ) assert len(outputs) == 1 output, score = outputs[0] assert output == Document(page_content="foo") assert score == 1.0 @pytest.mark.requires("faiss") def test_faiss_invalid_normalize_fn() -> None: """Test the similarity search with normalized similarities.""" texts = ["foo", "bar", "baz"] docsearch = FAISS.from_texts( texts, FakeEmbeddings(), relevance_score_fn=lambda _: 2.0 ) with pytest.warns(Warning, match="scores must be between"): docsearch.similarity_search_with_relevance_scores("foo", k=1) @pytest.mark.requires("faiss") async def test_faiss_async_invalid_normalize_fn() -> None: """Test the similarity search with normalized similarities.""" texts = ["foo", "bar", "baz"] docsearch = await FAISS.afrom_texts( texts, FakeEmbeddings(), relevance_score_fn=lambda _: 2.0 ) with pytest.warns(Warning, match="scores must be between"): await docsearch.asimilarity_search_with_relevance_scores("foo", k=1) @pytest.mark.requires("faiss") def test_missing_normalize_score_fn() -> None: """Test doesn't perform similarity search without a valid distance strategy.""" texts = ["foo", "bar", "baz"] faiss_instance = FAISS.from_texts(texts, FakeEmbeddings(), distance_strategy="fake") with pytest.raises(ValueError): faiss_instance.similarity_search_with_relevance_scores("foo", k=2) @pytest.mark.skip(reason="old relevance score feature") @pytest.mark.requires("faiss") def test_ip_score() -> None: embedding = FakeEmbeddings() vector = embedding.embed_query("hi") assert vector == [1] * 9 + [0], f"FakeEmbeddings() has changed, produced {vector}" db = FAISS.from_texts( ["sundays coming so i drive my car"], embedding=FakeEmbeddings(), distance_strategy=DistanceStrategy.MAX_INNER_PRODUCT, ) scores = db.similarity_search_with_relevance_scores("sundays", k=1) assert len(scores) == 1, "only one vector should be in db" _, score = scores[0] assert ( score == 1 ), f"expected inner product of equivalent vectors to be 1, not {score}" @pytest.mark.requires("faiss") async def test_async_missing_normalize_score_fn() -> None: """Test doesn't perform similarity search without a valid distance strategy.""" texts = ["foo", "bar", "baz"] faiss_instance = await FAISS.afrom_texts( texts, FakeEmbeddings(), distance_strategy="fake" ) with pytest.raises(ValueError): await faiss_instance.asimilarity_search_with_relevance_scores("foo", k=2) @pytest.mark.requires("faiss") def test_delete() -> None: """Test the similarity search with normalized similarities.""" ids = ["a", "b", "c"] docsearch = FAISS.from_texts(["foo", "bar", "baz"], FakeEmbeddings(), ids=ids) docsearch.delete(ids[1:2]) result = docsearch.similarity_search("bar", k=2) assert sorted([d.page_content for d in result]) == ["baz", "foo"] assert docsearch.index_to_docstore_id == {0: ids[0], 1: ids[2]} @pytest.mark.requires("faiss") async def test_async_delete() -> None: """Test the similarity search with normalized similarities.""" ids = ["a", "b", "c"] docsearch = await FAISS.afrom_texts( ["foo", "bar", "baz"], FakeEmbeddings(), ids=ids ) docsearch.delete(ids[1:2]) result = await docsearch.asimilarity_search("bar", k=2) assert sorted([d.page_content for d in result]) == ["baz", "foo"] assert docsearch.index_to_docstore_id == {0: ids[0], 1: ids[2]} @pytest.mark.requires("faiss") def test_faiss_with_duplicate_ids() -> None: """Test whether FAISS raises an exception for duplicate ids.""" texts = ["foo", "bar", "baz"] duplicate_ids = ["id1", "id1", "id2"] with pytest.raises(ValueError) as exc_info: FAISS.from_texts(texts, FakeEmbeddings(), ids=duplicate_ids) assert "Duplicate ids found in the ids list." in str(exc_info.value)