mirror of
https://github.com/hwchase17/langchain
synced 2024-10-29 17:07:25 +00:00
f0a258555b
Alternate implementation to PR #960. Again, only FAISS is implemented. If accepted, this can be added to the other vectorstores, or left as NotImplemented there. Suggestions welcome.
108 lines
4.0 KiB
Python
108 lines
4.0 KiB
Python
"""Test FAISS functionality."""
|
|
import tempfile
|
|
|
|
import pytest
|
|
|
|
from langchain.docstore.document import Document
|
|
from langchain.docstore.in_memory import InMemoryDocstore
|
|
from langchain.docstore.wikipedia import Wikipedia
|
|
from langchain.vectorstores.faiss import FAISS
|
|
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
|
|
|
|
|
|
def test_faiss() -> None:
    """Check construction from texts and a single-result text search.

    Builds a FAISS store from three texts, verifies that its docstore holds
    one plain ``Document`` per text under the ids recorded in
    ``index_to_docstore_id``, then checks that searching for "foo" with
    ``k=1`` returns the "foo" document.
    """
    corpus = ["foo", "bar", "baz"]
    store = FAISS.from_texts(corpus, FakeEmbeddings())

    # Docstore ids are produced by the store itself, so the expected docstore
    # is keyed through the store's own position -> id mapping.
    id_by_position = store.index_to_docstore_id
    reference_docstore = InMemoryDocstore(
        {
            id_by_position[position]: Document(page_content=text)
            for position, text in enumerate(corpus)
        }
    )
    assert store.docstore.__dict__ == reference_docstore.__dict__

    hits = store.similarity_search("foo", k=1)
    assert hits == [Document(page_content="foo")]
|
|
|
|
|
|
def test_faiss_vector_sim() -> None:
    """Check similarity search driven by a query vector instead of text.

    Builds the same three-text store as the text-search test, verifies the
    docstore contents, then embeds "foo" with ``FakeEmbeddings`` and checks
    that ``similarity_search_by_vector`` with ``k=1`` returns the "foo"
    document.
    """
    corpus = ["foo", "bar", "baz"]
    store = FAISS.from_texts(corpus, FakeEmbeddings())

    # Key the expected docstore through the store's own position -> id mapping.
    id_by_position = store.index_to_docstore_id
    expected_entries = {}
    for position, text in enumerate(corpus):
        expected_entries[id_by_position[position]] = Document(page_content=text)
    reference_docstore = InMemoryDocstore(expected_entries)
    assert store.docstore.__dict__ == reference_docstore.__dict__

    # Query with an explicit embedding rather than raw text.
    foo_vector = FakeEmbeddings().embed_query(text="foo")
    hits = store.similarity_search_by_vector(foo_vector, k=1)
    assert hits == [Document(page_content="foo")]
|
|
|
|
|
|
def test_faiss_with_metadatas() -> None:
    """Check construction and search when per-text metadata is supplied.

    Each text is given ``{"page": <position>}`` as metadata. The test verifies
    that the docstore holds documents carrying that metadata and that a
    ``k=1`` search for "foo" returns the "foo" document with its metadata.
    """
    corpus = ["foo", "bar", "baz"]
    page_metadata = [{"page": position} for position, _ in enumerate(corpus)]
    store = FAISS.from_texts(corpus, FakeEmbeddings(), metadatas=page_metadata)

    # Fresh metadata dicts are used for the expected documents so that they
    # never alias the dicts handed to the store.
    reference_docstore = InMemoryDocstore(
        {
            store.index_to_docstore_id[position]: Document(
                page_content=text, metadata={"page": position}
            )
            for position, text in enumerate(corpus)
        }
    )
    assert store.docstore.__dict__ == reference_docstore.__dict__

    hits = store.similarity_search("foo", k=1)
    assert hits == [Document(page_content="foo", metadata={"page": 0})]
|
|
|
|
|
|
def test_faiss_search_not_found() -> None:
    """Check that searching raises ValueError when documents are missing.

    The store is built normally, then its docstore is swapped for an empty
    one, so a subsequent search is expected to raise ``ValueError``.
    """
    store = FAISS.from_texts(["foo", "bar", "baz"], FakeEmbeddings())

    # Deliberately replace the docstore with an empty one to induce the error.
    store.docstore = InMemoryDocstore({})

    with pytest.raises(ValueError):
        store.similarity_search("foo")
|
|
|
|
|
|
def test_faiss_add_texts() -> None:
    """Check that texts added after construction are searchable.

    After building a three-text store, a second "foo" is added; a ``k=2``
    search for "foo" is then expected to return two "foo" documents.
    """
    # Initial store.
    store = FAISS.from_texts(["foo", "bar", "baz"], FakeEmbeddings())

    # Add a text identical to one that is already stored.
    store.add_texts(["foo"])

    wanted = 2
    hits = store.similarity_search("foo", k=wanted)
    expected_hits = [Document(page_content="foo") for _ in range(wanted)]
    assert hits == expected_hits
|
|
|
|
|
|
def test_faiss_add_texts_not_supported() -> None:
    """Check that add_texts raises ValueError for an unsupported docstore.

    The store is constructed directly with a ``Wikipedia`` docstore, no index
    and an empty id mapping; adding a text is expected to raise
    ``ValueError``.
    """
    embed_fn = FakeEmbeddings().embed_query
    read_only_docstore = Wikipedia()  # presumably cannot accept new documents
    # Arguments are passed positionally, exactly as the constructor is called
    # elsewhere in this test: embedding function, index, docstore, id mapping.
    store = FAISS(embed_fn, None, read_only_docstore, {})

    with pytest.raises(ValueError):
        store.add_texts(["foo"])
|
|
|
|
|
|
def test_faiss_local_save_load() -> None:
    """Test end to end serialization.

    Saves a FAISS store built from three texts with ``save_local``, loads it
    back with ``FAISS.load_local`` and checks that the reloaded store has an
    index.
    """
    import os

    texts = ["foo", "bar", "baz"]
    docsearch = FAISS.from_texts(texts, FakeEmbeddings())

    # Save to a not-yet-existing path inside a temporary directory instead of
    # the name of a still-open NamedTemporaryFile: reopening such a file by
    # name is not portable (it fails on Windows), and a fresh path stays valid
    # whether save_local writes a single file or creates a folder there.
    # NOTE(review): save_local is not visible here -- confirm which it does.
    with tempfile.TemporaryDirectory() as temp_dir:
        save_path = os.path.join(temp_dir, "faiss_index")
        docsearch.save_local(save_path)
        new_docsearch = FAISS.load_local(save_path, FakeEmbeddings())
    assert new_docsearch.index is not None
|