mirror of
https://github.com/hwchase17/langchain
synced 2024-10-29 17:07:25 +00:00
315b0c09c6
This will break at the moment, but I wanted to get thoughts on the implementation. 1. Should add() be on the docstore interface? 2. Should InMemoryDocstore change to take a list of documents at init? (This makes it slightly easier to implement in FAISS -- if we think it is less clean, we could instead expose a method to get the number of documents currently in the dict, and perform the logic of creating the necessary dictionary in the FAISS.add_texts method.) Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
57 lines
1.8 KiB
Python
57 lines
1.8 KiB
Python
"""Test in memory docstore."""
|
|
import pytest
|
|
|
|
from langchain.docstore.document import Document
|
|
from langchain.docstore.in_memory import InMemoryDocstore
|
|
|
|
|
|
def test_document_found() -> None:
    """Check that searching for a stored ID returns its document."""
    store = InMemoryDocstore({"foo": Document(page_content="bar")})
    result = store.search("foo")
    assert isinstance(result, Document)
    assert result.page_content == "bar"
|
|
|
|
|
|
def test_document_not_found() -> None:
    """Check the value returned when the searched ID is absent."""
    stored = {"foo": Document(page_content="bar")}
    result = InMemoryDocstore(stored).search("bar")
    # A miss is expected to come back as this exact message string.
    assert result == "ID bar not found."
|
|
|
|
|
|
def test_adding_document() -> None:
    """Check that add() makes new documents searchable and keeps old ones."""
    store = InMemoryDocstore({"foo": Document(page_content="bar")})
    store.add({"bar": Document(page_content="foo")})

    # The newly added entry is retrievable under its own ID.
    added = store.search("bar")
    assert isinstance(added, Document)
    assert added.page_content == "foo"

    # The entry supplied at construction time is still intact.
    original = store.search("foo")
    assert isinstance(original, Document)
    assert original.page_content == "bar"
|
|
|
|
|
|
def test_adding_document_already_exists() -> None:
    """Check that add() raises when given an ID that is already stored."""
    store = InMemoryDocstore({"foo": Document(page_content="bar")})
    duplicate = {"foo": Document(page_content="foo")}

    # Re-using the existing "foo" ID is expected to raise ValueError.
    with pytest.raises(ValueError):
        store.add(duplicate)

    # The rejected add must not have replaced the original entry.
    existing = store.search("foo")
    assert isinstance(existing, Document)
    assert existing.page_content == "bar"
|