diff --git a/langchain/docstore/arbitrary_fn.py b/langchain/docstore/arbitrary_fn.py
new file mode 100644
index 00000000..f062f6ec
--- /dev/null
+++ b/langchain/docstore/arbitrary_fn.py
@@ -0,0 +1,49 @@
+from typing import Callable, Union
+
+from langchain.docstore.base import Docstore
+from langchain.schema import Document
+
+
+class DocstoreFn(Docstore):
+    """Docstore backed by an arbitrary lookup function.
+
+    Useful when:
+    * building an InMemoryDocstore/dict up front is expensive
+    * documents are retrieved from remote sources
+    * existing objects should simply be reused
+    """
+
+    def __init__(
+        self,
+        lookup_fn: Callable[[str], Union[Document, str]],
+    ):
+        """Keep the callable used to resolve search strings.
+
+        Args:
+            lookup_fn: Called with the search string; annotated to return
+                either a ``Document`` or a ``str`` of page content.
+        """
+        self._lookup_fn = lookup_fn
+
+    def search(self, search: str) -> Document:
+        """Resolve ``search`` through the lookup function.
+
+        Args:
+            search: Key passed unchanged to the lookup function.
+
+        Returns:
+            The ``Document`` returned by the lookup function, or a new
+            ``Document`` wrapping a returned string, with the search
+            string recorded as its ``source`` metadata.
+
+        Raises:
+            ValueError: If the lookup function returns neither a ``str``
+                nor a ``Document``.
+        """
+        found = self._lookup_fn(search)
+        if isinstance(found, str):
+            # NOTE: assume the search string is the source ID
+            return Document(page_content=found, metadata={"source": search})
+        if isinstance(found, Document):
+            return found
+        raise ValueError(f"Unexpected type of document {type(found)}")
diff --git a/tests/unit_tests/docstore/test_arbitrary_fn.py b/tests/unit_tests/docstore/test_arbitrary_fn.py
new file mode 100644
index 00000000..728bfded
--- /dev/null
+++ b/tests/unit_tests/docstore/test_arbitrary_fn.py
@@ -0,0 +1,13 @@
+from langchain.docstore.arbitrary_fn import DocstoreFn
+from langchain.schema import Document
+
+
+def test_document_found() -> None:
+    """Check that ``search`` returns the Document given by the lookup."""
+    # A dict keeps the test simple, but any callable would do,
+    # including a remote lookup.
+    documents = {"foo": Document(page_content="bar")}
+    docstore = DocstoreFn(lambda key: documents[key])
+    result = docstore.search("foo")
+    assert isinstance(result, Document)
+    assert result.page_content == "bar"