Add `DocstoreFn` - lookup doc via arbitrary function (#3760)

This **partially** addresses
https://github.com/hwchase17/langchain/issues/1524, but it's also useful
for some of our use cases.

This `DocstoreFn` lets you look up a document via a function that
accepts the `search` string, without the need to implement a custom
`Docstore`.

This could be useful when:
* you don't want to implement a `Docstore` just to provide a custom
`search`
 * it's expensive to construct an `InMemoryDocstore`/dict
 * you retrieve documents from remote sources
 * you just want to reuse existing objects
fix_agent_callbacks
Rafal Wojdyla 1 year ago committed by GitHub
parent c55ba43093
commit 160bfae93f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -0,0 +1,30 @@
from typing import Callable, Union
from langchain.docstore.base import Docstore
from langchain.schema import Document
class DocstoreFn(Docstore):
    """Docstore backed by an arbitrary lookup function.

    Rather than holding documents itself, this docstore delegates every
    search to a user-supplied callable. That is handy when:

    * building an InMemoryDocstore/dict up front is expensive
    * documents are fetched from remote sources
    * existing objects should simply be reused
    """

    def __init__(
        self,
        lookup_fn: Callable[[str], Union[Document, str]],
    ):
        """Remember the callable used to resolve search strings.

        Args:
            lookup_fn: Called with the search string; returns either a
                ``Document`` or a plain string holding the page content.
        """
        self._lookup_fn = lookup_fn

    def search(self, search: str) -> Document:
        """Resolve ``search`` through the stored lookup function.

        Args:
            search: The string handed to the lookup function.

        Returns:
            The ``Document`` produced by the lookup function, or a new
            ``Document`` wrapping a returned string.

        Raises:
            ValueError: If the lookup function returns something that is
                neither a string nor a ``Document``.
        """
        found = self._lookup_fn(search)
        if isinstance(found, str):
            # NOTE: the search string is assumed to be the source ID
            return Document(page_content=found, metadata={"source": search})
        if isinstance(found, Document):
            return found
        raise ValueError(f"Unexpected type of document {type(found)}")

@ -0,0 +1,12 @@
from langchain.docstore.arbitrary_fn import DocstoreFn
from langchain.schema import Document
def test_document_found() -> None:
    """A lookup function that returns a Document yields that Document."""
    # A plain dict keeps the example simple; any callable works here,
    # including one that performs a remote lookup.
    documents = {"foo": Document(page_content="bar")}
    store = DocstoreFn(documents.__getitem__)
    result = store.search("foo")
    assert isinstance(result, Document)
    assert result.page_content == "bar"
Loading…
Cancel
Save