mirror of https://github.com/hwchase17/langchain
Add `DocstoreFn` - lookup doc via arbitrary function (#3760)
This **partially** addresses https://github.com/hwchase17/langchain/issues/1524, but it's also useful for some of our use cases. `DocstoreFn` lets you look up a document through a function that accepts the `search` string, without the need to implement a custom `Docstore`. This could be useful when: * you don't want to implement a `Docstore` just to provide a custom `search` * it's expensive to construct an `InMemoryDocstore`/dict * you retrieve documents from remote sources * you just want to reuse existing objects
pull/3692/head^2
parent
c55ba43093
commit
160bfae93f
@ -0,0 +1,30 @@
|
||||
from typing import Callable, Union
|
||||
|
||||
from langchain.docstore.base import Docstore
|
||||
from langchain.schema import Document
|
||||
|
||||
|
||||
class DocstoreFn(Docstore):
    """Docstore backed by an arbitrary lookup callable.

    Instead of holding documents itself, this docstore delegates every
    search to a user-supplied function. This is handy when:

    * building an InMemoryDocstore/dict up front is expensive
    * documents are fetched from remote sources
    * existing objects should simply be reused
    """

    def __init__(
        self,
        lookup_fn: Callable[[str], Union[Document, str]],
    ):
        """Store the callable used to resolve search strings.

        Args:
            lookup_fn: Function taking the search string and returning
                either a ``Document`` or the document text as a ``str``.
        """
        self._lookup_fn = lookup_fn

    def search(self, search: str) -> Document:
        """Resolve ``search`` through the lookup function.

        Args:
            search: String handed unchanged to the lookup function.

        Returns:
            The ``Document`` returned by the lookup function, or a new
            ``Document`` wrapping a returned ``str``, with ``search``
            recorded under the ``"source"`` metadata key.

        Raises:
            ValueError: If the lookup function returns neither a ``str``
                nor a ``Document``.
        """
        found = self._lookup_fn(search)

        # Plain-text results get wrapped; the search string is assumed to
        # double as the source ID.
        if isinstance(found, str):
            return Document(page_content=found, metadata={"source": search})

        if not isinstance(found, Document):
            raise ValueError(f"Unexpected type of document {type(found)}")

        return found
|
@ -0,0 +1,12 @@
|
||||
from langchain.docstore.arbitrary_fn import DocstoreFn
|
||||
from langchain.schema import Document
|
||||
|
||||
|
||||
def test_document_found() -> None:
    """A Document returned by the lookup function is passed through as-is."""
    # A dict is used here for simplicity; any callable works, including
    # one that performs a remote lookup.
    documents = {"foo": Document(page_content="bar")}
    store = DocstoreFn(lambda key: documents[key])

    result = store.search("foo")

    assert isinstance(result, Document)
    assert result.page_content == "bar"
|
Loading…
Reference in New Issue