mirror of https://github.com/hwchase17/langchain
Add `DocstoreFn` - lookup doc via arbitrary function (#3760)
This **partially** addresses https://github.com/hwchase17/langchain/issues/1524, but it's also useful for some of our use cases. `DocstoreFn` lets you look up a document through a function that accepts the `search` string, without the need to implement a custom `Docstore`. This could be useful when: * you don't want to implement a `Docstore` just to provide a custom `search`; * it's expensive to construct an `InMemoryDocstore`/dict; * you retrieve documents from remote sources; * you just want to reuse existing objects.
pull/3692/head^2
parent
c55ba43093
commit
160bfae93f
@ -0,0 +1,30 @@
|
|||||||
|
from typing import Callable, Union
|
||||||
|
|
||||||
|
from langchain.docstore.base import Docstore
|
||||||
|
from langchain.schema import Document
|
||||||
|
|
||||||
|
|
||||||
|
class DocstoreFn(Docstore):
    """Docstore that delegates every lookup to a user-supplied function.

    Handy when:
    * building an InMemoryDocstore/dict up front would be expensive
    * the documents are fetched from a remote source
    * existing objects should simply be reused
    """

    def __init__(
        self,
        lookup_fn: Callable[[str], Union[Document, str]],
    ):
        """Store the lookup callable.

        Args:
            lookup_fn: Called with the search string; expected to return
                either a ``Document`` or a plain string.
        """
        self._lookup_fn = lookup_fn

    def search(self, search: str) -> Document:
        """Look up ``search`` through the stored function.

        Args:
            search: The string handed to the lookup function.

        Returns:
            The ``Document`` returned by the lookup function, or a new
            ``Document`` wrapping the returned string.

        Raises:
            ValueError: If the lookup function returns something that is
                neither a ``str`` nor a ``Document``.
        """
        found = self._lookup_fn(search)

        if isinstance(found, str):
            # NOTE: the search string is assumed to be the source ID
            return Document(page_content=found, metadata={"source": search})

        if not isinstance(found, Document):
            raise ValueError(f"Unexpected type of document {type(found)}")

        return found
|
@ -0,0 +1,12 @@
|
|||||||
|
from langchain.docstore.arbitrary_fn import DocstoreFn
|
||||||
|
from langchain.schema import Document
|
||||||
|
|
||||||
|
|
||||||
|
def test_document_found() -> None:
    """A Document returned by the lookup function is passed through as-is."""
    # A dict keeps the example simple; any callable would do here,
    # including one that performs a remote lookup.
    documents = {"foo": Document(page_content="bar")}

    def lookup(key: str) -> Document:
        return documents[key]

    store = DocstoreFn(lookup)
    result = store.search("foo")

    assert isinstance(result, Document)
    assert result.page_content == "bar"
|
Loading…
Reference in New Issue