mirror of https://github.com/hwchase17/langchain
Make document serializable, create utility to create a docstore (#9674)
This PR makes the following changes: 1. Documents become serializable using langchain serialization. 2. Add a utility to create a docstore on top of a key-value (kv) store. This will help to address the issue here: https://github.com/langchain-ai/langchain/issues/9345 pull/9789/head
parent
a28e888b36
commit
588237ef30
@ -0,0 +1,88 @@
|
||||
"""Create a key-value store for any langchain serializable object."""
|
||||
from typing import Callable, Optional
|
||||
|
||||
from langchain.load.dump import dumps
|
||||
from langchain.load.load import loads
|
||||
from langchain.load.serializable import Serializable
|
||||
from langchain.schema import BaseStore, Document
|
||||
from langchain.storage.encoder_backed import EncoderBackedStore
|
||||
|
||||
|
||||
def _dump_as_bytes(obj: Serializable) -> bytes:
    """Serialize a langchain serializable object into UTF-8 bytes.

    Args:
        obj: The serializable object to hand to ``dumps``.

    Returns:
        The string produced by ``dumps``, encoded as UTF-8.
    """
    as_text = dumps(obj)
    return as_text.encode("utf-8")
|
||||
|
||||
|
||||
def _dump_document_as_bytes(obj: Document) -> bytes:
    """Serialize a Document into UTF-8 bytes, rejecting any other type.

    Args:
        obj: The value to serialize; checked at run time to be a Document.

    Returns:
        The string produced by ``dumps``, encoded as UTF-8.

    Raises:
        TypeError: If ``obj`` is not a Document instance.
    """
    if isinstance(obj, Document):
        as_text = dumps(obj)
        return as_text.encode("utf-8")
    raise TypeError("Expected a Document instance")
|
||||
|
||||
|
||||
def _load_document_from_bytes(serialized: bytes) -> Document:
    """Rebuild a Document from its UTF-8 encoded serialized form.

    Args:
        serialized: UTF-8 bytes that are decoded and passed to ``loads``.

    Returns:
        The loaded object, once verified to be a Document.

    Raises:
        TypeError: If the loaded object is not a Document instance.
    """
    as_text = serialized.decode("utf-8")
    obj = loads(as_text)
    if isinstance(obj, Document):
        return obj
    raise TypeError(f"Expected a Document instance. Got {type(obj)}")
|
||||
|
||||
|
||||
def _load_from_bytes(serialized: bytes) -> Serializable:
    """Rebuild a serializable object from its UTF-8 encoded serialized form.

    Args:
        serialized: UTF-8 bytes that are decoded and passed to ``loads``.

    Returns:
        Whatever ``loads`` produces for the decoded text; no type check is
        performed here.
    """
    as_text = serialized.decode("utf-8")
    return loads(as_text)
|
||||
|
||||
|
||||
def _identity(x: str) -> str:
    """Pass a key through unchanged.

    Serves as the fallback key encoder when the store factories in this
    module are called without an explicit ``key_encoder``.
    """
    return x
|
||||
|
||||
|
||||
# PUBLIC API
|
||||
|
||||
|
||||
def create_lc_store(
    store: BaseStore[str, bytes],
    *,
    key_encoder: Optional[Callable[[str], str]] = None,
) -> BaseStore[str, Serializable]:
    """Wrap a bytes store so it holds langchain serializable objects.

    Values are converted to bytes with ``_dump_as_bytes`` and restored with
    ``_load_from_bytes``.

    Args:
        store: The underlying store that holds the raw bytes.
        key_encoder: Optional function applied to keys; when not provided,
            keys are used as-is.

    Returns:
        A key-value store for serializable objects backed by ``store``.
    """
    encode_key = key_encoder or _identity
    return EncoderBackedStore(store, encode_key, _dump_as_bytes, _load_from_bytes)
|
||||
|
||||
|
||||
def create_kv_docstore(
    store: BaseStore[str, bytes],
    *,
    key_encoder: Optional[Callable[[str], str]] = None,
) -> BaseStore[str, Document]:
    """Wrap a bytes store so it holds langchain Document objects.

    Values pass through ``_dump_document_as_bytes`` and
    ``_load_document_from_bytes``, both of which check at run time that the
    value is a Document and raise ``TypeError`` otherwise.

    Args:
        store: The underlying store that holds the raw bytes.
        key_encoder: Optional function applied to keys; when not provided,
            keys are used as-is.

    Returns:
        A key-value store for documents backed by ``store``.
    """
    encode_key = key_encoder or _identity
    return EncoderBackedStore(
        store,
        encode_key,
        _dump_document_as_bytes,
        _load_document_from_bytes,
    )
|
@ -0,0 +1,36 @@
|
||||
import tempfile
|
||||
from typing import Generator, cast
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain.schema import Document
|
||||
from langchain.storage._lc_store import create_kv_docstore, create_lc_store
|
||||
from langchain.storage.file_system import LocalFileStore
|
||||
|
||||
|
||||
@pytest.fixture
def file_store() -> Generator[LocalFileStore, None, None]:
    """Provide a LocalFileStore rooted at a throwaway temporary directory."""
    # The directory is removed when the context manager exits, i.e. once the
    # test using this fixture has finished.
    with tempfile.TemporaryDirectory() as root_dir:
        yield LocalFileStore(root_dir)
|
||||
|
||||
|
||||
def test_create_lc_store(file_store: LocalFileStore) -> None:
    """Round-trip a Document through a store built by create_lc_store."""
    original = Document(page_content="hello", metadata={"key": "value"})
    docstore = create_lc_store(file_store)
    docstore.mset([("key1", original)])

    results = docstore.mget(["key1"])
    # create_lc_store is typed for any Serializable, so narrow for the checks.
    fetched_doc = cast(Document, results[0])
    assert fetched_doc.page_content == "hello"
    assert fetched_doc.metadata == {"key": "value"}
|
||||
|
||||
|
||||
def test_create_kv_store(file_store: LocalFileStore) -> None:
    """Round-trip a Document through a store built by create_kv_docstore."""
    original = Document(page_content="hello", metadata={"key": "value"})
    docstore = create_kv_docstore(file_store)
    docstore.mset([("key1", original)])

    results = docstore.mget(["key1"])
    fetched_doc = results[0]
    assert isinstance(fetched_doc, Document)
    assert fetched_doc.page_content == "hello"
    assert fetched_doc.metadata == {"key": "value"}
|
Loading…
Reference in New Issue