mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
Make document serializable, create utility to create a docstore (#9674)
This PR makes the following changes: 1. Documents become serializable using langchain serialization. 2. Adds a utility to create a docstore from a kv store. This will help to address the issue here: https://github.com/langchain-ai/langchain/issues/9345
This commit is contained in:
parent
a28e888b36
commit
588237ef30
@ -17,6 +17,11 @@ class Document(Serializable):
|
|||||||
documents, etc.).
|
documents, etc.).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@property
def lc_serializable(self) -> bool:
    """Report whether this class can be serialized.

    Returns:
        Always True.
    """
    return True
|
||||||
|
|
||||||
|
|
||||||
class BaseDocumentTransformer(ABC):
|
class BaseDocumentTransformer(ABC):
|
||||||
"""Abstract base class for document transformation systems.
|
"""Abstract base class for document transformation systems.
|
||||||
|
@ -6,6 +6,7 @@ to a simple key-value interface.
|
|||||||
The primary goal of these storages is to support implementation of caching.
|
The primary goal of these storages is to support implementation of caching.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from langchain.storage._lc_store import create_kv_docstore, create_lc_store
|
||||||
from langchain.storage.encoder_backed import EncoderBackedStore
|
from langchain.storage.encoder_backed import EncoderBackedStore
|
||||||
from langchain.storage.file_system import LocalFileStore
|
from langchain.storage.file_system import LocalFileStore
|
||||||
from langchain.storage.in_memory import InMemoryStore
|
from langchain.storage.in_memory import InMemoryStore
|
||||||
@ -16,4 +17,6 @@ __all__ = [
|
|||||||
"InMemoryStore",
|
"InMemoryStore",
|
||||||
"LocalFileStore",
|
"LocalFileStore",
|
||||||
"RedisStore",
|
"RedisStore",
|
||||||
|
"create_lc_store",
|
||||||
|
"create_kv_docstore",
|
||||||
]
|
]
|
||||||
|
88
libs/langchain/langchain/storage/_lc_store.py
Normal file
88
libs/langchain/langchain/storage/_lc_store.py
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
"""Create a key-value store for any langchain serializable object."""
|
||||||
|
from typing import Callable, Optional
|
||||||
|
|
||||||
|
from langchain.load.dump import dumps
|
||||||
|
from langchain.load.load import loads
|
||||||
|
from langchain.load.serializable import Serializable
|
||||||
|
from langchain.schema import BaseStore, Document
|
||||||
|
from langchain.storage.encoder_backed import EncoderBackedStore
|
||||||
|
|
||||||
|
|
||||||
|
def _dump_as_bytes(obj: Serializable) -> bytes:
    """Return a bytes representation of a serializable object.

    The object is passed through ``dumps`` and the resulting text is
    encoded as UTF-8.
    """
    text = dumps(obj)
    return text.encode("utf-8")
|
||||||
|
|
||||||
|
|
||||||
|
def _dump_document_as_bytes(obj: Document) -> bytes:
    """Return a UTF-8 encoded bytes representation of a Document.

    Args:
        obj: The Document to serialize.

    Raises:
        TypeError: If ``obj`` is not a Document instance.
    """
    if isinstance(obj, Document):
        return dumps(obj).encode("utf-8")
    # Reject anything that is not a Document before it reaches the store.
    raise TypeError("Expected a Document instance")
|
||||||
|
|
||||||
|
|
||||||
|
def _load_document_from_bytes(serialized: bytes) -> Document:
    """Rebuild a Document from its UTF-8 encoded bytes representation.

    Args:
        serialized: Bytes to decode as UTF-8 and pass to ``loads``.

    Raises:
        TypeError: If the loaded object is not a Document instance.
    """
    text = serialized.decode("utf-8")
    loaded = loads(text)
    if isinstance(loaded, Document):
        return loaded
    # The payload deserialized to some other type; refuse to return it.
    raise TypeError(f"Expected a Document instance. Got {type(loaded)}")
|
||||||
|
|
||||||
|
|
||||||
|
def _load_from_bytes(serialized: bytes) -> Serializable:
    """Return a serializable object from its bytes representation.

    The bytes are decoded as UTF-8 and handed to ``loads``.
    """
    text = serialized.decode("utf-8")
    return loads(text)
|
||||||
|
|
||||||
|
|
||||||
|
def _identity(x: str) -> str:
|
||||||
|
"""Return the same object."""
|
||||||
|
return x
|
||||||
|
|
||||||
|
|
||||||
|
# PUBLIC API
|
||||||
|
|
||||||
|
|
||||||
|
def create_lc_store(
    store: BaseStore[str, bytes],
    *,
    key_encoder: Optional[Callable[[str], str]] = None,
) -> BaseStore[str, Serializable]:
    """Create a store for langchain serializable objects from a bytes store.

    Values are converted with ``_dump_as_bytes`` and ``_load_from_bytes``.

    Args:
        store: A bytes store to use as the underlying store.
        key_encoder: A function to encode keys; if not provided, keys are
            used as they are.

    Returns:
        A key-value store for langchain serializable objects.
    """
    encode_key = key_encoder if key_encoder else _identity
    return EncoderBackedStore(
        store,
        encode_key,
        _dump_as_bytes,
        _load_from_bytes,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def create_kv_docstore(
    store: BaseStore[str, bytes],
    *,
    key_encoder: Optional[Callable[[str], str]] = None,
) -> BaseStore[str, Document]:
    """Create a store for langchain Document objects from a bytes store.

    Values are converted with ``_dump_document_as_bytes`` and
    ``_load_document_from_bytes``, both of which raise ``TypeError`` when
    the value is not a Document, so the type is checked at run time.

    Args:
        store: A bytes store to use as the underlying store.
        key_encoder: A function to encode keys; if not provided, keys are
            used as they are.

    Returns:
        A key-value store for documents.
    """
    encode_key = key_encoder if key_encoder else _identity
    return EncoderBackedStore(
        store,
        encode_key,
        _dump_document_as_bytes,
        _load_document_from_bytes,
    )
|
36
libs/langchain/tests/unit_tests/storage/test_lc_store.py
Normal file
36
libs/langchain/tests/unit_tests/storage/test_lc_store.py
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
import tempfile
|
||||||
|
from typing import Generator, cast
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from langchain.schema import Document
|
||||||
|
from langchain.storage._lc_store import create_kv_docstore, create_lc_store
|
||||||
|
from langchain.storage.file_system import LocalFileStore
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def file_store() -> Generator[LocalFileStore, None, None]:
    """Yield a LocalFileStore rooted at a fresh temporary directory.

    The store is yielded inside the ``TemporaryDirectory`` context, so the
    directory is cleaned up once the test using the fixture finishes.
    """
    with tempfile.TemporaryDirectory() as tmp_root:
        yield LocalFileStore(tmp_root)
|
||||||
|
|
||||||
|
|
||||||
|
def test_create_lc_store(file_store: LocalFileStore) -> None:
    """Check that a Document survives a round trip through create_lc_store."""
    lc_store = create_lc_store(file_store)
    document = Document(page_content="hello", metadata={"key": "value"})
    lc_store.mset([("key1", document)])

    results = lc_store.mget(["key1"])
    # The store is typed for Serializable values, so narrow for the checker.
    fetched_doc = cast(Document, results[0])

    assert fetched_doc.page_content == "hello"
    assert fetched_doc.metadata == {"key": "value"}
|
||||||
|
|
||||||
|
|
||||||
|
def test_create_kv_store(file_store: LocalFileStore) -> None:
    """Check that a Document survives a round trip through create_kv_docstore."""
    kv_docstore = create_kv_docstore(file_store)
    document = Document(page_content="hello", metadata={"key": "value"})
    kv_docstore.mset([("key1", document)])

    results = kv_docstore.mget(["key1"])
    fetched_doc = results[0]

    assert isinstance(fetched_doc, Document)
    assert fetched_doc.page_content == "hello"
    assert fetched_doc.metadata == {"key": "value"}
|
Loading…
Reference in New Issue
Block a user