mirror of https://github.com/hwchase17/langchain
Add base storage interface, 2 implementations and utility encoder (#8895)
This PR defines an abstract interface for key value stores. It provides 2 implementations: 1. Local File System 2. In memory -- used to facilitate testing It also provides an encoder utility to help take care of serialization from arbitrary data to data that can be stored by the given storewfh/async_eval_default
parent
7543a3d70e
commit
15f650ae8c
@ -0,0 +1,2 @@
|
||||
class LangChainException(Exception):
|
||||
"""General LangChain exception."""
|
@ -0,0 +1,53 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Generic, Iterator, List, Optional, Sequence, Tuple, TypeVar, Union
|
||||
|
||||
K = TypeVar("K")
|
||||
V = TypeVar("V")
|
||||
|
||||
|
||||
class BaseStore(Generic[K, V], ABC):
|
||||
"""Abstract interface for a key-value store."""
|
||||
|
||||
@abstractmethod
|
||||
def mget(self, keys: Sequence[K]) -> List[Optional[V]]:
|
||||
"""Get the values associated with the given keys.
|
||||
|
||||
Args:
|
||||
keys (Sequence[K]): A sequence of keys.
|
||||
|
||||
Returns:
|
||||
A sequence of optional values associated with the keys.
|
||||
If a key is not found, the corresponding value will be None.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def mset(self, key_value_pairs: Sequence[Tuple[K, V]]) -> None:
|
||||
"""Set the values for the given keys.
|
||||
|
||||
Args:
|
||||
key_value_pairs (Sequence[Tuple[K, V]]): A sequence of key-value pairs.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def mdelete(self, keys: Sequence[K]) -> None:
|
||||
"""Delete the given keys and their associated values.
|
||||
|
||||
Args:
|
||||
keys (Sequence[K]): A sequence of keys to delete.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def yield_keys(
|
||||
self, *, prefix: Optional[str] = None
|
||||
) -> Union[Iterator[K], Iterator[str]]:
|
||||
"""Get an iterator over keys that match the given prefix.
|
||||
|
||||
Args:
|
||||
prefix (str): The prefix to match.
|
||||
|
||||
Returns:
|
||||
Iterator[K | str]: An iterator over keys that match the given prefix.
|
||||
|
||||
This method is allowed to return an iterator over either K or str
|
||||
depending on what makes more sense for the given store.
|
||||
"""
|
@ -0,0 +1,17 @@
|
||||
"""Implementations of key-value stores and storage helpers.
|
||||
|
||||
Module provides implementations of various key-value stores that conform
|
||||
to a simple key-value interface.
|
||||
|
||||
The primary goal of these storages is to support implementation of caching.
|
||||
"""
|
||||
|
||||
from langchain.storage.encoder_backed import EncoderBackedStore
|
||||
from langchain.storage.file_system import LocalFileStore
|
||||
from langchain.storage.in_memory import InMemoryStore
|
||||
|
||||
__all__ = [
|
||||
"EncoderBackedStore",
|
||||
"LocalFileStore",
|
||||
"InMemoryStore",
|
||||
]
|
@ -0,0 +1,95 @@
|
||||
from typing import (
|
||||
Any,
|
||||
Callable,
|
||||
Iterator,
|
||||
List,
|
||||
Optional,
|
||||
Sequence,
|
||||
Tuple,
|
||||
TypeVar,
|
||||
Union,
|
||||
)
|
||||
|
||||
from langchain.schema import BaseStore
|
||||
|
||||
K = TypeVar("K")
|
||||
V = TypeVar("V")
|
||||
|
||||
|
||||
class EncoderBackedStore(BaseStore[K, V]):
|
||||
"""Wraps a store with key and value encoders/decoders.
|
||||
|
||||
Examples that uses JSON for encoding/decoding:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import json
|
||||
|
||||
def key_encoder(key: int) -> str:
|
||||
return json.dumps(key)
|
||||
|
||||
def value_serializer(value: float) -> str:
|
||||
return json.dumps(value)
|
||||
|
||||
def value_deserializer(serialized_value: str) -> float:
|
||||
return json.loads(serialized_value)
|
||||
|
||||
# Create an instance of the abstract store
|
||||
abstract_store = MyCustomStore()
|
||||
|
||||
# Create an instance of the encoder-backed store
|
||||
store = EncoderBackedStore(
|
||||
store=abstract_store,
|
||||
key_encoder=key_encoder,
|
||||
value_serializer=value_serializer,
|
||||
value_deserializer=value_deserializer
|
||||
)
|
||||
|
||||
# Use the encoder-backed store methods
|
||||
store.mset([(1, 3.14), (2, 2.718)])
|
||||
values = store.mget([1, 2]) # Retrieves [3.14, 2.718]
|
||||
store.mdelete([1, 2]) # Deletes the keys 1 and 2
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
store: BaseStore[str, Any],
|
||||
key_encoder: Callable[[K], str],
|
||||
value_serializer: Callable[[V], bytes],
|
||||
value_deserializer: Callable[[Any], V],
|
||||
) -> None:
|
||||
"""Initialize an EncodedStore."""
|
||||
self.store = store
|
||||
self.key_encoder = key_encoder
|
||||
self.value_serializer = value_serializer
|
||||
self.value_deserializer = value_deserializer
|
||||
|
||||
def mget(self, keys: Sequence[K]) -> List[Optional[V]]:
|
||||
"""Get the values associated with the given keys."""
|
||||
encoded_keys: List[str] = [self.key_encoder(key) for key in keys]
|
||||
values = self.store.mget(encoded_keys)
|
||||
return [
|
||||
self.value_deserializer(value) if value is not None else value
|
||||
for value in values
|
||||
]
|
||||
|
||||
def mset(self, key_value_pairs: Sequence[Tuple[K, V]]) -> None:
|
||||
"""Set the values for the given keys."""
|
||||
encoded_pairs = [
|
||||
(self.key_encoder(key), self.value_serializer(value))
|
||||
for key, value in key_value_pairs
|
||||
]
|
||||
self.store.mset(encoded_pairs)
|
||||
|
||||
def mdelete(self, keys: Sequence[K]) -> None:
|
||||
"""Delete the given keys and their associated values."""
|
||||
encoded_keys = [self.key_encoder(key) for key in keys]
|
||||
self.store.mdelete(encoded_keys)
|
||||
|
||||
def yield_keys(
|
||||
self, *, prefix: Optional[str] = None
|
||||
) -> Union[Iterator[K], Iterator[str]]:
|
||||
"""Get an iterator over keys that match the given prefix."""
|
||||
# For the time being this does not return K, but str
|
||||
# it's for debugging purposes. Should fix this.
|
||||
yield from self.store.yield_keys(prefix=prefix)
|
@ -0,0 +1,5 @@
|
||||
from langchain.schema import LangChainException
|
||||
|
||||
|
||||
class InvalidKeyException(LangChainException):
|
||||
"""Raised when a key is invalid; e.g., uses incorrect characters."""
|
@ -0,0 +1,120 @@
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Iterator, List, Optional, Sequence, Tuple, Union
|
||||
|
||||
from langchain.schema import BaseStore
|
||||
from langchain.storage.exceptions import InvalidKeyException
|
||||
|
||||
|
||||
class LocalFileStore(BaseStore[str, bytes]):
|
||||
"""BaseStore interface that works on the local file system.
|
||||
|
||||
Examples:
|
||||
Create a LocalFileStore instance and perform operations on it:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from langchain.storage import LocalFileStore
|
||||
|
||||
# Instantiate the LocalFileStore with the root path
|
||||
file_store = LocalFileStore("/path/to/root")
|
||||
|
||||
# Set values for keys
|
||||
file_store.mset([("key1", b"value1"), ("key2", b"value2")])
|
||||
|
||||
# Get values for keys
|
||||
values = file_store.mget(["key1", "key2"]) # Returns [b"value1", b"value2"]
|
||||
|
||||
# Delete keys
|
||||
file_store.mdelete(["key1"])
|
||||
|
||||
# Iterate over keys
|
||||
for key in file_store.yield_keys():
|
||||
print(key)
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, root_path: Union[str, Path]) -> None:
|
||||
"""Implement the BaseStore interface for the local file system.
|
||||
|
||||
Args:
|
||||
root_path (Union[str, Path]): The root path of the file store. All keys are
|
||||
interpreted as paths relative to this root.
|
||||
"""
|
||||
self.root_path = Path(root_path)
|
||||
|
||||
def _get_full_path(self, key: str) -> Path:
|
||||
"""Get the full path for a given key relative to the root path.
|
||||
|
||||
Args:
|
||||
key (str): The key relative to the root path.
|
||||
|
||||
Returns:
|
||||
Path: The full path for the given key.
|
||||
"""
|
||||
if not re.match(r"^[a-zA-Z0-9_.\-/]+$", key):
|
||||
raise InvalidKeyException(f"Invalid characters in key: {key}")
|
||||
return self.root_path / key
|
||||
|
||||
def mget(self, keys: Sequence[str]) -> List[Optional[bytes]]:
|
||||
"""Get the values associated with the given keys.
|
||||
|
||||
Args:
|
||||
keys: A sequence of keys.
|
||||
|
||||
Returns:
|
||||
A sequence of optional values associated with the keys.
|
||||
If a key is not found, the corresponding value will be None.
|
||||
"""
|
||||
values: List[Optional[bytes]] = []
|
||||
for key in keys:
|
||||
full_path = self._get_full_path(key)
|
||||
if full_path.exists():
|
||||
value = full_path.read_bytes()
|
||||
values.append(value)
|
||||
else:
|
||||
values.append(None)
|
||||
return values
|
||||
|
||||
def mset(self, key_value_pairs: Sequence[Tuple[str, bytes]]) -> None:
|
||||
"""Set the values for the given keys.
|
||||
|
||||
Args:
|
||||
key_value_pairs: A sequence of key-value pairs.
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
for key, value in key_value_pairs:
|
||||
full_path = self._get_full_path(key)
|
||||
full_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
full_path.write_bytes(value)
|
||||
|
||||
def mdelete(self, keys: Sequence[str]) -> None:
|
||||
"""Delete the given keys and their associated values.
|
||||
|
||||
Args:
|
||||
keys (Sequence[str]): A sequence of keys to delete.
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
for key in keys:
|
||||
full_path = self._get_full_path(key)
|
||||
if full_path.exists():
|
||||
full_path.unlink()
|
||||
|
||||
def yield_keys(self, prefix: Optional[str] = None) -> Iterator[str]:
|
||||
"""Get an iterator over keys that match the given prefix.
|
||||
|
||||
Args:
|
||||
prefix (Optional[str]): The prefix to match.
|
||||
|
||||
Returns:
|
||||
Iterator[str]: An iterator over keys that match the given prefix.
|
||||
"""
|
||||
prefix_path = self._get_full_path(prefix) if prefix else self.root_path
|
||||
for file in prefix_path.rglob("*"):
|
||||
if file.is_file():
|
||||
relative_path = file.relative_to(self.root_path)
|
||||
yield str(relative_path)
|
@ -0,0 +1,85 @@
|
||||
"""In memory store that is not thread safe and has no eviction policy.
|
||||
|
||||
This is a simple implementation of the BaseStore using a dictionary that is useful
|
||||
primarily for unit testing purposes.
|
||||
"""
|
||||
from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple
|
||||
|
||||
from langchain.schema import BaseStore
|
||||
|
||||
|
||||
class InMemoryStore(BaseStore[str, Any]):
|
||||
"""In-memory implementation of the BaseStore using a dictionary.
|
||||
|
||||
Attributes:
|
||||
store (Dict[str, Any]): The underlying dictionary that stores
|
||||
the key-value pairs.
|
||||
|
||||
Examples:
|
||||
... code-block:: python
|
||||
|
||||
from langchain.storage import InMemoryStore
|
||||
|
||||
store = InMemoryStore()
|
||||
store.mset([('key1', 'value1'), ('key2', 'value2')])
|
||||
store.mget(['key1', 'key2'])
|
||||
# ['value1', 'value2']
|
||||
store.mdelete(['key1'])
|
||||
list(store.yield_keys())
|
||||
# ['key2']
|
||||
list(store.yield_keys(prefix='k'))
|
||||
# ['key2']
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
"""Initialize an empty store."""
|
||||
self.store: Dict[str, Any] = {}
|
||||
|
||||
def mget(self, keys: Sequence[str]) -> List[Optional[Any]]:
|
||||
"""Get the values associated with the given keys.
|
||||
|
||||
Args:
|
||||
keys (Sequence[str]): A sequence of keys.
|
||||
|
||||
Returns:
|
||||
A sequence of optional values associated with the keys.
|
||||
If a key is not found, the corresponding value will be None.
|
||||
"""
|
||||
return [self.store.get(key) for key in keys]
|
||||
|
||||
def mset(self, key_value_pairs: Sequence[Tuple[str, Any]]) -> None:
|
||||
"""Set the values for the given keys.
|
||||
|
||||
Args:
|
||||
key_value_pairs (Sequence[Tuple[str, V]]): A sequence of key-value pairs.
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
for key, value in key_value_pairs:
|
||||
self.store[key] = value
|
||||
|
||||
def mdelete(self, keys: Sequence[str]) -> None:
|
||||
"""Delete the given keys and their associated values.
|
||||
|
||||
Args:
|
||||
keys (Sequence[str]): A sequence of keys to delete.
|
||||
"""
|
||||
for key in keys:
|
||||
self.store.pop(key, None)
|
||||
|
||||
def yield_keys(self, prefix: Optional[str] = None) -> Iterator[str]:
|
||||
"""Get an iterator over keys that match the given prefix.
|
||||
|
||||
Args:
|
||||
prefix (str, optional): The prefix to match. Defaults to None.
|
||||
|
||||
Returns:
|
||||
Iterator[str]: An iterator over keys that match the given prefix.
|
||||
"""
|
||||
if prefix is None:
|
||||
yield from self.store.keys()
|
||||
else:
|
||||
for key in self.store.keys():
|
||||
if key.startswith(prefix):
|
||||
yield key
|
@ -0,0 +1,78 @@
|
||||
import tempfile
|
||||
from typing import Generator
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain.storage.exceptions import InvalidKeyException
|
||||
from langchain.storage.file_system import LocalFileStore
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def file_store() -> Generator[LocalFileStore, None, None]:
|
||||
# Create a temporary directory for testing
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
# Instantiate the LocalFileStore with the temporary directory as the root path
|
||||
store = LocalFileStore(temp_dir)
|
||||
yield store
|
||||
|
||||
|
||||
def test_mset_and_mget(file_store: LocalFileStore) -> None:
|
||||
# Set values for keys
|
||||
key_value_pairs = [("key1", b"value1"), ("key2", b"value2")]
|
||||
file_store.mset(key_value_pairs)
|
||||
|
||||
# Get values for keys
|
||||
values = file_store.mget(["key1", "key2"])
|
||||
|
||||
# Assert that the retrieved values match the original values
|
||||
assert values == [b"value1", b"value2"]
|
||||
|
||||
|
||||
def test_mdelete(file_store: LocalFileStore) -> None:
|
||||
# Set values for keys
|
||||
key_value_pairs = [("key1", b"value1"), ("key2", b"value2")]
|
||||
file_store.mset(key_value_pairs)
|
||||
|
||||
# Delete keys
|
||||
file_store.mdelete(["key1"])
|
||||
|
||||
# Check if the deleted key is present
|
||||
values = file_store.mget(["key1"])
|
||||
|
||||
# Assert that the value is None after deletion
|
||||
assert values == [None]
|
||||
|
||||
|
||||
def test_set_invalid_key(file_store: LocalFileStore) -> None:
|
||||
"""Test that an exception is raised when an invalid key is set."""
|
||||
# Set a key-value pair
|
||||
key = "crying-cat/😿"
|
||||
value = b"This is a test value"
|
||||
with pytest.raises(InvalidKeyException):
|
||||
file_store.mset([(key, value)])
|
||||
|
||||
|
||||
def test_set_key_and_verify_content(file_store: LocalFileStore) -> None:
|
||||
"""Test that the content of the file is the same as the value set."""
|
||||
# Set a key-value pair
|
||||
key = "test_key"
|
||||
value = b"This is a test value"
|
||||
file_store.mset([(key, value)])
|
||||
|
||||
# Verify the content of the actual file
|
||||
full_path = file_store._get_full_path(key)
|
||||
assert full_path.exists()
|
||||
assert full_path.read_bytes() == b"This is a test value"
|
||||
|
||||
|
||||
def test_yield_keys(file_store: LocalFileStore) -> None:
|
||||
# Set values for keys
|
||||
key_value_pairs = [("key1", b"value1"), ("subdir/key2", b"value2")]
|
||||
file_store.mset(key_value_pairs)
|
||||
|
||||
# Iterate over keys
|
||||
keys = list(file_store.yield_keys())
|
||||
|
||||
# Assert that the yielded keys match the expected keys
|
||||
expected_keys = ["key1", "subdir/key2"]
|
||||
assert keys == expected_keys
|
@ -0,0 +1,48 @@
|
||||
from langchain.storage.in_memory import InMemoryStore
|
||||
|
||||
|
||||
def test_mget() -> None:
|
||||
store = InMemoryStore()
|
||||
store.mset([("key1", "value1"), ("key2", "value2")])
|
||||
|
||||
values = store.mget(["key1", "key2"])
|
||||
assert values == ["value1", "value2"]
|
||||
|
||||
# Test non-existent key
|
||||
non_existent_value = store.mget(["key3"])
|
||||
assert non_existent_value == [None]
|
||||
|
||||
|
||||
def test_mset() -> None:
|
||||
store = InMemoryStore()
|
||||
store.mset([("key1", "value1"), ("key2", "value2")])
|
||||
|
||||
values = store.mget(["key1", "key2"])
|
||||
assert values == ["value1", "value2"]
|
||||
|
||||
|
||||
def test_mdelete() -> None:
|
||||
store = InMemoryStore()
|
||||
store.mset([("key1", "value1"), ("key2", "value2")])
|
||||
|
||||
store.mdelete(["key1"])
|
||||
|
||||
values = store.mget(["key1", "key2"])
|
||||
assert values == [None, "value2"]
|
||||
|
||||
# Test deleting non-existent key
|
||||
store.mdelete(["key3"]) # No error should be raised
|
||||
|
||||
|
||||
def test_yield_keys() -> None:
|
||||
store = InMemoryStore()
|
||||
store.mset([("key1", "value1"), ("key2", "value2"), ("key3", "value3")])
|
||||
|
||||
keys = list(store.yield_keys())
|
||||
assert set(keys) == {"key1", "key2", "key3"}
|
||||
|
||||
keys_with_prefix = list(store.yield_keys(prefix="key"))
|
||||
assert set(keys_with_prefix) == {"key1", "key2", "key3"}
|
||||
|
||||
keys_with_invalid_prefix = list(store.yield_keys(prefix="x"))
|
||||
assert keys_with_invalid_prefix == []
|
Loading…
Reference in New Issue