diff --git a/libs/langchain/langchain/schema/__init__.py b/libs/langchain/langchain/schema/__init__.py index bba898f0bc..15557f5f3c 100644 --- a/libs/langchain/langchain/schema/__init__.py +++ b/libs/langchain/langchain/schema/__init__.py @@ -1,6 +1,7 @@ """**Schemas** are the LangChain Base Classes and Interfaces.""" from langchain.schema.agent import AgentAction, AgentFinish from langchain.schema.document import BaseDocumentTransformer, Document +from langchain.schema.exceptions import LangChainException from langchain.schema.memory import BaseChatMessageHistory, BaseMemory from langchain.schema.messages import ( AIMessage, @@ -31,12 +32,14 @@ from langchain.schema.output_parser import ( from langchain.schema.prompt import PromptValue from langchain.schema.prompt_template import BasePromptTemplate, format_document from langchain.schema.retriever import BaseRetriever +from langchain.schema.storage import BaseStore RUN_KEY = "__run" Memory = BaseMemory __all__ = [ "BaseMemory", + "BaseStore", "BaseChatMessageHistory", "AgentFinish", "AgentAction", @@ -59,6 +62,7 @@ __all__ = [ "ChatGeneration", "Generation", "PromptValue", + "LangChainException", "BaseRetriever", "RUN_KEY", "Memory", diff --git a/libs/langchain/langchain/schema/exceptions.py b/libs/langchain/langchain/schema/exceptions.py new file mode 100644 index 0000000000..27ed0d07dc --- /dev/null +++ b/libs/langchain/langchain/schema/exceptions.py @@ -0,0 +1,2 @@ +class LangChainException(Exception): + """General LangChain exception.""" diff --git a/libs/langchain/langchain/schema/storage.py b/libs/langchain/langchain/schema/storage.py new file mode 100644 index 0000000000..bae5adc2b8 --- /dev/null +++ b/libs/langchain/langchain/schema/storage.py @@ -0,0 +1,53 @@ +from abc import ABC, abstractmethod +from typing import Generic, Iterator, List, Optional, Sequence, Tuple, TypeVar, Union + +K = TypeVar("K") +V = TypeVar("V") + + +class BaseStore(Generic[K, V], ABC): + """Abstract interface for a key-value store.""" + + @abstractmethod + def mget(self, keys: Sequence[K]) -> List[Optional[V]]: + """Get the values associated with the given keys. + + Args: + keys (Sequence[K]): A sequence of keys. + + Returns: + A sequence of optional values associated with the keys. + If a key is not found, the corresponding value will be None. + """ + + @abstractmethod + def mset(self, key_value_pairs: Sequence[Tuple[K, V]]) -> None: + """Set the values for the given keys. + + Args: + key_value_pairs (Sequence[Tuple[K, V]]): A sequence of key-value pairs. + """ + + @abstractmethod + def mdelete(self, keys: Sequence[K]) -> None: + """Delete the given keys and their associated values. + + Args: + keys (Sequence[K]): A sequence of keys to delete. + """ + + @abstractmethod + def yield_keys( + self, *, prefix: Optional[str] = None + ) -> Union[Iterator[K], Iterator[str]]: + """Get an iterator over keys that match the given prefix. + + Args: + prefix (str): The prefix to match. + + Returns: + Iterator[K | str]: An iterator over keys that match the given prefix. + + This method is allowed to return an iterator over either K or str + depending on what makes more sense for the given store. + """ diff --git a/libs/langchain/langchain/storage/__init__.py b/libs/langchain/langchain/storage/__init__.py new file mode 100644 index 0000000000..6b0511aba3 --- /dev/null +++ b/libs/langchain/langchain/storage/__init__.py @@ -0,0 +1,17 @@ +"""Implementations of key-value stores and storage helpers. + +Module provides implementations of various key-value stores that conform +to a simple key-value interface. + +The primary goal of these storages is to support implementation of caching. +""" + +from langchain.storage.encoder_backed import EncoderBackedStore +from langchain.storage.file_system import LocalFileStore +from langchain.storage.in_memory import InMemoryStore + +__all__ = [ + "EncoderBackedStore", + "LocalFileStore", + "InMemoryStore", +] diff --git a/libs/langchain/langchain/storage/encoder_backed.py b/libs/langchain/langchain/storage/encoder_backed.py new file mode 100644 index 0000000000..4a713cc391 --- /dev/null +++ b/libs/langchain/langchain/storage/encoder_backed.py @@ -0,0 +1,95 @@ +from typing import ( + Any, + Callable, + Iterator, + List, + Optional, + Sequence, + Tuple, + TypeVar, + Union, +) + +from langchain.schema import BaseStore + +K = TypeVar("K") +V = TypeVar("V") + + +class EncoderBackedStore(BaseStore[K, V]): + """Wraps a store with key and value encoders/decoders. + + Examples that uses JSON for encoding/decoding: + + .. code-block:: python + + import json + + def key_encoder(key: int) -> str: + return json.dumps(key) + + def value_serializer(value: float) -> str: + return json.dumps(value) + + def value_deserializer(serialized_value: str) -> float: + return json.loads(serialized_value) + + # Create an instance of the abstract store + abstract_store = MyCustomStore() + + # Create an instance of the encoder-backed store + store = EncoderBackedStore( + store=abstract_store, + key_encoder=key_encoder, + value_serializer=value_serializer, + value_deserializer=value_deserializer + ) + + # Use the encoder-backed store methods + store.mset([(1, 3.14), (2, 2.718)]) + values = store.mget([1, 2]) # Retrieves [3.14, 2.718] + store.mdelete([1, 2]) # Deletes the keys 1 and 2 + """ + + def __init__( + self, + store: BaseStore[str, Any], + key_encoder: Callable[[K], str], + value_serializer: Callable[[V], bytes], + value_deserializer: Callable[[Any], V], + ) -> None: + """Initialize an EncodedStore.""" + self.store = store + self.key_encoder = key_encoder + self.value_serializer = value_serializer + self.value_deserializer = value_deserializer + + def mget(self, keys: Sequence[K]) -> List[Optional[V]]: + """Get the values associated with the given keys.""" + encoded_keys: List[str] = [self.key_encoder(key) for key in keys] + values = self.store.mget(encoded_keys) + return [ + self.value_deserializer(value) if value is not None else value + for value in values + ] + + def mset(self, key_value_pairs: Sequence[Tuple[K, V]]) -> None: + """Set the values for the given keys.""" + encoded_pairs = [ + (self.key_encoder(key), self.value_serializer(value)) + for key, value in key_value_pairs + ] + self.store.mset(encoded_pairs) + + def mdelete(self, keys: Sequence[K]) -> None: + """Delete the given keys and their associated values.""" + encoded_keys = [self.key_encoder(key) for key in keys] + self.store.mdelete(encoded_keys) + + def yield_keys( + self, *, prefix: Optional[str] = None + ) -> Union[Iterator[K], Iterator[str]]: + """Get an iterator over keys that match the given prefix.""" + # For the time being this does not return K, but str + # it's for debugging purposes. Should fix this. + yield from self.store.yield_keys(prefix=prefix) diff --git a/libs/langchain/langchain/storage/exceptions.py b/libs/langchain/langchain/storage/exceptions.py new file mode 100644 index 0000000000..2f36a7615c --- /dev/null +++ b/libs/langchain/langchain/storage/exceptions.py @@ -0,0 +1,5 @@ +from langchain.schema import LangChainException + + +class InvalidKeyException(LangChainException): + """Raised when a key is invalid; e.g., uses incorrect characters.""" diff --git a/libs/langchain/langchain/storage/file_system.py b/libs/langchain/langchain/storage/file_system.py new file mode 100644 index 0000000000..d197497d8a --- /dev/null +++ b/libs/langchain/langchain/storage/file_system.py @@ -0,0 +1,120 @@ +import re +from pathlib import Path +from typing import Iterator, List, Optional, Sequence, Tuple, Union + +from langchain.schema import BaseStore +from langchain.storage.exceptions import InvalidKeyException + + +class LocalFileStore(BaseStore[str, bytes]): + """BaseStore interface that works on the local file system. + + Examples: + Create a LocalFileStore instance and perform operations on it: + + .. code-block:: python + + from langchain.storage import LocalFileStore + + # Instantiate the LocalFileStore with the root path + file_store = LocalFileStore("/path/to/root") + + # Set values for keys + file_store.mset([("key1", b"value1"), ("key2", b"value2")]) + + # Get values for keys + values = file_store.mget(["key1", "key2"]) # Returns [b"value1", b"value2"] + + # Delete keys + file_store.mdelete(["key1"]) + + # Iterate over keys + for key in file_store.yield_keys(): + print(key) + + """ + + def __init__(self, root_path: Union[str, Path]) -> None: + """Implement the BaseStore interface for the local file system. + + Args: + root_path (Union[str, Path]): The root path of the file store. All keys are + interpreted as paths relative to this root. + """ + self.root_path = Path(root_path) + + def _get_full_path(self, key: str) -> Path: + """Get the full path for a given key relative to the root path. + + Args: + key (str): The key relative to the root path. + + Returns: + Path: The full path for the given key. + """ + if not re.match(r"^[a-zA-Z0-9_.\-/]+$", key): + raise InvalidKeyException(f"Invalid characters in key: {key}") + return self.root_path / key + + def mget(self, keys: Sequence[str]) -> List[Optional[bytes]]: + """Get the values associated with the given keys. + + Args: + keys: A sequence of keys. + + Returns: + A sequence of optional values associated with the keys. + If a key is not found, the corresponding value will be None. + """ + values: List[Optional[bytes]] = [] + for key in keys: + full_path = self._get_full_path(key) + if full_path.exists(): + value = full_path.read_bytes() + values.append(value) + else: + values.append(None) + return values + + def mset(self, key_value_pairs: Sequence[Tuple[str, bytes]]) -> None: + """Set the values for the given keys. + + Args: + key_value_pairs: A sequence of key-value pairs. + + Returns: + None + """ + for key, value in key_value_pairs: + full_path = self._get_full_path(key) + full_path.parent.mkdir(parents=True, exist_ok=True) + full_path.write_bytes(value) + + def mdelete(self, keys: Sequence[str]) -> None: + """Delete the given keys and their associated values. + + Args: + keys (Sequence[str]): A sequence of keys to delete. + + Returns: + None + """ + for key in keys: + full_path = self._get_full_path(key) + if full_path.exists(): + full_path.unlink() + + def yield_keys(self, prefix: Optional[str] = None) -> Iterator[str]: + """Get an iterator over keys that match the given prefix. + + Args: + prefix (Optional[str]): The prefix to match. + + Returns: + Iterator[str]: An iterator over keys that match the given prefix. + """ + prefix_path = self._get_full_path(prefix) if prefix else self.root_path + for file in prefix_path.rglob("*"): + if file.is_file(): + relative_path = file.relative_to(self.root_path) + yield str(relative_path) diff --git a/libs/langchain/langchain/storage/in_memory.py b/libs/langchain/langchain/storage/in_memory.py new file mode 100644 index 0000000000..906d08e853 --- /dev/null +++ b/libs/langchain/langchain/storage/in_memory.py @@ -0,0 +1,85 @@ +"""In memory store that is not thread safe and has no eviction policy. + +This is a simple implementation of the BaseStore using a dictionary that is useful +primarily for unit testing purposes. +""" +from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple + +from langchain.schema import BaseStore + + +class InMemoryStore(BaseStore[str, Any]): + """In-memory implementation of the BaseStore using a dictionary. + + Attributes: + store (Dict[str, Any]): The underlying dictionary that stores + the key-value pairs. + + Examples: + ... code-block:: python + + from langchain.storage import InMemoryStore + + store = InMemoryStore() + store.mset([('key1', 'value1'), ('key2', 'value2')]) + store.mget(['key1', 'key2']) + # ['value1', 'value2'] + store.mdelete(['key1']) + list(store.yield_keys()) + # ['key2'] + list(store.yield_keys(prefix='k')) + # ['key2'] + """ + + def __init__(self) -> None: + """Initialize an empty store.""" + self.store: Dict[str, Any] = {} + + def mget(self, keys: Sequence[str]) -> List[Optional[Any]]: + """Get the values associated with the given keys. + + Args: + keys (Sequence[str]): A sequence of keys. + + Returns: + A sequence of optional values associated with the keys. + If a key is not found, the corresponding value will be None. + """ + return [self.store.get(key) for key in keys] + + def mset(self, key_value_pairs: Sequence[Tuple[str, Any]]) -> None: + """Set the values for the given keys. + + Args: + key_value_pairs (Sequence[Tuple[str, V]]): A sequence of key-value pairs. + + Returns: + None + """ + for key, value in key_value_pairs: + self.store[key] = value + + def mdelete(self, keys: Sequence[str]) -> None: + """Delete the given keys and their associated values. + + Args: + keys (Sequence[str]): A sequence of keys to delete. + """ + for key in keys: + self.store.pop(key, None) + + def yield_keys(self, prefix: Optional[str] = None) -> Iterator[str]: + """Get an iterator over keys that match the given prefix. + + Args: + prefix (str, optional): The prefix to match. Defaults to None. + + Returns: + Iterator[str]: An iterator over keys that match the given prefix. + """ + if prefix is None: + yield from self.store.keys() + else: + for key in self.store.keys(): + if key.startswith(prefix): + yield key diff --git a/libs/langchain/tests/unit_tests/storage/__init__.py b/libs/langchain/tests/unit_tests/storage/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/libs/langchain/tests/unit_tests/storage/test_filesystem.py b/libs/langchain/tests/unit_tests/storage/test_filesystem.py new file mode 100644 index 0000000000..25ba3866bf --- /dev/null +++ b/libs/langchain/tests/unit_tests/storage/test_filesystem.py @@ -0,0 +1,78 @@ +import tempfile +from typing import Generator + +import pytest + +from langchain.storage.exceptions import InvalidKeyException +from langchain.storage.file_system import LocalFileStore + + +@pytest.fixture +def file_store() -> Generator[LocalFileStore, None, None]: + # Create a temporary directory for testing + with tempfile.TemporaryDirectory() as temp_dir: + # Instantiate the LocalFileStore with the temporary directory as the root path + store = LocalFileStore(temp_dir) + yield store + + +def test_mset_and_mget(file_store: LocalFileStore) -> None: + # Set values for keys + key_value_pairs = [("key1", b"value1"), ("key2", b"value2")] + file_store.mset(key_value_pairs) + + # Get values for keys + values = file_store.mget(["key1", "key2"]) + + # Assert that the retrieved values match the original values + assert values == [b"value1", b"value2"] + + +def test_mdelete(file_store: LocalFileStore) -> None: + # Set values for keys + key_value_pairs = [("key1", b"value1"), ("key2", b"value2")] + file_store.mset(key_value_pairs) + + # Delete keys + file_store.mdelete(["key1"]) + + # Check if the deleted key is present + values = file_store.mget(["key1"]) + + # Assert that the value is None after deletion + assert values == [None] + + +def test_set_invalid_key(file_store: LocalFileStore) -> None: + """Test that an exception is raised when an invalid key is set.""" + # Set a key-value pair + key = "crying-cat/😿" + value = b"This is a test value" + with pytest.raises(InvalidKeyException): + file_store.mset([(key, value)]) + + +def test_set_key_and_verify_content(file_store: LocalFileStore) -> None: + """Test that the content of the file is the same as the value set.""" + # Set a key-value pair + key = "test_key" + value = b"This is a test value" + file_store.mset([(key, value)]) + + # Verify the content of the actual file + full_path = file_store._get_full_path(key) + assert full_path.exists() + assert full_path.read_bytes() == b"This is a test value" + + +def test_yield_keys(file_store: LocalFileStore) -> None: + # Set values for keys + key_value_pairs = [("key1", b"value1"), ("subdir/key2", b"value2")] + file_store.mset(key_value_pairs) + + # Iterate over keys + keys = list(file_store.yield_keys()) + + # Assert that the yielded keys match the expected keys + expected_keys = ["key1", "subdir/key2"] + assert keys == expected_keys diff --git a/libs/langchain/tests/unit_tests/storage/test_in_memory.py b/libs/langchain/tests/unit_tests/storage/test_in_memory.py new file mode 100644 index 0000000000..4e969a01dd --- /dev/null +++ b/libs/langchain/tests/unit_tests/storage/test_in_memory.py @@ -0,0 +1,48 @@ +from langchain.storage.in_memory import InMemoryStore + + +def test_mget() -> None: + store = InMemoryStore() + store.mset([("key1", "value1"), ("key2", "value2")]) + + values = store.mget(["key1", "key2"]) + assert values == ["value1", "value2"] + + # Test non-existent key + non_existent_value = store.mget(["key3"]) + assert non_existent_value == [None] + + +def test_mset() -> None: + store = InMemoryStore() + store.mset([("key1", "value1"), ("key2", "value2")]) + + values = store.mget(["key1", "key2"]) + assert values == ["value1", "value2"] + + +def test_mdelete() -> None: + store = InMemoryStore() + store.mset([("key1", "value1"), ("key2", "value2")]) + + store.mdelete(["key1"]) + + values = store.mget(["key1", "key2"]) + assert values == [None, "value2"] + + # Test deleting non-existent key + store.mdelete(["key3"]) # No error should be raised + + +def test_yield_keys() -> None: + store = InMemoryStore() + store.mset([("key1", "value1"), ("key2", "value2"), ("key3", "value3")]) + + keys = list(store.yield_keys()) + assert set(keys) == {"key1", "key2", "key3"} + + keys_with_prefix = list(store.yield_keys(prefix="key")) + assert set(keys_with_prefix) == {"key1", "key2", "key3"} + + keys_with_invalid_prefix = list(store.yield_keys(prefix="x")) + assert keys_with_invalid_prefix == []