From 3c064a757fcb7fd7253caaf748070d49f71e44a5 Mon Sep 17 00:00:00 2001 From: Eugene Yurtsev Date: Tue, 30 Apr 2024 13:14:26 -0400 Subject: [PATCH] core[minor],langchain[patch],community[patch]: Move storage interfaces to core (#20750) * Move storage interface to core * Move in memory and file system implementation to core --- .../langchain_community/storage/exceptions.py | 6 +- libs/core/langchain_core/stores.py | 138 ++++++++++++++++ libs/core/tests/unit_tests/stores/__init__.py | 0 .../unit_tests/stores}/test_in_memory.py | 2 +- libs/langchain/langchain/storage/__init__.py | 13 +- .../langchain/langchain/storage/exceptions.py | 2 +- libs/langchain/langchain/storage/in_memory.py | 152 +----------------- .../unit_tests/storage/test_filesystem.py | 2 +- .../tests/unit_tests/storage/test_imports.py | 1 + 9 files changed, 159 insertions(+), 157 deletions(-) create mode 100644 libs/core/tests/unit_tests/stores/__init__.py rename libs/{langchain/tests/unit_tests/storage => core/tests/unit_tests/stores}/test_in_memory.py (98%) diff --git a/libs/community/langchain_community/storage/exceptions.py b/libs/community/langchain_community/storage/exceptions.py index d7231de65c..82d7c8a2fa 100644 --- a/libs/community/langchain_community/storage/exceptions.py +++ b/libs/community/langchain_community/storage/exceptions.py @@ -1,5 +1,3 @@ -from langchain_core.exceptions import LangChainException +from langchain_core.stores import InvalidKeyException - -class InvalidKeyException(LangChainException): - """Raised when a key is invalid; e.g., uses incorrect characters.""" +__all__ = ["InvalidKeyException"] diff --git a/libs/core/langchain_core/stores.py b/libs/core/langchain_core/stores.py index 7695816ae1..f6283597a9 100644 --- a/libs/core/langchain_core/stores.py +++ b/libs/core/langchain_core/stores.py @@ -7,7 +7,9 @@ The primary goal of these storages is to support implementation of caching. """ from abc import ABC, abstractmethod from typing import ( + Any, AsyncIterator, + Dict, Generic, Iterator, List, @@ -18,6 +20,7 @@ from typing import ( Union, ) +from langchain_core.exceptions import LangChainException from langchain_core.runnables import run_in_executor K = TypeVar("K") @@ -123,3 +126,138 @@ class BaseStore(Generic[K, V], ABC): ByteStore = BaseStore[str, bytes] + + +class InMemoryBaseStore(BaseStore[str, V], Generic[V]): + """In-memory implementation of the BaseStore using a dictionary. + + Attributes: + store (Dict[str, Any]): The underlying dictionary that stores + the key-value pairs. + + Examples: + + .. code-block:: python + + from langchain.storage import InMemoryStore + + store = InMemoryStore() + store.mset([('key1', 'value1'), ('key2', 'value2')]) + store.mget(['key1', 'key2']) + # ['value1', 'value2'] + store.mdelete(['key1']) + list(store.yield_keys()) + # ['key2'] + list(store.yield_keys(prefix='k')) + # ['key2'] + """ + + def __init__(self) -> None: + """Initialize an empty store.""" + self.store: Dict[str, V] = {} + + def mget(self, keys: Sequence[str]) -> List[Optional[V]]: + """Get the values associated with the given keys. + + Args: + keys (Sequence[str]): A sequence of keys. + + Returns: + A sequence of optional values associated with the keys. + If a key is not found, the corresponding value will be None. + """ + return [self.store.get(key) for key in keys] + + async def amget(self, keys: Sequence[str]) -> List[Optional[V]]: + """Get the values associated with the given keys. + + Args: + keys (Sequence[str]): A sequence of keys. + + Returns: + A sequence of optional values associated with the keys. + If a key is not found, the corresponding value will be None. + """ + return self.mget(keys) + + def mset(self, key_value_pairs: Sequence[Tuple[str, V]]) -> None: + """Set the values for the given keys. + + Args: + key_value_pairs (Sequence[Tuple[str, V]]): A sequence of key-value pairs. + + Returns: + None + """ + for key, value in key_value_pairs: + self.store[key] = value + + async def amset(self, key_value_pairs: Sequence[Tuple[str, V]]) -> None: + """Set the values for the given keys. + + Args: + key_value_pairs (Sequence[Tuple[str, V]]): A sequence of key-value pairs. + + Returns: + None + """ + return self.mset(key_value_pairs) + + def mdelete(self, keys: Sequence[str]) -> None: + """Delete the given keys and their associated values. + + Args: + keys (Sequence[str]): A sequence of keys to delete. + """ + for key in keys: + if key in self.store: + del self.store[key] + + async def amdelete(self, keys: Sequence[str]) -> None: + """Delete the given keys and their associated values. + + Args: + keys (Sequence[str]): A sequence of keys to delete. + """ + self.mdelete(keys) + + def yield_keys(self, prefix: Optional[str] = None) -> Iterator[str]: + """Get an iterator over keys that match the given prefix. + + Args: + prefix (str, optional): The prefix to match. Defaults to None. + + Returns: + Iterator[str]: An iterator over keys that match the given prefix. + """ + if prefix is None: + yield from self.store.keys() + else: + for key in self.store.keys(): + if key.startswith(prefix): + yield key + + async def ayield_keys(self, prefix: Optional[str] = None) -> AsyncIterator[str]: + """Get an async iterator over keys that match the given prefix. + + Args: + prefix (str, optional): The prefix to match. Defaults to None. + + Returns: + AsyncIterator[str]: An async iterator over keys that match the given prefix. + """ + if prefix is None: + for key in self.store.keys(): + yield key + else: + for key in self.store.keys(): + if key.startswith(prefix): + yield key + + +InMemoryStore = InMemoryBaseStore[Any] +InMemoryByteStore = InMemoryBaseStore[bytes] + + +class InvalidKeyException(LangChainException): + """Raised when a key is invalid; e.g., uses incorrect characters.""" diff --git a/libs/core/tests/unit_tests/stores/__init__.py b/libs/core/tests/unit_tests/stores/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/libs/langchain/tests/unit_tests/storage/test_in_memory.py b/libs/core/tests/unit_tests/stores/test_in_memory.py similarity index 98% rename from libs/langchain/tests/unit_tests/storage/test_in_memory.py rename to libs/core/tests/unit_tests/stores/test_in_memory.py index a12233b4f6..d664954f6b 100644 --- a/libs/langchain/tests/unit_tests/storage/test_in_memory.py +++ b/libs/core/tests/unit_tests/stores/test_in_memory.py @@ -1,4 +1,4 @@ -from langchain.storage.in_memory import InMemoryStore +from langchain_core.stores import InMemoryStore def test_mget() -> None: diff --git a/libs/langchain/langchain/storage/__init__.py b/libs/langchain/langchain/storage/__init__.py index aae71c5e90..0ad76ba1e0 100644 --- a/libs/langchain/langchain/storage/__init__.py +++ b/libs/langchain/langchain/storage/__init__.py @@ -9,11 +9,15 @@ import warnings from typing import Any from langchain_core._api import LangChainDeprecationWarning +from langchain_core.stores import ( + InMemoryByteStore, + InMemoryStore, + InvalidKeyException, +) from langchain.storage._lc_store import create_kv_docstore, create_lc_store from langchain.storage.encoder_backed import EncoderBackedStore from langchain.storage.file_system import LocalFileStore -from langchain.storage.in_memory import InMemoryByteStore, InMemoryStore from langchain.utils.interactive_env import is_interactive_env @@ -36,12 +40,13 @@ def __getattr__(name: str) -> Any: __all__ = [ "EncoderBackedStore", - "InMemoryStore", - "InMemoryByteStore", - "LocalFileStore", "RedisStore", "create_lc_store", "create_kv_docstore", + "LocalFileStore", + "InMemoryStore", + "InvalidKeyException", + "InMemoryByteStore", "UpstashRedisByteStore", "UpstashRedisStore", ] diff --git a/libs/langchain/langchain/storage/exceptions.py b/libs/langchain/langchain/storage/exceptions.py index 3d7cc3ea01..82d7c8a2fa 100644 --- a/libs/langchain/langchain/storage/exceptions.py +++ b/libs/langchain/langchain/storage/exceptions.py @@ -1,3 +1,3 @@ -from langchain_community.storage.exceptions import InvalidKeyException +from langchain_core.stores import InvalidKeyException __all__ = ["InvalidKeyException"] diff --git a/libs/langchain/langchain/storage/in_memory.py b/libs/langchain/langchain/storage/in_memory.py index 310f81ce28..e028edf579 100644 --- a/libs/langchain/langchain/storage/in_memory.py +++ b/libs/langchain/langchain/storage/in_memory.py @@ -3,150 +3,10 @@ This is a simple implementation of the BaseStore using a dictionary that is useful primarily for unit testing purposes. """ -from typing import ( - Any, - AsyncIterator, - Dict, - Generic, - Iterator, - List, - Optional, - Sequence, - Tuple, - TypeVar, -) +from langchain_core.stores import InMemoryBaseStore, InMemoryByteStore, InMemoryStore -from langchain_core.stores import BaseStore - -V = TypeVar("V") - - -class InMemoryBaseStore(BaseStore[str, V], Generic[V]): - """In-memory implementation of the BaseStore using a dictionary. - - Attributes: - store (Dict[str, Any]): The underlying dictionary that stores - the key-value pairs. - - Examples: - - .. code-block:: python - - from langchain.storage import InMemoryStore - - store = InMemoryStore() - store.mset([('key1', 'value1'), ('key2', 'value2')]) - store.mget(['key1', 'key2']) - # ['value1', 'value2'] - store.mdelete(['key1']) - list(store.yield_keys()) - # ['key2'] - list(store.yield_keys(prefix='k')) - # ['key2'] - """ - - def __init__(self) -> None: - """Initialize an empty store.""" - self.store: Dict[str, V] = {} - - def mget(self, keys: Sequence[str]) -> List[Optional[V]]: - """Get the values associated with the given keys. - - Args: - keys (Sequence[str]): A sequence of keys. - - Returns: - A sequence of optional values associated with the keys. - If a key is not found, the corresponding value will be None. - """ - return [self.store.get(key) for key in keys] - - async def amget(self, keys: Sequence[str]) -> List[Optional[V]]: - """Get the values associated with the given keys. - - Args: - keys (Sequence[str]): A sequence of keys. - - Returns: - A sequence of optional values associated with the keys. - If a key is not found, the corresponding value will be None. - """ - return self.mget(keys) - - def mset(self, key_value_pairs: Sequence[Tuple[str, V]]) -> None: - """Set the values for the given keys. - - Args: - key_value_pairs (Sequence[Tuple[str, V]]): A sequence of key-value pairs. - - Returns: - None - """ - for key, value in key_value_pairs: - self.store[key] = value - - async def amset(self, key_value_pairs: Sequence[Tuple[str, V]]) -> None: - """Set the values for the given keys. - - Args: - key_value_pairs (Sequence[Tuple[str, V]]): A sequence of key-value pairs. - - Returns: - None - """ - return self.mset(key_value_pairs) - - def mdelete(self, keys: Sequence[str]) -> None: - """Delete the given keys and their associated values. - - Args: - keys (Sequence[str]): A sequence of keys to delete. - """ - for key in keys: - if key in self.store: - del self.store[key] - - async def amdelete(self, keys: Sequence[str]) -> None: - """Delete the given keys and their associated values. - - Args: - keys (Sequence[str]): A sequence of keys to delete. - """ - self.mdelete(keys) - - def yield_keys(self, prefix: Optional[str] = None) -> Iterator[str]: - """Get an iterator over keys that match the given prefix. - - Args: - prefix (str, optional): The prefix to match. Defaults to None. - - Returns: - Iterator[str]: An iterator over keys that match the given prefix. - """ - if prefix is None: - yield from self.store.keys() - else: - for key in self.store.keys(): - if key.startswith(prefix): - yield key - - async def ayield_keys(self, prefix: Optional[str] = None) -> AsyncIterator[str]: - """Get an async iterator over keys that match the given prefix. - - Args: - prefix (str, optional): The prefix to match. Defaults to None. - - Returns: - AsyncIterator[str]: An async iterator over keys that match the given prefix. - """ - if prefix is None: - for key in self.store.keys(): - yield key - else: - for key in self.store.keys(): - if key.startswith(prefix): - yield key - - -InMemoryStore = InMemoryBaseStore[Any] -InMemoryByteStore = InMemoryBaseStore[bytes] +__all__ = [ + "InMemoryStore", + "InMemoryBaseStore", + "InMemoryByteStore", +] diff --git a/libs/langchain/tests/unit_tests/storage/test_filesystem.py b/libs/langchain/tests/unit_tests/storage/test_filesystem.py index 26d5cccd68..455d39e7dd 100644 --- a/libs/langchain/tests/unit_tests/storage/test_filesystem.py +++ b/libs/langchain/tests/unit_tests/storage/test_filesystem.py @@ -3,8 +3,8 @@ import tempfile from typing import Generator import pytest +from langchain_core.stores import InvalidKeyException -from langchain.storage.exceptions import InvalidKeyException from langchain.storage.file_system import LocalFileStore diff --git a/libs/langchain/tests/unit_tests/storage/test_imports.py b/libs/langchain/tests/unit_tests/storage/test_imports.py index ec7bdb8de7..33f74105f9 100644 --- a/libs/langchain/tests/unit_tests/storage/test_imports.py +++ b/libs/langchain/tests/unit_tests/storage/test_imports.py @@ -7,6 +7,7 @@ EXPECTED_ALL = [ "InMemoryByteStore", "LocalFileStore", "RedisStore", + "InvalidKeyException", "create_lc_store", "create_kv_docstore", "UpstashRedisByteStore",