mirror of
https://github.com/hwchase17/langchain
synced 2024-11-18 09:25:54 +00:00
langchain[minor]: enhance LocalFileStore
to allow directory/file permissions to be specified (#18857)
**Description:** The `LocalFileStore` class can be used to create an on-disk `CacheBackedEmbeddings` cache. However, the default `umask` settings gives file/directory write permissions only to the original user. Once the cache directory is created by the first user, other users cannot write their own cache entries into the directory. To make the cache usable by multiple users, this pull request updates the `LocalFileStore` constructor to allow the permissions for newly created directories and files to be specified. The specified permissions override the default `umask` values. For example, when configured as follows: ```python file_store = LocalFileStore(temp_dir, chmod_dir=0o770, chmod_file=0o660) ``` then "user" and "group" (but not "other") have permissions to access the store, which means: * Anyone in our group could contribute embeddings to the cache. * If we implement cache cleanup/eviction in the future, anyone in our group could perform the cleanup. The default values for the `chmod_dir` and `chmod_file` parameters is `None`, which retains the original behavior of using the default `umask` settings. **Issue:** Implements enhancement #18075. **Testing:** I updated the `LocalFileStore` unit tests to test the permissions. --------- Signed-off-by: chrispy <chrispy@synopsys.com> Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
This commit is contained in:
parent
df25829f33
commit
a954dedb77
@ -36,14 +36,26 @@ class LocalFileStore(ByteStore):
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, root_path: Union[str, Path]) -> None:
|
||||
def __init__(
|
||||
self,
|
||||
root_path: Union[str, Path],
|
||||
*,
|
||||
chmod_file: Optional[int] = None,
|
||||
chmod_dir: Optional[int] = None,
|
||||
) -> None:
|
||||
"""Implement the BaseStore interface for the local file system.
|
||||
|
||||
Args:
|
||||
root_path (Union[str, Path]): The root path of the file store. All keys are
|
||||
interpreted as paths relative to this root.
|
||||
chmod_file: (optional, defaults to `None`) If specified, sets permissions
|
||||
for newly created files, overriding the current `umask` if needed.
|
||||
chmod_dir: (optional, defaults to `None`) If specified, sets permissions
|
||||
for newly created dirs, overriding the current `umask` if needed.
|
||||
"""
|
||||
self.root_path = Path(root_path).absolute()
|
||||
self.chmod_file = chmod_file
|
||||
self.chmod_dir = chmod_dir
|
||||
|
||||
def _get_full_path(self, key: str) -> Path:
|
||||
"""Get the full path for a given key relative to the root path.
|
||||
@ -66,6 +78,24 @@ class LocalFileStore(ByteStore):
|
||||
|
||||
return Path(full_path)
|
||||
|
||||
def _mkdir_for_store(self, dir: Path) -> None:
|
||||
"""Makes a store directory path (including parents) with specified permissions
|
||||
|
||||
This is needed because `Path.mkdir()` is restricted by the current `umask`,
|
||||
whereas the explicit `os.chmod()` used here is not.
|
||||
|
||||
Args:
|
||||
dir: (Path) The store directory to make
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
if not dir.exists():
|
||||
self._mkdir_for_store(dir.parent)
|
||||
dir.mkdir(exist_ok=True)
|
||||
if self.chmod_dir is not None:
|
||||
os.chmod(dir, self.chmod_dir)
|
||||
|
||||
def mget(self, keys: Sequence[str]) -> List[Optional[bytes]]:
|
||||
"""Get the values associated with the given keys.
|
||||
|
||||
@ -97,8 +127,10 @@ class LocalFileStore(ByteStore):
|
||||
"""
|
||||
for key, value in key_value_pairs:
|
||||
full_path = self._get_full_path(key)
|
||||
full_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
self._mkdir_for_store(full_path.parent)
|
||||
full_path.write_bytes(value)
|
||||
if self.chmod_file is not None:
|
||||
os.chmod(full_path, self.chmod_file)
|
||||
|
||||
def mdelete(self, keys: Sequence[str]) -> None:
|
||||
"""Delete the given keys and their associated values.
|
||||
|
@ -29,6 +29,34 @@ def test_mset_and_mget(file_store: LocalFileStore) -> None:
|
||||
assert values == [b"value1", b"value2"]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"chmod_dir_s, chmod_file_s", [("777", "666"), ("770", "660"), ("700", "600")]
|
||||
)
|
||||
def test_mset_chmod(chmod_dir_s: str, chmod_file_s: str) -> None:
|
||||
chmod_dir = int(chmod_dir_s, base=8)
|
||||
chmod_file = int(chmod_file_s, base=8)
|
||||
|
||||
# Create a temporary directory for testing
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
# Instantiate the LocalFileStore with a directory inside the temporary directory
|
||||
# as the root path
|
||||
temp_dir = os.path.join(temp_dir, "store_dir")
|
||||
file_store = LocalFileStore(
|
||||
temp_dir, chmod_dir=chmod_dir, chmod_file=chmod_file
|
||||
)
|
||||
|
||||
# Set values for keys
|
||||
key_value_pairs = [("key1", b"value1"), ("key2", b"value2")]
|
||||
file_store.mset(key_value_pairs)
|
||||
|
||||
# verify the permissions are set correctly
|
||||
# (test only the standard user/group/other bits)
|
||||
dir_path = str(file_store.root_path)
|
||||
file_path = os.path.join(dir_path, "key1")
|
||||
assert (os.stat(dir_path).st_mode & 0o777) == chmod_dir
|
||||
assert (os.stat(file_path).st_mode & 0o777) == chmod_file
|
||||
|
||||
|
||||
def test_mdelete(file_store: LocalFileStore) -> None:
|
||||
# Set values for keys
|
||||
key_value_pairs = [("key1", b"value1"), ("key2", b"value2")]
|
||||
|
Loading…
Reference in New Issue
Block a user