langchain[minor]: enhance LocalFileStore to allow directory/file permissions to be specified (#18857)

**Description:**
The `LocalFileStore` class can be used to create an on-disk
`CacheBackedEmbeddings` cache. However, the default `umask` settings
gives file/directory write permissions only to the original user. Once
the cache directory is created by the first user, other users cannot
write their own cache entries into the directory.

To make the cache usable by multiple users, this pull request updates
the `LocalFileStore` constructor to allow the permissions for newly
created directories and files to be specified. The specified permissions
override the default `umask` values.

For example, when configured as follows:

```python
file_store = LocalFileStore(temp_dir, chmod_dir=0o770, chmod_file=0o660)
```

then "user" and "group" (but not "other") have permissions to access the
store, which means:

* Anyone in our group could contribute embeddings to the cache.
* If we implement cache cleanup/eviction in the future, anyone in our
group could perform the cleanup.

The default values for the `chmod_dir` and `chmod_file` parameters is
`None`, which retains the original behavior of using the default `umask`
settings.

**Issue:**
Implements enhancement #18075.

**Testing:**
I updated the `LocalFileStore` unit tests to test the permissions.

---------

Signed-off-by: chrispy <chrispy@synopsys.com>
Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
This commit is contained in:
Chris Papademetrious 2024-04-04 12:40:16 -04:00 committed by GitHub
parent df25829f33
commit a954dedb77
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 62 additions and 2 deletions

View File

@ -36,14 +36,26 @@ class LocalFileStore(ByteStore):
"""
def __init__(self, root_path: Union[str, Path]) -> None:
def __init__(
self,
root_path: Union[str, Path],
*,
chmod_file: Optional[int] = None,
chmod_dir: Optional[int] = None,
) -> None:
"""Implement the BaseStore interface for the local file system.
Args:
root_path (Union[str, Path]): The root path of the file store. All keys are
interpreted as paths relative to this root.
chmod_file: (optional, defaults to `None`) If specified, sets permissions
for newly created files, overriding the current `umask` if needed.
chmod_dir: (optional, defaults to `None`) If specified, sets permissions
for newly created dirs, overriding the current `umask` if needed.
"""
self.root_path = Path(root_path).absolute()
self.chmod_file = chmod_file
self.chmod_dir = chmod_dir
def _get_full_path(self, key: str) -> Path:
"""Get the full path for a given key relative to the root path.
@ -66,6 +78,24 @@ class LocalFileStore(ByteStore):
return Path(full_path)
def _mkdir_for_store(self, dir: Path) -> None:
"""Makes a store directory path (including parents) with specified permissions
This is needed because `Path.mkdir()` is restricted by the current `umask`,
whereas the explicit `os.chmod()` used here is not.
Args:
dir: (Path) The store directory to make
Returns:
None
"""
if not dir.exists():
self._mkdir_for_store(dir.parent)
dir.mkdir(exist_ok=True)
if self.chmod_dir is not None:
os.chmod(dir, self.chmod_dir)
def mget(self, keys: Sequence[str]) -> List[Optional[bytes]]:
"""Get the values associated with the given keys.
@ -97,8 +127,10 @@ class LocalFileStore(ByteStore):
"""
for key, value in key_value_pairs:
full_path = self._get_full_path(key)
full_path.parent.mkdir(parents=True, exist_ok=True)
self._mkdir_for_store(full_path.parent)
full_path.write_bytes(value)
if self.chmod_file is not None:
os.chmod(full_path, self.chmod_file)
def mdelete(self, keys: Sequence[str]) -> None:
"""Delete the given keys and their associated values.

View File

@ -29,6 +29,34 @@ def test_mset_and_mget(file_store: LocalFileStore) -> None:
assert values == [b"value1", b"value2"]
@pytest.mark.parametrize(
"chmod_dir_s, chmod_file_s", [("777", "666"), ("770", "660"), ("700", "600")]
)
def test_mset_chmod(chmod_dir_s: str, chmod_file_s: str) -> None:
chmod_dir = int(chmod_dir_s, base=8)
chmod_file = int(chmod_file_s, base=8)
# Create a temporary directory for testing
with tempfile.TemporaryDirectory() as temp_dir:
# Instantiate the LocalFileStore with a directory inside the temporary directory
# as the root path
temp_dir = os.path.join(temp_dir, "store_dir")
file_store = LocalFileStore(
temp_dir, chmod_dir=chmod_dir, chmod_file=chmod_file
)
# Set values for keys
key_value_pairs = [("key1", b"value1"), ("key2", b"value2")]
file_store.mset(key_value_pairs)
# verify the permissions are set correctly
# (test only the standard user/group/other bits)
dir_path = str(file_store.root_path)
file_path = os.path.join(dir_path, "key1")
assert (os.stat(dir_path).st_mode & 0o777) == chmod_dir
assert (os.stat(file_path).st_mode & 0o777) == chmod_file
def test_mdelete(file_store: LocalFileStore) -> None:
# Set values for keys
key_value_pairs = [("key1", b"value1"), ("key2", b"value2")]