From 5a0844bae1d9a24f85e4f40a672273d4a4263f43 Mon Sep 17 00:00:00 2001 From: JC Touzalin Date: Sat, 1 Apr 2023 17:58:53 +0200 Subject: [PATCH] Open a Deeplake dataset in read only mode (#2240) I'm using Deeplake as a vector store for a Q&A application. When several questions are being processed at the same time for the same dataset, the second one triggers the following error: > LockedException: This dataset cannot be open for writing as it is locked by another machine. Try loading the dataset with `read_only=True`. Answering questions doesn't require writing new embeddings, so it's OK to open the dataset in read-only mode at that time. This pull request thus adds the `read_only` option to the Deeplake constructor and passes it through to the subsequent `deeplake.load()` call. The related Deeplake documentation is [here](https://docs.deeplake.ai/en/latest/deeplake.html#deeplake.load). I've tested this update on my local dev environment. However, I don't know whether an integration test and/or additional documentation are expected. Let me know if they are, ideally with some guidance, as I'm not particularly experienced in Python. 
--- langchain/vectorstores/deeplake.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/langchain/vectorstores/deeplake.py b/langchain/vectorstores/deeplake.py index 67865bc6..f4a7ddf5 100644 --- a/langchain/vectorstores/deeplake.py +++ b/langchain/vectorstores/deeplake.py @@ -57,6 +57,7 @@ class DeepLake(VectorStore): dataset_path: str = _LANGCHAIN_DEFAULT_DEEPLAKE_PATH, token: Optional[str] = None, embedding_function: Optional[Embeddings] = None, + read_only: Optional[bool] = None, ) -> None: """Initialize with Deep Lake client.""" @@ -70,7 +71,7 @@ class DeepLake(VectorStore): self._deeplake = deeplake if deeplake.exists(dataset_path, token=token): - self.ds = deeplake.load(dataset_path, token=token) + self.ds = deeplake.load(dataset_path, token=token, read_only=read_only) logger.warning( f"Deep Lake Dataset in {dataset_path} already exists, " f"loading from the storage"