From 657f5f259f1764b25249eac9f202f9332aa90dca Mon Sep 17 00:00:00 2001 From: Jan Philipp Harries Date: Thu, 4 May 2023 07:16:27 +0200 Subject: [PATCH] Added option to reduce verbosity of Deeplake integration (#4038) The deeplake integration was/is very verbose (see e.g. [the documentation example](https://python.langchain.com/en/latest/use_cases/code/code-analysis-deeplake.html)) when loading or creating a deeplake dataset, with only limited options to dial down verbosity. Additionally, the warning that a "Deep Lake Dataset already exists" was confusing, as there is, as far as I can tell, no other way to load a dataset. This small PR changes that and introduces an explicit `verbose` argument, which is also passed to the deeplake library. There should be minimal changes to the default output (the loading line is printed instead of warned, to make it consistent with `ds.summary()`, which also prints). --- langchain/vectorstores/deeplake.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/langchain/vectorstores/deeplake.py b/langchain/vectorstores/deeplake.py index 7773e26bed..fbe2f6f734 100644 --- a/langchain/vectorstores/deeplake.py +++ b/langchain/vectorstores/deeplake.py @@ -100,11 +100,13 @@ class DeepLake(VectorStore): read_only: Optional[bool] = False, ingestion_batch_size: int = 1024, num_workers: int = 0, + verbose: bool = True, **kwargs: Any, ) -> None: """Initialize with Deep Lake client.""" self.ingestion_batch_size = ingestion_batch_size self.num_workers = num_workers + self.verbose = verbose try: import deeplake @@ -123,19 +125,29 @@ class DeepLake(VectorStore): and "overwrite" not in kwargs ): self.ds = deeplake.load( - dataset_path, token=token, read_only=read_only, **kwargs + dataset_path, + token=token, + read_only=read_only, + verbose=self.verbose, + **kwargs, ) - logger.warning( - f"Deep Lake Dataset in {dataset_path} already exists, " - f"loading from the storage" - ) - self.ds.summary() + 
logger.info(f"Loading deeplake {dataset_path} from storage.") + if self.verbose: + print( + f"Deep Lake Dataset in {dataset_path} already exists, " + f"loading from the storage" + ) + self.ds.summary() else: if "overwrite" in kwargs: del kwargs["overwrite"] self.ds = deeplake.empty( - dataset_path, token=token, overwrite=True, **kwargs + dataset_path, + token=token, + overwrite=True, + verbose=self.verbose, + **kwargs, ) with self.ds: @@ -240,7 +252,8 @@ class DeepLake(VectorStore): **kwargs, ) self.ds.commit(allow_empty=True) - self.ds.summary() + if self.verbose: + self.ds.summary() return ids def _search_helper(