Added option to reduce verbosity of Deeplake integration (#4038)

The deeplake integration was/is very verbose (see e.g. [the
documentation
example](https://python.langchain.com/en/latest/use_cases/code/code-analysis-deeplake.html))
when loading or creating a deeplake dataset, with only limited options to
dial down verbosity.

Additionally, the warning that a "Deep Lake Dataset already exists" was
confusing, as there is, as far as I can tell, no other way to load a
dataset.

This small PR changes that and introduces an explicit `verbose` argument
which is also passed to the deeplake library.

There should be minimal changes to the default output (the loading line
is printed instead of warned to make it consistent with `ds.summary()`,
which also prints).
This commit is contained in:
Jan Philipp Harries 2023-05-04 07:16:27 +02:00 committed by GitHub
parent 7f8727bbcd
commit 657f5f259f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -100,11 +100,13 @@ class DeepLake(VectorStore):
read_only: Optional[bool] = False,
ingestion_batch_size: int = 1024,
num_workers: int = 0,
verbose: bool = True,
**kwargs: Any,
) -> None:
"""Initialize with Deep Lake client."""
self.ingestion_batch_size = ingestion_batch_size
self.num_workers = num_workers
self.verbose = verbose
try:
import deeplake
@ -123,9 +125,15 @@ class DeepLake(VectorStore):
and "overwrite" not in kwargs
):
self.ds = deeplake.load(
dataset_path, token=token, read_only=read_only, **kwargs
dataset_path,
token=token,
read_only=read_only,
verbose=self.verbose,
**kwargs,
)
logger.warning(
logger.info(f"Loading deeplake {dataset_path} from storage.")
if self.verbose:
print(
f"Deep Lake Dataset in {dataset_path} already exists, "
f"loading from the storage"
)
@ -135,7 +143,11 @@ class DeepLake(VectorStore):
del kwargs["overwrite"]
self.ds = deeplake.empty(
dataset_path, token=token, overwrite=True, **kwargs
dataset_path,
token=token,
overwrite=True,
verbose=self.verbose,
**kwargs,
)
with self.ds:
@ -240,6 +252,7 @@ class DeepLake(VectorStore):
**kwargs,
)
self.ds.commit(allow_empty=True)
if self.verbose:
self.ds.summary()
return ids