Added option to reduce verbosity of Deeplake integration (#4038)

The deeplake integration was/is very verbose (see e.g. [the
documentation
example](https://python.langchain.com/en/latest/use_cases/code/code-analysis-deeplake.html))
when loading or creating a deeplake dataset, with only limited options to
dial down verbosity.

Additionally, the warning that a "Deep Lake Dataset already exists" was
confusing, as there is, as far as I can tell, no other way to load a
dataset.

This small PR changes that and introduces an explicit `verbose` argument
which is also passed to the deeplake library.

There should be minimal changes to the default output (the loading line
is printed instead of warned to make it consistent with `ds.summary()`,
which also prints).
fix_agent_callbacks
Jan Philipp Harries 1 year ago committed by GitHub
parent 7f8727bbcd
commit 657f5f259f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -100,11 +100,13 @@ class DeepLake(VectorStore):
read_only: Optional[bool] = False,
ingestion_batch_size: int = 1024,
num_workers: int = 0,
verbose: bool = True,
**kwargs: Any,
) -> None:
"""Initialize with Deep Lake client."""
self.ingestion_batch_size = ingestion_batch_size
self.num_workers = num_workers
self.verbose = verbose
try:
import deeplake
@ -123,19 +125,29 @@ class DeepLake(VectorStore):
and "overwrite" not in kwargs
):
self.ds = deeplake.load(
dataset_path, token=token, read_only=read_only, **kwargs
dataset_path,
token=token,
read_only=read_only,
verbose=self.verbose,
**kwargs,
)
logger.warning(
f"Deep Lake Dataset in {dataset_path} already exists, "
f"loading from the storage"
)
self.ds.summary()
logger.info(f"Loading deeplake {dataset_path} from storage.")
if self.verbose:
print(
f"Deep Lake Dataset in {dataset_path} already exists, "
f"loading from the storage"
)
self.ds.summary()
else:
if "overwrite" in kwargs:
del kwargs["overwrite"]
self.ds = deeplake.empty(
dataset_path, token=token, overwrite=True, **kwargs
dataset_path,
token=token,
overwrite=True,
verbose=self.verbose,
**kwargs,
)
with self.ds:
@ -240,7 +252,8 @@ class DeepLake(VectorStore):
**kwargs,
)
self.ds.commit(allow_empty=True)
self.ds.summary()
if self.verbose:
self.ds.summary()
return ids
def _search_helper(

Loading…
Cancel
Save