forked from Archives/langchain
Added option to reduce verbosity of Deeplake integration (#4038)
The deeplake integration was/is very verbose (see e.g. [the documentation example](https://python.langchain.com/en/latest/use_cases/code/code-analysis-deeplake.html)) when loading or creating a deeplake dataset, with only limited options to dial down verbosity. Additionally, the warning that a "Deep Lake Dataset already exists" was confusing, as there is, as far as I can tell, no other way to load a dataset. This small PR changes that and introduces an explicit `verbose` argument which is also passed to the deeplake library. There should be minimal changes to the default output (the loading line is printed instead of warned, to make it consistent with `ds.summary()`, which also prints).
This commit is contained in:
parent
7f8727bbcd
commit
657f5f259f
@ -100,11 +100,13 @@ class DeepLake(VectorStore):
|
||||
read_only: Optional[bool] = False,
|
||||
ingestion_batch_size: int = 1024,
|
||||
num_workers: int = 0,
|
||||
verbose: bool = True,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""Initialize with Deep Lake client."""
|
||||
self.ingestion_batch_size = ingestion_batch_size
|
||||
self.num_workers = num_workers
|
||||
self.verbose = verbose
|
||||
|
||||
try:
|
||||
import deeplake
|
||||
@ -123,19 +125,29 @@ class DeepLake(VectorStore):
|
||||
and "overwrite" not in kwargs
|
||||
):
|
||||
self.ds = deeplake.load(
|
||||
dataset_path, token=token, read_only=read_only, **kwargs
|
||||
dataset_path,
|
||||
token=token,
|
||||
read_only=read_only,
|
||||
verbose=self.verbose,
|
||||
**kwargs,
|
||||
)
|
||||
logger.warning(
|
||||
f"Deep Lake Dataset in {dataset_path} already exists, "
|
||||
f"loading from the storage"
|
||||
)
|
||||
self.ds.summary()
|
||||
logger.info(f"Loading deeplake {dataset_path} from storage.")
|
||||
if self.verbose:
|
||||
print(
|
||||
f"Deep Lake Dataset in {dataset_path} already exists, "
|
||||
f"loading from the storage"
|
||||
)
|
||||
self.ds.summary()
|
||||
else:
|
||||
if "overwrite" in kwargs:
|
||||
del kwargs["overwrite"]
|
||||
|
||||
self.ds = deeplake.empty(
|
||||
dataset_path, token=token, overwrite=True, **kwargs
|
||||
dataset_path,
|
||||
token=token,
|
||||
overwrite=True,
|
||||
verbose=self.verbose,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
with self.ds:
|
||||
@ -240,7 +252,8 @@ class DeepLake(VectorStore):
|
||||
**kwargs,
|
||||
)
|
||||
self.ds.commit(allow_empty=True)
|
||||
self.ds.summary()
|
||||
if self.verbose:
|
||||
self.ds.summary()
|
||||
return ids
|
||||
|
||||
def _search_helper(
|
||||
|
Loading…
Reference in New Issue
Block a user