forked from Archives/langchain
Added option to reduce verbosity of Deeplake integration (#4038)
The deeplake integration was/is very verbose (see e.g. [the documentation example](https://python.langchain.com/en/latest/use_cases/code/code-analysis-deeplake.html)) when loading or creating a deeplake dataset, with only limited options to dial down verbosity. Additionally, the warning that a "Deep Lake Dataset already exists" was confusing, as there is, as far as I can tell, no other way to load a dataset. This small PR changes that and introduces an explicit `verbose` argument, which is also passed to the deeplake library. There should be minimal changes to the default output (the loading line is printed instead of warned, to make it consistent with `ds.summary()`, which also prints).
This commit is contained in:
parent
7f8727bbcd
commit
657f5f259f
@ -100,11 +100,13 @@ class DeepLake(VectorStore):
|
|||||||
read_only: Optional[bool] = False,
|
read_only: Optional[bool] = False,
|
||||||
ingestion_batch_size: int = 1024,
|
ingestion_batch_size: int = 1024,
|
||||||
num_workers: int = 0,
|
num_workers: int = 0,
|
||||||
|
verbose: bool = True,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Initialize with Deep Lake client."""
|
"""Initialize with Deep Lake client."""
|
||||||
self.ingestion_batch_size = ingestion_batch_size
|
self.ingestion_batch_size = ingestion_batch_size
|
||||||
self.num_workers = num_workers
|
self.num_workers = num_workers
|
||||||
|
self.verbose = verbose
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import deeplake
|
import deeplake
|
||||||
@ -123,19 +125,29 @@ class DeepLake(VectorStore):
|
|||||||
and "overwrite" not in kwargs
|
and "overwrite" not in kwargs
|
||||||
):
|
):
|
||||||
self.ds = deeplake.load(
|
self.ds = deeplake.load(
|
||||||
dataset_path, token=token, read_only=read_only, **kwargs
|
dataset_path,
|
||||||
|
token=token,
|
||||||
|
read_only=read_only,
|
||||||
|
verbose=self.verbose,
|
||||||
|
**kwargs,
|
||||||
)
|
)
|
||||||
logger.warning(
|
logger.info(f"Loading deeplake {dataset_path} from storage.")
|
||||||
f"Deep Lake Dataset in {dataset_path} already exists, "
|
if self.verbose:
|
||||||
f"loading from the storage"
|
print(
|
||||||
)
|
f"Deep Lake Dataset in {dataset_path} already exists, "
|
||||||
self.ds.summary()
|
f"loading from the storage"
|
||||||
|
)
|
||||||
|
self.ds.summary()
|
||||||
else:
|
else:
|
||||||
if "overwrite" in kwargs:
|
if "overwrite" in kwargs:
|
||||||
del kwargs["overwrite"]
|
del kwargs["overwrite"]
|
||||||
|
|
||||||
self.ds = deeplake.empty(
|
self.ds = deeplake.empty(
|
||||||
dataset_path, token=token, overwrite=True, **kwargs
|
dataset_path,
|
||||||
|
token=token,
|
||||||
|
overwrite=True,
|
||||||
|
verbose=self.verbose,
|
||||||
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
with self.ds:
|
with self.ds:
|
||||||
@ -240,7 +252,8 @@ class DeepLake(VectorStore):
|
|||||||
**kwargs,
|
**kwargs,
|
||||||
)
|
)
|
||||||
self.ds.commit(allow_empty=True)
|
self.ds.commit(allow_empty=True)
|
||||||
self.ds.summary()
|
if self.verbose:
|
||||||
|
self.ds.summary()
|
||||||
return ids
|
return ids
|
||||||
|
|
||||||
def _search_helper(
|
def _search_helper(
|
||||||
|
Loading…
Reference in New Issue
Block a user