@ -60,7 +60,7 @@ class DeepLake(VectorStore):
embedding : Optional [ Embeddings ] = None ,
embedding_function : Optional [ Embeddings ] = None ,
read_only : bool = False ,
ingestion_batch_size : int = 10 00 ,
ingestion_batch_size : int = 10 24 ,
num_workers : int = 0 ,
verbose : bool = True ,
exec_option : Optional [ str ] = None ,
@ -85,8 +85,19 @@ class DeepLake(VectorStore):
. . . )
Args :
dataset_path ( str ) : Path to existing dataset or where to create
a new one . Defaults to _LANGCHAIN_DEFAULT_DEEPLAKE_PATH .
dataset_path (str): The full path where the Deep Lake Vector Store is
stored. It can be:
- a Deep Lake cloud path of the form ``hub://org_id/dataset_name``.
Requires registration with Deep Lake.
- an s3 path of the form ``s3://bucketname/path/to/dataset``.
Credentials are required in either the environment or passed to
the creds argument.
- a local file system path of the form ``./path/to/dataset``
or ``~/path/to/dataset`` or ``path/to/dataset``.
- a memory path of the form ``mem://path/to/dataset`` which doesn't
save the dataset but keeps it in memory instead.
Should be used only for testing as it does not persist.
Defaults to _LANGCHAIN_DEFAULT_DEEPLAKE_PATH.
token ( str , optional ) : Activeloop token , for fetching credentials
to the dataset at path if it is a Deep Lake dataset .
Tokens are normally autogenerated . Optional .
@ -98,25 +109,29 @@ class DeepLake(VectorStore):
read_only ( bool ) : Open dataset in read - only mode . Default is False .
ingestion_batch_size ( int ) : During data ingestion , data is divided
into batches . Batch size is the size of each batch .
Default is 1000.
Default is 1024.
num_workers ( int ) : Number of workers to use during data ingestion .
Default is 0.
verbose ( bool ) : Print dataset summary after each operation .
Default is True .
exec_option (str, optional): DeepLakeVectorStore supports 3 ways to perform
searching - "python", "compute_engine" and "tensor_db" - as well as
"auto", which selects one of them automatically.
Default is None.
exec_option (str, optional): Default method for search execution.
It could be either ``"auto"``, ``"python"``, ``"compute_engine"``
or ``"tensor_db"``. If None (the default), it's set to ``"auto"``.
- ` ` auto ` ` - Selects the best execution method based on the storage
location of the Vector Store . It is the default option .
- ` ` python ` ` - Pure - python implementation that runs on the client .
WARNING : using this with big datasets can lead to memory
issues . Data can be stored anywhere .
- ``compute_engine`` - C++ implementation of the Deep Lake Compute
Engine that runs on the client. Can be used for any data stored in
or connected to Deep Lake. Not for in-memory or local datasets.
- ``tensor_db`` - Hosted Managed Tensor Database that is
responsible for storage and query execution. Only for data stored in
the Deep Lake Managed Database. Use runtime = {"db_engine": True}.
- ` ` python ` ` - Pure - python implementation that runs on the client and
can be used for data stored anywhere . WARNING : using this option
with big datasets is discouraged because it can lead to
memory issues .
- ``compute_engine`` - Performant C++ implementation of the Deep Lake
Compute Engine that runs on the client and can be used for any data
stored in or connected to Deep Lake. It cannot be used with
in-memory or local datasets.
- ``tensor_db`` - Performant and fully-hosted Managed Tensor Database
that is responsible for storage and query execution. Only available
for data stored in the Deep Lake Managed Database. Store datasets
in this database by specifying runtime = {"tensor_db": True}
during dataset creation.
runtime ( Dict , optional ) : Parameters for creating the Vector Store in
Deep Lake ' s Managed Tensor Database. Not applicable when loading an