|
|
|
@@ -56,6 +56,7 @@ class DeepLake(VectorStore):
|
|
|
|
|
self,
|
|
|
|
|
dataset_path: str = _LANGCHAIN_DEFAULT_DEEPLAKE_PATH,
|
|
|
|
|
token: Optional[str] = None,
|
|
|
|
|
embedding: Optional[Embeddings] = None,
|
|
|
|
|
embedding_function: Optional[Embeddings] = None,
|
|
|
|
|
read_only: bool = False,
|
|
|
|
|
ingestion_batch_size: int = 1000,
|
|
|
|
@@ -86,8 +87,11 @@ class DeepLake(VectorStore):
|
|
|
|
|
token (str, optional): Activeloop token, for fetching credentials
|
|
|
|
|
to the dataset at path if it is a Deep Lake dataset.
|
|
|
|
|
Tokens are normally autogenerated. Optional.
|
|
|
|
|
embedding_function (str, optional): Function to convert
|
|
|
|
|
embedding (Embeddings, optional): Function to convert
|
|
|
|
|
either documents or query. Optional.
|
|
|
|
|
embedding_function (Embeddings, optional): Function to convert
|
|
|
|
|
either documents or query. Optional. Deprecated: keeping this
|
|
|
|
|
parameter for backwards compatibility.
|
|
|
|
|
read_only (bool): Open dataset in read-only mode. Default is False.
|
|
|
|
|
ingestion_batch_size (int): During data ingestion, data is divided
|
|
|
|
|
into batches. Batch size is the size of each batch.
|
|
|
|
@@ -138,9 +142,14 @@ class DeepLake(VectorStore):
|
|
|
|
|
|
|
|
|
|
self.dataset_path = dataset_path
|
|
|
|
|
|
|
|
|
|
logger.warning(
|
|
|
|
|
"Using embedding function is deprecated and will be removed "
|
|
|
|
|
"in the future. Please use embedding instead."
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
self.vectorstore = DeepLakeVectorStore(
|
|
|
|
|
path=self.dataset_path,
|
|
|
|
|
embedding_function=embedding_function,
|
|
|
|
|
embedding_function=embedding_function or embedding,
|
|
|
|
|
read_only=read_only,
|
|
|
|
|
token=token,
|
|
|
|
|
exec_option=exec_option,
|
|
|
|
@@ -148,7 +157,7 @@ class DeepLake(VectorStore):
|
|
|
|
|
**kwargs,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
self._embedding_function = embedding_function
|
|
|
|
|
self._embedding_function = embedding_function or embedding
|
|
|
|
|
self._id_tensor_name = "ids" if "ids" in self.vectorstore.tensors() else "id"
|
|
|
|
|
|
|
|
|
|
@property
|
|
|
|
@@ -719,7 +728,6 @@ class DeepLake(VectorStore):
|
|
|
|
|
metadatas: Optional[List[dict]] = None,
|
|
|
|
|
ids: Optional[List[str]] = None,
|
|
|
|
|
dataset_path: str = _LANGCHAIN_DEFAULT_DEEPLAKE_PATH,
|
|
|
|
|
embedding_function: Optional[Embeddings] = None,
|
|
|
|
|
**kwargs: Any,
|
|
|
|
|
) -> DeepLake:
|
|
|
|
|
"""Create a Deep Lake dataset from a raw documents.
|
|
|
|
@@ -761,20 +769,8 @@ class DeepLake(VectorStore):
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
DeepLake: Deep Lake dataset.
|
|
|
|
|
|
|
|
|
|
Raises:
|
|
|
|
|
ValueError: If 'embedding' is provided in kwargs. This is deprecated,
|
|
|
|
|
please use `embedding_function` instead.
|
|
|
|
|
"""
|
|
|
|
|
if embedding:
|
|
|
|
|
raise ValueError(
|
|
|
|
|
"using embedding as embedidng_functions is deprecated. "
|
|
|
|
|
"Please use `embedding_function` instead."
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
deeplake_dataset = cls(
|
|
|
|
|
dataset_path=dataset_path, embedding_function=embedding_function, **kwargs
|
|
|
|
|
)
|
|
|
|
|
deeplake_dataset = cls(dataset_path=dataset_path, embedding=embedding, **kwargs)
|
|
|
|
|
deeplake_dataset.add_texts(
|
|
|
|
|
texts=texts,
|
|
|
|
|
metadatas=metadatas,
|
|
|
|
|