mirror of
https://github.com/hwchase17/langchain
synced 2024-11-08 07:10:35 +00:00
Bagatur/from texts bug fix (#8394)
--------- Co-authored-by: Davit Buniatyan <davit@loqsh.com> Co-authored-by: Davit Buniatyan <d@activeloop.ai> Co-authored-by: adilkhan <adilkhan.sarsen@nu.edu.kz> Co-authored-by: Ivo Stranic <istranic@gmail.com>
This commit is contained in:
parent
1efb9bae5f
commit
a1a650c743
@ -56,6 +56,7 @@ class DeepLake(VectorStore):
|
|||||||
self,
|
self,
|
||||||
dataset_path: str = _LANGCHAIN_DEFAULT_DEEPLAKE_PATH,
|
dataset_path: str = _LANGCHAIN_DEFAULT_DEEPLAKE_PATH,
|
||||||
token: Optional[str] = None,
|
token: Optional[str] = None,
|
||||||
|
embedding: Optional[Embeddings] = None,
|
||||||
embedding_function: Optional[Embeddings] = None,
|
embedding_function: Optional[Embeddings] = None,
|
||||||
read_only: bool = False,
|
read_only: bool = False,
|
||||||
ingestion_batch_size: int = 1000,
|
ingestion_batch_size: int = 1000,
|
||||||
@ -86,8 +87,11 @@ class DeepLake(VectorStore):
|
|||||||
token (str, optional): Activeloop token, for fetching credentials
|
token (str, optional): Activeloop token, for fetching credentials
|
||||||
to the dataset at path if it is a Deep Lake dataset.
|
to the dataset at path if it is a Deep Lake dataset.
|
||||||
Tokens are normally autogenerated. Optional.
|
Tokens are normally autogenerated. Optional.
|
||||||
embedding_function (str, optional): Function to convert
|
embedding (Embeddings, optional): Function to convert
|
||||||
either documents or query. Optional.
|
either documents or query. Optional.
|
||||||
|
embedding_function (Embeddings, optional): Function to convert
|
||||||
|
either documents or query. Optional. Deprecated: keeping this
|
||||||
|
parameter for backwards compatibility.
|
||||||
read_only (bool): Open dataset in read-only mode. Default is False.
|
read_only (bool): Open dataset in read-only mode. Default is False.
|
||||||
ingestion_batch_size (int): During data ingestion, data is divided
|
ingestion_batch_size (int): During data ingestion, data is divided
|
||||||
into batches. Batch size is the size of each batch.
|
into batches. Batch size is the size of each batch.
|
||||||
@ -138,9 +142,14 @@ class DeepLake(VectorStore):
|
|||||||
|
|
||||||
self.dataset_path = dataset_path
|
self.dataset_path = dataset_path
|
||||||
|
|
||||||
|
logger.warning(
|
||||||
|
"Using embedding function is deprecated and will be removed "
|
||||||
|
"in the future. Please use embedding instead."
|
||||||
|
)
|
||||||
|
|
||||||
self.vectorstore = DeepLakeVectorStore(
|
self.vectorstore = DeepLakeVectorStore(
|
||||||
path=self.dataset_path,
|
path=self.dataset_path,
|
||||||
embedding_function=embedding_function,
|
embedding_function=embedding_function or embedding,
|
||||||
read_only=read_only,
|
read_only=read_only,
|
||||||
token=token,
|
token=token,
|
||||||
exec_option=exec_option,
|
exec_option=exec_option,
|
||||||
@ -148,7 +157,7 @@ class DeepLake(VectorStore):
|
|||||||
**kwargs,
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
self._embedding_function = embedding_function
|
self._embedding_function = embedding_function or embedding
|
||||||
self._id_tensor_name = "ids" if "ids" in self.vectorstore.tensors() else "id"
|
self._id_tensor_name = "ids" if "ids" in self.vectorstore.tensors() else "id"
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@ -719,7 +728,6 @@ class DeepLake(VectorStore):
|
|||||||
metadatas: Optional[List[dict]] = None,
|
metadatas: Optional[List[dict]] = None,
|
||||||
ids: Optional[List[str]] = None,
|
ids: Optional[List[str]] = None,
|
||||||
dataset_path: str = _LANGCHAIN_DEFAULT_DEEPLAKE_PATH,
|
dataset_path: str = _LANGCHAIN_DEFAULT_DEEPLAKE_PATH,
|
||||||
embedding_function: Optional[Embeddings] = None,
|
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> DeepLake:
|
) -> DeepLake:
|
||||||
"""Create a Deep Lake dataset from a raw documents.
|
"""Create a Deep Lake dataset from a raw documents.
|
||||||
@ -761,20 +769,8 @@ class DeepLake(VectorStore):
|
|||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
DeepLake: Deep Lake dataset.
|
DeepLake: Deep Lake dataset.
|
||||||
|
|
||||||
Raises:
|
|
||||||
ValueError: If 'embedding' is provided in kwargs. This is deprecated,
|
|
||||||
please use `embedding_function` instead.
|
|
||||||
"""
|
"""
|
||||||
if embedding:
|
deeplake_dataset = cls(dataset_path=dataset_path, embedding=embedding, **kwargs)
|
||||||
raise ValueError(
|
|
||||||
"using embedding as embedidng_functions is deprecated. "
|
|
||||||
"Please use `embedding_function` instead."
|
|
||||||
)
|
|
||||||
|
|
||||||
deeplake_dataset = cls(
|
|
||||||
dataset_path=dataset_path, embedding_function=embedding_function, **kwargs
|
|
||||||
)
|
|
||||||
deeplake_dataset.add_texts(
|
deeplake_dataset.add_texts(
|
||||||
texts=texts,
|
texts=texts,
|
||||||
metadatas=metadatas,
|
metadatas=metadatas,
|
||||||
|
564
libs/langchain/poetry.lock
generated
564
libs/langchain/poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@ -137,10 +137,11 @@ def test_similarity_search(deeplake_datastore: DeepLake, distance_metric: str) -
|
|||||||
f"SELECT * WHERE "
|
f"SELECT * WHERE "
|
||||||
f"id=='{deeplake_datastore.vectorstore.dataset.id[0].numpy()[0]}'"
|
f"id=='{deeplake_datastore.vectorstore.dataset.id[0].numpy()[0]}'"
|
||||||
)
|
)
|
||||||
with pytest.raises(ValueError):
|
|
||||||
output = deeplake_datastore.similarity_search(
|
output = deeplake_datastore.similarity_search(
|
||||||
query="foo", tql_query=tql_query, k=1, distance_metric=distance_metric
|
query="foo", tql_query=tql_query, k=1, distance_metric=distance_metric
|
||||||
)
|
)
|
||||||
|
assert len(output) == 1
|
||||||
deeplake_datastore.delete_dataset()
|
deeplake_datastore.delete_dataset()
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user