diff --git a/langchain/vectorstores/deeplake.py b/langchain/vectorstores/deeplake.py
index dc9a2106..01ed62df 100644
--- a/langchain/vectorstores/deeplake.py
+++ b/langchain/vectorstores/deeplake.py
@@ -120,10 +120,12 @@ class DeepLake(VectorStore):
         self.dataset_path = dataset_path
         creds_args = {"creds": kwargs["creds"]} if "creds" in kwargs else {}
 
-        if (
-            deeplake.exists(dataset_path, token=token, **creds_args)
-            and "overwrite" not in kwargs
+        if deeplake.exists(dataset_path, token=token, **creds_args) and not kwargs.get(
+            "overwrite", False
         ):
+            # Consume a falsy "overwrite" flag so it is not left behind in kwargs.
+            kwargs.pop("overwrite", None)
+
             self.ds = deeplake.load(
                 dataset_path,
                 token=token,
diff --git a/tests/integration_tests/vectorstores/test_deeplake.py b/tests/integration_tests/vectorstores/test_deeplake.py
index f858c904..634d5237 100644
--- a/tests/integration_tests/vectorstores/test_deeplake.py
+++ b/tests/integration_tests/vectorstores/test_deeplake.py
@@ -83,6 +83,53 @@ def test_deeplakewith_persistence() -> None:
     # Or on program exit
 
 
+def test_deeplake_overwrite_flag() -> None:
+    """Test that persisted data is kept unless overwrite=True is passed."""
+    dataset_path = "./tests/persist_dir"
+    if deeplake.exists(dataset_path):
+        deeplake.delete(dataset_path)
+
+    texts = ["foo", "bar", "baz"]
+    docsearch = DeepLake.from_texts(
+        dataset_path=dataset_path,
+        texts=texts,
+        embedding=FakeEmbeddings(),
+    )
+    output = docsearch.similarity_search("foo", k=1)
+    assert output == [Document(page_content="foo")]
+
+    docsearch.persist()
+
+    # Get a new VectorStore from the persisted directory, with no overwrite (implicit)
+    docsearch = DeepLake(
+        dataset_path=dataset_path,
+        embedding_function=FakeEmbeddings(),
+    )
+    output = docsearch.similarity_search("foo", k=1)
+    # assert page still present
+    assert output == [Document(page_content="foo")]
+
+    # Get a new VectorStore from the persisted directory, with no overwrite (explicit)
+    docsearch = DeepLake(
+        dataset_path=dataset_path,
+        embedding_function=FakeEmbeddings(),
+        overwrite=False,
+    )
+    output = docsearch.similarity_search("foo", k=1)
+    # assert page still present
+    assert output == [Document(page_content="foo")]
+
+    # Get a new VectorStore from the persisted directory, with overwrite
+    docsearch = DeepLake(
+        dataset_path=dataset_path,
+        embedding_function=FakeEmbeddings(),
+        overwrite=True,
+    )
+    output = docsearch.similarity_search("foo", k=1)
+    # assert page no longer present
+    assert output == []
+
+
 def test_similarity_search(deeplake_datastore: DeepLake, distance_metric: str) -> None:
     """Test similarity search."""
     output = deeplake_datastore.similarity_search(