Deep Lake mini upgrades (#3375)

Improvements
* set default num_workers for ingestion to 0
* upgraded notebooks for avoiding dataset creation ambiguity
* added `force_delete_dataset_by_path`
* bumped deeplake to 3.3.0
* creds arg passing to deeplake object that would allow custom S3

Notes
* please double check if poetry is not messed up (thanks!)

Asks
* Would be great to create a shared slack channel for quick questions

---------

Co-authored-by: Davit Buniatyan <d@activeloop.ai>
fix_agent_callbacks
Davit Buniatyan 1 year ago committed by GitHub
parent 93d53e417a
commit 2c0023393b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -22,7 +22,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
@ -33,7 +33,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
@ -46,7 +46,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
@ -60,16 +60,24 @@
"embeddings = OpenAIEmbeddings()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Creates a dataset locally at `./deeplake/`, then runs similiarity search "
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 49,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"mem://langchain loaded successfully.\n"
"./my_deeplake/ loaded successfully.\n"
]
},
{
@ -83,7 +91,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset(path='mem://langchain', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
"Dataset(path='./my_deeplake/', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
"\n",
" tensor htype shape dtype compression\n",
" ------- ------- ------- ------- ------- \n",
@ -95,15 +103,17 @@
}
],
"source": [
"db = DeepLake.from_documents(docs, embeddings)\n",
"\n",
"db = DeepLake(dataset_path=\"./my_deeplake/\", embedding_function=embeddings, overwrite=True)\n",
"db.add_documents(docs)\n",
"# or shorter\n",
"# db = DeepLake.from_documents(docs, dataset_path=\"./my_deeplake/\", embedding=embeddings, overwrite=True)\n",
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"docs = db.similarity_search(query)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 50,
"metadata": {},
"outputs": [
{
@ -124,6 +134,62 @@
"print(docs[0].page_content)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Later, you can reload the dataset without recomputing embeddings"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"./my_deeplake/ loaded successfully.\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Deep Lake Dataset in ./my_deeplake/ already exists, loading from the storage\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset(path='./my_deeplake/', read_only=True, tensors=['embedding', 'ids', 'metadata', 'text'])\n",
"\n",
" tensor htype shape dtype compression\n",
" ------- ------- ------- ------- ------- \n",
" embedding generic (4, 1536) float32 None \n",
" ids text (4, 1) str None \n",
" metadata json (4, 1) str None \n",
" text text (4, 1) str None \n"
]
}
],
"source": [
"db = DeepLake(dataset_path=\"./my_deeplake/\", embedding_function=embeddings, read_only=True)\n",
"docs = db.similarity_search(query)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Deep Lake, for now, is single writer and multiple reader. Setting `read_only=True` helps to avoid acquring the writer lock."
]
},
{
"attachments": {},
"cell_type": "markdown",
@ -134,14 +200,14 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 52,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/media/sdb/davit/.local/lib/python3.10/site-packages/langchain/llms/openai.py:624: UserWarning: You are trying to use a chat model. This way of initializing it is no longer supported. Instead, please use: `from langchain.chat_models import ChatOpenAI`\n",
"/media/sdb/davit/Git/experiments/langchain/langchain/llms/openai.py:672: UserWarning: You are trying to use a chat model. This way of initializing it is no longer supported. Instead, please use: `from langchain.chat_models import ChatOpenAI`\n",
" warnings.warn(\n"
]
}
@ -155,16 +221,16 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 53,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'The president nominated Circuit Court of Appeals Judge Ketanji Brown Jackson for the United States Supreme Court and praised her qualifications and broad support from both Democrats and Republicans.'"
"\"The president nominated Ketanji Brown Jackson to serve on the United States Supreme Court, describing her as one of the nation's top legal minds and a consensus builder with a background in private practice and public defense, and noting that she has received broad support from both Democrats and Republicans.\""
]
},
"execution_count": 10,
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
@ -184,14 +250,14 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 54,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"mem://langchain loaded successfully.\n"
"./my_deeplake/ loaded successfully.\n"
]
},
{
@ -205,14 +271,14 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset(path='mem://langchain', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
"Dataset(path='./my_deeplake/', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
"\n",
" tensor htype shape dtype compression\n",
" ------- ------- ------- ------- ------- \n",
" embedding generic (42, 1536) float32 None \n",
" ids text (42, 1) str None \n",
" metadata json (42, 1) str None \n",
" text text (42, 1) str None \n"
" tensor htype shape dtype compression\n",
" ------- ------- ------- ------- ------- \n",
" embedding generic (4, 1536) float32 None \n",
" ids text (4, 1) str None \n",
" metadata json (4, 1) str None \n",
" text text (4, 1) str None \n"
]
},
{
@ -227,31 +293,29 @@
"for d in docs:\n",
" d.metadata['year'] = random.randint(2012, 2014)\n",
"\n",
"db = DeepLake.from_documents(docs, embeddings)"
"db = DeepLake.from_documents(docs, embeddings, dataset_path=\"./my_deeplake/\", overwrite=True)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 55,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 42/42 [00:00<00:00, 3456.17it/s]\n"
"100%|██████████| 4/4 [00:00<00:00, 1080.24it/s]\n"
]
},
{
"data": {
"text/plain": [
"[Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, weve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWeve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWere putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWere securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013}),\n",
" Document(page_content='And for our LGBTQ+ Americans, lets finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \\n\\nAs I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \\n\\nWhile it often appears that we never agree, that isnt true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \\n\\nAnd soon, well strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \\n\\nSo tonight Im offering a Unity Agenda for the Nation. Four big things we can do together. \\n\\nFirst, beat the opioid epidemic.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013}),\n",
" Document(page_content='Vice President Harris and I ran for office with a new economic vision for America. \\n\\nInvest in America. Educate Americans. Grow the workforce. Build the economy from the bottom up \\nand the middle out, not from the top down. \\n\\nBecause we know that when the middle class grows, the poor have a ladder up and the wealthy do very well. \\n\\nAmerica used to have the best roads, bridges, and airports on Earth. \\n\\nNow our infrastructure is ranked 13th in the world. \\n\\nWe wont be able to compete for the jobs of the 21st Century if we dont fix that. \\n\\nThats why it was so important to pass the Bipartisan Infrastructure Law—the most sweeping investment to rebuild America in history. \\n\\nThis was a bipartisan effort, and I want to thank the members of both parties who worked to make it happen. \\n\\nWere done talking about infrastructure weeks. \\n\\nWere going to have an infrastructure decade.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013}),\n",
" Document(page_content='It is going to transform America and put us on a path to win the economic competition of the 21st Century that we face with the rest of the world—particularly with China. \\n\\nAs Ive told Xi Jinping, it is never a good bet to bet against the American people. \\n\\nWell create good jobs for millions of Americans, modernizing roads, airports, ports, and waterways all across America. \\n\\nAnd well do it all to withstand the devastating effects of the climate crisis and promote environmental justice. \\n\\nWell build a national network of 500,000 electric vehicle charging stations, begin to replace poisonous lead pipes—so every child—and every American—has clean water to drink at home and at school, provide affordable high-speed internet for every American—urban, suburban, rural, and tribal communities. \\n\\n4,000 projects have already been announced. \\n\\nAnd tonight, Im announcing that this year we will start fixing over 65,000 miles of highway and 1,500 bridges in disrepair.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013})]"
"[Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013}),\n",
" Document(page_content='And for our LGBTQ+ Americans, lets finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \\n\\nAs I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \\n\\nWhile it often appears that we never agree, that isnt true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \\n\\nAnd soon, well strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \\n\\nSo tonight Im offering a Unity Agenda for the Nation. Four big things we can do together. \\n\\nFirst, beat the opioid epidemic.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013})]"
]
},
"execution_count": 12,
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
@ -271,19 +335,19 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2012}),\n",
" Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, weve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWeve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWere putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWere securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013}),\n",
"[Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013}),\n",
" Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, weve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWeve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWere putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWere securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2012}),\n",
" Document(page_content='And for our LGBTQ+ Americans, lets finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \\n\\nAs I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \\n\\nWhile it often appears that we never agree, that isnt true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \\n\\nAnd soon, well strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \\n\\nSo tonight Im offering a Unity Agenda for the Nation. Four big things we can do together. \\n\\nFirst, beat the opioid epidemic.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013}),\n",
" Document(page_content='Tonight, Im announcing a crackdown on these companies overcharging American businesses and consumers. \\n\\nAnd as Wall Street firms take over more nursing homes, quality in those homes has gone down and costs have gone up. \\n\\nThat ends on my watch. \\n\\nMedicare is going to set higher standards for nursing homes and make sure your loved ones get the care they deserve and expect. \\n\\nWell also cut costs and keep the economy going strong by giving workers a fair shot, provide more training and apprenticeships, hire them based on their skills not degrees. \\n\\nLets pass the Paycheck Fairness Act and paid leave. \\n\\nRaise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty. \\n\\nLets increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls Americas best-kept secret: community colleges.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2014})]"
" Document(page_content='Tonight, Im announcing a crackdown on these companies overcharging American businesses and consumers. \\n\\nAnd as Wall Street firms take over more nursing homes, quality in those homes has gone down and costs have gone up. \\n\\nThat ends on my watch. \\n\\nMedicare is going to set higher standards for nursing homes and make sure your loved ones get the care they deserve and expect. \\n\\nWell also cut costs and keep the economy going strong by giving workers a fair shot, provide more training and apprenticeships, hire them based on their skills not degrees. \\n\\nLets pass the Paycheck Fairness Act and paid leave. \\n\\nRaise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty. \\n\\nLets increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls Americas best-kept secret: community colleges.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2012})]"
]
},
"execution_count": 13,
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
@ -303,19 +367,19 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2012}),\n",
" Document(page_content='One was stationed at bases and breathing in toxic smoke from “burn pits” that incinerated wastes of war—medical and hazard material, jet fuel, and more. \\n\\nWhen they came home, many of the worlds fittest and best trained warriors were never the same. \\n\\nHeadaches. Numbness. Dizziness. \\n\\nA cancer that would put them in a flag-draped coffin. \\n\\nI know. \\n\\nOne of those soldiers was my son Major Beau Biden. \\n\\nWe dont know for sure if a burn pit was the cause of his brain cancer, or the diseases of so many of our troops. \\n\\nBut Im committed to finding out everything we can. \\n\\nCommitted to military families like Danielle Robinson from Ohio. \\n\\nThe widow of Sergeant First Class Heath Robinson. \\n\\nHe was born a soldier. Army National Guard. Combat medic in Kosovo and Iraq. \\n\\nStationed near Baghdad, just yards from burn pits the size of football fields. \\n\\nHeaths widow Danielle is here with us tonight. They loved going to Ohio State football games. He loved building Legos with their daughter.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2014}),\n",
" Document(page_content='As Ohio Senator Sherrod Brown says, “Its time to bury the label “Rust Belt.” \\n\\nIts time. \\n\\nBut with all the bright spots in our economy, record job growth and higher wages, too many families are struggling to keep up with the bills. \\n\\nInflation is robbing them of the gains they might otherwise feel. \\n\\nI get it. Thats why my top priority is getting prices under control. \\n\\nLook, our economy roared back faster than most predicted, but the pandemic meant that businesses had a hard time hiring enough workers to keep up production in their factories. \\n\\nThe pandemic also disrupted global supply chains. \\n\\nWhen factories close, it takes longer to make goods and get them from the warehouse to the store, and prices go up. \\n\\nLook at cars. \\n\\nLast year, there werent enough semiconductors to make all the cars that people wanted to buy. \\n\\nAnd guess what, prices of automobiles went up. \\n\\nSo—we have a choice. \\n\\nOne way to fight inflation is to drive down wages and make Americans poorer.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2012}),\n",
" Document(page_content='We cant change how divided weve been. But we can change how we move forward—on COVID-19 and other issues we must face together. \\n\\nI recently visited the New York City Police Department days after the funerals of Officer Wilbert Mora and his partner, Officer Jason Rivera. \\n\\nThey were responding to a 9-1-1 call when a man shot and killed them with a stolen gun. \\n\\nOfficer Mora was 27 years old. \\n\\nOfficer Rivera was 22. \\n\\nBoth Dominican Americans whod grown up on the same streets they later chose to patrol as police officers. \\n\\nI spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves. \\n\\nIve worked on these issues a long time. \\n\\nI know what works: Investing in crime preventionand community police officers wholl walk the beat, wholl know the neighborhood, and who can restore trust and safety.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2012})]"
"[Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013}),\n",
" Document(page_content='Tonight, Im announcing a crackdown on these companies overcharging American businesses and consumers. \\n\\nAnd as Wall Street firms take over more nursing homes, quality in those homes has gone down and costs have gone up. \\n\\nThat ends on my watch. \\n\\nMedicare is going to set higher standards for nursing homes and make sure your loved ones get the care they deserve and expect. \\n\\nWell also cut costs and keep the economy going strong by giving workers a fair shot, provide more training and apprenticeships, hire them based on their skills not degrees. \\n\\nLets pass the Paycheck Fairness Act and paid leave. \\n\\nRaise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty. \\n\\nLets increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls Americas best-kept secret: community colleges.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2012}),\n",
" Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, weve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWeve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWere putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWere securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2012}),\n",
" Document(page_content='And for our LGBTQ+ Americans, lets finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \\n\\nAs I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \\n\\nWhile it often appears that we never agree, that isnt true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \\n\\nAnd soon, well strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \\n\\nSo tonight Im offering a Unity Agenda for the Nation. Four big things we can do together. \\n\\nFirst, beat the opioid epidemic.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013})]"
]
},
"execution_count": 14,
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
@ -324,6 +388,46 @@
"db.max_marginal_relevance_search('What did the president say about Ketanji Brown Jackson?')"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Delete dataset"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [],
"source": [
"db.delete_dataset()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"and if delete fails you can also force delete"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": []
}
],
"source": [
"DeepLake.force_delete_by_path(\"./my_deeplake\")"
]
},
{
"attachments": {},
"cell_type": "markdown",
@ -335,7 +439,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
@ -344,7 +448,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 63,
"metadata": {},
"outputs": [
{
@ -352,57 +456,122 @@
"output_type": "stream",
"text": [
"Your Deep Lake dataset has been successfully created!\n",
"The dataset is private so make sure you are logged in!\n"
"The dataset is private so make sure you are logged in!\n",
"This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/davitbun/langchain_test\n",
"hub://davitbun/langchain_test loaded successfully.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\\"
"Evaluating ingest: 100%|██████████| 1/1 [00:14<00:00\n",
" \r"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/davitbun/linkedin\n"
"Dataset(path='hub://davitbun/langchain_test', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
"\n",
" tensor htype shape dtype compression\n",
" ------- ------- ------- ------- ------- \n",
" embedding generic (4, 1536) float32 None \n",
" ids text (4, 1) str None \n",
" metadata json (4, 1) str None \n",
" text text (4, 1) str None \n"
]
},
{
"name": "stderr",
"data": {
"text/plain": [
"['d6d6ccb4-e187-11ed-b66d-41c5f7b85421',\n",
" 'd6d6ccb5-e187-11ed-b66d-41c5f7b85421',\n",
" 'd6d6ccb6-e187-11ed-b66d-41c5f7b85421',\n",
" 'd6d6ccb7-e187-11ed-b66d-41c5f7b85421']"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Embed and store the texts\n",
"username = \"<username>\" # your username on app.activeloop.ai \n",
"dataset_path = f\"hub://{username}/langchain_test\" # could be also ./local/path (much faster locally), s3://bucket/path/to/dataset, gcs://path/to/dataset, etc.\n",
"\n",
"embedding = OpenAIEmbeddings()\n",
"db = DeepLake(dataset_path=dataset_path, embedding_function=embeddings, overwrite=True)\n",
"db.add_documents(docs)"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" \r"
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
"\n",
"Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
"\n",
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
"\n",
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.\n"
]
},
}
],
"source": [
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"docs = db.similarity_search(query)\n",
"print(docs[0].page_content)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Creating dataset on AWS S3"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"hub://davitbun/linkedin loaded successfully.\n"
"s3://hub-2.0-datasets-n/langchain_test loaded successfully.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating ingest: 100%|██████████| 1/1 [00:23<00:00\n",
"/"
"Evaluating ingest: 100%|██████████| 1/1 [00:10<00:00\n",
"\\"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset(path='hub://davitbun/linkedin', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
"Dataset(path='s3://hub-2.0-datasets-n/langchain_test', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
"\n",
" tensor htype shape dtype compression\n",
" ------- ------- ------- ------- ------- \n",
" embedding generic (42, 1536) float32 None \n",
" ids text (42, 1) str None \n",
" metadata json (42, 1) str None \n",
" text text (42, 1) str None \n"
" tensor htype shape dtype compression\n",
" ------- ------- ------- ------- ------- \n",
" embedding generic (4, 1536) float32 None \n",
" ids text (4, 1) str None \n",
" metadata json (4, 1) str None \n",
" text text (4, 1) str None \n"
]
},
{
@ -414,69 +583,212 @@
}
],
"source": [
"# Embed and store the texts\n",
"dataset_path = f\"hub://{USERNAME}/{DATASET_NAME}\" # could be also ./local/path (much faster locally), s3://bucket/path/to/dataset, gcs://path/to/dataset, etc.\n",
"dataset_path = f\"s3://BUCKET/langchain_test\" # could be also ./local/path (much faster locally), hub://bucket/path/to/dataset, gcs://path/to/dataset, etc.\n",
"\n",
"embedding = OpenAIEmbeddings()\n",
"vectordb = DeepLake.from_documents(documents=docs, embedding=embedding, dataset_path=dataset_path)"
"db = DeepLake.from_documents(docs, dataset_path=dataset_path, embedding=embeddings, overwrite=True, creds = {\n",
" 'aws_access_key_id': os.environ['AWS_ACCESS_KEY_ID'], \n",
" 'aws_secret_access_key': os.environ['AWS_SECRET_ACCESS_KEY'], \n",
" 'aws_session_token': os.environ['AWS_SESSION_TOKEN'], # Optional\n",
"})"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Deep Lake API\n",
"you can access the Deep Lake dataset at `db.ds`"
]
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 66,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
"\n",
"Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
"\n",
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
"Dataset(path='hub://davitbun/langchain_test', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
"\n",
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.\n"
" tensor htype shape dtype compression\n",
" ------- ------- ------- ------- ------- \n",
" embedding generic (4, 1536) float32 None \n",
" ids text (4, 1) str None \n",
" metadata json (4, 1) str None \n",
" text text (4, 1) str None \n"
]
}
],
"source": [
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"docs = db.similarity_search(query)\n",
"print(docs[0].page_content)"
"# get structure of the dataset\n",
"db.ds.summary()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 67,
"metadata": {},
"outputs": [],
"source": [
"# get embeddings numpy array\n",
"embeds = db.ds.embedding.numpy()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Transfer local dataset to cloud\n",
"Copy already created dataset to the cloud. You can also transfer from cloud to local."
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Copying dataset: 100%|██████████| 56/56 [00:38<00:00\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset(path='hub://davitbun/linkedin', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
"\n",
" tensor htype shape dtype compression\n",
" ------- ------- ------- ------- ------- \n",
" embedding generic (42, 1536) float32 None \n",
" ids text (42, 1) str None \n",
" metadata json (42, 1) str None \n",
" text text (42, 1) str None \n"
"This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/davitbun/langchain_test_copy\n",
"Your Deep Lake dataset has been successfully created!\n",
"The dataset is private so make sure you are logged in!\n"
]
},
{
"data": {
"text/plain": [
"Dataset(path='hub://davitbun/langchain_test_copy', tensors=['embedding', 'ids', 'metadata', 'text'])"
]
},
"execution_count": 73,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vectordb.ds.summary()"
"import deeplake\n",
"username = \"davitbun\" # your username on app.activeloop.ai \n",
"source = f\"hub://{username}/langchain_test\" # could be local, s3, gcs, etc.\n",
"destination = f\"hub://{username}/langchain_test_copy\" # could be local, s3, gcs, etc.\n",
"\n",
"deeplake.deepcopy(src=source, dest=destination, overwrite=True)\n"
]
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 76,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/davitbun/langchain_test_copy\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"hub://davitbun/langchain_test_copy loaded successfully.\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Deep Lake Dataset in hub://davitbun/langchain_test_copy already exists, loading from the storage\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset(path='hub://davitbun/langchain_test_copy', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
"\n",
" tensor htype shape dtype compression\n",
" ------- ------- ------- ------- ------- \n",
" embedding generic (4, 1536) float32 None \n",
" ids text (4, 1) str None \n",
" metadata json (4, 1) str None \n",
" text text (4, 1) str None \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating ingest: 100%|██████████| 1/1 [00:31<00:00\n",
"-"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset(path='hub://davitbun/langchain_test_copy', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
"\n",
" tensor htype shape dtype compression\n",
" ------- ------- ------- ------- ------- \n",
" embedding generic (8, 1536) float32 None \n",
" ids text (8, 1) str None \n",
" metadata json (8, 1) str None \n",
" text text (8, 1) str None \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
]
},
{
"data": {
"text/plain": [
"['ad42f3fe-e188-11ed-b66d-41c5f7b85421',\n",
" 'ad42f3ff-e188-11ed-b66d-41c5f7b85421',\n",
" 'ad42f400-e188-11ed-b66d-41c5f7b85421',\n",
" 'ad42f401-e188-11ed-b66d-41c5f7b85421']"
]
},
"execution_count": 76,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"embeddings = vectordb.ds.embedding.numpy()"
"db = DeepLake(dataset_path=destination, embedding_function=embeddings)\n",
"db.add_documents(docs)"
]
},
{

@ -40,8 +40,24 @@
"from langchain.vectorstores import DeepLake\n",
"\n",
"os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')\n",
"os.environ['ACTIVELOOP_TOKEN'] = getpass.getpass('Activeloop Token:')\n",
"embeddings = OpenAIEmbeddings()"
"os.environ['ACTIVELOOP_TOKEN'] = getpass.getpass('Activeloop Token:')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"embeddings = OpenAIEmbeddings(disallowed_special=())"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"disallowed_special=() is required to avoid `Exception: 'utf-8' codec can't decode byte 0xff in position 0: invalid start byte` from tiktoken for some repositories"
]
},
{
@ -120,7 +136,9 @@
"metadata": {},
"outputs": [],
"source": [
"db = DeepLake.from_documents(texts, embeddings, dataset_path=\"hub://davitbun/twitter-algorithm\")"
"username = \"davitbun\" # replace with your username from app.activeloop.ai\n",
"db = DeepLake(dataset_path=f\"hub://{username}/twitter-algorithm\", embedding_function=embeddings, public=True) #dataset would be publicly available\n",
"db.add_documents(texts)"
]
},
{
@ -133,61 +151,9 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"-"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/davitbun/twitter-algorithm\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"-"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"hub://davitbun/twitter-algorithm loaded successfully.\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Deep Lake Dataset in hub://davitbun/twitter-algorithm already exists, loading from the storage\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset(path='hub://davitbun/twitter-algorithm', read_only=True, tensors=['embedding', 'ids', 'metadata', 'text'])\n",
"\n",
" tensor htype shape dtype compression\n",
" ------- ------- ------- ------- ------- \n",
" embedding generic (23152, 1536) float32 None \n",
" ids text (23152, 1) str None \n",
" metadata json (23152, 1) str None \n",
" text text (23152, 1) str None \n"
]
}
],
"outputs": [],
"source": [
"db = DeepLake(dataset_path=\"hub://davitbun/twitter-algorithm\", read_only=True, embedding_function=embeddings)"
]
@ -203,7 +169,7 @@
"retriever.search_kwargs['distance_metric'] = 'cos'\n",
"retriever.search_kwargs['fetch_k'] = 100\n",
"retriever.search_kwargs['maximal_marginal_relevance'] = True\n",
"retriever.search_kwargs['k'] = 20"
"retriever.search_kwargs['k'] = 10"
]
},
{
@ -241,7 +207,7 @@
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.chains import ConversationalRetrievalChain\n",
"\n",
"model = ChatOpenAI(model='gpt-4') # 'gpt-3.5-turbo',\n",
"model = ChatOpenAI(model='gpt-3.5-turbo') # switch to 'gpt-4'\n",
"qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)"
]
},

@ -108,7 +108,7 @@
"\n",
"dataset_path = 'hub://'+org+'/data'\n",
"embeddings = OpenAIEmbeddings()\n",
"db = DeepLake.from_documents(texts, embeddings, dataset_path=dataset_path)"
"db = DeepLake.from_documents(texts, embeddings, dataset_path=dataset_path, overwrite=True)"
]
},
{

@ -43,6 +43,9 @@ def vector_search(
returns:
nearest_indices: List, indices of nearest neighbors
"""
if data_vectors.shape[0] == 0:
return [], []
# Calculate the distance between the query_vector and all data_vectors
distances = distance_metric_map[distance_metric](query_embedding, data_vectors)
nearest_indices = np.argsort(distances)
@ -87,7 +90,7 @@ class DeepLake(VectorStore):
vectorstore = DeepLake("langchain_store", embeddings.embed_query)
"""
_LANGCHAIN_DEFAULT_DEEPLAKE_PATH = "mem://langchain"
_LANGCHAIN_DEFAULT_DEEPLAKE_PATH = "./deeplake/"
def __init__(
self,
@ -96,7 +99,7 @@ class DeepLake(VectorStore):
embedding_function: Optional[Embeddings] = None,
read_only: Optional[bool] = False,
ingestion_batch_size: int = 1024,
num_workers: int = 4,
num_workers: int = 0,
**kwargs: Any,
) -> None:
"""Initialize with Deep Lake client."""
@ -112,8 +115,13 @@ class DeepLake(VectorStore):
"Please install it with `pip install deeplake`."
)
self._deeplake = deeplake
self.dataset_path = dataset_path
creds_args = {"creds": kwargs["creds"]} if "creds" in kwargs else {}
if deeplake.exists(dataset_path, token=token):
if (
deeplake.exists(dataset_path, token=token, **creds_args)
and "overwrite" not in kwargs
):
self.ds = deeplake.load(
dataset_path, token=token, read_only=read_only, **kwargs
)
@ -123,6 +131,9 @@ class DeepLake(VectorStore):
)
self.ds.summary()
else:
if "overwrite" in kwargs:
del kwargs["overwrite"]
self.ds = deeplake.empty(
dataset_path, token=token, overwrite=True, **kwargs
)
@ -215,6 +226,9 @@ class DeepLake(VectorStore):
)
batch_size = min(self.ingestion_batch_size, len(elements))
if batch_size == 0:
return []
batched = [
elements[i : i + batch_size] for i in range(0, len(elements), batch_size)
]
@ -222,7 +236,8 @@ class DeepLake(VectorStore):
ingest().eval(
batched,
self.ds,
num_workers=min(self.num_workers, len(batched) // self.num_workers),
num_workers=min(self.num_workers, len(batched) // max(self.num_workers, 1)),
**kwargs,
)
self.ds.commit(allow_empty=True)
self.ds.summary()
@ -443,8 +458,8 @@ class DeepLake(VectorStore):
) -> DeepLake:
"""Create a Deep Lake dataset from a raw documents.
If a dataset_path is specified, the dataset will be persisted there.
Otherwise, the data will be ephemeral in-memory.
If a dataset_path is specified, the dataset will be persisted in that location,
otherwise by default at `./deeplake`
Args:
path (str, pathlib.Path): - The full path to the dataset. Can be:
@ -493,7 +508,7 @@ class DeepLake(VectorStore):
Defaults to None.
"""
if delete_all:
self.ds.delete()
self.ds.delete(large_ok=True)
return True
view = None
@ -515,6 +530,18 @@ class DeepLake(VectorStore):
return True
@classmethod
def force_delete_by_path(cls, path: str) -> None:
"""Force delete dataset by path"""
try:
import deeplake
except ImportError:
raise ValueError(
"Could not import deeplake python package. "
"Please install it with `pip install deeplake`."
)
deeplake.delete(path, large_ok=True, force=True)
def delete_dataset(self) -> None:
"""Delete the collection."""
self.delete(delete_all=True)

19
poetry.lock generated

@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry and should not be changed by hand.
# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand.
[[package]]
name = "absl-py"
@ -1413,17 +1413,17 @@ files = [
[[package]]
name = "deeplake"
version = "3.2.22"
version = "3.3.0"
description = "Activeloop Deep Lake"
category = "main"
optional = false
python-versions = "*"
files = [
{file = "deeplake-3.2.22.tar.gz", hash = "sha256:068280561366dd1bd891d3ffda8638ec59860a23b9426815a484b0591ab467a6"},
{file = "deeplake-3.3.0.tar.gz", hash = "sha256:161663ccba922156912a0ddace7133284487732b8d671fc64c74519ccce62d96"},
]
[package.dependencies]
aioboto3 = {version = "10.4.0", markers = "python_version >= \"3.7\" and sys_platform != \"win32\""}
aioboto3 = {version = ">=10.4.0", markers = "python_version >= \"3.7\" and sys_platform != \"win32\""}
boto3 = "*"
click = "*"
humbug = ">=0.3.1"
@ -1436,11 +1436,10 @@ pyjwt = "*"
tqdm = "*"
[package.extras]
all = ["IPython", "av (>=8.1.0)", "flask", "google-api-python-client (>=2.31.0,<2.32.0)", "google-auth (>=2.0.1,<2.1.0)", "google-auth-oauthlib (>=0.4.5,<0.5.0)", "google-cloud-storage (>=1.42.0,<1.43.0)", "laspy", "libdeeplake (==0.0.41)", "nibabel", "oauth2client (>=4.1.3,<4.2.0)", "pydicom"]
all = ["IPython", "av (>=8.1.0)", "flask", "google-api-python-client (>=2.31.0,<2.32.0)", "google-auth (>=2.0.1,<2.1.0)", "google-auth-oauthlib (>=0.4.5,<0.5.0)", "google-cloud-storage (>=1.42.0,<1.43.0)", "laspy", "nibabel", "oauth2client (>=4.1.3,<4.2.0)", "pydicom"]
audio = ["av (>=8.1.0)"]
av = ["av (>=8.1.0)"]
dicom = ["nibabel", "pydicom"]
enterprise = ["libdeeplake (==0.0.41)", "pyjwt"]
gcp = ["google-auth (>=2.0.1,<2.1.0)", "google-auth-oauthlib (>=0.4.5,<0.5.0)", "google-cloud-storage (>=1.42.0,<1.43.0)"]
gdrive = ["google-api-python-client (>=2.31.0,<2.32.0)", "google-auth (>=2.0.1,<2.1.0)", "google-auth-oauthlib (>=0.4.5,<0.5.0)", "oauth2client (>=4.1.3,<4.2.0)"]
medical = ["nibabel", "pydicom"]
@ -7508,7 +7507,7 @@ files = [
]
[package.dependencies]
greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"}
greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\" and platform_machine == \"aarch64\" or python_version >= \"3\" and platform_machine == \"ppc64le\" or python_version >= \"3\" and platform_machine == \"x86_64\" or python_version >= \"3\" and platform_machine == \"amd64\" or python_version >= \"3\" and platform_machine == \"AMD64\" or python_version >= \"3\" and platform_machine == \"win32\" or python_version >= \"3\" and platform_machine == \"WIN32\""}
[package.extras]
aiomysql = ["aiomysql", "greenlet (!=0.4.17)"]
@ -9268,13 +9267,13 @@ cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\
cffi = ["cffi (>=1.11)"]
[extras]
all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence-transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "pinecone-text", "weaviate-client", "redis", "google-api-python-client", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "pgvector", "psycopg2-binary", "pyowm", "pytesseract", "html2text", "atlassian-python-api", "gptcache", "duckduckgo-search", "arxiv", "azure-identity", "clickhouse-connect"]
all = ["aleph-alpha-client", "anthropic", "arxiv", "atlassian-python-api", "azure-identity", "beautifulsoup4", "clickhouse-connect", "cohere", "deeplake", "duckduckgo-search", "elasticsearch", "faiss-cpu", "google-api-python-client", "google-search-results", "gptcache", "html2text", "huggingface_hub", "jina", "jinja2", "manifest-ml", "networkx", "nlpcloud", "nltk", "nomic", "openai", "opensearch-py", "pgvector", "pinecone-client", "pinecone-text", "psycopg2-binary", "pyowm", "pypdf", "pytesseract", "qdrant-client", "redis", "sentence-transformers", "spacy", "tensorflow-text", "tiktoken", "torch", "transformers", "weaviate-client", "wikipedia", "wolframalpha"]
cohere = ["cohere"]
llms = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "manifest-ml", "torch", "transformers"]
llms = ["anthropic", "cohere", "huggingface_hub", "manifest-ml", "nlpcloud", "openai", "torch", "transformers"]
openai = ["openai"]
qdrant = ["qdrant-client"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.8.1,<4.0"
content-hash = "da027a1b27f348548ca828c6da40795e2f57a7a7858bdeac1a08573d3e031e12"
content-hash = "ab6ea1c53c7a6e792d5bdcf8865b87e5dcfe4c89080c18b356dc4ed8a17cc3a3"

@ -56,7 +56,7 @@ arxiv = {version = "^1.4", optional = true}
pypdf = {version = "^3.4.0", optional = true}
networkx = {version="^2.6.3", optional = true}
aleph-alpha-client = {version="^2.15.0", optional = true}
deeplake = {version = "^3.2.21", optional = true}
deeplake = {version = "^3.3.0", optional = true}
pgvector = {version = "^0.1.6", optional = true}
psycopg2-binary = {version = "^2.9.5", optional = true}
#boto3 = {version = "^1.26.96", optional = true} # TODO: fix it, commented because the version failed with deeplake

@ -164,3 +164,10 @@ def test_delete_dataset_by_filter(deeplake_datastore: DeepLake) -> None:
assert len(deeplake_datastore.ds) == 2
deeplake_datastore.delete_dataset()
def test_delete_by_path(deeplake_datastore: DeepLake) -> None:
"""Test delete dataset."""
path = deeplake_datastore.dataset_path
DeepLake.force_delete_by_path(path)
assert not deeplake.exists(path)

Loading…
Cancel
Save