Harrison/deeplake new features (#6263)

Co-authored-by: adilkhan <adilkhan.sarsen@nu.edu.kz>
Co-authored-by: Dev 2049 <dev.dev2049@gmail.com>
This commit is contained in:
Harrison Chase 2023-06-16 17:53:55 -07:00 committed by GitHub
parent 6640293087
commit af18413d97
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 919 additions and 531 deletions

12
docs/.local_build.sh Executable file
View File

@ -0,0 +1,12 @@
#!/usr/bin/env bash
# Build the documentation locally: stage the site sources into _dist/, build
# the API reference HTML into the staged site's static folder, then install
# the JS dependencies and run the site's `start` script.
#
# NOTE(review): every path below is relative, so this presumably has to be run
# from the directory that contains docs_skeleton/, snippets/, api_reference/
# and extras/ -- confirm against the repository layout.

# Abort on the first failing command. Without this, a failed `cd` or build
# step would let the later `cp` / `cd ..` commands run in the wrong directory.
set -eu

# Stage the site skeleton and snippets. `-p` keeps re-runs from failing when
# _dist/ is left over from a previous build; the sources are listed explicitly
# instead of via brace expansion so the line also works under a plain POSIX sh.
mkdir -p _dist
cp -r docs_skeleton snippets _dist
mkdir -p _dist/docs_skeleton/static/api_reference

# Build the API reference and copy the generated output into the staged site.
cd api_reference
poetry run make html
cp -r _build/* ../_dist/docs_skeleton/static/api_reference
cd ..

# Overlay the extra docs content onto the staged docs tree.
cp -r extras/* _dist/docs_skeleton/docs

# From inside the staged site: run nbdoc_build (presumably converts the
# notebooks for the site -- TODO confirm), then install dependencies and run
# the package's `start` script.
cd _dist/docs_skeleton
poetry run nbdoc_build
yarn install
yarn start

View File

@ -26,10 +26,8 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 1,
"metadata": { "metadata": {},
"tags": []
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.embeddings.openai import OpenAIEmbeddings\n", "from langchain.embeddings.openai import OpenAIEmbeddings\n",
@ -39,30 +37,23 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 2,
"metadata": { "metadata": {
"tags": [] "tags": []
}, },
"outputs": [ "outputs": [],
{
"name": "stdin",
"output_type": "stream",
"text": [
"OpenAI API Key: ········\n"
]
}
],
"source": [ "source": [
"import os\n", "import os\n",
"import getpass\n", "import getpass\n",
"\n", "\n",
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n", "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n",
"activeloop_token = getpass.getpass(\"activeloop token:\")\n",
"embeddings = OpenAIEmbeddings()" "embeddings = OpenAIEmbeddings()"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 3,
"metadata": { "metadata": {
"tags": [] "tags": []
}, },
@ -70,7 +61,7 @@
"source": [ "source": [
"from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import TextLoader\n",
"\n", "\n",
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"docs/modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n", "docs = text_splitter.split_documents(documents)\n",
@ -87,7 +78,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 4,
"metadata": { "metadata": {
"tags": [] "tags": []
}, },
@ -95,57 +86,27 @@
{ {
"name": "stderr", "name": "stderr",
"output_type": "stream", "output_type": "stream",
"text": [ "text": []
"/home/leo/.local/lib/python3.10/site-packages/deeplake/util/check_latest_version.py:32: UserWarning: A newer version of deeplake (3.3.2) is available. It's recommended that you update to the latest version using `pip install -U deeplake`.\n",
" warnings.warn(\n"
]
}, },
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"./my_deeplake/ loaded successfully.\n" "Dataset(path='./my_deeplake/', tensors=['embedding', 'id', 'metadata', 'text'])\n",
]
},
{
"name": "stderr",
"output_type": "stream",
"text": []
},
{
"name": "stderr",
"output_type": "stream",
"text": []
},
{
"name": "stderr",
"output_type": "stream",
"text": []
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating ingest: 100%|██████████████████████████████████████| 1/1 [00:07<00:00\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset(path='./my_deeplake/', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
"\n", "\n",
" tensor htype shape dtype compression\n", " tensor htype shape dtype compression\n",
" ------- ------- ------- ------- ------- \n", " ------- ------- ------- ------- ------- \n",
" embedding generic (42, 1536) float32 None \n", " embedding embedding (42, 1536) float32 None \n",
" ids text (42, 1) str None \n", " id text (42, 1) str None \n",
" metadata json (42, 1) str None \n", " metadata json (42, 1) str None \n",
" text text (42, 1) str None \n" " text text (42, 1) str None \n"
] ]
} }
], ],
"source": [ "source": [
"db = DeepLake(dataset_path=\"./my_deeplake/\", embedding_function=embeddings)\n", "db = DeepLake(\n",
" dataset_path=\"./my_deeplake/\", embedding_function=embeddings, overwrite=True\n",
")\n",
"db.add_documents(docs)\n", "db.add_documents(docs)\n",
"# or shorter\n", "# or shorter\n",
"# db = DeepLake.from_documents(docs, dataset_path=\"./my_deeplake/\", embedding=embeddings, overwrite=True)\n", "# db = DeepLake.from_documents(docs, dataset_path=\"./my_deeplake/\", embedding=embeddings, overwrite=True)\n",
@ -155,7 +116,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 5,
"metadata": { "metadata": {
"tags": [] "tags": []
}, },
@ -187,7 +148,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": 6,
"metadata": { "metadata": {
"tags": [] "tags": []
}, },
@ -195,36 +156,9 @@
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [
"./my_deeplake/ loaded successfully.\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": []
},
{
"name": "stderr",
"output_type": "stream",
"text": [ "text": [
"Deep Lake Dataset in ./my_deeplake/ already exists, loading from the storage\n" "Deep Lake Dataset in ./my_deeplake/ already exists, loading from the storage\n"
] ]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset(path='./my_deeplake/', read_only=True, tensors=['embedding', 'ids', 'metadata', 'text'])\n",
"\n",
" tensor htype shape dtype compression\n",
" ------- ------- ------- ------- ------- \n",
" embedding generic (42, 1536) float32 None \n",
" ids text (42, 1) str None \n",
" metadata json (42, 1) str None \n",
" text text (42, 1) str None \n"
]
} }
], ],
"source": [ "source": [
@ -250,7 +184,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 9, "execution_count": 7,
"metadata": { "metadata": {
"tags": [] "tags": []
}, },
@ -259,7 +193,7 @@
"name": "stderr", "name": "stderr",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"/home/leo/.local/lib/python3.10/site-packages/langchain/llms/openai.py:624: UserWarning: You are trying to use a chat model. This way of initializing it is no longer supported. Instead, please use: `from langchain.chat_models import ChatOpenAI`\n", "/Users/adilkhansarsen/Documents/work/LangChain/langchain/langchain/llms/openai.py:751: UserWarning: You are trying to use a chat model. This way of initializing it is no longer supported. Instead, please use: `from langchain.chat_models import ChatOpenAI`\n",
" warnings.warn(\n" " warnings.warn(\n"
] ]
} }
@ -277,7 +211,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10, "execution_count": 8,
"metadata": { "metadata": {
"tags": [] "tags": []
}, },
@ -285,10 +219,10 @@
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"'The president nominated Ketanji Brown Jackson to serve on the United States Supreme Court. He described her as a former top litigator in private practice, a former federal public defender, a consensus builder, and from a family of public school educators and police officers. He also mentioned that she has received broad support from various groups since being nominated.'" "'The President nominated Ketanji Brown Jackson to serve on the United States Supreme Court and spoke highly of her legal expertise and reputation as a consensus builder.'"
] ]
}, },
"execution_count": 10, "execution_count": 8,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -307,35 +241,26 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 54, "execution_count": 9,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"./my_deeplake/ loaded successfully.\n"
]
},
{ {
"name": "stderr", "name": "stderr",
"output_type": "stream", "output_type": "stream",
"text": [ "text": []
"Evaluating ingest: 100%|██████████| 1/1 [00:04<00:00\n"
]
}, },
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Dataset(path='./my_deeplake/', tensors=['embedding', 'ids', 'metadata', 'text'])\n", "Dataset(path='./my_deeplake/', tensors=['embedding', 'id', 'metadata', 'text'])\n",
"\n", "\n",
" tensor htype shape dtype compression\n", " tensor htype shape dtype compression\n",
" ------- ------- ------- ------- ------- \n", " ------- ------- ------- ------- ------- \n",
" embedding generic (4, 1536) float32 None \n", " embedding embedding (4, 1536) float32 None \n",
" ids text (4, 1) str None \n", " id text (4, 1) str None \n",
" metadata json (4, 1) str None \n", " metadata json (4, 1) str None \n",
" text text (4, 1) str None \n" " text text (4, 1) str None \n"
] ]
}, },
{ {
@ -357,31 +282,33 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 55, "execution_count": 10,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stderr", "name": "stderr",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"100%|██████████| 4/4 [00:00<00:00, 1080.24it/s]\n" "100%|██████████| 4/4 [00:00<00:00, 3300.00it/s]\n"
] ]
}, },
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"[Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013}),\n", "[Document(lc_kwargs={'page_content': 'Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', 'metadata': {'source': 'docs/modules/state_of_the_union.txt', 'year': 2013}}, page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. 
And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', metadata={'source': 'docs/modules/state_of_the_union.txt', 'year': 2013}),\n",
" Document(page_content='And for our LGBTQ+ Americans, lets finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \\n\\nAs I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \\n\\nWhile it often appears that we never agree, that isnt true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \\n\\nAnd soon, well strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \\n\\nSo tonight Im offering a Unity Agenda for the Nation. Four big things we can do together. \\n\\nFirst, beat the opioid epidemic.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013})]" " Document(lc_kwargs={'page_content': 'A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, weve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWeve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWere putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. 
\\n\\nWere securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', 'metadata': {'source': 'docs/modules/state_of_the_union.txt', 'year': 2013}}, page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, weve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWeve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWere putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWere securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': 'docs/modules/state_of_the_union.txt', 'year': 2013}),\n",
" Document(lc_kwargs={'page_content': 'Tonight, Im announcing a crackdown on these companies overcharging American businesses and consumers. \\n\\nAnd as Wall Street firms take over more nursing homes, quality in those homes has gone down and costs have gone up. \\n\\nThat ends on my watch. \\n\\nMedicare is going to set higher standards for nursing homes and make sure your loved ones get the care they deserve and expect. \\n\\nWell also cut costs and keep the economy going strong by giving workers a fair shot, provide more training and apprenticeships, hire them based on their skills not degrees. \\n\\nLets pass the Paycheck Fairness Act and paid leave. \\n\\nRaise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty. \\n\\nLets increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls Americas best-kept secret: community colleges.', 'metadata': {'source': 'docs/modules/state_of_the_union.txt', 'year': 2013}}, page_content='Tonight, Im announcing a crackdown on these companies overcharging American businesses and consumers. \\n\\nAnd as Wall Street firms take over more nursing homes, quality in those homes has gone down and costs have gone up. \\n\\nThat ends on my watch. \\n\\nMedicare is going to set higher standards for nursing homes and make sure your loved ones get the care they deserve and expect. \\n\\nWell also cut costs and keep the economy going strong by giving workers a fair shot, provide more training and apprenticeships, hire them based on their skills not degrees. \\n\\nLets pass the Paycheck Fairness Act and paid leave. \\n\\nRaise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty. 
\\n\\nLets increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls Americas best-kept secret: community colleges.', metadata={'source': 'docs/modules/state_of_the_union.txt', 'year': 2013})]"
] ]
}, },
"execution_count": 55, "execution_count": 10,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"source": [ "source": [
"db.similarity_search(\n", "db.similarity_search(\n",
" \"What did the president say about Ketanji Brown Jackson\", filter={\"year\": 2013}\n", " \"What did the president say about Ketanji Brown Jackson\",\n",
" filter={\"metadata\": {\"year\": 2013}},\n",
")" ")"
] ]
}, },
@ -395,19 +322,19 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 56, "execution_count": 11,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"[Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013}),\n", "[Document(lc_kwargs={'page_content': 'Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', 'metadata': {'source': 'docs/modules/state_of_the_union.txt', 'year': 2013}}, page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. 
And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', metadata={'source': 'docs/modules/state_of_the_union.txt', 'year': 2013}),\n",
" Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, weve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWeve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWere putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWere securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2012}),\n", " Document(lc_kwargs={'page_content': 'A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, weve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWeve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWere putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. 
\\n\\nWere securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', 'metadata': {'source': 'docs/modules/state_of_the_union.txt', 'year': 2013}}, page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, weve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWeve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWere putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWere securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': 'docs/modules/state_of_the_union.txt', 'year': 2013}),\n",
" Document(page_content='And for our LGBTQ+ Americans, lets finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \\n\\nAs I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \\n\\nWhile it often appears that we never agree, that isnt true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \\n\\nAnd soon, well strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \\n\\nSo tonight Im offering a Unity Agenda for the Nation. Four big things we can do together. \\n\\nFirst, beat the opioid epidemic.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013}),\n", " Document(lc_kwargs={'page_content': 'Tonight, Im announcing a crackdown on these companies overcharging American businesses and consumers. \\n\\nAnd as Wall Street firms take over more nursing homes, quality in those homes has gone down and costs have gone up. \\n\\nThat ends on my watch. \\n\\nMedicare is going to set higher standards for nursing homes and make sure your loved ones get the care they deserve and expect. \\n\\nWell also cut costs and keep the economy going strong by giving workers a fair shot, provide more training and apprenticeships, hire them based on their skills not degrees. \\n\\nLets pass the Paycheck Fairness Act and paid leave. \\n\\nRaise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty. 
\\n\\nLets increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls Americas best-kept secret: community colleges.', 'metadata': {'source': 'docs/modules/state_of_the_union.txt', 'year': 2013}}, page_content='Tonight, Im announcing a crackdown on these companies overcharging American businesses and consumers. \\n\\nAnd as Wall Street firms take over more nursing homes, quality in those homes has gone down and costs have gone up. \\n\\nThat ends on my watch. \\n\\nMedicare is going to set higher standards for nursing homes and make sure your loved ones get the care they deserve and expect. \\n\\nWell also cut costs and keep the economy going strong by giving workers a fair shot, provide more training and apprenticeships, hire them based on their skills not degrees. \\n\\nLets pass the Paycheck Fairness Act and paid leave. \\n\\nRaise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty. \\n\\nLets increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls Americas best-kept secret: community colleges.', metadata={'source': 'docs/modules/state_of_the_union.txt', 'year': 2013}),\n",
" Document(page_content='Tonight, Im announcing a crackdown on these companies overcharging American businesses and consumers. \\n\\nAnd as Wall Street firms take over more nursing homes, quality in those homes has gone down and costs have gone up. \\n\\nThat ends on my watch. \\n\\nMedicare is going to set higher standards for nursing homes and make sure your loved ones get the care they deserve and expect. \\n\\nWell also cut costs and keep the economy going strong by giving workers a fair shot, provide more training and apprenticeships, hire them based on their skills not degrees. \\n\\nLets pass the Paycheck Fairness Act and paid leave. \\n\\nRaise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty. \\n\\nLets increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls Americas best-kept secret: community colleges.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2012})]" " Document(lc_kwargs={'page_content': 'And for our LGBTQ+ Americans, lets finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \\n\\nAs I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \\n\\nWhile it often appears that we never agree, that isnt true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \\n\\nAnd soon, well strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \\n\\nSo tonight Im offering a Unity Agenda for the Nation. Four big things we can do together. 
\\n\\nFirst, beat the opioid epidemic.', 'metadata': {'source': 'docs/modules/state_of_the_union.txt', 'year': 2012}}, page_content='And for our LGBTQ+ Americans, lets finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \\n\\nAs I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \\n\\nWhile it often appears that we never agree, that isnt true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \\n\\nAnd soon, well strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \\n\\nSo tonight Im offering a Unity Agenda for the Nation. Four big things we can do together. \\n\\nFirst, beat the opioid epidemic.', metadata={'source': 'docs/modules/state_of_the_union.txt', 'year': 2012})]"
] ]
}, },
"execution_count": 56, "execution_count": 11,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -428,19 +355,19 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 57, "execution_count": 12,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"[Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013}),\n", "[Document(lc_kwargs={'page_content': 'Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', 'metadata': {'source': 'docs/modules/state_of_the_union.txt', 'year': 2013}}, page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. 
And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', metadata={'source': 'docs/modules/state_of_the_union.txt', 'year': 2013}),\n",
" Document(page_content='Tonight, Im announcing a crackdown on these companies overcharging American businesses and consumers. \\n\\nAnd as Wall Street firms take over more nursing homes, quality in those homes has gone down and costs have gone up. \\n\\nThat ends on my watch. \\n\\nMedicare is going to set higher standards for nursing homes and make sure your loved ones get the care they deserve and expect. \\n\\nWell also cut costs and keep the economy going strong by giving workers a fair shot, provide more training and apprenticeships, hire them based on their skills not degrees. \\n\\nLets pass the Paycheck Fairness Act and paid leave. \\n\\nRaise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty. \\n\\nLets increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls Americas best-kept secret: community colleges.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2012}),\n", " Document(lc_kwargs={'page_content': 'Tonight, Im announcing a crackdown on these companies overcharging American businesses and consumers. \\n\\nAnd as Wall Street firms take over more nursing homes, quality in those homes has gone down and costs have gone up. \\n\\nThat ends on my watch. \\n\\nMedicare is going to set higher standards for nursing homes and make sure your loved ones get the care they deserve and expect. \\n\\nWell also cut costs and keep the economy going strong by giving workers a fair shot, provide more training and apprenticeships, hire them based on their skills not degrees. \\n\\nLets pass the Paycheck Fairness Act and paid leave. \\n\\nRaise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty. 
\\n\\nLets increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls Americas best-kept secret: community colleges.', 'metadata': {'source': 'docs/modules/state_of_the_union.txt', 'year': 2013}}, page_content='Tonight, Im announcing a crackdown on these companies overcharging American businesses and consumers. \\n\\nAnd as Wall Street firms take over more nursing homes, quality in those homes has gone down and costs have gone up. \\n\\nThat ends on my watch. \\n\\nMedicare is going to set higher standards for nursing homes and make sure your loved ones get the care they deserve and expect. \\n\\nWell also cut costs and keep the economy going strong by giving workers a fair shot, provide more training and apprenticeships, hire them based on their skills not degrees. \\n\\nLets pass the Paycheck Fairness Act and paid leave. \\n\\nRaise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty. \\n\\nLets increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls Americas best-kept secret: community colleges.', metadata={'source': 'docs/modules/state_of_the_union.txt', 'year': 2013}),\n",
" Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, weve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWeve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWere putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWere securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2012}),\n", " Document(lc_kwargs={'page_content': 'A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, weve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWeve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWere putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. 
\\n\\nWere securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', 'metadata': {'source': 'docs/modules/state_of_the_union.txt', 'year': 2013}}, page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, weve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWeve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWere putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWere securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': 'docs/modules/state_of_the_union.txt', 'year': 2013}),\n",
" Document(page_content='And for our LGBTQ+ Americans, lets finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \\n\\nAs I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \\n\\nWhile it often appears that we never agree, that isnt true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \\n\\nAnd soon, well strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \\n\\nSo tonight Im offering a Unity Agenda for the Nation. Four big things we can do together. \\n\\nFirst, beat the opioid epidemic.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013})]" " Document(lc_kwargs={'page_content': 'And for our LGBTQ+ Americans, lets finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \\n\\nAs I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \\n\\nWhile it often appears that we never agree, that isnt true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \\n\\nAnd soon, well strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \\n\\nSo tonight Im offering a Unity Agenda for the Nation. Four big things we can do together. 
\\n\\nFirst, beat the opioid epidemic.', 'metadata': {'source': 'docs/modules/state_of_the_union.txt', 'year': 2012}}, page_content='And for our LGBTQ+ Americans, lets finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \\n\\nAs I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \\n\\nWhile it often appears that we never agree, that isnt true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \\n\\nAnd soon, well strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \\n\\nSo tonight Im offering a Unity Agenda for the Nation. Four big things we can do together. \\n\\nFirst, beat the opioid epidemic.', metadata={'source': 'docs/modules/state_of_the_union.txt', 'year': 2012})]"
] ]
}, },
"execution_count": 57, "execution_count": 12,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -460,9 +387,15 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 59, "execution_count": 13,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": []
}
],
"source": [ "source": [
"db.delete_dataset()" "db.delete_dataset()"
] ]
@ -476,7 +409,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 61, "execution_count": 14,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -499,16 +432,23 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 62, "execution_count": 15,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"os.environ[\"ACTIVELOOP_TOKEN\"] = getpass.getpass(\"Activeloop Token:\")" "os.environ[\"ACTIVELOOP_TOKEN\"] = activeloop_token"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Deep Lake now supports three execution modes for search, selected with `exec_option`: `python`, a naive search over the data; `tensor_db`, a managed database that runs TQL on a remote, optimized engine and sends the results back; and `compute_engine`, a C++ implementation of the search that runs locally."
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 63, "execution_count": 16,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -516,43 +456,85 @@
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Your Deep Lake dataset has been successfully created!\n", "Your Deep Lake dataset has been successfully created!\n",
"The dataset is private so make sure you are logged in!\n", "The dataset is private so make sure you are logged in!\n"
"This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/davitbun/langchain_test\n",
"hub://davitbun/langchain_test loaded successfully.\n"
] ]
}, },
{ {
"name": "stderr", "name": "stderr",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Evaluating ingest: 100%|██████████| 1/1 [00:14<00:00\n", "-"
" \r"
] ]
}, },
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Dataset(path='hub://davitbun/langchain_test', tensors=['embedding', 'ids', 'metadata', 'text'])\n", "Dataset(path='hub://adilkhan/langchain_testing_python', tensors=['embedding', 'id', 'metadata', 'text'])\n",
"\n", "\n",
" tensor htype shape dtype compression\n", " tensor htype shape dtype compression\n",
" ------- ------- ------- ------- ------- \n", " ------- ------- ------- ------- ------- \n",
" embedding generic (4, 1536) float32 None \n", " embedding embedding (42, 1536) float32 None \n",
" ids text (4, 1) str None \n", " id text (42, 1) str None \n",
" metadata json (4, 1) str None \n", " metadata json (42, 1) str None \n",
" text text (4, 1) str None \n" " text text (42, 1) str None \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
] ]
}, },
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"['d6d6ccb4-e187-11ed-b66d-41c5f7b85421',\n", "['d604b1ac-093c-11ee-bdba-76d8a30504e0',\n",
" 'd6d6ccb5-e187-11ed-b66d-41c5f7b85421',\n", " 'd604b238-093c-11ee-bdba-76d8a30504e0',\n",
" 'd6d6ccb6-e187-11ed-b66d-41c5f7b85421',\n", " 'd604b260-093c-11ee-bdba-76d8a30504e0',\n",
" 'd6d6ccb7-e187-11ed-b66d-41c5f7b85421']" " 'd604b27e-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b29c-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b2ba-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b2d8-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b2f6-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b314-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b332-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b350-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b36e-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b38c-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b3a0-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b3be-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b3dc-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b3fa-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b418-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b436-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b454-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b472-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b490-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b4a4-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b4c2-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b4e0-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b4fe-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b51c-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b53a-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b558-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b576-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b594-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b5b2-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b5c6-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b5e4-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b602-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b620-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b63e-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b65c-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b67a-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b698-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b6b6-093c-11ee-bdba-76d8a30504e0',\n",
" 'd604b6d4-093c-11ee-bdba-76d8a30504e0']"
] ]
}, },
"execution_count": 63, "execution_count": 16,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -560,7 +542,9 @@
"source": [ "source": [
"# Embed and store the texts\n", "# Embed and store the texts\n",
"username = \"<username>\" # your username on app.activeloop.ai\n", "username = \"<username>\" # your username on app.activeloop.ai\n",
"dataset_path = f\"hub://{username}/langchain_test\" # could be also ./local/path (much faster locally), s3://bucket/path/to/dataset, gcs://path/to/dataset, etc.\n", "dataset_path = f\"hub://{username}/langchain_testing_python\" # could be also ./local/path (much faster locally), s3://bucket/path/to/dataset, gcs://path/to/dataset, etc.\n",
"\n",
"docs = text_splitter.split_documents(documents)\n",
"\n", "\n",
"embedding = OpenAIEmbeddings()\n", "embedding = OpenAIEmbeddings()\n",
"db = DeepLake(dataset_path=dataset_path, embedding_function=embeddings, overwrite=True)\n", "db = DeepLake(dataset_path=dataset_path, embedding_function=embeddings, overwrite=True)\n",
@ -569,7 +553,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 64, "execution_count": 17,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -592,6 +576,204 @@
"print(docs[0].page_content)" "print(docs[0].page_content)"
] ]
}, },
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Your Deep Lake dataset has been successfully created!\n",
"The dataset is private so make sure you are logged in!\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"|"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset(path='hub://adilkhan/langchain_testing', tensors=['embedding', 'id', 'metadata', 'text'])\n",
"\n",
" tensor htype shape dtype compression\n",
" ------- ------- ------- ------- ------- \n",
" embedding embedding (42, 1536) float32 None \n",
" id text (42, 1) str None \n",
" metadata json (42, 1) str None \n",
" text text (42, 1) str None \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
]
},
{
"data": {
"text/plain": [
"['6584c33a-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c3ee-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c420-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c43e-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c466-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c484-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c4a2-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c4c0-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c4de-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c4fc-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c51a-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c538-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c556-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c574-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c592-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c5b0-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c5ce-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c5f6-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c614-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c632-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c646-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c66e-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c682-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c6a0-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c6be-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c6e6-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c704-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c722-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c740-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c75e-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c77c-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c79a-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c7ae-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c7cc-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c7ea-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c808-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c826-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c844-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c862-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c876-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c894-093d-11ee-bdba-76d8a30504e0',\n",
" '6584c8bc-093d-11ee-bdba-76d8a30504e0']"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Embed and store the texts\n",
"username = \"adilkhan\" # your username on app.activeloop.ai\n",
"dataset_path = f\"hub://{username}/langchain_testing\" # could be also ./local/path (much faster locally), s3://bucket/path/to/dataset, gcs://path/to/dataset, etc.\n",
"\n",
"docs = text_splitter.split_documents(documents)\n",
"\n",
"embedding = OpenAIEmbeddings()\n",
"db = DeepLake(\n",
" dataset_path=dataset_path,\n",
" embedding_function=embeddings,\n",
" overwrite=True,\n",
" exec_option=\"tensor_db\",\n",
")\n",
"db.add_documents(docs)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
"\n",
"Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
"\n",
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
"\n",
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.\n"
]
}
],
"source": [
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"docs = db.similarity_search(query, exec_option=\"tensor_db\")\n",
"print(docs[0].page_content)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### The difference will be apparent on bigger datasets (~10000 rows)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### TQL Search"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we can use TQL search with Deep Lake."
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"search_id = db.vectorstore.dataset.id[0].numpy()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"docs = db.similarity_search(\n",
" query=None,\n",
" tql_query=f\"SELECT * WHERE id == '{search_id[0]}'\",\n",
" exec_option=\"tensor_db\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(lc_kwargs={'page_content': 'Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \\n\\nLast year COVID-19 kept us apart. This year we are finally together again. \\n\\nTonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \\n\\nWith a duty to one another to the American people to the Constitution. \\n\\nAnd with an unwavering resolve that freedom will always triumph over tyranny. \\n\\nSix days ago, Russias Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \\n\\nHe thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \\n\\nHe met the Ukrainian people. \\n\\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world.', 'metadata': {'source': 'docs/modules/state_of_the_union.txt'}}, page_content='Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \\n\\nLast year COVID-19 kept us apart. This year we are finally together again. \\n\\nTonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \\n\\nWith a duty to one another to the American people to the Constitution. \\n\\nAnd with an unwavering resolve that freedom will always triumph over tyranny. \\n\\nSix days ago, Russias Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \\n\\nHe thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \\n\\nHe met the Ukrainian people. 
\\n\\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world.', metadata={'source': 'docs/modules/state_of_the_union.txt'})]"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
@ -668,37 +850,37 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 66, "execution_count": 26,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Dataset(path='hub://davitbun/langchain_test', tensors=['embedding', 'ids', 'metadata', 'text'])\n", "Dataset(path='hub://adilkhan/langchain_testing', tensors=['embedding', 'id', 'metadata', 'text'])\n",
"\n", "\n",
" tensor htype shape dtype compression\n", " tensor htype shape dtype compression\n",
" ------- ------- ------- ------- ------- \n", " ------- ------- ------- ------- ------- \n",
" embedding generic (4, 1536) float32 None \n", " embedding embedding (42, 1536) float32 None \n",
" ids text (4, 1) str None \n", " id text (42, 1) str None \n",
" metadata json (4, 1) str None \n", " metadata json (42, 1) str None \n",
" text text (4, 1) str None \n" " text text (42, 1) str None \n"
] ]
} }
], ],
"source": [ "source": [
"# get structure of the dataset\n", "# get structure of the dataset\n",
"db.ds.summary()" "db.vectorstore.summary()"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 67, "execution_count": 27,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# get embeddings numpy array\n", "# get embeddings numpy array\n",
"embeds = db.ds.embedding.numpy()" "embeds = db.vectorstore.dataset.embedding.numpy()"
] ]
}, },
{ {
@ -854,18 +1036,11 @@
"db = DeepLake(dataset_path=destination, embedding_function=embeddings)\n", "db = DeepLake(dataset_path=destination, embedding_function=embeddings)\n",
"db.add_documents(docs)" "db.add_documents(docs)"
] ]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
} }
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3 (ipykernel)", "display_name": "Python 3.9.6 ('langchain_venv': venv)",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },
@ -879,11 +1054,11 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.10.6" "version": "3.9.6"
}, },
"vscode": { "vscode": {
"interpreter": { "interpreter": {
"hash": "7b14174bb6f9d4680b62ac2a6390e1ce94fbfabf172a10844870451d539c58d6" "hash": "0b0bacaffd430edc3085253ee7ee1bcda9f76a5e66b369dda8ba68baa6d14ba7"
} }
} }
}, },

File diff suppressed because it is too large Load Diff

10
poetry.lock generated
View File

@ -1762,13 +1762,13 @@ files = [
[[package]] [[package]]
name = "deeplake" name = "deeplake"
version = "3.6.1" version = "3.6.4"
description = "Activeloop Deep Lake" description = "Activeloop Deep Lake"
category = "main" category = "main"
optional = false optional = false
python-versions = "*" python-versions = "*"
files = [ files = [
{file = "deeplake-3.6.1.tar.gz", hash = "sha256:78b0280e3e21c6731a96a9a2519a24e767df708c309e934ab473dfbc17b13581"}, {file = "deeplake-3.6.4.tar.gz", hash = "sha256:cd31e772e00012a0cfdcd9d2cfe0e7fa3eaea1de122e98ae4f54e15d2722b94f"},
] ]
[package.dependencies] [package.dependencies]
@ -1785,12 +1785,12 @@ pyjwt = "*"
tqdm = "*" tqdm = "*"
[package.extras] [package.extras]
all = ["IPython", "av (>=8.1.0)", "azure-cli", "azure-identity", "azure-storage-blob", "flask", "google-api-python-client (>=2.31.0,<2.32.0)", "google-auth (>=2.0.1,<2.1.0)", "google-auth-oauthlib (>=0.4.5,<0.5.0)", "google-cloud-storage (>=1.42.0,<1.43.0)", "laspy", "libdeeplake (==0.0.56)", "nibabel", "oauth2client (>=4.1.3,<4.2.0)", "pydicom"] all = ["IPython", "av (>=8.1.0)", "azure-cli", "azure-identity", "azure-storage-blob", "flask", "google-api-python-client (>=2.31.0,<2.32.0)", "google-auth (>=2.0.1,<2.1.0)", "google-auth-oauthlib (>=0.4.5,<0.5.0)", "google-cloud-storage (>=1.42.0,<1.43.0)", "laspy", "libdeeplake (==0.0.59)", "nibabel", "oauth2client (>=4.1.3,<4.2.0)", "pydicom"]
audio = ["av (>=8.1.0)"] audio = ["av (>=8.1.0)"]
av = ["av (>=8.1.0)"] av = ["av (>=8.1.0)"]
azure = ["azure-cli", "azure-identity", "azure-storage-blob"] azure = ["azure-cli", "azure-identity", "azure-storage-blob"]
dicom = ["nibabel", "pydicom"] dicom = ["nibabel", "pydicom"]
enterprise = ["libdeeplake (==0.0.56)", "pyjwt"] enterprise = ["libdeeplake (==0.0.59)", "pyjwt"]
gcp = ["google-auth (>=2.0.1,<2.1.0)", "google-auth-oauthlib (>=0.4.5,<0.5.0)", "google-cloud-storage (>=1.42.0,<1.43.0)"] gcp = ["google-auth (>=2.0.1,<2.1.0)", "google-auth-oauthlib (>=0.4.5,<0.5.0)", "google-cloud-storage (>=1.42.0,<1.43.0)"]
gdrive = ["google-api-python-client (>=2.31.0,<2.32.0)", "google-auth (>=2.0.1,<2.1.0)", "google-auth-oauthlib (>=0.4.5,<0.5.0)", "oauth2client (>=4.1.3,<4.2.0)"] gdrive = ["google-api-python-client (>=2.31.0,<2.32.0)", "google-auth (>=2.0.1,<2.1.0)", "google-auth-oauthlib (>=0.4.5,<0.5.0)", "oauth2client (>=4.1.3,<4.2.0)"]
medical = ["nibabel", "pydicom"] medical = ["nibabel", "pydicom"]
@ -11486,4 +11486,4 @@ text-helpers = ["chardet"]
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = ">=3.8.1,<4.0" python-versions = ">=3.8.1,<4.0"
content-hash = "dd54bb9201b260b734ceebad2c6629b4b1b3bf224731de092782678219f32120" content-hash = "71afe34849d7dc7189fe79e795b12a48d5e185ea2146880ba80fc6770b6e6272"

View File

@ -59,7 +59,7 @@ arxiv = {version = "^1.4", optional = true}
pypdf = {version = "^3.4.0", optional = true} pypdf = {version = "^3.4.0", optional = true}
networkx = {version="^2.6.3", optional = true} networkx = {version="^2.6.3", optional = true}
aleph-alpha-client = {version="^2.15.0", optional = true} aleph-alpha-client = {version="^2.15.0", optional = true}
deeplake = {version = "^3.3.0", optional = true} deeplake = {version = "^3.6.2", optional = true}
pgvector = {version = "^0.1.6", optional = true} pgvector = {version = "^0.1.6", optional = true}
psycopg2-binary = {version = "^2.9.5", optional = true} psycopg2-binary = {version = "^2.9.5", optional = true}
pyowm = {version = "^3.3.0", optional = true} pyowm = {version = "^3.3.0", optional = true}

View File

@ -66,8 +66,6 @@ def test_deeplakewith_persistence() -> None:
output = docsearch.similarity_search("foo", k=1) output = docsearch.similarity_search("foo", k=1)
assert output == [Document(page_content="foo")] assert output == [Document(page_content="foo")]
docsearch.persist()
# Get a new VectorStore from the persisted directory # Get a new VectorStore from the persisted directory
docsearch = DeepLake( docsearch = DeepLake(
dataset_path=dataset_path, dataset_path=dataset_path,
@ -98,8 +96,6 @@ def test_deeplake_overwrite_flag() -> None:
output = docsearch.similarity_search("foo", k=1) output = docsearch.similarity_search("foo", k=1)
assert output == [Document(page_content="foo")] assert output == [Document(page_content="foo")]
docsearch.persist()
# Get a new VectorStore from the persisted directory, with no overwrite (implicit) # Get a new VectorStore from the persisted directory, with no overwrite (implicit)
docsearch = DeepLake( docsearch = DeepLake(
dataset_path=dataset_path, dataset_path=dataset_path,
@ -125,9 +121,8 @@ def test_deeplake_overwrite_flag() -> None:
embedding_function=FakeEmbeddings(), embedding_function=FakeEmbeddings(),
overwrite=True, overwrite=True,
) )
output = docsearch.similarity_search("foo", k=1) with pytest.raises(ValueError):
# assert page no longer present output = docsearch.similarity_search("foo", k=1)
assert output == []
def test_similarity_search(deeplake_datastore: DeepLake, distance_metric: str) -> None: def test_similarity_search(deeplake_datastore: DeepLake, distance_metric: str) -> None:
@ -172,7 +167,10 @@ def test_similarity_search_with_filter(
"""Test similarity search.""" """Test similarity search."""
output = deeplake_datastore.similarity_search( output = deeplake_datastore.similarity_search(
"foo", k=1, distance_metric=distance_metric, filter={"page": "1"} "foo",
k=1,
distance_metric=distance_metric,
filter={"metadata": {"page": "1"}},
) )
assert output == [Document(page_content="bar", metadata={"page": "1"})] assert output == [Document(page_content="bar", metadata={"page": "1"})]
deeplake_datastore.delete_dataset() deeplake_datastore.delete_dataset()
@ -196,19 +194,29 @@ def test_max_marginal_relevance_search(deeplake_datastore: DeepLake) -> None:
def test_delete_dataset_by_ids(deeplake_datastore: DeepLake) -> None: def test_delete_dataset_by_ids(deeplake_datastore: DeepLake) -> None:
"""Test delete dataset.""" """Test delete dataset."""
id = deeplake_datastore.ds.ids.data()["value"][0] id = deeplake_datastore.vectorstore.dataset.id.data()["value"][0]
deeplake_datastore.delete(ids=[id]) deeplake_datastore.delete(ids=[id])
assert deeplake_datastore.similarity_search("foo", k=1, filter={"page": "0"}) == [] assert (
assert len(deeplake_datastore.ds) == 2 deeplake_datastore.similarity_search(
"foo", k=1, filter={"metadata": {"page": "0"}}
)
== []
)
assert len(deeplake_datastore.vectorstore) == 2
deeplake_datastore.delete_dataset() deeplake_datastore.delete_dataset()
def test_delete_dataset_by_filter(deeplake_datastore: DeepLake) -> None: def test_delete_dataset_by_filter(deeplake_datastore: DeepLake) -> None:
"""Test delete dataset.""" """Test delete dataset."""
deeplake_datastore.delete(filter={"page": "1"}) deeplake_datastore.delete(filter={"metadata": {"page": "1"}})
assert deeplake_datastore.similarity_search("bar", k=1, filter={"page": "1"}) == [] assert (
assert len(deeplake_datastore.ds) == 2 deeplake_datastore.similarity_search(
"bar", k=1, filter={"metadata": {"page": "1"}}
)
== []
)
assert len(deeplake_datastore.vectorstore.dataset) == 2
deeplake_datastore.delete_dataset() deeplake_datastore.delete_dataset()