Handle empty search results and update the notebook (#27914)

- [ ] **PR title**: "community: updated Kinetica vectorstore"

  - **Description:** Handle empty search results by returning an empty list.
  - **Issue:** Previously, an error was thrown when the search results were empty.

@efriis
This commit is contained in:
am-kinetica 2024-11-13 02:33:49 +05:30 committed by GitHub
parent 00e7b2dada
commit a646f1c383
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 187 additions and 252 deletions

View File

@ -33,35 +33,13 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Requirement already satisfied: gpudb==7.2.0.0b in /home/anindyam/kinetica/kinetica-github/langchain/libs/langchain/.venv/lib/python3.8/site-packages (7.2.0.0b0)\n",
"Requirement already satisfied: future in /home/anindyam/kinetica/kinetica-github/langchain/libs/langchain/.venv/lib/python3.8/site-packages (from gpudb==7.2.0.0b) (0.18.3)\n",
"Requirement already satisfied: pyzmq in /home/anindyam/kinetica/kinetica-github/langchain/libs/langchain/.venv/lib/python3.8/site-packages (from gpudb==7.2.0.0b) (25.1.2)\n",
"\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"outputs": [],
"source": [
"# Pip install necessary package\n",
"%pip install --upgrade --quiet langchain-openai langchain-community\n",
"%pip install gpudb==7.2.0.9\n",
"%pip install gpudb>=7.2.2.0 \n",
"%pip install --upgrade --quiet tiktoken"
]
},
@ -74,7 +52,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@ -87,7 +65,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 3,
"metadata": {},
"outputs": [
{
@ -96,7 +74,7 @@
"False"
]
},
"execution_count": 25,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@ -110,38 +88,30 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import (\n",
" DistanceStrategy,\n",
" Kinetica,\n",
" KineticaSettings,\n",
")\n",
"from langchain_core.documents import Document\n",
"from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter"
"from langchain_openai import OpenAIEmbeddings"
]
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"loader = TextLoader(\"../../how_to/state_of_the_union.txt\")\n",
"documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n",
"\n",
"embeddings = OpenAIEmbeddings()"
"embeddings = OpenAIEmbeddings(model=\"text-embedding-3-large\")"
]
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -157,6 +127,81 @@
" return KineticaSettings(host=HOST, username=USERNAME, password=PASSWORD)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"from uuid import uuid4\n",
"\n",
"from langchain_core.documents import Document\n",
"\n",
"document_1 = Document(\n",
" page_content=\"I had chocolate chip pancakes and scrambled eggs for breakfast this morning.\",\n",
" metadata={\"source\": \"tweet\"},\n",
")\n",
"\n",
"document_2 = Document(\n",
" page_content=\"The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.\",\n",
" metadata={\"source\": \"news\"},\n",
")\n",
"\n",
"document_3 = Document(\n",
" page_content=\"Building an exciting new project with LangChain - come check it out!\",\n",
" metadata={\"source\": \"tweet\"},\n",
")\n",
"\n",
"document_4 = Document(\n",
" page_content=\"Robbers broke into the city bank and stole $1 million in cash.\",\n",
" metadata={\"source\": \"news\"},\n",
")\n",
"\n",
"document_5 = Document(\n",
" page_content=\"Wow! That was an amazing movie. I can't wait to see it again.\",\n",
" metadata={\"source\": \"tweet\"},\n",
")\n",
"\n",
"document_6 = Document(\n",
" page_content=\"Is the new iPhone worth the price? Read this review to find out.\",\n",
" metadata={\"source\": \"website\"},\n",
")\n",
"\n",
"document_7 = Document(\n",
" page_content=\"The top 10 soccer players in the world right now.\",\n",
" metadata={\"source\": \"website\"},\n",
")\n",
"\n",
"document_8 = Document(\n",
" page_content=\"LangGraph is the best framework for building stateful, agentic applications!\",\n",
" metadata={\"source\": \"tweet\"},\n",
")\n",
"\n",
"document_9 = Document(\n",
" page_content=\"The stock market is down 500 points today due to fears of a recession.\",\n",
" metadata={\"source\": \"news\"},\n",
")\n",
"\n",
"document_10 = Document(\n",
" page_content=\"I have a bad feeling I am going to get deleted :(\",\n",
" metadata={\"source\": \"tweet\"},\n",
")\n",
"\n",
"documents = [\n",
" document_1,\n",
" document_2,\n",
" document_3,\n",
" document_4,\n",
" document_5,\n",
" document_6,\n",
" document_7,\n",
" document_8,\n",
" document_9,\n",
" document_10,\n",
"]\n",
"uuids = [str(uuid4()) for _ in range(len(documents))]"
]
},
{
"cell_type": "markdown",
"metadata": {},
@ -166,207 +211,92 @@
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 8,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"['05e5a484-0273-49d1-90eb-1276baca31de',\n",
" 'd98b808f-dc0b-4328-bdbf-88f6b2ab6040',\n",
" 'ba0968d4-e344-4285-ae0f-f5199b56f9d6',\n",
" 'a25393b8-6539-45b5-993e-ea16d01941ec',\n",
" '804a37e3-1278-4b60-8b02-36b159ee8c1a',\n",
" '9688b594-3dc6-41d2-a937-babf8ff24c2f',\n",
" '40f7b8fe-67c7-489a-a5a5-7d3965e33bba',\n",
" 'b4fc1376-c113-41e9-8f16-f9320517bedd',\n",
" '4d94d089-fdde-442b-84ab-36d9fe0670c8',\n",
" '66fdb79d-49ce-4b06-901a-fda6271baf2a']"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The Kinetica Module will try to create a table with the name of the collection.\n",
"# So, make sure that the collection name is unique and the user has the permission to create a table.\n",
"\n",
"COLLECTION_NAME = \"state_of_the_union_test\"\n",
"COLLECTION_NAME = \"langchain_example\"\n",
"connection = create_config()\n",
"\n",
"db = Kinetica.from_documents(\n",
" embedding=embeddings,\n",
" documents=docs,\n",
"db = Kinetica(\n",
" connection,\n",
" embeddings,\n",
" collection_name=COLLECTION_NAME,\n",
" config=connection,\n",
")"
")\n",
"\n",
"db.add_documents(documents=documents, ids=uuids)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"docs_with_score = db.similarity_search_with_score(query)"
"# query = \"What did the president say about Ketanji Brown Jackson\"\n",
"# docs_with_score = db.similarity_search_with_score(query)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--------------------------------------------------------------------------------\n",
"Score: 0.6077010035514832\n",
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
"\n",
"Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
"Similarity Search\n",
"* Building an exciting new project with LangChain - come check it out! [{'source': 'tweet'}]\n",
"* LangGraph is the best framework for building stateful, agentic applications! [{'source': 'tweet'}]\n",
"\n",
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
"\n",
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.\n",
"--------------------------------------------------------------------------------\n",
"--------------------------------------------------------------------------------\n",
"Score: 0.6077010035514832\n",
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
"\n",
"Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
"\n",
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
"\n",
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.\n",
"--------------------------------------------------------------------------------\n",
"--------------------------------------------------------------------------------\n",
"Score: 0.6596046090126038\n",
"A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \n",
"\n",
"And if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \n",
"\n",
"We can do both. At our border, weve installed new technology like cutting-edge scanners to better detect drug smuggling. \n",
"\n",
"Weve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \n",
"\n",
"Were putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \n",
"\n",
"Were securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.\n",
"--------------------------------------------------------------------------------\n",
"--------------------------------------------------------------------------------\n",
"Score: 0.6597143411636353\n",
"A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \n",
"\n",
"And if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \n",
"\n",
"We can do both. At our border, weve installed new technology like cutting-edge scanners to better detect drug smuggling. \n",
"\n",
"Weve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \n",
"\n",
"Were putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \n",
"\n",
"Were securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.\n",
"--------------------------------------------------------------------------------\n"
"Similarity search with score\n",
"* [SIM=0.945397] The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees. [{'source': 'news'}]\n"
]
}
],
"source": [
"for doc, score in docs_with_score:\n",
" print(\"-\" * 80)\n",
" print(\"Score: \", score)\n",
" print(doc.page_content)\n",
" print(\"-\" * 80)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Maximal Marginal Relevance Search (MMR)\n",
"Maximal marginal relevance optimizes for similarity to query AND diversity among selected documents."
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"docs_with_score = db.max_marginal_relevance_search_with_score(query)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--------------------------------------------------------------------------------\n",
"Score: 0.6077010035514832\n",
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
"\n",
"Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
"\n",
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
"\n",
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.\n",
"--------------------------------------------------------------------------------\n",
"--------------------------------------------------------------------------------\n",
"Score: 0.6852865219116211\n",
"It is going to transform America and put us on a path to win the economic competition of the 21st Century that we face with the rest of the world—particularly with China. \n",
"\n",
"As Ive told Xi Jinping, it is never a good bet to bet against the American people. \n",
"\n",
"Well create good jobs for millions of Americans, modernizing roads, airports, ports, and waterways all across America. \n",
"\n",
"And well do it all to withstand the devastating effects of the climate crisis and promote environmental justice. \n",
"\n",
"Well build a national network of 500,000 electric vehicle charging stations, begin to replace poisonous lead pipes—so every child—and every American—has clean water to drink at home and at school, provide affordable high-speed internet for every American—urban, suburban, rural, and tribal communities. \n",
"\n",
"4,000 projects have already been announced. \n",
"\n",
"And tonight, Im announcing that this year we will start fixing over 65,000 miles of highway and 1,500 bridges in disrepair.\n",
"--------------------------------------------------------------------------------\n",
"--------------------------------------------------------------------------------\n",
"Score: 0.6866700053215027\n",
"We cant change how divided weve been. But we can change how we move forward—on COVID-19 and other issues we must face together. \n",
"\n",
"I recently visited the New York City Police Department days after the funerals of Officer Wilbert Mora and his partner, Officer Jason Rivera. \n",
"\n",
"They were responding to a 9-1-1 call when a man shot and killed them with a stolen gun. \n",
"\n",
"Officer Mora was 27 years old. \n",
"\n",
"Officer Rivera was 22. \n",
"\n",
"Both Dominican Americans whod grown up on the same streets they later chose to patrol as police officers. \n",
"\n",
"I spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves. \n",
"\n",
"Ive worked on these issues a long time. \n",
"\n",
"I know what works: Investing in crime prevention and community police officers wholl walk the beat, wholl know the neighborhood, and who can restore trust and safety.\n",
"--------------------------------------------------------------------------------\n",
"--------------------------------------------------------------------------------\n",
"Score: 0.6936529278755188\n",
"But cancer from prolonged exposure to burn pits ravaged Heaths lungs and body. \n",
"\n",
"Danielle says Heath was a fighter to the very end. \n",
"\n",
"He didnt know how to stop fighting, and neither did she. \n",
"\n",
"Through her pain she found purpose to demand we do better. \n",
"\n",
"Tonight, Danielle—we are. \n",
"\n",
"The VA is pioneering new ways of linking toxic exposures to diseases, already helping more veterans get benefits. \n",
"\n",
"And tonight, Im announcing were expanding eligibility to veterans suffering from nine respiratory cancers. \n",
"\n",
"Im also calling on Congress: pass a law to make sure veterans devastated by toxic exposures in Iraq and Afghanistan finally get the benefits and comprehensive health care they deserve. \n",
"\n",
"And fourth, lets end cancer as we know it. \n",
"\n",
"This is personal to me and Jill, to Kamala, and to so many of you. \n",
"\n",
"Cancer is the #2 cause of death in Americasecond only to heart disease.\n",
"--------------------------------------------------------------------------------\n"
]
}
],
"source": [
"for doc, score in docs_with_score:\n",
" print(\"-\" * 80)\n",
" print(\"Score: \", score)\n",
" print(doc.page_content)\n",
" print(\"-\" * 80)"
"print()\n",
"print(\"Similarity Search\")\n",
"results = db.similarity_search(\n",
" \"LangChain provides abstractions to make working with LLMs easy\",\n",
" k=2,\n",
" filter={\"source\": \"tweet\"},\n",
")\n",
"for res in results:\n",
" print(f\"* {res.page_content} [{res.metadata}]\")\n",
"\n",
"print()\n",
"print(\"Similarity search with score\")\n",
"results = db.similarity_search_with_score(\n",
" \"Will it be hot tomorrow?\", k=1, filter={\"source\": \"news\"}\n",
")\n",
"for res, score in results:\n",
" print(f\"* [SIM={score:3f}] {res.page_content} [{res.metadata}]\")"
]
},
{
@ -381,7 +311,7 @@
},
{
"cell_type": "code",
"execution_count": 34,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
@ -402,16 +332,16 @@
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['b94dc67c-ce7e-11ee-b8cb-b940b0e45762']"
"['68c4c679-c4d9-4f2d-bf01-f6c4f2181503']"
]
},
"execution_count": 35,
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
@ -422,7 +352,7 @@
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
@ -431,16 +361,16 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(Document(page_content='foo'), 0.0)"
"(Document(metadata={}, page_content='foo'), 0.0015394920483231544)"
]
},
"execution_count": 37,
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
@ -451,17 +381,17 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, weve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWeve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWere putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWere securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': '../../how_to/state_of_the_union.txt'}),\n",
" 0.6946534514427185)"
"(Document(metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),\n",
" 1.2609431743621826)"
]
},
"execution_count": 38,
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
@ -481,12 +411,12 @@
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"db = Kinetica.from_documents(\n",
" documents=docs,\n",
" documents=documents,\n",
" embedding=embeddings,\n",
" collection_name=COLLECTION_NAME,\n",
" config=connection,\n",
@ -496,7 +426,7 @@
},
{
"cell_type": "code",
"execution_count": 40,
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
@ -505,17 +435,17 @@
},
{
"cell_type": "code",
"execution_count": 41,
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, weve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWeve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWere putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWere securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': '../../how_to/state_of_the_union.txt'}),\n",
" 0.6946534514427185)"
"(Document(metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),\n",
" 1.260920763015747)"
]
},
"execution_count": 41,
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
@ -533,7 +463,7 @@
},
{
"cell_type": "code",
"execution_count": 42,
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
@ -542,14 +472,14 @@
},
{
"cell_type": "code",
"execution_count": 43,
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tags=['Kinetica', 'OpenAIEmbeddings'] vectorstore=<langchain_community.vectorstores.kinetica.Kinetica object at 0x7f1644375e20>\n"
"tags=['Kinetica', 'OpenAIEmbeddings'] vectorstore=<langchain_community.vectorstores.kinetica.Kinetica object at 0x7a48142b2230> search_kwargs={}\n"
]
}
],
@ -574,7 +504,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
"version": "3.10.12"
}
},
"nbformat": 4,

View File

@ -93,7 +93,7 @@ class Kinetica(VectorStore):
To use, you should have the ``gpudb`` python package installed.
Args:
kinetica_settings: Kinetica connection settings class.
config: Kinetica connection settings class.
embedding_function: Any embedding function implementing
`langchain.embeddings.base.Embeddings` interface.
collection_name: The name of the collection to use. (default: langchain)
@ -170,7 +170,7 @@ class Kinetica(VectorStore):
except ImportError:
raise ImportError(
"Could not import Kinetica python API. "
"Please install it with `pip install gpudb==7.2.0.9`."
"Please install it with `pip install gpudb>=7.2.2.0`."
)
self.dimensions = dimensions
@ -199,7 +199,7 @@ class Kinetica(VectorStore):
except ImportError:
raise ImportError(
"Could not import Kinetica python API. "
"Please install it with `pip install gpudb==7.2.0.9`."
"Please install it with `pip install gpudb>=7.2.2.0`."
)
options = GPUdb.Options()
@ -290,7 +290,7 @@ class Kinetica(VectorStore):
except ImportError:
raise ImportError(
"Could not import Kinetica python API. "
"Please install it with `pip install gpudb==7.2.0.9`."
"Please install it with `pip install gpudb>=7.2.2.0`."
)
return GPUdbTable(
_type=self.table_schema,
@ -428,7 +428,7 @@ class Kinetica(VectorStore):
k: int = 4,
filter: Optional[dict] = None,
) -> List[Tuple[Document, float]]:
from gpudb import GPUdbException
# from gpudb import GPUdbException
resp: Dict = self.__query_collection(embedding, k, filter)
if resp and resp["status_info"]["status"] == "OK" and "records" in resp:
@ -436,9 +436,10 @@ class Kinetica(VectorStore):
results = list(zip(*list(records.values())))
return self._results_to_docs_and_scores(results)
else:
self.logger.error(resp["status_info"]["message"])
raise GPUdbException(resp["status_info"]["message"])
self.logger.error(resp["status_info"]["message"])
# raise GPUdbException(resp["status_info"]["message"])
return []
def similarity_search_by_vector(
self,
@ -464,16 +465,20 @@ class Kinetica(VectorStore):
def _results_to_docs_and_scores(self, results: Any) -> List[Tuple[Document, float]]:
"""Return docs and scores from results."""
docs = [
(
Document(
page_content=result[0],
metadata=json.loads(result[1]),
),
result[2] if self.embedding_function is not None else None,
)
for result in results
]
docs = (
[
(
Document(
page_content=result[0],
metadata=json.loads(result[1]),
),
result[2] if self.embedding_function is not None else None,
)
for result in results
]
if len(results) > 0
else []
)
return docs
def _select_relevance_score_fn(self) -> Callable[[float], float]: