mirror of https://github.com/hwchase17/langchain
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
719 lines
19 KiB
Plaintext
719 lines
19 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# SAP HANA Cloud Vector Engine\n",
|
|
"\n",
|
|
">[SAP HANA Cloud Vector Engine](https://www.sap.com/events/teched/news-guide/ai.html#article8) is a vector store fully integrated into the `SAP HANA Cloud` database."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Setting up\n",
|
|
"\n",
|
|
"Installation of the HANA database driver."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"tags": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Pip install necessary package\n",
|
|
"%pip install --upgrade --quiet hdbcli"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"`OpenAIEmbeddings` requires an OpenAI API key, which is read from the `OPENAI_API_KEY` environment variable."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 28,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2023-09-09T08:02:16.802456Z",
|
|
"start_time": "2023-09-09T08:02:07.065604Z"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import os\n",
|
|
"# Use OPENAI_API_KEY env variable\n",
|
|
"# os.environ[\"OPENAI_API_KEY\"] = \"Your OpenAI API key\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Create a database connection to a HANA Cloud instance"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 30,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2023-09-09T08:02:28.174088Z",
|
|
"start_time": "2023-09-09T08:02:28.162698Z"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from hdbcli import dbapi\n",
|
|
"\n",
|
|
"# Use connection settings from the environment\n",
|
|
"connection = dbapi.connect(\n",
|
|
" address=os.environ.get(\"HANA_DB_ADDRESS\"),\n",
|
|
" port=os.environ.get(\"HANA_DB_PORT\"),\n",
|
|
" user=os.environ.get(\"HANA_DB_USER\"),\n",
|
|
" password=os.environ.get(\"HANA_DB_PASSWORD\"),\n",
|
|
" autocommit=True,\n",
|
|
" sslValidateCertificate=False,\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Example"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Load the sample document \"state_of_the_union.txt\" and create chunks from it."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2023-09-09T08:02:25.452472Z",
|
|
"start_time": "2023-09-09T08:02:25.441563Z"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from langchain.docstore.document import Document\n",
|
|
"from langchain_community.document_loaders import TextLoader\n",
|
|
"from langchain_community.vectorstores.hanavector import HanaDB\n",
|
|
"from langchain_openai import OpenAIEmbeddings\n",
|
|
"from langchain_text_splitters import CharacterTextSplitter\n",
|
|
"\n",
|
|
"text_documents = TextLoader(\"../../modules/state_of_the_union.txt\").load()\n",
|
|
"text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n",
|
|
"text_chunks = text_splitter.split_documents(text_documents)\n",
|
|
"print(f\"Number of document chunks: {len(text_chunks)}\")\n",
|
|
"\n",
|
|
"embeddings = OpenAIEmbeddings()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Create a LangChain VectorStore interface for the HANA database and specify the table (collection) to use for accessing the vector embeddings"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 31,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2023-09-09T08:04:16.696625Z",
|
|
"start_time": "2023-09-09T08:02:31.817790Z"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"db = HanaDB(\n",
|
|
" embedding=embeddings, connection=connection, table_name=\"STATE_OF_THE_UNION\"\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Add the loaded document chunks to the table. For this example, we delete any previous content from the table which might exist from previous runs."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Delete already existing documents from the table\n",
|
|
"db.delete(filter={})\n",
|
|
"\n",
|
|
"# add the loaded document chunks\n",
|
|
"db.add_documents(text_chunks)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Perform a query to get the two best-matching document chunks from the ones that we added in the previous step.\n",
|
|
"By default \"Cosine Similarity\" is used for the search."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
|
"docs = db.similarity_search(query, k=2)\n",
|
|
"\n",
|
|
"for doc in docs:\n",
|
|
" print(\"-\" * 80)\n",
|
|
" print(doc.page_content)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Query the same content with \"Euclidean Distance\". The results should be the same as with \"Cosine Similarity\"."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from langchain_community.vectorstores.utils import DistanceStrategy\n",
|
|
"\n",
|
|
"db = HanaDB(\n",
|
|
" embedding=embeddings,\n",
|
|
" connection=connection,\n",
|
|
" distance_strategy=DistanceStrategy.EUCLIDEAN_DISTANCE,\n",
|
|
" table_name=\"STATE_OF_THE_UNION\",\n",
|
|
")\n",
|
|
"\n",
|
|
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
|
"docs = db.similarity_search(query, k=2)\n",
|
|
"for doc in docs:\n",
|
|
" print(\"-\" * 80)\n",
|
|
" print(doc.page_content)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"collapsed": false,
|
|
"jupyter": {
|
|
"outputs_hidden": false
|
|
}
|
|
},
|
|
"source": [
|
|
"## Maximal Marginal Relevance Search (MMR)\n",
|
|
"\n",
|
|
"`Maximal marginal relevance` optimizes for similarity to query AND diversity among selected documents. The first 20 (fetch_k) items will be retrieved from the DB. The MMR algorithm will then find the best 2 (k) matches."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2023-09-09T08:05:23.276819Z",
|
|
"start_time": "2023-09-09T08:05:21.972256Z"
|
|
},
|
|
"collapsed": false,
|
|
"jupyter": {
|
|
"outputs_hidden": false
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"docs = db.max_marginal_relevance_search(query, k=2, fetch_k=20)\n",
|
|
"for doc in docs:\n",
|
|
" print(\"-\" * 80)\n",
|
|
" print(doc.page_content)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Basic Vectorstore Operations"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"db = HanaDB(\n",
|
|
" connection=connection, embedding=embeddings, table_name=\"LANGCHAIN_DEMO_BASIC\"\n",
|
|
")\n",
|
|
"\n",
|
|
"# Delete already existing documents from the table\n",
|
|
"db.delete(filter={})"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"We can add simple text documents to the existing table."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"docs = [Document(page_content=\"Some text\"), Document(page_content=\"Other docs\")]\n",
|
|
"db.add_documents(docs)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Add documents with metadata."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"docs = [\n",
|
|
" Document(\n",
|
|
" page_content=\"foo\",\n",
|
|
" metadata={\"start\": 100, \"end\": 150, \"doc_name\": \"foo.txt\", \"quality\": \"bad\"},\n",
|
|
" ),\n",
|
|
" Document(\n",
|
|
" page_content=\"bar\",\n",
|
|
" metadata={\"start\": 200, \"end\": 250, \"doc_name\": \"bar.txt\", \"quality\": \"good\"},\n",
|
|
" ),\n",
|
|
"]\n",
|
|
"db.add_documents(docs)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Query documents with specific metadata."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"docs = db.similarity_search(\"foobar\", k=2, filter={\"quality\": \"bad\"})\n",
|
|
"# With filtering on \"quality\"==\"bad\", only one document should be returned\n",
|
|
"for doc in docs:\n",
|
|
" print(\"-\" * 80)\n",
|
|
" print(doc.page_content)\n",
|
|
" print(doc.metadata)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Delete documents with specific metadata."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"db.delete(filter={\"quality\": \"bad\"})\n",
|
|
"\n",
|
|
"# Now the similarity search with the same filter will return no results\n",
|
|
"docs = db.similarity_search(\"foobar\", k=2, filter={\"quality\": \"bad\"})\n",
|
|
"print(len(docs))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Using a VectorStore as a retriever in chains for retrieval augmented generation (RAG)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 36,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from langchain.memory import ConversationBufferMemory\n",
|
|
"from langchain_openai import ChatOpenAI\n",
|
|
"\n",
|
|
"# Access the vector DB with a new table\n",
|
|
"db = HanaDB(\n",
|
|
" connection=connection,\n",
|
|
" embedding=embeddings,\n",
|
|
" table_name=\"LANGCHAIN_DEMO_RETRIEVAL_CHAIN\",\n",
|
|
")\n",
|
|
"\n",
|
|
"# Delete already existing entries from the table\n",
|
|
"db.delete(filter={})\n",
|
|
"\n",
|
|
"# add the loaded document chunks from the \"State Of The Union\" file\n",
|
|
"db.add_documents(text_chunks)\n",
|
|
"\n",
|
|
"# Create a retriever instance of the vector store\n",
|
|
"retriever = db.as_retriever()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Define the prompt."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 37,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from langchain.prompts import PromptTemplate\n",
|
|
"\n",
|
|
"prompt_template = \"\"\"\n",
|
|
"You are an expert in state of the union topics. You are provided multiple context items that are related to the prompt you have to answer.\n",
|
|
"Use the following pieces of context to answer the question at the end.\n",
|
|
"\n",
|
|
"```\n",
|
|
"{context}\n",
|
|
"```\n",
|
|
"\n",
|
|
"Question: {question}\n",
|
|
"\"\"\"\n",
|
|
"\n",
|
|
"PROMPT = PromptTemplate(\n",
|
|
" template=prompt_template, input_variables=[\"context\", \"question\"]\n",
|
|
")\n",
|
|
"chain_type_kwargs = {\"prompt\": PROMPT}"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Create the ConversationalRetrievalChain, which handles the chat history and the retrieval of similar document chunks to be added to the prompt."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 38,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from langchain.chains import ConversationalRetrievalChain\n",
|
|
"\n",
|
|
"llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\")\n",
|
|
"memory = ConversationBufferMemory(\n",
|
|
" memory_key=\"chat_history\", output_key=\"answer\", return_messages=True\n",
|
|
")\n",
|
|
"qa_chain = ConversationalRetrievalChain.from_llm(\n",
|
|
" llm,\n",
|
|
" db.as_retriever(search_kwargs={\"k\": 5}),\n",
|
|
" return_source_documents=True,\n",
|
|
" memory=memory,\n",
|
|
" verbose=False,\n",
|
|
" combine_docs_chain_kwargs={\"prompt\": PROMPT},\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Ask the first question (and verify how many text chunks have been used)."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"question = \"What about Mexico and Guatemala?\"\n",
|
|
"\n",
|
|
"result = qa_chain.invoke({\"question\": question})\n",
|
|
"print(\"Answer from LLM:\")\n",
|
|
"print(\"================\")\n",
|
|
"print(result[\"answer\"])\n",
|
|
"\n",
|
|
"source_docs = result[\"source_documents\"]\n",
|
|
"print(\"================\")\n",
|
|
"print(f\"Number of used source document chunks: {len(source_docs)}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Examine the used chunks of the chain in detail. Check if the best ranked chunk contains info about \"Mexico and Guatemala\" as mentioned in the question."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"for doc in source_docs:\n",
|
|
" print(\"-\" * 80)\n",
|
|
" print(doc.page_content)\n",
|
|
" print(doc.metadata)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Ask another question on the same conversational chain. The answer should relate to the previous answer given."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"question = \"What about other countries?\"\n",
|
|
"\n",
|
|
"result = qa_chain.invoke({\"question\": question})\n",
|
|
"print(\"Answer from LLM:\")\n",
|
|
"print(\"================\")\n",
|
|
"print(result[\"answer\"])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Standard tables vs. \"custom\" tables with vector data"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"By default, the table for the embeddings is created with 3 columns:\n",
|
|
"* A column `VEC_TEXT`, which contains the text of the Document\n",
|
|
"* A column `VEC_META`, which contains the metadata of the Document\n",
|
|
"* A column `VEC_VECTOR`, which contains the embeddings-vector of the document's text"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Access the vector DB with a new table\n",
|
|
"db = HanaDB(\n",
|
|
" connection=connection, embedding=embeddings, table_name=\"LANGCHAIN_DEMO_NEW_TABLE\"\n",
|
|
")\n",
|
|
"\n",
|
|
"# Delete already existing entries from the table\n",
|
|
"db.delete(filter={})\n",
|
|
"\n",
|
|
"# Add a simple document with some metadata\n",
|
|
"docs = [\n",
|
|
" Document(\n",
|
|
" page_content=\"A simple document\",\n",
|
|
" metadata={\"start\": 100, \"end\": 150, \"doc_name\": \"simple.txt\"},\n",
|
|
" )\n",
|
|
"]\n",
|
|
"db.add_documents(docs)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Show the columns in table \"LANGCHAIN_DEMO_NEW_TABLE\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"cur = connection.cursor()\n",
|
|
"cur.execute(\n",
|
|
" \"SELECT COLUMN_NAME, DATA_TYPE_NAME FROM SYS.TABLE_COLUMNS WHERE SCHEMA_NAME = CURRENT_SCHEMA AND TABLE_NAME = 'LANGCHAIN_DEMO_NEW_TABLE'\"\n",
|
|
")\n",
|
|
"rows = cur.fetchall()\n",
|
|
"for row in rows:\n",
|
|
" print(row)\n",
|
|
"cur.close()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Show the value of the inserted document in the three columns "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"cur = connection.cursor()\n",
|
|
"cur.execute(\n",
|
|
" \"SELECT VEC_TEXT, VEC_META, TO_NVARCHAR(VEC_VECTOR) FROM LANGCHAIN_DEMO_NEW_TABLE LIMIT 1\"\n",
|
|
")\n",
|
|
"rows = cur.fetchall()\n",
|
|
"print(rows[0][0]) # The text\n",
|
|
"print(rows[0][1]) # The metadata\n",
|
|
"print(rows[0][2]) # The vector\n",
|
|
"cur.close()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Custom tables must have at least three columns that match the semantics of a standard table\n",
|
|
"* A column with type `NCLOB` or `NVARCHAR` for the text/context of the embeddings\n",
|
|
"* A column with type `NCLOB` or `NVARCHAR` for the metadata \n",
|
|
"* A column with type `REAL_VECTOR` for the embedding vector\n",
|
|
"\n",
|
|
"The table can contain additional columns. When new Documents are inserted into the table, these additional columns must allow NULL values."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Create a new table \"MY_OWN_TABLE\" with three \"standard\" columns and one additional column\n",
|
|
"my_own_table_name = \"MY_OWN_TABLE\"\n",
|
|
"cur = connection.cursor()\n",
|
|
"cur.execute(\n",
|
|
" (\n",
|
|
" f\"CREATE TABLE {my_own_table_name} (\"\n",
|
|
" \"SOME_OTHER_COLUMN NVARCHAR(42), \"\n",
|
|
" \"MY_TEXT NVARCHAR(2048), \"\n",
|
|
" \"MY_METADATA NVARCHAR(1024), \"\n",
|
|
" \"MY_VECTOR REAL_VECTOR )\"\n",
|
|
" )\n",
|
|
")\n",
|
|
"\n",
|
|
"# Create a HanaDB instance with the own table\n",
|
|
"db = HanaDB(\n",
|
|
" connection=connection,\n",
|
|
" embedding=embeddings,\n",
|
|
" table_name=my_own_table_name,\n",
|
|
" content_column=\"MY_TEXT\",\n",
|
|
" metadata_column=\"MY_METADATA\",\n",
|
|
" vector_column=\"MY_VECTOR\",\n",
|
|
")\n",
|
|
"\n",
|
|
"# Add a simple document with some metadata\n",
|
|
"docs = [\n",
|
|
" Document(\n",
|
|
" page_content=\"Some other text\",\n",
|
|
" metadata={\"start\": 400, \"end\": 450, \"doc_name\": \"other.txt\"},\n",
|
|
" )\n",
|
|
"]\n",
|
|
"db.add_documents(docs)\n",
|
|
"\n",
|
|
"# Check if data has been inserted into our own table\n",
|
|
"cur.execute(f\"SELECT * FROM {my_own_table_name} LIMIT 1\")\n",
|
|
"rows = cur.fetchall()\n",
|
|
"print(rows[0][0]) # Value of column \"SOME_OTHER_COLUMN\". Should be NULL/None\n",
|
|
"print(rows[0][1]) # The text\n",
|
|
"print(rows[0][2]) # The metadata\n",
|
|
"print(rows[0][3]) # The vector\n",
|
|
"\n",
|
|
"cur.close()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Add another document and perform a similarity search on the custom table."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"docs = [\n",
|
|
" Document(\n",
|
|
" page_content=\"Some more text\",\n",
|
|
" metadata={\"start\": 800, \"end\": 950, \"doc_name\": \"more.txt\"},\n",
|
|
" )\n",
|
|
"]\n",
|
|
"db.add_documents(docs)\n",
|
|
"\n",
|
|
"query = \"What's up?\"\n",
|
|
"docs = db.similarity_search(query, k=2)\n",
|
|
"for doc in docs:\n",
|
|
" print(\"-\" * 80)\n",
|
|
" print(doc.page_content)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.10.12"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
}
|