Updated QA notebook (#6801)

Description: `all_metadatas` was not defined and `OpenAIEmbeddings` was not imported. Issue: fixes #6723. Dependencies: lark. Tag maintainer: @vowelparrot, @dev2049 --------- Co-authored-by: rlm <pexpresss31@gmail.com>
1 year ago · 5861770a53
parent 140ba682f1
commit 5861770a53
1 changed files with 43 additions and 5 deletions
--- a/docs/extras/use_cases/question_answering/document-context-aware-QA.ipynb
+++ b/docs/extras/use_cases/question_answering/document-context-aware-QA.ipynb
@ -30,14 +30,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 14,
   "id": "2e587f65",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load Notion page as a markdownfile file\n",
    "from langchain.document_loaders import NotionDirectoryLoader\n",
-    "path='.../Notion_Folder_With_Markdown_File'\n",
+    "path='../Notion_DB/'\n",
    "loader = NotionDirectoryLoader(path)\n",
    "docs = loader.load()\n",
    "md_file=docs[0].page_content"
@ -45,7 +45,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 15,
   "id": "1cd3fd7e",
   "metadata": {},
   "outputs": [],
@ -69,7 +69,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 26,
   "id": "7fbff95f",
   "metadata": {},
   "outputs": [],
@ -110,8 +110,10 @@
   "outputs": [],
   "source": [
    "# Build vectorstore and keep the metadata\n",
+    "from langchain.embeddings import OpenAIEmbeddings\n",
    "from langchain.vectorstores import Chroma\n",
-    "vectorstore = Chroma.from_documents(texts=all_splits,metadatas=all_metadatas,embedding=OpenAIEmbeddings())"
+    "vectorstore = Chroma.from_documents(documents=all_splits,\n",
+    "                                    embedding=OpenAIEmbeddings())"
   ]
  },
  {
@ -157,6 +159,37 @@
    "We can see that we can query *only for texts* in the `Introduction` of the document!"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "id": "d688db6e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "query='Introduction' filter=Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='Section', value='Introduction') limit=None\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "[Document(page_content='![Untitled](Auto-Evaluation%20of%20Metadata%20Filtering%2018502448c85240828f33716740f9574b/Untitled.png)', metadata={'Section': 'Introduction'}),\n",
+       " Document(page_content='Q+A systems often use a two-step approach: retrieve relevant text chunks and then synthesize them into an answer. There many ways to approach this. For example, we recently [discussed](https://blog.langchain.dev/auto-evaluation-of-anthropic-100k-context-window/) the Retriever-Less option (at bottom in the below diagram), highlighting the Anthropic 100k context window model. Metadata filtering is an alternative approach that pre-filters chunks based on a user-defined criteria in a VectorDB using', metadata={'Section': 'Introduction'}),\n",
+       " Document(page_content='metadata tags prior to semantic search.', metadata={'Section': 'Introduction'})]"
+      ]
+     },
+     "execution_count": 29,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Test\n",
+    "retriever.get_relevant_documents(\"Summarize the Introduction section of the document\")"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": 29,
@ -287,6 +320,11 @@
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
+   }
  }
 },
 "nbformat": 4,