Updated QA notebook (#6801)

Description: `all_metadatas` was not defined and `OpenAIEmbeddings` was not
imported.
Issue: #6723
Dependencies: lark
Tag maintainer: @vowelparrot, @dev2049

---------

Co-authored-by: rlm <pexpresss31@gmail.com>
pull/6477/head
Hashem Alsaket 1 year ago committed by GitHub
parent 140ba682f1
commit 5861770a53
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -30,14 +30,14 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 14,
"id": "2e587f65",
"metadata": {},
"outputs": [],
"source": [
"# Load Notion page as a markdownfile file\n",
"from langchain.document_loaders import NotionDirectoryLoader\n",
"path='.../Notion_Folder_With_Markdown_File'\n",
"path='../Notion_DB/'\n",
"loader = NotionDirectoryLoader(path)\n",
"docs = loader.load()\n",
"md_file=docs[0].page_content"
@ -45,7 +45,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 15,
"id": "1cd3fd7e",
"metadata": {},
"outputs": [],
@ -69,7 +69,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 26,
"id": "7fbff95f",
"metadata": {},
"outputs": [],
@ -110,8 +110,10 @@
"outputs": [],
"source": [
"# Build vectorstore and keep the metadata\n",
"from langchain.embeddings import OpenAIEmbeddings\n",
"from langchain.vectorstores import Chroma\n",
"vectorstore = Chroma.from_documents(texts=all_splits,metadatas=all_metadatas,embedding=OpenAIEmbeddings())"
"vectorstore = Chroma.from_documents(documents=all_splits,\n",
" embedding=OpenAIEmbeddings())"
]
},
{
@ -157,6 +159,37 @@
"We can see that we can query *only for texts* in the `Introduction` of the document!"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "d688db6e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"query='Introduction' filter=Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='Section', value='Introduction') limit=None\n"
]
},
{
"data": {
"text/plain": [
"[Document(page_content='![Untitled](Auto-Evaluation%20of%20Metadata%20Filtering%2018502448c85240828f33716740f9574b/Untitled.png)', metadata={'Section': 'Introduction'}),\n",
" Document(page_content='Q+A systems often use a two-step approach: retrieve relevant text chunks and then synthesize them into an answer. There many ways to approach this. For example, we recently [discussed](https://blog.langchain.dev/auto-evaluation-of-anthropic-100k-context-window/) the Retriever-Less option (at bottom in the below diagram), highlighting the Anthropic 100k context window model. Metadata filtering is an alternative approach that pre-filters chunks based on a user-defined criteria in a VectorDB using', metadata={'Section': 'Introduction'}),\n",
" Document(page_content='metadata tags prior to semantic search.', metadata={'Section': 'Introduction'})]"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Test\n",
"retriever.get_relevant_documents(\"Summarize the Introduction section of the document\")"
]
},
{
"cell_type": "code",
"execution_count": 29,
@ -287,6 +320,11 @@
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
},
"vscode": {
"interpreter": {
"hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
}
}
},
"nbformat": 4,

Loading…
Cancel
Save