Minor updates to notebook for MultiQueryRetriever (#7102)

* Add an easier-to-run example.
* Add logging per https://github.com/hwchase17/langchain/pull/6891.
* Update params per https://github.com/hwchase17/langchain/pull/5962.

---------

Co-authored-by: R. Lance Martin <rlm@Rs-MacBook-Pro.local>
Co-authored-by: Lance Martin <lance@langchain.dev>
pull/7125/head
Lance Martin 1 year ago committed by GitHub
parent dfa48dc3b5
commit 9ca4c54428
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -14,31 +14,24 @@
},
{
"cell_type": "code",
"execution_count": 1,
"id": "c2f3f5f2",
"execution_count": 2,
"id": "994d6c74",
"metadata": {},
"outputs": [],
"source": [
"# Build a sample vectorDB\n",
"from langchain.vectorstores import Chroma\n",
"from langchain.document_loaders import PyPDFLoader\n",
"from langchain.document_loaders import WebBaseLoader\n",
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"\n",
"# Load PDF\n",
"path=\"path-to-files\"\n",
"loaders = [\n",
" PyPDFLoader(path+\"docs/cs229_lectures/MachineLearning-Lecture01.pdf\"),\n",
" PyPDFLoader(path+\"docs/cs229_lectures/MachineLearning-Lecture02.pdf\"),\n",
" PyPDFLoader(path+\"docs/cs229_lectures/MachineLearning-Lecture03.pdf\")\n",
"]\n",
"docs = []\n",
"for loader in loaders:\n",
" docs.extend(loader.load())\n",
"# Load blog post\n",
"loader = WebBaseLoader(\"https://lilianweng.github.io/posts/2023-06-23-agent/\")\n",
"data = loader.load()\n",
" \n",
"# Split\n",
"text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1500,chunk_overlap = 150)\n",
"splits = text_splitter.split_documents(docs)\n",
"text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap = 0)\n",
"splits = text_splitter.split_documents(data)\n",
"\n",
"# VectorDB\n",
"embedding = OpenAIEmbeddings()\n",
@ -64,8 +57,7 @@
"source": [
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.retrievers.multi_query import MultiQueryRetriever\n",
"question=\"What does the course say about regression?\"\n",
"num_queries=3\n",
"question=\"What are the approaches to Task Decomposition?\"\n",
"llm = ChatOpenAI(temperature=0)\n",
"retriever_from_llm = MultiQueryRetriever.from_llm(retriever=vectordb.as_retriever(),llm=llm)"
]
@ -73,6 +65,19 @@
{
"cell_type": "code",
"execution_count": 4,
"id": "9e6d3b69",
"metadata": {},
"outputs": [],
"source": [
"# Set logging for the queries\n",
"import logging\n",
"logging.basicConfig()\n",
"logging.getLogger('langchain.retrievers.multi_query').setLevel(logging.INFO)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "e5203612",
"metadata": {},
"outputs": [
@ -80,22 +85,22 @@
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:root:Generated queries: [\"1. What is the course's perspective on regression?\", '2. How does the course discuss regression?', '3. What information does the course provide about regression?']\n"
"INFO:langchain.retrievers.multi_query:Generated queries: ['1. How can Task Decomposition be approached?', '2. What are the different methods for Task Decomposition?', '3. What are the various approaches to decomposing tasks?']\n"
]
},
{
"data": {
"text/plain": [
"6"
"5"
]
},
"execution_count": 4,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"unique_docs = retriever_from_llm.get_relevant_documents(question=\"What does the course say about regression?\")\n",
"unique_docs = retriever_from_llm.get_relevant_documents(query=question)\n",
"len(unique_docs)"
]
},
@ -111,7 +116,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"id": "d9afb0ca",
"metadata": {},
"outputs": [],
@ -151,12 +156,12 @@
"llm_chain = LLMChain(llm=llm,prompt=QUERY_PROMPT,output_parser=output_parser)\n",
" \n",
"# Other inputs\n",
"question=\"What does the course say about regression?\""
"question=\"What are the approaches to Task Decomposition?\""
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"id": "6660d7ee",
"metadata": {},
"outputs": [
@ -164,16 +169,16 @@
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:root:Generated queries: [\"1. What is the course's perspective on regression?\", '2. Can you provide information on regression as discussed in the course?', '3. How does the course cover the topic of regression?', \"4. What are the course's teachings on regression?\", '5. In relation to the course, what is mentioned about regression?']\n"
"INFO:langchain.retrievers.multi_query:Generated queries: [\"1. What is the course's perspective on regression?\", '2. Can you provide information on regression as discussed in the course?', '3. How does the course cover the topic of regression?', \"4. What are the course's teachings on regression?\", '5. In relation to the course, what is mentioned about regression?']\n"
]
},
{
"data": {
"text/plain": [
"8"
"11"
]
},
"execution_count": 6,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@ -185,7 +190,7 @@
" parser_key=\"lines\") # \"lines\" is the key (attribute name) of the parsed output\n",
"\n",
"# Results\n",
"unique_docs = retriever.get_relevant_documents(question=\"What does the course say about regression?\")\n",
"unique_docs = retriever.get_relevant_documents(query=\"What does the course say about regression?\")\n",
"len(unique_docs)"
]
}

Loading…
Cancel
Save