Harrison/condense q llm (#5438)

12 months ago · c4b502a470
parent ee57054d05
commit c4b502a470
2 changed files with 63 additions and 2 deletions
--- a/docs/modules/chains/index_examples/chat_vector_db.ipynb
+++ b/docs/modules/chains/index_examples/chat_vector_db.ipynb
@ -113,7 +113,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 5,
   "id": "af803fee",
   "metadata": {},
   "outputs": [],
@ -316,6 +316,64 @@
    "result['answer']"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "11a76453",
+   "metadata": {},
+   "source": [
+    "## Using a different model for condensing the question\n",
+    "\n",
+    "This chain has two steps. First, it condenses the current question and the chat history into a standalone question. This is necessary to create a standalone vector to use for retrieval. After that, it does retrieval and then answers the question using retrieval-augmented generation with a separate model. Part of the power of the declarative nature of LangChain is that you can easily use a separate language model for each call. For example, it can be useful to use a cheaper and faster model for the simpler task of condensing the question, and then a more expensive model for answering the question. Here is an example of doing so."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "8d4ede9e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.chat_models import ChatOpenAI"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "04a23e23",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "qa = ConversationalRetrievalChain.from_llm(\n",
+    "    ChatOpenAI(temperature=0, model=\"gpt-4\"),\n",
+    "    vectorstore.as_retriever(),\n",
+    "    condense_question_llm = ChatOpenAI(temperature=0, model='gpt-3.5-turbo'),\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "b1223752",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "chat_history = []\n",
+    "query = \"What did the president say about Ketanji Brown Jackson\"\n",
+    "result = qa({\"question\": query, \"chat_history\": chat_history})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cdce4e28",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "chat_history = [(query, result[\"answer\"])]\n",
+    "query = \"Did he mention who she suceeded\"\n",
+    "result = qa({\"question\": query, \"chat_history\": chat_history})"
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "0eaadf0f",
--- a/langchain/chains/conversational_retrieval/base.py
+++ b/langchain/chains/conversational_retrieval/base.py
@ -195,6 +195,7 @@ class ConversationalRetrievalChain(BaseConversationalRetrievalChain):
        condense_question_prompt: BasePromptTemplate = CONDENSE_QUESTION_PROMPT,
        chain_type: str = "stuff",
        verbose: bool = False,
+        condense_question_llm: Optional[BaseLanguageModel] = None,
        combine_docs_chain_kwargs: Optional[Dict] = None,
        **kwargs: Any,
    ) -> BaseConversationalRetrievalChain:
@ -206,8 +207,10 @@ class ConversationalRetrievalChain(BaseConversationalRetrievalChain):
            verbose=verbose,
            **combine_docs_chain_kwargs,
        )
+
+        _llm = condense_question_llm or llm
        condense_question_chain = LLMChain(
-            llm=llm, prompt=condense_question_prompt, verbose=verbose
+            llm=_llm, prompt=condense_question_prompt, verbose=verbose
        )
        return cls(
            retriever=retriever,