From c4b502a47051f50c6e24b824d3db622748458d13 Mon Sep 17 00:00:00 2001
From: Harrison Chase
Date: Tue, 30 May 2023 07:15:37 -0700
Subject: [PATCH] Harrison/condense q llm (#5438)

---
 .../index_examples/chat_vector_db.ipynb      | 60 ++++++++++++++++++-
 .../chains/conversational_retrieval/base.py  |  5 +-
 2 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/docs/modules/chains/index_examples/chat_vector_db.ipynb b/docs/modules/chains/index_examples/chat_vector_db.ipynb
index de329013..c78804ad 100644
--- a/docs/modules/chains/index_examples/chat_vector_db.ipynb
+++ b/docs/modules/chains/index_examples/chat_vector_db.ipynb
@@ -113,7 +113,7 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 20,
+    "execution_count": 5,
     "id": "af803fee",
     "metadata": {},
     "outputs": [],
@@ -316,6 +316,64 @@
     "result['answer']"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "11a76453",
+   "metadata": {},
+   "source": [
+    "## Using a different model for condensing the question\n",
+    "\n",
+    "This chain has two steps. First, it condenses the current question and the chat history into a standalone question. This is necessary to create a standalone vector to use for retrieval. After that, it does retrieval and then answers the question using retrieval-augmented generation with a separate model. Part of the power of LangChain's declarative nature is that you can easily use a separate language model for each call. For example, you can use a cheaper, faster model for the simpler task of condensing the question, and a more expensive model for answering it. Here is an example of doing so."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "8d4ede9e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.chat_models import ChatOpenAI"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "04a23e23",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "qa = ConversationalRetrievalChain.from_llm(\n",
+    "    ChatOpenAI(temperature=0, model=\"gpt-4\"),\n",
+    "    vectorstore.as_retriever(),\n",
+    "    condense_question_llm=ChatOpenAI(temperature=0, model='gpt-3.5-turbo'),\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "b1223752",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "chat_history = []\n",
+    "query = \"What did the president say about Ketanji Brown Jackson\"\n",
+    "result = qa({\"question\": query, \"chat_history\": chat_history})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cdce4e28",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "chat_history = [(query, result[\"answer\"])]\n",
+    "query = \"Did he mention who she succeeded\"\n",
+    "result = qa({\"question\": query, \"chat_history\": chat_history})"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "0eaadf0f",
diff --git a/langchain/chains/conversational_retrieval/base.py b/langchain/chains/conversational_retrieval/base.py
index ce7e7115..1832585c 100644
--- a/langchain/chains/conversational_retrieval/base.py
+++ b/langchain/chains/conversational_retrieval/base.py
@@ -195,6 +195,7 @@ class ConversationalRetrievalChain(BaseConversationalRetrievalChain):
         condense_question_prompt: BasePromptTemplate = CONDENSE_QUESTION_PROMPT,
         chain_type: str = "stuff",
         verbose: bool = False,
+        condense_question_llm: Optional[BaseLanguageModel] = None,
         combine_docs_chain_kwargs: Optional[Dict] = None,
         **kwargs: Any,
     ) -> BaseConversationalRetrievalChain:
@@ -206,8 +207,10 @@ class ConversationalRetrievalChain(BaseConversationalRetrievalChain):
             verbose=verbose,
             **combine_docs_chain_kwargs,
         )
+
+        _llm = condense_question_llm or llm
         condense_question_chain = LLMChain(
-            llm=llm, prompt=condense_question_prompt, verbose=verbose
+            llm=_llm, prompt=condense_question_prompt, verbose=verbose
        )
         return cls(
             retriever=retriever,
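For context, the sketch below shows how the new condense_question_llm argument is meant to be used end to end. It is a minimal illustration, not part of the patch: it assumes the LangChain version this patch targets, an OpenAI API key in the environment, and a small in-memory Chroma store standing in for the notebook's document index.

    from langchain.chains import ConversationalRetrievalChain
    from langchain.chat_models import ChatOpenAI
    from langchain.embeddings.openai import OpenAIEmbeddings
    from langchain.vectorstores import Chroma

    # Tiny stand-in corpus; the notebook builds its store from the state-of-the-union text.
    vectorstore = Chroma.from_texts(
        ["The president praised Ketanji Brown Jackson's nomination to the Supreme Court."],
        OpenAIEmbeddings(),
    )

    qa = ConversationalRetrievalChain.from_llm(
        ChatOpenAI(temperature=0, model="gpt-4"),  # answers the final question
        vectorstore.as_retriever(),
        # Cheaper, faster model handles only the question-condensing step.
        condense_question_llm=ChatOpenAI(temperature=0, model="gpt-3.5-turbo"),
    )

    chat_history = []
    query = "What did the president say about Ketanji Brown Jackson"
    result = qa({"question": query, "chat_history": chat_history})
    print(result["answer"])

If condense_question_llm is omitted, the change above falls back to the main llm for the condensing step (_llm = condense_question_llm or llm), so existing callers of from_llm are unaffected.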