chat vector db chain (#902)

2024-11-06 03:20:49 +00:00 · 2023-02-05 21:38:47 -08:00 · 2023-02-05 21:38:47 -08:00 · 2a68be3e8d
commit 2a68be3e8d
parent 8217a2f26c
5 changed files with 273 additions and 0 deletions
--- a/docs/modules/chains/combine_docs_examples/chat_vector_db.ipynb
+++ b/docs/modules/chains/combine_docs_examples/chat_vector_db.ipynb
@ -0,0 +1,165 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "134a0785",
+   "metadata": {},
+   "source": [
+    "# Chat Vector DB\n",
+    "\n",
+    "This notebook goes over how to set up a chain to chat with a vector database. The only difference between this chain and the [VectorDBQAChain](./vector_db_qa.ipynb) is that this chain lets you pass in a chat history, which can be used to answer follow-up questions."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "70c4e529",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.embeddings.openai import OpenAIEmbeddings\n",
+    "from langchain.vectorstores.faiss import FAISS\n",
+    "from langchain.text_splitter import CharacterTextSplitter\n",
+    "from langchain.llms import OpenAI\n",
+    "from langchain.chains import ChatVectorDBChain"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "a8930cf7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with open('../../state_of_the_union.txt') as f:\n",
+    "    state_of_the_union = f.read()\n",
+    "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
+    "texts = text_splitter.split_text(state_of_the_union)\n",
+    "\n",
+    "embeddings = OpenAIEmbeddings()\n",
+    "vectorstore = FAISS.from_texts(texts, embeddings)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "7b4110f3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "qa = ChatVectorDBChain.from_llm(OpenAI(temperature=0), vectorstore)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3872432d",
+   "metadata": {},
+   "source": [
+    "Here's an example of asking a question with no chat history"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "7fe3e730",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "chat_history = []\n",
+    "query = \"What did the president say about Ketanji Brown Jackson\"\n",
+    "result = qa({\"question\": query, \"chat_history\": chat_history})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "bfff9cc8",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "\" The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He also said that she is a consensus builder and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans.\""
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "result[\"answer\"]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9e46edf7",
+   "metadata": {},
+   "source": [
+    "Here's an example of asking a question with some chat history"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "00b4cf00",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "chat_history = [(query, result[\"answer\"])]\n",
+    "query = \"Did he mention who she suceeded\"\n",
+    "result = qa({\"question\": query, \"chat_history\": chat_history})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "f01828d1",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "' Justice Stephen Breyer'"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "result['answer']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d0f869c6",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/langchain/chains/init.py
+++ b/langchain/chains/init.py
@ -1,5 +1,6 @@
 """Chains are easily reusable components which can be linked together."""
 from langchain.chains.api.base import APIChain
+from langchain.chains.chat_vector_db.base import ChatVectorDBChain
 from langchain.chains.conversation.base import ConversationChain
 from langchain.chains.hyde.base import HypotheticalDocumentEmbedder
 from langchain.chains.llm import LLMChain
@ -42,4 +43,5 @@ __all__ = [
    "SQLDatabaseSequentialChain",
    "load_chain",
    "HypotheticalDocumentEmbedder",
+    "ChatVectorDBChain",
 ]
--- a/langchain/chains/chat_vector_db/init.py
+++ b/langchain/chains/chat_vector_db/init.py
@ -0,0 +1 @@
+"""Chain for chatting with a vector database."""
--- a/langchain/chains/chat_vector_db/base.py
+++ b/langchain/chains/chat_vector_db/base.py
@ -0,0 +1,85 @@
+"""Chain for chatting with a vector database."""
+from __future__ import annotations
+
+from typing import Any, Dict, List, Tuple
+
+from pydantic import BaseModel
+
+from langchain.chains.base import Chain
+from langchain.chains.chat_vector_db.prompts import CONDENSE_QUESTION_PROMPT, QA_PROMPT
+from langchain.chains.combine_documents.base import BaseCombineDocumentsChain
+from langchain.chains.llm import LLMChain
+from langchain.chains.question_answering import load_qa_chain
+from langchain.llms.base import BaseLLM
+from langchain.prompts.base import BasePromptTemplate
+from langchain.vectorstores.base import VectorStore
+
+
+def _get_chat_history(chat_history: List[Tuple[str, str]]) -> str:
+    """Render (human, ai) message pairs as a single transcript string.
+
+    Each pair contributes a line break, ``Human: <human text>``, another line
+    break and ``Assistant: <ai text>``. An empty history yields an empty string.
+    """
+    turns = (
+        "\n" + "Human: " + human_msg + "\n" + "Assistant: " + ai_msg
+        for human_msg, ai_msg in chat_history
+    )
+    return "".join(turns)
+
+
+class ChatVectorDBChain(Chain, BaseModel):
+    """Chain for chatting with a vector database.
+
+    Each call takes a ``question`` and a ``chat_history``. When the history is
+    non-empty, ``question_generator`` first condenses both into a standalone
+    question. That question is used to look up documents in ``vectorstore``,
+    and ``combine_docs_chain`` turns the documents into the returned answer.
+    """
+
+    # Store searched for documents similar to the (condensed) question.
+    vectorstore: VectorStore
+    # Chain that combines the retrieved documents and the question into an answer.
+    combine_docs_chain: BaseCombineDocumentsChain
+    # Chain that rewrites a follow-up question as a standalone question.
+    question_generator: LLMChain
+    # Key under which ``_call`` returns the answer.
+    output_key: str = "answer"
+    # Number of documents requested from the vector store for each question.
+    top_k_docs_for_context: int = 4
+
+    @property
+    def _chain_type(self) -> str:
+        """Identifier string for this chain type."""
+        return "chat-vector-db"
+
+    @property
+    def input_keys(self) -> List[str]:
+        """Input keys."""
+        return ["question", "chat_history"]
+
+    @property
+    def output_keys(self) -> List[str]:
+        """Output keys."""
+        return [self.output_key]
+
+    @classmethod
+    def from_llm(
+        cls,
+        llm: BaseLLM,
+        vectorstore: VectorStore,
+        condense_question_prompt: BasePromptTemplate = CONDENSE_QUESTION_PROMPT,
+        qa_prompt: BasePromptTemplate = QA_PROMPT,
+        chain_type: str = "stuff",
+        **kwargs: Any,
+    ) -> ChatVectorDBChain:
+        """Load chain from LLM.
+
+        Args:
+            llm: Language model used by both the question-condensing chain and
+                the document-combining chain.
+            vectorstore: Vector store queried for relevant documents.
+            condense_question_prompt: Prompt for the question-condensing chain.
+            qa_prompt: Prompt handed to ``load_qa_chain`` as ``prompt``.
+            chain_type: Chain type forwarded to ``load_qa_chain``.
+            **kwargs: Extra fields forwarded to the constructor, e.g.
+                ``top_k_docs_for_context`` or ``output_key``.
+
+        Returns:
+            A chain wired with the two sub-chains built from ``llm``.
+        """
+        # NOTE(review): ``prompt=`` is forwarded for every ``chain_type``;
+        # confirm that chain types other than "stuff" accept that keyword.
+        doc_chain = load_qa_chain(
+            llm,
+            chain_type=chain_type,
+            prompt=qa_prompt,
+        )
+        condense_question_chain = LLMChain(llm=llm, prompt=condense_question_prompt)
+        return cls(
+            vectorstore=vectorstore,
+            combine_docs_chain=doc_chain,
+            question_generator=condense_question_chain,
+            **kwargs,
+        )
+
+    def _call(self, inputs: Dict[str, Any]) -> Dict[str, str]:
+        """Answer ``inputs["question"]`` using ``inputs["chat_history"]``.
+
+        Args:
+            inputs: Mapping holding ``"question"`` and ``"chat_history"``; the
+                history is a list of (human, ai) string pairs.
+
+        Returns:
+            A single-entry dict mapping ``output_key`` to the answer.
+        """
+        question = inputs["question"]
+        chat_history_str = _get_chat_history(inputs["chat_history"])
+        if chat_history_str:
+            new_question = self.question_generator.run(
+                question=question, chat_history=chat_history_str
+            )
+        else:
+            # No history to resolve against, so the question is used unchanged
+            # and the question generator is not invoked.
+            new_question = question
+        docs = self.vectorstore.similarity_search(
+            new_question, k=self.top_k_docs_for_context
+        )
+        # Copy so the caller's dict is not mutated; the combine chain receives
+        # the condensed question and the rendered history string.
+        new_inputs = inputs.copy()
+        new_inputs["question"] = new_question
+        new_inputs["chat_history"] = chat_history_str
+        answer, _ = self.combine_docs_chain.combine_docs(docs, **new_inputs)
+        return {self.output_key: answer}
--- a/langchain/chains/chat_vector_db/prompts.py
+++ b/langchain/chains/chat_vector_db/prompts.py
@ -0,0 +1,20 @@
+# flake8: noqa
+from langchain.prompts.prompt import PromptTemplate
+
+# Prompt asking the model to rewrite a follow-up question as a standalone
+# question, given the conversation so far. Placeholders: {chat_history} and
+# {question}.
+_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.
+
+Chat History:
+{chat_history}
+Follow Up Input: {question}
+Standalone question:"""
+CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
+
+# Prompt asking the model to answer a question from the supplied context and
+# to say it does not know rather than invent an answer. Placeholders:
+# {context} and {question}.
+prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
+
+{context}
+
+Question: {question}
+Helpful Answer:"""
+QA_PROMPT = PromptTemplate(
+    template=prompt_template, input_variables=["context", "question"]
+)
				`@ -0,0 +1 @@`
				`"""Chain for chatting with a vector database."""`