From 2a68be3e8d6b81f9975e00cb1e8e07d14209cc35 Mon Sep 17 00:00:00 2001
From: Harrison Chase
Date: Sun, 5 Feb 2023 21:38:47 -0800
Subject: [PATCH] chat vector db chain (#902)

---
 .../chat_vector_db.ipynb                    | 165 ++++++++++++++++++
 langchain/chains/__init__.py                |   2 +
 langchain/chains/chat_vector_db/__init__.py |   1 +
 langchain/chains/chat_vector_db/base.py     |  85 +++++++++
 langchain/chains/chat_vector_db/prompts.py  |  20 +++
 5 files changed, 273 insertions(+)
 create mode 100644 docs/modules/chains/combine_docs_examples/chat_vector_db.ipynb
 create mode 100644 langchain/chains/chat_vector_db/__init__.py
 create mode 100644 langchain/chains/chat_vector_db/base.py
 create mode 100644 langchain/chains/chat_vector_db/prompts.py

diff --git a/docs/modules/chains/combine_docs_examples/chat_vector_db.ipynb b/docs/modules/chains/combine_docs_examples/chat_vector_db.ipynb
new file mode 100644
index 0000000000..caaae7f0f9
--- /dev/null
+++ b/docs/modules/chains/combine_docs_examples/chat_vector_db.ipynb
@@ -0,0 +1,165 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "134a0785",
+   "metadata": {},
+   "source": [
+    "# Chat Vector DB\n",
+    "\n",
+    "This notebook goes over how to set up a chain to chat with a vector database. The only difference between this chain and the [VectorDBQAChain](./vector_db_qa.ipynb) is that this one allows you to pass in a chat history, which can be used to support follow-up questions."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "70c4e529",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.embeddings.openai import OpenAIEmbeddings\n",
+    "from langchain.vectorstores.faiss import FAISS\n",
+    "from langchain.text_splitter import CharacterTextSplitter\n",
+    "from langchain.llms import OpenAI\n",
+    "from langchain.chains import ChatVectorDBChain"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "a8930cf7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with open('../../state_of_the_union.txt') as f:\n",
+    "    state_of_the_union = f.read()\n",
+    "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
+    "texts = text_splitter.split_text(state_of_the_union)\n",
+    "\n",
+    "embeddings = OpenAIEmbeddings()\n",
+    "vectorstore = FAISS.from_texts(texts, embeddings)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "7b4110f3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "qa = ChatVectorDBChain.from_llm(OpenAI(temperature=0), vectorstore)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3872432d",
+   "metadata": {},
+   "source": [
+    "Here's an example of asking a question with no chat history."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "7fe3e730",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "chat_history = []\n",
+    "query = \"What did the president say about Ketanji Brown Jackson\"\n",
+    "result = qa({\"question\": query, \"chat_history\": chat_history})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "bfff9cc8",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "\" The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He also said that she is a consensus builder and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans.\""
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "result[\"answer\"]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9e46edf7",
+   "metadata": {},
+   "source": [
+    "Here's an example of asking a question with some chat history."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "00b4cf00",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "chat_history = [(query, result[\"answer\"])]\n",
+    "query = \"Did he mention who she succeeded\"\n",
+    "result = qa({\"question\": query, \"chat_history\": chat_history})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "f01828d1",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "' Justice Stephen Breyer'"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "result['answer']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d0f869c6",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/langchain/chains/__init__.py b/langchain/chains/__init__.py
index 718e53cfd7..f63b32d8fd 100644
--- a/langchain/chains/__init__.py
+++ b/langchain/chains/__init__.py
@@ -1,5 +1,6 @@
 """Chains are easily reusable components which can be linked together."""
 from langchain.chains.api.base import APIChain
+from langchain.chains.chat_vector_db.base import ChatVectorDBChain
 from langchain.chains.conversation.base import ConversationChain
 from langchain.chains.hyde.base import HypotheticalDocumentEmbedder
 from langchain.chains.llm import LLMChain
@@ -42,4 +43,5 @@ __all__ = [
     "SQLDatabaseSequentialChain",
     "load_chain",
     "HypotheticalDocumentEmbedder",
+    "ChatVectorDBChain",
 ]
diff --git a/langchain/chains/chat_vector_db/__init__.py b/langchain/chains/chat_vector_db/__init__.py
new file mode 100644
index 0000000000..3522b876d8
--- /dev/null
+++ b/langchain/chains/chat_vector_db/__init__.py
@@ -0,0 +1 @@
+"""Chain for chatting with a vector database."""
diff --git a/langchain/chains/chat_vector_db/base.py b/langchain/chains/chat_vector_db/base.py
new file mode 100644
index 0000000000..abb2c06771
--- /dev/null
+++ b/langchain/chains/chat_vector_db/base.py
@@ -0,0 +1,85 @@
+"""Chain for chatting with a vector database."""
+from __future__ import annotations
+
+from typing import Any, Dict, List, Tuple
+
+from pydantic import BaseModel
+
+from langchain.chains.base import Chain
+from langchain.chains.chat_vector_db.prompts import CONDENSE_QUESTION_PROMPT, QA_PROMPT
+from langchain.chains.combine_documents.base import BaseCombineDocumentsChain
+from langchain.chains.llm import LLMChain
+from langchain.chains.question_answering import load_qa_chain
+from langchain.llms.base import BaseLLM
+from langchain.prompts.base import BasePromptTemplate
+from langchain.vectorstores.base import VectorStore
+
+
+def _get_chat_history(chat_history: List[Tuple[str, str]]) -> str:
buffer = "" + for human_s, ai_s in chat_history: + human = "Human: " + human_s + ai = "Assistant: " + ai_s + buffer += "\n" + "\n".join([human, ai]) + return buffer + + +class ChatVectorDBChain(Chain, BaseModel): + """Chain for chatting with a vector database.""" + + vectorstore: VectorStore + combine_docs_chain: BaseCombineDocumentsChain + question_generator: LLMChain + output_key: str = "answer" + + @property + def _chain_type(self) -> str: + return "chat-vector-db" + + @property + def input_keys(self) -> List[str]: + """Input keys.""" + return ["question", "chat_history"] + + @property + def output_keys(self) -> List[str]: + """Output keys.""" + return [self.output_key] + + @classmethod + def from_llm( + cls, + llm: BaseLLM, + vectorstore: VectorStore, + condense_question_prompt: BasePromptTemplate = CONDENSE_QUESTION_PROMPT, + qa_prompt: BasePromptTemplate = QA_PROMPT, + chain_type: str = "stuff", + ) -> ChatVectorDBChain: + """Load chain from LLM.""" + doc_chain = load_qa_chain( + llm, + chain_type=chain_type, + prompt=qa_prompt, + ) + condense_question_chain = LLMChain(llm=llm, prompt=condense_question_prompt) + return cls( + vectorstore=vectorstore, + combine_docs_chain=doc_chain, + question_generator=condense_question_chain, + ) + + def _call(self, inputs: Dict[str, Any]) -> Dict[str, str]: + question = inputs["question"] + chat_history_str = _get_chat_history(inputs["chat_history"]) + if chat_history_str: + new_question = self.question_generator.run( + question=question, chat_history=chat_history_str + ) + else: + new_question = question + docs = self.vectorstore.similarity_search(new_question, k=4) + new_inputs = inputs.copy() + new_inputs["question"] = new_question + new_inputs["chat_history"] = chat_history_str + answer, _ = self.combine_docs_chain.combine_docs(docs, **new_inputs) + return {self.output_key: answer} diff --git a/langchain/chains/chat_vector_db/prompts.py b/langchain/chains/chat_vector_db/prompts.py new file mode 100644 index 0000000000..b2a2df09e3 --- /dev/null +++ b/langchain/chains/chat_vector_db/prompts.py @@ -0,0 +1,20 @@ +# flake8: noqa +from langchain.prompts.prompt import PromptTemplate + +_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question. + +Chat History: +{chat_history} +Follow Up Input: {question} +Standalone question:""" +CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template) + +prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. + +{context} + +Question: {question} +Helpful Answer:""" +QA_PROMPT = PromptTemplate( + template=prompt_template, input_variables=["context", "question"] +)