vector db qa (#71)

harrison/ape
Harrison Chase 2 years ago committed by GitHub
parent 4c0b684f79
commit 47af2bcee4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -0,0 +1,94 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "82525493",
"metadata": {},
"outputs": [],
"source": [
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
"from langchain.vectorstores.faiss import FAISS\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain import OpenAI, VectorDBQA"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "5c7049db",
"metadata": {},
"outputs": [],
"source": [
"with open('state_of_the_union.txt') as f:\n",
" state_of_the_union = f.read()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"texts = text_splitter.split_text(state_of_the_union)\n",
"\n",
"embeddings = OpenAIEmbeddings()\n",
"docsearch = FAISS.from_texts(texts, embeddings)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "3018f865",
"metadata": {},
"outputs": [],
"source": [
"qa = VectorDBQA(llm=OpenAI(), vectorstore=docsearch)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "032a47f8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\" The president said that Ketanji Brown Jackson is one of our nation's top legal minds, who will continue Justice Breyers legacy of excellence.\""
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"qa.run(query)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f0f20b92",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@ -14,6 +14,7 @@ from langchain.chains import (
SelfAskWithSearchChain,
SerpAPIChain,
SQLDatabaseChain,
VectorDBQA,
)
from langchain.docstore import Wikipedia
from langchain.llms import Cohere, HuggingFaceHub, OpenAI
@ -39,5 +40,6 @@ __all__ = [
"SQLDatabaseChain",
"FAISS",
"MRKLChain",
"VectorDBQA",
"ElasticVectorSearch",
]

@ -7,6 +7,7 @@ from langchain.chains.react.base import ReActChain
from langchain.chains.self_ask_with_search.base import SelfAskWithSearchChain
from langchain.chains.serpapi import SerpAPIChain
from langchain.chains.sql_database.base import SQLDatabaseChain
from langchain.chains.vector_db_qa.base import VectorDBQA
__all__ = [
"LLMChain",
@ -17,4 +18,5 @@ __all__ = [
"ReActChain",
"SQLDatabaseChain",
"MRKLChain",
"VectorDBQA",
]

@ -0,0 +1 @@
"""Chain for question-answering against a vector database."""

@ -0,0 +1,80 @@
"""Chain for question-answering against a vector database."""
from typing import Dict, List
from pydantic import BaseModel, Extra
from langchain.chains.base import Chain
from langchain.chains.llm import LLMChain
from langchain.chains.vector_db_qa.prompt import prompt
from langchain.llms.base import LLM
from langchain.vectorstores.base import VectorStore
class VectorDBQA(Chain, BaseModel):
    """Chain for question-answering against a vector database.

    The chain looks up documents similar to the question in ``vectorstore``,
    joins their contents into a single context string and asks ``llm`` to
    answer the question from that context.

    Example:
        .. code-block:: python

            from langchain import OpenAI, VectorDBQA
            from langchain.vectorstores.faiss import FAISS
            vectordb = FAISS(...)
            vectordbQA = VectorDBQA(llm=OpenAI(), vectorstore=vectordb)

    """

    llm: LLM
    """LLM wrapper to use."""
    vectorstore: VectorStore
    """Vector Database to connect to."""
    input_key: str = "query"  #: :meta private:
    output_key: str = "result"  #: :meta private:

    class Config:
        """Configuration for this pydantic object."""

        # Unknown keyword arguments are rejected, so the fields must be
        # passed by their exact names (``llm``, ``vectorstore``).
        extra = Extra.forbid
        arbitrary_types_allowed = True

    @property
    def input_keys(self) -> List[str]:
        """Return the singular input key.

        :meta private:
        """
        return [self.input_key]

    @property
    def output_keys(self) -> List[str]:
        """Return the singular output key.

        :meta private:
        """
        return [self.output_key]

    def _run(self, inputs: Dict[str, str]) -> Dict[str, str]:
        """Answer the question stored under ``input_key``.

        Args:
            inputs: Mapping that holds the question under ``self.input_key``.

        Returns:
            A single-entry dict mapping ``self.output_key`` to the answer
            text produced by the LLM chain.
        """
        question = inputs[self.input_key]
        llm_chain = LLMChain(llm=self.llm, prompt=prompt)
        docs = self.vectorstore.similarity_search(question)
        # Label every retrieved document so the prompt can tell them apart.
        contexts = [
            f"Context {j}:\n{doc.page_content}" for j, doc in enumerate(docs)
        ]
        # TODO: handle cases where this context is too long.
        answer = llm_chain.predict(question=question, context="\n\n".join(contexts))
        return {self.output_key: answer}

    def run(self, question: str) -> str:
        """Run Question-Answering on a vector database.

        Args:
            question: Question to get the answer for.

        Returns:
            The final answer

        Example:
            .. code-block:: python

                answer = vectordbqa.run("What is the capital of Idaho?")
        """
        # Calling the chain itself is presumably what dispatches to ``_run``
        # (the base ``Chain`` is not visible here -- confirm there).
        return self({self.input_key: question})[self.output_key]

@ -0,0 +1,10 @@
# flake8: noqa
from langchain.prompts import Prompt
# Template text for the vector-DB question-answering prompt. It carries two
# placeholders, ``{context}`` and ``{question}``; the text is sent to the LLM
# verbatim, so every character (including line breaks) is part of the prompt.
# NOTE(review): blank lines inside the template may have been lost in this
# view -- confirm against the committed file before editing.
prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
{context}
Question: {question}
Helpful Answer:"""
# Prompt object built from the template above; ``input_variables`` lists the
# same two placeholder names that appear in the template.
prompt = Prompt(template=prompt_template, input_variables=["context", "question"])
Loading…
Cancel
Save