from langchain_community.chat_models import ChatOpenAI
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel
from langchain_core.runnables import RunnableParallel
from hyde.prompts import hyde_prompt
# Example: build the retriever from a web page instead of the single test
# document indexed further down (load from a URL, split, create the vectorstore).
# Kept disabled: the URL is empty, and the `vectorstore` / `retriever` names it
# assigns are reassigned by the test setup that follows, so running it at import
# time would only add a failing network call. To use it, fill in the URL,
# uncomment the lines, and remove the test vectorstore.
#
# # Load
# from langchain_community.document_loaders import WebBaseLoader
#
# loader = WebBaseLoader("")  # put the page URL to index here
# data = loader.load()
#
# # Split
# from langchain_text_splitters import RecursiveCharacterTextSplitter
#
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
# all_splits = text_splitter.split_documents(data)
#
# # Add to vectorDB
# vectorstore = Chroma.from_documents(
#     documents=all_splits,
#     embedding=OpenAIEmbeddings(),
# )
# retriever = vectorstore.as_retriever()
# Embed a single document as a test
# Index one hard-coded sentence so the chain can be exercised without loading
# real documents; the embedding model is passed explicitly.
vectorstore = Chroma.from_texts(
    ["harrison worked at kensho"],
    embedding=OpenAIEmbeddings(),
)
# Retriever over the store; it backs the "context" branch of the RAG chain.
retriever = vectorstore.as_retriever()
# RAG prompt
# `{context}` is filled with the retrieved documents and `{question}` with the
# user's question; the RAG chain's input mapping supplies both keys.
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
# Build the chat prompt object from the RAG template string.
prompt = ChatPromptTemplate.from_template(template)
# LLM: constructed with default arguments and reused by both the HyDE chain
# and the RAG chain.
model = ChatOpenAI()
# Query transformation chain
# This transforms the query into the hypothetical document: the input is
# rendered through `hyde_prompt` (imported from hyde.prompts), sent to the
# model, and the model output is passed through StrOutputParser.
hyde_chain = hyde_prompt | model | StrOutputParser()
# RAG chain
# The input is a mapping with a "question" key. The parallel step builds the
# two prompt variables, then the prompt, model and output parser run in order.
chain = (
    RunnableParallel(
        {
            # Generate a hypothetical document and then pass it to the retriever
            "context": hyde_chain | retriever,
            # Forward the user's question to the prompt as-is
            "question": lambda x: x["question"],
        }
    )
    | prompt
    | model
    | StrOutputParser()
)
# Add input types for playground
# Input schema for the chain: one string field named "question".
class ChainInput(BaseModel):
    # The user's question; the RAG chain reads it as x["question"].
    question: str
# Rebind `chain` with ChainInput declared as its input type.
chain = chain.with_types(input_type=ChainInput)