langchain/templates/rag-multi-index-fusion/rag_multi_index_fusion/chain.py
Bagatur 9ffca3b92a
docs[patch], templates[patch]: Import from core (#14575)
Update imports to use core for the low-hanging fruit changes. Ran
following

```bash
git grep -l 'langchain.schema.runnable' {docs,templates,cookbook}  | xargs sed -i '' 's/langchain\.schema\.runnable/langchain_core.runnables/g'
git grep -l 'langchain.schema.output_parser' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.output_parser/langchain_core.output_parsers/g'
git grep -l 'langchain.schema.messages' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.messages/langchain_core.messages/g'
git grep -l 'langchain.schema.chat_histry' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.chat_history/langchain_core.chat_history/g'
git grep -l 'langchain.schema.prompt_template' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.prompt_template/langchain_core.prompts/g'
git grep -l 'from langchain.pydantic_v1' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.pydantic_v1/from langchain_core.pydantic_v1/g'
git grep -l 'from langchain.tools.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.tools\.base/from langchain_core.tools/g'
git grep -l 'from langchain.chat_models.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.chat_models.base/from langchain_core.language_models.chat_models/g'
git grep -l 'from langchain.llms.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.llms\.base\ /from langchain_core.language_models.llms\ /g'
git grep -l 'from langchain.embeddings.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.embeddings\.base/from langchain_core.embeddings/g'
git grep -l 'from langchain.vectorstores.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.vectorstores\.base/from langchain_core.vectorstores/g'
git grep -l 'from langchain.agents.tools' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.agents\.tools/from langchain_core.tools/g'
git grep -l 'from langchain.schema.output' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.output\ /from langchain_core.outputs\ /g'
git grep -l 'from langchain.schema.embeddings' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.embeddings/from langchain_core.embeddings/g'
git grep -l 'from langchain.schema.document' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.document/from langchain_core.documents/g'
git grep -l 'from langchain.schema.agent' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.agent/from langchain_core.agents/g'
git grep -l 'from langchain.schema.prompt ' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.prompt\ /from langchain_core.prompt_values /g'
git grep -l 'from langchain.schema.language_model' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.language_model/from langchain_core.language_models/g'


```
2023-12-11 16:49:10 -08:00

103 lines
2.9 KiB
Python

from operator import itemgetter
import numpy as np
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain.retrievers import (
ArxivRetriever,
KayAiRetriever,
PubMedRetriever,
WikipediaRetriever,
)
from langchain.schema import StrOutputParser
from langchain.utils.math import cosine_similarity
from langchain_core.pydantic_v1 import BaseModel
from langchain_core.runnables import (
RunnableParallel,
RunnablePassthrough,
)
pubmed = PubMedRetriever(top_k_results=5).with_config(run_name="pubmed")
arxiv = ArxivRetriever(top_k_results=5).with_config(run_name="arxiv")
sec = KayAiRetriever.create(
dataset_id="company", data_types=["10-K"], num_contexts=5
).with_config(run_name="sec_filings")
wiki = WikipediaRetriever(top_k_results=5, doc_content_chars_max=2000).with_config(
run_name="wiki"
)
embeddings = OpenAIEmbeddings()
def fuse_retrieved_docs(input):
results_map = input["sources"]
query = input["question"]
embedded_query = embeddings.embed_query(query)
names, docs = zip(
*((name, doc) for name, docs in results_map.items() for doc in docs)
)
embedded_docs = embeddings.embed_documents([doc.page_content for doc in docs])
similarity = cosine_similarity(
[embedded_query],
embedded_docs,
)
most_similar = np.flip(np.argsort(similarity[0]))[:5]
return [
(
names[i],
docs[i],
)
for i in most_similar
]
def format_named_docs(named_docs):
return "\n\n".join(
f"Source: {source}\n\n{doc.page_content}" for source, doc in named_docs
)
system = """Answer the user question. Use the following sources to help \
answer the question. If you don't know the answer say "I'm not sure, I couldn't \
find information on {{topic}}."
Sources:
{sources}"""
prompt = ChatPromptTemplate.from_messages([("system", system), ("human", "{question}")])
retrieve_all = RunnableParallel(
{"ArXiv": arxiv, "Wikipedia": wiki, "PubMed": pubmed, "SEC 10-K Forms": sec}
).with_config(run_name="retrieve_all")
class Question(BaseModel):
__root__: str
answer_chain = (
{
"question": itemgetter("question"),
"sources": lambda x: format_named_docs(x["sources"]),
}
| prompt
| ChatOpenAI(model="gpt-3.5-turbo-1106")
| StrOutputParser()
).with_config(run_name="answer")
chain = (
(
RunnableParallel(
{"question": RunnablePassthrough(), "sources": retrieve_all}
).with_config(run_name="add_sources")
| RunnablePassthrough.assign(sources=fuse_retrieved_docs).with_config(
run_name="fuse"
)
| RunnablePassthrough.assign(answer=answer_chain).with_config(
run_name="add_answer"
)
)
.with_config(run_name="QA with fused results")
.with_types(input_type=Question)
)