mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
9ffca3b92a
Update imports to use core for the low-hanging fruit changes. Ran following ```bash git grep -l 'langchain.schema.runnable' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.runnable/langchain_core.runnables/g' git grep -l 'langchain.schema.output_parser' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.output_parser/langchain_core.output_parsers/g' git grep -l 'langchain.schema.messages' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.messages/langchain_core.messages/g' git grep -l 'langchain.schema.chat_histry' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.chat_history/langchain_core.chat_history/g' git grep -l 'langchain.schema.prompt_template' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.prompt_template/langchain_core.prompts/g' git grep -l 'from langchain.pydantic_v1' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.pydantic_v1/from langchain_core.pydantic_v1/g' git grep -l 'from langchain.tools.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.tools\.base/from langchain_core.tools/g' git grep -l 'from langchain.chat_models.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.chat_models.base/from langchain_core.language_models.chat_models/g' git grep -l 'from langchain.llms.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.llms\.base\ /from langchain_core.language_models.llms\ /g' git grep -l 'from langchain.embeddings.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.embeddings\.base/from langchain_core.embeddings/g' git grep -l 'from langchain.vectorstores.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.vectorstores\.base/from langchain_core.vectorstores/g' git grep -l 'from langchain.agents.tools' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.agents\.tools/from langchain_core.tools/g' git grep -l 'from langchain.schema.output' 
{docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.output\ /from langchain_core.outputs\ /g' git grep -l 'from langchain.schema.embeddings' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.embeddings/from langchain_core.embeddings/g' git grep -l 'from langchain.schema.document' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.document/from langchain_core.documents/g' git grep -l 'from langchain.schema.agent' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.agent/from langchain_core.agents/g' git grep -l 'from langchain.schema.prompt ' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.prompt\ /from langchain_core.prompt_values /g' git grep -l 'from langchain.schema.language_model' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.language_model/from langchain_core.language_models/g' ```
103 lines
2.9 KiB
Python
103 lines
2.9 KiB
Python
from operator import itemgetter
|
|
|
|
import numpy as np
|
|
from langchain.chat_models import ChatOpenAI
|
|
from langchain.embeddings import OpenAIEmbeddings
|
|
from langchain.prompts import ChatPromptTemplate
|
|
from langchain.retrievers import (
|
|
ArxivRetriever,
|
|
KayAiRetriever,
|
|
PubMedRetriever,
|
|
WikipediaRetriever,
|
|
)
|
|
from langchain.schema import StrOutputParser
|
|
from langchain.utils.math import cosine_similarity
|
|
from langchain_core.pydantic_v1 import BaseModel
|
|
from langchain_core.runnables import (
|
|
RunnableParallel,
|
|
RunnablePassthrough,
|
|
)
|
|
|
|
# One retriever per knowledge source. Each is given a run_name through
# with_config; every source is configured for 5 results/contexts.
pubmed = PubMedRetriever(top_k_results=5).with_config(run_name="pubmed")
arxiv = ArxivRetriever(top_k_results=5).with_config(run_name="arxiv")
sec = KayAiRetriever.create(
    dataset_id="company",
    data_types=["10-K"],
    num_contexts=5,
).with_config(run_name="sec_filings")
wiki = WikipediaRetriever(
    top_k_results=5,
    doc_content_chars_max=2000,
).with_config(run_name="wiki")

# Embedding model used by fuse_retrieved_docs to embed both the question and
# the retrieved documents.
embeddings = OpenAIEmbeddings()
|
|
|
|
|
|
def fuse_retrieved_docs(input):
    """Pick the retrieved documents most similar to the question.

    Args:
        input: Mapping with two keys:
            "question" - the query string to embed.
            "sources"  - mapping of source name -> iterable of retrieved
                         documents, each exposing ``page_content``.
            (The parameter name shadows the ``input`` builtin; it is kept
            unchanged so existing callers are unaffected.)

    Returns:
        A list of at most five ``(source_name, document)`` tuples, ordered
        from highest to lowest cosine similarity with the question. Returns
        an empty list when no source produced any document.
    """
    results_map = input["sources"]
    query = input["question"]

    # Flatten {source: [doc, ...]} into [(source, doc), ...].
    named_docs = [
        (name, doc) for name, source_docs in results_map.items() for doc in source_docs
    ]
    if not named_docs:
        # Nothing was retrieved: unpacking ``zip(*[])`` into two names would
        # raise ValueError, and there is nothing to rank, so skip the
        # embedding calls entirely.
        return []

    embedded_query = embeddings.embed_query(query)
    embedded_docs = embeddings.embed_documents(
        [doc.page_content for _, doc in named_docs]
    )
    similarity = cosine_similarity([embedded_query], embedded_docs)

    # argsort is ascending; flip it so the best matches come first, keep 5.
    most_similar = np.flip(np.argsort(similarity[0]))[:5]
    return [named_docs[i] for i in most_similar]
|
|
|
|
|
|
def format_named_docs(named_docs):
    """Render ``(source, document)`` pairs as one text block.

    Each pair becomes a ``Source: <name>`` header, a blank line, and the
    document's ``page_content``; the rendered pairs are separated from each
    other by a blank line.
    """
    sections = []
    for source, doc in named_docs:
        sections.append(f"Source: {source}\n\n{doc.page_content}")
    return "\n\n".join(sections)
|
|
|
|
|
|
# System message template. ``{sources}`` is filled with the formatted source
# text. NOTE(review): the doubled braces around ``topic`` are presumably an
# escape so the model sees a literal "{topic}" placeholder - confirm.
system = """Answer the user question. Use the following sources to help \
answer the question. If you don't know the answer say "I'm not sure, I couldn't \
find information on {{topic}}."

Sources:

{sources}"""

# Two-message chat prompt: the system instructions above, then the question.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{question}"),
    ]
)

# Display name -> retriever. The keys are the source names that
# fuse_retrieved_docs pairs with each retrieved document.
retrieve_all = RunnableParallel(
    {
        "ArXiv": arxiv,
        "Wikipedia": wiki,
        "PubMed": pubmed,
        "SEC 10-K Forms": sec,
    }
).with_config(run_name="retrieve_all")
|
|
|
|
|
|
# Input type for the chain (passed to ``with_types(input_type=Question)``).
# Documented with comments rather than a docstring on purpose:
# NOTE(review): a model docstring would presumably surface in the generated
# schema description - confirm before adding one.
class Question(BaseModel):
    # The whole input is a single plain string (custom root type).
    __root__: str
|
|
|
|
|
|
# Answering sub-chain: pull out the question, render the fused
# (source, document) pairs as text, fill the prompt, call the chat model and
# parse the reply to a plain string.
answer_chain = (
    RunnableParallel(
        {
            "question": itemgetter("question"),
            "sources": lambda x: format_named_docs(x["sources"]),
        }
    )
    | prompt
    | ChatOpenAI(model="gpt-3.5-turbo-1106")
    | StrOutputParser()
).with_config(run_name="answer")
|
|
# Full pipeline:
#   1. "add_sources": pair the raw question with the output of retrieve_all.
#   2. "fuse":        replace "sources" with the result of fuse_retrieved_docs.
#   3. "add_answer":  add an "answer" key produced by answer_chain.
chain = (
    (
        RunnableParallel(
            question=RunnablePassthrough(),
            sources=retrieve_all,
        ).with_config(run_name="add_sources")
        | RunnablePassthrough.assign(
            sources=fuse_retrieved_docs,
        ).with_config(run_name="fuse")
        | RunnablePassthrough.assign(
            answer=answer_chain,
        ).with_config(run_name="add_answer")
    )
    .with_config(run_name="QA with fused results")
    .with_types(input_type=Question)
)
|