mirror of https://github.com/hwchase17/langchain
TEMPLATES: Add multi-index templates (#13490)
One that routes and one that fuses --------- Co-authored-by: Erick Friis <erick@langchain.dev>pull/13401/head^2
parent
35e04f204b
commit
b4312aac5c
@ -0,0 +1 @@
|
||||
__pycache__
|
@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2023 LangChain, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
@ -0,0 +1,73 @@
|
||||
# RAG with Multiple Indexes (Fusion)
|
||||
|
||||
A QA application that queries multiple domain-specific retrievers and selects the most relevant documents from across all retrieved results.
|
||||
|
||||
## Environment Setup
|
||||
|
||||
This application queries PubMed, ArXiv, Wikipedia, and [Kay AI](https://www.kay.ai) (for SEC filings).
|
||||
|
||||
You will need to create a free Kay AI account and [get your API key here](https://www.kay.ai).
|
||||
Then set environment variable:
|
||||
|
||||
```bash
|
||||
export KAY_API_KEY="<YOUR_API_KEY>"
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
To use this package, you should first have the LangChain CLI installed:
|
||||
|
||||
```shell
|
||||
pip install -U langchain-cli
|
||||
```
|
||||
|
||||
To create a new LangChain project and install this as the only package, you can do:
|
||||
|
||||
```shell
|
||||
langchain app new my-app --package rag-multi-index-fusion
|
||||
```
|
||||
|
||||
If you want to add this to an existing project, you can just run:
|
||||
|
||||
```shell
|
||||
langchain app add rag-multi-index-fusion
|
||||
```
|
||||
|
||||
And add the following code to your `server.py` file:
|
||||
```python
|
||||
from rag_multi_index_fusion import chain as rag_multi_index_fusion_chain
|
||||
|
||||
add_routes(app, rag_multi_index_fusion_chain, path="/rag-multi-index-fusion")
|
||||
```
|
||||
|
||||
(Optional) Let's now configure LangSmith.
|
||||
LangSmith will help us trace, monitor and debug LangChain applications.
|
||||
LangSmith is currently in private beta, you can sign up [here](https://smith.langchain.com/).
|
||||
If you don't have access, you can skip this section.
|
||||
|
||||
|
||||
```shell
|
||||
export LANGCHAIN_TRACING_V2=true
|
||||
export LANGCHAIN_API_KEY=<your-api-key>
|
||||
export LANGCHAIN_PROJECT=<your-project> # if not specified, defaults to "default"
|
||||
```
|
||||
|
||||
If you are inside this directory, then you can spin up a LangServe instance directly by:
|
||||
|
||||
```shell
|
||||
langchain serve
|
||||
```
|
||||
|
||||
This will start the FastAPI app with a server running locally at
|
||||
[http://localhost:8000](http://localhost:8000)
|
||||
|
||||
We can see all templates at [http://127.0.0.1:8000/docs](http://127.0.0.1:8000/docs)
|
||||
We can access the playground at [http://127.0.0.1:8000/rag-multi-index-fusion/playground](http://127.0.0.1:8000/rag-multi-index-fusion/playground)
|
||||
|
||||
We can access the template from code with:
|
||||
|
||||
```python
|
||||
from langserve.client import RemoteRunnable
|
||||
|
||||
runnable = RemoteRunnable("http://localhost:8000/rag-multi-index-fusion")
|
||||
```
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,29 @@
|
||||
[tool.poetry]
|
||||
name = "rag-multi-index-fusion"
|
||||
version = "0.0.1"
|
||||
description = ""
|
||||
authors = []
|
||||
readme = "README.md"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = ">=3.8.1,<4.0"
|
||||
langchain = ">=0.0.313, <0.1"
|
||||
openai = "<2"
|
||||
xmltodict = "^0.13.0"
|
||||
kay = "^0.1.2"
|
||||
wikipedia = "^1.4.0"
|
||||
arxiv = "^2.0.0"
|
||||
tiktoken = "^0.5.1"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
langchain-cli = ">=0.0.15"
|
||||
fastapi = "^0.104.0"
|
||||
sse-starlette = "^1.6.5"
|
||||
|
||||
[tool.langserve]
|
||||
export_module = "rag_multi_index_fusion"
|
||||
export_attr = "chain"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
@ -0,0 +1,3 @@
|
||||
from rag_multi_index_fusion.chain import chain
|
||||
|
||||
__all__ = ["chain"]
|
@ -0,0 +1,102 @@
|
||||
import numpy as np
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
from langchain.embeddings import OpenAIEmbeddings
|
||||
from langchain.prompts import ChatPromptTemplate
|
||||
from langchain.pydantic_v1 import BaseModel
|
||||
from langchain.retrievers import (
|
||||
ArxivRetriever,
|
||||
KayAiRetriever,
|
||||
PubMedRetriever,
|
||||
WikipediaRetriever,
|
||||
)
|
||||
from langchain.schema import StrOutputParser
|
||||
from langchain.schema.runnable import (
|
||||
RunnableLambda,
|
||||
RunnableParallel,
|
||||
RunnablePassthrough,
|
||||
)
|
||||
from langchain.utils.math import cosine_similarity
|
||||
|
||||
# One retriever per external index. Each one requests five results and is
# given a `run_name` label through `with_config`.
pubmed = PubMedRetriever(top_k_results=5).with_config(run_name="pubmed")
arxiv = ArxivRetriever(top_k_results=5).with_config(run_name="arxiv")
# Kay AI retriever restricted to "10-K" data in the "company" dataset.
sec = KayAiRetriever.create(
    dataset_id="company", data_types=["10-K"], num_contexts=5
).with_config(run_name="sec_filings")
# The Wikipedia retriever is additionally configured with
# doc_content_chars_max=2000.
wiki = WikipediaRetriever(top_k_results=5, doc_content_chars_max=2000).with_config(
    run_name="wiki"
)

# Embedding model used by fuse_retrieved_docs to embed the question and the
# retrieved documents.
embeddings = OpenAIEmbeddings()
|
||||
|
||||
|
||||
def fuse_retrieved_docs(input):
    """Pick the retrieved documents most similar to the question.

    Args:
        input: Mapping with two keys:
            "sources": dict mapping a source name to the list of documents
                retrieved from that source; each document must expose
                ``page_content``.
            "question": the user question as a string.

    Returns:
        A list of at most five ``(source_name, document)`` tuples, ordered
        from most to least similar to the question by cosine similarity of
        their embeddings. Empty when no source returned any document.
    """
    results_map = input["sources"]
    query = input["question"]
    # Flatten into (name, doc) pairs so that a column index of the similarity
    # row identifies both the document and the source it came from.
    named_docs = [
        (name, doc) for name, docs in results_map.items() for doc in docs
    ]
    if not named_docs:
        # Nothing was retrieved. Unpacking ``zip(*...)`` over an empty
        # sequence raises ValueError, and there is nothing to embed or rank,
        # so return early without calling the embedding model.
        return []
    embedded_query = embeddings.embed_query(query)
    embedded_docs = embeddings.embed_documents(
        [doc.page_content for _, doc in named_docs]
    )
    similarity = cosine_similarity([embedded_query], embedded_docs)
    # argsort is ascending; flip for descending similarity and keep the top 5.
    most_similar = np.flip(np.argsort(similarity[0]))[:5]
    return [named_docs[i] for i in most_similar]
|
||||
|
||||
|
||||
# Maps a resource-type label to the retriever for that kind of source.
# NOTE(review): nothing in the visible part of this module references this
# mapping (retrieve_all below builds its own dict with different keys) --
# confirm whether it is still needed here.
retriever_map = {
    "medical paper": pubmed,
    "scientific paper": arxiv,
    "public company finances report": sec,
    "general": wiki,
}
|
||||
|
||||
|
||||
def format_named_docs(named_docs):
    """Render (source, document) pairs as a single text block.

    Each pair becomes a ``Source: <name>`` header, a blank line, and the
    document's ``page_content``. Entries are separated by a blank line.
    """
    sections = []
    for source, doc in named_docs:
        sections.append(f"Source: {source}\n\n{doc.page_content}")
    return "\n\n".join(sections)
|
||||
|
||||
|
||||
# System message template. `{sources}` is a template variable, while
# `{{topic}}` is brace-escaped, so with f-string-style formatting it reaches
# the model as the literal text "{topic}".
system = """Answer the user question. Use the following sources to help \
answer the question. If you don't know the answer say "I'm not sure, I couldn't \
find information on {{topic}}."

Sources:

{sources}"""
prompt = ChatPromptTemplate.from_messages([("system", system), ("human", "{question}")])

# Runs all four retrievers on the same input. The keys are the source names
# that fuse_retrieved_docs pairs with each document and that
# format_named_docs prints in the "Source: ..." headers.
retrieve_all = RunnableParallel(
    {"ArXiv": arxiv, "Wikipedia": wiki, "PubMed": pubmed, "SEC 10-K Forms": sec}
).with_config(run_name="retrieve_all")
|
||||
|
||||
|
||||
# Input schema for the chain: a bare string, declared through the pydantic
# `__root__` field. It is passed to `with_types(input_type=...)` when the
# chain is assembled.
class Question(BaseModel):
    __root__: str
|
||||
|
||||
|
||||
# End-to-end QA pipeline: retrieve from every source, keep the documents most
# similar to the question, fill the prompt, call the chat model, and return
# the answer as a string.
chain = (
    (
        # Keep the raw question alongside the per-source retrieval results.
        RunnableParallel(
            {"question": RunnablePassthrough(), "sources": retrieve_all}
        ).with_config(run_name="add_sources")
        # Overwrite "sources" with the formatted text of the fused documents;
        # "question" is carried through for the prompt.
        | RunnablePassthrough.assign(
            sources=(
                RunnableLambda(fuse_retrieved_docs) | format_named_docs
            ).with_config(run_name="fuse_and_format")
        ).with_config(run_name="update_sources")
        | prompt
        | ChatOpenAI(model="gpt-3.5-turbo-1106")
        | StrOutputParser()
    )
    .with_config(run_name="QA with fused results")
    .with_types(input_type=Question)
)
|
@ -0,0 +1 @@
|
||||
__pycache__
|
@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2023 LangChain, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
@ -0,0 +1,73 @@
|
||||
# RAG with Multiple Indexes (Routing)
|
||||
|
||||
A QA application that routes between different domain-specific retrievers given a user question.
|
||||
|
||||
## Environment Setup
|
||||
|
||||
This application queries PubMed, ArXiv, Wikipedia, and [Kay AI](https://www.kay.ai) (for SEC filings).
|
||||
|
||||
You will need to create a free Kay AI account and [get your API key here](https://www.kay.ai).
|
||||
Then set environment variable:
|
||||
|
||||
```bash
|
||||
export KAY_API_KEY="<YOUR_API_KEY>"
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
To use this package, you should first have the LangChain CLI installed:
|
||||
|
||||
```shell
|
||||
pip install -U langchain-cli
|
||||
```
|
||||
|
||||
To create a new LangChain project and install this as the only package, you can do:
|
||||
|
||||
```shell
|
||||
langchain app new my-app --package rag-multi-index-router
|
||||
```
|
||||
|
||||
If you want to add this to an existing project, you can just run:
|
||||
|
||||
```shell
|
||||
langchain app add rag-multi-index-router
|
||||
```
|
||||
|
||||
And add the following code to your `server.py` file:
|
||||
```python
|
||||
from rag_multi_index_router import chain as rag_multi_index_router_chain
|
||||
|
||||
add_routes(app, rag_multi_index_router_chain, path="/rag-multi-index-router")
|
||||
```
|
||||
|
||||
(Optional) Let's now configure LangSmith.
|
||||
LangSmith will help us trace, monitor and debug LangChain applications.
|
||||
LangSmith is currently in private beta, you can sign up [here](https://smith.langchain.com/).
|
||||
If you don't have access, you can skip this section.
|
||||
|
||||
|
||||
```shell
|
||||
export LANGCHAIN_TRACING_V2=true
|
||||
export LANGCHAIN_API_KEY=<your-api-key>
|
||||
export LANGCHAIN_PROJECT=<your-project> # if not specified, defaults to "default"
|
||||
```
|
||||
|
||||
If you are inside this directory, then you can spin up a LangServe instance directly by:
|
||||
|
||||
```shell
|
||||
langchain serve
|
||||
```
|
||||
|
||||
This will start the FastAPI app with a server running locally at
|
||||
[http://localhost:8000](http://localhost:8000)
|
||||
|
||||
We can see all templates at [http://127.0.0.1:8000/docs](http://127.0.0.1:8000/docs)
|
||||
We can access the playground at [http://127.0.0.1:8000/rag-multi-index-router/playground](http://127.0.0.1:8000/rag-multi-index-router/playground)
|
||||
|
||||
We can access the template from code with:
|
||||
|
||||
```python
|
||||
from langserve.client import RemoteRunnable
|
||||
|
||||
runnable = RemoteRunnable("http://localhost:8000/rag-multi-index-router")
|
||||
```
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,29 @@
|
||||
[tool.poetry]
|
||||
name = "rag-multi-index-router"
|
||||
version = "0.0.1"
|
||||
description = ""
|
||||
authors = []
|
||||
readme = "README.md"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = ">=3.8.1,<4.0"
|
||||
langchain = ">=0.0.313, <0.1"
|
||||
openai = "<2"
|
||||
xmltodict = "^0.13.0"
|
||||
kay = "^0.1.2"
|
||||
wikipedia = "^1.4.0"
|
||||
arxiv = "^2.0.0"
|
||||
tiktoken = "^0.5.1"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
langchain-cli = ">=0.0.15"
|
||||
fastapi = "^0.104.0"
|
||||
sse-starlette = "^1.6.5"
|
||||
|
||||
[tool.langserve]
|
||||
export_module = "rag_multi_index_router"
|
||||
export_attr = "chain"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
@ -0,0 +1,3 @@
|
||||
from rag_multi_index_router.chain import chain
|
||||
|
||||
__all__ = ["chain"]
|
@ -0,0 +1,96 @@
|
||||
from operator import itemgetter
|
||||
from typing import Literal
|
||||
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
from langchain.output_parsers.openai_functions import PydanticAttrOutputFunctionsParser
|
||||
from langchain.prompts import ChatPromptTemplate
|
||||
from langchain.pydantic_v1 import BaseModel, Field
|
||||
from langchain.retrievers import (
|
||||
ArxivRetriever,
|
||||
KayAiRetriever,
|
||||
PubMedRetriever,
|
||||
WikipediaRetriever,
|
||||
)
|
||||
from langchain.schema import StrOutputParser
|
||||
from langchain.schema.runnable import (
|
||||
RouterRunnable,
|
||||
RunnableParallel,
|
||||
RunnablePassthrough,
|
||||
)
|
||||
from langchain.utils.openai_functions import convert_pydantic_to_openai_function
|
||||
|
||||
# One retriever per external index. Each one requests five results and is
# given a `run_name` label through `with_config`.
pubmed = PubMedRetriever(top_k_results=5).with_config(run_name="pubmed")
arxiv = ArxivRetriever(top_k_results=5).with_config(run_name="arxiv")
# Kay AI retriever restricted to "10-K" data in the "company" dataset.
sec = KayAiRetriever.create(
    dataset_id="company", data_types=["10-K"], num_contexts=5
).with_config(run_name="sec_filings")
# The Wikipedia retriever is additionally configured with
# doc_content_chars_max=2000.
wiki = WikipediaRetriever(top_k_results=5, doc_content_chars_max=2000).with_config(
    run_name="wiki"
)

# Single chat model instance, used both by the classifier and by the final
# answering step of the chain.
llm = ChatOpenAI(model="gpt-3.5-turbo-1106")
|
||||
|
||||
|
||||
# Schema handed to the model as an OpenAI function (see `classifier`). The
# class docstring and the field description are part of that schema, so they
# are runtime text rather than plain documentation.
class Search(BaseModel):
    """Search for relevant documents by question topic."""

    # The allowed values are exactly the keys of `retriever_map`, so the
    # parsed value can be used directly to select a retriever.
    question_resource: Literal[
        "medical paper", "scientific paper", "public company finances report", "general"
    ] = Field(
        ...,
        description=(
            "The type of resource that would best help answer the user's question. "
            "If none of the types are relevant return 'general'."
        ),
    )
|
||||
|
||||
|
||||
# Classifies a question into one of the `Search.question_resource` labels.
# The model is bound to the `Search` function with `function_call` naming it,
# and the parser returns only the `question_resource` attribute of the
# parsed `Search` object.
classifier = llm.bind(
    functions=[convert_pydantic_to_openai_function(Search)],
    function_call={"name": "Search"},
) | PydanticAttrOutputFunctionsParser(
    pydantic_schema=Search, attr_name="question_resource"
)

# Resource label -> retriever. The keys match the literal values allowed by
# `Search.question_resource`.
retriever_map = {
    "medical paper": pubmed,
    "scientific paper": arxiv,
    "public company finances report": sec,
    "general": wiki,
}
# Router over the retrievers above. The chain feeds it a dict with "input"
# and "key" entries; presumably "key" selects the retriever and "input" is
# forwarded to it -- confirm against the RouterRunnable documentation.
router_retriever = RouterRunnable(runnables=retriever_map)
|
||||
|
||||
|
||||
def format_docs(docs):
    """Render documents as numbered source sections.

    Each document becomes a ``Source <i>:`` header (numbered from 0 in
    iteration order) followed by its ``page_content`` on the next line.
    Sections are separated by a blank line.
    """
    sections = []
    for index, doc in enumerate(docs):
        sections.append(f"Source {index}:\n{doc.page_content}")
    return "\n\n".join(sections)
|
||||
|
||||
|
||||
# System message template. `{sources}` is a template variable, while
# `{{topic}}` is brace-escaped, so with f-string-style formatting it reaches
# the model as the literal text "{topic}".
system = """Answer the user question. Use the following sources to help \
answer the question. If you don't know the answer say "I'm not sure, I couldn't \
find information on {{topic}}."

Sources:

{sources}"""
prompt = ChatPromptTemplate.from_messages([("system", system), ("human", "{question}")])
|
||||
|
||||
|
||||
# Input schema for the chain: a bare string, declared through the pydantic
# `__root__` field. It is passed to `with_types(input_type=...)` when the
# chain is assembled.
class Question(BaseModel):
    __root__: str
|
||||
|
||||
|
||||
# End-to-end QA pipeline: classify the question, retrieve from the matching
# source, fill the prompt, call the chat model, and return the answer as a
# string.
chain = (
    (
        # Pair the raw question ("input") with the classifier's resource
        # label ("key"); this dict is what router_retriever receives.
        RunnableParallel(
            {"input": RunnablePassthrough(), "key": classifier}
        ).with_config(run_name="classify")
        # Build the prompt variables: the original question, and the routed
        # retriever's documents formatted as text.
        | RunnableParallel(
            {"question": itemgetter("input"), "sources": router_retriever | format_docs}
        ).with_config(run_name="retrieve")
        | prompt
        | llm
        | StrOutputParser()
    )
    .with_config(run_name="QA with router")
    .with_types(input_type=Question)
)
|
Loading…
Reference in New Issue