AWS Bedrock RAG template (#12450)

pull/12451/head
Lance Martin 7 months ago committed by GitHub
parent 5d40e36c75
commit 5c564e62e1

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2023 LangChain, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@@ -0,0 +1,29 @@
# RAG AWS Bedrock
AWS Bedrock is a managed service that offers a set of foundation models.
Here we will use `Anthropic Claude` for text generation and `Amazon Titan` for text embedding.
We will use Pinecone as our vectorstore.
(See [this notebook](https://github.com/aws-samples/amazon-bedrock-workshop/blob/main/03_QuestionAnswering/01_qa_w_rag_claude.ipynb) for additional context on the RAG pipeline.)
(See [this notebook](https://github.com/aws-samples/amazon-bedrock-workshop/blob/58f238a183e7e629c9ae11dd970393af4e64ec44/00_Intro/bedrock_boto3_setup.ipynb#Prerequisites) for additional context on setup.)
## Pinecone
This connects to a hosted Pinecone vectorstore.
Be sure that you have set the environment variables that `chain.py` expects (a sketch of setting them follows this list):
* `PINECONE_API_KEY`
* `PINECONE_ENVIRONMENT`
* `PINECONE_INDEX` (defaults to `langchain-test`)
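A minimal sketch of providing these before the chain is imported; the values shown are placeholders, not real credentials:

```python
import os

# Placeholder values -- substitute your own Pinecone credentials and index.
os.environ["PINECONE_API_KEY"] = "your-pinecone-api-key"
os.environ["PINECONE_ENVIRONMENT"] = "your-pinecone-environment"
os.environ["PINECONE_INDEX"] = "langchain-test"
```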
## LLM and Embeddings
Be sure to set the AWS environment variables (see the sketch below):
* `AWS_DEFAULT_REGION`
* `AWS_PROFILE`
* `BEDROCK_ASSUME_ROLE`
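As a minimal sketch, assuming the Pinecone variables above are also set, the AWS variables can be provided the same way (placeholder values again), after which the exported `chain` can be invoked directly:

```python
import os

# Placeholder values -- substitute your own region, profile, and role ARN.
os.environ["AWS_DEFAULT_REGION"] = "us-east-1"
os.environ["AWS_PROFILE"] = "default"
os.environ["BEDROCK_ASSUME_ROLE"] = "arn:aws:iam::123456789012:role/YourBedrockRole"

from rag_aws_bedrock import chain

# Ask a question against the documents indexed in Pinecone.
print(chain.invoke("How does agent memory work?"))
```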

File diff suppressed because it is too large

@@ -0,0 +1,24 @@
[tool.poetry]
name = "rag-aws-bedrock"
version = "0.1.0"
description = ""
authors = ["Lance Martin <lance@langchain.dev>"]
readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
langchain = ">=0.0.313, <0.1"
openai = ">=0.28.1"
tiktoken = ">=0.5.1"
pinecone-client = ">=2.2.4"
boto3 = ">=1.28.57"
awscli = ">=1.29.57"
botocore = ">=1.31.57"
[tool.langserve]
export_module = "rag_aws_bedrock"
export_attr = "chain"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

@@ -0,0 +1,50 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "681a5d1e",
"metadata": {},
"source": [
"## Connect to template\n",
"\n",
"In `server.py`, set -\n",
"```\n",
"add_routes(app, chain_ext, path=\"/rag_aws_bedrock\")\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d774be2a",
"metadata": {},
"outputs": [],
"source": [
"from langserve.client import RemoteRunnable\n",
"rag_app_pinecone = RemoteRunnable('http://0.0.0.0:8001/rag_aws_bedrock')\n",
"rag_app_pinecone.invoke(\"What are the different types of agent memory\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
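Streaming works against the same endpoint. A minimal sketch, assuming the LangServe app is running on port 8001 as in the notebook above (the streaming loop mirrors the `rag_app.stream(...)` pattern used in the notebooks later in this commit):

```python
from langserve.client import RemoteRunnable

rag_app = RemoteRunnable("http://0.0.0.0:8001/rag_aws_bedrock")

# Stream the answer chunk-by-chunk instead of waiting for the full response.
for chunk in rag_app.stream("What are the different types of agent memory?"):
    print(chunk, end="", flush=True)
```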

@@ -0,0 +1,3 @@
from rag_aws_bedrock.chain import chain
__all__ = ["chain"]

@@ -0,0 +1,73 @@
import os
from langchain.embeddings import BedrockEmbeddings
from langchain.llms.bedrock import Bedrock
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableParallel, RunnablePassthrough
from langchain.vectorstores import Pinecone
from utils import bedrock
if os.environ.get("PINECONE_API_KEY", None) is None:
raise Exception("Missing `PINECONE_API_KEY` environment variable.")
if os.environ.get("PINECONE_ENVIRONMENT", None) is None:
raise Exception("Missing `PINECONE_ENVIRONMENT` environment variable.")
if os.environ.get("AWS_DEFAULT_REGION", None) is None:
raise Exception("Missing `AWS_DEFAULT_REGION` environment variable.")
if os.environ.get("AWS_PROFILE", None) is None:
raise Exception("Missing `AWS_PROFILE` environment variable.")
if os.environ.get("BEDROCK_ASSUME_ROLE", None) is None:
raise Exception("Missing `BEDROCK_ASSUME_ROLE` environment variable.")
PINECONE_INDEX_NAME = os.environ.get("PINECONE_INDEX", "langchain-test")
### Ingest code - you may need to run this the first time
# Load
# from langchain.document_loaders import WebBaseLoader
# loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
# data = loader.load()
# # Split
# from langchain.text_splitter import RecursiveCharacterTextSplitter
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
# all_splits = text_splitter.split_documents(data)
# # Add to vectorDB (index with the same Bedrock embeddings used at query time;
# # run this after `bedrock_embeddings` is defined below)
# vectorstore = Pinecone.from_documents(
#     documents=all_splits, embedding=bedrock_embeddings, index_name=PINECONE_INDEX_NAME
# )
# retriever = vectorstore.as_retriever()
# Set LLM and embeddings
boto3_bedrock = bedrock.get_bedrock_client(
    assumed_role=os.environ.get("BEDROCK_ASSUME_ROLE", None),
    region=os.environ.get("AWS_DEFAULT_REGION", None),
)
model = Bedrock(
    model_id="anthropic.claude-v2",
    client=boto3_bedrock,
    model_kwargs={"max_tokens_to_sample": 200},
)
bedrock_embeddings = BedrockEmbeddings(
    model_id="amazon.titan-embed-text-v1",
    client=boto3_bedrock,
)
# Set vectorstore
vectorstore = Pinecone.from_existing_index(PINECONE_INDEX_NAME, bedrock_embeddings)
retriever = vectorstore.as_retriever()
# RAG prompt
template = """Answer the question based only on the following context:
{context}
Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)
# RAG
chain = (
    RunnableParallel({"context": retriever, "question": RunnablePassthrough()})
    | prompt
    | model
    | StrOutputParser()
)
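To expose the chain over HTTP, a minimal `server.py` sketch (a hypothetical file, following the `add_routes(app, ..., path="/rag_aws_bedrock")` pattern the notebooks in this commit point at):

```python
from fastapi import FastAPI
from langserve import add_routes

from rag_aws_bedrock import chain

app = FastAPI()

# Serve the chain at /rag_aws_bedrock, matching the client notebooks.
add_routes(app, chain, path="/rag_aws_bedrock")

if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8001)
```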

@@ -16,25 +16,112 @@
{
"cell_type": "code",
"execution_count": 1,
"id": "888494ca-0509-4070-b36f-600a042f352c",
"id": "78fb41d3-d2aa-40a6-b144-491f38a7cf88",
"metadata": {},
"outputs": [],
"source": [
"from langserve.client import RemoteRunnable\n",
"rag_app = RemoteRunnable('http://0.0.0.0:8001/rag_chroma_private/')\n",
"for item in rag_app.stream(\"How does agent memory work?\"):\n",
" print(item)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "ce39d358-1934-4404-bd3e-3fd497974aff",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Based on the provided context, agent memory is a long-term memory module that records a comprehensive list of agents' experiences in natural language. Each element is an observation or event directly provided by the agent, and inter-agent communication can trigger new natural language statements. The agent memory is complemented by several key components, including LLM (large language model) as the agent's brain, planning, reflection, and memory mechanisms. The design of generative agents combines LLM with memory, planning, and reflection mechanisms to enable agents to behave conditioned on past experiences and interact with other agents. The agent learns to call external APIs for missing information, including current information, code execution capability, access to proprietary information sources, and more. In summary, the agent memory works by recording and storing observations and events in natural language, allowing the agent to retrieve and use this information to inform its behavior.\n"
]
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "a554971a-e724-4c99-84d1-5d646ae4ac3e",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 2,
"id": "6891d028-43ac-4a70-b2ad-6fbd3d937283",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"' Based on the given context, the answer to the question \"How does agent memory work?\" can be inferred as follows:\\n\\nAgent memory refers to the long-term memory module of an autonomous agent system, which records a comprehensive list of agents\\' experiences in natural language. Each element is an observation or event directly provided by the agent, and inter-agent communication can trigger new natural language statements. The retrieval model surfaces the context to inform the agent\\'s behavior according to relevance, recency, and importance.\\n\\nIn other words, the agent memory is a component of the autonomous agent system that stores and manages the agent\\'s experiences and observations in a long-term memory module, which is based on natural language processing and generation capabilities of a large language model (LLM). The memory is used to inform the agent\\'s behavior and decision-making, and it can be triggered by inter-agent communication.'"
"<generator object RemoteRunnable.stream at 0x1245d25f0>"
]
},
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langserve.client import RemoteRunnable\n",
"rag_app = RemoteRunnable('http://0.0.0.0:8001/rag_chroma_private/')\n",
"rag_app.invoke(\"How does agent memory work?\")"
"rag_app.stream(\"How does agent memory work?\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "888494ca-0509-4070-b36f-600a042f352c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Based on the provided context, agent memory is a long-term memory module that records a comprehensive list of agents' experiences in natural language. Each element is an observation, an event directly provided by the agent, and inter-agent communication can trigger new natural language statements. The memory module surfaces the context to inform the agent's behavior according to relevance, recency, and importance.\n"
]
}
],
"source": [
"\n",
"stream = \n",
"for i in stream:\n",
" print(i)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "ff2169c9-dab2-41c4-8f38-1f8aebb16814",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting httpx_sse\n",
" Obtaining dependency information for httpx_sse from https://files.pythonhosted.org/packages/62/33/d35b4ccf8c1ac7266bd1d068c48f842d3c7392cca87e32751c79ee553d7a/httpx_sse-0.3.1-py3-none-any.whl.metadata\n",
" Using cached httpx_sse-0.3.1-py3-none-any.whl.metadata (8.6 kB)\n",
"Using cached httpx_sse-0.3.1-py3-none-any.whl (7.7 kB)\n",
"Installing collected packages: httpx_sse\n",
"Successfully installed httpx_sse-0.3.1\n"
]
}
],
"source": [
"! pip install httpx_sse"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3d843f23-686a-4138-8a9d-087bb00b2e13",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {

@@ -6,7 +6,12 @@
"id": "681a5d1e",
"metadata": {},
"source": [
"## Connect to template"
"## Connect to template\n",
"\n",
"In `server.py`, set -\n",
"```\n",
"add_routes(app, chain_ext, path=\"/rag_pinecone_multi_query\")\n",
"```"
]
},
{

@@ -5,7 +5,12 @@
"id": "681a5d1e",
"metadata": {},
"source": [
"## Connect to template"
"## Connect to template\n",
"\n",
"In `server.py`, set -\n",
"```\n",
"add_routes(app, chain_ext, path=\"/rag_pinecone\")\n",
"```"
]
},
{
@@ -16,7 +21,7 @@
"outputs": [],
"source": [
"from langserve.client import RemoteRunnable\n",
"rag_app_pinecone = RemoteRunnable('http://localhost:8000/rag-pinecone')\n",
"rag_app_pinecone = RemoteRunnable('http://0.0.0.0:8001/rag_pinecone')\n",
"rag_app_pinecone.invoke(\"How does agent memory work?\")"
]
}
