AWS Bedrock RAG template (#12450)

pull/12451/head
Lance Martin 7 months ago committed by GitHub
parent 5d40e36c75
commit 5c564e62e1

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2023 LangChain, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@@ -0,0 +1,29 @@
# RAG AWS Bedrock
AWS Bedrock is a managed service that offers a set of foundation models.
Here we will use `Anthropic Claude` for text generation and `Amazon Titan` for text embedding.
We will use Pinecone as our vectorstore.
(See [this notebook](https://github.com/aws-samples/amazon-bedrock-workshop/blob/main/03_QuestionAnswering/01_qa_w_rag_claude.ipynb) for additional context on the RAG pipeline.)
(See [this notebook](https://github.com/aws-samples/amazon-bedrock-workshop/blob/58f238a183e7e629c9ae11dd970393af4e64ec44/00_Intro/bedrock_boto3_setup.ipynb#Prerequisites) for additional context on setup.)
## Pinecone
This connects to a hosted Pinecone vectorstore.
Be sure that you have set the environment variables that `chain.py` expects (a sketch of setting them follows this list):
* `PINECONE_API_KEY`
* `PINECONE_ENVIRONMENT`
* `PINECONE_INDEX` (defaults to `langchain-test`)
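A minimal sketch of providing these before the chain is imported; the values shown are placeholders, not real credentials:

```python
import os

# Placeholder values -- substitute your own Pinecone credentials and index.
os.environ["PINECONE_API_KEY"] = "your-pinecone-api-key"
os.environ["PINECONE_ENVIRONMENT"] = "your-pinecone-environment"
os.environ["PINECONE_INDEX"] = "langchain-test"
```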
## LLM and Embeddings
Be sure to set the AWS environment variables (see the sketch below):
* `AWS_DEFAULT_REGION`
* `AWS_PROFILE`
* `BEDROCK_ASSUME_ROLE`
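As a minimal sketch, assuming the Pinecone variables above are also set, the AWS variables can be provided the same way (placeholder values again), after which the exported `chain` can be invoked directly:

```python
import os

# Placeholder values -- substitute your own region, profile, and role ARN.
os.environ["AWS_DEFAULT_REGION"] = "us-east-1"
os.environ["AWS_PROFILE"] = "default"
os.environ["BEDROCK_ASSUME_ROLE"] = "arn:aws:iam::123456789012:role/YourBedrockRole"

from rag_aws_bedrock import chain

# Ask a question against the documents indexed in Pinecone.
print(chain.invoke("How does agent memory work?"))
```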

File diff suppressed because it is too large

@@ -0,0 +1,24 @@
[tool.poetry]
name = "rag-aws-bedrock"
version = "0.1.0"
description = ""
authors = ["Lance Martin <lance@langchain.dev>"]
readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
langchain = ">=0.0.313, <0.1"
openai = ">=0.28.1"
tiktoken = ">=0.5.1"
pinecone-client = ">=2.2.4"
boto3 = ">=1.28.57"
awscli = ">=1.29.57"
botocore = ">=1.31.57"
[tool.langserve]
export_module = "rag_aws_bedrock"
export_attr = "chain"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

@@ -0,0 +1,50 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "681a5d1e",
"metadata": {},
"source": [
"## Connect to template\n",
"\n",
"In `server.py`, set -\n",
"```\n",
"add_routes(app, chain_ext, path=\"/rag_aws_bedrock\")\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d774be2a",
"metadata": {},
"outputs": [],
"source": [
"from langserve.client import RemoteRunnable\n",
"rag_app_pinecone = RemoteRunnable('http://0.0.0.0:8001/rag_aws_bedrock')\n",
"rag_app_pinecone.invoke(\"What are the different types of agent memory\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
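Streaming works against the same endpoint. A minimal sketch, assuming the LangServe app is running on port 8001 as in the notebook above (the streaming loop mirrors the `rag_app.stream(...)` pattern used in the notebooks later in this commit):

```python
from langserve.client import RemoteRunnable

rag_app = RemoteRunnable("http://0.0.0.0:8001/rag_aws_bedrock")

# Stream the answer chunk-by-chunk instead of waiting for the full response.
for chunk in rag_app.stream("What are the different types of agent memory?"):
    print(chunk, end="", flush=True)
```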

@@ -0,0 +1,3 @@
from rag_aws_bedrock.chain import chain
__all__ = ["chain"]

@@ -0,0 +1,73 @@
import os
from langchain.embeddings import BedrockEmbeddings
from langchain.llms.bedrock import Bedrock
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableParallel, RunnablePassthrough
from langchain.vectorstores import Pinecone
from utils import bedrock
if os.environ.get("PINECONE_API_KEY", None) is None:
raise Exception("Missing `PINECONE_API_KEY` environment variable.")
if os.environ.get("PINECONE_ENVIRONMENT", None) is None:
raise Exception("Missing `PINECONE_ENVIRONMENT` environment variable.")
if os.environ.get("AWS_DEFAULT_REGION", None) is None:
raise Exception("Missing `AWS_DEFAULT_REGION` environment variable.")
if os.environ.get("AWS_PROFILE", None) is None:
raise Exception("Missing `AWS_PROFILE` environment variable.")
if os.environ.get("BEDROCK_ASSUME_ROLE", None) is None:
raise Exception("Missing `BEDROCK_ASSUME_ROLE` environment variable.")
PINECONE_INDEX_NAME = os.environ.get("PINECONE_INDEX", "langchain-test")
### Ingest code - you may need to run this the first time
# Load
# from langchain.document_loaders import WebBaseLoader
# loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
# data = loader.load()
# # Split
# from langchain.text_splitter import RecursiveCharacterTextSplitter
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
# all_splits = text_splitter.split_documents(data)
# # Add to vectorDB (index with the same Bedrock embeddings used at query time;
# # run this after `bedrock_embeddings` is defined below)
# vectorstore = Pinecone.from_documents(
#     documents=all_splits, embedding=bedrock_embeddings, index_name=PINECONE_INDEX_NAME
# )
# retriever = vectorstore.as_retriever()
# Set LLM and embeddings
boto3_bedrock = bedrock.get_bedrock_client(
    assumed_role=os.environ.get("BEDROCK_ASSUME_ROLE", None),
    region=os.environ.get("AWS_DEFAULT_REGION", None),
)
model = Bedrock(
    model_id="anthropic.claude-v2",
    client=boto3_bedrock,
    model_kwargs={"max_tokens_to_sample": 200},
)
bedrock_embeddings = BedrockEmbeddings(
    model_id="amazon.titan-embed-text-v1",
    client=boto3_bedrock,
)
# Set vectorstore
vectorstore = Pinecone.from_existing_index(PINECONE_INDEX_NAME, bedrock_embeddings)
retriever = vectorstore.as_retriever()
# RAG prompt
template = """Answer the question based only on the following context:
{context}
Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)
# RAG
chain = (
    RunnableParallel({"context": retriever, "question": RunnablePassthrough()})
    | prompt
    | model
    | StrOutputParser()
)
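To expose the chain over HTTP, a minimal `server.py` sketch (a hypothetical file, following the `add_routes(app, ..., path="/rag_aws_bedrock")` pattern the notebooks in this commit point at):

```python
from fastapi import FastAPI
from langserve import add_routes

from rag_aws_bedrock import chain

app = FastAPI()

# Serve the chain at /rag_aws_bedrock, matching the client notebooks.
add_routes(app, chain, path="/rag_aws_bedrock")

if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8001)
```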

@@ -16,25 +16,112 @@
{
"cell_type": "code",
"execution_count": 1,
"id": "888494ca-0509-4070-b36f-600a042f352c",
"id": "78fb41d3-d2aa-40a6-b144-491f38a7cf88",
"metadata": {},
"outputs": [],
"source": [
"from langserve.client import RemoteRunnable\n",
"rag_app = RemoteRunnable('http://0.0.0.0:8001/rag_chroma_private/')\n",
"for item in rag_app.stream(\"How does agent memory work?\"):\n",
" print(item)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "ce39d358-1934-4404-bd3e-3fd497974aff",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Based on the provided context, agent memory is a long-term memory module that records a comprehensive list of agents' experiences in natural language. Each element is an observation or event directly provided by the agent, and inter-agent communication can trigger new natural language statements. The agent memory is complemented by several key components, including LLM (large language model) as the agent's brain, planning, reflection, and memory mechanisms. The design of generative agents combines LLM with memory, planning, and reflection mechanisms to enable agents to behave conditioned on past experiences and interact with other agents. The agent learns to call external APIs for missing information, including current information, code execution capability, access to proprietary information sources, and more. In summary, the agent memory works by recording and storing observations and events in natural language, allowing the agent to retrieve and use this information to inform its behavior.\n"
]
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "a554971a-e724-4c99-84d1-5d646ae4ac3e",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 2,
"id": "6891d028-43ac-4a70-b2ad-6fbd3d937283",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"' Based on the given context, the answer to the question \"How does agent memory work?\" can be inferred as follows:\\n\\nAgent memory refers to the long-term memory module of an autonomous agent system, which records a comprehensive list of agents\\' experiences in natural language. Each element is an observation or event directly provided by the agent, and inter-agent communication can trigger new natural language statements. The retrieval model surfaces the context to inform the agent\\'s behavior according to relevance, recency, and importance.\\n\\nIn other words, the agent memory is a component of the autonomous agent system that stores and manages the agent\\'s experiences and observations in a long-term memory module, which is based on natural language processing and generation capabilities of a large language model (LLM). The memory is used to inform the agent\\'s behavior and decision-making, and it can be triggered by inter-agent communication.'"
"<generator object RemoteRunnable.stream at 0x1245d25f0>"
]
},
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langserve.client import RemoteRunnable\n",
"rag_app = RemoteRunnable('http://0.0.0.0:8001/rag_chroma_private/')\n",
"rag_app.invoke(\"How does agent memory work?\")"
"rag_app.stream(\"How does agent memory work?\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "888494ca-0509-4070-b36f-600a042f352c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Based on the provided context, agent memory is a long-term memory module that records a comprehensive list of agents' experiences in natural language. Each element is an observation, an event directly provided by the agent, and inter-agent communication can trigger new natural language statements. The memory module surfaces the context to inform the agent's behavior according to relevance, recency, and importance.\n"
]
}
],
"source": [
"\n",
"stream = \n",
"for i in stream:\n",
" print(i)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "ff2169c9-dab2-41c4-8f38-1f8aebb16814",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting httpx_sse\n",
" Obtaining dependency information for httpx_sse from https://files.pythonhosted.org/packages/62/33/d35b4ccf8c1ac7266bd1d068c48f842d3c7392cca87e32751c79ee553d7a/httpx_sse-0.3.1-py3-none-any.whl.metadata\n",
" Using cached httpx_sse-0.3.1-py3-none-any.whl.metadata (8.6 kB)\n",
"Using cached httpx_sse-0.3.1-py3-none-any.whl (7.7 kB)\n",
"Installing collected packages: httpx_sse\n",
"Successfully installed httpx_sse-0.3.1\n"
]
}
],
"source": [
"! pip install httpx_sse"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3d843f23-686a-4138-8a9d-087bb00b2e13",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {

@@ -6,7 +6,12 @@
"id": "681a5d1e",
"metadata": {},
"source": [
"## Connect to template"
"## Connect to template\n",
"\n",
"In `server.py`, set -\n",
"```\n",
"add_routes(app, chain_ext, path=\"/rag_pinecone_multi_query\")\n",
"```"
]
},
{

@@ -5,7 +5,12 @@
"id": "681a5d1e",
"metadata": {},
"source": [
"## Connect to template"
"## Connect to template\n",
"\n",
"In `server.py`, set -\n",
"```\n",
"add_routes(app, chain_ext, path=\"/rag_pinecone\")\n",
"```"
]
},
{
@@ -16,7 +21,7 @@
"outputs": [],
"source": [
"from langserve.client import RemoteRunnable\n",
"rag_app_pinecone = RemoteRunnable('http://localhost:8000/rag-pinecone')\n",
"rag_app_pinecone = RemoteRunnable('http://0.0.0.0:8001/rag_pinecone')\n",
"rag_app_pinecone.invoke(\"How does agent memory work?\")"
]
}
