mirror of https://github.com/hwchase17/langchain
Kay retriever (#10657)
- **Description**: Adding retrievers for [kay.ai](https://kay.ai) and SEC filings powered by Kay and Cybersyn. Kay provides context as a service: it's an API built for RAG. - **Issue**: N/A - **Dependencies**: Just added a dep to the [kay](https://pypi.org/project/kay/) package - **Tag maintainer**: @baskaryan @hwchase17 Discussed in slack - **Twitter handle:** [@vishalrohra_](https://twitter.com/vishalrohra_) --------- Co-authored-by: Bagatur <baskaryan@gmail.com>pull/11051/head
parent
5f13668fa0
commit
89ef440c14
@ -1,156 +1,159 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "a634365e",
|
"id": "a634365e",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# AWS S3 Directory\n",
|
"# AWS S3 Directory\n",
|
||||||
"\n",
|
"\n",
|
||||||
">[Amazon Simple Storage Service (Amazon S3)](https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-folders.html) is an object storage service\n",
|
">[Amazon Simple Storage Service (Amazon S3)](https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-folders.html) is an object storage service\n",
|
||||||
"\n",
|
"\n",
|
||||||
">[AWS S3 Directory](https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-folders.html)\n",
|
">[AWS S3 Directory](https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-folders.html)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"This covers how to load document objects from an `AWS S3 Directory` object."
|
"This covers how to load document objects from an `AWS S3 Directory` object."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"id": "49815096",
|
"id": "49815096",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": []
|
"tags": []
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"#!pip install boto3"
|
"#!pip install boto3"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": 2,
|
||||||
"id": "2f0cd6a5",
|
"id": "2f0cd6a5",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": []
|
"tags": []
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from langchain.document_loaders import S3DirectoryLoader"
|
"from langchain.document_loaders import S3DirectoryLoader"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 3,
|
"execution_count": 3,
|
||||||
"id": "321cc7f1",
|
"id": "321cc7f1",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": []
|
"tags": []
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"loader = S3DirectoryLoader(\"testing-hwc\")"
|
"loader = S3DirectoryLoader(\"testing-hwc\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"id": "2b11d155",
|
"id": "2b11d155",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": []
|
"tags": []
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"loader.load()"
|
"loader.load()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "0690c40a",
|
"id": "0690c40a",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Specifying a prefix\n",
|
"## Specifying a prefix\n",
|
||||||
"You can also specify a prefix for more finegrained control over what files to load."
|
"You can also specify a prefix for more finegrained control over what files to load."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 5,
|
"execution_count": 5,
|
||||||
"id": "72d44781",
|
"id": "72d44781",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"loader = S3DirectoryLoader(\"testing-hwc\", prefix=\"fake\")"
|
"loader = S3DirectoryLoader(\"testing-hwc\", prefix=\"fake\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 6,
|
|
||||||
"id": "2d3c32db",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
{
|
||||||
"data": {
|
"cell_type": "code",
|
||||||
"text/plain": [
|
"execution_count": 6,
|
||||||
"[Document(page_content='Lorem ipsum dolor sit amet.', lookup_str='', metadata={'source': 's3://testing-hwc/fake.docx'}, lookup_index=0)]"
|
"id": "2d3c32db",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"[Document(page_content='Lorem ipsum dolor sit amet.', lookup_str='', metadata={'source': 's3://testing-hwc/fake.docx'}, lookup_index=0)]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 6,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"loader.load()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 6,
|
{
|
||||||
"metadata": {},
|
"cell_type": "markdown",
|
||||||
"output_type": "execute_result"
|
"source": [
|
||||||
|
"## Configuring the AWS Boto3 client\n",
|
||||||
|
"You can configure the AWS [Boto3](https://boto3.amazonaws.com/v1/documentation/api/latest/index.html) client by passing\n",
|
||||||
|
"named arguments when creating the S3DirectoryLoader.\n",
|
||||||
|
"This is useful for instance when AWS credentials can't be set as environment variables.\n",
|
||||||
|
"See the [list of parameters](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html#boto3.session.Session) that can be configured."
|
||||||
|
],
|
||||||
|
"metadata": {},
|
||||||
|
"id": "91a7ac07"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"loader = S3DirectoryLoader(\"testing-hwc\", aws_access_key_id=\"xxxx\", aws_secret_access_key=\"yyyy\")"
|
||||||
|
],
|
||||||
|
"metadata": {},
|
||||||
|
"id": "f485ec8c"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"loader.load()"
|
||||||
|
],
|
||||||
|
"metadata": {},
|
||||||
|
"id": "c0fa76ae"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.10.6"
|
||||||
}
|
}
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"loader.load()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"source": [
|
|
||||||
"## Configuring the AWS Boto3 client\n",
|
|
||||||
"You can configure the AWS [Boto3](https://boto3.amazonaws.com/v1/documentation/api/latest/index.html) client by passing\n",
|
|
||||||
"named arguments when creating the S3DirectoryLoader.\n",
|
|
||||||
"This is useful for instance when AWS credentials can't be set as environment variables.\n",
|
|
||||||
"See the [list of parameters](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html#boto3.session.Session) that can be configured."
|
|
||||||
],
|
|
||||||
"metadata": {}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"loader = S3DirectoryLoader(\"testing-hwc\", aws_access_key_id=\"xxxx\", aws_secret_access_key=\"yyyy\")"
|
|
||||||
],
|
|
||||||
"metadata": {}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"loader.load()"
|
|
||||||
],
|
|
||||||
"metadata": {}
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3 (ipykernel)",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
},
|
||||||
"language_info": {
|
"nbformat": 4,
|
||||||
"codemirror_mode": {
|
"nbformat_minor": 5
|
||||||
"name": "ipython",
|
}
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.10.6"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 5
|
|
||||||
}
|
|
@ -1,121 +1,122 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "66a7777e",
|
"id": "66a7777e",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# AWS S3 File\n",
|
"# AWS S3 File\n",
|
||||||
"\n",
|
"\n",
|
||||||
">[Amazon Simple Storage Service (Amazon S3)](https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-folders.html) is an object storage service.\n",
|
">[Amazon Simple Storage Service (Amazon S3)](https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-folders.html) is an object storage service.\n",
|
||||||
"\n",
|
"\n",
|
||||||
">[AWS S3 Buckets](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingBucket.html)\n",
|
">[AWS S3 Buckets](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingBucket.html)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"This covers how to load document objects from an `AWS S3 File` object."
|
"This covers how to load document objects from an `AWS S3 File` object."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 1,
|
"execution_count": 1,
|
||||||
"id": "9ec8a3b3",
|
"id": "9ec8a3b3",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from langchain.document_loaders import S3FileLoader"
|
"from langchain.document_loaders import S3FileLoader"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": 2,
|
||||||
"id": "43128d8d",
|
"id": "43128d8d",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"#!pip install boto3"
|
"#!pip install boto3"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 8,
|
|
||||||
"id": "35d6809a",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"loader = S3FileLoader(\"testing-hwc\", \"fake.docx\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 9,
|
|
||||||
"id": "efd6be84",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
{
|
||||||
"data": {
|
"cell_type": "code",
|
||||||
"text/plain": [
|
"execution_count": 8,
|
||||||
"[Document(page_content='Lorem ipsum dolor sit amet.', lookup_str='', metadata={'source': 's3://testing-hwc/fake.docx'}, lookup_index=0)]"
|
"id": "35d6809a",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"loader = S3FileLoader(\"testing-hwc\", \"fake.docx\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 9,
|
{
|
||||||
"metadata": {},
|
"cell_type": "code",
|
||||||
"output_type": "execute_result"
|
"execution_count": 9,
|
||||||
|
"id": "efd6be84",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"[Document(page_content='Lorem ipsum dolor sit amet.', lookup_str='', metadata={'source': 's3://testing-hwc/fake.docx'}, lookup_index=0)]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 9,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"loader.load()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "93689594",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Configuring the AWS Boto3 client\n",
|
||||||
|
"You can configure the AWS [Boto3](https://boto3.amazonaws.com/v1/documentation/api/latest/index.html) client by passing\n",
|
||||||
|
"named arguments when creating the S3DirectoryLoader.\n",
|
||||||
|
"This is useful for instance when AWS credentials can't be set as environment variables.\n",
|
||||||
|
"See the [list of parameters](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html#boto3.session.Session) that can be configured."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"loader = S3FileLoader(\"testing-hwc\", \"fake.docx\", aws_access_key_id=\"xxxx\", aws_secret_access_key=\"yyyy\")"
|
||||||
|
],
|
||||||
|
"metadata": {},
|
||||||
|
"id": "43106ee8"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"loader.load()"
|
||||||
|
],
|
||||||
|
"metadata": {},
|
||||||
|
"id": "1764a727"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.10.6"
|
||||||
}
|
}
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"loader.load()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"id": "93689594",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Configuring the AWS Boto3 client\n",
|
|
||||||
"You can configure the AWS [Boto3](https://boto3.amazonaws.com/v1/documentation/api/latest/index.html) client by passing\n",
|
|
||||||
"named arguments when creating the S3DirectoryLoader.\n",
|
|
||||||
"This is useful for instance when AWS credentials can't be set as environment variables.\n",
|
|
||||||
"See the [list of parameters](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html#boto3.session.Session) that can be configured."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"loader = S3FileLoader(\"testing-hwc\", \"fake.docx\", aws_access_key_id=\"xxxx\", aws_secret_access_key=\"yyyy\")"
|
|
||||||
],
|
|
||||||
"metadata": {}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"loader.load()"
|
|
||||||
],
|
|
||||||
"metadata": {}
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3 (ipykernel)",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
},
|
||||||
"language_info": {
|
"nbformat": 4,
|
||||||
"codemirror_mode": {
|
"nbformat_minor": 5
|
||||||
"name": "ipython",
|
}
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.10.6"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 5
|
|
||||||
}
|
|
||||||
|
|
@ -1,350 +1,352 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"id": "91c6a7ef",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Dynamodb Chat Message History\n",
|
|
||||||
"\n",
|
|
||||||
"This notebook goes over how to use Dynamodb to store chat message history."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"id": "3f608be0",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"First make sure you have correctly configured the [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html). Then make sure you have installed boto3."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"id": "030d784f",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Next, create the DynamoDB Table where we will be storing messages:"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 10,
|
|
||||||
"id": "93ce1811",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
{
|
||||||
"name": "stdout",
|
"cell_type": "markdown",
|
||||||
"output_type": "stream",
|
"id": "91c6a7ef",
|
||||||
"text": [
|
"metadata": {},
|
||||||
"0\n"
|
"source": [
|
||||||
]
|
"# Dynamodb Chat Message History\n",
|
||||||
}
|
"\n",
|
||||||
],
|
"This notebook goes over how to use Dynamodb to store chat message history."
|
||||||
"source": [
|
]
|
||||||
"import boto3\n",
|
},
|
||||||
"\n",
|
|
||||||
"# Get the service resource.\n",
|
|
||||||
"dynamodb = boto3.resource(\"dynamodb\")\n",
|
|
||||||
"\n",
|
|
||||||
"# Create the DynamoDB table.\n",
|
|
||||||
"table = dynamodb.create_table(\n",
|
|
||||||
" TableName=\"SessionTable\",\n",
|
|
||||||
" KeySchema=[{\"AttributeName\": \"SessionId\", \"KeyType\": \"HASH\"}],\n",
|
|
||||||
" AttributeDefinitions=[{\"AttributeName\": \"SessionId\", \"AttributeType\": \"S\"}],\n",
|
|
||||||
" BillingMode=\"PAY_PER_REQUEST\",\n",
|
|
||||||
")\n",
|
|
||||||
"\n",
|
|
||||||
"# Wait until the table exists.\n",
|
|
||||||
"table.meta.client.get_waiter(\"table_exists\").wait(TableName=\"SessionTable\")\n",
|
|
||||||
"\n",
|
|
||||||
"# Print out some data about the table.\n",
|
|
||||||
"print(table.item_count)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"id": "1a9b310b",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## DynamoDBChatMessageHistory"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 11,
|
|
||||||
"id": "d15e3302",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from langchain.memory.chat_message_histories import DynamoDBChatMessageHistory\n",
|
|
||||||
"\n",
|
|
||||||
"history = DynamoDBChatMessageHistory(table_name=\"SessionTable\", session_id=\"0\")\n",
|
|
||||||
"\n",
|
|
||||||
"history.add_user_message(\"hi!\")\n",
|
|
||||||
"\n",
|
|
||||||
"history.add_ai_message(\"whats up?\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 12,
|
|
||||||
"id": "64fc465e",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
{
|
||||||
"data": {
|
"cell_type": "markdown",
|
||||||
"text/plain": "[HumanMessage(content='hi!', additional_kwargs={}, example=False),\n AIMessage(content='whats up?', additional_kwargs={}, example=False),\n HumanMessage(content='hi!', additional_kwargs={}, example=False),\n AIMessage(content='whats up?', additional_kwargs={}, example=False)]"
|
"id": "3f608be0",
|
||||||
},
|
"metadata": {},
|
||||||
"execution_count": 12,
|
"source": [
|
||||||
"metadata": {},
|
"First make sure you have correctly configured the [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html). Then make sure you have installed boto3."
|
||||||
"output_type": "execute_result"
|
]
|
||||||
}
|
},
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"history.messages"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"id": "955f1b15",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## DynamoDBChatMessageHistory with Custom Endpoint URL\n",
|
|
||||||
"\n",
|
|
||||||
"Sometimes it is useful to specify the URL to the AWS endpoint to connect to. For instance, when you are running locally against [Localstack](https://localstack.cloud/). For those cases you can specify the URL via the `endpoint_url` parameter in the constructor."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 13,
|
|
||||||
"id": "225713c8",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from langchain.memory.chat_message_histories import DynamoDBChatMessageHistory\n",
|
|
||||||
"\n",
|
|
||||||
"history = DynamoDBChatMessageHistory(\n",
|
|
||||||
" table_name=\"SessionTable\",\n",
|
|
||||||
" session_id=\"0\",\n",
|
|
||||||
" endpoint_url=\"http://localhost.localstack.cloud:4566\",\n",
|
|
||||||
")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"source": [
|
|
||||||
"## DynamoDBChatMessageHistory With Different Keys Composite Keys\n",
|
|
||||||
"The default key for DynamoDBChatMessageHistory is ```{\"SessionId\": self.session_id}```, but you can modify this to match your table design.\n",
|
|
||||||
"\n",
|
|
||||||
"### Primary Key Name\n",
|
|
||||||
"You may modify the primary key by passing in a primary_key_name value in the constructor, resulting in the following:\n",
|
|
||||||
"```{self.primary_key_name: self.session_id}```\n",
|
|
||||||
"\n",
|
|
||||||
"### Composite Keys\n",
|
|
||||||
"When using an existing DynamoDB table, you may need to modify the key structure from the default of to something including a Sort Key. To do this you may use the ```key``` parameter.\n",
|
|
||||||
"\n",
|
|
||||||
"Passing a value for key will override the primary_key parameter, and the resulting key structure will be the passed value.\n"
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": false
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 14,
|
|
||||||
"outputs": [
|
|
||||||
{
|
{
|
||||||
"name": "stdout",
|
"cell_type": "markdown",
|
||||||
"output_type": "stream",
|
"id": "030d784f",
|
||||||
"text": [
|
"metadata": {},
|
||||||
"0\n"
|
"source": [
|
||||||
]
|
"Next, create the DynamoDB Table where we will be storing messages:"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"data": {
|
"cell_type": "code",
|
||||||
"text/plain": "[HumanMessage(content='hello, composite dynamodb table!', additional_kwargs={}, example=False)]"
|
"execution_count": 10,
|
||||||
},
|
"id": "93ce1811",
|
||||||
"execution_count": 14,
|
"metadata": {},
|
||||||
"metadata": {},
|
"outputs": [
|
||||||
"output_type": "execute_result"
|
{
|
||||||
}
|
"name": "stdout",
|
||||||
],
|
"output_type": "stream",
|
||||||
"source": [
|
"text": [
|
||||||
"from langchain.memory.chat_message_histories import DynamoDBChatMessageHistory\n",
|
"0\n"
|
||||||
"\n",
|
]
|
||||||
"composite_table = dynamodb.create_table(\n",
|
}
|
||||||
" TableName=\"CompositeTable\",\n",
|
],
|
||||||
" KeySchema=[{\"AttributeName\": \"PK\", \"KeyType\": \"HASH\"}, {\"AttributeName\": \"SK\", \"KeyType\": \"RANGE\"}],\n",
|
"source": [
|
||||||
" AttributeDefinitions=[{\"AttributeName\": \"PK\", \"AttributeType\": \"S\"}, {\"AttributeName\": \"SK\", \"AttributeType\": \"S\"}],\n",
|
"import boto3\n",
|
||||||
" BillingMode=\"PAY_PER_REQUEST\",\n",
|
"\n",
|
||||||
")\n",
|
"# Get the service resource.\n",
|
||||||
"\n",
|
"dynamodb = boto3.resource(\"dynamodb\")\n",
|
||||||
"# Wait until the table exists.\n",
|
"\n",
|
||||||
"composite_table.meta.client.get_waiter(\"table_exists\").wait(TableName=\"CompositeTable\")\n",
|
"# Create the DynamoDB table.\n",
|
||||||
"\n",
|
"table = dynamodb.create_table(\n",
|
||||||
"# Print out some data about the table.\n",
|
" TableName=\"SessionTable\",\n",
|
||||||
"print(composite_table.item_count)\n",
|
" KeySchema=[{\"AttributeName\": \"SessionId\", \"KeyType\": \"HASH\"}],\n",
|
||||||
"\n",
|
" AttributeDefinitions=[{\"AttributeName\": \"SessionId\", \"AttributeType\": \"S\"}],\n",
|
||||||
"my_key = {\n",
|
" BillingMode=\"PAY_PER_REQUEST\",\n",
|
||||||
" \"PK\": \"session_id::0\",\n",
|
")\n",
|
||||||
" \"SK\": \"langchain_history\",\n",
|
"\n",
|
||||||
"}\n",
|
"# Wait until the table exists.\n",
|
||||||
"\n",
|
"table.meta.client.get_waiter(\"table_exists\").wait(TableName=\"SessionTable\")\n",
|
||||||
"composite_key_history = DynamoDBChatMessageHistory(\n",
|
"\n",
|
||||||
" table_name=\"CompositeTable\",\n",
|
"# Print out some data about the table.\n",
|
||||||
" session_id=\"0\",\n",
|
"print(table.item_count)"
|
||||||
" endpoint_url=\"http://localhost.localstack.cloud:4566\",\n",
|
]
|
||||||
" key=my_key,\n",
|
},
|
||||||
")\n",
|
{
|
||||||
"\n",
|
"cell_type": "markdown",
|
||||||
"composite_key_history.add_user_message(\"hello, composite dynamodb table!\")\n",
|
"id": "1a9b310b",
|
||||||
"\n",
|
"metadata": {},
|
||||||
"composite_key_history.messages"
|
"source": [
|
||||||
],
|
"## DynamoDBChatMessageHistory"
|
||||||
"metadata": {
|
]
|
||||||
"collapsed": false
|
},
|
||||||
}
|
{
|
||||||
},
|
"cell_type": "code",
|
||||||
{
|
"execution_count": 11,
|
||||||
"attachments": {},
|
"id": "d15e3302",
|
||||||
"cell_type": "markdown",
|
"metadata": {},
|
||||||
"id": "3b33c988",
|
"outputs": [],
|
||||||
"metadata": {},
|
"source": [
|
||||||
"source": [
|
"from langchain.memory.chat_message_histories import DynamoDBChatMessageHistory\n",
|
||||||
"## Agent with DynamoDB Memory"
|
"\n",
|
||||||
]
|
"history = DynamoDBChatMessageHistory(table_name=\"SessionTable\", session_id=\"0\")\n",
|
||||||
},
|
"\n",
|
||||||
{
|
"history.add_user_message(\"hi!\")\n",
|
||||||
"cell_type": "code",
|
"\n",
|
||||||
"execution_count": 15,
|
"history.add_ai_message(\"whats up?\")"
|
||||||
"id": "f92d9499",
|
]
|
||||||
"metadata": {},
|
},
|
||||||
"outputs": [],
|
{
|
||||||
"source": [
|
"cell_type": "code",
|
||||||
"from langchain.agents import Tool\n",
|
"execution_count": 12,
|
||||||
"from langchain.memory import ConversationBufferMemory\n",
|
"id": "64fc465e",
|
||||||
"from langchain.chat_models import ChatOpenAI\n",
|
"metadata": {},
|
||||||
"from langchain.agents import initialize_agent\n",
|
"outputs": [
|
||||||
"from langchain.agents import AgentType\n",
|
{
|
||||||
"from langchain.utilities import PythonREPL\n",
|
"data": {
|
||||||
"from getpass import getpass\n",
|
"text/plain": "[HumanMessage(content='hi!', additional_kwargs={}, example=False),\n AIMessage(content='whats up?', additional_kwargs={}, example=False),\n HumanMessage(content='hi!', additional_kwargs={}, example=False),\n AIMessage(content='whats up?', additional_kwargs={}, example=False)]"
|
||||||
"\n",
|
},
|
||||||
"message_history = DynamoDBChatMessageHistory(table_name=\"SessionTable\", session_id=\"1\")\n",
|
"execution_count": 12,
|
||||||
"memory = ConversationBufferMemory(\n",
|
"metadata": {},
|
||||||
" memory_key=\"chat_history\", chat_memory=message_history, return_messages=True\n",
|
"output_type": "execute_result"
|
||||||
")"
|
}
|
||||||
]
|
],
|
||||||
},
|
"source": [
|
||||||
{
|
"history.messages"
|
||||||
"cell_type": "code",
|
]
|
||||||
"execution_count": 16,
|
},
|
||||||
"id": "1167eeba",
|
{
|
||||||
"metadata": {},
|
"cell_type": "markdown",
|
||||||
"outputs": [],
|
"id": "955f1b15",
|
||||||
"source": [
|
"metadata": {},
|
||||||
"python_repl = PythonREPL()\n",
|
"source": [
|
||||||
"\n",
|
"## DynamoDBChatMessageHistory with Custom Endpoint URL\n",
|
||||||
"# You can create the tool to pass to an agent\n",
|
"\n",
|
||||||
"tools = [\n",
|
"Sometimes it is useful to specify the URL to the AWS endpoint to connect to. For instance, when you are running locally against [Localstack](https://localstack.cloud/). For those cases you can specify the URL via the `endpoint_url` parameter in the constructor."
|
||||||
" Tool(\n",
|
]
|
||||||
" name=\"python_repl\",\n",
|
},
|
||||||
" description=\"A Python shell. Use this to execute python commands. Input should be a valid python command. If you want to see the output of a value, you should print it out with `print(...)`.\",\n",
|
{
|
||||||
" func=python_repl.run,\n",
|
"cell_type": "code",
|
||||||
" )\n",
|
"execution_count": 13,
|
||||||
"]"
|
"id": "225713c8",
|
||||||
]
|
"metadata": {},
|
||||||
},
|
"outputs": [],
|
||||||
{
|
"source": [
|
||||||
"cell_type": "code",
|
"from langchain.memory.chat_message_histories import DynamoDBChatMessageHistory\n",
|
||||||
"execution_count": 17,
|
"\n",
|
||||||
"id": "fce085c5",
|
"history = DynamoDBChatMessageHistory(\n",
|
||||||
"metadata": {},
|
" table_name=\"SessionTable\",\n",
|
||||||
"outputs": [
|
" session_id=\"0\",\n",
|
||||||
|
" endpoint_url=\"http://localhost.localstack.cloud:4566\",\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"## DynamoDBChatMessageHistory With Different Keys Composite Keys\n",
|
||||||
|
"The default key for DynamoDBChatMessageHistory is ```{\"SessionId\": self.session_id}```, but you can modify this to match your table design.\n",
|
||||||
|
"\n",
|
||||||
|
"### Primary Key Name\n",
|
||||||
|
"You may modify the primary key by passing in a primary_key_name value in the constructor, resulting in the following:\n",
|
||||||
|
"```{self.primary_key_name: self.session_id}```\n",
|
||||||
|
"\n",
|
||||||
|
"### Composite Keys\n",
|
||||||
|
"When using an existing DynamoDB table, you may need to modify the key structure from the default of to something including a Sort Key. To do this you may use the ```key``` parameter.\n",
|
||||||
|
"\n",
|
||||||
|
"Passing a value for key will override the primary_key parameter, and the resulting key structure will be the passed value.\n"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false
|
||||||
|
},
|
||||||
|
"id": "c9bc0693"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 14,
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"0\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": "[HumanMessage(content='hello, composite dynamodb table!', additional_kwargs={}, example=False)]"
|
||||||
|
},
|
||||||
|
"execution_count": 14,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"from langchain.memory.chat_message_histories import DynamoDBChatMessageHistory\n",
|
||||||
|
"\n",
|
||||||
|
"composite_table = dynamodb.create_table(\n",
|
||||||
|
" TableName=\"CompositeTable\",\n",
|
||||||
|
" KeySchema=[{\"AttributeName\": \"PK\", \"KeyType\": \"HASH\"}, {\"AttributeName\": \"SK\", \"KeyType\": \"RANGE\"}],\n",
|
||||||
|
" AttributeDefinitions=[{\"AttributeName\": \"PK\", \"AttributeType\": \"S\"}, {\"AttributeName\": \"SK\", \"AttributeType\": \"S\"}],\n",
|
||||||
|
" BillingMode=\"PAY_PER_REQUEST\",\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"# Wait until the table exists.\n",
|
||||||
|
"composite_table.meta.client.get_waiter(\"table_exists\").wait(TableName=\"CompositeTable\")\n",
|
||||||
|
"\n",
|
||||||
|
"# Print out some data about the table.\n",
|
||||||
|
"print(composite_table.item_count)\n",
|
||||||
|
"\n",
|
||||||
|
"my_key = {\n",
|
||||||
|
" \"PK\": \"session_id::0\",\n",
|
||||||
|
" \"SK\": \"langchain_history\",\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"composite_key_history = DynamoDBChatMessageHistory(\n",
|
||||||
|
" table_name=\"CompositeTable\",\n",
|
||||||
|
" session_id=\"0\",\n",
|
||||||
|
" endpoint_url=\"http://localhost.localstack.cloud:4566\",\n",
|
||||||
|
" key=my_key,\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"composite_key_history.add_user_message(\"hello, composite dynamodb table!\")\n",
|
||||||
|
"\n",
|
||||||
|
"composite_key_history.messages"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false
|
||||||
|
},
|
||||||
|
"id": "a7fa0331"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"ename": "ValidationError",
|
"attachments": {},
|
||||||
"evalue": "1 validation error for ChatOpenAI\n__root__\n Did not find openai_api_key, please add an environment variable `OPENAI_API_KEY` which contains it, or pass `openai_api_key` as a named parameter. (type=value_error)",
|
"cell_type": "markdown",
|
||||||
"output_type": "error",
|
"id": "3b33c988",
|
||||||
"traceback": [
|
"metadata": {},
|
||||||
"\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
|
"source": [
|
||||||
"\u001B[0;31mValidationError\u001B[0m Traceback (most recent call last)",
|
"## Agent with DynamoDB Memory"
|
||||||
"Cell \u001B[0;32mIn[17], line 1\u001B[0m\n\u001B[0;32m----> 1\u001B[0m llm \u001B[38;5;241m=\u001B[39m \u001B[43mChatOpenAI\u001B[49m\u001B[43m(\u001B[49m\u001B[43mtemperature\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;241;43m0\u001B[39;49m\u001B[43m)\u001B[49m\n\u001B[1;32m 2\u001B[0m agent_chain \u001B[38;5;241m=\u001B[39m initialize_agent(\n\u001B[1;32m 3\u001B[0m tools,\n\u001B[1;32m 4\u001B[0m llm,\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 7\u001B[0m memory\u001B[38;5;241m=\u001B[39mmemory,\n\u001B[1;32m 8\u001B[0m )\n",
|
]
|
||||||
"File \u001B[0;32m~/Documents/projects/langchain/libs/langchain/langchain/load/serializable.py:74\u001B[0m, in \u001B[0;36mSerializable.__init__\u001B[0;34m(self, **kwargs)\u001B[0m\n\u001B[1;32m 73\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21m__init__\u001B[39m(\u001B[38;5;28mself\u001B[39m, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs: Any) \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m>\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[0;32m---> 74\u001B[0m \u001B[38;5;28;43msuper\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[38;5;21;43m__init__\u001B[39;49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 75\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_lc_kwargs \u001B[38;5;241m=\u001B[39m kwargs\n",
|
},
|
||||||
"File \u001B[0;32m~/Documents/projects/langchain/.venv/lib/python3.9/site-packages/pydantic/main.py:341\u001B[0m, in \u001B[0;36mpydantic.main.BaseModel.__init__\u001B[0;34m()\u001B[0m\n",
|
{
|
||||||
"\u001B[0;31mValidationError\u001B[0m: 1 validation error for ChatOpenAI\n__root__\n Did not find openai_api_key, please add an environment variable `OPENAI_API_KEY` which contains it, or pass `openai_api_key` as a named parameter. (type=value_error)"
|
"cell_type": "code",
|
||||||
]
|
"execution_count": 15,
|
||||||
|
"id": "f92d9499",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from langchain.agents import Tool\n",
|
||||||
|
"from langchain.memory import ConversationBufferMemory\n",
|
||||||
|
"from langchain.chat_models import ChatOpenAI\n",
|
||||||
|
"from langchain.agents import initialize_agent\n",
|
||||||
|
"from langchain.agents import AgentType\n",
|
||||||
|
"from langchain.utilities import PythonREPL\n",
|
||||||
|
"from getpass import getpass\n",
|
||||||
|
"\n",
|
||||||
|
"message_history = DynamoDBChatMessageHistory(table_name=\"SessionTable\", session_id=\"1\")\n",
|
||||||
|
"memory = ConversationBufferMemory(\n",
|
||||||
|
" memory_key=\"chat_history\", chat_memory=message_history, return_messages=True\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 16,
|
||||||
|
"id": "1167eeba",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"python_repl = PythonREPL()\n",
|
||||||
|
"\n",
|
||||||
|
"# You can create the tool to pass to an agent\n",
|
||||||
|
"tools = [\n",
|
||||||
|
" Tool(\n",
|
||||||
|
" name=\"python_repl\",\n",
|
||||||
|
" description=\"A Python shell. Use this to execute python commands. Input should be a valid python command. If you want to see the output of a value, you should print it out with `print(...)`.\",\n",
|
||||||
|
" func=python_repl.run,\n",
|
||||||
|
" )\n",
|
||||||
|
"]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 17,
|
||||||
|
"id": "fce085c5",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"ename": "ValidationError",
|
||||||
|
"evalue": "1 validation error for ChatOpenAI\n__root__\n Did not find openai_api_key, please add an environment variable `OPENAI_API_KEY` which contains it, or pass `openai_api_key` as a named parameter. (type=value_error)",
|
||||||
|
"output_type": "error",
|
||||||
|
"traceback": [
|
||||||
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||||
|
"\u001b[0;31mValidationError\u001b[0m Traceback (most recent call last)",
|
||||||
|
"Cell \u001b[0;32mIn[17], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m llm \u001b[38;5;241m=\u001b[39m \u001b[43mChatOpenAI\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtemperature\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2\u001b[0m agent_chain \u001b[38;5;241m=\u001b[39m initialize_agent(\n\u001b[1;32m 3\u001b[0m tools,\n\u001b[1;32m 4\u001b[0m llm,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 7\u001b[0m memory\u001b[38;5;241m=\u001b[39mmemory,\n\u001b[1;32m 8\u001b[0m )\n",
|
||||||
|
"File \u001b[0;32m~/Documents/projects/langchain/libs/langchain/langchain/load/serializable.py:74\u001b[0m, in \u001b[0;36mSerializable.__init__\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 73\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m---> 74\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 75\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_lc_kwargs \u001b[38;5;241m=\u001b[39m kwargs\n",
|
||||||
|
"File \u001b[0;32m~/Documents/projects/langchain/.venv/lib/python3.9/site-packages/pydantic/main.py:341\u001b[0m, in \u001b[0;36mpydantic.main.BaseModel.__init__\u001b[0;34m()\u001b[0m\n",
|
||||||
|
"\u001b[0;31mValidationError\u001b[0m: 1 validation error for ChatOpenAI\n__root__\n Did not find openai_api_key, please add an environment variable `OPENAI_API_KEY` which contains it, or pass `openai_api_key` as a named parameter. (type=value_error)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"llm = ChatOpenAI(temperature=0)\n",
|
||||||
|
"agent_chain = initialize_agent(\n",
|
||||||
|
" tools,\n",
|
||||||
|
" llm,\n",
|
||||||
|
" agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,\n",
|
||||||
|
" verbose=True,\n",
|
||||||
|
" memory=memory,\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "952a3103",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"agent_chain.run(input=\"Hello!\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "54c4aaf4",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"agent_chain.run(input=\"Who owns Twitter?\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "f9013118",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"agent_chain.run(input=\"My name is Bob.\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "405e5315",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"agent_chain.run(input=\"Who am I?\")\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.11.3"
|
||||||
}
|
}
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"llm = ChatOpenAI(temperature=0)\n",
|
|
||||||
"agent_chain = initialize_agent(\n",
|
|
||||||
" tools,\n",
|
|
||||||
" llm,\n",
|
|
||||||
" agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,\n",
|
|
||||||
" verbose=True,\n",
|
|
||||||
" memory=memory,\n",
|
|
||||||
")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "952a3103",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"agent_chain.run(input=\"Hello!\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "54c4aaf4",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"agent_chain.run(input=\"Who owns Twitter?\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "f9013118",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"agent_chain.run(input=\"My name is Bob.\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "405e5315",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"agent_chain.run(input=\"Who am I?\")\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3 (ipykernel)",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
},
|
||||||
"language_info": {
|
"nbformat": 4,
|
||||||
"codemirror_mode": {
|
"nbformat_minor": 5
|
||||||
"name": "ipython",
|
}
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.11.3"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 5
|
|
||||||
}
|
|
@ -0,0 +1,165 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "263f914c-9d67-4316-8b3d-03c3b99ba9d8",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"SEC filings data\n",
|
||||||
|
"=\n",
|
||||||
|
"\n",
|
||||||
|
"SEC filings data powered by [Kay.ai](https://kay.ai) and [Cybersyn](https://www.cybersyn.com/).\n",
|
||||||
|
"\n",
|
||||||
|
">The SEC filing is a financial statement or other formal document submitted to the U.S. Securities and Exchange Commission (SEC). Public companies, certain insiders, and broker-dealers are required to make regular SEC filings. Investors and financial professionals rely on these filings for information about companies they are evaluating for investment purposes."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "fc507b8e-ea51-417c-93da-42bf998a1195",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Setup\n",
|
||||||
|
"=\n",
|
||||||
|
"\n",
|
||||||
|
"First you will need to install the `kay` package. You will also need an API key: you can get one for free at [https://kay.ai](https://kay.ai/). Once you have an API key, you must set it as an environment variable `KAY_API_KEY`.\n",
|
||||||
|
"\n",
|
||||||
|
"In this example we're going to use the `KayAiRetriever`. Take a look at the [kay notebook](/docs/integrations/retrievers/kay) for more detailed information on the parameters that it accepts."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "c923bea0-585a-4f62-8662-efc167e8d793",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Examples\n",
|
||||||
|
"=\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"id": "f7b8c99c-0341-4f3c-912f-a11e98f7de71",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdin",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
" ········\n",
|
||||||
|
" ········\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Setup API keys for Kay and OpenAI\n",
|
||||||
|
"from getpass import getpass\n",
|
||||||
|
"KAY_API_KEY = getpass()\n",
|
||||||
|
"OPENAI_API_KEY = getpass()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"id": "04ee2d6b-c2ab-4e15-8a8b-afaf6ef8c0f6",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"os.environ[\"KAY_API_KEY\"] = KAY_API_KEY\n",
|
||||||
|
"os.environ[\"OPENAI_API_KEY\"] = OPENAI_API_KEY"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 7,
|
||||||
|
"id": "0c504bcd-f6e0-4028-a797-b31fb4b6d027",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from langchain.chains import ConversationalRetrievalChain\n",
|
||||||
|
"from langchain.chat_models import ChatOpenAI\n",
|
||||||
|
"from langchain.retrievers import KayAiRetriever\n",
|
||||||
|
"\n",
|
||||||
|
"model = ChatOpenAI(model_name=\"gpt-3.5-turbo\")\n",
|
||||||
|
"retriever = KayAiRetriever.create(dataset_id=\"company\", data_types=[\"10-K\", \"10-Q\"], num_contexts=6)\n",
|
||||||
|
"qa = ConversationalRetrievalChain.from_llm(model, retriever=retriever)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 11,
|
||||||
|
"id": "977f158b-38d3-4b5f-9379-7cdd09436327",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"-> **Question**: What are patterns in Nvidia's spend over the past three quarters? \n",
|
||||||
|
"\n",
|
||||||
|
"**Answer**: Based on the provided information, here are the patterns in NVIDIA's spend over the past three quarters:\n",
|
||||||
|
"\n",
|
||||||
|
"1. Research and Development Expenses:\n",
|
||||||
|
" - Q3 2022: Increased by 34% compared to Q3 2021.\n",
|
||||||
|
" - Q1 2023: Increased by 40% compared to Q1 2022.\n",
|
||||||
|
" - Q2 2022: Increased by 25% compared to Q2 2021.\n",
|
||||||
|
" \n",
|
||||||
|
" Overall, research and development expenses have been consistently increasing over the past three quarters.\n",
|
||||||
|
"\n",
|
||||||
|
"2. Sales, General and Administrative Expenses:\n",
|
||||||
|
" - Q3 2022: Increased by 8% compared to Q3 2021.\n",
|
||||||
|
" - Q1 2023: Increased by 14% compared to Q1 2022.\n",
|
||||||
|
" - Q2 2022: Decreased by 16% compared to Q2 2021.\n",
|
||||||
|
" \n",
|
||||||
|
" The pattern for sales, general and administrative expenses is not as consistent, with some quarters showing an increase and others showing a decrease.\n",
|
||||||
|
"\n",
|
||||||
|
"3. Total Operating Expenses:\n",
|
||||||
|
" - Q3 2022: Increased by 25% compared to Q3 2021.\n",
|
||||||
|
" - Q1 2023: Increased by 113% compared to Q1 2022.\n",
|
||||||
|
" - Q2 2022: Increased by 9% compared to Q2 2021.\n",
|
||||||
|
" \n",
|
||||||
|
" Total operating expenses have generally been increasing over the past three quarters, with a significant increase in Q1 2023.\n",
|
||||||
|
"\n",
|
||||||
|
"Overall, the pattern indicates a consistent increase in research and development expenses and total operating expenses, while sales, general and administrative expenses show some fluctuations. \n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"questions = [\n",
|
||||||
|
" \"What are patterns in Nvidia's spend over the past three quarters?\",\n",
|
||||||
|
" #\"What are some recent challenges faced by the renewable energy sector?\",\n",
|
||||||
|
"]\n",
|
||||||
|
"chat_history = []\n",
|
||||||
|
"\n",
|
||||||
|
"for question in questions:\n",
|
||||||
|
" result = qa({\"question\": question, \"chat_history\": chat_history})\n",
|
||||||
|
" chat_history.append((question, result[\"answer\"]))\n",
|
||||||
|
" print(f\"-> **Question**: {question} \\n\")\n",
|
||||||
|
" print(f\"**Answer**: {result['answer']} \\n\")"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.9.18"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,59 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, List
|
||||||
|
|
||||||
|
from langchain.callbacks.manager import CallbackManagerForRetrieverRun
|
||||||
|
from langchain.schema import BaseRetriever, Document
|
||||||
|
|
||||||
|
|
||||||
|
class KayAiRetriever(BaseRetriever):
    """
    Retriever for Kay.ai datasets.

    To work properly, expects you to have KAY_API_KEY env variable set.
    You can get one for free at https://kay.ai/.
    """

    # Kay client object; `create` builds it from `kay.rag.retrievers.KayRetriever`.
    client: Any
    # Number of contexts requested from the client on every query.
    num_contexts: int

    @classmethod
    def create(
        cls,
        dataset_id: str,
        data_types: List[str],
        num_contexts: int = 6,
    ) -> KayAiRetriever:
        """
        Create a KayAiRetriever given a Kay dataset id and a list of datasources.

        Args:
            dataset_id: A dataset id category in Kay, like "company"
            data_types: A list of datasources present within a dataset. For
                "company" the corresponding datasources could be
                ["10-K", "10-Q", "8-K", "PressRelease"].
            num_contexts: The number of documents to retrieve on each query.
                Defaults to 6.

        Returns:
            A retriever wrapping a freshly constructed Kay client.

        Raises:
            ImportError: If the `kay` package cannot be imported.
        """
        try:
            # Imported lazily so `kay` is only required when this retriever
            # is actually used.
            from kay.rag.retrievers import KayRetriever
        except ImportError as e:
            # Chain the original error so the root cause stays visible in
            # the traceback.
            raise ImportError(
                "Could not import kay python package. Please install it with "
                "`pip install kay`.",
            ) from e

        client = KayRetriever(dataset_id, data_types)
        return cls(client=client, num_contexts=num_contexts)

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """
        Query the Kay client and convert each returned context to a Document.

        Args:
            query: The query string forwarded to the client.
            run_manager: Callback manager for this retriever run (not used
                here).

        Returns:
            One Document per context whose "chunk_embed_text" value is not
            None; every other key of the context becomes document metadata.
        """
        ctxs = self.client.query(query=query, num_context=self.num_contexts)
        docs = []
        for ctx in ctxs:
            # NOTE(review): assumes each context is a dict-like mapping, as
            # the original `ctx.pop(...)` / `{**ctx}` usage implied.
            page_content = ctx.get("chunk_embed_text")
            if page_content is None:
                # Contexts without text are skipped.
                continue
            # Build the metadata without mutating the mapping handed back by
            # the client (the text itself is excluded from the metadata).
            metadata = {
                key: value for key, value in ctx.items() if key != "chunk_embed_text"
            }
            docs.append(Document(page_content=page_content, metadata=metadata))
        return docs
|
@ -0,0 +1,24 @@
|
|||||||
|
"""Integration test for Kay.ai API Wrapper."""
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from langchain.retrievers import KayAiRetriever
|
||||||
|
from langchain.schema import Document
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.requires("kay")
def test_kay_retriever() -> None:
    """Check that a Kay-backed retriever returns the requested documents.

    Builds a retriever over the "company" dataset, runs a single query and
    verifies that exactly three non-empty documents with metadata come back.
    """
    kay_retriever = KayAiRetriever.create(
        dataset_id="company",
        data_types=["10-K", "10-Q", "8-K", "PressRelease"],
        num_contexts=3,
    )
    query = (
        "What were the biggest strategy changes and partnerships made by Roku "
        "in 2023?"
    )
    results = kay_retriever.get_relevant_documents(query)

    # The retriever was asked for three contexts, so three documents are
    # expected back.
    assert len(results) == 3
    for result in results:
        assert isinstance(result, Document)
        assert result.page_content
        assert result.metadata
        metadata_pairs = list(result.metadata.items())
        assert len(metadata_pairs) > 0
|
Loading…
Reference in New Issue