mirror of
https://github.com/hwchase17/langchain
synced 2024-10-29 17:07:25 +00:00
8eea46ed0e
## Description
This PR adds the `aembed_query` and `aembed_documents` async methods to improve embedding generation for large documents. The implementation uses asyncio tasks and `gather` to achieve concurrency, because boto3 does not provide an async Bedrock API.

### Maintainers
@agola11 @aarora79

### Open questions
To avoid throttling by the Bedrock API, should there be an option to limit the concurrency of the calls?
100 lines
2.1 KiB
Plaintext
100 lines
2.1 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "75e378f5-55d7-44b6-8e2e-6d7b8b171ec4",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Bedrock Embeddings"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "2dbe40fa-7c0b-4bcb-a712-230bf613a42f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"%pip install boto3"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "282239c8-e03a-4abc-86c1-ca6120231a20",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from langchain.embeddings import BedrockEmbeddings\n",
|
|
"\n",
|
|
"embeddings = BedrockEmbeddings(\n",
|
|
" credentials_profile_name=\"bedrock-admin\", region_name=\"us-east-1\"\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "19a46868-4bed-40cd-89ca-9813fbfda9cb",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"embeddings.embed_query(\"This is a content of the document\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "cf0349c4-6408-4342-8691-69276a388784",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"embeddings.embed_documents([\"This is a content of the document\", \"This is another document\"])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "9f6b364d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# async embed query\n",
|
|
"await embeddings.aembed_query(\"This is a content of the document\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "c9240a5a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# async embed documents\n",
|
|
"await embeddings.aembed_documents([\"This is a content of the document\", \"This is another document\"])"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.9.13"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|