You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
119 lines
3.1 KiB
Plaintext
119 lines
3.1 KiB
Plaintext
2 years ago
|
{
|
||
|
"cells": [
|
||
|
{
|
||
8 months ago
|
"attachments": {},
|
||
2 years ago
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
8 months ago
|
"## Using embeddings\n",
|
||
2 years ago
|
"\n",
|
||
10 months ago
|
"This notebook contains some helpful snippets you can use to embed text with the 'text-embedding-ada-002' model via the OpenAI API."
|
||
2 years ago
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 1,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
2 years ago
|
"1536"
|
||
2 years ago
|
]
|
||
|
},
|
||
|
"execution_count": 1,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"import openai\n",
|
||
|
"\n",
|
||
2 years ago
|
"embedding = openai.Embedding.create(\n",
|
||
1 year ago
|
" input=\"Your text goes here\", model=\"text-embedding-ada-002\"\n",
|
||
2 years ago
|
")[\"data\"][0][\"embedding\"]\n",
|
||
|
"len(embedding)\n"
|
||
2 years ago
|
]
|
||
|
},
|
||
10 months ago
|
{
|
||
8 months ago
|
"attachments": {},
|
||
10 months ago
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
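|
"We recommend using the 'tenacity' package or another exponential backoff implementation to handle API rate limits, since sending too many requests too quickly can trigger rate-limit errors. The first cell below is a negative example that calls the API in a tight loop with no retries; the 'Best practice' cell after it wraps the call in a retrying function, which helps you get your embeddings as fast as possible without failing when a rate limit is hit."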
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Negative example (slow and rate-limited)\n",
|
||
|
"import openai\n",
|
||
|
"\n",
|
||
|
"num_embeddings = 10000 # Some large number\n",
|
||
|
"for i in range(num_embeddings):\n",
|
||
|
" embedding = openai.Embedding.create(\n",
|
||
|
" input=\"Your text goes here\", model=\"text-embedding-ada-002\"\n",
|
||
|
" )[\"data\"][0][\"embedding\"]\n",
|
||
|
" print(len(embedding))"
|
||
|
]
|
||
|
},
|
||
2 years ago
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 2,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
2 years ago
|
"1536\n"
|
||
2 years ago
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
10 months ago
|
"# Best practice\n",
|
||
2 years ago
|
"import openai\n",
|
||
|
"from tenacity import retry, wait_random_exponential, stop_after_attempt\n",
|
||
|
"\n",
|
||
10 months ago
|
"# Make up to 6 attempts in total, using randomized exponential backoff with waits bounded between 1 and 20 seconds\n",
|
||
2 years ago
|
"@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))\n",
|
||
1 year ago
|
"def get_embedding(text: str, model=\"text-embedding-ada-002\") -> list[float]:\n",
|
||
|
" return openai.Embedding.create(input=[text], model=model)[\"data\"][0][\"embedding\"]\n",
|
||
2 years ago
|
"\n",
|
||
1 year ago
|
"embedding = get_embedding(\"Your text goes here\", model=\"text-embedding-ada-002\")\n",
|
||
10 months ago
|
"print(len(embedding))"
|
||
2 years ago
|
]
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
2 years ago
|
"display_name": "Python 3.9.9 ('openai')",
|
||
|
"language": "python",
|
||
2 years ago
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
2 years ago
|
"version": "3.9.9"
|
||
2 years ago
|
},
|
||
2 years ago
|
"orig_nbformat": 4,
|
||
|
"vscode": {
|
||
|
"interpreter": {
|
||
|
"hash": "365536dcbde60510dc9073d6b991cd35db2d9bac356a11f5b64279a5e6708b97"
|
||
|
}
|
||
|
}
|
||
2 years ago
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 2
|
||
|
}
|