You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
openai-cookbook/examples/Using_embeddings.ipynb

122 lines
3.2 KiB
Plaintext

{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Using embeddings\n",
"\n",
"This notebook contains some helpful snippets you can use to embed text with the `text-embedding-3-small` model via the OpenAI API."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1536"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from openai import OpenAI\n",
"client = OpenAI()\n",
"\n",
"embedding = client.embeddings.create(\n",
" input=\"Your text goes here\", model=\"text-embedding-3-small\"\n",
").data[0].embedding\n",
"len(embedding)\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"It's recommended to use the 'tenacity' package or another exponential backoff implementation to better manage API rate limits, as hitting the API too much too fast can trigger rate limits. Using the following function ensures you get your embeddings as fast as possible."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Negative example (slow and rate-limited)\n",
"from openai import OpenAI\n",
"client = OpenAI()\n",
"\n",
"num_embeddings = 10000 # Some large number\n",
"for i in range(num_embeddings):\n",
" embedding = client.embeddings.create(\n",
" input=\"Your text goes here\", model=\"text-embedding-3-small\"\n",
" ).data[0].embedding\n",
" print(len(embedding))"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1536\n"
]
}
],
"source": [
"# Best practice\n",
"from tenacity import retry, wait_random_exponential, stop_after_attempt\n",
"from openai import OpenAI\n",
"client = OpenAI()\n",
"\n",
"# Retry up to 6 times with exponential backoff, starting at 1 second and maxing out at 20 seconds delay\n",
"@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))\n",
"def get_embedding(text: str, model=\"text-embedding-3-small\") -> list[float]:\n",
" return client.embeddings.create(input=[text], model=model).data[0].embedding\n",
"\n",
"embedding = get_embedding(\"Your text goes here\", model=\"text-embedding-3-small\")\n",
"print(len(embedding))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.9 ('openai')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.9"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "365536dcbde60510dc9073d6b991cd35db2d9bac356a11f5b64279a5e6708b97"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}