{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Get embeddings\n",
    "\n",
    "The function `get_embedding` will give us an embedding for an input text."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "12288"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import openai\n",
    "\n",
    "# Request an embedding straight from the API, then pull the vector out of the response.\n",
    "response = openai.Embedding.create(\n",
    "    engine=\"text-similarity-davinci-001\",\n",
    "    input=\"Sample document text goes here\",\n",
    ")\n",
    "embedding = response[\"data\"][0][\"embedding\"]\n",
    "len(embedding)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1024\n"
     ]
    }
   ],
   "source": [
    "import openai\n",
    "from tenacity import retry, stop_after_attempt, wait_random_exponential\n",
    "\n",
    "\n",
    "@retry(stop=stop_after_attempt(6), wait=wait_random_exponential(min=1, max=20))\n",
    "def get_embedding(text: str, engine=\"text-similarity-davinci-001\") -> list[float]:\n",
    "    \"\"\"Return the embedding of `text` produced by the model named `engine`.\n",
    "\n",
    "    Newlines in `text` are replaced with spaces before the request is made.\n",
    "    The `retry` decorator re-runs the call on failure, for at most 6 attempts,\n",
    "    with `wait_random_exponential(min=1, max=20)` pauses between attempts.\n",
    "    \"\"\"\n",
    "    # Newlines can negatively affect performance, so swap them for spaces.\n",
    "    flattened = text.replace(\"\\n\", \" \")\n",
    "\n",
    "    response = openai.Embedding.create(input=[flattened], engine=engine)\n",
    "    return response[\"data\"][0][\"embedding\"]\n",
    "\n",
    "\n",
    "embedding = get_embedding(\"Sample query text goes here\", engine=\"text-search-ada-query-001\")\n",
    "print(len(embedding))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1024\n"
     ]
    }
   ],
   "source": [
    "# Same helper, this time with the document-side search engine.\n",
    "embedding = get_embedding(\n",
    "    \"Sample document text goes here\",\n",
    "    engine=\"text-search-ada-doc-001\",\n",
    ")\n",
    "print(len(embedding))\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.9.9 ('openai')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.9"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "365536dcbde60510dc9073d6b991cd35db2d9bac356a11f5b64279a5e6708b97"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}