diff --git a/examples/Code_search_using_embeddings.ipynb b/examples/Code_search_using_embeddings.ipynb
index a50babf..3a86560 100644
--- a/examples/Code_search_using_embeddings.ipynb
+++ b/examples/Code_search_using_embeddings.ipynb
@@ -235,10 +235,10 @@
     }
    ],
    "source": [
-    "from openai.embeddings_utils import get_embedding\n",
+    "from utils.embeddings_utils import get_embedding\n",
     "\n",
     "df = pd.DataFrame(all_funcs)\n",
-    "df['code_embedding'] = df['code'].apply(lambda x: get_embedding(x, engine='text-embedding-ada-002'))\n",
+    "df['code_embedding'] = df['code'].apply(lambda x: get_embedding(x, model='text-embedding-ada-002'))\n",
     "df['filepath'] = df['filepath'].map(lambda x: Path(x).relative_to(code_root))\n",
     "df.to_csv(\"data/code_search_openai-python.csv\", index=False)\n",
     "df.head()"
@@ -266,10 +266,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from openai.embeddings_utils import cosine_similarity\n",
+    "from utils.embeddings_utils import cosine_similarity\n",
     "\n",
     "def search_functions(df, code_query, n=3, pprint=True, n_lines=7):\n",
-    "    embedding = get_embedding(code_query, engine='text-embedding-ada-002')\n",
+    "    embedding = get_embedding(code_query, model='text-embedding-ada-002')\n",
     "    df['similarities'] = df.code_embedding.apply(lambda x: cosine_similarity(x, embedding))\n",
     "\n",
     "    res = df.sort_values('similarities', ascending=False).head(n)\n",
diff --git a/examples/Customizing_embeddings.ipynb b/examples/Customizing_embeddings.ipynb
index 4068254..eaaa871 100644
--- a/examples/Customizing_embeddings.ipynb
+++ b/examples/Customizing_embeddings.ipynb
@@ -51,7 +51,7 @@
     "from sklearn.model_selection import train_test_split # for splitting train & test data\n",
     "import torch # for matrix optimization\n",
     "\n",
-    "from openai.embeddings_utils import get_embedding, cosine_similarity # for embeddings\n"
+    "from utils.embeddings_utils import get_embedding, cosine_similarity # for embeddings\n"
    ]
   },
   {
diff --git a/examples/Multiclass_classification_for_transactions.ipynb b/examples/Multiclass_classification_for_transactions.ipynb
index 9f500c9..cc8b7f7 100644
--- a/examples/Multiclass_classification_for_transactions.ipynb
+++ b/examples/Multiclass_classification_for_transactions.ipynb
@@ -193,7 +193,7 @@
    "source": [
     "def request_completion(prompt):\n",
     "\n",
-    "    completion_response = openai.Completion.create(\n",
+    "    completion_response = openai.completions.create(\n",
     "        prompt=prompt,\n",
     "        temperature=0,\n",
     "        max_tokens=5,\n",
@@ -211,7 +211,7 @@
     "    prompt = prompt.replace('DESCRIPTION_TEXT',transaction['Description'])\n",
     "    prompt = prompt.replace('TRANSACTION_VALUE',str(transaction['Transaction value (£)']))\n",
     "\n",
-    "    classification = request_completion(prompt)['choices'][0]['text'].replace('\\n','')\n",
+    "    classification = request_completion(prompt).choices[0].text.replace('\\n','')\n",
     "\n",
     "    return classification\n",
     "\n",
@@ -304,7 +304,7 @@
     "\n",
     "# Use our completion function to return a prediction\n",
     "completion_response = request_completion(prompt)\n",
-    "print(completion_response['choices'][0]['text'])\n"
+    "print(completion_response.choices[0].text)\n"
    ]
   },
   {
@@ -351,7 +351,7 @@
     "    Building Improvement    14\n",
     "    Could not classify       5\n",
     "    Literature & Archive     3\n",
-    "    Software/IT              2\n",
+"    Software/IT              2\n",
     "    Utility Bills            1\n",
     "Name: Classification, dtype: int64"
    ]
@@ -916,8 +916,8 @@
    "source": [
     "from utils.embeddings_utils import get_embedding\n",
     "\n",
-    "df['babbage_similarity'] = df.combined.apply(lambda x: get_embedding(x, engine='text-similarity-babbage-001'))\n",
-    "df['babbage_search'] = df.combined.apply(lambda x: get_embedding(x, engine='text-search-babbage-doc-001'))\n",
+    "df['babbage_similarity'] = df.combined.apply(lambda x: get_embedding(x, model='text-similarity-babbage-001'))\n",
+    "df['babbage_search'] = df.combined.apply(lambda x: get_embedding(x, model='text-search-babbage-doc-001'))\n",
     "df.to_csv(embedding_path)\n"
    ]
   },
@@ -2203,7 +2203,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.8"
+   "version": "3.11.3"
   }
  },
  "nbformat": 4,
diff --git a/examples/Semantic_text_search_using_embeddings.ipynb b/examples/Semantic_text_search_using_embeddings.ipynb
index f6e59ff..6d3ee37 100644
--- a/examples/Semantic_text_search_using_embeddings.ipynb
+++ b/examples/Semantic_text_search_using_embeddings.ipynb
@@ -59,7 +59,7 @@
     "def search_reviews(df, product_description, n=3, pprint=True):\n",
     "    product_embedding = get_embedding(\n",
     "        product_description,\n",
-    "        engine=\"text-embedding-ada-002\"\n",
+    "        model=\"text-embedding-ada-002\"\n",
     "    )\n",
     "    df[\"similarity\"] = df.embedding.apply(lambda x: cosine_similarity(x, product_embedding))\n",
     "\n",
diff --git a/examples/Visualizing_embeddings_in_3D.ipynb b/examples/Visualizing_embeddings_in_3D.ipynb
index d0b923c..34b55f9 100644
--- a/examples/Visualizing_embeddings_in_3D.ipynb
+++ b/examples/Visualizing_embeddings_in_3D.ipynb
@@ -138,7 +138,7 @@
    "source": [
     "from utils.embeddings_utils import get_embeddings\n",
     "# NOTE: The following code will send a query of batch size 200 to /embeddings\n",
-    "matrix = get_embeddings(samples[\"text\"].to_list(), engine=\"text-embedding-ada-002\")\n"
+    "matrix = get_embeddings(samples[\"text\"].to_list(), model=\"text-embedding-ada-002\")\n"
    ]
   },
   {
diff --git a/examples/Zero-shot_classification_with_embeddings.ipynb b/examples/Zero-shot_classification_with_embeddings.ipynb
index 7bb331c..ebe2eca 100644
--- a/examples/Zero-shot_classification_with_embeddings.ipynb
+++ b/examples/Zero-shot_classification_with_embeddings.ipynb
@@ -93,7 +93,7 @@
     "    labels = ['negative', 'positive'],\n",
     "    model = EMBEDDING_MODEL,\n",
     "):\n",
-    "    label_embeddings = [get_embedding(label, engine=model) for label in labels]\n",
+    "    label_embeddings = [get_embedding(label, model=model) for label in labels]\n",
     "\n",
     "    def label_score(review_embedding, label_embeddings):\n",
     "        return cosine_similarity(review_embedding, label_embeddings[1]) - cosine_similarity(review_embedding, label_embeddings[0])\n",
diff --git a/examples/utils/embeddings_utils.py b/examples/utils/embeddings_utils.py
index efb306d..ed39114 100644
--- a/examples/utils/embeddings_utils.py
+++ b/examples/utils/embeddings_utils.py
@@ -15,51 +15,53 @@ import pandas as pd
 
 
 @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
-def get_embedding(text: str, engine="text-similarity-davinci-001", **kwargs) -> List[float]:
+def get_embedding(text: str, model="text-similarity-davinci-001", **kwargs) -> List[float]:
     # replace newlines, which can negatively affect performance.
     text = text.replace("\n", " ")
 
-    return openai.Embedding.create(input=[text], engine=engine, **kwargs)["data"][0]["embedding"]
+    response = openai.embeddings.create(input=[text], model=model, **kwargs)
+
+    return response.data[0].embedding
 
 
 @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
 async def aget_embedding(
-    text: str, engine="text-similarity-davinci-001", **kwargs
+    text: str, model="text-similarity-davinci-001", **kwargs
 ) -> List[float]:
     # replace newlines, which can negatively affect performance.
     text = text.replace("\n", " ")
 
-    return (await openai.Embedding.acreate(input=[text], engine=engine, **kwargs))["data"][0][
-        "embedding"
-    ]
+    # the module-level v1 embeddings API is synchronous, so use an async client
+    response = await openai.AsyncOpenAI().embeddings.create(input=[text], model=model, **kwargs)
+    return response.data[0].embedding
 
 
 @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
 def get_embeddings(
-    list_of_text: List[str], engine="text-similarity-babbage-001", **kwargs
+    list_of_text: List[str], model="text-similarity-babbage-001", **kwargs
 ) -> List[List[float]]:
     assert len(list_of_text) <= 2048, "The batch size should not be larger than 2048."
 
     # replace newlines, which can negatively affect performance.
     list_of_text = [text.replace("\n", " ") for text in list_of_text]
 
-    data = openai.Embedding.create(input=list_of_text, engine=engine, **kwargs).data
-    return [d["embedding"] for d in data]
+    data = openai.embeddings.create(input=list_of_text, model=model, **kwargs).data
+    return [d.embedding for d in data]
 
 
 @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
 async def aget_embeddings(
-    list_of_text: List[str], engine="text-similarity-babbage-001", **kwargs
+    list_of_text: List[str], model="text-similarity-babbage-001", **kwargs
 ) -> List[List[float]]:
     assert len(list_of_text) <= 2048, "The batch size should not be larger than 2048."
 
     # replace newlines, which can negatively affect performance.
     list_of_text = [text.replace("\n", " ") for text in list_of_text]
 
-    data = (await openai.Embedding.acreate(input=list_of_text, engine=engine, **kwargs)).data
-    return [d["embedding"] for d in data]
+    data = (await openai.AsyncOpenAI().embeddings.create(input=list_of_text, model=model, **kwargs)).data
+    return [d.embedding for d in data]
 
 
 def cosine_similarity(a, b):
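
For reference, every hunk above applies the same openai-python v1 migration: `engine=` becomes `model=`, `openai.Embedding.create` / `openai.Completion.create` become `openai.embeddings.create` / `openai.completions.create`, and dict-style response access becomes attribute access on the v1 response objects. A minimal sketch of the resulting call pattern, assuming `openai>=1.0.0` and an `OPENAI_API_KEY` in the environment (the model name is simply the one these notebooks use):

import openai  # reads OPENAI_API_KEY from the environment

# v1 call: the `model=` argument replaces the removed `engine=` argument
response = openai.embeddings.create(
    input=["sample text to embed"],
    model="text-embedding-ada-002",
)

# v1 responses are pydantic models, so use attribute access
# rather than response["data"][0]["embedding"]
embedding = response.data[0].embedding
print(len(embedding))  # text-embedding-ada-002 returns 1536-dimensional vectors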