mirror of
https://github.com/openai/openai-cookbook
synced 2024-11-13 07:10:30 +00:00
This commit is contained in:
parent
d4eaa46471
commit
6197cfc8b6
@ -235,10 +235,10 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from openai.embeddings_utils import get_embedding\n",
|
||||
"from utils.embeddings_utils import get_embedding\n",
|
||||
"\n",
|
||||
"df = pd.DataFrame(all_funcs)\n",
|
||||
"df['code_embedding'] = df['code'].apply(lambda x: get_embedding(x, engine='text-embedding-ada-002'))\n",
|
||||
"df['code_embedding'] = df['code'].apply(lambda x: get_embedding(x, model='text-embedding-ada-002'))\n",
|
||||
"df['filepath'] = df['filepath'].map(lambda x: Path(x).relative_to(code_root))\n",
|
||||
"df.to_csv(\"data/code_search_openai-python.csv\", index=False)\n",
|
||||
"df.head()"
|
||||
@ -266,10 +266,10 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from openai.embeddings_utils import cosine_similarity\n",
|
||||
"from utils.embeddings_utils import cosine_similarity\n",
|
||||
"\n",
|
||||
"def search_functions(df, code_query, n=3, pprint=True, n_lines=7):\n",
|
||||
" embedding = get_embedding(code_query, engine='text-embedding-ada-002')\n",
|
||||
" embedding = get_embedding(code_query, model='text-embedding-ada-002')\n",
|
||||
" df['similarities'] = df.code_embedding.apply(lambda x: cosine_similarity(x, embedding))\n",
|
||||
"\n",
|
||||
" res = df.sort_values('similarities', ascending=False).head(n)\n",
|
||||
|
@ -51,7 +51,7 @@
|
||||
"from sklearn.model_selection import train_test_split # for splitting train & test data\n",
|
||||
"import torch # for matrix optimization\n",
|
||||
"\n",
|
||||
"from openai.embeddings_utils import get_embedding, cosine_similarity # for embeddings\n"
|
||||
"from utils.embeddings_utils import get_embedding, cosine_similarity # for embeddings\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -193,7 +193,7 @@
|
||||
"source": [
|
||||
"def request_completion(prompt):\n",
|
||||
"\n",
|
||||
" completion_response = openai.Completion.create(\n",
|
||||
" completion_response = openai.completions.create(\n",
|
||||
" prompt=prompt,\n",
|
||||
" temperature=0,\n",
|
||||
" max_tokens=5,\n",
|
||||
@ -211,7 +211,7 @@
|
||||
" prompt = prompt.replace('DESCRIPTION_TEXT',transaction['Description'])\n",
|
||||
" prompt = prompt.replace('TRANSACTION_VALUE',str(transaction['Transaction value (£)']))\n",
|
||||
"\n",
|
||||
" classification = request_completion(prompt)['choices'][0]['text'].replace('\\n','')\n",
|
||||
" classification = request_completion(prompt).choices[0].text.replace('\\n','')\n",
|
||||
"\n",
|
||||
" return classification\n",
|
||||
"\n",
|
||||
@ -304,7 +304,7 @@
|
||||
"\n",
|
||||
"# Use our completion function to return a prediction\n",
|
||||
"completion_response = request_completion(prompt)\n",
|
||||
"print(completion_response['choices'][0]['text'])\n"
|
||||
"print(completion_response.choices[0].text)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -351,7 +351,7 @@
|
||||
" Building Improvement 14\n",
|
||||
" Could not classify 5\n",
|
||||
" Literature & Archive 3\n",
|
||||
" Software/IT 2\n",
|
||||
" Software/IT 2\n",
|
||||
" Utility Bills 1\n",
|
||||
"Name: Classification, dtype: int64"
|
||||
]
|
||||
@ -916,8 +916,8 @@
|
||||
"source": [
|
||||
"from utils.embeddings_utils import get_embedding\n",
|
||||
"\n",
|
||||
"df['babbage_similarity'] = df.combined.apply(lambda x: get_embedding(x, engine='text-similarity-babbage-001'))\n",
|
||||
"df['babbage_search'] = df.combined.apply(lambda x: get_embedding(x, engine='text-search-babbage-doc-001'))\n",
|
||||
"df['babbage_similarity'] = df.combined.apply(lambda x: get_embedding(x, model='text-similarity-babbage-001'))\n",
|
||||
"df['babbage_search'] = df.combined.apply(lambda x: get_embedding(x, model='text-search-babbage-doc-001'))\n",
|
||||
"df.to_csv(embedding_path)\n"
|
||||
]
|
||||
},
|
||||
@ -2203,7 +2203,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.8"
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -59,7 +59,7 @@
|
||||
"def search_reviews(df, product_description, n=3, pprint=True):\n",
|
||||
" product_embedding = get_embedding(\n",
|
||||
" product_description,\n",
|
||||
" engine=\"text-embedding-ada-002\"\n",
|
||||
" model=\"text-embedding-ada-002\"\n",
|
||||
" )\n",
|
||||
" df[\"similarity\"] = df.embedding.apply(lambda x: cosine_similarity(x, product_embedding))\n",
|
||||
"\n",
|
||||
|
@ -138,7 +138,7 @@
|
||||
"source": [
|
||||
"from utils.embeddings_utils import get_embeddings\n",
|
||||
"# NOTE: The following code will send a query of batch size 200 to /embeddings\n",
|
||||
"matrix = get_embeddings(samples[\"text\"].to_list(), engine=\"text-embedding-ada-002\")\n"
|
||||
"matrix = get_embeddings(samples[\"text\"].to_list(), model=\"text-embedding-ada-002\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -93,7 +93,7 @@
|
||||
" labels = ['negative', 'positive'],\n",
|
||||
" model = EMBEDDING_MODEL,\n",
|
||||
"):\n",
|
||||
" label_embeddings = [get_embedding(label, engine=model) for label in labels]\n",
|
||||
" label_embeddings = [get_embedding(label, model=model) for label in labels]\n",
|
||||
"\n",
|
||||
" def label_score(review_embedding, label_embeddings):\n",
|
||||
" return cosine_similarity(review_embedding, label_embeddings[1]) - cosine_similarity(review_embedding, label_embeddings[0])\n",
|
||||
|
@ -15,51 +15,53 @@ import pandas as pd
|
||||
|
||||
|
||||
@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
|
||||
def get_embedding(text: str, engine="text-similarity-davinci-001", **kwargs) -> List[float]:
|
||||
def get_embedding(text: str, model="text-similarity-davinci-001", **kwargs) -> List[float]:
|
||||
|
||||
# replace newlines, which can negatively affect performance.
|
||||
text = text.replace("\n", " ")
|
||||
|
||||
return openai.Embedding.create(input=[text], engine=engine, **kwargs)["data"][0]["embedding"]
|
||||
response = openai.embeddings.create(input=[text], model=model, **kwargs)
|
||||
|
||||
return response.data[0].embedding
|
||||
|
||||
|
||||
@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
|
||||
async def aget_embedding(
|
||||
text: str, engine="text-similarity-davinci-001", **kwargs
|
||||
text: str, model="text-similarity-davinci-001", **kwargs
|
||||
) -> List[float]:
|
||||
|
||||
# replace newlines, which can negatively affect performance.
|
||||
text = text.replace("\n", " ")
|
||||
|
||||
return (await openai.Embedding.acreate(input=[text], engine=engine, **kwargs))["data"][0][
|
||||
return (await openai.embeddings.create(input=[text], model=model, **kwargs))["data"][0][
|
||||
"embedding"
|
||||
]
|
||||
|
||||
|
||||
@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
|
||||
def get_embeddings(
|
||||
list_of_text: List[str], engine="text-similarity-babbage-001", **kwargs
|
||||
list_of_text: List[str], model="text-similarity-babbage-001", **kwargs
|
||||
) -> List[List[float]]:
|
||||
assert len(list_of_text) <= 2048, "The batch size should not be larger than 2048."
|
||||
|
||||
# replace newlines, which can negatively affect performance.
|
||||
list_of_text = [text.replace("\n", " ") for text in list_of_text]
|
||||
|
||||
data = openai.Embedding.create(input=list_of_text, engine=engine, **kwargs).data
|
||||
return [d["embedding"] for d in data]
|
||||
data = openai.embeddings.create(input=list_of_text, model=model, **kwargs).data
|
||||
return [d.embedding for d in data]
|
||||
|
||||
|
||||
@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
|
||||
async def aget_embeddings(
|
||||
list_of_text: List[str], engine="text-similarity-babbage-001", **kwargs
|
||||
list_of_text: List[str], model="text-similarity-babbage-001", **kwargs
|
||||
) -> List[List[float]]:
|
||||
assert len(list_of_text) <= 2048, "The batch size should not be larger than 2048."
|
||||
|
||||
# replace newlines, which can negatively affect performance.
|
||||
list_of_text = [text.replace("\n", " ") for text in list_of_text]
|
||||
|
||||
data = (await openai.Embedding.acreate(input=list_of_text, engine=engine, **kwargs)).data
|
||||
return [d["embedding"] for d in data]
|
||||
data = (await openai.embeddings.create(input=list_of_text, model=model, **kwargs)).data
|
||||
return [d.embedding for d in data]
|
||||
|
||||
|
||||
def cosine_similarity(a, b):
|
||||
|
Loading…
Reference in New Issue
Block a user