[web-q-and-a] remove unnecessary cell (#309)

pull/740/head
Hoang Viet Khoa 8 months ago committed by GitHub
parent 6610ba54ad
commit dd95f4215d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -932,102 +932,6 @@
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>text</th>\n",
" <th>n_tokens</th>\n",
" <th>embeddings</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>blog authors maddie. Maddie Hall - OpenAI ...</td>\n",
" <td>175</td>\n",
" <td>[-0.012958061881363392, -0.006103983614593744,...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>blog authors tom. Tom Brown - OpenAI ...</td>\n",
" <td>228</td>\n",
" <td>[-0.0053874170407652855, -0.009962032549083233...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>blog openai scholars 2019 final projects. Op...</td>\n",
" <td>492</td>\n",
" <td>[0.0019150723237544298, -0.0070442273281514645...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>In this project, I used curiosity-driven explo...</td>\n",
" <td>478</td>\n",
" <td>[-0.0067560747265815735, 0.0004431474662851542...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Results revealed that the optimal RL policies ...</td>\n",
" <td>499</td>\n",
" <td>[-0.012868616729974747, 0.0029640409629791975,...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" text n_tokens \\\n",
"0 blog authors maddie. Maddie Hall - OpenAI ... 175 \n",
"1 blog authors tom. Tom Brown - OpenAI ... 228 \n",
"2 blog openai scholars 2019 final projects. Op... 492 \n",
"3 In this project, I used curiosity-driven explo... 478 \n",
"4 Results revealed that the optimal RL policies ... 499 \n",
"\n",
" embeddings \n",
"0 [-0.012958061881363392, -0.006103983614593744,... \n",
"1 [-0.0053874170407652855, -0.009962032549083233... \n",
"2 [0.0019150723237544298, -0.0070442273281514645... \n",
"3 [-0.0067560747265815735, 0.0004431474662851542... \n",
"4 [-0.012868616729974747, 0.0029640409629791975,... "
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import os\n",
"\n",
"# NOTE(review): not referenced in this cell; kept because later cells may rely on it.\n",
"from openai.embeddings_utils import distances_from_embeddings\n",
"\n",
"\n",
"def embed_text(text, engine='text-embedding-ada-002'):\n",
"    \"\"\"Request an embedding for `text` and return the vector from the first result.\"\"\"\n",
"    response = openai.Embedding.create(input=text, engine=engine)\n",
"    return response['data'][0]['embedding']\n",
"\n",
"\n",
"# Create the output directory up front: to_csv raises OSError if it is missing,\n",
"# and failing after the loop would throw away every API response already fetched.\n",
"os.makedirs('processed', exist_ok=True)\n",
"\n",
"# One API request per row of df.text; results are stored in a new column.\n",
"df['embeddings'] = df.text.apply(embed_text)\n",
"\n",
"df.to_csv('processed/embeddings.csv')\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 31,

Loading…
Cancel
Save