diff --git a/examples/Regression_using_embeddings.ipynb b/examples/Regression_using_embeddings.ipynb index cf99894a..abaaad6b 100644 --- a/examples/Regression_using_embeddings.ipynb +++ b/examples/Regression_using_embeddings.ipynb @@ -13,14 +13,14 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Babbage similarity embedding performance on 1k Amazon reviews: mse=0.38, mae=0.39\n" + "Babbage similarity embedding performance on 1k Amazon reviews: mse=0.39, mae=0.38\n" ] } ], @@ -32,39 +32,41 @@ "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import mean_squared_error, mean_absolute_error\n", "\n", - "df = pd.read_csv('output/embedded_1k_reviews.csv')\n", - "df['babbage_similarity'] = df.babbage_similarity.apply(eval).apply(np.array)\n", + "datafile_path = \"https://cdn.openai.com/API/examples/data/fine_food_reviews_with_embeddings_1k.csv\" # for your convenience, we precomputed the embeddings\n", + "df = pd.read_csv(datafile_path)\n", + "df[\"babbage_similarity\"] = df.babbage_similarity.apply(eval).apply(np.array)\n", "\n", - "X_train, X_test, y_train, y_test = train_test_split(list(df.babbage_similarity.values), df.Score, test_size = 0.2, random_state=42)\n", + "X_train, X_test, y_train, y_test = train_test_split(list(df.babbage_similarity.values), df.Score, test_size=0.2, random_state=42)\n", "\n", "rfr = RandomForestRegressor(n_estimators=100)\n", "rfr.fit(X_train, y_train)\n", "preds = rfr.predict(X_test)\n", "\n", - "\n", "mse = mean_squared_error(y_test, preds)\n", "mae = mean_absolute_error(y_test, preds)\n", "\n", - "print(f\"Babbage similarity embedding performance on 1k Amazon reviews: mse={mse:.2f}, mae={mae:.2f}\")" + "print(f\"Babbage similarity embedding performance on 1k Amazon reviews: mse={mse:.2f}, mae={mae:.2f}\")\n" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Dummy mean prediction performance on Amazon reviews: mse=1.77, mae=1.04\n" + "Dummy mean prediction performance on Amazon reviews: mse=1.81, mae=1.08\n" ] } ], "source": [ "bmse = mean_squared_error(y_test, np.repeat(y_test.mean(), len(y_test)))\n", "bmae = mean_absolute_error(y_test, np.repeat(y_test.mean(), len(y_test)))\n", - "print(f\"Dummy mean prediction performance on Amazon reviews: mse={bmse:.2f}, mae={bmae:.2f}\")" + "print(\n", + " f\"Dummy mean prediction performance on Amazon reviews: mse={bmse:.2f}, mae={bmae:.2f}\"\n", + ")\n" ] }, { @@ -83,11 +85,9 @@ } ], "metadata": { - "interpreter": { - "hash": "be4b5d5b73a21c599de40d6deb1129796d12dc1cc33a738f7bac13269cfcafe8" - }, "kernelspec": { - "display_name": "Python 3.7.3 64-bit ('base': conda)", + "display_name": "Python 3.9.9 ('openai')", + "language": "python", "name": "python3" }, "language_info": { @@ -100,9 +100,14 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.9.9" }, - "orig_nbformat": 4 + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "365536dcbde60510dc9073d6b991cd35db2d9bac356a11f5b64279a5e6708b97" + } + } }, "nbformat": 4, "nbformat_minor": 2