From d8cdb1c3d8597e3fdb4de49605be3788837a7c8b Mon Sep 17 00:00:00 2001 From: Andriy Mulyar Date: Tue, 28 Mar 2023 19:11:05 -0400 Subject: [PATCH] Visualizing Embeddings with Atlas (#152) * Embedding visualization in Atlas * Updated Atlas Visualization Example * Atlas for Embedding Visualization: removed extra outputs * Rename Atlas_for_visualizing_embeddings.ipynb to Visualizing_embeddings_with_Atlas.ipynb --- .../Visualizing_embeddings_with_Atlas.ipynb | 154 ++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 examples/Visualizing_embeddings_with_Atlas.ipynb diff --git a/examples/Visualizing_embeddings_with_Atlas.ipynb b/examples/Visualizing_embeddings_with_Atlas.ipynb new file mode 100644 index 00000000..a47590f0 --- /dev/null +++ b/examples/Visualizing_embeddings_with_Atlas.ipynb @@ -0,0 +1,154 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Visualizing OpenAI Embeddings in Atlas\n", + "\n", + "In this example, we will upload food review embeddings to [Atlas](https://atlas.nomic.ai) and visualize them." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## What is Atlas?\n", + "\n", + "[Atlas](https://atlas.nomic.ai) is a machine learning tool used to visualize massive datasets of embeddings in your web browser. Upload millions of embeddings to Atlas and interact with them in your web browser or a Jupyter notebook." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. 
Log in to Atlas.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ] + } + ], + "source": [ + "%pip install nomic" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import ast\n", + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "# Load the embeddings\n", + "datafile_path = \"data/fine_food_reviews_with_embeddings_1k.csv\"\n", + "df = pd.read_csv(datafile_path)\n", + "\n", + "# The embedding column is read back from the CSV as strings, so parse each entry into a\n", + "# Python object and collect them in a NumPy array. ast.literal_eval only accepts Python\n", + "# literals, whereas eval would execute any code found in the file.\n", + "embeddings = np.array(df.embedding.apply(ast.literal_eval).to_list())\n", + "df = df.drop('embedding', axis=1)\n", + "# The column pandas named 'Unnamed: 0' is used as the id field in the upload below\n", + "df = df.rename(columns={'Unnamed: 0': 'id'})\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + ] + } + ], + "source": [ + "import os\n", + "\n", + "import nomic\n", + "from nomic import atlas\n", + "\n", + "# Set the NOMIC_API_KEY environment variable to upload to your own Atlas account.\n", + "# When it is not set, the demo account token that ships with this example is used.\n", + "nomic.login(os.environ.get('NOMIC_API_KEY', '7xDPkYXSYDc1_ErdTPIcoAR9RNd8YDlkS3nVNXcVoIMZ6'))\n", + "\n", + "data = df.to_dict('records')\n", + "project = atlas.map_embeddings(\n", + "    embeddings=embeddings,\n", + "    data=data,\n", + "    id_field='id',\n", + "    colorable_fields=['Score'],\n", + ")\n", + "# Named atlas_map rather than map so the Python builtin is not shadowed\n", + "atlas_map = project.maps[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. Interact with your embeddings in Jupyter" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "outputs": [ + { + "data": { + "text/plain": "meek-laborer: https://atlas.nomic.ai/map/fddc0e07-97c5-477c-827c-96bca44519aa/463f4614-7689-47e4-b55b-1da0cc679559", + "text/html": "\n

Project: meek-laborer

\n \n\n

Projection ID: 463f4614-7689-47e4-b55b-1da0cc679559

\n
\n
Hide embedded project
\n \n
\n \n \n\n \n \n \n \n " + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "atlas_map" + ], + "metadata": { + "collapsed": false + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.15" + }, + "vscode": { + "interpreter": { + "hash": "365536dcbde60510dc9073d6b991cd35db2d9bac356a11f5b64279a5e6708b97" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}