From 5b523fca95f7e0dc3b9e646e2f0d69f82241f8c0 Mon Sep 17 00:00:00 2001 From: colin-openai Date: Thu, 9 Feb 2023 02:14:24 -0800 Subject: [PATCH] Fixed commented out vector --- ...ctor_databases_for_embeddings_search.ipynb | 167 ++++++++++++++++-- 1 file changed, 157 insertions(+), 10 deletions(-) diff --git a/examples/vector_databases/Using_vector_databases_for_embeddings_search.ipynb b/examples/vector_databases/Using_vector_databases_for_embeddings_search.ipynb index 23b8b0e0..ec5f1c99 100644 --- a/examples/vector_databases/Using_vector_databases_for_embeddings_search.ipynb +++ b/examples/vector_databases/Using_vector_databases_for_embeddings_search.ipynb @@ -66,10 +66,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "5be94df6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + ":914: ImportWarning: _SixMetaPathImporter.find_spec() not found; falling back to find_module()\n" + ] + } + ], "source": [ "import openai\n", "\n", @@ -132,30 +140,158 @@ "source": [ "import zipfile\n", "with zipfile.ZipFile(\"vector_database_wikipedia_articles_embedded.zip\",\"r\") as zip_ref:\n", - " zip_ref.extractall(\"../data\")\n", - " \n", + " zip_ref.extractall(\"../data\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "70bbd8ba", + "metadata": {}, + "outputs": [], + "source": [ "article_df = pd.read_csv('../data/vector_database_wikipedia_articles_embedded.csv')" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "1721e45d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idurltitletexttitle_vectorcontent_vectorvector_id
01https://simple.wikipedia.org/wiki/AprilAprilApril is the fourth month of the year in the J...[0.001009464613161981, -0.020700545981526375, ...[-0.011253940872848034, -0.013491976074874401,...0
12https://simple.wikipedia.org/wiki/AugustAugustAugust (Aug.) is the eighth month of the year ...[0.0009286514250561595, 0.000820168002974242, ...[0.0003609954728744924, 0.007262262050062418, ...1
26https://simple.wikipedia.org/wiki/ArtArtArt is a creative activity that expresses imag...[0.003393713850528002, 0.0061537534929811954, ...[-0.004959689453244209, 0.015772193670272827, ...2
38https://simple.wikipedia.org/wiki/AAA or a is the first letter of the English alph...[0.0153952119871974, -0.013759135268628597, 0....[0.024894846603274345, -0.022186409682035446, ...3
49https://simple.wikipedia.org/wiki/AirAirAir refers to the Earth's atmosphere. Air is a...[0.02224554680287838, -0.02044147066771984, -0...[0.021524671465158463, 0.018522677943110466, -...4
\n", + "
" + ], + "text/plain": [ + " id url title \\\n", + "0 1 https://simple.wikipedia.org/wiki/April April \n", + "1 2 https://simple.wikipedia.org/wiki/August August \n", + "2 6 https://simple.wikipedia.org/wiki/Art Art \n", + "3 8 https://simple.wikipedia.org/wiki/A A \n", + "4 9 https://simple.wikipedia.org/wiki/Air Air \n", + "\n", + " text \\\n", + "0 April is the fourth month of the year in the J... \n", + "1 August (Aug.) is the eighth month of the year ... \n", + "2 Art is a creative activity that expresses imag... \n", + "3 A or a is the first letter of the English alph... \n", + "4 Air refers to the Earth's atmosphere. Air is a... \n", + "\n", + " title_vector \\\n", + "0 [0.001009464613161981, -0.020700545981526375, ... \n", + "1 [0.0009286514250561595, 0.000820168002974242, ... \n", + "2 [0.003393713850528002, 0.0061537534929811954, ... \n", + "3 [0.0153952119871974, -0.013759135268628597, 0.... \n", + "4 [0.02224554680287838, -0.02044147066771984, -0... \n", + "\n", + " content_vector vector_id \n", + "0 [-0.011253940872848034, -0.013491976074874401,... 0 \n", + "1 [0.0003609954728744924, 0.007262262050062418, ... 1 \n", + "2 [-0.004959689453244209, 0.015772193670272827, ... 2 \n", + "3 [0.024894846603274345, -0.022186409682035446, ... 3 \n", + "4 [0.021524671465158463, 0.018522677943110466, -... 4 " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "article_df.head()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "960b82af", "metadata": {}, "outputs": [], "source": [ "# Read vectors from strings back into a list\n", - "#article_df['title_vector'] = article_df.title_vector.apply(literal_eval)\n", + "article_df['title_vector'] = article_df.title_vector.apply(literal_eval)\n", "article_df['content_vector'] = article_df.content_vector.apply(literal_eval)\n", "\n", "# Set vector_id to be a string\n", @@ -164,10 +300,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "a334ab8b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "34471" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "len(article_df['title_vector'][0])" ]