mirror of
https://github.com/openai/openai-cookbook
synced 2024-11-04 06:00:33 +00:00
commit
c31fe72f1a
@ -66,10 +66,18 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"id": "5be94df6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<frozen importlib._bootstrap>:914: ImportWarning: _SixMetaPathImporter.find_spec() not found; falling back to find_module()\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import openai\n",
|
||||
"\n",
|
||||
@ -132,30 +140,158 @@
|
||||
"source": [
|
||||
"import zipfile\n",
|
||||
"with zipfile.ZipFile(\"vector_database_wikipedia_articles_embedded.zip\",\"r\") as zip_ref:\n",
|
||||
" zip_ref.extractall(\"../data\")\n",
|
||||
" \n",
|
||||
" zip_ref.extractall(\"../data\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "70bbd8ba",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"article_df = pd.read_csv('../data/vector_database_wikipedia_articles_embedded.csv')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 3,
|
||||
"id": "1721e45d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>id</th>\n",
|
||||
" <th>url</th>\n",
|
||||
" <th>title</th>\n",
|
||||
" <th>text</th>\n",
|
||||
" <th>title_vector</th>\n",
|
||||
" <th>content_vector</th>\n",
|
||||
" <th>vector_id</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>https://simple.wikipedia.org/wiki/April</td>\n",
|
||||
" <td>April</td>\n",
|
||||
" <td>April is the fourth month of the year in the J...</td>\n",
|
||||
" <td>[0.001009464613161981, -0.020700545981526375, ...</td>\n",
|
||||
" <td>[-0.011253940872848034, -0.013491976074874401,...</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>https://simple.wikipedia.org/wiki/August</td>\n",
|
||||
" <td>August</td>\n",
|
||||
" <td>August (Aug.) is the eighth month of the year ...</td>\n",
|
||||
" <td>[0.0009286514250561595, 0.000820168002974242, ...</td>\n",
|
||||
" <td>[0.0003609954728744924, 0.007262262050062418, ...</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>https://simple.wikipedia.org/wiki/Art</td>\n",
|
||||
" <td>Art</td>\n",
|
||||
" <td>Art is a creative activity that expresses imag...</td>\n",
|
||||
" <td>[0.003393713850528002, 0.0061537534929811954, ...</td>\n",
|
||||
" <td>[-0.004959689453244209, 0.015772193670272827, ...</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>8</td>\n",
|
||||
" <td>https://simple.wikipedia.org/wiki/A</td>\n",
|
||||
" <td>A</td>\n",
|
||||
" <td>A or a is the first letter of the English alph...</td>\n",
|
||||
" <td>[0.0153952119871974, -0.013759135268628597, 0....</td>\n",
|
||||
" <td>[0.024894846603274345, -0.022186409682035446, ...</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>9</td>\n",
|
||||
" <td>https://simple.wikipedia.org/wiki/Air</td>\n",
|
||||
" <td>Air</td>\n",
|
||||
" <td>Air refers to the Earth's atmosphere. Air is a...</td>\n",
|
||||
" <td>[0.02224554680287838, -0.02044147066771984, -0...</td>\n",
|
||||
" <td>[0.021524671465158463, 0.018522677943110466, -...</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" id url title \\\n",
|
||||
"0 1 https://simple.wikipedia.org/wiki/April April \n",
|
||||
"1 2 https://simple.wikipedia.org/wiki/August August \n",
|
||||
"2 6 https://simple.wikipedia.org/wiki/Art Art \n",
|
||||
"3 8 https://simple.wikipedia.org/wiki/A A \n",
|
||||
"4 9 https://simple.wikipedia.org/wiki/Air Air \n",
|
||||
"\n",
|
||||
" text \\\n",
|
||||
"0 April is the fourth month of the year in the J... \n",
|
||||
"1 August (Aug.) is the eighth month of the year ... \n",
|
||||
"2 Art is a creative activity that expresses imag... \n",
|
||||
"3 A or a is the first letter of the English alph... \n",
|
||||
"4 Air refers to the Earth's atmosphere. Air is a... \n",
|
||||
"\n",
|
||||
" title_vector \\\n",
|
||||
"0 [0.001009464613161981, -0.020700545981526375, ... \n",
|
||||
"1 [0.0009286514250561595, 0.000820168002974242, ... \n",
|
||||
"2 [0.003393713850528002, 0.0061537534929811954, ... \n",
|
||||
"3 [0.0153952119871974, -0.013759135268628597, 0.... \n",
|
||||
"4 [0.02224554680287838, -0.02044147066771984, -0... \n",
|
||||
"\n",
|
||||
" content_vector vector_id \n",
|
||||
"0 [-0.011253940872848034, -0.013491976074874401,... 0 \n",
|
||||
"1 [0.0003609954728744924, 0.007262262050062418, ... 1 \n",
|
||||
"2 [-0.004959689453244209, 0.015772193670272827, ... 2 \n",
|
||||
"3 [0.024894846603274345, -0.022186409682035446, ... 3 \n",
|
||||
"4 [0.021524671465158463, 0.018522677943110466, -... 4 "
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"article_df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 4,
|
||||
"id": "960b82af",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Read vectors from strings back into a list\n",
|
||||
"#article_df['title_vector'] = article_df.title_vector.apply(literal_eval)\n",
|
||||
"article_df['title_vector'] = article_df.title_vector.apply(literal_eval)\n",
|
||||
"article_df['content_vector'] = article_df.content_vector.apply(literal_eval)\n",
|
||||
"\n",
|
||||
"# Set vector_id to be a string\n",
|
||||
@ -164,10 +300,21 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 5,
|
||||
"id": "a334ab8b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"34471"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"len(article_df['title_vector'][0])"
|
||||
]
|
||||
|
Loading…
Reference in New Issue
Block a user