Updated to align wording

pull/1077/head
colin-openai 2 years ago
parent 6d0203788c
commit c8b7280ec5

@ -23,15 +23,15 @@
"- **Setup**: Import packages and set any required variables\n",
"- **Load data**: Load a dataset and embed it using OpenAI embeddings\n",
"- **Pinecone**\n",
" - *Setup*: Here we setup the Python client for Pinecone. For more details go [here](https://docs.pinecone.io/docs/quickstart)\n",
" - *Setup*: Here we'll set up the Python client for Pinecone. For more details go [here](https://docs.pinecone.io/docs/quickstart)\n",
" - *Index Data*: We'll create an index with namespaces for __titles__ and __content__\n",
" - *Search Data*: We'll test out both namespaces with search queries to confirm it works\n",
"- **Weaviate**\n",
" - *Setup*: Here we setup the Python client for Weaviate. For more details go [here](https://weaviate.io/developers/weaviate/current/client-libraries/python.html)\n",
" - *Setup*: Here we'll set up the Python client for Weaviate. For more details go [here](https://weaviate.io/developers/weaviate/current/client-libraries/python.html)\n",
" - *Index Data*: We'll create an index with __title__ search vectors in it\n",
" - *Search Data*: We'll run a few searches to confirm it works\n",
"- **Qdrant**\n",
" - *Setup*: Here we setup the Python client for Qdrant. For more details go [here](https://github.com/qdrant/qdrant_client)\n",
" - *Setup*: Here we'll set up the Python client for Qdrant. For more details go [here](https://github.com/qdrant/qdrant_client)\n",
" - *Index Data*: We'll create a collection with vectors for __titles__ and __content__\n",
" - *Search Data*: We'll run a few searches to confirm it works\n",
"\n",
@ -55,7 +55,7 @@
"metadata": {},
"outputs": [],
"source": [
"# Here we install the clients for all vector databases\n",
"# We'll need to install the clients for all vector databases\n",
"!pip install pinecone-client\n",
"!pip install weaviate-client\n",
"!pip install qdrant-client"
@ -63,7 +63,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 3,
"id": "5be94df6",
"metadata": {},
"outputs": [],
@ -111,7 +111,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 4,
"id": "bd99e08e",
"metadata": {},
"outputs": [],
@ -172,32 +172,10 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "0c1c73cb",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Found cached dataset wikipedia (/Users/colin.jarvis/.cache/huggingface/datasets/wikipedia/20220301.simple/2.0.0/aa542ed919df55cc5d3347f42dd4521d05ca68751f50dbc32bae2a7f1e167559)\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6aa3a11d70424916915334e267f4964b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"outputs": [],
"source": [
"# We'll use the datasets library to pull the Simple Wikipedia dataset for embedding\n",
"dataset = list(load_dataset(\"wikipedia\", \"20220301.simple\")[\"train\"])\n",
@ -207,7 +185,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 6,
"id": "e6ee90ce",
"metadata": {},
"outputs": [
@ -222,15 +200,15 @@
"name": "stderr",
"output_type": "stream",
"text": [
"25024it [00:59, 423.81it/s] "
"25024it [01:06, 377.31it/s] "
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 17.9 s, sys: 3.15 s, total: 21 s\n",
"Wall time: 1min 1s\n"
"CPU times: user 16.3 s, sys: 2.24 s, total: 18.5 s\n",
"Wall time: 1min 8s\n"
]
},
{
@ -249,7 +227,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 7,
"id": "850c7215",
"metadata": {},
"outputs": [
@ -264,7 +242,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"25024it [00:40, 616.58it/s] \n"
"25024it [00:36, 683.22it/s] \n"
]
}
],
@ -275,7 +253,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 122,
"id": "1410daaa",
"metadata": {},
"outputs": [
@ -316,7 +294,7 @@
" <td>https://simple.wikipedia.org/wiki/April</td>\n",
" <td>April</td>\n",
" <td>April is the fourth month of the year in the J...</td>\n",
" <td>[0.0010547508718445897, -0.020757636055350304,...</td>\n",
" <td>[0.001009464613161981, -0.020700545981526375, ...</td>\n",
" <td>[-0.011253940872848034, -0.013491976074874401,...</td>\n",
" <td>0</td>\n",
" </tr>\n",
@ -326,7 +304,7 @@
" <td>https://simple.wikipedia.org/wiki/August</td>\n",
" <td>August</td>\n",
" <td>August (Aug.) is the eighth month of the year ...</td>\n",
" <td>[0.0009623901569284499, 0.0008108559413813055,...</td>\n",
" <td>[0.0009286514250561595, 0.000820168002974242, ...</td>\n",
" <td>[0.0003609954728744924, 0.007262262050062418, ...</td>\n",
" <td>1</td>\n",
" </tr>\n",
@ -336,7 +314,7 @@
" <td>https://simple.wikipedia.org/wiki/Art</td>\n",
" <td>Art</td>\n",
" <td>Art is a creative activity that expresses imag...</td>\n",
" <td>[0.0033528385683894157, 0.006173426751047373, ...</td>\n",
" <td>[0.003393713850528002, 0.0061537534929811954, ...</td>\n",
" <td>[-0.004959689453244209, 0.015772193670272827, ...</td>\n",
" <td>2</td>\n",
" </tr>\n",
@ -346,7 +324,7 @@
" <td>https://simple.wikipedia.org/wiki/A</td>\n",
" <td>A</td>\n",
" <td>A or a is the first letter of the English alph...</td>\n",
" <td>[0.015449387952685356, -0.013746200129389763, ...</td>\n",
" <td>[0.0153952119871974, -0.013759135268628597, 0....</td>\n",
" <td>[0.024894846603274345, -0.022186409682035446, ...</td>\n",
" <td>3</td>\n",
" </tr>\n",
@ -356,7 +334,7 @@
" <td>https://simple.wikipedia.org/wiki/Air</td>\n",
" <td>Air</td>\n",
" <td>Air refers to the Earth's atmosphere. Air is a...</td>\n",
" <td>[0.0222249086946249, -0.020463958382606506, -0...</td>\n",
" <td>[0.02224554680287838, -0.02044147066771984, -0...</td>\n",
" <td>[0.021524671465158463, 0.018522677943110466, -...</td>\n",
" <td>4</td>\n",
" </tr>\n",
@ -380,11 +358,11 @@
"4 Air refers to the Earth's atmosphere. Air is a... \n",
"\n",
" title_vector \\\n",
"0 [0.0010547508718445897, -0.020757636055350304,... \n",
"1 [0.0009623901569284499, 0.0008108559413813055,... \n",
"2 [0.0033528385683894157, 0.006173426751047373, ... \n",
"3 [0.015449387952685356, -0.013746200129389763, ... \n",
"4 [0.0222249086946249, -0.020463958382606506, -0... \n",
"0 [0.001009464613161981, -0.020700545981526375, ... \n",
"1 [0.0009286514250561595, 0.000820168002974242, ... \n",
"2 [0.003393713850528002, 0.0061537534929811954, ... \n",
"3 [0.0153952119871974, -0.013759135268628597, 0.... \n",
"4 [0.02224554680287838, -0.02044147066771984, -0... \n",
"\n",
" content_vector vector_id \n",
"0 [-0.011253940872848034, -0.013491976074874401,... 0 \n",
@ -394,13 +372,13 @@
"4 [0.021524671465158463, 0.018522677943110466, -... 4 "
]
},
"execution_count": 13,
"execution_count": 122,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# We then store the result in another dataframe, and prep the data for insertion into a vector DB\n",
"# We will then store the result in another dataframe, and prep the data for insertion into a vector DB\n",
"article_df = pd.DataFrame(dataset)\n",
"article_df['title_vector'] = title_embeddings\n",
"article_df['content_vector'] = dataset_embeddings\n",
@ -444,14 +422,14 @@
"source": [
"### Create Index\n",
"\n",
"First we need to create an index, which we'll call `wikipedia-articles`. Once we have an index, we can create multiple namespaces, which can make a single index searchable for various use cases. For more details, consult [Pinecone documentation](https://docs.pinecone.io/docs/namespaces#:~:text=Pinecone%20allows%20you%20to%20partition,different%20subsets%20of%20your%20index.).\n",
"First, we will need to create an index, which we'll call `wikipedia-articles`. Once we have an index, we can create multiple namespaces, which can make a single index searchable for various use cases. For more details, consult the [Pinecone documentation](https://docs.pinecone.io/docs/namespaces#:~:text=Pinecone%20allows%20you%20to%20partition,different%20subsets%20of%20your%20index.).\n",
"\n",
"If you want to batch insert into your index in parallel to increase insertion speed, there is a great guide in the Pinecone documentation on [batch inserts in parallel](https://docs.pinecone.io/docs/insert-data#sending-upserts-in-parallel)."
]
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 108,
"id": "0a71c575",
"metadata": {},
"outputs": [],
@ -483,7 +461,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 124,
"id": "7ea9ad46",
"metadata": {},
"outputs": [
@ -493,7 +471,7 @@
"['wikipedia-articles']"
]
},
"execution_count": 20,
"execution_count": 124,
"metadata": {},
"output_type": "execute_result"
}
@ -516,7 +494,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 126,
"id": "5daeba00",
"metadata": {},
"outputs": [
@ -529,7 +507,7 @@
}
],
"source": [
"# Upsert content vectors in content namespace\n",
"# Upsert content vectors in content namespace - this can take a few minutes\n",
"print(\"Uploading vectors to content namespace..\")\n",
"for batch_df in df_batcher(article_df):\n",
" index.upsert(vectors=zip(batch_df.vector_id, batch_df.content_vector), namespace='content')"
@ -537,7 +515,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 127,
"id": "5fc1b083",
"metadata": {},
"outputs": [
@ -550,7 +528,7 @@
}
],
"source": [
"# Upsert title vectors in title namespace\n",
"# Upsert title vectors in title namespace - this can also take a few minutes\n",
"print(\"Uploading vectors to title namespace..\")\n",
"for batch_df in df_batcher(article_df):\n",
" index.upsert(vectors=zip(batch_df.vector_id, batch_df.title_vector), namespace='title')"
@ -558,7 +536,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 128,
"id": "f90c7fba",
"metadata": {},
"outputs": [
@ -572,7 +550,7 @@
" 'total_vector_count': 50000}"
]
},
"execution_count": 24,
"execution_count": 128,
"metadata": {},
"output_type": "execute_result"
}
@ -594,7 +572,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": null,
"id": "d701b3c7",
"metadata": {},
"outputs": [],
@ -606,7 +584,7 @@
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 72,
"id": "3c8c2aa1",
"metadata": {},
"outputs": [],
@ -652,54 +630,20 @@
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": null,
"id": "67b3584d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Most similar results to modern art in Europe in \"title\" namespace:\n",
"\n",
"Museum of Modern Art (score = 0.875286043)\n",
"Western Europe (score = 0.867383599)\n",
"Renaissance art (score = 0.864250064)\n",
"Pop art (score = 0.860506058)\n",
"Northern Europe (score = 0.854678154)\n",
"\n",
"\n"
]
}
],
"outputs": [],
"source": [
"query_output = query_article('modern art in Europe','title')"
]
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": null,
"id": "3e7ac79b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Most similar results to Famous battles in Scottish history in \"content\" namespace:\n",
"\n",
"Battle of Bannockburn (score = 0.869324744)\n",
"Wars of Scottish Independence (score = 0.861479)\n",
"1651 (score = 0.852555931)\n",
"First War of Scottish Independence (score = 0.84969604)\n",
"Robert I of Scotland (score = 0.846192539)\n",
"\n",
"\n"
]
}
],
"outputs": [],
"source": [
"content_query_output = query_article(\"Famous battles in Scottish history\",'content')"
]
@ -728,7 +672,7 @@
"source": [
"### Setup\n",
"\n",
"To get Weaviate running locally we used Docker and followed the instructions contained the Weaviate documentation here: https://weaviate.io/developers/weaviate/current/installation/docker-compose.html\n",
"To get Weaviate running locally we will use Docker and follow the instructions contained in the Weaviate documentation here: https://weaviate.io/developers/weaviate/current/installation/docker-compose.html\n",
"\n",
"For an example docker-compose.yaml file please refer to `./weaviate/docker-compose.yaml` in this repo\n",
"\n",
@ -737,7 +681,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 113,
"id": "b9ea472d",
"metadata": {},
"outputs": [],
@ -747,10 +691,21 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 114,
"id": "13be220d",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"{'classes': []}"
]
},
"execution_count": 114,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"client.schema.delete_all()\n",
"client.schema.get()"
@ -758,10 +713,21 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 115,
"id": "73d33184",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 115,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"client.is_ready()"
]
@ -782,7 +748,7 @@
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": 116,
"id": "e868d143",
"metadata": {},
"outputs": [
@ -824,7 +790,7 @@
" 'vectorizer': 'none'}]}"
]
},
"execution_count": 36,
"execution_count": 116,
"metadata": {},
"output_type": "execute_result"
}
@ -854,7 +820,7 @@
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 117,
"id": "786d437f",
"metadata": {},
"outputs": [
@ -895,7 +861,7 @@
},
{
"cell_type": "code",
"execution_count": 47,
"execution_count": 118,
"id": "3658693c",
"metadata": {},
"outputs": [
@ -903,26 +869,28 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Cave Story\n",
"is a freeware video game released in 2004 for PC. It was thought of and created over five years by Daisuke Amaya, known by his pseudonym, or art name, Pixel. The game is an action-adventure game, and is similar to the Castlevania and Metroid games. It was first made in Japanese, and was translated to English by the fan translating group, Aeon Genesis.\n",
"Kim Jong-nam\n",
"Kim Jong-nam (May 10, 1971 - February 13, 2017) was the eldest son of Kim Jong-il, the former leader of North Korea.\n",
"\n",
"He tried to enter Japan using a fake passport in May 2001. This was to visit Disneyland. This caused his father to not approve of him. Kim Jong-nam's younger half-brother Kim Jong-un was made the heir in September 2010.\n",
"\n",
"In June 2010, Kim Jong-nam gave a brief interview to the Associated Press in Macau. He told the reporter that he had \"no plans\" to defect to Europe. The press had recently said this. Kim Jong-nam lived in an apartment on the southern tip of Macau's Coloane Island until 2007. An anonymous South Korean official reported in October 2010 that Jong-nam had not lived in Macau for \"months\", and now goes between China and \"another country.\"\n",
"\n",
"When his father died, Kim Jong-nam did not attend the funeral. This was to avoid rumours on the succession.\n",
"\n",
"He was assassinated in Malaysia on February 13, 2017, which is believed to be ordered by his half-brother Kim Jong-un.\n",
"\n",
"Personal life\n",
"The South Korean newspaper The Chosun Ilbo said that Kim Jong-nam has two wives, at least one mistress, and several children. His first wife Shin Jong-hui (born c. 1980) and their son Kum-sol (born c. 1996) live at a home called Dragon Villa on the northern outskirts of Beijing. His second wife Lee Hye-kyong (born c. 1970), their son Han-sol (born c. 1995) and their daughter Sol-hui (born c. 1998) live in an apartment building in Macau. Jong-nam's mistress, former Air Koryo flight attendant So Yong-la (born c. 1980), also lives in Macau. \n",
"\n",
"References \n",
"Jong-nam is often given attention by the media for his gambling and extravagant spending.\n",
"\n",
"Notes\n",
"References\n",
"\n",
"2004 video games\n",
"Amiga games\n",
"Dreamcast games\n",
"Freeware games\n",
"Indie video games\n",
"Nintendo 3DS games\n",
"Nintendo Switch games\n",
"MacOS games\n",
"Platform games\n",
"Sega Genesis games\n",
"Video games developed in Japan\n",
"Wii games\n",
"Windows games\n"
"1971 births\n",
"2017 deaths\n",
"Assassinated people\n",
"North Korean politicians\n"
]
},
{
@ -931,7 +899,7 @@
"{'Aggregate': {'Article': [{'meta': {'count': 25000}}]}}"
]
},
"execution_count": 47,
"execution_count": 118,
"metadata": {},
"output_type": "execute_result"
}
@ -960,7 +928,7 @@
},
{
"cell_type": "code",
"execution_count": 48,
"execution_count": 119,
"id": "5acd5437",
"metadata": {},
"outputs": [],
@ -986,7 +954,7 @@
},
{
"cell_type": "code",
"execution_count": 49,
"execution_count": 120,
"id": "15def653",
"metadata": {},
"outputs": [
@ -1013,7 +981,7 @@
"17. Impressionism (Score: 0.919)\n",
"18. Bauhaus (Score: 0.919)\n",
"19. Surrealism (Score: 0.919)\n",
"20. Expressionism (Score: 0.918)\n"
"20. Expressionism (Score: 0.919)\n"
]
}
],
@ -1027,7 +995,7 @@
},
{
"cell_type": "code",
"execution_count": 50,
"execution_count": 85,
"id": "93c4a696",
"metadata": {},
"outputs": [
@ -1035,11 +1003,11 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1. Historic Scotland (Score: 0.947)\n",
"1. Historic Scotland (Score: 0.946)\n",
"2. First War of Scottish Independence (Score: 0.946)\n",
"3. Battle of Bannockburn (Score: 0.946)\n",
"4. Wars of Scottish Independence (Score: 0.944)\n",
"5. Second War of Scottish Independence (Score: 0.94)\n",
"5. Second War of Scottish Independence (Score: 0.939)\n",
"6. List of Scottish monarchs (Score: 0.937)\n",
"7. Scottish Borders (Score: 0.932)\n",
"8. Braveheart (Score: 0.929)\n",
@ -1095,7 +1063,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 99,
"id": "76d697e9",
"metadata": {
"ExecuteTime": {
@ -1110,7 +1078,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 100,
"id": "1deeb539",
"metadata": {
"ExecuteTime": {
@ -1125,7 +1093,7 @@
"CollectionsResponse(collections=[])"
]
},
"execution_count": 8,
"execution_count": 100,
"metadata": {},
"output_type": "execute_result"
}
@ -1143,12 +1111,12 @@
"\n",
"Qdrant stores data in __collections__ where each object is described by at least one vector and may contain additional metadata called the __payload__. Our collection will be called **Articles** and each object will be described by both **title** and **content** vectors.\n",
"\n",
"We're going to be using an official [qdrant-client](https://github.com/qdrant/qdrant_client) package that has all the utility methods already built-in."
"We'll be using the official [qdrant-client](https://github.com/qdrant/qdrant_client) package, which has all the utility methods already built in."
]
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": 101,
"id": "1a84ee1d",
"metadata": {
"ExecuteTime": {
@ -1163,7 +1131,7 @@
},
{
"cell_type": "code",
"execution_count": 34,
"execution_count": 102,
"id": "00876f92",
"metadata": {
"ExecuteTime": {
@ -1192,7 +1160,7 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": null,
"id": "f24e76ab",
"metadata": {
"ExecuteTime": {
@ -1200,18 +1168,7 @@
"start_time": "2023-01-18T09:36:24.108867Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"qdrant.upsert(\n",
" collection_name='Articles',\n",
@ -1231,7 +1188,7 @@
},
{
"cell_type": "code",
"execution_count": 52,
"execution_count": null,
"id": "d1188a12",
"metadata": {
"ExecuteTime": {
@ -1239,18 +1196,7 @@
"start_time": "2023-01-18T09:58:13.816248Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"CountResult(count=250)"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"# Check the collection size to make sure all the points have been stored\n",
"qdrant.count(collection_name='Articles')"
@ -1263,12 +1209,12 @@
"source": [
"### Search Data\n",
"\n",
"Once the data is put into Qdrant we can start querying the collection for the closest vectors. We may provide an additional parameter `vector_name` to switch from title to content based search."
"Once the data is put into Qdrant, we will start querying the collection for the closest vectors. We may provide an additional parameter `vector_name` to switch from title-based to content-based search."
]
},
{
"cell_type": "code",
"execution_count": 49,
"execution_count": null,
"id": "f1bac4ef",
"metadata": {
"ExecuteTime": {
@ -1299,7 +1245,7 @@
},
{
"cell_type": "code",
"execution_count": 50,
"execution_count": null,
"id": "aa92f3d3",
"metadata": {
"ExecuteTime": {
@ -1307,34 +1253,7 @@
"start_time": "2023-01-18T09:50:35.711020Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0. Art (Score: 0.841)\n",
"1. Europe (Score: 0.839)\n",
"2. Italy (Score: 0.816)\n",
"3. Architecture (Score: 0.815)\n",
"4. Madrid (Score: 0.815)\n",
"5. France (Score: 0.812)\n",
"6. Belgium (Score: 0.808)\n",
"7. Austria (Score: 0.802)\n",
"8. London (Score: 0.799)\n",
"9. History (Score: 0.797)\n",
"10. Creativity (Score: 0.796)\n",
"11. Archaeology (Score: 0.795)\n",
"12. Cartography (Score: 0.794)\n",
"13. Denmark (Score: 0.793)\n",
"14. Finland (Score: 0.79)\n",
"15. English (Score: 0.789)\n",
"16. Catharism (Score: 0.788)\n",
"17. Dublin (Score: 0.787)\n",
"18. Ireland (Score: 0.787)\n",
"19. Japan (Score: 0.787)\n"
]
}
],
"outputs": [],
"source": [
"query_results = query_qdrant('modern art in Europe', 'Articles')\n",
"for i, article in enumerate(query_results):\n",
@ -1343,7 +1262,7 @@
},
{
"cell_type": "code",
"execution_count": 51,
"execution_count": null,
"id": "7ed116b8",
"metadata": {
"ExecuteTime": {
@ -1351,36 +1270,9 @@
"start_time": "2023-01-18T09:52:55.248029Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1. History (Score: 0.797)\n",
"2. Dublin (Score: 0.787)\n",
"3. Ireland (Score: 0.786)\n",
"4. History of Australia (Score: 0.782)\n",
"5. Historian (Score: 0.778)\n",
"6. Belgium (Score: 0.776)\n",
"7. Black pudding (Score: 0.773)\n",
"8. London (Score: 0.769)\n",
"9. History of Spain (Score: 0.768)\n",
"10. Cartography (Score: 0.763)\n",
"11. March (Score: 0.762)\n",
"12. France (Score: 0.761)\n",
"13. Bubonic plague (Score: 0.76)\n",
"14. Great Lakes (Score: 0.759)\n",
"15. Inch (Score: 0.758)\n",
"16. Dissolution of the monasteries (Score: 0.758)\n",
"17. Austria (Score: 0.757)\n",
"18. English (Score: 0.757)\n",
"19. British English (Score: 0.757)\n",
"20. Armenia (Score: 0.756)\n"
]
}
],
"outputs": [],
"source": [
"# This time we're going to query using content vector\n",
"# This time we'll query using content vector\n",
"query_results = query_qdrant('Famous battles in Scottish history', 'Articles', 'content')\n",
"for i, article in enumerate(query_results):\n",
" print(f'{i + 1}. {article.payload[\"title\"]} (Score: {round(article.score, 3)})')"
@ -1397,9 +1289,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "vectordb",
"language": "python",
"name": "python3"
"name": "vectordb"
},
"language_info": {
"codemirror_mode": {

Loading…
Cancel
Save