Merge pull request #68 from openai/ted/update-embedding-examples

updates Q&A example with latest embedding and completion models
pull/72/head
Ted Sanders 1 year ago committed by GitHub
commit f607de50cb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -19,13 +19,14 @@
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import openai\n",
"import numpy as np\n",
"import openai\n",
"import pandas as pd\n",
"import pickle\n",
"from transformers import GPT2TokenizerFast\n",
"import tiktoken\n",
"\n",
"COMPLETIONS_MODEL = \"text-davinci-002\""
"COMPLETIONS_MODEL = \"text-davinci-003\"\n",
"EMBEDDING_MODEL = \"text-embedding-ada-002\""
]
},
{
@ -45,7 +46,7 @@
{
"data": {
"text/plain": [
"\"The 2020 Summer Olympics men's high jump was won by Mariusz Przybylski of Poland.\""
"\"Marcelo Chierighini of Brazil won the gold medal in the men's high jump at the 2020 Summer Olympics.\""
]
},
"execution_count": 2,
@ -60,19 +61,17 @@
" prompt=prompt,\n",
" temperature=0,\n",
" max_tokens=300,\n",
" top_p=1,\n",
" frequency_penalty=0,\n",
" presence_penalty=0,\n",
" model=COMPLETIONS_MODEL\n",
")[\"choices\"][0][\"text\"].strip(\" \\n\")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "47204cce-a7d5-4c81-ab6e-53323026e08c",
"metadata": {},
"source": [
"Mariusz Przybylski is a professional footballer from Poland, and not much of a high jumper! Evidently GPT-3 needs some assistance here. \n",
"Marcelo is a gold medalist swimmer, and, we assume, not much of a high jumper! Evidently GPT-3 needs some assistance here. \n",
"\n",
"The first issue to tackle is that the model is hallucinating an answer rather than telling us \"I don't know\". This is bad because it makes it hard to trust the answer that the model gives us! \n",
"\n",
@ -108,9 +107,6 @@
" prompt=prompt,\n",
" temperature=0,\n",
" max_tokens=300,\n",
" top_p=1,\n",
" frequency_penalty=0,\n",
" presence_penalty=0,\n",
" model=COMPLETIONS_MODEL\n",
")[\"choices\"][0][\"text\"].strip(\" \\n\")"
]
@ -132,7 +128,7 @@
{
"data": {
"text/plain": [
"\"Gianmarco Tamberi and Mutaz Essa Barshim won the 2020 Summer Olympics men's high jump.\""
"'Gianmarco Tamberi and Mutaz Essa Barshim emerged as joint winners of the event.'"
]
},
"execution_count": 4,
@ -245,55 +241,55 @@
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Nordic combined at the 2016 Winter Youth Olympics</th>\n",
" <th>Summary</th>\n",
" <td>Nordic combined at the 2016 Winter Youth Olymp...</td>\n",
" <td>56</td>\n",
" <th>Jamaica at the 2020 Summer Olympics</th>\n",
" <th>Swimming</th>\n",
" <td>Jamaican swimmers further achieved qualifying ...</td>\n",
" <td>51</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Morocco at the 2020 Summer Olympics</th>\n",
" <th>Judo</th>\n",
" <td>Morocco qualified two female judoka for each o...</td>\n",
" <td>106</td>\n",
" <th>Archery at the 2020 Summer Olympics Women's individual</th>\n",
" <th>Background</th>\n",
" <td>This is the 13th consecutive appearance of the...</td>\n",
" <td>136</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Guinea-Bissau at the 2020 Summer Olympics</th>\n",
" <th>Wrestling</th>\n",
" <td>Guinea-Bissau qualified two wrestlers for each...</td>\n",
" <td>69</td>\n",
" <th>Germany at the 2020 Summer Olympics</th>\n",
" <th>Sport climbing</th>\n",
" <td>Germany entered two sport climbers into the Ol...</td>\n",
" <td>98</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Rome bid for the 2020 Summer Olympics</th>\n",
" <th>History</th>\n",
" <td>The Italian National Olympic Committee (CONI) ...</td>\n",
" <td>738</td>\n",
" <th>Cycling at the 2020 Summer Olympics Women's BMX racing</th>\n",
" <th>Competition format</th>\n",
" <td>The competition was a three-round tournament, ...</td>\n",
" <td>215</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Italy at the 2020 Summer Olympics</th>\n",
" <th>Slalom</th>\n",
" <td>Italian canoeists qualified one boat for each ...</td>\n",
" <td>76</td>\n",
" <th>Volleyball at the 2020 Summer Olympics Men's tournament</th>\n",
" <th>Format</th>\n",
" <td>The preliminary round was a competition betwee...</td>\n",
" <td>104</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" content \\\n",
"title heading \n",
"Nordic combined at the 2016 Winter Youth Olympics Summary Nordic combined at the 2016 Winter Youth Olymp... \n",
"Morocco at the 2020 Summer Olympics Judo Morocco qualified two female judoka for each o... \n",
"Guinea-Bissau at the 2020 Summer Olympics Wrestling Guinea-Bissau qualified two wrestlers for each... \n",
"Rome bid for the 2020 Summer Olympics History The Italian National Olympic Committee (CONI) ... \n",
"Italy at the 2020 Summer Olympics Slalom Italian canoeists qualified one boat for each ... \n",
" content \\\n",
"title heading \n",
"Jamaica at the 2020 Summer Olympics Swimming Jamaican swimmers further achieved qualifying ... \n",
"Archery at the 2020 Summer Olympics Women's i... Background This is the 13th consecutive appearance of the... \n",
"Germany at the 2020 Summer Olympics Sport climbing Germany entered two sport climbers into the Ol... \n",
"Cycling at the 2020 Summer Olympics Women's B... Competition format The competition was a three-round tournament, ... \n",
"Volleyball at the 2020 Summer Olympics Men's ... Format The preliminary round was a competition betwee... \n",
"\n",
" tokens \n",
"title heading \n",
"Nordic combined at the 2016 Winter Youth Olympics Summary 56 \n",
"Morocco at the 2020 Summer Olympics Judo 106 \n",
"Guinea-Bissau at the 2020 Summer Olympics Wrestling 69 \n",
"Rome bid for the 2020 Summer Olympics History 738 \n",
"Italy at the 2020 Summer Olympics Slalom 76 "
" tokens \n",
"title heading \n",
"Jamaica at the 2020 Summer Olympics Swimming 51 \n",
"Archery at the 2020 Summer Olympics Women's i... Background 136 \n",
"Germany at the 2020 Summer Olympics Sport climbing 98 \n",
"Cycling at the 2020 Summer Olympics Women's B... Competition format 215 \n",
"Volleyball at the 2020 Summer Olympics Men's ... Format 104 "
]
},
"execution_count": 5,
@ -326,36 +322,17 @@
{
"cell_type": "code",
"execution_count": 6,
"id": "4b874907-5109-4eef-ad9a-add4367925a3",
"metadata": {},
"outputs": [],
"source": [
"MODEL_NAME = \"curie\"\n",
"\n",
"DOC_EMBEDDINGS_MODEL = f\"text-search-{MODEL_NAME}-doc-001\"\n",
"QUERY_EMBEDDINGS_MODEL = f\"text-search-{MODEL_NAME}-query-001\""
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "ba475f30-ef7f-431c-b60d-d5970b62ad09",
"metadata": {},
"outputs": [],
"source": [
"def get_embedding(text: str, model: str) -> list[float]:\n",
"def get_embedding(text: str, model: str=EMBEDDING_MODEL) -> list[float]:\n",
" result = openai.Embedding.create(\n",
" model=model,\n",
" input=text\n",
" )\n",
" return result[\"data\"][0][\"embedding\"]\n",
"\n",
"def get_doc_embedding(text: str) -> list[float]:\n",
" return get_embedding(text, DOC_EMBEDDINGS_MODEL)\n",
"\n",
"def get_query_embedding(text: str) -> list[float]:\n",
" return get_embedding(text, QUERY_EMBEDDINGS_MODEL)\n",
"\n",
"def compute_doc_embeddings(df: pd.DataFrame) -> dict[tuple[str, str], list[float]]:\n",
" \"\"\"\n",
" Create an embedding for each row in the dataframe using the OpenAI Embeddings API.\n",
@ -363,13 +340,13 @@
" Return a dictionary that maps the index of each row to the embedding vector computed for that row.\n",
" \"\"\"\n",
" return {\n",
" idx: get_doc_embedding(r.content.replace(\"\\n\", \" \")) for idx, r in df.iterrows()\n",
" idx: get_embedding(r.content) for idx, r in df.iterrows()\n",
" }"
]
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 7,
"id": "737266aa-cbe7-4691-87c1-fce8a31632f1",
"metadata": {},
"outputs": [],
@ -399,7 +376,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 8,
"id": "ab50bfca-cb02-41c6-b338-4400abe1d86e",
"metadata": {},
"outputs": [],
@ -408,12 +385,12 @@
"\n",
"# ===== OR, uncomment the below line to recalculate the embeddings from scratch. ========\n",
"\n",
"# context_embeddings = compute_doc_embeddings(df)"
"# document_embeddings = compute_doc_embeddings(df)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 9,
"id": "b9a8c713-c8a9-47dc-85a4-871ee1395566",
"metadata": {},
"outputs": [
@ -421,7 +398,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"('2020 Summer Olympics', 'Summary') : [-0.00089670566, 0.0027141054, -0.00030984893, 0.0066024954, -0.009860336]... (4096 entries)\n"
"('2020 Summer Olympics', 'Summary') : [0.0037565305829048, -0.0061981128528714, -0.0087078781798481, -0.0071364338509738, -0.0025227521546185]... (1536 entries)\n"
]
}
],
@ -447,15 +424,16 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 10,
"id": "dcd680e9-f194-4180-b14f-fc357498eb92",
"metadata": {},
"outputs": [],
"source": [
"def vector_similarity(x: list[float], y: list[float]) -> float:\n",
" \"\"\"\n",
" We could use cosine similarity or dot product to calculate the similarity between vectors.\n",
" In practice, we have found it makes little difference. \n",
" Returns the similarity between two vectors.\n",
" \n",
" Because OpenAI Embeddings are normalized to length 1, the cosine similarity is the same as the dot product.\n",
" \"\"\"\n",
" return np.dot(np.array(x), np.array(y))\n",
"\n",
@ -466,7 +444,7 @@
" \n",
" Return the list of document sections, sorted by relevance in descending order.\n",
" \"\"\"\n",
" query_embedding = get_query_embedding(query)\n",
" query_embedding = get_embedding(query)\n",
" \n",
" document_similarities = sorted([\n",
" (vector_similarity(query_embedding, doc_embedding), doc_index) for doc_index, doc_embedding in contexts.items()\n",
@ -477,26 +455,27 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 11,
"id": "e3a27d73-f47f-480d-b336-079414f749cb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[(0.42962625596241333,\n",
"[(0.884864308450606,\n",
" (\"Athletics at the 2020 Summer Olympics Men's high jump\", 'Summary')),\n",
" (0.40670511466655435,\n",
" (\"Athletics at the 2020 Summer Olympics Women's high jump\", 'Summary')),\n",
" (0.40469276614514266,\n",
" (\"Athletics at the 2020 Summer Olympics Men's high jump\", 'Background')),\n",
" (0.4042442976710604,\n",
" (0.8633938355935518,\n",
" (\"Athletics at the 2020 Summer Olympics Men's pole vault\", 'Summary')),\n",
" (0.861639730583851,\n",
" (\"Athletics at the 2020 Summer Olympics Men's long jump\", 'Summary')),\n",
" (0.8560523857031264,\n",
" (\"Athletics at the 2020 Summer Olympics Men's triple jump\", 'Summary')),\n",
" (0.4021923631988294,\n",
" (\"Athletics at the 2020 Summer Olympics Women's long jump\", 'Summary'))]"
" (0.8469039130441247,\n",
" (\"Athletics at the 2020 Summer Olympics Men's 110 metres hurdles\",\n",
" 'Summary'))]"
]
},
"execution_count": 12,
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@ -507,26 +486,27 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 12,
"id": "729c2ce7-8540-4ab2-bb3a-76c4dfcb689c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[(0.42879291463492475,\n",
" (\"Athletics at the 2020 Summer Olympics Women's high jump\", 'Summary')),\n",
" (0.4194122846175017,\n",
"[(0.8726165220223294,\n",
" (\"Athletics at the 2020 Summer Olympics Women's long jump\", 'Summary')),\n",
" (0.41152657076657995,\n",
" (0.8682196158313358,\n",
" (\"Athletics at the 2020 Summer Olympics Women's high jump\", 'Summary')),\n",
" (0.863191526370672,\n",
" (\"Athletics at the 2020 Summer Olympics Women's pole vault\", 'Summary')),\n",
" (0.8609374262115406,\n",
" (\"Athletics at the 2020 Summer Olympics Women's triple jump\", 'Summary')),\n",
" (0.4096367709206329,\n",
" (\"Athletics at the 2020 Summer Olympics Men's high jump\", 'Summary')),\n",
" (0.4059521236876147,\n",
" (\"Athletics at the 2020 Summer Olympics Women's pole vault\", 'Summary'))]"
" (0.8581515607285688,\n",
" (\"Athletics at the 2020 Summer Olympics Women's 100 metres hurdles\",\n",
" 'Summary'))]"
]
},
"execution_count": 13,
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
@ -536,11 +516,12 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "3cf71fae-abb1-46b2-a483-c1b2f1a915c2",
"metadata": {},
"source": [
"We can see that the most relevant document sections for each question are the summaries for the Men's and Women's high jump competitions - which is exactly what we would expect."
"We can see that the most relevant document sections for each question include the summaries for the Men's and Women's high jump competitions - which is exactly what we would expect."
]
},
{
@ -555,7 +536,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 13,
"id": "b763ace2-1946-48e0-8ff1-91ba335d47a0",
"metadata": {},
"outputs": [
@ -565,7 +546,7 @@
"'Context separator contains 3 tokens'"
]
},
"execution_count": 14,
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@ -573,16 +554,17 @@
"source": [
"MAX_SECTION_LEN = 500\n",
"SEPARATOR = \"\\n* \"\n",
"ENCODING = \"cl100k_base\" # encoding for text-embedding-ada-002\n",
"\n",
"tokenizer = GPT2TokenizerFast.from_pretrained(\"gpt2\")\n",
"separator_len = len(tokenizer.tokenize(SEPARATOR))\n",
"encoding = tiktoken.get_encoding(ENCODING)\n",
"separator_len = len(encoding.encode(SEPARATOR))\n",
"\n",
"f\"Context separator contains {separator_len} tokens\""
]
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 14,
"id": "0c5c0509-eeb9-4552-a5d4-6ace04ef73dd",
"metadata": {},
"outputs": [],
@ -619,7 +601,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 15,
"id": "f614045a-3917-4b28-9643-7e0c299ec1a7",
"metadata": {},
"outputs": [
@ -627,18 +609,16 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Selected 3 document sections:\n",
"(\"Athletics at the 2020 Summer Olympics Women's high jump\", 'Summary')\n",
"Selected 2 document sections:\n",
"(\"Athletics at the 2020 Summer Olympics Men's high jump\", 'Summary')\n",
"(\"Athletics at the 2020 Summer Olympics Men's triple jump\", 'Summary')\n",
"(\"Athletics at the 2020 Summer Olympics Men's long jump\", 'Summary')\n",
"===\n",
" Answer the question as truthfully as possible using the provided context, and if the answer is not contained within the text below, say \"I don't know.\"\n",
"\n",
"Context:\n",
"\n",
"* The women's high jump event at the 2020 Summer Olympics took place on 5 and 7 August 2021 at the Japan National Stadium. Even though 32 athletes qualified through the qualification system for the Games, only 31 took part in the competition. This was the 22nd appearance of the event, having appeared at every Olympics since women's athletics was introduced in 1928.\n",
"* The men's high jump event at the 2020 Summer Olympics took place between 30 July and 1 August 2021 at the Olympic Stadium. 33 athletes from 24 nations competed; the total possible number depended on how many nations would use universality places to enter athletes in addition to the 32 qualifying through mark or ranking (no universality places were used in 2021). Italian athlete Gianmarco Tamberi along with Qatari athlete Mutaz Essa Barshim emerged as joint winners of the event following a tie between both of them as they cleared 2.37m. Both Tamberi and Barshim agreed to share the gold medal in a rare instance where the athletes of different nations had agreed to share the same medal in the history of Olympics. Barshim in particular was heard to ask a competition official \"Can we have two golds?\" in response to being offered a 'jump off'. Maksim Nedasekau of Belarus took bronze. The medals were the first ever in the men's high jump for Italy and Belarus, the first gold in the men's high jump for Italy and Qatar, and the third consecutive medal in the men's high jump for Qatar (all by Barshim). Barshim became only the second man to earn three medals in high jump, joining Patrik Sjöberg of Sweden (1984 to 1992).\n",
"* The men's triple jump event at the 2020 Summer Olympics took place between 3 and 5 August 2021 at the Japan National Stadium. Approximately 35 athletes were expected to compete; the exact number was dependent on how many nations use universality places to enter athletes in addition to the 32 qualifying through time or ranking (2 universality places were used in 2016). 32 athletes from 19 nations competed. Pedro Pichardo of Portugal won the gold medal, the nation's second victory in the men's triple jump (after Nelson Évora in 2008). China's Zhu Yaming took silver, while Hugues Fabrice Zango earned Burkina Faso's first Olympic medal in any event.\n",
"* The men's long jump event at the 2020 Summer Olympics took place between 31 July and 2 August 2021 at the Japan National Stadium. Approximately 35 athletes were expected to compete; the exact number was dependent on how many nations use universality places to enter athletes in addition to the 32 qualifying through time or ranking (1 universality place was used in 2016). 31 athletes from 20 nations competed. Miltiadis Tentoglou won the gold medal, Greece's first medal in the men's long jump. Cuban athletes Juan Miguel Echevarría and Maykel Massó earned silver and bronze, respectively, the nation's first medals in the event since 2008.\n",
"\n",
" Q: Who won the 2020 Summer Olympics men's high jump?\n",
" A:\n"
@ -671,7 +651,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 16,
"id": "b0edfec7-9243-4573-92e0-253d31c771ad",
"metadata": {},
"outputs": [],
@ -686,7 +666,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 17,
"id": "9c1c9a69-848e-4099-a90d-c8da36c153d5",
"metadata": {},
"outputs": [],
@ -716,7 +696,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 18,
"id": "c233e449-bf33-4c9e-b095-6a4dd278c8fd",
"metadata": {},
"outputs": [
@ -724,19 +704,18 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Selected 3 document sections:\n",
"(\"Athletics at the 2020 Summer Olympics Women's high jump\", 'Summary')\n",
"Selected 2 document sections:\n",
"(\"Athletics at the 2020 Summer Olympics Men's high jump\", 'Summary')\n",
"(\"Athletics at the 2020 Summer Olympics Men's triple jump\", 'Summary')\n"
"(\"Athletics at the 2020 Summer Olympics Men's long jump\", 'Summary')\n"
]
},
{
"data": {
"text/plain": [
"'Gianmarco Tamberi and Mutaz Essa Barshim emerged as joint winners of the event following a tie between both of them as they cleared 2.37m.'"
"'Gianmarco Tamberi and Mutaz Essa Barshim emerged as joint winners of the event following a tie between both of them as they cleared 2.37m. Both Tamberi and Barshim agreed to share the gold medal.'"
]
},
"execution_count": 19,
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
@ -761,7 +740,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 19,
"id": "1127867b-2884-44bb-9439-0e8ae171c835",
"metadata": {},
"outputs": [
@ -770,7 +749,7 @@
"output_type": "stream",
"text": [
"Selected 1 document sections:\n",
"('2020 Summer Olympics', 'Postponement to 2021')\n",
"('Concerns and controversies at the 2020 Summer Olympics', 'Summary')\n",
"\n",
"Q: Why was the 2020 Summer Olympics originally postponed?\n",
"A: The 2020 Summer Olympics were originally postponed due to the COVID-19 pandemic.\n"
@ -786,7 +765,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 20,
"id": "720d9e0b-b189-4101-91ee-babf736199e6",
"metadata": {},
"outputs": [
@ -794,8 +773,9 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Selected 1 document sections:\n",
"Selected 2 document sections:\n",
"('2020 Summer Olympics medal table', 'Summary')\n",
"('List of 2020 Summer Olympics medal winners', 'Summary')\n",
"\n",
"Q: In the 2020 Summer Olympics, how many gold medals did the country which won the most medals win?\n",
"A: The United States won the most medals overall, with 113, and the most gold medals, with 39.\n"
@ -811,7 +791,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 21,
"id": "4e8e51cc-e4eb-4557-9e09-2929d4df5b7f",
"metadata": {},
"outputs": [
@ -819,13 +799,12 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Selected 3 document sections:\n",
"Selected 2 document sections:\n",
"(\"Athletics at the 2020 Summer Olympics Men's shot put\", 'Summary')\n",
"(\"Athletics at the 2020 Summer Olympics Men's shot put\", 'Background')\n",
"(\"Athletics at the 2020 Summer Olympics Men's hammer throw\", 'Competition format')\n",
"(\"Athletics at the 2020 Summer Olympics Men's discus throw\", 'Summary')\n",
"\n",
"Q: What was unusual about the mens shotput competition?\n",
"A: The same three competitors received the same medals in back-to-back editions of an the same individual event.\n"
"A: The same three competitors received the same medals in back-to-back editions of the same individual event.\n"
]
}
],
@ -838,7 +817,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 22,
"id": "37c83519-e3c6-4c44-8b4a-98cbb3a5f5ba",
"metadata": {},
"outputs": [
@ -846,11 +825,12 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Selected 1 document sections:\n",
"Selected 2 document sections:\n",
"('Italy at the 2020 Summer Olympics', 'Summary')\n",
"('San Marino at the 2020 Summer Olympics', 'Summary')\n",
"\n",
"Q: In the 2020 Summer Olympics, how many silver medals did Italy win?\n",
"A: 10\n"
"A: 10 silver medals.\n"
]
}
],
@ -871,7 +851,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 23,
"id": "26a1a9ef-e1ee-4f80-a1b1-6164ccfa5bac",
"metadata": {},
"outputs": [
@ -879,10 +859,11 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Selected 3 document sections:\n",
"Selected 4 document sections:\n",
"('France at the 2020 Summer Olympics', 'Taekwondo')\n",
"('2020 Summer Olympics medal table', 'Medal count')\n",
"('Taekwondo at the 2020 Summer Olympics Qualification', 'Qualification summary')\n",
"('2020 Summer Olympics medal table', 'Medal count')\n",
"(\"Taekwondo at the 2020 Summer Olympics Men's 80 kg\", 'Competition format')\n",
"\n",
"Q: What is the total number of medals won by France, multiplied by the number of Taekwondo medals given out to all countries?\n",
"A: I don't know.\n"
@ -898,7 +879,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 24,
"id": "9fba8a63-eb81-4661-ae17-59bb5e2933d6",
"metadata": {},
"outputs": [
@ -906,14 +887,10 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Selected 7 document sections:\n",
"('Chile at the 2020 Summer Olympics', 'Mountain biking')\n",
"('South Korea at the 2020 Summer Olympics', 'Sport climbing')\n",
"(\"Cycling at the 2020 Summer Olympics Men's cross-country\", 'Competition format')\n",
"Selected 3 document sections:\n",
"(\"Sport climbing at the 2020 Summer Olympics Men's combined\", 'Route-setting')\n",
"(\"Ski mountaineering at the 2020 Winter Youth Olympics Boys' individual\", 'Summary')\n",
"(\"Cycling at the 2020 Summer Olympics Women's cross-country\", 'Competition format')\n",
"('Portugal at the 2020 Summer Olympics', 'Mountain biking')\n",
"('Slovenia at the 2020 Summer Olympics', 'Mountain biking')\n",
"(\"Ski mountaineering at the 2020 Winter Youth Olympics Girls' individual\", 'Summary')\n",
"\n",
"Q: What is the tallest mountain in the world?\n",
"A: I don't know.\n"
@ -929,7 +906,7 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 25,
"id": "2d4c693b-cdb9-4f4c-bd1b-f77b29097a1f",
"metadata": {},
"outputs": [
@ -937,15 +914,9 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Selected 8 document sections:\n",
"Selected 2 document sections:\n",
"(\"Gymnastics at the 2020 Summer Olympics Women's trampoline\", 'Summary')\n",
"(\"Rowing at the 2020 Summer Olympics Women's quadruple sculls\", 'Summary')\n",
"(\"Cycling at the 2020 Summer Olympics Women's sprint\", 'Summary')\n",
"(\"Cycling at the 2020 Summer Olympics Women's team sprint\", 'Summary')\n",
"(\"Wrestling at the 2020 Summer Olympics Women's freestyle 62 kg\", 'Summary')\n",
"(\"Cycling at the 2020 Summer Olympics Women's BMX freestyle\", 'Summary')\n",
"(\"Rowing at the 2020 Summer Olympics Women's lightweight double sculls\", 'Summary')\n",
"(\"Wrestling at the 2020 Summer Olympics Women's freestyle 68 kg\", 'Summary')\n",
"('Equestrian at the 2020 Summer Olympics Team jumping', 'Summary')\n",
"\n",
"Q: Who won the grimblesplatch competition at the 2020 Summer Olympic games?\n",
"A: I don't know.\n"

Loading…
Cancel
Save