deduplicate search results

pull/519/head
simonpfish 1 year ago
parent 38c5742ce6
commit c1946c66b7

@ -44,17 +44,17 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%%capture\n",
"%env NEWS_API_KEY = YOUR_NEWS_API_KEY\n"
"%env NEWS_API_KEY = YOUR_API_KEY\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@ -66,13 +66,14 @@
"import openai # for using GPT and getting embeddings\n",
"import os # for loading environment variables\n",
"import requests # for making the API requests\n",
"from tqdm import tqdm # for printing progress bars\n",
"from tqdm.notebook import tqdm # for printing progress bars\n",
"\n",
"# Load environment variables\n",
"news_api_key = os.getenv(\"NEWS_API_KEY\")\n",
"\n",
"GPT_MODEL = \"gpt-3.5-turbo\"\n",
"\n",
"\n",
"# Helper functions\n",
"def json_gpt(input: str):\n",
" completion = openai.ChatCompletion.create(\n",
@ -91,9 +92,8 @@
"\n",
"\n",
"def embeddings(input: list[str]) -> list[list[float]]:\n",
" response = openai.Embedding.create(\n",
" model=\"text-embedding-ada-002\", input=input)\n",
" return [data.embedding for data in response.data]\n"
" response = openai.Embedding.create(model=\"text-embedding-ada-002\", input=input)\n",
" return [data.embedding for data in response.data]"
]
},
{
@ -108,7 +108,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@ -126,9 +126,40 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"['NBA championship winner',\n",
" 'MVP of NBA championship',\n",
" 'Last game of NBA championship',\n",
" 'NBA finals winner',\n",
" 'Most valuable player of NBA championship',\n",
" 'Finals game of NBA',\n",
" 'Who won the NBA finals',\n",
" 'NBA championship game summary',\n",
" 'NBA finals MVP',\n",
" 'Champion of NBA playoffs',\n",
" 'NBA finals last game highlights',\n",
" 'NBA championship series result',\n",
" 'NBA finals game score',\n",
" 'NBA finals game recap',\n",
" 'NBA champion team and player',\n",
" 'NBA finals statistics',\n",
" 'NBA championship final score',\n",
" 'NBA finals best player',\n",
" 'NBA playoffs champion and MVP',\n",
" 'NBA finals game analysis',\n",
" 'Who won the NBA championship? And who was the MVP? Tell me a bit about the last game.']"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"QUERIES_INPUT = f\"\"\"\n",
"You have access to a search API that returns recent news articles.\n",
@ -148,7 +179,7 @@
"# Let's include the original question as well for good measure\n",
"queries.append(USER_QUESTION)\n",
"\n",
"queries\n"
"queries"
]
},
{
@ -161,9 +192,54 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6c750d6e5b2846b6834bad47ea5bef8b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/21 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total number of articles: 554\n",
"Top 5 articles of query 1: \n",
"\n",
"Title: Nascar takes on Le Mans as LeBron James gets centenary race under way\n",
"Description: <ul><li>Nascar has presence at iconic race for first time since 1976</li><li>NBA superstar LeBron James waves flag as honorary starter</li></ul>The crowd chanted “U-S-A! U-S-A!” as Nascar driver lineup for the 24 Hours of Le Mans passed through the city cente…\n",
"Content: The crowd chanted U-S-A! U-S-A! as Nascar driver lineup for the 24 Hours of Le Mans passed through t...\n",
"\n",
"Title: NBA finals predictions: Nuggets or Heat? Our writers share their picks\n",
"Description: Denver or Miami? Our contributors pick the winner, key players and dark horses before the NBAs grand finale tips offA lot has been made of the importance of a balanced roster with continuity, but, somehow, still not enough. The Nuggets are the prime example …\n",
"Content: The Nuggets are here because \n",
"A lot has been made of the importance of a balanced roster with conti...\n",
"\n",
"Title: Unboxing: Michelob ULTRA and Artist Futura Enshrine the NBA Championship In Custom Hand-Painted Bottles\n",
"Description: As the 2022-2023 NBA Championship nears the end, Michelob ULTRA brings joy to sports fans who will gather to watch the showdown between the Denver Nuggets and Miami Heat. The beermaker teamed up with artist Futura to remix its newly-designed 2023 Champ Bottle…\n",
"Content: As the 2022-2023 NBA Championship nears the end, Michelob ULTRA brings joy to sports fans who will g...\n",
"\n",
"Title: Futura and Michelob ULTRA Toast to the NBA Finals With Abstract Artwork Crafted From the Brands 2023 Limited-Edition Championship Bottles\n",
"Description: The sun is out to play, and so is Michelob ULTRA. With the 2022-2023 NBA Finals underway, the beermaker is back with its celebratory NBA Champ Bottles. This year, the self-proclaimed MVP of joy is dropping a limited-edition bottle made in collaboration with a…\n",
"Content: The sun is out to play, and so is Michelob ULTRA. With the 2022-2023 NBA Finals underway, the beerma...\n",
"\n",
"Title: Signed and Delivered, Futura and Michelob ULTRA Will Gift Hand-Painted Bottles to This Years NBA Championship Team\n",
"Description: Michelob ULTRA, the MVP of joy and official beer sponsor of the NBA is back to celebrate with basketball lovers and sports fans around the globe as the NBA 2022-2023 season comes to a nail-biting close. In collaboration with artist Futura, Michelob ULTRA will…\n",
"Content: Michelob ULTRA, the MVP of joy and official beer sponsor of the NBA is back to celebrate with basket...\n",
"\n"
]
}
],
"source": [
"def search_news(\n",
" query: str,\n",
@ -194,15 +270,11 @@
" if result[\"status\"] == \"ok\":\n",
" articles = articles + result[\"articles\"]\n",
" else:\n",
" raise Exception(result[\"message\"])\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
" raise Exception(result[\"message\"])\n",
"\n",
"# remove duplicates\n",
"articles = list({article[\"url\"]: article for article in articles}.values())\n",
"\n",
"print(\"Total number of articles:\", len(articles))\n",
"print(\"Top 5 articles of query 1:\", \"\\n\")\n",
"\n",
@ -210,7 +282,7 @@
" print(\"Title:\", article[\"title\"])\n",
" print(\"Description:\", article[\"description\"])\n",
" print(\"Content:\", article[\"content\"][0:100] + \"...\")\n",
" print()"
" print()\n"
]
},
{
@ -227,9 +299,20 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"'The NBA championship was won by TEAM NAME. The MVP was awarded to PLAYER NAME. The last game was held at STADIUM NAME, where both teams played with great energy and enthusiasm. It was a close game, but in the end, TEAM NAME emerged victorious.'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"HA_INPUT = f\"\"\"\n",
"Generate a hypothetical answer to the user's question. This answer will be used to rank search results. \n",
@ -243,7 +326,7 @@
"\n",
"hypothetical_answer = json_gpt(HA_INPUT)[\"hypotheticalAnswer\"]\n",
"\n",
"hypothetical_answer"
"hypothetical_answer\n"
]
},
{
@ -256,9 +339,29 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"[0.7854456526852069,\n",
" 0.8086023500072106,\n",
" 0.8002998147018501,\n",
" 0.7961229569526956,\n",
" 0.798354506673743,\n",
" 0.758216458795653,\n",
" 0.7753754083127359,\n",
" 0.7494958338411927,\n",
" 0.804733946801739,\n",
" 0.8405965885235218]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"hypothetical_answer_embedding = embeddings(hypothetical_answer)[0]\n",
"article_embeddings = embeddings(\n",
@ -273,7 +376,7 @@
"for article_embedding in article_embeddings:\n",
" cosine_similarities.append(dot(hypothetical_answer_embedding, article_embedding))\n",
"\n",
"cosine_similarities[0:10]"
"cosine_similarities[0:10]\n"
]
},
{
@ -286,9 +389,43 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Top 5 articles: \n",
"\n",
"Title: NBA Finals: Denver Nuggets beat Miami Hea, lift thier first-ever NBA title\n",
"Description: Denver Nuggets won their maiden NBA Championship trophy defeating Miami Heat 94-89 in Game 5 of the NBA Final held on Tuesday at the Ball Arena in Denver\n",
"Content: Denver Nuggets won their maiden NBA Championship trophy defeating Miami Heat 94-89 in Game 5 of the ...\n",
"Score: 0.8445817523602124\n",
"\n",
"Title: Photos: Denver Nuggets celebrate their first NBA title\n",
"Description: The Nuggets capped off an impressive postseason by beating the Miami Heat in the NBA Finals.\n",
"Content: Thousands of supporters watched along the streets of Denver, Colorado as the US National Basketball ...\n",
"Score: 0.842070667753606\n",
"\n",
"Title: Denver Nuggets win first NBA championship title in Game 5 victory over Miami Heat\n",
"Description: The Denver Nuggets won their first NBA championship Monday night, downing the Miami Heat 94-89 at Ball Arena in Denver to take Game 5 of the NBA Finals.\n",
"Content: The Denver Nuggets won their first NBA championship Monday night, downing the Miami Heat 94-89 at Ba...\n",
"Score: 0.8409346078172385\n",
"\n",
"Title: Denver Nuggets Capture Their First NBA Championship Behind Unbreakable Chemistry\n",
"Description: After 47 years of waiting, the Denver Nuggets are NBA champions. Led by Nikola Jokic and Jamal Murray, they reached the mountain top by staying true to themselves.\n",
"Content: DENVER, CO - JUNE 12: Jamal Murray (27) of the Denver Nuggets celebrates as he leaves the court ... ...\n",
"Score: 0.8405965885235218\n",
"\n",
"Title: NBA Finals: Nikola Jokic, Denver Nuggets survive Miami Heat to secure franchise's first NBA championship\n",
"Description: In a rock-fight of a Game 5, the Denver Nuggets reached the NBA mountaintop from the foothills of the Rockies, winning their first-ever championship and setting Nikola Jokic's legacy as an all-timer in stone.\n",
"Content: DENVER, COLORADO - JUNE 12: Jamal Murray #27 of the Denver Nuggets reacts during the fourth quarter ...\n",
"Score: 0.8389716330890262\n",
"\n"
]
}
],
"source": [
"scored_articles = zip(articles, cosine_similarities)\n",
"\n",
@ -303,7 +440,7 @@
" print(\"Description:\", article[\"description\"])\n",
" print(\"Content:\", article[\"content\"][0:100] + \"...\")\n",
" print(\"Score:\", score)\n",
" print()"
" print()\n"
]
},
{
@ -318,9 +455,22 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 9,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/markdown": [
"The Denver Nuggets won their first-ever NBA championship by defeating the Miami Heat 94-89 in Game 5 of the NBA Finals held on Tuesday at the Ball Arena in Denver, according to this [Business Standard article](https://www.business-standard.com/sports/other-sports-news/nba-finals-denver-nuggets-beat-miami-hea-lift-thier-first-ever-nba-title-123061300285_1.html). Nikola Jokic, the Nuggets' center, was named the NBA Finals MVP. In a rock-fight of a Game 5, the Nuggets reached the NBA mountaintop, securing their franchise's first NBA championship and setting Nikola Jokic's legacy as an all-timer in stone, according to this [Yahoo Sports article](https://sports.yahoo.com/nba-finals-nikola-jokic-denver-nuggets-survive-miami-heat-to-secure-franchises-first-nba-championship-030321214.html). For more information and photos of the Nuggets' celebration, check out this [Al Jazeera article](https://www.aljazeera.com/gallery/2023/6/15/photos-denver-nuggets-celebrate-their-first-nba-title) and this [CNN article](https://www.cnn.com/2023/06/12/sport/denver-nuggets-nba-championship-spt-intl?cid=external-feeds_iluminar_yahoo)."
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"formatted_top_results = [\n",
" {\n",
@ -336,7 +486,7 @@
"TOP_RESULTS: {formatted_top_results}\n",
"USER_QUESTION: {USER_QUESTION}\n",
"\n",
"Include as much information as possible in the answer. Include references to the search results as markdown links.\n",
"Include as much information as possible in the answer. Reference the relevant search result urls as markdown links.\n",
"\"\"\"\n",
"\n",
"completion = openai.ChatCompletion.create(\n",
@ -350,7 +500,7 @@
"for chunk in completion:\n",
" text += chunk.choices[0].delta.get(\"content\", \"\")\n",
" display.clear_output(wait=True)\n",
" display.display(display.Markdown(text))\n"
" display.display(display.Markdown(text))"
]
}
],
@ -370,7 +520,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.9"
"version": "3.11.0"
},
"orig_nbformat": 4
},

Loading…
Cancel
Save