diff --git a/examples/How_to_call_functions_with_chat_models.ipynb b/examples/How_to_call_functions_with_chat_models.ipynb index 072df587..2952ad21 100644 --- a/examples/How_to_call_functions_with_chat_models.ipynb +++ b/examples/How_to_call_functions_with_chat_models.ipynb @@ -1,7 +1,6 @@ { "cells": [ { - "attachments": {}, "cell_type": "markdown", "id": "3e67f200", "metadata": {}, @@ -48,47 +47,45 @@ "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: scipy in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (1.10.1)\n", - "Requirement already satisfied: numpy<1.27.0,>=1.19.5 in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (from scipy) (1.24.3)\n", - "Requirement already satisfied: tenacity in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (8.2.2)\n", - "Requirement already satisfied: tiktoken in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (0.4.0)\n", - "Requirement already satisfied: regex>=2022.1.18 in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (from tiktoken) (2023.6.3)\n", - "Requirement already satisfied: requests>=2.26.0 in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (from tiktoken) (2.31.0)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (from requests>=2.26.0->tiktoken) (3.1.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (from requests>=2.26.0->tiktoken) (3.4)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (from requests>=2.26.0->tiktoken) (2.0.2)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (from requests>=2.26.0->tiktoken) (2023.5.7)\n", - "Requirement already satisfied: termcolor in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (2.3.0)\n", - "Requirement already satisfied: openai in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (0.27.7)\n", - "Requirement already satisfied: requests>=2.20 in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (from openai) (2.31.0)\n", - "Requirement already satisfied: tqdm in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (from openai) (4.65.0)\n", - "Requirement already satisfied: aiohttp in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (from openai) (3.8.4)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (from requests>=2.20->openai) (3.1.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (from requests>=2.20->openai) (3.4)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (from requests>=2.20->openai) (2.0.2)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (from requests>=2.20->openai) (2023.5.7)\n", - "Requirement already satisfied: attrs>=17.3.0 in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (from aiohttp->openai) (23.1.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (from aiohttp->openai) (6.0.4)\n", - "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (from aiohttp->openai) (4.0.2)\n", - "Requirement already satisfied: yarl<2.0,>=1.0 in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (from aiohttp->openai) (1.9.2)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (from aiohttp->openai) (1.3.3)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (from aiohttp->openai) (1.3.1)\n", - "Requirement already satisfied: requests in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (2.31.0)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (from requests) (3.1.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (from requests) (3.4)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (from requests) (2.0.2)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (from requests) (2023.5.7)\n", - "Requirement already satisfied: arxiv in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (1.4.7)\n", - "Requirement already satisfied: feedparser in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (from arxiv) (6.0.10)\n", - "Requirement already satisfied: sgmllib3k in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (from feedparser->arxiv) (1.0.0)\n", - "Requirement already satisfied: pandas in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (2.0.2)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (from pandas) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (from pandas) (2023.3)\n", - "Requirement already satisfied: tzdata>=2022.1 in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (from pandas) (2023.3)\n", - "Requirement already satisfied: numpy>=1.20.3 in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (from pandas) (1.24.3)\n", - "Requirement already satisfied: six>=1.5 in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n", - "Requirement already satisfied: PyPDF2 in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (3.0.1)\n", - "Requirement already satisfied: typing_extensions>=3.10.0.0 in /Users/joe/.virtualenvs/openai-cookbook-internal/lib/python3.9/site-packages (from PyPDF2) (4.6.3)\n" + "Requirement already satisfied: scipy in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (1.10.1)\n", + "Requirement already satisfied: numpy<1.27.0,>=1.19.5 in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (from scipy) (1.24.2)\n", + "Requirement already satisfied: tenacity in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (8.2.2)\n", + "Requirement already satisfied: tiktoken in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (0.4.0)\n", + "Requirement already satisfied: regex>=2022.1.18 in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (from tiktoken) (2023.6.3)\n", + "Requirement already satisfied: requests>=2.26.0 in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (from tiktoken) (2.28.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (2.1.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (3.4)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (1.26.14)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (from requests>=2.26.0->tiktoken) (2022.12.7)\n", + "Requirement already satisfied: termcolor in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (2.3.0)\n", + "Requirement already satisfied: openai in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (0.27.7)\n", + "Requirement already satisfied: requests>=2.20 in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (from openai) (2.28.2)\n", + "Requirement already satisfied: tqdm in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (from openai) (4.64.1)\n", + "Requirement already satisfied: aiohttp in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (from openai) (3.8.3)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (from requests>=2.20->openai) (2.1.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (from requests>=2.20->openai) (3.4)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (from requests>=2.20->openai) (1.26.14)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (from requests>=2.20->openai) (2022.12.7)\n", + "Requirement already satisfied: attrs>=17.3.0 in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (from aiohttp->openai) (22.2.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (from aiohttp->openai) (6.0.4)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (from aiohttp->openai) (4.0.2)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (from aiohttp->openai) (1.8.2)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (from aiohttp->openai) (1.3.3)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (from aiohttp->openai) (1.3.1)\n", + "Requirement already satisfied: requests in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (2.28.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (from requests) (2.1.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (from requests) (3.4)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (from requests) (1.26.14)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (from requests) (2022.12.7)\n", + "Requirement already satisfied: arxiv in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (1.4.7)\n", + "Requirement already satisfied: feedparser in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (from arxiv) (6.0.10)\n", + "Requirement already satisfied: sgmllib3k in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (from feedparser->arxiv) (1.0.0)\n", + "Requirement already satisfied: pandas in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (1.5.3)\n", + "Requirement already satisfied: python-dateutil>=2.8.1 in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (from pandas) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (from pandas) (2022.7.1)\n", + "Requirement already satisfied: numpy>=1.21.0 in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (from pandas) (1.24.2)\n", + "Requirement already satisfied: six>=1.5 in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n", + "Requirement already satisfied: PyPDF2 in /Users/colin.jarvis/Documents/dev/openai_scratchpad/openai_test/lib/python3.10/site-packages (3.0.1)\n" ] } ], @@ -133,7 +130,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "69ee6a93", "metadata": {}, @@ -204,7 +200,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "29d4e02b", "metadata": {}, @@ -264,7 +259,8 @@ { "data": { "text/plain": [ - "{'role': 'assistant', 'content': 'Where are you currently located?'}" + "{'role': 'assistant',\n", + " 'content': 'Sure, can you please provide me with your location or the city you want to know the weather for?'}" ] }, "execution_count": 7, @@ -314,7 +310,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "b4482aee", "metadata": {}, @@ -327,7 +322,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "f7654fef", "metadata": {}, @@ -393,7 +387,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "77e6e5ea", "metadata": {}, @@ -418,7 +411,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "ae73c9ee", "metadata": {}, @@ -457,7 +449,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "da08c121", "metadata": {}, @@ -585,14 +576,14 @@ "output_type": "stream", "text": [ "Function generation requested, calling function\n", - "Prepped query is {'query': 'SELECT Artist.Name, COUNT(Track.TrackId) AS NumberOfTracks FROM Artist JOIN Album ON Artist.ArtistId = Album.ArtistId JOIN Track ON Album.AlbumId = Track.AlbumId GROUP BY Artist.Name ORDER BY NumberOfTracks DESC LIMIT 5;'}\n", - "The top 5 artists by number of tracks in the Chinook database are:\n", + "Prepped query is {'query': 'SELECT a.Name AS Artist, COUNT(t.TrackId) AS NumTracks FROM Artist a JOIN Album al ON a.ArtistId = al.ArtistId JOIN Track t ON al.AlbumId = t.AlbumId GROUP BY a.Name ORDER BY NumTracks DESC LIMIT 5'}\n", + "The top 5 artists in the Chinook Music Database based on the number of tracks they have are:\n", "\n", - "1. Iron Maiden with 213 tracks\n", - "2. U2 with 135 tracks\n", - "3. Led Zeppelin with 114 tracks\n", - "4. Metallica with 112 tracks\n", - "5. Lost with 92 tracks.\n" + "1. Iron Maiden - 213 tracks\n", + "2. U2 - 135 tracks\n", + "3. Led Zeppelin - 114 tracks\n", + "4. Metallica - 112 tracks\n", + "5. Lost - 92 tracks\n" ] } ], @@ -629,13 +620,13 @@ "\u001b[35mfunction: [('Iron Maiden', 213), ('U2', 135), ('Led Zeppelin', 114), ('Metallica', 112), ('Lost', 92)]\n", "\n", "\u001b[0m\n", - "\u001b[34massistant: The top 5 artists by number of tracks in the Chinook database are:\n", + "\u001b[34massistant: The top 5 artists in the Chinook Music Database based on the number of tracks they have are:\n", "\n", - "1. Iron Maiden with 213 tracks\n", - "2. U2 with 135 tracks\n", - "3. Led Zeppelin with 114 tracks\n", - "4. Metallica with 112 tracks\n", - "5. Lost with 92 tracks.\n", + "1. Iron Maiden - 213 tracks\n", + "2. U2 - 135 tracks\n", + "3. Led Zeppelin - 114 tracks\n", + "4. Metallica - 112 tracks\n", + "5. Lost - 92 tracks\n", "\n", "\u001b[0m\n" ] @@ -669,13 +660,24 @@ "output_type": "stream", "text": [ "Function generation requested, calling function\n", - "Prepped query is {'query': 'SELECT Album.Title, COUNT(Track.TrackId) AS number_of_tracks FROM Album JOIN Track ON Album.AlbumId = Track.AlbumId GROUP BY Album.AlbumId ORDER BY number_of_tracks DESC LIMIT 1'}\n" + "Prepped query is {'query': 'SELECT AlbumId, Title, COUNT(TrackId) AS TrackCount FROM Album GROUP BY AlbumId ORDER BY TrackCount DESC LIMIT 1;'}\n", + "SQL error: no such column: TrackId\n", + "\n", + "SELECT a.Title, COUNT(t.TrackId) as TrackCount\n", + "FROM Album a\n", + "JOIN Track t ON a.AlbumId = t.AlbumId\n", + "GROUP BY a.AlbumId, a.Title\n", + "ORDER BY TrackCount DESC\n", + "LIMIT 1;\n", + "\n", + "[('Greatest Hits', 57)]\n", + "Got on second try\n" ] }, { "data": { "text/plain": [ - "\"The album with the most tracks in the Chinook database is 'Greatest Hits' with a total of 57 tracks.\"" + "'The album with the most tracks in the Chinook Music Database is \"Greatest Hits\" with a total of 57 tracks.'" ] }, "execution_count": 18, @@ -722,22 +724,28 @@ "\u001b[35mfunction: [('Iron Maiden', 213), ('U2', 135), ('Led Zeppelin', 114), ('Metallica', 112), ('Lost', 92)]\n", "\n", "\u001b[0m\n", - "\u001b[34massistant: The top 5 artists by number of tracks in the Chinook database are:\n", + "\u001b[34massistant: The top 5 artists in the Chinook Music Database based on the number of tracks they have are:\n", "\n", - "1. Iron Maiden with 213 tracks\n", - "2. U2 with 135 tracks\n", - "3. Led Zeppelin with 114 tracks\n", - "4. Metallica with 112 tracks\n", - "5. Lost with 92 tracks.\n", + "1. Iron Maiden - 213 tracks\n", + "2. U2 - 135 tracks\n", + "3. Led Zeppelin - 114 tracks\n", + "4. Metallica - 112 tracks\n", + "5. Lost - 92 tracks\n", "\n", "\u001b[0m\n", "\u001b[32muser: What is the name of the album with the most tracks\n", "\n", "\u001b[0m\n", + "\u001b[31msystem: Query: SELECT AlbumId, Title, COUNT(TrackId) AS TrackCount FROM Album GROUP BY AlbumId ORDER BY TrackCount DESC LIMIT 1;\n", + "The previous query received the error SQL error: no such column: TrackId. \n", + "Please return a fixed SQL query in plain text.\n", + "Your response should consist of ONLY the SQL query with the separator sql_start at the beginning and sql_end at the end\n", + "\n", + "\u001b[0m\n", "\u001b[35mfunction: [('Greatest Hits', 57)]\n", "\n", "\u001b[0m\n", - "\u001b[34massistant: The album with the most tracks in the Chinook database is 'Greatest Hits' with a total of 57 tracks.\n", + "\u001b[34massistant: The album with the most tracks in the Chinook Music Database is \"Greatest Hits\" with a total of 57 tracks.\n", "\n", "\u001b[0m\n" ] @@ -748,7 +756,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "c282b9e3", "metadata": {}, @@ -763,7 +770,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "f2e47962", "metadata": {}, @@ -1023,7 +1029,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 4/4 [00:06<00:00, 1.50s/it]\n" + "100%|█████████████████████████████████████████████████████████████████████████████| 4/4 [00:03<00:00, 1.33it/s]\n" ] }, { @@ -1049,23 +1055,20 @@ "name": "stdout", "output_type": "stream", "text": [ - "- Core Argument:\n", - " - The authors argue that Proximal Policy Optimization (PPO) and its dynamic version (PPO-dynamic) can effectively replace policy gradient for model optimization in sequence generation tasks.\n", - " - They propose a modification to the constraints of PPO to make it more dynamic and flexible, which further improves the training.\n", - " - The authors also argue that the fixed hyperparameter in PPO, which aims to bound the KL-divergence, is not consistent with the actual KL-divergence that depends on the old policy. They propose dynamic parameters that adjust the bound for better constraints.\n", + "Core Argument:\n", + "- The paper discusses the use of Proximal Policy Optimization (PPO) in sequence generation tasks, specifically in the context of chit-chat chatbots.\n", + "- The authors argue that PPO is a more efficient reinforcement learning algorithm compared to policy gradient, which is commonly used in these tasks.\n", + "- They propose a dynamic approach for PPO (PPO-dynamic) and demonstrate its efficacy in synthetic experiments and chit-chat chatbot tasks.\n", "\n", - "- Evidence:\n", - " - The paper demonstrates the efficacy of PPO and PPO-dynamic on conditional sequence generation tasks, including synthetic experiments and chit-chat chatbots.\n", - " - The authors tested their methods on a synthetic counting task and a chit-chat chatbot task, showing that both PPO and PPO-dynamic can stabilize training and generate more diverse outputs.\n", - " - The authors provide the pseudo code for PPO and PPO-dynamic, which is similar to the original PPO algorithm.\n", - " - They also analyze the distribution of the first output on a counting task, finding that using the PPO method generates a more scattered distribution.\n", - " - The authors use REINFORCE and PPO-dynamic algorithms to generate responses in a chatbot context, demonstrating the differences in their outputs.\n", + "Evidence:\n", + "- PPO-dynamic achieves high precision scores in a synthetic counting task, comparable to other algorithms like REINFORCE and MIXER.\n", + "- In the chit-chat chatbot task, PPO-dynamic achieves a slightly higher BLEU-2 score than REINFORCE and PPO.\n", + "- The learning curve of PPO-dynamic is more stable and faster than policy gradient.\n", "\n", - "- Conclusions:\n", - " - The results show that PPO and PPO-dynamic outperform policy gradient in terms of stability and performance.\n", - " - PPO-dynamic also sped up the convergence.\n", - " - The authors conclude that PPO is a better method for sequence learning and that GAN-based sequence learning can use PPO for improved performance.\n", - " - They also conclude that a shorter input length should correspond to a higher variance in the context of a chatbot, and vice versa.\n" + "Conclusions:\n", + "- PPO is a better optimization method for sequence learning compared to policy gradient.\n", + "- PPO-dynamic further improves the optimization process by dynamically adjusting the hyperparameters.\n", + "- PPO can be used as a new optimization method for GAN-based sequence learning for better performance.\n" ] } ], @@ -1074,7 +1077,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "93a9f651", "metadata": {}, @@ -1191,7 +1193,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "dd3e7868", "metadata": {}, @@ -1227,17 +1228,43 @@ "name": "stdout", "output_type": "stream", "text": [ - "Function not required, responding to user\n" + "Function generation requested, calling function\n", + "Finding and reading paper\n", + "Chunking text from paper\n", + "Summarizing each chunk of text\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|███████████████████████████████████████████████████████████████████████████| 17/17 [00:04<00:00, 3.68it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Summarizing into overall summary\n" ] }, { "data": { "text/markdown": [ - "Proximal Policy Optimization (PPO) is a type of reinforcement learning algorithm that balances the benefits of other policy optimization methods: it can have a pace comparable to Stochastic Gradient Descent, is less complex to implement, has fewer hyperparameters to tune, and does not require a second-order optimization. \n", + "Core Argument:\n", + "- The paper focuses on the theoretical analysis of the PPO-Clip algorithm in the context of deep reinforcement learning.\n", + "- The paper aims to establish the first global convergence rate guarantee for PPO-Clip under neural function approximation.\n", "\n", - "In reinforcement learning, an agent learns to perform actions in an environment to maximize some notion of cumulative reward. PPO, designed by OpenAI, introduces a novel objective function that takes the best of both worlds: like Trust Region Policy Optimization (TRPO), it uses a trust region to ensure stable updates, but like Clipped Policy Gradient, it avoids the complexity associated with constraining the learning process within a certain region.\n", + "Evidence:\n", + "- The authors identify challenges in analyzing PPO-Clip, including the lack of a closed-form expression for policy updates and the coupling between clipping behavior and neural function approximation.\n", + "- The authors propose two core ideas: reinterpreting PPO-Clip from the perspective of hinge loss and introducing a two-step policy improvement scheme.\n", + "- The paper provides theoretical proofs, lemmas, and analysis to support the convergence properties of PPO-Clip and Neural PPO-Clip.\n", + "- Experimental evaluations on reinforcement learning benchmark tasks validate the effectiveness of PPO-Clip.\n", "\n", - "If you would like a more detailed explanation or academic resources on PPO, I can look up some papers for you." + "Conclusions:\n", + "- The paper establishes the global convergence of PPO-Clip and characterizes its convergence rate as O(1/sqrt(T)).\n", + "- The reinterpretation of PPO-Clip through hinge loss offers a framework for generalization.\n", + "- The paper provides insights into the interplay between convergence behavior and the clipping mechanism in PPO-Clip." ], "text/plain": [ "" @@ -1269,28 +1296,40 @@ "output_type": "stream", "text": [ "Function generation requested, calling function\n", - "Getting search results\n", - "Got search results, summarizing content\n" + "Finding and reading paper\n", + "Chunking text from paper\n", + "Summarizing each chunk of text\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|█████████████████████████████████████████████████████████████████████████████| 4/4 [00:03<00:00, 1.33it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Summarizing into overall summary\n" ] }, { "data": { "text/markdown": [ - "Sure, here are summaries for some papers I found that focus on Proximal Policy Optimization (PPO) for sequence generation:\n", - "\n", - "1. Title: [Proximal Policy Optimization and its Dynamic Version for Sequence Generation](http://arxiv.org/abs/1808.07982v1)\n", - " The paper presents a method of replacing policy gradient with Proximal Policy Optimization (PPO) for sequence generation tasks. The authors introduce a dynamic approach to PPO (PPO-dynamic) and demonstrate its efficacy in conditional sequence generation tasks including synthetic experiments and chit-chat chatbots. The results show that both PPO and PPO-dynamic outperform policy gradient in terms of stability and performance.\n", - "\n", - "2. Title: [Neural PPO-Clip Attains Global Optimality: A Hinge Loss Perspective](http://arxiv.org/abs/2110.13799v4)\n", - " This paper provides a global convergence rate for the PPO-Clip algorithm under neural function approximation. The authors reinterpret PPO-Clip from the perspective of hinge loss, connecting policy improvement with solving a large-margin classification problem. The paper also proposes a two-step policy improvement scheme that helps with the convergence analysis.\n", - "\n", - "3. Title: [A2C is a special case of PPO](http://arxiv.org/abs/2205.09123v1)\n", - " The paper reveals an intriguing connection between Advantage Actor-Critic (A2C) and PPO algorithms. The authors argue that A2C can be viewed as a special case of PPO and provide theoretical justifications and pseudocode analysis to support their claim.\n", + "Core Argument:\n", + "The paper discusses the use of Proximal Policy Optimization (PPO) in sequence generation tasks, specifically in the context of chit-chat chatbots. The authors argue that PPO is a more efficient reinforcement learning algorithm compared to policy gradient, which is commonly used in these tasks. They propose a dynamic approach for PPO (PPO-dynamic) and demonstrate its efficacy in synthetic experiments and chit-chat chatbot tasks.\n", "\n", - "4. Title: [Continuous-action Reinforcement Learning for Playing Racing Games: Comparing SPG to PPO](http://arxiv.org/abs/2001.05270v1)\n", - " This paper compares Sampled Policy Gradient (SPG) and Proximal Policy Optimization (PPO) in the context of a racing game environment. While the focus is not strictly on sequence generation, this might still provide some interesting insights on PPO's performance in continuous-action settings.\n", + "Evidence:\n", + "- PPO-dynamic achieves high precision scores in a synthetic counting task, comparable to other algorithms like REINFORCE and MIXER.\n", + "- In the chit-chat chatbot task, PPO-dynamic achieves a slightly higher BLEU-2 score than REINFORCE and PPO.\n", + "- The learning curve of PPO-dynamic is more stable and faster than policy gradient.\n", "\n", - "Please confirm if you want more detailed summaries or if you would like to read them directly." + "Conclusions:\n", + "- PPO is a better optimization method for sequence learning compared to policy gradient.\n", + "- PPO-dynamic further improves the optimization process by dynamically adjusting the hyperparameters.\n", + "- PPO can be used as a new optimization method for GAN-based sequence learning for better performance." ], "text/plain": [ "" @@ -1315,9 +1354,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "tua", "language": "python", - "name": "python3" + "name": "tua" }, "language_info": { "codemirror_mode": { @@ -1329,7 +1368,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.9" + "version": "3.10.11" } }, "nbformat": 4, diff --git a/examples/data/Chinook.db b/examples/data/Chinook.db new file mode 100644 index 00000000..8c687ffe Binary files /dev/null and b/examples/data/Chinook.db differ diff --git a/examples/data/papers/.gitkeep b/examples/data/papers/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/examples/data/papers/1808.07982v1.Proximal_Policy_Optimization_and_its_Dynamic_Version_for_Sequence_Generation.pdf b/examples/data/papers/1808.07982v1.Proximal_Policy_Optimization_and_its_Dynamic_Version_for_Sequence_Generation.pdf new file mode 100644 index 00000000..b10db334 Binary files /dev/null and b/examples/data/papers/1808.07982v1.Proximal_Policy_Optimization_and_its_Dynamic_Version_for_Sequence_Generation.pdf differ diff --git a/examples/data/papers/2011.05525v1.Proximal_Policy_Optimization_via_Enhanced_Exploration_Efficiency.pdf b/examples/data/papers/2011.05525v1.Proximal_Policy_Optimization_via_Enhanced_Exploration_Efficiency.pdf new file mode 100644 index 00000000..fbdb5a0b Binary files /dev/null and b/examples/data/papers/2011.05525v1.Proximal_Policy_Optimization_via_Enhanced_Exploration_Efficiency.pdf differ diff --git a/examples/data/papers/2110.10522v2.CIM_PPO_Proximal_Policy_Optimization_with_Liu_Correntropy_Induced_Metric.pdf b/examples/data/papers/2110.10522v2.CIM_PPO_Proximal_Policy_Optimization_with_Liu_Correntropy_Induced_Metric.pdf new file mode 100644 index 00000000..ed80f0b1 Binary files /dev/null and b/examples/data/papers/2110.10522v2.CIM_PPO_Proximal_Policy_Optimization_with_Liu_Correntropy_Induced_Metric.pdf differ diff --git a/examples/data/papers/2110.13799v4.Neural_PPO_Clip_Attains_Global_Optimality_A_Hinge_Loss_Perspective.pdf b/examples/data/papers/2110.13799v4.Neural_PPO_Clip_Attains_Global_Optimality_A_Hinge_Loss_Perspective.pdf new file mode 100644 index 00000000..65625abd Binary files /dev/null and b/examples/data/papers/2110.13799v4.Neural_PPO_Clip_Attains_Global_Optimality_A_Hinge_Loss_Perspective.pdf differ diff --git a/examples/data/papers/2205.09123v1.A2C_is_a_special_case_of_PPO.pdf b/examples/data/papers/2205.09123v1.A2C_is_a_special_case_of_PPO.pdf new file mode 100644 index 00000000..b6ea0e71 Binary files /dev/null and b/examples/data/papers/2205.09123v1.A2C_is_a_special_case_of_PPO.pdf differ