diff --git a/docs/use_cases/agent_simulations/gymnasium.ipynb b/docs/use_cases/agent_simulations/gymnasium.ipynb index ad35b500..766ed21b 100644 --- a/docs/use_cases/agent_simulations/gymnasium.ipynb +++ b/docs/use_cases/agent_simulations/gymnasium.ipynb @@ -12,22 +12,38 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "f36427cf", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: gymnasium in /Users/michaelchang/.miniconda3/envs/langchain/lib/python3.9/site-packages (0.28.1)\n", + "Requirement already satisfied: farama-notifications>=0.0.1 in /Users/michaelchang/.miniconda3/envs/langchain/lib/python3.9/site-packages (from gymnasium) (0.0.4)\n", + "Requirement already satisfied: importlib-metadata>=4.8.0 in /Users/michaelchang/.miniconda3/envs/langchain/lib/python3.9/site-packages (from gymnasium) (6.0.1)\n", + "Requirement already satisfied: cloudpickle>=1.2.0 in /Users/michaelchang/.miniconda3/envs/langchain/lib/python3.9/site-packages (from gymnasium) (2.2.1)\n", + "Requirement already satisfied: numpy>=1.21.0 in /Users/michaelchang/.miniconda3/envs/langchain/lib/python3.9/site-packages (from gymnasium) (1.24.3)\n", + "Requirement already satisfied: jax-jumpy>=1.0.0 in /Users/michaelchang/.miniconda3/envs/langchain/lib/python3.9/site-packages (from gymnasium) (1.0.0)\n", + "Requirement already satisfied: typing-extensions>=4.3.0 in /Users/michaelchang/.miniconda3/envs/langchain/lib/python3.9/site-packages (from gymnasium) (4.5.0)\n", + "Requirement already satisfied: zipp>=0.5 in /Users/michaelchang/.miniconda3/envs/langchain/lib/python3.9/site-packages (from importlib-metadata>=4.8.0->gymnasium) (3.15.0)\n" + ] + } + ], "source": [ "!pip install gymnasium" ] }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 2, "id": "f9bd38b4", "metadata": {}, "outputs": [], "source": [ "import gymnasium as gym\n", + "import inspect\n", "import tenacity\n", "\n", "from langchain.chat_models import ChatOpenAI\n", @@ -50,20 +66,20 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 3, "id": "870c24bc", "metadata": {}, "outputs": [], "source": [ - "def get_docs(env):\n", - " while 'env' in dir(env):\n", - " env = env.env\n", - " return env.__doc__\n", - "\n", - "class Agent():\n", + "class GymnasiumAgent():\n", + " @classmethod\n", + " def get_docs(cls, env):\n", + " return env.unwrapped.__doc__\n", + " \n", " def __init__(self, model, env):\n", " self.model = model\n", - " self.docs = get_docs(env)\n", + " self.env = env\n", + " self.docs = self.get_docs(env)\n", " \n", " self.instructions = \"\"\"\n", "Your goal is to maximize your return, i.e. the sum of the rewards you receive.\n", @@ -90,22 +106,17 @@ " self.message_history = []\n", " self.ret = 0\n", " \n", - " def reset(self, obs):\n", + " def random_action(self):\n", + " action = self.env.action_space.sample()\n", + " return action\n", + " \n", + " def reset(self):\n", " self.message_history = [\n", " SystemMessage(content=self.docs),\n", " SystemMessage(content=self.instructions),\n", " ]\n", - " obs_message = f\"\"\"\n", - "Observation: {obs}\n", - "Reward: 0\n", - "Termination: False\n", - "Truncation: False\n", - "Return: 0\n", - " \"\"\"\n", - " self.message_history.append(HumanMessage(content=obs_message))\n", - " return obs_message\n", " \n", - " def observe(self, obs, rew, term, trunc, info):\n", + " def observe(self, obs, rew=0, term=False, trunc=False, info=None):\n", " self.ret += rew\n", " \n", " obs_message = f\"\"\"\n", @@ -117,16 +128,25 @@ " \"\"\"\n", " self.message_history.append(HumanMessage(content=obs_message))\n", " return obs_message\n", - " \n", - " @tenacity.retry(stop=tenacity.stop_after_attempt(2),\n", - " wait=tenacity.wait_none(), # No waiting time between retries\n", - " retry=tenacity.retry_if_exception_type(ValueError),\n", - " before_sleep=lambda retry_state: print(f\"ValueError occurred: {retry_state.outcome.exception()}, retrying...\"),\n", - " retry_error_callback=lambda retry_state: 0) # Default value when all retries are exhausted\n", - " def act(self):\n", + " \n", + " def _act(self):\n", " act_message = self.model(self.message_history)\n", " self.message_history.append(act_message)\n", " action = int(self.action_parser.parse(act_message.content)['action'])\n", + " return action\n", + " \n", + " def act(self):\n", + " try:\n", + " for attempt in tenacity.Retrying(\n", + " stop=tenacity.stop_after_attempt(2),\n", + " wait=tenacity.wait_none(), # No waiting time between retries\n", + " retry=tenacity.retry_if_exception_type(ValueError),\n", + " before_sleep=lambda retry_state: print(f\"ValueError occurred: {retry_state.outcome.exception()}, retrying...\"),\n", + " ):\n", + " with attempt:\n", + " action = self._act()\n", + " except tenacity.RetryError as e:\n", + " action = self.random_action()\n", " return action" ] }, @@ -140,13 +160,13 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 4, "id": "9e902cfd", "metadata": {}, "outputs": [], "source": [ "env = gym.make(\"Blackjack-v1\")\n", - "agent = Agent(model=ChatOpenAI(temperature=0.2), env=env)" + "agent = GymnasiumAgent(model=ChatOpenAI(temperature=0.2), env=env)" ] }, { @@ -159,7 +179,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 5, "id": "ad361210", "metadata": {}, "outputs": [ @@ -168,7 +188,7 @@ "output_type": "stream", "text": [ "\n", - "Observation: (10, 3, 0)\n", + "Observation: (15, 4, 0)\n", "Reward: 0\n", "Termination: False\n", "Truncation: False\n", @@ -176,19 +196,11 @@ " \n", "Action: 1\n", "\n", - "Observation: (18, 3, 0)\n", - "Reward: 0.0\n", - "Termination: False\n", - "Truncation: False\n", - "Return: 0.0\n", - " \n", - "Action: 0\n", - "\n", - "Observation: (18, 3, 0)\n", - "Reward: 1.0\n", + "Observation: (25, 4, 0)\n", + "Reward: -1.0\n", "Termination: True\n", "Truncation: False\n", - "Return: 1.0\n", + "Return: -1.0\n", " \n", "break True False\n" ] @@ -196,14 +208,18 @@ ], "source": [ "observation, info = env.reset()\n", - "obs_message = agent.reset(observation)\n", + "agent.reset()\n", + "\n", + "obs_message = agent.observe(observation)\n", "print(obs_message)\n", + "\n", "while True:\n", " action = agent.act()\n", " observation, reward, termination, truncation, info = env.step(action)\n", " obs_message = agent.observe(observation, reward, termination, truncation, info)\n", " print(f'Action: {action}')\n", " print(obs_message)\n", + " \n", " if termination or truncation:\n", " print('break', termination, truncation)\n", " break\n",