{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "d6625b81", "metadata": {}, "outputs": [], "source": [ "# set up matplotlib and rendering\n", "import matplotlib\n", "import matplotlib.pyplot as plt\n", "import matplotlib.animation as animation\n", "\n", "from IPython import display" ] }, { "cell_type": "markdown", "id": "65db2637", "metadata": {}, "source": [ "# OpenAI Gym\n", "https://gym.openai.com/docs/\n", "simple tutorial\n", "\n", "https://gym.openai.com/envs/#robotics\n", "Robotics environments\n", "\n", "https://www.gymlibrary.ml/environments/mujoco/ MuJoCo environments\n", "\n", "https://www.gymlibrary.ml/ documentation\n", "\n", "https://www.gymlibrary.ml/content/environment_creation/ Creating custom environments" ] }, { "cell_type": "code", "execution_count": 2, "id": "75081cb9", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/yasu/.local/lib/python3.10/site-packages/gym/wrappers/monitoring/video_recorder.py:9: DeprecationWarning: The distutils package is deprecated and slated for removal in Python 3.12. Use setuptools or check PEP 632 for potential alternatives\n", " import distutils.spawn\n", "/home/yasu/.local/lib/python3.10/site-packages/IPython/core/display.py:419: UserWarning: Consider using IPython.display.IFrame instead\n", " warnings.warn(\"Consider using IPython.display.IFrame instead\")\n" ] }, { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import gym\n", "display.HTML('')" ] }, { "cell_type": "markdown", "id": "dcda7752", "metadata": {}, "source": [ "## MuJoCo environment\n", "The MuJoCo environments for Gym should already be available if `mujoco-py` is installed.\n", "\n", "Note: the notebook kernel crashes if any of the other environments are run before this one (cause unknown), so run the MuJoCo cell first after restarting the kernel." 
] },
{ "cell_type": "code", "execution_count": null, "id": "a7c3e5f1", "metadata": {}, "outputs": [], "source": [
  "# Shared helper for the rendering cells below.\n",
  "def show_random_rollout(env_id, n_steps=20, interval=50, **render_kwargs):\n",
  "    \"\"\"Roll out a random policy in a Gym environment and display it as an animation.\n",
  "\n",
  "    Args:\n",
  "        env_id: environment ID passed to `gym.make`.\n",
  "        n_steps: number of frames to render (one random action is taken per frame).\n",
  "        interval: delay between animation frames in milliseconds.\n",
  "        **render_kwargs: extra keyword arguments forwarded to `env.render`,\n",
  "            e.g. `width=256, height=256`.\n",
  "    \"\"\"\n",
  "    env = gym.make(env_id)\n",
  "    fig, ax = plt.subplots()\n",
  "    try:\n",
  "        try:\n",
  "            env.reset()\n",
  "            frames = []\n",
  "            for _ in range(n_steps):\n",
  "                frame = env.render(mode='rgb_array', **render_kwargs)\n",
  "                frames.append([ax.imshow(frame)])\n",
  "                _obs, _reward, done, _info = env.step(env.action_space.sample())  # take a random action\n",
  "                if done:\n",
  "                    # stepping a finished episode is undefined behaviour: start a new episode instead\n",
  "                    env.reset()\n",
  "        finally:\n",
  "            # release the environment as soon as the frames are captured, even on error\n",
  "            env.close()\n",
  "        ani = animation.ArtistAnimation(fig, frames, interval=interval)\n",
  "        display.display(display.HTML(ani.to_jshtml()))\n",
  "    finally:\n",
  "        # close the figure created above (not just the current one), even on error\n",
  "        plt.close(fig)"
] },
{ "cell_type": "code", "execution_count": 3, "id": "82113be1", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/yasu/.local/lib/python3.10/site-packages/gym/envs/registration.py:505: UserWarning: \u001b[33mWARN: The environment Ant-v2 is out of date. You should consider upgrading to version `v3` with the environment ID `Ant-v3`.\u001b[0m\n", " logger.warn(\n", "/home/yasu/.local/lib/python3.10/site-packages/mujoco_py/builder.py:9: DeprecationWarning: The distutils.sysconfig module is deprecated, use sysconfig instead\n", " from distutils.sysconfig import customize_compiler\n", ":283: DeprecationWarning: the load_module() method is deprecated and slated for removal in Python 3.12; use exec_module() instead\n" ] }, { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " Once\n", " \n", " Loop\n", " \n", " Reflect\n", " \n", " \n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
  "show_random_rollout('Ant-v2', width=256, height=256)"
] },
{ "cell_type": "markdown", "id": "5403bee8", "metadata": {}, "source": [ "## Robotics environment\n", "```bash\n", "pip3 install gym-robotics\n", "```\n", "This must also be run before the other environments..." ] },
{ "cell_type": "code", "execution_count": 4, "id": "53142e32", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " Once\n", " \n", " Loop\n", " \n", " Reflect\n", " \n", " \n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
  "show_random_rollout('HandManipulateBlock-v0', width=256, height=256)"
] },
{ "cell_type": "markdown", "id": "f4b71c2b", "metadata": {}, "source": [ "## Box2D environments\n", "https://www.gymlibrary.ml/environments/box2d/\n", "\n", "to get the Box2D environment...\n", "```bash\n", "sudo apt install swig\n", "pip3 install box2d\n", "pip3 install box2d-py\n", "```" ] },
{ "cell_type": "code", "execution_count": 5, "id": "5b6999b0", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Track generation: 1021..1288 -> 267-tiles track\n" ] }, { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " Once\n", " \n", " Loop\n", " \n", " Reflect\n", " \n", " \n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
  "show_random_rollout('CarRacing-v1')"
] },
{ "cell_type": "code", "execution_count": 6, "id": "415b0b94", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " Once\n", " \n", " Loop\n", " \n", " Reflect\n", " \n", " \n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
  "show_random_rollout('BipedalWalker-v3')"
] },
{ "cell_type": "code", "execution_count": 7, "id": "ef93145a", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/yasu/.local/lib/python3.10/site-packages/gym/envs/classic_control/cartpole.py:163: UserWarning: \u001b[33mWARN: You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.\u001b[0m\n", " logger.warn(\n" ] }, { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " Once\n", " \n", " Loop\n", " \n", " Reflect\n", " \n", " \n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
  "show_random_rollout('CartPole-v1', n_steps=50)"
] },
{ "cell_type": "code", "execution_count": 8, "id": "d39ee291", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " Once\n", " \n", " Loop\n", " \n", " Reflect\n", " \n", " \n", "\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
  "show_random_rollout('MountainCar-v0', n_steps=50)"
] },
{ "cell_type": "markdown", "id": "8a6f54b9", "metadata": {}, "source": [ "let's 
look at the action / observation space, and what `env.step` returns..." ] },
{ "cell_type": "code", "execution_count": 9, "id": "be0011f6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "info:{}\n", "action space:Discrete(2)\n", "observation space:Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32)\n", "reward range:(-inf, inf)\n", "----------\n", "observation:[-0.00195471 -0.17760813 0.0406442 0.35323688]\n", "reward:1.0\n", "done:False\n", "info:{}\n", "----------\n", "observation:[-0.00550687 -0.37328374 0.04770894 0.65845406]\n", "reward:1.0\n", "done:False\n", "info:{}\n", "----------\n", "observation:[-0.01297255 -0.5690361 0.06087802 0.9657696 ]\n", "reward:1.0\n", "done:False\n", "info:{}\n" ] } ], "source": [
  "env = gym.make('CartPole-v1')\n",
  "try:\n",
  "    # a single reset is enough; return_info=True makes reset() return (observation, info)\n",
  "    observation, info = env.reset(return_info=True)\n",
  "    print(f\"info:{info}\")\n",
  "    # one action pushes cart to right and the other to the left\n",
  "    print(f\"action space:{env.action_space}\")\n",
  "    print(f\"observation space:{env.observation_space}\")\n",
  "    print(f\"reward range:{env.reward_range}\")\n",
  "\n",
  "    for _ in range(3):\n",
  "        observation, reward, done, info = env.step(env.action_space.sample())  # take a random action\n",
  "        print(\"-\"*10)\n",
  "        print(f\"observation:{observation}\")\n",
  "        print(f\"reward:{reward}\")\n",
  "        print(f\"done:{done}\")\n",
  "        print(f\"info:{info}\")\n",
  "        if done:\n",
  "            # never step an episode that has already ended\n",
  "            break\n",
  "finally:\n",
  "    # release the environment even if one of the calls above raises\n",
  "    env.close()"
] },
{ "cell_type": "code", "execution_count": 10, "id": "c84bd4c4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Discrete(8)\n", "True\n", "False\n", "8\n" ] } ], "source": [
  "from gym import spaces\n",
  "# membership checks on a discrete space with n = 8: 2 is a member, 8 is not\n",
  "space = spaces.Discrete(8)\n",
  "print(space)\n",
  "print(space.contains(2))\n",
  "print(space.contains(8))\n",
  "print(space.n)"
] },
{ "cell_type": "markdown", "id": "58194261", "metadata": {}, "source": [ "Control environment with 
keyboard input" ] },
{ "cell_type": "code", "execution_count": 11, "id": "baef0cef", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['NOOP', 'FIRE', 'RIGHT', 'LEFT', 'RIGHTFIRE', 'LEFTFIRE']\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/yasu/.local/lib/python3.10/site-packages/gym/utils/play.py:11: UserWarning: \u001b[33mWARN: failed to set matplotlib backend, plotting will not work: No module named 'tkinter'\u001b[0m\n", " logger.warn(f\"failed to set matplotlib backend, plotting will not work: {str(e)}\")\n", "/home/yasu/.local/lib/python3.10/site-packages/gym/envs/registration.py:505: UserWarning: \u001b[33mWARN: The environment Pong-ram-v0 is out of date. You should consider upgrading to version `v5` with the environment ID `ALE/Pong-ram-v5`.\u001b[0m\n", " logger.warn(\n", "A.L.E: Arcade Learning Environment (version 0.7.5+db37282)\n", "[Powered by Stella]\n" ] } ], "source": [
  "import pygame\n",
  "from gym.utils.play import play\n",
  "\n",
  "env = gym.make(\"Pong-ram-v0\")\n",
  "try:\n",
  "    action_meanings = env.unwrapped.get_action_meanings()\n",
  "    print(action_meanings)\n",
  "    # look the action indices up by name instead of hard-coding 2 and 3;\n",
  "    # up-arrow -> 'RIGHT', down-arrow -> 'LEFT' is the same binding as before\n",
  "    mapping = {\n",
  "        (pygame.K_UP,): action_meanings.index('RIGHT'),\n",
  "        (pygame.K_DOWN,): action_meanings.index('LEFT'),\n",
  "    }\n",
  "    play(env, keys_to_action=mapping)\n",
  "finally:\n",
  "    # close the environment once play() returns or raises\n",
  "    env.close()"
] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.4" } }, "nbformat": 4, "nbformat_minor": 5 }