{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "collapsed_sections": [ "UxK08xN_wco-", "qTS3Lm1nWiMK" ], "toc_visible": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "e77a04ad372b47e89944d1524a8777c1": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_21e16d6d8e4d4e08bc3d26b1ffcc10bd", "IPY_MODEL_b35662ce35744a2e80c495da554b22fb", "IPY_MODEL_c190a3bf3649425aa55c17e1e820b3d6" ], "layout": "IPY_MODEL_f359ca695a7d4f1e9b7f2422ed1b0e8f" } }, "21e16d6d8e4d4e08bc3d26b1ffcc10bd": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_225658a1cf92482b95c6a55fc6ed30d8", "placeholder": "​", "style": "IPY_MODEL_6cbf426e997c40c5982949b9ebf4cd3f", "value": "Action = -0.28 | Reward = -3.93: 100%" } }, "b35662ce35744a2e80c495da554b22fb": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": 
"1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_eee1d763560e4a44bf8cf30f1f38718b", "max": 500, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_93f84f7a1c9645e489f282085995692d", "value": 500 } }, "c190a3bf3649425aa55c17e1e820b3d6": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_3ae83ec6a80a43a7858ccb1b6b6da579", "placeholder": "​", "style": "IPY_MODEL_726ead1902e44663a75600233e7930ea", "value": " 500/500 [00:15<00:00, 91.08it/s]" } }, "f359ca695a7d4f1e9b7f2422ed1b0e8f": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, 
"right": null, "top": null, "visibility": null, "width": null } }, "225658a1cf92482b95c6a55fc6ed30d8": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "6cbf426e997c40c5982949b9ebf4cd3f": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "eee1d763560e4a44bf8cf30f1f38718b": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", 
"_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "93f84f7a1c9645e489f282085995692d": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "3ae83ec6a80a43a7858ccb1b6b6da579": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": 
null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "726ead1902e44663a75600233e7930ea": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "d95ca03a9a8f412a9cd1a7715e428dcc": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_103ce5a1781d482dbb319b3ee8348f1d", "IPY_MODEL_37872f88dac04d2496cfc5aeff0b5eeb", "IPY_MODEL_4dd2a5f7f7bf478cb0d7765b51dde923" ], "layout": "IPY_MODEL_8a9ea81f0a914869a47bbfe19c8b138e" } }, "103ce5a1781d482dbb319b3ee8348f1d": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_619b046f60be4cc897643d33869aaf7e", "placeholder": "​", "style": 
"IPY_MODEL_12fcfe38f0dc4aba897c00b9a103fd91", "value": "Action = -0.00 | Reward = -0.00: 100%" } }, "37872f88dac04d2496cfc5aeff0b5eeb": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_73013360efe04199b565e43956b8248b", "max": 500, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_25bb2652a8454e72900b940b74efd18a", "value": 500 } }, "4dd2a5f7f7bf478cb0d7765b51dde923": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_85fd867ff2d740ebb4923345b22d3c01", "placeholder": "​", "style": "IPY_MODEL_a8d68c64556742d49a8984bf565009d2", "value": " 500/500 [00:40<00:00, 17.75it/s]" } }, "8a9ea81f0a914869a47bbfe19c8b138e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, 
"grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "619b046f60be4cc897643d33869aaf7e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "12fcfe38f0dc4aba897c00b9a103fd91": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", 
"_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "73013360efe04199b565e43956b8248b": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "25bb2652a8454e72900b940b74efd18a": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "85fd867ff2d740ebb4923345b22d3c01": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", 
"_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a8d68c64556742d49a8984bf565009d2": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "markdown", "source": [ "# HW5: Model Predictive Control\n", "\n", "> **Solution**\n", "\n", "\n", "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/DeepRLCourse/Homework-5-Questions/blob/main/RL_HW5_MPC.ipynb)\n", "[![Open In kaggle](https://kaggle.com/static/images/open-in-kaggle.svg)](https://kaggle.com/kernels/welcome?src=https://raw.githubusercontent.com/DeepRLCourse/Homework-5-Questions/main/RL_HW5_MPC.ipynb)\n", "\n", "## Overview\n", "Here the goal is to use **MPC** for [gymnasium environments](https://gymnasium.farama.org/).\n", "More specificly we focus on the 
[Pendulum](https://gymnasium.farama.org/environments/classic_control/pendulum/) environment and try to solve it using [mpc.pytorch](https://locuslab.github.io/mpc.pytorch/).\n", "\n", "\n" ], "metadata": { "id": "UxK08xN_wco-" } }, { "cell_type": "code", "source": [ "# @title Imports\n", "\n", "# Stuff you (might) need\n", "import random\n", "import numpy as np\n", "import gymnasium as gym\n", "\n", "import torch\n", "from torch import nn\n", "import torch.autograd\n", "from tqdm.notebook import trange\n", "import math\n", "\n", "# Stuff used for visualization\n", "from matplotlib import pyplot as plt\n", "from gymnasium.wrappers import RecordVideo\n", "import base64\n", "import imageio\n", "from IPython.display import HTML\n", "import warnings\n", "warnings.filterwarnings(\"ignore\")" ], "metadata": { "id": "SBD13rNwmXpx", "cellView": "form" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# @title Visualization Functions\n", "\n", "def embed_mp4(filename):\n", " video = open(filename,'rb').read()\n", " b64 = base64.b64encode(video)\n", " tag = '''\n", " '''.format(b64.decode())\n", "\n", " return HTML(tag)\n", "\n", "\n", "def plot_results(rewards, actions):\n", " plt.plot(rewards, label='Rewards')\n", " plt.plot(actions, label='Actions')\n", " plt.legend()\n", " plt.title(f\"Total reward: {sum(rewards):.2f}\")\n", " plt.show()" ], "metadata": { "cellView": "form", "id": "Avcbl8VMmgm1" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "# Explore the Environment (25 points)" ], "metadata": { "id": "qTS3Lm1nWiMK" } }, { "cell_type": "markdown", "source": [ "To better understand the environment, let's first see what a random agent does." 
], "metadata": { "id": "yyCz0lxFWsFv" } }, { "cell_type": "code", "source": [ "# Initialize the pendulum environment with video recording enabled\n", "env = gym.make('Pendulum-v1', render_mode='rgb_array')\n", "\n", "# Create a directory to save the video\n", "video_directory = \"random_videos\"\n", "env = RecordVideo(env, video_directory)\n", "\n", "# Set the number of steps to record\n", "num_steps = 500\n", "\n", "# TODO: Reset the environment to get the initial state\n", "state, info = env.reset()\n", "\n", "for _ in (pbar := trange(num_steps)):\n", " # TODO: Sample a random action\n", " action = env.action_space.sample()\n", "\n", " # TODO: Step the environment\n", " state, reward, terminated, truncated, info = env.step(action)\n", "\n", " # TODO: Render the environment\n", " env.render()\n", "\n", " # TODO: If done reset and get new state\n", " if terminated or truncated:\n", " state, info = env.reset()\n", "\n", " pbar.set_description(f'Action = {action[0]:.2f} | Reward = {reward:.2f}')\n", "\n", "# Close the environment to finalize the video\n", "env.close()\n", "\n", "# Show the video\n", "embed_mp4(f'{video_directory}/rl-video-episode-0.mp4')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 533, "referenced_widgets": [ "e77a04ad372b47e89944d1524a8777c1", "21e16d6d8e4d4e08bc3d26b1ffcc10bd", "b35662ce35744a2e80c495da554b22fb", "c190a3bf3649425aa55c17e1e820b3d6", "f359ca695a7d4f1e9b7f2422ed1b0e8f", "225658a1cf92482b95c6a55fc6ed30d8", "6cbf426e997c40c5982949b9ebf4cd3f", "eee1d763560e4a44bf8cf30f1f38718b", "93f84f7a1c9645e489f282085995692d", "3ae83ec6a80a43a7858ccb1b6b6da579", "726ead1902e44663a75600233e7930ea" ] }, "id": "xZI6WO4bmgjS", "outputId": "79f3ff19-78b3-46ff-b6f5-2a8d12d1483a" }, "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ " 0%| | 0/500 [00:00" ], "text/html": [ "\n", " " ] }, "metadata": {}, "execution_count": 3 } ] }, { "cell_type": "markdown", "source": [ "The goal 
of the [Pendulum](https://gymnasium.farama.org/environments/classic_control/pendulum/) environment in [Gymnasium](https://gymnasium.farama.org/) is to swing a pendulum to an upright position and keep it balanced there.\n", "In this environment, you control a torque that can be applied to the pendulum.\n", "The objective is to apply the right amount of torque to swing the pendulum up and maintain its upright position." ], "metadata": { "id": "-8SUlhUHWvbC" } }, { "cell_type": "markdown", "source": [ "## Simulation Tools" ], "metadata": { "id": "wIGVVtRU2kY4" } }, { "cell_type": "markdown", "source": [ "Both the `angle_normalize` function and the `PendulumDynamics` class are fundamental components for accurately simulating, analyzing, and controlling the pendulum system.\n", "They ensure consistency in angle representation and provide a realistic model of the pendulum's behavior, enabling effective control strategies.\n", "\n", "We use the `angle_normalize` function for:\n", "\n", "* **Consistency**: When dealing with angles, it's important to keep them within a standard range to ensure consistency in calculations.\n", "* **Handling Wrapping**: Angles are periodic: $\\theta$ and $\\theta + 2\\pi$ describe the same pendulum position. `angle_normalize` maps every angle into $[-\\pi, \\pi)$, which avoids the confusion and errors that can arise from angle wrapping.\n", "\n", "\n", "And we use the `PendulumDynamics` class for:\n", "\n", "* **Modeling Physical Behavior**: The `PendulumDynamics` class models the physical behavior of the pendulum.\n", "* **Simulation and Control**: This class allows us to simulate the pendulum's response to different actions, which is crucial for designing and testing control algorithms.\n", "* **Optimization**: Understanding the dynamics of the pendulum helps in optimizing the control inputs.
The class encapsulates the physics involved, enabling us to apply control techniques like Model Predictive Control (MPC) to achieve the desired behavior.\n", "\n", "\n" ], "metadata": { "id": "s7oVXleZ2rY8" } }, { "cell_type": "code", "source": [ "class PendulumDynamics(nn.Module):\n", " def forward(self, state, action):\n", " th = state[:, 0].view(-1, 1)\n", " thdot = state[:, 1].view(-1, 1)\n", "\n", " g = 10 # default value of the environment (not 9.81)\n", " m = 1\n", " l = 1\n", " dt = 0.05\n", "\n", " u = action\n", " u = torch.clamp(u, -2, 2)\n", "\n", " newthdot = thdot + (-3 * g / (2 * l) * torch.sin(th + np.pi) + 3. / (m * l ** 2) * u) * dt\n", " newth = th + newthdot * dt\n", " newthdot = torch.clamp(newthdot, -8, 8)\n", "\n", " state = torch.cat((angle_normalize(newth), newthdot), dim=1)\n", " return state\n", "\n", "\n", "def angle_normalize(x):\n", " return (((x + math.pi) % (2 * math.pi)) - math.pi)" ], "metadata": { "id": "N9y8iJ0u2Lgp" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "# Model Predictive Control (50 points)" ], "metadata": { "id": "eSb9nKlw4Gra" } }, { "cell_type": "markdown", "source": [ "[mpc.pytorch](https://locuslab.github.io/mpc.pytorch/) is a library that provides a fast and differentiable [Model Predictive Control](https://en.wikipedia.org/wiki/Model_predictive_control) (MPC) solver for PyTorch. It was developed by researchers at [LocusLab](https://locuslab.github.io/) and is designed to integrate seamlessly with PyTorch, allowing for efficient and flexible control of dynamic systems.\n", "\n", "If you are interested to learn more, check out [OptNet](https://arxiv.org/abs/1703.00443) and [Differentiable MPC](https://arxiv.org/abs/1810.13400)." 
], "metadata": { "id": "yc8yKVti4tMp" } }, { "cell_type": "markdown", "source": [ "## Quick Setup" ], "metadata": { "id": "PfDVMqgi5kax" } }, { "cell_type": "markdown", "source": [ "In order to install this library you can use `pip`:" ], "metadata": { "id": "qU9dpT0C5nFg" } }, { "cell_type": "code", "source": [ "! pip install mpc" ], "metadata": { "id": "HEyJ0SPi4W28", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "4d6a3b79-6d90-4cf4-eabf-93c576aa0521" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Requirement already satisfied: mpc in /usr/local/lib/python3.11/dist-packages (0.0.6)\n", "Requirement already satisfied: numpy<2,>=1 in /usr/local/lib/python3.11/dist-packages (from mpc) (1.26.4)\n", "Requirement already satisfied: torch in /usr/local/lib/python3.11/dist-packages (from mpc) (2.5.1+cu124)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from torch->mpc) (3.17.0)\n", "Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.11/dist-packages (from torch->mpc) (4.12.2)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch->mpc) (3.4.2)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch->mpc) (3.1.5)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch->mpc) (2024.10.0)\n", "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch->mpc) (12.4.127)\n", "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch->mpc) (12.4.127)\n", "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch->mpc) (12.4.127)\n", "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages 
(from torch->mpc) (9.1.0.70)\n", "Requirement already satisfied: nvidia-cublas-cu12==12.4.5.8 in /usr/local/lib/python3.11/dist-packages (from torch->mpc) (12.4.5.8)\n", "Requirement already satisfied: nvidia-cufft-cu12==11.2.1.3 in /usr/local/lib/python3.11/dist-packages (from torch->mpc) (11.2.1.3)\n", "Requirement already satisfied: nvidia-curand-cu12==10.3.5.147 in /usr/local/lib/python3.11/dist-packages (from torch->mpc) (10.3.5.147)\n", "Requirement already satisfied: nvidia-cusolver-cu12==11.6.1.9 in /usr/local/lib/python3.11/dist-packages (from torch->mpc) (11.6.1.9)\n", "Requirement already satisfied: nvidia-cusparse-cu12==12.3.1.170 in /usr/local/lib/python3.11/dist-packages (from torch->mpc) (12.3.1.170)\n", "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch->mpc) (2.21.5)\n", "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch->mpc) (12.4.127)\n", "Requirement already satisfied: nvidia-nvjitlink-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch->mpc) (12.4.127)\n", "Requirement already satisfied: triton==3.1.0 in /usr/local/lib/python3.11/dist-packages (from torch->mpc) (3.1.0)\n", "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch->mpc) (1.13.1)\n", "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch->mpc) (1.3.0)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch->mpc) (3.0.2)\n" ] } ] }, { "cell_type": "markdown", "source": [ "While `mpc` offers a lot, in this notebook we are going to focus only on the core features.\n", "To learn more, check out the [GitHub repository](https://github.com/locuslab/mpc.pytorch) of this project."
], "metadata": { "id": "J9Ev1gLC5uGd" } }, { "cell_type": "code", "source": [ "from mpc import mpc" ], "metadata": { "id": "euuwSTJ14WO-" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "⚠️ If you're running this notebook in Colab, please **restart the session**.\n", "Go to _Runtime_ and select the _Restart session_ option.\n" ], "metadata": { "id": "xI9d0IuQEjPT" } }, { "cell_type": "markdown", "source": [ "## The Cost Function" ], "metadata": { "id": "tf0ALVu7Ggsh" } }, { "cell_type": "markdown", "source": [ "The `define_swingup_goal` function creates a cost function that the MPC algorithm uses to determine the optimal control actions to achieve the desired pendulum swing-up task.\n", "It considers both the desired state (upright and stationary) and penalizes large control inputs to ensure smooth control actions." ], "metadata": { "id": "l9pgPno8GjFV" } }, { "cell_type": "code", "source": [ "def define_swingup_goal():\n", " goal_weights = torch.tensor((1., 0.1)) # Weights for theta and theta_dot\n", " goal_state = torch.tensor((0., 0.)) # Desired state (theta=0, theta_dot=0)\n", " ctrl_penalty = 0.001\n", " q = torch.cat((goal_weights, ctrl_penalty * torch.ones(1))) # Combined weights\n", " px = -torch.sqrt(goal_weights) * goal_state\n", " p = torch.cat((px, torch.zeros(1)))\n", " Q = torch.diag(q).repeat(TIMESTEPS, N_BATCH, 1, 1) # Cost matrix\n", " p = p.repeat(TIMESTEPS, N_BATCH, 1)\n", " return mpc.QuadCost(Q, p) # Quadratic cost" ], "metadata": { "id": "13hsSR8cGLzA" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "## Running MPC" ], "metadata": { "id": "4ILZGtcdLeFb" } }, { "cell_type": "markdown", "source": [ "To run the MPC, in each iteration:\n", "\n", "1. First you obtain the current state of the environment and convert it to a tensor.\n", "2.
Then you recreate the MPC controller using the updated `u_init` and calculate the optimal control actions based on the current state, dynamics, and cost function.\n", "3. Next you take the first planned action and update `u_init` with the remaining actions.\n", "4. Finally you take a step in the environment and store the rewards and actions.\n", "\n", "Remember that `u_init` serves as the initial guess for the control inputs." ], "metadata": { "id": "JskzIJuLMpoe" } }, { "cell_type": "code", "source": [ "# Select the hyperparameters\n", "RUN_ITER = 500\n", "TIMESTEPS = 10\n", "N_BATCH = 1\n", "LQR_ITER = 5\n", "\n", "\n", "# Setup the environmnet\n", "env = gym.make('Pendulum-v1', render_mode='rgb_array')\n", "video_directory = \"mpc_videos\"\n", "env = RecordVideo(env, video_directory)\n", "env.reset() # Reset the underlying environment\n", "env.unwrapped.state = [np.pi, 1] # Environment must start in downward position\n", "\n", "\n", "# Define the cost function and initialize u\n", "cost = define_swingup_goal()\n", "u_init = None\n", "\n", "\n", "# Run MPC\n", "rewards, actions = [], []\n", "for _ in (pbar := trange(RUN_ITER)):\n", " state = env.unwrapped.state.copy()\n", " state = torch.tensor(state).view(1, -1)\n", " # recreate controller using updated u_init (kind of wasteful right?)\n", " ctrl = mpc.MPC(2, 1, TIMESTEPS, u_lower=-2.0, u_upper=+2.0,\n", " lqr_iter=LQR_ITER, exit_unconverged=False, eps=1e-2,\n", " n_batch=N_BATCH, backprop=False, verbose=0, u_init=u_init,\n", " grad_method=mpc.GradMethods.AUTO_DIFF)\n", "\n", " # compute action based on current state, dynamics, and cost\n", " nominal_states, nominal_actions, nominal_objs = ctrl(state, cost, PendulumDynamics())\n", " # TODO: Take first planned action\n", " action = nominal_actions[0]\n", " u_init = torch.cat((nominal_actions[1:], torch.zeros(1, N_BATCH, 1)), dim=0)\n", "\n", " # TODO: Take a step in the environment\n", " s, r, _, _, _ = env.step(action.detach().numpy())\n", "\n", " # TODO: Store 
the latest action and reward\n", " rewards.append(r.item())\n", " actions.append(action.detach().numpy().item())\n", "\n", " pbar.set_description(f\"Action = {actions[-1]:.2f} | Reward = {rewards[-1]:.2f}\")\n", " env.render()\n", "\n", "env.close()\n", "\n", "# Plot the results\n", "plot_results(rewards, actions)\n", "\n", "# Show the policy learned\n", "embed_mp4(f'{video_directory}/rl-video-episode-0.mp4')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000, "referenced_widgets": [ "d95ca03a9a8f412a9cd1a7715e428dcc", "103ce5a1781d482dbb319b3ee8348f1d", "37872f88dac04d2496cfc5aeff0b5eeb", "4dd2a5f7f7bf478cb0d7765b51dde923", "8a9ea81f0a914869a47bbfe19c8b138e", "619b046f60be4cc897643d33869aaf7e", "12fcfe38f0dc4aba897c00b9a103fd91", "73013360efe04199b565e43956b8248b", "25bb2652a8454e72900b940b74efd18a", "85fd867ff2d740ebb4923345b22d3c01", "a8d68c64556742d49a8984bf565009d2" ] }, "id": "tE45H5NI05Q_", "outputId": "d877a84a-5170-4842-f121-6c33b7d163ae" }, "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ " 0%| | 0/500 [00:00" ], "image/png": "\n" }, "metadata": {} }, { "output_type": "execute_result", "data": { "text/plain": [ "" ], "text/html": [ "\n", " " ] }, "metadata": {}, "execution_count": 8 } ] }, { "cell_type": "code", "source": [ "# Show the policy learned\n", "embed_mp4(f'{video_directory}/rl-video-episode-0.mp4')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 501 }, "id": "4FX9ANmENtUO", "outputId": "27d04b94-f443-4bf8-c017-f2c14a6f1e51" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "" ], "text/html": [ "\n", " " ] }, "metadata": {}, "execution_count": 9 } ] }, { "cell_type": "markdown", "source": [ "# Questions (25 points)" ], "metadata": { "id": "jskFvBFHLido" } }, { "cell_type": "markdown", "source": [ "Based on your experiments, answer the following questions:\n", "\n", "\n", "\n", "* How 
does the number of LQR iterations affect the MPC?\n", "* What if we didn't have access to the model dynamics? Could we still use MPC?\n", "* Do `TIMESTEPS` or `N_BATCH` matter here? Explain.\n", "* Why do you think we chose to set the initial state of the environment to the downward position?\n", "* As time progresses (later iterations) what happens to the actions and rewards? Why?\n", "\n", "`Your Answers:`\n" ], "metadata": { "id": "9Pk2jwFVNzTZ" } } ] }