{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"2022-01-24-apprentice-mountaincar.ipynb","provenance":[{"file_id":"https://github.com/recohut/nbs/blob/main/raw/T250391%20%7C%20Apprenticeship%20Learning%20in%20Mountaincar%20Environment.ipynb","timestamp":1644669408947},{"file_id":"1K1DpwKNrsmvK-CKDvfOPXt4UfktdSABH","timestamp":1636606729887}],"collapsed_sections":[],"toc_visible":true,"authorship_tag":"ABX9TyPVuNwWSQKASm9LSWCS0n7f"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","metadata":{"id":"b7tF9-oSdIEv"},"source":["# Apprenticeship Learning in Mountaincar Environment"]},{"cell_type":"markdown","metadata":{"id":"pV5flWpeVM6M"},"source":["## Setup"]},{"cell_type":"markdown","metadata":{"id":"VdBAv8vWVM36"},"source":["### Installations"]},{"cell_type":"code","metadata":{"id":"y1GyS_RCTdvg"},"source":["!pip install gym pyvirtualdisplay > /dev/null 2>&1\n","!apt-get install -y xvfb python-opengl ffmpeg > /dev/null 2>&1\n","\n","!apt-get update > /dev/null 2>&1\n","!apt-get install cmake > /dev/null 2>&1\n","!pip install --upgrade setuptools 2>&1\n","!pip install ez_setup > /dev/null 2>&1\n","!pip install gym[atari] > /dev/null 2>&1\n","\n","!wget http://www.atarimania.com/roms/Roms.rar\n","!mkdir /content/ROM/\n","!unrar e /content/Roms.rar /content/ROM/\n","!python -m atari_py.import_roms /content/ROM/"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"CfrWqvJJS0I8"},"source":["!pip install -q gym\n","!pip install -q pylab-sdk\n","!pip install -q readchar"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"7ZVxXGqdVLeL"},"source":["### Imports"]},{"cell_type":"code","metadata":{"id":"gXzsjx1rS4m5"},"source":["import gym\n","import matplotlib.pyplot as plt\n","import readchar\n","import numpy as np\n","import sys\n","import cvxpy as 
# Utility functions to enable video recording of a gym environment and to
# display the recording inline. To enable video, just do `env = wrap_env(env)`.

def show_video():
    """Embed the first recorded episode video in the notebook output.

    Looks for ``video/*.mp4`` relative to the current working directory (the
    directory ``wrap_env`` asks the Monitor wrapper to write to), base64-encodes
    the first match and displays it through an HTML5 ``<video>`` element.
    Prints ``Could not find video`` when no recording exists.
    """
    # Sorted so that "first" is deterministic; glob order depends on the filesystem.
    mp4list = sorted(glob.glob('video/*.mp4'))
    if not mp4list:
        print("Could not find video")
        return

    # Read-only binary mode is sufficient, and the context manager closes the
    # handle (the previous code opened 'r+b' and never closed it).
    with open(mp4list[0], 'rb') as video_file:
        encoded = base64.b64encode(video_file.read()).decode('ascii')

    # The template must actually contain the <video> element; formatting an
    # empty string produced an empty HTML payload and nothing was rendered.
    ipythondisplay.display(HTML(data='''<video alt="test" autoplay loop controls style="height: 400px;">
  <source src="data:video/mp4;base64,{0}" type="video/mp4" />
</video>'''.format(encoded)))


def wrap_env(env):
    """Return ``env`` wrapped in a gym ``Monitor`` that records into ``./video``.

    ``force=True`` is passed through to ``Monitor``.
    NOTE(review): in gym this presumably allows overwriting earlier recordings
    in that directory -- confirm against the installed gym version.
    """
    return Monitor(env, './video', force=True)
20\n","F_idx = 4\n","GAMMA = 0.99\n","\n","np.random.seed(1)"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Dr1OTNgjVHV4"},"source":["## Expert Demo"]},{"cell_type":"code","metadata":{"id":"r7hM2cStTyjf"},"source":["# env = wrap_env(gym.make(\"MountainCar-v0\"))\n","\n","# trajectories = []\n","# episode_step = 0\n","\n","# for episode in range(20): # n_trajectories : 20\n","# trajectory = []\n","# step = 0\n","\n","# env.reset()\n","# print(\"episode_step\", episode_step)\n","\n","# while True: \n","# env.render()\n","# print(\"step\", step)\n","\n","# key = readchar.readkey()\n","# if key not in arrow_keys.keys():\n","# break\n","\n","# action = arrow_keys[key]\n","# state, reward, done, _ = env.step(action)\n","\n","# if state[0] >= env.env.goal_position and step > 129: # trajectory_length : 130\n","# break\n","\n","# trajectory.append((state[0], state[1], action))\n","# step += 1\n","\n","# trajectory_numpy = np.array(trajectory, float)\n","# print(\"trajectory_numpy.shape\", trajectory_numpy.shape)\n","# episode_step += 1\n","# trajectories.append(trajectory)\n","\n","# np_trajectories = np.array(trajectories, float)\n","# print(\"np_trajectories.shape\", np_trajectories.shape)\n","\n","# np.save(\"expert_demo\", arr=np_trajectories)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"YFF2azikVZi9","executionInfo":{"status":"ok","timestamp":1636606887163,"user_tz":-330,"elapsed":1626,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"}},"outputId":"e6c76440-03de-4207-da96-975b15182c91"},"source":["!wget -q --show-progress https://github.com/reinforcement-learning-kr/lets-do-irl/raw/master/mountaincar/app/expert_demo/expert_demo.npy"],"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["\rexpert_demo.npy 0%[ ] 0 --.-KB/s \rexpert_demo.npy 
def idx_state(env, state, n_bins=None):
    """Map a continuous (position, velocity) observation to a flat Q-table row.

    Each of the two observation dimensions is split into ``n_bins`` equal-width
    bins spanning ``env.observation_space.low`` .. ``.high``; the row index is
    ``position_bin + velocity_bin * n_bins``.

    Args:
        env: object exposing ``observation_space.low`` / ``.high`` (indexable).
        state: indexable whose first two entries are position and velocity.
        n_bins: bins per dimension; defaults to the notebook-level
            ``one_feature`` setting.

    Returns:
        int in ``[0, n_bins**2 - 1]``.
    """
    if n_bins is None:
        n_bins = one_feature
    env_low = env.observation_space.low
    env_high = env.observation_space.high
    env_distance = (env_high - env_low) / n_bins
    last_bin = n_bins - 1

    # An observation sitting exactly on the upper bound divides out to bin
    # `n_bins`, which would index past the end of the Q-table; clamp it.
    position_idx = min(max(int((state[0] - env_low[0]) / env_distance[0]), 0), last_bin)
    velocity_idx = min(max(int((state[1] - env_low[1]) / env_distance[1]), 0), last_bin)
    return position_idx + velocity_idx * n_bins


def update_q_table(state, action, reward, next_state):
    """Apply one tabular Q-learning update to the notebook-level ``q_table``.

    ``state`` / ``next_state`` are row indices (see ``idx_state``). Uses the
    notebook-level ``gamma`` and ``q_learning_rate``; mutates ``q_table`` in
    place and returns nothing.
    """
    q_1 = q_table[state][action]
    q_2 = reward + gamma * max(q_table[next_state])
    q_table[state][action] += q_learning_rate * (q_2 - q_1)


class FeatureEstimate:
    """Gaussian radial-basis features of a (position, velocity) state.

    The first half of the feature vector responds to position, the second half
    to velocity. Basis centres are placed at ``low + i * (high - low) /
    (feature_num - 1)`` for ``i`` in ``range(feature_num // 2)``.
    """

    def __init__(self, feature_num, env):
        self.env = env
        self.feature_num = feature_num
        # Working buffer; entries not written by get_features stay at 1
        # (this happens for the last slot when feature_num is odd).
        self.feature = np.ones(self.feature_num)

    def gaussian_function(self, x, mu, sigma=1.0):
        """Unnormalised Gaussian bump ``exp(-(x - mu)^2 / (2 * sigma^2))``."""
        return np.exp(-np.power(x - mu, 2.) / (2 * np.power(sigma, 2.)))

    def get_features(self, state):
        """Return the feature vector (length ``feature_num``) for ``state``.

        Only ``state[0]`` (position) and ``state[1]`` (velocity) are read.
        """
        env_low = self.env.observation_space.low
        env_high = self.env.observation_space.high
        env_distance = (env_high - env_low) / (self.feature_num - 1)
        half = self.feature_num // 2

        for i in range(half):
            # position
            self.feature[i] = self.gaussian_function(
                state[0], env_low[0] + i * env_distance[0])
            # velocity
            self.feature[i + half] = self.gaussian_function(
                state[1], env_low[1] + i * env_distance[1])

        # Hand back a copy so a later call cannot overwrite a vector the
        # caller is still holding.
        return self.feature.copy()


def calc_feature_expectation(feature_num, gamma, q_table, demonstrations, env):
    """Estimate the learner's discounted feature expectation by rollouts.

    Runs ``len(demonstrations)`` episodes of the greedy policy of ``q_table``
    in ``env`` and averages the discounted sum of features of the visited
    states. ``demonstrations`` is used only for its length.

    Raises:
        ValueError: if ``demonstrations`` is empty (the average is undefined).
    """
    demo_num = len(demonstrations)
    if demo_num == 0:
        raise ValueError("demonstrations must contain at least one trajectory")

    feature_estimate = FeatureEstimate(feature_num, env)
    feature_expectations = np.zeros(feature_num)

    for _ in range(demo_num):
        state = env.reset()
        demo_length = 0
        done = False

        while not done:
            demo_length += 1

            state_idx = idx_state(env, state)
            action = np.argmax(q_table[state_idx])
            next_state, reward, done, _ = env.step(action)

            # NOTE(review): the first visited state is weighted gamma**1 here,
            # whereas expert_feature_expectation weights its first stored state
            # with gamma**0 -- confirm whether this offset is intended.
            features = feature_estimate.get_features(next_state)
            feature_expectations += (gamma ** demo_length) * np.array(features)

            state = next_state

    return feature_expectations / demo_num


def expert_feature_expectation(feature_num, gamma, demonstrations, env):
    """Average discounted feature expectation over the expert trajectories.

    ``demonstrations`` is a sequence of trajectories; each trajectory is a
    sequence of states whose first two entries are position and velocity.
    The state at step ``t`` is weighted by ``gamma**t``. Trajectories may
    differ in length (each is traversed to its own end).

    Raises:
        ValueError: if ``demonstrations`` is empty.
    """
    if len(demonstrations) == 0:
        raise ValueError("demonstrations must contain at least one trajectory")

    feature_estimate = FeatureEstimate(feature_num, env)
    feature_expectations = np.zeros(feature_num)

    for demo in demonstrations:
        for step, state in enumerate(demo):
            features = feature_estimate.get_features(state)
            feature_expectations += (gamma ** step) * np.array(features)

    return feature_expectations / len(demonstrations)


def QP_optimizer(feature_num, learner, expert):
    """Solve the max-margin step: minimise ``||w||`` s.t. ``(expert - learner) w >= 2``.

    Args:
        feature_num: length of the weight vector.
        learner: learner feature expectations, one row per stored policy.
        expert: expert feature expectation (a single row).

    Returns:
        ``(weights, status)``: the solved weights when the status is
        ``"optimal"``, otherwise a zero vector, plus the cvxpy status string.
    """
    w = cp.Variable(feature_num)

    # Plain ndarrays (not np.matrix) and `@`: cvxpy has deprecated `*` for
    # matrix-vector products, and ndarray operands avoid np.matrix operator
    # overloads getting in the way.
    margin = np.asarray(expert) - np.asarray(learner)

    obj_func = cp.Minimize(cp.norm(w))
    constraints = [margin @ w >= 2]

    prob = cp.Problem(obj_func, constraints)
    prob.solve()

    print("status:", prob.status)
    if prob.status == "optimal":
        print("optimal value", prob.value)
        weights = np.squeeze(np.asarray(w.value))
    else:
        weights = np.zeros(feature_num)
    return weights, prob.status


def add_feature_expectation(learner, temp_learner):
    """Append ``temp_learner`` as a new row below ``learner`` (after an RL step)."""
    return np.vstack([learner, temp_learner])


def subtract_feature_expectation(learner):
    """Drop the oldest (first) row; used when the QP is infeasible."""
    return learner[1:]
np.argmax(q_table[state_idx])\n"," next_state, reward, done, _ = env.step(action)\n"," \n"," features = feature_estimate.get_features(state)\n"," irl_reward = np.dot(w, features)\n"," \n"," next_state_idx = idx_state(env, next_state)\n"," update_q_table(state_idx, action, irl_reward, next_state_idx)\n","\n"," score += reward\n"," state = next_state\n","\n"," if done:\n"," scores.append(score)\n"," episodes.append(episode)\n"," break\n","\n"," if episode % 1000 == 0:\n"," score_avg = np.mean(scores)\n"," print('{} episode score is {:.2f}'.format(episode, score_avg))\n"," # plt.plot(episodes, scores, 'b')\n"," # plt.savefig(\"./learning_curves/app_eps_60000.png\")\n"," np.save(\"app_q_table\", arr=q_table)\n","\n"," if episode % 5000 == 0:\n"," # optimize weight per 5000 episode\n"," status = \"infeasible\"\n"," temp_learner = calc_feature_expectation(feature_num, gamma, q_table, demonstrations, env)\n"," learner = add_feature_expectation(learner, temp_learner)\n"," \n"," while status==\"infeasible\":\n"," w, status = QP_optimizer(feature_num, learner, expert)\n"," if status==\"infeasible\":\n"," learner = subtract_feature_expectation(learner)"],"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["status: optimal\n","optimal value 0.04285986897328936\n","0 episode score is -200.00\n","status: optimal\n","optimal value 0.04343140320473908\n","1000 episode score is -199.97\n","2000 episode score is -199.12\n","3000 episode score is -198.75\n","4000 episode score is -197.01\n","5000 episode score is -194.22\n","status: optimal\n","optimal value 0.05168352123017688\n","6000 episode score is -193.90\n","7000 episode score is -192.68\n","8000 episode score is -190.49\n","9000 episode score is -187.72\n","10000 episode score is -185.93\n","status: optimal\n","optimal value 0.06887200496494023\n","11000 episode score is -185.53\n","12000 episode score is -184.30\n","13000 episode score is -183.70\n","14000 episode score is -182.80\n","15000 
episode score is -181.61\n","status: optimal\n","optimal value 0.06887200491350551\n","16000 episode score is -180.00\n","17000 episode score is -178.65\n","18000 episode score is -176.88\n","19000 episode score is -176.11\n","20000 episode score is -174.66\n","status: optimal\n","optimal value 0.08240263036799281\n","21000 episode score is -174.20\n","22000 episode score is -173.40\n","23000 episode score is -172.07\n","24000 episode score is -172.03\n","25000 episode score is -171.51\n","status: optimal\n","optimal value 0.10118726927984818\n","26000 episode score is -171.19\n","27000 episode score is -171.07\n","28000 episode score is -170.65\n","29000 episode score is -170.51\n","30000 episode score is -170.29\n","status: optimal\n","optimal value 0.10118726939128136\n","31000 episode score is -169.58\n","32000 episode score is -169.06\n","33000 episode score is -168.43\n","34000 episode score is -167.48\n","35000 episode score is -167.00\n","status: optimal\n","optimal value 0.10118726938402643\n","36000 episode score is -166.79\n","37000 episode score is -166.07\n","38000 episode score is -165.34\n","39000 episode score is -164.64\n","40000 episode score is -163.92\n","status: optimal\n","optimal value 0.12417663796193439\n","41000 episode score is -163.59\n","42000 episode score is -163.09\n","43000 episode score is -162.44\n","44000 episode score is -161.99\n","45000 episode score is -162.07\n","status: optimal\n","optimal value 0.12417663799999985\n","46000 episode score is -161.69\n","47000 episode score is -161.35\n","48000 episode score is -161.39\n","49000 episode score is -161.17\n","50000 episode score is -160.78\n","status: optimal\n","optimal value 0.12417663798361642\n","51000 episode score is -160.73\n","52000 episode score is -160.45\n","53000 episode score is -160.06\n","54000 episode score is -159.64\n","55000 episode score is -159.23\n","status: optimal\n","optimal value 0.1331090749714291\n","56000 episode score is -158.89\n","57000 episode 
# Learning curve: environment score (sum of per-step rewards) of every
# training episode, as collected by the training loop in `episodes` / `scores`.
fig, ax = plt.subplots()
ax.plot(episodes, scores, 'b')
# A figure should stand alone when the notebook is skimmed.
ax.set(
    title="Apprenticeship learning on MountainCar-v0",
    xlabel="Episode",
    ylabel="Score",
)
plt.show()
ey+u+8oGnfLLb4jqG3UKBgwwHcUfikBSOqtWeM7gvR48knfEeTH3XcHw/nzG2+uWrIExoU3s1++HO65p/nlTpsGI0fCJz4B++8f1N5HjICJE+Gkk+DLX4Z9923Pdq9O4JzIcxPQL37hOwLJs+HDg2EjHe4f+UgwPPXUoK/p2Web66hftQqOOqpv+axZfa97GTcOrr668Xm3QjWAAvvtb+HSS31HUd9yPTBUYlZvp71qFYwZA6+9Vnn8BRcEO/9GPfxwcOV0I3dVLWnHacCqAeREKzWAY48Nht/4RryxiKTdc8/VHn/xxXDzzbDTTpVvbtds/8beewfD0n2UGtGO5j7VAHIiz01Avs+FX7ky+H59dWouXepnuXm2cGFj0919d/C/P+qo5n9j++7b906paaMEIFLH4sXBcOJEP8uv1Wxw8809b9ksjfnDH/qWrV4ddNC+8Qa88EJQVrryfNq05pfx0EOtx9cuSgAiGTVxYtBOrfvuNK9S39fIkcGR/qhRcOON8S6v1VpA0v0ASgA5keZqZlStNgGtWRPcQ+itt+KJo6srnvnE5YEHguGbb/qNIy86O4Nh6RbrjVq3Lv5YSqZOTW7eoAQgGXDHHa197vrrgw68Cy6IJ44XX4xnPpIvU6YkN++f/jS5eYMSgGTAe+81/5m1a7uPjF9/Pd542q2V9Zf2OeaY+tNEOZHhq19t/bP1KAFI6lVq3qq3Uy8f7/ssoqhKHZISzeuvZ3NbuP765OatBJATee4D6O2ee2DTTeGvf60+TVdXdr6Te++tPT4r65FmS5cG20zSV9ZmTaQEYGbHmdlCM1tnZh29xo03sy4zW2xmh5aVHxaWdZlZhSeOitRW6qSr9XCY0qmbACtWxB/Diy/2XP7SpfDII63NK88PfnnlFd8RBJYsCYZ33eU3jrSJWgNYAIwGemzCZjYMOB7YBTgMuMLM+plZP2AicDgwDPhiOK1IVb2r7Y3eZrh05HzrrfHGA9DRAQcc0P1+xx1hjz1am1e9ZoksNluUfOYzviOQWiLdCsI5twjA+tZRRwG3OufeAf5mZl1AeDE0Xc65p8PP3RpO+0SUOETNBO22bJnvCLKh0Stu2+UPf9CFc+WS6gPYDii/28aysKxaeUVmNtbMOs2sc+XKlYkEKvkU51FzWpNrWuNKu9/8xncE6VE3AZjZvWa2oMLfqKSDc85d45zrcM51bLXVVkkvLtPyvDNodWeele+k3vplZT0qWbXKdwTtd8st2fmf1W0Ccs4d3MJ8nwe2L3s/KCyjRrlEkJUNTuKT5b4Bn15+Odn5X3xxsvOPU1JNQFOA482sv5ntAAwF5gAPA0PNbAcz25CgozjB6+gkz/Jy5KwdeXvp9ufdInUCm9kxwGXAVsA0M3vUOXeoc26hmd1O0Lm7BhjnnFsbfuY0YAbQD7jeOZeybiJJG987SN+JxPfys845OOII31GkU9SzgCYDk6uMuwDocxcW59x0YHqU5Upf2kn05Fx2vhPfCS7v/vjH9i4vK9sd6EpgyYBWf1BZ+iHWkpf18KWIHdGNUgIQ8Uw1gGSVrgJulywlbCWAnMjSRtesvJ8G2qq8r19c9D1VpwQgmZWWI+fVq/0sd80aP8uV2rKUcJQAJFUq3WKhlR19nJ3A9ebz+c9Hm3+riez226MtVyTSWUAi5a6/HgYPhoNbuXQwFNfDW2bOhOuui2devfV+SlOlB4w3I43XM/z1r+1vO4/TpEkwaBB8+tPZOiJvNyWAnEjDRl56clGUpplKz1dtZX5J7fwBvv/9eOeXlqascgcd5DuCaE46KRj6+G7T8FtslJqAJJIpU4INvtVT7caPhyFDYg2pYdOmBbFXujXAoEFwzjnJLXvFimDZ9R4GI9FlaYfcbkoABfWPf8Qzn29/Oxg+0eINvf/zP+HZZ7vfVzpiS+qZuKXnCixY0Hfc88/D+efDc8/1HReH2bOD4SWXxHeU+vrrMGdOkJTnzGltHqtWJX+vnLx75x3fE
TROTUA50exRzpZbxrPcp58OhnE9+emxx/qWvfZaPPNuxeDBMD0j161vumnP960kloED44mlyB5/3HcEjVMNQGIR16mQS5c2Pm272nebWbfFi+Huu4PXzsG118Kbb3aPnz0bvvCFnp+pV8toJLlXOiU0qZpT1rSjCejll4OTH045JfllxUk1gAybO9d3BNLbTjsFQ+eCs4PGjg2eFXzFFUH5PvsEwyOP7P7MbbdFX+5ll/Ut+6//gjP11O222GuvoDZcel51VqgGkGHlR5alo5zzz4ef/Sy5ZU6dGizrC1/oeWTVytF4efxp8uUv93xfa932269y+ZIl3TWHl14KHhIyblz3+NKD6qdObT3Ocmec0besd7Pc976X7NlRrTr11OD7acV558HPf979/q23+h7xt6MGUGoKzRolgJw55xz4zneSm3/pId9xXIT03/8dfR5RVNux//rXPd/X6oN48MHK5Ycc0vP9l77UXQsAuOmm+vHFbcIEOPnk9i+3niuvDL6fVvzoR93Jzzl44IG+0+gsoOqUAAooiWpqoz/g738/+EFWOt8f4MIL44upnvvvD4adnd1l117bd7pWdyClBFPpGbTNdBS2uvxmnkz16KP+d5RmPf/OOqu5z663XnDhlzROCcCzefPg1Vejz6eZH+/vfx99ea0qXUVbrfmnd3lcZ1SsW1e9xnHPPd2vy4/So3j77Xjm0y6+a2OV/OQntcevXdvYfHwntjRTAvDIOdhzTzj88PYu95JL2ru8Sj70oaBtvJ7ddotneb/4BRx4YHDxVy3rxfSLePHFeOYD7dmBlbejp0mtc+ov6PO4KWmWEkANV14ZHKEnpdRE8NBDQbPMpEnwgx8EHVlTpwYX9GTR5ZfXn2b16uidwM10PD/1VDD8+9/7jivfwVba2bZ6imsab/FQTfnFeElatKi5ZFOtqRCC9v9GqAZQnU4DreHUU4NhO37I5TdQGzgwuEVCM8tO00Z++ulw2mm+o+ipdGRfaYdS77u788744ymqj388SKjf/GZ8tS1pnf4FER1ySLDDa0W1nfuvftXY5+sduWZJqeMvKbUSQKXpylU6syQrSnfDLP9u+/eHnXfu2eHaLqXa1IwZQRwf+1hr89lrr8ami+sK9bxSAojonnsaa/KopFr75sKFrccjlZV2cvVqAFlIpKVrCBpR6WZz774LTz4ZXzytOOKIII5Kt/6o5a23gqtuy8/cqiUNZzelmRKAR5tsUn+aODsT06bVG5a1opRUK10xm9QOIq6jz97xffCD6b2ILmkjRsAWW/iOIj+UAFJu4sTGpotzJ/bee3DDDfWbS6Jq9CguDqV7DC1dGhxB/+533eOSSgBJPiqyKAng0Ud7vp8/v/l5qAZQnRJAyp1/fnM3SIvDxRfDV77S94rYuDVb/e+tmc758rb9o4+GY46JtuxGbLBBPPMp8g5s3319R5BvSgAZUOmioptuggMO6H4/e3aw047D2WcHwzFjGr/YJu3KE8Dzz1efLs47aPbrF898vvtdWLmyZ9kvfxnPvPPu5ZeLnUDriZQAzOw4M1toZuvMrKOs/NNmNtfMHg+HI8vG7RmWd5nZpWb697TihBN6vj/nnKDZJm6l8+ezrjwBbLhhz3HlW2Cc93KP8zTH3jf4010+GzNxohJALVE30QXAaGBmr/KXgM8453YFTgBuLht3JXAyMDT8OyxiDJn02c/6jqCv0mMK86hWIiu/4K5es1LUZiuRNImUAJxzi5xziyuUP+KceyF8uxB4n5n1N7Ntgc2cc7Occw64CUjhrjB5d93lO4K+lizpW9bZmb8Hi9S6BUW9BPDnPze+nDgvIMzSVcVpk9eDmji0ow/gWGCec+4dYDtgWdm4ZWFZRWY21sw6zaxzZe9GUGmLf/3X4A6ezYrjBndJ8fmISZE0qXsrCDO7F9imwqiznHM1j2PNbBfgIuCQWtNV45y7BrgGoKOjQ8dACat2YVor90N6+234wAeixeODjrTzR
Uf/tdVNAM65g+tNU4mZDQImA2Occ6UTGZ8HBpVNNigskxS49FLfETTHx87a1w5Fial1SgLVJdIEZGYDgGnAmc65f95JxTm3HHjNzPYJz/4ZA6SwNbwx7bqDYru89ZbvCPxTu336RLmxoP4HtUU9DfQYM1sGjACmmdmMcNRpwI7AOWb2aPi3dTjuVOA6oAtYCtwdJQaf4niYd7OcC85tFj90NNl+jV4NX4kSQG2RbgftnJtM0MzTu/w/gP+o8plO4KNRlltkEya01inbiPInY5XTj8g//Q9ap6Rdna4EjmDNmvYvMysPiWnHDivtfQDaaUvaKQFEUHq+bd71viFXkflKAEomrVMNoDolgAhWrfIdQXskeVdLacyECb4jyCYlztqUACSz1AQkjVANoDolAJGEKAH455wSQC1KAE2q1R6+dm28d5OsJMrG/O678MQT8cVSSzt+dEk/sEYk75QAmnDbbbD77nD77ZXH//u/w267xb+TjWtnetppsMsu8cyrnnYc/V54YfLL6E1NQNmjGkB1SgBNKO3YFy2qPP73vw+GL7xQebxv117bvmWl+WZwUWhnki1FOVGjVUoAMVqwIBiWHkBeZOPG+Y4gGUoA2fLYYzB0qO8o0ksJIAF5u39+K/JaA5DWVXq0aTtsvLGf5WaBEkALfLbtZuUI1NePPU3UB9DT4j6PjhLflACa0OjOVz/87uawvMlKAhZphBJAm02frqPjotCBgKRdpLuBSnPmzYMjj4SxY31HIq3SaaCSJ6oBNKHRH3S1ncQrrwTDG2+MJ54suPlmuPNO31HER01ArfOVEF980c9ys0A1AA+qPXu3EVnbAY0ZEwx1NCy+nH667wjSSzWAJkTtBM7azjtOeVl3NQFlz9q1viNILyWABCgBVPbGG74jqE6P2cwvJeLqlACkbQ45xHcE1W2xRWPTFT2JR+FrR6wEUJ0SQAvqbVDVdhJF23n0/p4efNBPHL5oxyNppwTQhDRcCJalJNLZ6TuC+KkPIHtWrvQdQXrpLKA2irLzvuQSeOqp+GJphzxe8JalBCxSjxJAG7W68zCDb30reH3ggbGFkzjdFE/KqUaUPmoCksR86lO+I4jf6683Pu0zzyQWhkgslADaSM0H2Ve6mrsR7Xr8pkirIiUAMzvOzBaa2Toz66gwfrCZvW5m3y0rO8zMFptZl5mdGWX5Imm2ZInvCERqi1oDWACMBmZWGf8z4O7SGzPrB0wEDgeGAV80s2ERY8iMOGoAqkX41cz330xtQcSHSJ3AzrlFAFbhV2FmnwX+BpRf/7k30OWcezqc5lZgFJCryrI6u/JLp4FKniTSB2Bm7wd+APy416jtgOfK3i8LyzKllR/2738PBxwQfyxxUc2iMc18T+vWJRdHFk2Z4jsC6a1uAjCze81sQYW/UTU+di7wc+dcE+dMVFz2WDPrNLPOlSm4mqPZneQ778Dy5cHrn/yk9eU2c+aJJEuJsnmTJwfPiP5x78NB8a5uE5Bz7uAW5vtx4HNmdjEwAFhnZm8Dc4Hty6YbBDxfY9nXANcAdHR0ZK5CfeyxMG1aUGOYNav1+ey9d3wxtWrtWujXz3cU2aIaQGD0aN8RSDWJXAjmnPtnY4eZnQu87py73MzWB4aa2Q4EO/7jgS8lEUMSSk0/r77a2PTTpsUfQ6tHoKWaSKvWrct/AihdbFeLagCSJ1FPAz3GzJYBI4BpZjaj1vTOuTXAacAMYBFwu3NuYZQY2unxx4PhpZf6jaMVI0f6jiD9Lrmk/jRKAJInUc8CmgxMrjPNub3eTwemR1muL1Ge5OXbk0/6jiAflAAkT3QlcBPW07dVeEoAkifapTXorbeydRrbNdcEO6sVK+KZ3667aucH+g4kX5QAGrRqle8IAo3ugMaPD4aTJtW/bqGRxyEuXtzYcvNOCcCP556rP400T7eDTkDvHe7q1cnNu5rSTv3MM2GbbWpPu88+0WISSdrgwbqyOgmqAbSB7/viP/107fFZe
9CMT6oB+KMEED8lgAbphy/i1003+Y4gf5QAWjRpUuPTxtl++ec/N/+Z//mf+JZfdDoTzB/fNek80ubcoq9+tfFpJ05MLo5GTK55pYY04x//8B1BcSkBxE8JoAGVnuxUqz2yd3PRtdfGG0+zUnAfvdy44ALfERTXqaf6jiB/lAAasHSp+gBEJH+UAERECkoJQHLj3Xd9RyCSLUoADVITUPo98IDvCESyRQmgAUcfXX+a8gShC1b80C2vRZpT+ATw2muVz/KR7Fi3Dq6+2ncUItlT+ARwyCGwyy71p2umCUg1gPY68UQ45RTfUYhkT+FvBjd7dmPT/fKXycYhrdlii8buZioifRW+BtCo0u2VJV208xdpnRJAAn74Q501JCLppwRQ5pVXfEcgItI+SgChn/8cNt8c7rjDdyQiIu2hBBA644xg+Je/+I1DRKRdlAB60SmckkWnn+47AskiJQCRHDjxRN8RSBYpAYgUxDe/6TsCSRslAJEc2GST+tPstVffsiOOiD8WyY5ICcDMjjOzhWa2zsw6eo3bzcweCsc/bmYbheV7hu+7zOxSs3SdMa8+AMmij3yk5/tKT4H73Od6vu/q0vUq7TRunO8I+opaA1gAjAZmlhea2frAr4BTnHO7AAcCpSd6XgmcDAwN/w6LGIOI9LLlln3Leu/s/+Vf2hOLBI4/3ncEfUVKAM65Rc65xRVGHQI85pybH073D+fcWjPbFtjMOTfLOeeAm4DPRolBRAKvvVZ/mlGjguH11wdD1QCSt/32wTCNrQtJ9QH8X8CZ2Qwzm2dm3w/LtwOWlU23LCyryMzGmlmnmXWu1JPNRWradFO44grYfffq05T6CjbcsD0xSbq/67oJwMzuNbMFFf5G1fjY+sD+wP8Lh8eY2aeaDc45d41zrsM517HVVls1+/GWpDFLZ92nmv7PS6u+/nWYN6/yOLPuI/5167rLfLvqKt8RFFfd20E75w5uYb7LgJnOuZcAzGw6sAdBv8CgsukGAc+3MH8RacF64SFfmg50TjpJz3PwJakmoBnArma2cdgh/EngCefccuA1M9snPPtnDHBXQjGISC+77hoMS+3SabDBBr4jSFaplpWmpFsS9TTQY8xsGTACmGZmMwCcc68APwMeBh4F5jnnpoUfOxW4DugClgJ3R4lB0i8NzQwS+M534KGH4KCDfEdSHGlOAJGeCOacmwxMrjLuVwRNPr3LO4GPRlluktL4T8q6Ri5SkvZYbz3YZ5/u90rOxaYrgSVxaT4LQqTIlAB60RGRSHZsu63vCBqXxtYFJYBe0vhPEklK1g94fvMb3xHUl+bvWAmgCiWC2s47z3cEkidz5rT2uTTvXLOg0AngnXd8R5BdG2/c+LT6kUo9O+7oO4JiKmQCmDQJVqyAd9+tPo1qAJI3lRKx7+T80kvBMM+/tzSfBlq4BPDss8GVh6NHVx6fxn+SSFJ8J4Attoj2+Y+m9oTybr6/41oiXQeQRaWj/hdfrP6PuewyePLJ9sUkEsWECb4j8GezzXxH0Lg0HlwWLgGU1PpnfOMb7Ysjq9J8VCPZk8adY1zS/FspXBNQ+T+j0j/mj39sXywiEshzAihJ4zoWLgGUVPtnPPtse+MQ8SktR6dp3DkWQeESQFo2+Kz75Ccbn1bfebKa+V+IlCtcAihxTjumKPbc03cEUtLR4TsCyarCJQDt9EW6peX3UIQmoDSuY+ESQLm0bPwi7aDt3Y80f++FSwBpvipPRKSdCpcAyqU5M4vErdJBzwEHtD+OSnRA5kdhE4A2uPZRok2vPFz0mPbHW6a51aFwCaDehWAiRRLXb2D48Hjm04qBA/0tuxFKACmUxn+GSJKS3Oa/8pVon9fv0Y/CJQAd9YvEb+xYf8vWb7p1hUsAJTrikLwbNKh9y9poo2ifL8LvMY3rWLgEoKMFKYq5c31HIJDufU7hEoBIUWy9dc/36+X0157mHWwtAwb4jqCACSDNPfJ5ldUfaJ4MHw79+vmOo
roov8esbl9piDtSAjCz48xsoZmtM7OOsvINzOxGM3vczBaZ2fiycYeZ2WIz6zKzM6MsX0Qas/HGyc37Jz+JPo8iHJClcR2j1gAWAKOBmb3KjwP6O+d2BfYE/s3MhphZP2AicDgwDPiimQ2LGENTVAOQIor67N1axo+vP02RpeFIv5pIj4R0zi0CsL5r6IBNzGx94H3Au8BrwN5Al3Pu6fBztwKjgCeixCEitd1wg+8IaivCAVka1zGpPoA7gDeA5cDfgQnOuZeB7YDnyqZbFpa1XRr/GSJJ2Xxz3xEkJ81H2JDu+OrWAMzsXmCbCqPOcs7dVeVjewNrgQ8BA4H7wvk0xczGAmMBBg8e3OzHq8yz+/XDD8cySxGJqAgHZL3XMQ2JoW4CcM4d3MJ8vwT8wTn3HrDCzB4AOgiO/rcvm24Q8HyNZV8DXAPQ0dER6ybiXHruhCiSB6NHw29/6zuK9Elz7SupJqC/AyMBzGwTYB/gSeBhYKiZ7WBmGwLHA1MSiqGiNGTdotF3Xgx33uk7gvRxLvpV0kmKehroMWa2DBgBTDOzGeGoicD7zWwhwU5/knPuMefcGuA0YAawCLjdObcwSgwikn1FvA4gDc1eUc8CmgxMrlD+OsGpoJU+Mx2YHmW5Ueg0UBGJw447QldX49OncZ9TuCuBRSRf4qwBzJvX+LTnndfYdGmuoSgBiIh3aTk63n33xqfdYIPk4mgXJQARybQ0H2GnXeESgPoA2k8/UMmjkSMbm67aPicNv4vCJQARSZ+0HpDttlv1cY2e35/mg87CJYA0/zNEikq/x+rGjElu3oVLACKSPq0kgFInbBqaUpK0447JzVsJQEQyqR23cokzuagPIAXUBCSSD2nYgTZy2mjUOJNcz8IlABFJn6zeCqKZC8fSqHAJQDWA9kvDkZrkl7av1hUuAYiINCrJPoA0KFwCUA1AJH2y9nu8//7Gp01zDaVwCUBEpFHVEtN++8U3L58KmwDS+M8QKaqsdgI3Is3xFTYBiOTBxhtHn0et2x2kWTt2rEn2AQwaFN+8W1XYBKAaQPuk+Qgo6/r1iz6P+fPhxBOjz0cqq7b99+/f3jgqKWwCEJH0SMMBWemB9ltv7TeOdlICEJFMi6uGecwxwXD//eOZX1x0JXAC0nDEIRKVtuP43Xxz92tdByAikrBttmn9s3EfIcfRsV4uzX1ghU0AaczGIr74/j1stpnf5RdVYROAiGRb6cg6iSPsD34wGE6Y0Po8hg+vPX7SpNbnHZfCJgDfRzxFkuYqcNZpO05GqRloyJDW57HzzsHwQx8Khptu2nP8sGGtzzsuhU0AEt3MmXDggb6jkHa48krfEWTXT38KN9wABx/c3Oei9Is0KlICMLOfmtmTZvaYmU02swFl48abWZeZLTazQ8vKDwvLuszszCjLj0JHTtEdcAB89KO+o5BaDj20/jSNOOmkeOZTRO97H5xwQvM14XZ851FrAPcAH3XO7QYsAcYDmNkw4HhgF+Aw4Aoz62dm/YCJwOHAMOCL4bQikoC77oIVK3xHkSw1MbYuUgJwzv3RObcmfDsLKN3dYhRwq3PuHefc34AuYO/wr8s597Rz7l3g1nBaEUlA//6w1Va+oyimUkdyq7785WB43HHRY6lm/RjndRJwW/h6O4KEULIsLAN4rlf5x2OMoY+ODnjrre73a8J0tXp1kkstjkbuZ7LhhsnHUVRxnbNe7/+YxibTjTbqOYxTHN9r707fZkD/lskAAAabSURBVO28c/Lfe90agJnda2YLKvyNKpvmLGAN8Os4gzOzsWbWaWadK1eubGkeO+0U9LaX/kp3Phw+PGib622PPSIEXBBnndX9+kc/gu99D84+u/K0Q4cGnWDHHhu8v+WW+vPvnTAOOghuvx2++93W4m3FmDHxzeuqq2Dq1OD1vHlwxhmVp5s+vfl533df63GVu+gi2Hvv6uNLCaJ0VszXvhYc4Q4c2HO6mTMbX2ZpmyiZOrW56wGuu
y4YXnhhsLO95BLYZReYNq3ndAcdVH9eAwb0fD91Kpx3HuywA/zpTz237xtu6Dntgw8Gw97b9g9+0Hc58+cHw1nh4fFeewXDD38YNtmke7pmHjgThbmIKcbMTgT+DfiUc+7NsGw8gHPuwvD9DODc8CPnOucOrTRdLR0dHa6zszNSrCIiRWJmc51zHdXGRz0L6DDg+8DRpZ1/aApwvJn1N7MdgKHAHOBhYKiZ7WBmGxJ0FE+JEoOIiLQmah/A5UB/4B4LuuJnOedOcc4tNLPbgScImobGOefWApjZacAMoB9wvXNuYcQYRESkBZGbgNpFTUAiIs1JtAlIRESySwlARKSglABERApKCUBEpKCUAERECiozZwGZ2Urg2RY/viXwUozh+JSXdcnLeoDWJY3ysh4QbV3+j3Ou6t2gMpMAojCzzlqnQmVJXtYlL+sBWpc0yst6QLLroiYgEZGCUgIQESmooiSAa3wHEKO8rEte1gO0LmmUl/WABNelEH0AIiLSV1FqACIi0kuuE0BaHkDfm5ldb2YrzGxBWdnmZnaPmT0VDgeG5WZml4br8JiZ7VH2mRPC6Z8ysxPKyvc0s8fDz1xqlsxTU81sezP7i5k9YWYLzeybGV6XjcxsjpnND9flx2H5DmY2O1z+beFtzAlvdX5bWD7bzIaUzWt8WL7YzA4tK2/r9hg+h/sRM5ua5XUxs2fCbeBRM+sMy7K4jQ0wszvM7EkzW2RmI7yvh3Mul38Et5teCnwY2BCYDwzzHVcY2yeAPYAFZWUXA2eGr88ELgpfHwHcDRiwDzA7LN8ceDocDgxfDwzHzQmntfCzhye0HtsCe4SvNwWWAMMyui4GvD98vQEwO1zu7cDxYflVwNfD16cCV4WvjwduC18PC7e1/sAO4TbYz8f2CJwB/H9gavg+k+sCPANs2assi9vYjcDXwtcbAgN8r0diG5/vP2AEMKPs/XhgvO+4yuIZQs8EsBjYNny9LbA4fH018MXe0wFfBK4uK786LNsWeLKsvMd0Ca/TXcCns74uwMbAPILnVb8ErN97myJ4psWI8PX64XTWezsrTdfu7REYBPwJGAlMDWPL6ro8Q98EkKltDPgA8DfCfte0rEeem4C2o+8D6LerMm0afNA5tzx8/T/AB8PX1dajVvmyCuWJCpsNdic4cs7kuoRNJo8CK4B7CI5yVznn1lRY/j9jDse/CmxB8+uYlF8QPK1vXfh+C7K7Lg74o5nNNbOxYVnWtrEdgJXApLBZ7joz2wTP65HnBJBZLkjhmTk9y8zeD9wJfMs591r5uCyti3NurXNuOMHR897ATp5DaomZHQWscM7N9R1LTPZ3zu0BHA6MM7NPlI/MyDa2PkGz75XOud2BNwiafP7Jx3rkOQE8D2xf9n5QWJZWL5rZtgDhcEVYXm09apUPqlCeCDPbgGDn/2vn3G/D4kyuS4lzbhXwF4KmjgFmVnp0avny/xlzOP4DwD9ofh2TsB9wtJk9A9xK0Ax0CdlcF5xzz4fDFcBkguSctW1sGbDMOTc7fH8HQULwux5Jtdv5/iPIuE8TVL1KHVW7+I6rLL4h9OwD+Ck9O4MuDl8fSc/OoDlh+eYEbYoDw7+/AZuH43p3Bh2R0DoYcBPwi17lWVyXrYAB4ev3AfcBRwG/oWfH6anh63H07Di9PXy9Cz07Tp8m6DT1sj0CB9LdCZy5dQE2ATYte/0gcFhGt7H7gI+Er88N18HreiS68fn+I+hJX0LQlnuW73jK4roFWA68R3Bk8FWCNtc/AU8B95b9Uw2YGK7D40BH2XxOArrCv6+UlXcAC8LPXE6vjqcY12N/girrY8Cj4d8RGV2X3YBHwnVZAJwTln84/GF1EexA+4flG4Xvu8LxHy6b11lhvIspOxPDx/ZIzwSQuXUJY54f/i0sLSuj29hwoDPcxn5HsAP3uh66ElhEpKDy3AcgIiI1KAGIiBSUEoCISEEpAYiIFJQSgIhIQSkBiIgUlBKAiEhBK
QGIiBTU/wJdqaZnMWjTJgAAAABJRU5ErkJggg==\n","text/plain":["
def idx_to_state(env, state, n_bins=None):
    """Map a continuous (position, velocity) observation to a flat Q-table row.

    Despite its name, this converts a state *to* an index. Each observation
    dimension is split into ``n_bins`` equal-width bins spanning
    ``env.observation_space.low`` .. ``.high``; the row index is
    ``position_bin + velocity_bin * n_bins``.

    Args:
        env: object exposing ``observation_space.low`` / ``.high`` (indexable).
        state: indexable whose first two entries are position and velocity.
        n_bins: bins per dimension; defaults to the notebook-level ``N_idx``.

    Returns:
        int in ``[0, n_bins**2 - 1]``.
    """
    if n_bins is None:
        n_bins = N_idx
    env_low = env.observation_space.low
    env_high = env.observation_space.high
    env_distance = (env_high - env_low) / n_bins
    last_bin = n_bins - 1

    # An observation exactly on the upper bound divides out to bin `n_bins`,
    # which would index past the end of the loaded Q-table; clamp it.
    position_idx = min(max(int((state[0] - env_low[0]) / env_distance[0]), 0), last_bin)
    velocity_idx = min(max(int((state[1] - env_low[1]) / env_distance[1]), 0), last_bin)
    return position_idx + velocity_idx * n_bins
env.render()\n","\n"," state_idx = idx_to_state(env, state)\n","\n"," action = np.argmax(q_table[state_idx])\n","\n"," next_state, reward, done, _ = env.step(action)\n"," next_state_idx = idx_to_state(env, next_state)\n","\n"," score += reward\n"," state = next_state\n","\n"," if done:\n"," print('{} episode | score: {:.1f}'.format(ep + 1, score))\n"," \n"," break\n","\n","env.close()"],"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":[":: Testing APP-learning.\n","\n","1 episode | score: -97.0\n","2 episode | score: -133.0\n","3 episode | score: -98.0\n","4 episode | score: -148.0\n","5 episode | score: -135.0\n","6 episode | score: -146.0\n","7 episode | score: -131.0\n","8 episode | score: -132.0\n","9 episode | score: -158.0\n","10 episode | score: -98.0\n"]}]},{"cell_type":"code","metadata":{"id":"dajTkpC-Tdvi","colab":{"base_uri":"https://localhost:8080/","height":421},"executionInfo":{"status":"ok","timestamp":1636609075181,"user_tz":-330,"elapsed":850,"user":{"displayName":"Sparsh Agarwal","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"13037694610922482904"}},"outputId":"364b2145-5eea-4ab0-faa4-8c1f7dd4e1a1"},"source":["show_video()"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"text/html":[""],"text/plain":[""]},"metadata":{}}]}]}