{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import gym\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from gym.envs.registration import register\n", "register(\n", " id='FrozenLakeNotSlippery-v0',\n", " entry_point='gym.envs.toy_text:FrozenLakeEnv',\n", " kwargs={'map_name' : '4x4', 'is_slippery': False},\n", " max_episode_steps=100,\n", " reward_threshold=0.78, # optimum = .8196\n", ")\n", "\n", "#env = gym.make('FrozenLake-v0')\n", "env = gym.make('FrozenLakeNotSlippery-v0')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Score over time: 0.9545\n", "Final Q-Table Values\n", "[[0. 0. 0.77378094 0. ]\n", " [0. 0. 0.81450625 0. ]\n", " [0. 0.857375 0. 0. ]\n", " [0. 0. 0. 0. ]\n", " [0. 0. 0. 0. ]\n", " [0. 0. 0. 0. ]\n", " [0. 0.9025 0. 0. ]\n", " [0. 0. 0. 0. ]\n", " [0. 0. 0. 0. ]\n", " [0. 0. 0. 0. ]\n", " [0. 0.95 0. 0. ]\n", " [0. 0. 0. 0. ]\n", " [0. 0. 0. 0. ]\n", " [0. 0. 0. 0. ]\n", " [0. 0. 1. 0. ]\n", " [0. 0. 0. 0. ]]\n" ] } ], "source": [ "Q = np.zeros([env.observation_space.n, env.action_space.n])\n", "#Q = np.random.rand(env.observation_space.n, env.action_space.n)\n", "lr = 0.8\n", "y = 0.95\n", "num_episodes = 2000\n", "#create list to contain total rewards per episode\n", "rList = []\n", "\n", "for i in range(num_episodes):\n", " #Reset environment and get first new observation\n", " s = env.reset()\n", " rAll = 0\n", " d = False\n", " j = 0\n", " #The Q-Table learning algorithm\n", " while j < 99:\n", " j+=1\n", " #Choose an action by greedily (with noise) picking from Q table\n", " a = np.argmax(Q[s,:] + (np.random.randn(1, env.action_space.n) * (1./(i**2 + 1))))\n", " #Get new state and reward from environment\n", " s1, r, d, _ = env.step(a)\n", " #Update Q-Table with new knowledge\n", " Q[s,a] = Q[s,a] + lr*(r + y * np.max(Q[s1,:]) - Q[s,a])\n", " rAll += r\n", " s = s1\n", " if d == True:\n", " break\n", " rList.append(rAll)\n", "\n", "print('Score over time: {}'.format(sum(rList)/num_episodes))\n", "print('Final Q-Table Values')\n", "print(Q)\n", " \n", "\n", " \n", "\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "s = env.reset()\n", "d = 0\n", "j = 0\n", "while j < 99:\n", " #env.render()\n", " j+=1\n", " a = np.argmax(Q[s,:] )\n", " #Get new state and reward from environment\n", " s1, r, d, _ = env.step(a)\n", " s = s1\n", " if d == True:\n", " break" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[<matplotlib.lines.Line2D at 0x7fca6de89e48>]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAEc1JREFUeJzt3XuMXGd5x/Hvg01CIVfwJqS+xKZ1KkxFSboKqSiUKiHYUWv3QpGtVqQ0wqpE2iJoVaNUKU3/KaCCipqSmpJyERACLcWqHBlEU6gqknoDuTmOycaEeus0NiEKUbkEw9M/5jhMJrO7Z9Zz8Tnv9yON9pwz78x5/M7sz2fPO++cyEwkSe3yrEkXIEkaPsNdklrIcJekFjLcJamFDHdJaiHDXZJayHCXpBYy3CWphQx3SWqh5ZPa8YoVK3Lt2rWT2r0kNdIdd9zxzcycWqzdxMJ97dq1zMzMTGr3ktRIEfGNOu08LSNJLWS4S1ILGe6S1EKGuyS1kOEuSS20aLhHxI0RcSQi7p3n/oiI90XEbETcHREXDb9MSdIg6hy5fwjYuMD9m4D11W078P4TL0uSdCIW/Zx7Zn4pItYu0GQL8JHsXK/vtog4KyLOy8yHh1Tjgm49cIT155zGqrOfu2C7v7rlfh545Ale8pNnjKMsSZrXpS8+l59bfdZI9zGMSUwrgUNd63PVtmeEe0Rsp3N0z5o1a4awa3jjP+7l9FOXc89fvHbeNkee+B43fPFBAL5w/xEihrJrSVqSc854TiPCvV9U9r3qdmbuBHYCTE9PD+3K3E98/9iC9z957EdPLf/N1pex5WUrh7VrSTopDePTMnPA6q71VcDhITyvJGmJhhHuu4A3VJ+auQR4fFzn2yVJ/S16WiYiPgG8GlgREXPAnwPPBsjMG4DdwBXALPAd4I2jKlaSVE+dT8tsW+T+BN48tIokSSfMGaqS1EKGuyS1kOEuSS1kuEtSCxnuktRChrsktVBx4R5+sYykAhQX7pJUAsNdklrIcJekFjLcJamFigt3h1MllaC4cJekEhjuktRChrsktZDhLkktVFy4O0FVUgmKC3dJKoHhLkktZLhLUgsZ7pLUQsWFezhHVVIBigt3SSqB4S5JLWS4S1ILGe6S1ELFhbszVCWVoLhwl6QSGO6S1EKGuyS1UK1wj4iNEXEgImYjYkef+9dExK0R8dWIuDsirhh+qZKkuhYN94hYBlwPbAI2ANsiYkNPsz8Dbs7MC4GtwN8Nu9BhcTxVUgnqHLlfDMxm5sHMfBK4CdjS0yaBM6rlM4HDwytRkjSo5TXarAQOda3PAS/vafMO4HMR8QfA84DLhlKdJGlJ6hy59zuTkT3r24APZeYq4ArgoxHxjOeOiO0RMRMRM0ePHh28WklSLXXCfQ5Y3bW+imeedrkKuBkgM78MPAdY0ftEmbkzM6czc3pqamppFUuSFlUn3PcC6yNiXUScQmfAdFdPm/8GLgWIiBfTCfeT8tDcGaqSSrBouGfmMeBqYA+wn86nYvZFxHURsblq9jbgTRFxF/AJ4Hczs/fUjSRpTOoMqJKZu4HdPduu7Vq+D3jFcEuTJC2VM1QlqYUMd0lqoQLD3RFVSe1XYLhLUvsZ7pLUQoa7JLWQ4S5JLVRcuDtDVVIJigt3SSqB4S5JLWS4S1ILGe6S1ELFhbvjqZJKUFy4S1IJDHdJaqFGh3vd64F42RBJpWl0uEuS+isi3LtnpYZTVCUVoIhwl6TSGO6S1EKNDve6A6UOqEoqTaPDXZLUXxHh/rQB1cmVIUljU0S4S1JpDHdJaqFGh3vdcVIHVCWVptHhLknqr4hwf/oM1cnVIUnjUkS4S1JpDHdJaqFGh7tf+StJ/dUK94jYGBEHImI2InbM0+b1EXFfROyLiI8Pt0xJ0iCWL9YgIpYB1wOvAeaAvRGxKzPv62qzHng78IrMfCwizhlVwUvhgKqk0tQ5cr8YmM3Mg5n5JHATsKWnzZuA6zPzMYDMPDLcMiVJg6gT7iuBQ13rc9W2bhcAF0TEf0bEbRGxsd8TRcT2iJiJiJmjR48urWJJ0qLqhHu/Exm9Q5TLgfXAq4FtwD9ExFnPeFDmzsyczszpqampQWuVJNVUJ9zngNVd66uAw33afDYzf5CZXwcO0An7kfLrBySpvzrhvhdYHxHrIuIUYCuwq6fNvwC/DBARK+icpjk4zEKHJfzSX0kFWDTcM/MYcDWwB9gP3JyZ+yLiuojYXDXbAzwaEfcBtwJ/kpmPjqpoSdLCFv0oJEBm7gZ292y7tms5gbdWN0nShDV6hqokqb9Gh7sDpZLUX6PDfUkcT5VUgPLCXZIKYLhLUgsZ7pLUQo0O96w5R9WBV0mlaXS4L4XjqZJKUFy4S1IJDHdJaiHDXZJaqNHhXnegtO7AqyS1RaPDfSnCi6hKKkBx4S5JJTDcJamFDHdJaqEiwt0ZqpJKU0S4d3M4VVIJigt3SSqB4S5JLWS4S1ILNTrc689QlaSyNDrcJUn9FRfufvuApBIUF+6SVALDXZJaqNHhXv8aqg6pSipLo8NdktRfceEefgGBpAIUF+6SVIJa4R4RGyPiQETMRsSOBdq9LiIyIqaHV6IkaVCLhntELAOuBzYBG4BtEbGhT7vTgT8Ebh92kfNxhqok9VfnyP1iYDYzD2bmk8BNwJY+7f4SeBfwvSHWJ0lagjrhvhI41LU+V217SkRcCKzOzH8dYm0j4QxVSSWoE+794vCpMx0R8SzgvcDbFn2iiO0RMRMRM0ePHq1fpSRpIHXCfQ5Y3bW+CjjctX468LPAv0fEQ8AlwK5+g6qZuTMzpzNzempqaulVS5IWVCfc9wLrI2JdRJwCbAV2Hb8zMx/PzBWZuTYz1wK3AZszc2YkFXepO1DqBFVJpVk03DPzGHA1sAfYD9ycmfsi4rqI2DzqAiVJg1tep1Fm7gZ292y7dp62rz7xskbH8VRJJXCGqiS1kOEuSS3U6HCv/1W+jqhKKkujw12S1F954e6IqqQClBfuklQAw12SWqjR4e4MVUnqr9HhLknqr7hw9xqqkkpQXLhLUgkMd0lqoUaHu9dQlaT+Gh3ukqT+igt3r6EqqQTFhbsklcBwl6QWana41x1QdURVUmGaHe6SpL6KC3fHUyWVoLhwl6QSGO6S1EKNDvesOaJat50ktUWjw12S1F9x4R5OUZVUgOLCXZJKYLhLUgs1Otxrf+Wv46mSCtPocJck9VdcuDueKqkExYW7JJWgVrhHxMaIOBARsxGxo8/9b42I+yLi7oj4QkScP/xSJUl1LRruEbEMuB7YBGwAtkXEhp5mXwWmM/OlwKeBdw270H7qjpM6oCqpNHWO3C8GZjPzYGY+CdwEbOlukJm3ZuZ3qtXbgFXDLVOSNIg64b4SONS1Pldtm89VwC397oiI7RExExEzR48erV/lEDmeKqkEdcK9Xx72PdEREb8DTAPv7nd/Zu7MzOnMnJ6amqpfpSRpIMtrtJkDVnetrwIO9zaKiMuAa4BfyszvD6c8SdJS1Dly3wusj4h1EXEKsBXY1d0gIi4E/h7YnJlHhl9mf1lzpNSv/JVUmkXDPTOPAVcDe4D9wM2ZuS8irouIzVWzdwOnAZ+KiDsjYtc8TydJGoM6p2XIzN3A7p5t13YtXzbkukbGGaqSSuAMVUlqIcNdklqo0eHuDFVJ6q/R4S5J6q/AcHdEVVL7FRjuktR+hrsktVCjw92BUknqr9HhLknqr7hwd4aqpBIUF+6SVALDXZJaqNHhXverfB14lVSaRoe7JKm/4sLd8VRJJSgu3CWpBIa7JLWQ4S5JLdTscK/5KRgvkC2pNM0O9yUIp6hKKkBx4S5JJTDcJamFDHdJaqFGh7sXyJak/hod7kvhcKqkEhQX7pJUAsNdklrIcJekFmp0uNcdKHU8VVJpGh3uS+EEVUklqBXuEbExIg5ExGxE7Ohz/6kR8cnq/tsjYu2wC5Uk1bdouEfEMuB6YBOwAdgWERt6ml0FPJaZPw28F3jnsAuVJNVX58j9YmA2Mw9m5pPATcCWnjZbgA9Xy58GLg2/oUuSJmZ5jTYrgUNd63PAy+drk5nHIuJx4AXAN4dRZLeb9x7iA/9xEIBjP/rxUOlr3vPFeR/z3R/8cNhlSNJJrU649zsC7/0ASp02RMR2YDvAmjVrauz6mc567rNZf+5pT61//Zv/x0tXncmqs39iwcfNPfZdXnjGc7jg3NOXtF9JapI64T4HrO5aXwUcnqfNXEQsB84EvtX7RJm5E9gJMD09vaRPKF7+khdy+UteuJSHSlIx6pxz3wusj4h1EXEKsBXY1dNmF3Bltfw64N8y/bouSZqURY/cq3PoVwN7gGXAjZm5LyKuA2YycxfwQeCjETFL54h96yiLliQtrM5pGTJzN7C7Z9u1XcvfA35ruKVJkpaquBmqklQCw12SWshwl6QWMtwlqYUMd0lqoZjUx9Ej4ijwjSU+fAUj+GqDIbCuwZysdcHJW5t1DaaNdZ2fmVOLNZpYuJ+IiJjJzOlJ19HLugZzstYFJ29t1jWYkuvytIwktZDhLkkt1NRw3znpAuZhXYM5WeuCk7c26xpMsXU18py7JGlhTT1ylyQtoHHhvtjFuke879URcWtE7I+IfRHxR9X2d0TE/0TEndXtiq7HvL2q9UBEvHaEtT0UEfdU+5+ptj0/Ij4fEQ9UP8+utkdEvK+q6+6IuGhENf1MV5/cGRHfjoi3TKK/IuLGiDgSEfd2bRu4fyLiyqr9AxFxZb99DaGud0fE/dW+PxMRZ1Xb10bEd7v67Yaux/x89frPVrWf0GUu56lr4Ndt2L+v89T1ya6aHoqIO6vt4+yv+bJhcu+xzGzMjc5XDj8IvAg4BbgL2DDG/Z8HXFQtnw58jc5Fw98B/HGf9huqGk8F1lW1LxtRbQ8BK3q2vQvYUS3vAN5ZLV8B3ELnClqXALeP6bX7X+D8SfQX8CrgIuDepfYP8HzgYPXz7Gr57BHUdTmwvFp+Z1dda7vb9TzPfwG/UNV8C7BpBHUN9LqN4ve1X1099/81cO0E+mu+bJjYe6xpR+51LtY9Mpn5cGZ+pVp+AthP5/qx89kC3JSZ38/MrwOzdP4N49J94fIPA7/Wtf0j2XEbcFZEnDfiWi4FHszMhSaujay/MvNLPPPqYIP2z2uBz2fmtzLzMeDzwMZh15WZn8vMY9XqbXSufjavqrYzMvPL2UmIj3T9W4ZW1wLme92G/vu6UF3V0ffrgU8s9Bwj6q/5smFi77GmhXu/i3UvFK4jExFrgQuB26tNV1d/Xt14/E8vxltvAp+LiDuic61agHMz82HovPmAcyZQ13Fbefov3aT7Cwbvn0n02+/ROcI7bl1EfDUivhgRr6y2raxqGUddg7xu4+6vVwKPZOYDXdvG3l892TCx91jTwr3WhbhHXkTEacA/AW/JzG8D7wd+CngZ8DCdPw1hvPW+IjMvAjYBb46IVy3Qdqz9GJ3LM24GPlVtOhn6ayHz1THufrsGOAZ8rNr0MLAmMy8E3gp8PCLOGGNdg75u4349t/H0A4ix91efbJi36Tw1DK22poV7nYt1j1REPJvOi/exzPxngMx8JDN/mJk/Aj7Aj08ljK3ezDxc/TwCfKaq4ZHjp1uqn0fGXVdlE/CVzHykqnHi/VUZtH/GVl81kPYrwG9Xpw6oTns8Wi3fQed89gVVXd2nbkZS1xJet3H213LgN4BPdtU71v7qlw1M8D3WtHCvc7HukanO6X0Q2J+Z7+na3n2++teB4yP5u4CtEXFqRKwD1tMZyBl2Xc+LiNOPL9MZkLuXp1+4/Ergs111vaEasb8EePz4n44j8rQjqkn3V5dB+2cPcHlEnF2dkri82jZUEbER+FNgc2Z+p2v7VEQsq5ZfRKd/Dla1PRERl1Tv0Td0/VuGWdegr9s4f18vA+7PzKdOt4yzv+bLBib5HjuREeJJ3OiMMn+Nzv/C14x5379I50+ku4E7q9sVwEeBe6rtu4Dzuh5zTVXrAU5wRH6Bul5E55MIdwH7jvcL8ALgC8AD1c/nV9sDuL6q6x5geoR99lzgUeDMrm1j7y86/7k8DPyAztHRVUvpHzrnwGer2xtHVNcsnfOux99jN1Rtf7N6fe8CvgL8atfzTNMJ2weBv6WaoDjkugZ+3Yb9+9qvrmr7h4Df72k7zv6aLxsm9h5zhqoktVDTTstIkmow3CWphQx3SWohw12SWshwl6QWMtwlqYUMd0lqIcNdklro/wHCVWaoa3zW4wAAAABJRU5ErkJggg==\n", "text/plain": [ "<matplotlib.figure.Figure at 0x7fca763aa940>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plt.plot(rList)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 }