{ "metadata": { "name": "", "signature": "sha256:b15441d26bbc7e5a5a2c28a817f71cf978d25b49a174fb4d4b204b5667169a92" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "code", "collapsed": false, "input": [ "import numpy as np\n", "from quantecon import MarkovChain\n", "from __future__ import division" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 7 }, { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "MDP class file" ] }, { "cell_type": "code", "collapsed": false, "input": [ "class MDP:\n", " \n", " def __init__(self, r, delta, Q):\n", " self.r=np.array(r)\n", " self.delta=delta\n", " self.Q=np.array(Q)\n", " self.n=self.r.shape[0] # number of states\n", " self.m=self.r.shape[1] # number of actions\n", " \n", " def Bellman_Operator(self, w):\n", " w=np.array(w)\n", " objf=self.r+self.delta*self.Q.dot(w)\n", " Bw=np.max(objf, axis=1)\n", " policy=np.argmax(objf, axis=1)\n", " return Bw,policy\n", " \n", " def solve_bellman_equation(self,w0, tol, max_iter):\n", " # Iteration to obtain value function and optimal policy\n", " w_current=np.array(w0)\n", " for t in range(max_iter+1):\n", " w_next, policy=self.Bellman_Operator(w_current)\n", " if np.max(abs(w_current-w_next))\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mpolicy_iter\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mP\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mkurtzsigma\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpolicy_iteration\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mzeros\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mB\u001b[0m\u001b[1;33m+\u001b[0m\u001b[0mM\u001b[0m\u001b[1;33m+\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mmax_iter\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m10000\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpolicy_iter\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m\u001b[0m in \u001b[0;36mpolicy_iteration\u001b[1;34m(self, policy0, max_iter)\u001b[0m\n\u001b[0;32m 42\u001b[0m \u001b[0mpolicy_current\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpolicy0\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 43\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mt\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmax_iter\u001b[0m\u001b[1;33m+\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 44\u001b[1;33m \u001b[0mpolicy_next\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgreedy\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalue_of_policy\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpolicy_current\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 45\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mpolicy_next\u001b[0m\u001b[1;33m-\u001b[0m\u001b[0mpolicy_current\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 46\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m==\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mzeros\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mn\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m\u001b[0m in \u001b[0;36mgreedy\u001b[1;34m(self, w)\u001b[0m\n\u001b[0;32m 34\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mgreedy\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mw\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 35\u001b[0m \u001b[0mw\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mw\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 36\u001b[1;33m \u001b[0mobjf\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mr\u001b[0m\u001b[1;33m+\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdelta\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mQ\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdot\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mw\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 37\u001b[0m \u001b[0mpolicy\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0margmax\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobjf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 38\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mpolicy\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mValueError\u001b[0m: operands could not be broadcast together with shapes (16,6) (16,6,6) " ] } ], "prompt_number": 71 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 61 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }