{ "metadata": { "name": "predicting_with_trees" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "code", "collapsed": false, "input": [ "import pandas.rpy.common as com" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [ "iris = com.load_data('iris')\n", "iris.columns = map(lambda x: x.lower().replace('.', '_'), iris.columns)\n", "iris.columns" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 26, "text": [ "Index([sepal_length, sepal_width, petal_length, petal_width, species], dtype=object)" ] } ], "prompt_number": 26 }, { "cell_type": "code", "collapsed": false, "input": [ "iris['species'].value_counts()" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 27, "text": [ "setosa 50\n", "versicolor 50\n", "virginica 50" ] } ], "prompt_number": 27 }, { "cell_type": "code", "collapsed": false, "input": [ "spec = iris.groupby('species')\n", "cols = ['k', 'r', 'g']\n", "\n", "i = 0\n", "for s, df in spec:\n", " plot(df['petal_width'], df['sepal_width'], 'o', color=cols[i], label=s)\n", " i += 1\n", " \n", "legend()\n", "xlabel('Petal Width')\n", "ylabel('Sepal Width');" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "display_data", "png": 
"iVBORw0KGgoAAAANSUhEUgAAAYYAAAEKCAYAAAAW8vJGAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xl4E+XaP/Bv2oTWQm2hYS2UJUVsQboAFiq2US4bAevh\naFlaZRE9FqQU1OMGYoMsBxQXClXhHA8urywHlCM0BUGkIJZFD5XDCy8/oVAooAXZly5pc//+qA1M\nmzTJJDNJmvtzXbmuZjLPPM9MJvN05pl7bgURERhjjLE/+Lm7AYwxxjwLdwyMMcYEuGNgjDEmwB0D\nY4wxAe4YGGOMCXDHwBhjTEDyjqG2thZxcXFITU1t9FlhYSFCQkIQFxeHuLg4zJ07V+rmMMYYs0Ep\ndQWLFy9GdHQ0rl27ZvHz5ORkbNiwQepmMMYYs5OkZwynT59GQUEBnnnmGViLo+P4OsYY8yySnjE8\n//zzePvtt3H16lWLnysUChQVFSEmJgbh4eFYtGgRoqOjG83DGGPMcWL/8ZbsjCE/Px/t2rVDXFyc\n1cbFx8ejrKwMBw4cwNSpUzFixAiL8xERv4iQk5Pj9jZ4you3BW8L3hZNv5whWcdQVFSEDRs2oHv3\n7khPT8d3332HcePGCeYJDg5GUFAQAGDo0KEwGo24ePGiVE1ijDFmB8k6hvnz56OsrAwnTpzA6tWr\n8eCDD+Kzzz4TzFNeXm7u2fbt2wciQps2baRqEmOMMTtIfldSvfqxgmXLlgEAMjMzsW7dOnz44YdQ\nKpUICgrC6tWr5WqOV9Jqte5ugsfgbXELb4tbeFu4hoKcvRglMYVC4fT1MsYY8zXOHDtlO2NgdQwG\nA3Jzc1FVVYWAgABkZ2dj+PDh7m4WYza1adMGly5dcnczWAOtW7d2+dgsdwwyMhgMmDZtGkpKSszT\n6v/mzoF5ukuXLvHZuweS4pZ+flaSjHJzcwWdAlDXMSxZssRNLWKMsca4Y5BRVVWVxemVlZUyt4Qx\nxqzjjkFGAQEBFqcHBgbK3BLGGLOOOwYZZWdnQ6PRCKZpNBpMnTrVTS1ijLHGePBZRvUDzEuWLEFl\nZSUCAwMxdepUHnhmzA1KS0vRo0cP1NTUwM+P/0e+HccxMMbs0tRv0RW3Yct9K3d9x2A0GuHv7y9Z\nPVKz9r04dewkD+cFTWTMJ1j7Lebn55NGoyEA5pdGo6H8/Hy7l+2KZSxYsIDCw8MpODiYevXqRdu2\nbSOTyUR/+9vfSKPRUFhYGI0aNYouXrxIRERdunQhhUJBrVq1olatWtGePXvIZDLRnDlzqGvXrtSu\nXTsaN24cXblyhYiIKioq6IknnqCwsDAKDQ2lAQMGUHl5ORER/fOf/6SoqCgKDg6mHj160LJly+xu\nt7OsfS/OHDs9/qjLHQNjnsHabzElJUVwQK9/6XQ6u5ft7DKOHDlCXbp0oV9//ZWIiE6ePEklJSX0\n/vvv06BBg+jMmTNUXV1NmZmZlJ6eTkREpaWlpFAoqLa21rycjz/+mCIjI+nEiRN0/fp1euyxx2js\n2LFERPTRRx9RamoqVVRUkMlkov3799PVq1eJiMhgMNDx48eJiGjHjh0UFBRE+/fvt3v9nSFFx8AX\n1hhjTnHFbdjOLsPf3x9VVVU4dOgQjEYjIiIi0KNHDyxbtgxz585Fp06doFKpkJOTg3Xr1sFkMlm8\nzPLFF1/gxRdfRLdu3dCyZUv87W9/w+rVq1FbW4sWLVrgwoULOHr0KBQKBeLi4hAcHAwAGDZsGLp3\n7w4ASEpKQkpKCr7//nu719/TcMfAGHOKK27DdnYZkZGReP/996HX69G+fXukp6fj7NmzKC0txZ//\n/Ge0bt0arVu3RnR0NJRKJcrLyy0u59dff0XXrl3N7yMiIlBTU4Nz585h7Nix0Ol0GDNmDMLDw/HK\nK6+gpqYGALBp0yYMHDgQYWFhaN26NQoKCnDhwgW719/TcMfAG
HOKK27DdsUy0tPT8f333+PkyZNQ\nKBR45ZVXEBERgc2bN+PSpUvm182bN9GxY0eLj5Lo1KkTSktLze9PnToFpVKJ9u3bQ6lU4o033sCh\nQ4dQVFSE/Px8fPbZZ6iqqsLjjz+Ol19+GefOncOlS5cwbNgwr75phm9XZYw5xRW3YTu7jF9++QWn\nT5/Gfffdh4CAAAQGBoKIMGnSJMyYMQOffvopIiIicP78eezevRuPPvoo2rZtCz8/P5SUlKBnz54A\n6jqXhQsXYujQoVCr1ZgxYwbGjBkDPz8/FBYWIiwsDNHR0QgODoZKpYK/vz+qq6tRXV0NtVoNPz8/\nbNq0CVu2bME999zj4Jb0IKJHJ2TiBU1kzCd48m/xv//9L917770UHBxMbdq0odTUVPr111/JZDLR\nu+++S7169aLg4GDSaDQ0c+ZMc7k33niD2rZtS6GhobR3714ymUz05ptvUpcuXaht27Y0duxYunz5\nMhERrVq1inr16kUtW7ak9u3b07Rp08wD13l5edS+fXsKDQ2lsWPHUnp6Os2aNUuWdbf2vTjzfXEc\nA2PMLvxb9ExSxDHwGANjjDEB7hhQF3Gp0+mg1Wqh0+lgMBjc3STGGHMbnx985uQ5jDEm5PNnDJw8\nhzHGhHy+Y+DkOYwxJuTzHQMnz2GMMSGf7xg4eQ5jjAn5/OAzJ89hjDEhDnBjjNmFf4tAcHAwDh48\niG7duoleRrdu3fDxxx9jyJAhLmmTFAFuPn/GwBhj9rp27ZrTy1AoFBYf4OdJmlXHIHdqQMZYnZ0G\nA7bk5kJZVYWagACkZGcjycHfniuW4aza2lqPTfNZU1MDpVKmQ7bopyzJxN4muiI1IGPMOmu/xR35\n+TRDoyECzK8ZGg3tcOC35+wyFixYQGlpaYJp2dnZlJ2dTVeuXKGJEydSx44dKTw8nF5//XXzw+9W\nrFhBiYmJ9Pzzz1NYWBjNmjWLjh49SklJSRQSEkJqtZpGjx5tXqZCoaCSkhIiIrp58ya98MIL1LVr\nVwoJCaHBgwdTRUUFERF9/fXXFB0dTaGhoaTVaun//u//zMvo1q0bbdu2jYiIKisradq0adSpUyfq\n1KkTTZ8+naqqqoiIaPv27RQeHk4LFy6kDh060Lhx4yyuu7XvxZnDe7PpGFyRXpAxZp213+LMlBTB\nAb3+9boDvz1nl3Hy5EkKCgqia9euERFRTU0NdezYkfbu3UsjRoygSZMm0c2bN+ncuXN07733mnMy\nr1ixgpRKJS1dupRqa2upoqKCxowZQ/PnzycioqqqKvrhhx/M9dzeMTz33HP0wAMP0NmzZ6m2tpZ2\n795NVVVV9P/+3/+jli1b0rfffks1NTX01ltvUWRkJBmNRiISdgyzZs2iQYMG0fnz5+n8+fOUmJho\nfirr9u3bSalU0quvvkrV1dXmTqchKTqGZnO7KgeqMeYeSiu/PX8HfnvOLiMiIgLx8fFYv349AOC7\n775Dy5Yt0a1bN2zatAnvvfce7rjjDrRt2xbTp0/H6tWrzWU7deqEKVOmwM/PD4GBgWjRogVKS0tx\n5swZtGjRAomJiY3qM5lMWLFiBRYvXoyOHTvCz88PAwcORIsWLbBmzRo88sgjGDJkCPz9/fHXv/4V\nFRUVKCoqarSclStX4o033oBarYZarUZOTg4+//xz8+d+fn6YPXs2VCqVrLFVzaZj4EA1xtyjxspv\nr9aB354rlpGRkYFVq1YBqDvgZmRk4OTJkzAajejYsaM5veekSZNw/vx5c7kuXboIlvPWW2+BiHDv\nvfeiT58+WLFiRaO6fv/9d1RWVjaKgQLq0oNGRESY3ysUCnTp0gVnzpxpNO/Zs2cbpRI9e/as+X3b\ntm3RokULu7eBqzSbjoED1Rhzj5TsbMxs8NubodHgIQd+e65YRlpaGgoLC3HmzBn8+9//RkZGBjp3\n7oyAgABcuHDBnNrzypUrO
HjwoLlcwzuE2rdvj+XLl+PMmTNYtmwZnnvuORw/flwwj1qtRmBgII4d\nO9aoHZ06dcLJkyfN74kIZWVlCA8Ptzhvw1SinTp1sto2uTSbu5I4UI0x96i/c2jWkiXwr6xEbWAg\nHp461aE7ilyxjLZt20Kr1WLChAno0aMHevXqBQBISUnBCy+8gDlz5qBly5Y4ceIEzpw5g6SkJIvL\nWbt2LQYNGoTOnTsjNDQUCoUCfn7C/6H9/PwwceJEvPDCC/j888/Rrl077Nu3D/369cOoUaOwYMEC\nfPfdd7j//vuxePFiBAYGWrwklZ6ejrlz52LAgAEAgDfffBNjx461e50lI3p0QiZe0ETGfII3/BY/\n//xzUigUtGjRIvO0K1eu0OTJk6lz584UEhJCcXFxtGbNGiIi+uSTT+j+++8XLOPll1+m8PBwatWq\nFWk0Gvr73/9u/szPz888+FxRUUHTp0+n8PBwCgkJoeTkZPMA8fr16yk6OppCQkJIq9XS4cOHzcto\neFdSdnY2dezYkTp27EjTpk0T3JXUpUsXm+ts7Xtx5vviyGfGmF34t+iZvDK1Z21tLeLi4pCammrx\n8+zsbPTs2RMxMTEoLi6WujkWcQY3xhi7RfIxhsWLFyM6OtpiKHlBQQGOHTuGo0ePYu/evZg8eTL2\n7NkjdZMEOIMbY4wJSXrGcPr0aRQUFOCZZ56xeEqzYcMGjB8/HgCQkJCAy5cvo7y8XMomNcIZ3Bhj\nTEjSM4bnn38eb7/9Nq5evWrx8zNnzgjuIe7cuTNOnz6N9u3bC+bT6/Xmv7VaLbRarcvayIFxjLHm\noLCwEIWFhS5ZlmQdQ35+Ptq1a4e4uLgmG9vwTMLSfbu3dwyuxoFxjLHmoOE/zbNnzxa9LMkuJRUV\nFWHDhg3o3r070tPT8d1332HcuHGCecLDw1FWVmZ+f/r0aYtBIFLiwDjGGBOS5XbVHTt2YNGiRdi4\ncaNgekFBAZYuXYqCggLs2bMH06dPbzT4LMctcgaDgQPjGLOBb1f1TF6dqKf+EtGyZcsAAJmZmRg2\nbBgKCgoQGRmJli1bWnwmiRyGDx/OHQFjjP2BA9wYY3bx1t/isGHDkJ6eLvpRE46k83RF6k9HSXHG\nwB2DzPR6PZYuXWrOxpSVlSXp4DpjrtLcfovNhVdfSmJ1ncK8efNQU1NjnjZv3jzzZ4x5K8NWA3JX\n5qKKqhCgCEB2RjaGP+TY5VlXLMNRnpzK061EP2VJJl7QRLuFhYVZzDKnVqvd3TTGbLL2W8zfkk+a\nP2kIephfmj9pKH+L/ak9nV1GU6k9tVot/eMf/yAiy6k8L1y4QI888gjdeeedNGDAAJo5cyYNHjzY\nvJzbs7aNHz+ennvuORo+fDgFBwdTQkKC+bOG81pK/VlZWUlERGlpadShQwcKCQmhpKQkOnTokN3b\nqiFr34szx85mk4/BG9x+pnA7o9Eoc0sYc53clbkoiWvw9IC4EixZbf/TA5xdRnp6OgoKCnD9+nUA\ndWcCa9euxRNPPAFAGB+1b98+aDQanDt3DjNmzMBzzz2H4OBglJeX49NPP8Vnn33WZB6ENWvWQK/X\n49KlS4iMjMTMmTMtzvfXv/4VxcXF2L17Ny5evIi3337bvNzhw4fj2LFjOH/+POLj483t9BTcMchI\nqbR85U6lUsncEsZcp4qsPD2g1v6nBzi7DGupPe+9995G896eylOlUuGrr77C7NmzERgYiKioKIwf\nP97qtXmFQoHHHnsM/fv3h7+/P5544gn8/PPPjeZrKvUnAEyYMAEtW7aESqVCTk4ODhw4YPF5cu7C\nHYOMsrKyGnUOSqUSU6ZMcVOLGHNegMLK0wP87X96gCuWYSm1pyW3P4bn/PnzqKmpafRonqbc/sie\nO+64w3yWcrumUn+aTCa8+uqriIyMREhICLp37w6FQoHff/+96RWUEXcMMtLr9Zg5cybUajV
CQkKg\nVqsxc+ZMHnhmXi07Ixua4gZPD9ivwdQx9j89wBXLsJTa05LbLxO1bdsWSqVS8ASG2/8Wq6nUn198\n8QU2bNiAbdu24cqVKzhx4gSIyKPu+OK7kmSm1+u5I2DNSv2dQ0tWL0FlbSUC/QMxNWuqQ3cUuWIZ\n1lJ7NsXf3x+PPfYY9Ho9/vGPf+DkyZP4/PPP0bVrV4vz23vwbir15/Xr1xEQEIA2bdrgxo0bmDFj\nht3rKBfuGBhjThv+0HCnby11xTIyMjIwbtw4vP322xY/VygUjQaWly5digkTJqBDhw64++67kZ6e\njp9++klQpqnyDT+vt2jRIrz22msYMGAArl+/jtjYWHzzzTcYN24cvvnmG4SHhyMsLAxvvvmm+YkQ\nnoID3FD3rKTc3FxUVVUhICAA2dnZNh+RITZQTUxdjHkCXwlwe+WVV3Du3Dm3PaLHUVIEuHl8kIDU\nTczPzyeNRiOIK9BoNJSfb/3+6ZycHFIqlYIySqWScnJyXF4XY57CCw4Xohw5coQOHDhAJpOJ9u7d\nS2q1mr7++mt3N8tu1r4XZ74vj/+mpd4ZU1JSLAad6XQ6q2XEBqqJqYsxT9FcO4Yff/yRIiMjKSgo\niLp3704LFixwd5McIkXH4PNjDGIyuIkNVONscYx5nv79++Po0aPuboZH8fnbVcVkcBMbqMbZ4hhj\n3sDnOwYxGdzEBqpxtjjGmDfw+UtJ9XcEOZLBrf7uo7y8PBiNRqhUKkyZMsXmXUli6mKMMbnx7aqM\nMbu0adMGly5dcnczWAOtW7fGxYsXG03nRD2MMcYEnDl2+vwYA1B3aUitViM0NBRqtVrSR1YYDAbo\ndDpotVrodDoYDAbJ6mKMMTF8foxBzqxqBoMB06ZNQ0nJrefO1//N4wyMMU/h85eS1Go1Lly4YHH6\n+fPnXVqXTqfDli1bLE7fvHmzS+tijPk2vpTkBDmzqnGAG2PMG/h8xyBnVjUOcGOMeQOf7xjkzKrG\nAW6MMW/Ag88ig9XE4AA3xpg38PnBZ8YYa4548JkxxpjLNKuOQWzwmJwBbowxJiXDVgN0T+mcWkaz\nGWMQGzwmZ4AbY4xJybDVgGl501ASV2J75iY0mzEGscFjcga4McaYlHRP6bCl2x/HQT14jEFs8Jic\nAW6MMSalKrJ8HHRUs+kYxAaPyRngxhhjUgpQWD4OOqrZdAxig8fkDHBjjDEpZWdkQ1OssT2jDc1m\n8Fls8JicAW6MMSal4Q/9cRxcvQTf4BvRy7E5+Pzll1/i1VdfRXl5uXkgQ6FQ4OrVq6IrdaiBHODG\nGGMOkzSDm0ajQX5+PqKiokRV4CzuGBhjzHGSRj536NBBVKdQWVmJhIQExMbGIjo6Gq+99lqjeQoL\nCxESEoK4uDjExcVh7ty5DtfjChkZGVCpVFAqlVCpVMjIyLBZxtOD4jhTHGNMLKtjDF9++SUAoH//\n/hg9ejRGjBiBFi1aAKjriR577LEmFxwYGIjt27cjKCgINTU1GDx4MHbt2oXBgwcL5ktOTsaGDRuc\nXQ/RMjIysGrVKsG0+vcrV660WMbTg+I4UxxjzBlWLyVNmDABCoUCQF2QRP3f9VasWGF3JTdv3kRy\ncjI+/fRTREdHm6cXFhbinXfewcaNG603UOJLSSqVymIsg0qlQnV1tcUynh4Ux5niGGPOHDutnjF8\n8sknAGDxv/xdu3bZtXCTyYT4+HiUlJRg8uTJgk4BqGt4UVERYmJiEB4ejkWLFjWaBxD+F67VaqHV\nau2q3x7WNpzJZLJaxtOD4jhTHGO+p7CwEIWFha5ZGNkQFxdn17SmXL58mRISEmj79u2C6VevXqUb\nN24QEVFBQQH17NmzUVk7mugUpVJJABq9VCqV1TJhYWEWy6jVaknbaq+UlBSL7dPpdO5uGmNMJs4c\nO60OPu/evRvvvPMOzp07h3fffRfvvPMO3nnnHej1etT
W1jrU+YSEhGD48OH46aefBNODg4MRFBQE\nABg6dCiMRiMuXrzo0LKdNXLkSIvT09LSrJbx9KA4zhTHGHOG1UtJ1dXVuHbtGmpra3Ht2jXz9Dvv\nvBPr1q2zueDff/8dSqUSoaGhqKiowNatW5GTkyOYp7y8HO3atYNCocC+fftARGjTpo0Tq+O4+gHm\ndevWwWQywc/PD2lpaVYHngHPD4rjTHGMMWfYjGM4efIkunbt6vCCDx48iPHjx8NkMsFkMmHs2LF4\n6aWXsGzZMgBAZmYm8vLy8OGHH0KpVCIoKAjvvvsuBg4cKGwgxzEwxpjDJAlwS01NtVqBQqGQ7RZT\n7hgYY8xxkgS4vfjii3jxxRfRo0cP3HHHHXj22Wfxl7/8Ba1atUKPHj1EN9YTde3aFQqFwvyy5wyJ\ns8Uxb7TTYMDrOh30Wi1e1+mwkwMfZVGfVU07QQvdUzoYttre7mLKuIyt0en4+Hi7pknFjiY6JSIi\nwuIdPBEREVbL5Ofnk0ajEcyv0WgoPz+/ybpycnIa3QWlVCopJyfHxWvFWGM78vNphkZDBJhfMzQa\n2mFjv2XOyd+ST5o/aQh6mF+aP2kof4v17S6mTEPOHDttPhLj5s2bggja48eP4+bNm67un9zm1KlT\nDk0HgNzcXME2Aeoii5csWdJkXUuXLm0UA1FTU4O8vDw7W8uYeFtyczGvwX47r6QEW23st8w5uStz\nG6XaLIkrwZLV1re7mDKuZPOx2++99x4eeOABdO/eHQBQWlqK5cuXS94wT8bZ4pg3UlrZb/058FFS\n1rKqVdZa3+5iyriSzY7h4Ycfxi+//IIjR45AoVDg7rvvtpotzVdwtjjmjWqs7Le1NvZb5hxrWdUC\n/a1vdzFlXMnqpaRt27YBqHuYXkFBAUpKSnDs2DEYDAZ89dVXsjRODhEREQ5NBzhbHPNOKdnZmNlg\nv52h0eAhDnyUlKWsapr9GkwdY327iynjSlbPGHbu3IkhQ4Zg48aNjR6gB8Dm01W9RX2cxu1jChER\nETh58qTVMpwtjnmjpD/2z1lLlsC/shK1gYF4eOpU83QmjduzqlXWViLQPxBTs6aap7uqjCtZjWMo\nLi5GbGysxU5BThzHwBhjjpMkwK1fv344fvw4+vfvj8TERNx3330YNGgQgoODnWqsww3kjoExxhwm\nSYDbf/7zH5w+fRozZsxAixYtkJubC41Gg5iYGEyePFl0Yz2Rp2c78/T2Me/BAW63uDWATEL6t/RQ\nD1A7txB7gh2uXbtG3377Len1eurRowd169ZNdOCEo+xsomhig9Xk4untY96DA9xucUUAmSfKWZhD\nynuVdevkxLHT6qWkL774AkVFRfj5558REBCAAQMGYODAgRg0aBA6dOjgXG/kAKkvJXl6tjNPbx/z\nHq/rdJhrYV+apdNhjo/tS7qndNjSzcLv6pQOmz/23m2hHqDGhUf+yC6pt56IzBardyVlZmaiV69e\nmDRpEpKSktCrVy9RFXg6T8925untY96DA9xucXcAmVRq/C0H0TrKasdw+fJlHDhwALt378bs2bNx\n5MgRdOzYEYmJiRg0aBAefPBBlzTA3cQGq8nF09vHvAcHuN3i7gAyqShrbcYs28Xq4LNSqUS/fv2Q\nlZWFlStXYtOmTXj44Yfxz3/+Ew899JBLKvcEnp7tzNPbx7wHB7jd4u4AMqlkjcyCssD5zsHqGMOB\nAwdQVFRkflVXVyMxMdH8GjBggNOV29VAGW5XNRgMHp3tzNPbx7zHToMBW28LcHvIhwPcDFsNwgCy\nMfIFkElJ/5Yeeevy8PuPv7s+jiEuLg6DBw82dwRisri5AscxMMaY4yQJcPMU3DEwxpjjJAlw8yV9\n+vQRZHDr06ePu5vEfIyYwDNPD1ZbOF+PHn3V6BYbih591Vg4Xy9ZXZ4erJYxKQOqKBWUfZVQRamQ\nMSnD3U1qkmuGsL1
Ynz59cOjQIcG0Q4cOoU+fPvjf//1fN7WK+ZKdBgO+mTZNkERn5h9/W7v+L6aM\nnBbO12PB2nm4/Pit2ycXrJ0HAHhlht6ldRm2GjAtb5ogsU1JXt3fnjBmkDEpA6t+XAWMuTVt1YZV\nwCRg5Ucr3dewJvj8paSmHhLo4ZuGNRNiAs88PVitR181Tjx+ofH0r9QoOXDepXV5erCaKkqFmjGN\n4wtUa1SoPlwtWb3OHDutnjGkpqY2WeGGDRtEVcgYExITeObpwWomP8uBVrUK12cr9PRgNVJZPjib\nlCaZW2I/qx3Diy++KGc7GPNZYgLPPD1Yzc9k+dDiT67PVujpwWoKo+WrEn41njvEa7Vj0Gq1MjbD\nfXr37t1ojKF+OmNySMnOxsySEsF4wQyNBg83EXgmpoycMsdk1Y0xjLh15hC6XolnR7s+W2F2RjZK\n8koEYwya/RpMzfKMbTEyeWTdmMKjt03cAKQlpbmtTbbYHGP45ZdfMGPGDBw6dMj8fB6FQoHjx4/L\n00AZbldtOADdu3dvHnhmshITeObpwWoL5+uxfE0eahVG+JMKz46e4vKB53qeHqyWMSkD63aug0lp\ngl+NH9KS0iQfeJY0juG+++7D7Nmz8cILL2Djxo1YsWIFamtrMWfOHFEVOtxAjmNgjDGHSdoxxMfH\nY//+/bjnnntw8OBBwTQ5cMfAGGOOkzTALTAwELW1tYiMjMTSpUvx1Vdf4caNG6Iqk5rYTGcZGRlQ\nqVRQKpVQqVTIyPDs4BPW/IgJVvtAr8dotRoTQkMxWq3GB3q99A11gKcHncnZPjF1uXX72crks3fv\nXrp69SqdOnWKxo8fT3/+859p9+7dojMDOcqOJhKR+Exn6enpgjL1r/T0dFc0nzGbxGRWy8vJoUyl\nUlAmU6mkvJwc+RreBE/PkCZn+8TU5Yr22XvstMTuALerV68CAO68804Juifr7D0dEpvpTKVSoabG\nQvCJSoXqaumCTxirJyZYbbRajTUXGgeQjVGrsfq8awPIxPD0oDM52yemLle0T9JLST/++CPuuece\n8ysmJgY//fSTqMqkJDbTmbUNZzJ5bvAJa17EBKvdYeGfGQAINLo+gEwMTw86k7N9Yupy9/az+ayk\niRMn4oMPPsD9998PANi1axcmTpyI//73v5I3zhFiM51ZeySGn5/nBp+w5kVMsFqF0vJPt1Ll+gAy\nMTw96EzO9ompy93bz+bRT6lUmjsFABg8eDCUVnZKdxKb6WzkyJEWp6eleW7wCWtexGRWS87KwqQG\nv8NMpRK1fKYzAAAY5klEQVRJU1wfQCaGp2dIk7N9Yupy9/azOcYwffp0VFRUID09HQCwZs0aBAYG\nYuzYsQDqbl2VtIEOXCcTm+ksIyMD69atg8lkgp+fH9LS0rBypWc+9ZA1T2KC1T7Q67EzLw+BRiMq\nVSokTZmC5zzoziRPDzqTs31i6nK2fZLGMWi12iafQLp9+3ZRFduL4xgYY8xxnMGNMcaYgKR3Jf32\n2294+umn8fDDDwMADh8+jI8//tjmgisrK5GQkIDY2FhER0fjtddeszhfdnY2evbsiZiYGBQXFzvY\nfCG9Xg+1Wo3Q0FCo1Wro7TytFhPgJjaYjnkPObOqiQlWeyUjA6kqFcYolUhVqfCKPfutyKApMeuV\n/KdkKO5SQNFHAcVdCiT/KdmuusQQs15is6qJqUv/lh7qAWqEDgyFeoAa+rf0dtXlNrYCHXQ6Ha1e\nvZruueceIiKqrq6m3r172xUkcePGDSIiMhqNlJCQQN9//73gc4PBQEOHDiUioj179lBCQkKjZdjR\nRCIiysnJIaVSKQhSUyqVlGMj4EdMgJvYYDrmPcQEnYkpQyQuWO3l9HR65rb5CaBnAHq5qf1WZNCU\nmPVKejSJEA9BXYgHJT2a1GRdYohZr/TMdIvtS89sOrBVTF05C3NIea9SUEZ5r5JyF
uaIXWW72Hvs\ntFjW1gz9+vUjIqLY2FjztJiYGIcquXHjBvXv358OHTokmJ6ZmUmrV682v+/Vqxf99ttvwgbauXJh\nYWEWD/BqtbrJcg07k/qXSqWyWiYlJcViGZ1OZ1dbmeebmZIiOBDWv15v4jsWU4aIaFRYmMVyo5vY\ndx9p0JHUvx5par+dkCI8EP7x0k1sun1i1gs9G9cDPQg9xR+srBGzXsq7lRbLqKKsbz+xdYX1D7NY\nRj2g6WOTs5zpGGzed9qqVStcuC3Ccs+ePQgJCbHrbMRkMiE+Ph4lJSWYPHkyoqOjBZ+fOXMGXbp0\nMb/v3LkzTp8+jfbt2wvmu/2SkFartZgrwlL0MgAYbQT8kIgAN7HBdMx7yJlVTUywWksr+23LpvZb\nkUFTotarhYPTnSBmvcRmVRNTV42/lWOTn2uDEQsLC1FYWOiSZdnsGN555x2kpqbi+PHjSExMxPnz\n57Fu3Tq7Fu7n54eff/4ZV65cgU6nQ2FhYaODesMDs6U7oOwZK7AWW6GyEfAjJsBNbDAd8x5yZlUT\nE6x2w8p+e6Op/VZk0JSo9bL2NBkJnjIjZr3EZlUTU5ey1sqxyeTaYMSG/zTPnj1b9LJsDj7369cP\nO3bswA8//IDly5fj8OHDiImJcaiSkJAQDB8+vNGjNMLDw1FWVmZ+f/r0aYSHhzu07HpZWVmNOgel\nUokpNgJ+xAS4iQ2mY95DTNCZmDKAuGC16JEj8ZcG0/4CILqp/VZk0JSY9UqKSgIapoXf8Md0FxOz\nXiOTR1psn62samLqyhqZBWVBg2OTQYkpaZ4RjGiRtWtMe/fupbNnz5rff/LJJ5SamkpTp06lCxcu\n2LxGdf78ebp06RIREd28eZPuv/9++vbbbwXz3D74vHv3bqcGn4nqBqDVajWFhISQWq22OfBcLz09\nnVQqFfn7+5NKpbLryar5+fmk0+koOTmZdDodDzw3Qzvy8+l1nY5ykpPpdZ3O5iCy2DJEdQPQo9Vq\nGh8SQqPVaruekvpyejo9olLRaH9/ekSlanLguV7+lnzSTdRR8vhk0k3U2f20TjHrlfRoUt1YQ++6\nsQUpBp7riVmv9Mx0UkWpyP8ef1JFqWwOPDtTV87CHFIPUFNIQgipB6glH3gmkujpqnFxcdi2bRva\ntGmDnTt3YvTo0Vi6dCmKi4tx5MgRm5eTDh48iPHjx8NkMsFkMmHs2LF46aWXsGzZMgBAZmYmgLr/\n9Ddv3oyWLVtixYoVjSKpOY6BMcYcJ0mAW0xMDA4cOAAAmDJlCtq2bWu+1n/7Z1KTo2MwGAzIzc1F\nVVUVAgICkJ2dbdejNBhr6M99+qDy0CEEA7gGILB3b6y3I3/4ToMBW3JzoayqQk1AAFKys+3K+exo\nGTnp39Jj6dqlqPGvgbJWiayRWdC/rHd3s3yGU8dOa6cSvXv3purqaiIiuuuuu6iwsND8WXR0tOhT\nFEc10USX4JgE5iojeve2GFswwkbcj5wxE3Jx17377BZnjp1WB5/T09ORnJyMRx99FEFBQeYnrB49\nehShoaHieiEPlJubi5KSEsG0kpISLFmyxE0tYt6q8tAh/L3BtL//Mb0pW3JzMa/BPjivpARbm9gH\nxZSR09K1S1EzTHibZs2wGuSty3NTi5gjrN6uOnPmTDz44IP47bffkJKSYr59k4ia1UGTYxKYqwQ7\nOL2enDETcpHr3n0mjSbjGAYNGtRo2l133SVZY9yBYxKYq1xzcHo9OWMm5CLXvftMGj6fpoxjEpir\nBPbu3Si24Jk/pjdFzpgJuXjlvfvMjB+7DfEJfhhryJm7khxN1COmjJz0b+mRty4PRj8jVCYVpqRN\n4buSZMT5GBhjjAlImo+BMcaYb+GOgTErxCSnEZM8R2xdC+fr0aOvGt1iQ9GjrxoL5+vtqksuYhLa\niE0kJCYRjti6xJZzlFz1WGLz6aqM+aKdBgO+m
TZNECsw84+/rV3HfyUjAxdXrcLG26b9ZdUqvAJg\n4cqVLq1r4Xw9Fqydh8uP37otdMHaeXXtmKFvatVkYdhqwLS8aSiJu7VOJXl1f1tLaC+mDFDXKcz7\nch5qHrm1LeZ9WbctrI1piK1LbDlHyVWPNTzGwJgFr+t0mLtlS6Pps3Q6zNm82WKZVJUKGy3kVkhV\nqbCx2vrzpsXU1aOvGicev9B4+ldqlBw4b7Uuueie0mFLt8brpDulw+aPLa+TmDIAoB6gxoVHGm8L\ntUGN8/ssbwuxdYkt5yhX1MNjDIy5mJgAMjHJc8TWZfKzHEBWq/CMADIxCW3EJhISE0wnti6x5Rwl\nVz3WcMfAmAViAsjEJM8RW5efyfJVYH/yjAAyMQltxCYSEhNMJ7YuseUcJVc91nDHwJgFYgLIxCTP\nEVtX5pgshP5beEAMXa/Es6M9I4BMTEIbsYmExATTia1LbDlHyVWPNTzGwJgVYgLIXsnIwOF169DS\nZMINPz9Ep6U1OfDsTF0L5+uxfE0eahVG+JMKz46e4hEDz/UMWw1YsnoJKmsrEegfiKljptocOBVT\nBhAXTCe2LrHlHOVsPRzgxhhjTIAHnxljjLkMxzEwryJn1jI5s6rJtV5ybj/DVgNyV+aiiqoQoAhA\ndka2zUshGZMysHbHWpCKoDAqMDJ5JFZ+ZPtSHHMx0Sl+ZOIFTWQykTNrmZxZ1eRaLzm3X/6WfNL8\nSSPI4Kb5k4byt1ivKz0znRAPQRnEg9Iz013ePl/gzLHT44+63DGwejNTUgQHtfrX6zqdR9Qltn1y\nrZec2y9lQorwAP/HSzfRel3Ku5UWy6iiVC5vny9w5tjJYwzMa8iZtUzOrGpyrZec209MgBapLA+U\nmpRNBwgy1+OOgXkNObOWyZlVTa71knP7iQnQUhgtBwj61fBhSm68xZnXkDNrmZxZ1eRaLzm3n5gA\nrZHJI4ENDSZuANKSmg4QZK7HcQzMq8iZtUzOrGpyrZec209MgFbGpAys27kOJqUJfjV+SEtK47uS\nROIAN8YYYwIc4MYYY8xlOMCNeRU5A7Q+0OuxY+lS3FFTgwqlEslZWXhOr5ekLjHBYMx7eNv3yx0D\n8xpiMp2J9YFej//Om4c1tyXemTRvHj4AXN45uDtbF5OWN36/PMbAvIaYTGdijVarseZC46xgY9Rq\nrD7v2gxpcmUFY+7hru+XxxiYT5AzQOsOCyk6ASDQ6PoMae7O1sWk5Y3fL3cMzGvIGaBVobR8lbVS\n5foMae7O1sWk5Y3fL3cMzGvIGaCVnJWFSQ06h0ylEklTXJ8hzd3Zupi0vPH75TEG5lXkDND6QK/H\nzrw8BBqNqFSpkDRliqR3JcmRFYy5hzu+Xw5wY4wxJsCDz4wxxlxGso6hrKwMDzzwAHr37o0+ffog\nNze30TyFhYUICQlBXFwc4uLiMHfuXKmaw3zYB3o9RqvVmBAaitFqNT6Q6HIQUHep63WdDnqtFq/r\ndNhpMDSLugxbDdA9pYN2gha6p3QwbLVdl1xlnCnHrBCdycGGX3/9lYqLi4mI6Nq1a3TXXXfR4cOH\nBfNs376dUlNTm1yOhE1kPiAvJ4cylUpBYppMpZLycnJcXpenZ5gTS0w2NrnKOFOuuXPm2CnZGUOH\nDh0QGxsLAGjVqhWioqJw9uxZSx2TVE1gDDuWLsVHDWISPqqpwc68PJfXtSU3VxCVDQDzSkqwdckS\nr64rd2WuIGoXAEriSrBktfW65CrjTDlmnSyPxCgtLUVxcTESEhIE0xUKBYqKihATE4Pw8HAsWrQI\n0dHRjcrrbzv112q10Gq1EreYNRdyBqp5eoY5scQEaMlVxplyzU1hYSEKCwtdsizJO4br168jLS0N\nixcvRqtWrQSfxcfHo6ysDEFBQdi0aRNGjBiBX375pdEy9BJeE2bNm5yBap6eYU4sMQFacpVxplxz\n0/Cf5tmzZ
4telqR3JRmNRjz++ON48sknMWLEiEafBwcHIygoCAAwdOhQGI1GXLx4UcomMR8jZ6Ca\np2eYE0tMgJZcZZwpx6yTLI6BiDB+/HiEhYXhvffeszhPeXk52rVrB4VCgX379mHUqFEoLS0VNpDj\nGJiT5AxU8/QMc2KJCdCSq4wz5Zozjwxw27VrF5KSktC3b18oFHVJvufPn49Tp04BADIzM5GXl4cP\nP/wQSqUSQUFBePfddzFw4EBhA7ljYIwxh3lkx+Aq3DEwxpjjOPKZ+QwxwWpyBoIx1hxwBjfmNcRk\nVZMz6xtjzQVfSmJeQ0xWNTmzvjHmSfhSEvMJYoLV5AwEY6y54I6BeQ0xwWpyBoIx1lxwx8C8hphg\nNTkDwRhrLniMgXkVMcFqcgaCMeYpOI6BMcaYAA8+M8YYcxnuGEQyGAzQ6XTQarXQ6XQwcNCULDw9\nWE3ObHGMSYUD3EQwGAyYNm0aSm4Lmqr/ezhfu5aMpweriQnAY8wT8RiDCDqdDlssBE3pdDps5qAp\nyXh6sJqYADzGpMJjDDKrshI0VclBU5Ly9GA1ObPFMSYl7hhECLASNBXIQVOS8vRgNTmzxTEmJe4Y\nRMjOzoamQdCURqPBVA6akpSnB6vJmS2OMSnxGINIBoMBS5YsQWVlJQIDAzF16lQeeJaBpweryZkt\njrGmcIAbY4wxAR58Zowx5jLcMTCv4ukBbnJaOF+PHn3V6BYbih591Vg4X+/uJrFmggPcmNfw9AA3\nOS2cr8eCtfNw+fFbt8guWDsPAPDKDL2bWsWaCx5jYF7D0wPc5NSjrxonHm8cTNfjKzVKDnAwHeMx\nBuYjPD3ATU4mP8vBdLUKDqZjzuOOgXkNTw9wk5OfyfJVYH/iYDrmPO4YmNfw9AA3OWWOyULov4Wd\nQ+h6JZ4dzcF0zHk8xsC8iqcHuMlp4Xw9lq/JQ63CCH9S4dnRU3jgmZlxgBtjjDEBHnxmjDHmMtwx\nMMYYE+COgTHGmAB3DIwxxgS4Y2CMMSbAHQNjjDEB7hgYY4wJcMfAGGNMgDsGxhhjAtwxeJHCwkJ3\nN8Fj8La4hbfFLbwtXEOyjqGsrAwPPPAAevfujT59+iA3N9fifNnZ2ejZsydiYmJQXFwsVXOaBd7p\nb8vgNmGCz2dwq8f7xS28LVxDsgxuKpUK7733HmJjY3H9+nX069cPDz30EKKioszzFBQU4NixYzh6\n9Cj27t2LyZMnY8+ePVI1iXm52zO46QHoT5702QxujElJsjOGDh06IDY2FgDQqlUrREVF4ezZs4J5\nNmzYgPHjxwMAEhIScPnyZZSXl0vVJObltuTmCtJ6AsC8khJsXbLETS1irJkiGZw4cYIiIiLo2rVr\ngumPPPII/fDDD+b3Q4YMoZ9++kkwDwB+8Ytf/OKXiJdYkl1Kqnf9+nWkpaVh8eLFaNWqVaPPqcFj\nYRUKRZOfM8YYk5akdyUZjUY8/vjjePLJJzFixIhGn4eHh6OsrMz8/vTp0wgPD5eySYwxxmyQrGMg\nIjz99NOIjo7G9OnTLc7z6KOP4rPPPgMA7NmzB6GhoWjfvr1UTWKMMWYHyTK47dq1C0lJSejbt6/5\n8tD8+fNx6tQpAEBmZiYAICsrC5s3b0bLli2xYsUKxMfHS9Ecxhhj9hI9OuFimzZtol69elFkZCQt\nWLDA4jxTp06lyMhI6tu3L+3fv1/mFsrH1rbYvn073XnnnRQbG0uxsbE0Z84cN7RSek899RS1a9eO\n+vTpY3UeX9knbG0LX9kniIhOnTpFWq2WoqOjqXfv3rR48WKL8/nCvmHPthCzb3hEx1BTU0MajYZO\nnDhB1dXVFBMTQ4cPHxbMYzAYaOjQoUREtGfPHkpISHBHUyVnz7bYvn07paamuqmF8tm5cyft37/f\n6sHQV/YJItvbwlf2CSKiX3/9lYqLi4mI6Nq1a3TXXXf57PHCnm0hZt/wiEd
i7Nu3D5GRkejWrRtU\nKhXGjBmDr7/+WjCPr8Q82LMtAN+4W+v+++9H69atrX7uK/sEYHtbAL6xTwAcI3U7e7YF4Pi+4REd\nw5kzZ9ClSxfz+86dO+PMmTM25zl9+rRsbZSLPdtCoVCgqKgIMTExGDZsGA4fPix3Mz2Cr+wT9vDV\nfaK0tBTFxcVISEgQTPfFfcPathCzb0gex2CPhrEL1jTs9ewt503sWaf4+HiUlZUhKCgImzZtwogR\nI/DLL7/I0DrP4wv7hD18cZ9wNkaqOWlqW4jZNzzijKFhPENZWRk6d+7c5DzNNebBnm0RHByMoKAg\nAMDQoUNhNBpx8eJFWdvpCXxln7CHr+0THCN1i61tIWbf8IiOoX///jh69ChKS0tRXV2NNWvW4NFH\nHxXM4ysxD/Zsi/LycvN/Q/v27QMRoU2bNu5orlv5yj5hD1/aJ4hjpMzs2RZi9g2PuJSkVCqxdOlS\n6HQ61NbW4umnn0ZUVBSWLVsGoC7mYdiwYSgoKEBkZKQ55qE5smdbrFu3Dh9++CGUSiWCgoKwevVq\nN7daGunp6dixYwd+//13dOnSBbNnz4bRaATgW/sEYHtb+Mo+AQA//PAD/ud//gd9+/ZFXFwcgMYx\nUr6yb9izLcTsG5IFuDHGGPNOHnEpiTHGmOfgjoExxpgAdwyMMcYEuGNgjDEmwB0Da3b8/f0RFxeH\ne+65B6NGjUJFRYXVeQ8cOIBNmzbZXGZhYSFSU1MbTY+Li8OBAwcAADU1NWjVqhW++OIL8+f9+vVD\ncXExcnJysG3btiaXu2PHDuzevdv82YQJE/Dll1/abBtjrsYdA2t2goKCUFxcjIMHD6JFixb46KOP\nrM5bXFyMgoIC0XUNHjwYRUVFAOo6mV69epnf37hxA8ePH0dsbCxmz56NIUOGNLms7du3m8sCzTtS\nl3k27hhYszZ48GAcO3YMN2/exMSJE5GQkID4+Hhs2LABRqMRb7zxBtasWYO4uDj861//wo8//ojE\nxETEx8fjvvvus/nogMTERPPBfPfu3Zg0aRJ+/vlnAHXBRP3794dCoRD8979582ZERUWhX79+WL9+\nPQDg5MmTWLZsGd577z3Ex8dj165dAICdO3fivvvug0aj4bMHJhvuGFizVVNTg82bN6Nv376YO3cu\nhgwZgr179+K7777DSy+9BKPRiDlz5mDMmDEoLi7GqFGjcPfdd+P777/H/v37MXv2bMyYMaPJOm7v\nGIqKipCUlISAgABcv34dRUVFSExMBFD3379CoUBlZSWeffZZ5Ofn4z//+Q9+++03KBQKdO3aFZMm\nTcILL7yA/fv3Y/DgwSAi/Pbbb/jhhx+Qn5+PV199VfJtxhjgIZHPjLlSRUWFOQo0KSkJEydOxKBB\ng7Bx40YsWrQIAFBVVYVTp06B6nKSmMtevnwZ48aNw7Fjx6BQKMzRxdZ07doV1dXVKC8vx5EjR9Cr\nVy8MGDAAe/fuxe7du5GdnW2el4hw5MgRdO/eHRqNBgDw5JNPYvny5YJ56ikUCvOzb6KioprlY6OZ\nZ+KOgTU7d9xxB4qLixtN/+qrr9CzZ0/BtL179wrez5o1C0OGDMH69etx8uRJaLVam/UlJibiX//6\nFzp27AgAGDhwIHbt2oV9+/Zh0KBBgnkbjhvYevBAixYt7J6XMVfhS0nMJ+h0OuTm5prf13ccwcHB\nuHbtmnn61atX0alTJwCw+/k6iYmJeP/9982XjQYNGoTPPvsMHTt2RHBwsHk+hUKBu+++G6WlpTh+\n/DgAYNWqVebPG7aFMXfhjoE1O5bu5pk1axaMRiP69u2LPn36ICcnBwDwwAMP4PDhw+bB55dffhmv\nvfYa4uPjUVtbK1iWtbuEEhMTUVpaaj476NChA0wmk7mjuF1AQACWL1+O4cOHo1+/fmjfvr15uamp\nqVi/fr1g8Nme+hlzNX6IHmOMMQE+Y2C
MMSbAHQNjjDEB7hgYY4wJcMfAGGNMgDsGxhhjAtwxMMYY\nE/j/auRZlXr4uqMAAAAASUVORK5CYII=\n" } ], "prompt_number": 28 }, { "cell_type": "code", "collapsed": false, "input": [ "import patsy as pt\n", "from sklearn import tree\n", "\n", "# NOTE: judging by the predict_proba output further down, patsy codes the categorical\n", "# response `species` as one indicator column per level, so the tree is fit as a\n", "# multi-output problem rather than on a single label vector.\n", "y, X = pt.dmatrices('species ~ sepal_width + petal_width - 1', iris)\n", "\n", "clf = tree.DecisionTreeClassifier(max_depth=3).fit(X, y)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 258 }, { "cell_type": "code", "collapsed": false, "input": [ "import StringIO, pydot\n", "from IPython.core.display import HTML, Image\n", "\n", "# Export the fitted tree as Graphviz dot text, render it to PNG, then display it.\n", "dot_data = StringIO.StringIO()\n", "tree.export_graphviz(clf, out_file=dot_data)\n", "graph = pydot.graph_from_dot_data(dot_data.getvalue())\n", "graph.write_png('tree.png')\n", "# Show the PNG that was just written; an empty HTML('') fragment displays nothing.\n", "Image(filename='tree.png')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "" ], "output_type": "pyout", "prompt_number": 259, "text": [ "" ] } ], "prompt_number": 259 }, { "cell_type": "code", "collapsed": false, "input": [ "# Split threshold stored for each node of the fitted tree. Values such as 2.2e-314 or\n", "# -2.3e+77 look like uninitialised entries (presumably leaf nodes, which have no\n", "# split) - TODO confirm against clf.tree_.children_left.\n", "clf.tree_.threshold" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 260, "text": [ "array([ 8.00000012e-001, -2.32035018e+077, 1.75000000e+000,\n", " 1.34999996e+000, 2.23443806e-314, 2.28130340e-314,\n", " 1.84999996e+000, 6.93069750e-310, 2.28041841e-314])" ] } ], "prompt_number": 260 }, { "cell_type": "code", "collapsed": false, "input": [ "%load_ext rmagic" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 261 }, { "cell_type": "code", "collapsed": false, "input": [ "%%R -o rnewdata\n", "set.seed(32313)\n", "rnewdata <- data.frame(sepal_width = runif(20,2,4.5),petal_width = runif(20,0,2.5))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 262 }, { "cell_type": "code", "collapsed": false, "input": [ "import pandas as pd\n", "\n", "newdata = pd.DataFrame(rnewdata.T, columns=['sepal_width', 'petal_width'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 263 }, { "cell_type": 
"code", "collapsed": false, "input": [ "pred1 = clf.predict_proba(newdata)\n", "\n", "species = iris['species'].unique()\n", "idx = ['0', '1']\n", "cols = [x + '_' + y for x in species for y in idx]\n", "\n", "pd.DataFrame(np.hstack(pred1), columns=cols)\n", "\n", "# I don't understand what these probabilities mean" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
setosa_0setosa_1versicolor_0versicolor_1virginica_0virginica_1
0 1 0 1.000000 0.000000 0.000000 1.000000
1 0 1 1.000000 0.000000 1.000000 0.000000
2 0 1 1.000000 0.000000 1.000000 0.000000
3 1 0 0.000000 1.000000 1.000000 0.000000
4 1 0 0.000000 1.000000 1.000000 0.000000
5 0 1 1.000000 0.000000 1.000000 0.000000
6 0 1 1.000000 0.000000 1.000000 0.000000
7 1 0 1.000000 0.000000 0.000000 1.000000
8 1 0 1.000000 0.000000 0.000000 1.000000
9 0 1 1.000000 0.000000 1.000000 0.000000
10 0 1 1.000000 0.000000 1.000000 0.000000
11 1 0 0.000000 1.000000 1.000000 0.000000
12 1 0 1.000000 0.000000 0.000000 1.000000
13 1 0 0.192308 0.807692 0.807692 0.192308
14 1 0 1.000000 0.000000 0.000000 1.000000
15 1 0 1.000000 0.000000 0.000000 1.000000
16 1 0 0.000000 1.000000 1.000000 0.000000
17 1 0 0.192308 0.807692 0.807692 0.192308
18 1 0 0.192308 0.807692 0.807692 0.192308
19 1 0 0.000000 1.000000 1.000000 0.000000
\n", "
" ], "output_type": "pyout", "prompt_number": 264, "text": [ " setosa_0 setosa_1 versicolor_0 versicolor_1 virginica_0 virginica_1\n", "0 1 0 1.000000 0.000000 0.000000 1.000000\n", "1 0 1 1.000000 0.000000 1.000000 0.000000\n", "2 0 1 1.000000 0.000000 1.000000 0.000000\n", "3 1 0 0.000000 1.000000 1.000000 0.000000\n", "4 1 0 0.000000 1.000000 1.000000 0.000000\n", "5 0 1 1.000000 0.000000 1.000000 0.000000\n", "6 0 1 1.000000 0.000000 1.000000 0.000000\n", "7 1 0 1.000000 0.000000 0.000000 1.000000\n", "8 1 0 1.000000 0.000000 0.000000 1.000000\n", "9 0 1 1.000000 0.000000 1.000000 0.000000\n", "10 0 1 1.000000 0.000000 1.000000 0.000000\n", "11 1 0 0.000000 1.000000 1.000000 0.000000\n", "12 1 0 1.000000 0.000000 0.000000 1.000000\n", "13 1 0 0.192308 0.807692 0.807692 0.192308\n", "14 1 0 1.000000 0.000000 0.000000 1.000000\n", "15 1 0 1.000000 0.000000 0.000000 1.000000\n", "16 1 0 0.000000 1.000000 1.000000 0.000000\n", "17 1 0 0.192308 0.807692 0.807692 0.192308\n", "18 1 0 0.192308 0.807692 0.807692 0.192308\n", "19 1 0 0.000000 1.000000 1.000000 0.000000" ] } ], "prompt_number": 264 }, { "cell_type": "code", "collapsed": false, "input": [ "Cars93 = com.load_data('Cars93', package='MASS')\n", "Cars93.columns = map(lambda x: x.lower().replace('.', '_'), Cars93.columns)\n", "Cars93.ix[:6, :15]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
manufacturermodeltypemin_pricepricemax_pricempg_citympg_highwayairbagsdrivetraincylindersenginesizehorsepowerrpmrev_per_mile
1 Acura Integra Small 12.9 15.9 18.8 25 31 None Front 4 1.8 140 6300 2890
2 Acura Legend Midsize 29.2 33.9 38.7 18 25 Driver & Passenger Front 6 3.2 200 5500 2335
3 Audi 90 Compact 25.9 29.1 32.3 20 26 Driver only Front 6 2.8 172 5500 2280
4 Audi 100 Midsize 30.8 37.7 44.6 19 26 Driver & Passenger Front 6 2.8 172 5500 2535
5 BMW 535i Midsize 23.7 30.0 36.2 22 30 Driver only Rear 4 3.5 208 5700 2545
6 Buick Century Midsize 14.2 15.7 17.3 22 31 Driver only Front 4 2.2 110 5200 2565
\n", "
" ], "output_type": "pyout", "prompt_number": 276, "text": [ " manufacturer model type min_price price max_price mpg_city mpg_highway \\\n", "1 Acura Integra Small 12.9 15.9 18.8 25 31 \n", "2 Acura Legend Midsize 29.2 33.9 38.7 18 25 \n", "3 Audi 90 Compact 25.9 29.1 32.3 20 26 \n", "4 Audi 100 Midsize 30.8 37.7 44.6 19 26 \n", "5 BMW 535i Midsize 23.7 30.0 36.2 22 30 \n", "6 Buick Century Midsize 14.2 15.7 17.3 22 31 \n", "\n", " airbags drivetrain cylinders enginesize horsepower rpm \\\n", "1 None Front 4 1.8 140 6300 \n", "2 Driver & Passenger Front 6 3.2 200 5500 \n", "3 Driver only Front 6 2.8 172 5500 \n", "4 Driver & Passenger Front 6 2.8 172 5500 \n", "5 Driver only Rear 4 3.5 208 5700 \n", "6 Driver only Front 4 2.2 110 5200 \n", "\n", " rev_per_mile \n", "1 2890 \n", "2 2335 \n", "3 2280 \n", "4 2535 \n", "5 2545 \n", "6 2565 " ] } ], "prompt_number": 276 }, { "cell_type": "code", "collapsed": false, "input": [ "y, X = pt.dmatrices('drivetrain ~ mpg_city + mpg_highway + airbags + \\\n", " enginesize + width + length + weight + price + \\\n", " cylinders + horsepower + wheelbase - 1', Cars93)\n", "\n", "clf = tree.DecisionTreeClassifier().fit(X, y)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 277 }, { "cell_type": "code", "collapsed": false, "input": [ "dot_data = StringIO.StringIO()\n", "tree.export_graphviz(clf, out_file=dot_data)\n", "graph = pydot.graph_from_dot_data(dot_data.getvalue())\n", "graph.write_png('tree2.png')\n", "HTML('')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "" ], "output_type": "pyout", "prompt_number": 280, "text": [ "" ] } ], "prompt_number": 280 }, { "cell_type": "code", "collapsed": false, "input": [ "# pruning not currently supported" ], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }