{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Case study\n", "Given a user’s past reviews on Yelp (available from yelp-challenge dataset),\n", "\n", "When the user writes a review for a business she hasn't reviewed before,\n", "\n", "How likely will it be a Five-Star review?\n", "\n", "- Load data\n", "- Visualize the data\n", "- Featurize the data\n", "- Join tables to populate the features\n", "- Model the data: Logistic regression\n", "- Evaluate the model\n", "- Make prediction with the model" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Load data" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import os\n", "\n", "import pandas as pd\n", "\n", "# Dataset directory. Set the YELP_DATA_DIR environment variable to point at a\n", "# local copy; the default is the location used when the notebook was written.\n", "PATH = os.environ.get('YELP_DATA_DIR', '/scratch/xun/docs/yelp_dataset_challenge_academic_dataset/')\n", "biz_df = pd.read_csv(os.path.join(PATH, 'yelp_academic_dataset_business.csv'))\n", "user_df = pd.read_csv(os.path.join(PATH, 'yelp_academic_dataset_user.csv'))\n", "review_df = pd.read_csv(os.path.join(PATH, 'yelp_academic_dataset_review.csv'))" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [], "source": [ "review_df = review_df.set_index('review_id')\n", "user_df = user_df.set_index('user_id')\n", "biz_df = biz_df.set_index('business_id')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Visualize the data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Example: Plot distribution of review star ratings" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import seaborn as sns\n", "%matplotlib inline\n", "\n", "# Use the \"talk\" context, the Set2 palette, the \"ticks\" style and a larger figure.\n", "# The context is passed to sns.set() itself: sns.set() always re-applies a context\n", "# (default \"notebook\"), so an earlier separate sns.set_context() call would be overridden.\n", "sns.set(context=\"talk\", palette='Set2', rc={\"figure.figsize\": (15, 8)}, style=\"ticks\")" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA5UAAAH3CAYAAAArC3urAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X+UlnWd//HXzTATIoKAMyjiDwqRreMRjRAUhwjKsp8i\nCoi4a67taoHH8KCYEFkk/j4rpKWsRRtiCln+SLeExF/86EQkuWuaRQIqzCASCgjCfP/YdTa+auQV\nzD0Oj8c5c87MdV/3db8/nuscec513feUGhoaGgIAAAAFtCr3AAAAALx7iUoAAAAKE5UAAAAUJioB\nAAAoTFQCAABQmKgEAACgsD0alU899VSGDBmSWbNmJUleeOGFjBo1KqNGjcrYsWOzdevWJMndd9+d\noUOHZtiwYZkzZ06SZNu2bRk3blxGjhyZM844IytXrmw85umnn56RI0dm8uTJja81Y8aMnHrqqRk2\nbFgWLFiQJNm4cWP++Z//OaNGjco555yTDRs27MnlAgAA7HX2WFRu3rw5V155ZQYMGNC47YYbbsjZ\nZ5+dWbNmpUePHpk7d242bdqUG2+8Mbfddltmz56dmTNnZsOGDbn33nvTuXPnzJ49O2PHjs11112X\nJJkyZUouv/zyzJ49O5s3b87DDz+clStX5oEHHsicOXNy6623ZurUqdmxY0dmzpyZE088MbNmzcrJ\nJ5+cW265ZU8tFwAAYK+0x6Kyqqoq3/nOd3LAAQc0bluyZEkGDhyYJKmtrc3ChQvzxBNP5Kijjkqb\nNm1SWVmZY445JkuXLs2iRYsyaNCgJEnfvn2zdOnSbNu2LatXr06vXr12OsaSJUsyYMCAlEqltG/f\nPl27ds3vf//7LFq0KB/5yEca93388cf31HIBAAD2SnssKisqKlJVVbXTts2bN6eysjJJ0qlTp6xd\nuzb19fXp1KlT4z6dO3dOXV1d6uvr07Fjx/8ZslWrlEql1NfXp0OHDo377uoYdXV1jcfo1KlT6urq\n3vE6Xn/99axatSqvv/76O34uAABAS9e6XC/c0NCwW7b/rfv+Lc+fNm1apk+f/paPzZs3L926dfub\nZwAAANgbNOmnv7Zt27bxw3nq6upSU1OTmpqa1NfXN+7zl9vXrVuXJNm+fXsaGhpSXV2904ftrFmz\n5k37vt2x39j214wZMya/+93vdvqaN2/ebls/AABAS7PHo/IvrxAef/zxmT9/fpJk/vz5qa2tzdFH\nH50nn3wymzZtymuvvZZly5alT58+OeGEExqD7tFHH02/fv3SunXrdO/ePcuXL0+SPPTQQ6mtrU2/\nfv3yyCOPZMeOHXnppZeydu3a9OjRIwMGDGg8xhuvBwAAwO5Tangn95W+A8uWLcvEiROzbt26VFRU\nZP/998+MGTMyfvz4bNu2LQcffHCmTp2aioqK/Od//mfjJ7P+0z/9Uz71qU9lx44d+cpXvpIVK1ak\nsrIyV199dbp06ZJnn302l112WRoaGtK7d+9ccsklSZIf/OAH+clPfpIkGTduXPr165dNmzZl3Lhx\n+fOf/5z99tsv11xzTdq1a/eO1rFq1aoMHjzY7a8AAABvYY9FZUshKgEAAN5ek76nEgAAgJZFVAIA\nAFCYqAQAAKAwUQkAAEBhohIAAIDCRCUAAACFiUoAAIAmVFdXl8suu6zcY+w2rcs9AAAAwN7kxhtv\nzD333JN77703y5YtS5KcddZZufDCC/PVr341J5xwQtatW5f3vve9+dd//dfcdddd+fnPf56qqqq8\n//3vzxe+8IUyr2BnohIAAKAJfeITn8i2bduycuXK1NfXZ8eOHWnbtm1qamry6quv5uKLL06SfOpT\nn8rnP//5zJgxI/fcc09atWqV0aNH53Of+1xqamrKvIr/IyoBAADK4JRTTsk999yTqqqqnHLKKUmS\nbt26NT6+//7756WXXsqGDRvyla98JUnSunXr1NfXi0oAAIC9V
alUSpJ8/OMfz7/8y79kn332ybRp\n07JmzZqsXr06SdLQ0JD6+vp07tw51dXVueKKK5Ikf/rTn3LooYeWbfa3IioBAACa0KGHHpqlS5fm\npz/9abp3757KyspUVlYmSfbdd99MmTIlq1atyrBhw1JZWZkzzzwzY8aMyT777JN99tknX/va18q8\ngp2JSgAAgCbUpUuX/PSnP02S/PKXv8yoUaMaH+vUqVPjra5vOPXUU3Pqqac26YzvhD8pAgAA0MS2\nbNmS8847L507d84RRxzRuP2NW2PfTUoNDQ0N5R6iOVu1alUGDx6cefPm7fSmWQAAAFypBAAA4O8g\nKgEAAChMVAIAAFCYT38FAAB4G1u3bs2KFSt26zEPP/zwVFVV7dZjlpOoBAAAeBsrVqzIhDtvSYcu\n1bvleBvW1OWK085Nz549d8vx3nDXXXdlv/32y5AhQ3brcf8WohIAAOCv6NClOh27HVjuMf6qU045\npWyv7T2VAAAAzciPfvSjXHDBBTnzzDMzffr0DB8+PKeeemq+/e1vZ/v27fnIRz6SrVu3JkmWLFmS\nMWPGZPr06Zk1a1aSZOrUqRk5cmSGDh2auXPn5rnnnsu5556bJFm6dGk+9KEPJUlef/31fPrTn/67\n5xWVAAAAzUx9fX2uuOKK/OpXv8oPf/jDzJkzJ7/4xS/y/PPPp3///lm4cGGSZN68efn4xz/e+LyF\nCxdmw4YNmT17dm6//fbcfPPNqa6uzosvvpjkf6Ly/e9/f55++un893//d4466qi/e1a3vwIAADQj\npVIpH/jAB7J8+fL88Y9/zOjRo5MkmzdvzgsvvJCPfexjmT9/fgYOHJjHHnssY8eOzR//+MckyW9/\n+9ssXbq08TmtW7fO2rVr07Nnz/zxj3/M8uXLc8YZZ2TZsmXZsmVLjjvuuL97XlEJAADQzFRUVCRJ\nPvzhD2fy5Mk7PbZ169ZcddVVefrpp3PIIYdk33333enxYcOGNd7u+objjjuuMST79u2bq666Kps3\nb86ECRP+7lnd/goAAPBXbFhTl/WrXtwtXxvW1P3Nr3vUUUdl4cKF2bp1axoaGnLVVVdly5Ytqaqq\nSq9evfLv//7vO936+sZzHn744ST/E59Tp05NknzoQx/KT37ykxx22GHp2LFj1q9fn/Xr16dLly5/\n938fVyoBAADexuGHH54rTjt31zu+w2PuSqlUyiGHHJIRI0ZkxIgRSZIhQ4akTZs2SZKPfvSjmTBh\nQiZOnLjT8/r165cFCxZk2LBh2b59e0aNGpUk6d69e5599tmcfvrpSZIOHTqkpqZmt6yn1NDQ0LBb\njtRCrVq1KoMHD868efPSrVu3co8DAADQrLj9FQAAgMJEJQAAAIWJSgAAAAoTlQAAABQmKgEAAChM\nVAIAAFCYqAQAAKAwUQkAAEBhohIAAIDCRCUAAACFiUoAAAAKa13uAQAAgN1j69atWbFiRbnHoBk6\n/PDDU1VVtUeOLSoBAKCFWLFiRSbceUs6dKku9yg0IxvW1OWK085Nz54998jxRSUAALQgHbpUp2O3\nA8s9BnsR76kEAACgMFEJAABAYaISAACAwkQlAAAAhYlKAAAAChOVAAAAFCYqAQAAKExUAgAAUJio\nBAAAoDBRCQAAQGGiEgAAgMJEJQAAAIWJSgAAAAoTlQAAABQmKgEAAChMVAIAAFCYqAQAAKAwUQkA\nAEBhohIAAIDCRCUAAACFiUoAAAAKE5UAAAAUJioBAAAoTFQCAABQmKgEAACgMFEJAABAYaISAACA\nwkQlAAAAhYlKAAAAChOVAAAAFCYqAQAAKExUAgAAUJioBAAAoDBRCQAAQGGiEgAAgMJEJQAAAIWJ\nSgAAAAoTlQAAABTWuqlfcOPGjRk/fnw2btyY1157Leedd166du2aiRMnpnXr1jnyyCMzefLkJMmM\nGTNy//33p1QqZcyYMRk4c
GA2btyYCy+8MJs3b06bNm1y3XXXpUOHDnn88cdz7bXXpqKiIh/+8Idz\n/vnnJ0m++c1vZtmyZUmSiRMn5qijjmrqJQMAALRYTR6Vc+fOTa9evXLBBRdk3bp1GTFiRA466KB8\n/etfT69evXLxxRfn4YcfTvfu3fPAAw9kzpw52bhxY4YPH54TTzwxM2fOzIknnph//Md/zNy5c3PL\nLbfkoosuypQpUzJz5swccMABGT16dE466aSsW7cuq1evzh133JHnnnsu48ePz+23397USwYAAGix\nmvz21+rq6qxfvz5JsmHDhnTu3DmrV69Or169kiS1tbVZuHBhlixZkgEDBqRUKqV9+/bp2rVrfv/7\n32fRokX5yEc+0rjv448/npUrV6ZDhw454IADkiQnnnhiFi5cmEWLFmXQoEFJkkMPPTQbNmzIq6++\n2tRLBgAAaLGaPCo/+clP5k9/+lM+/elPZ/To0Rk/fnzat2/f+HinTp2ydu3a1NfXp1OnTo3bO3fu\nnLq6utTV1aVjx46N+9bV1b1p37/c/sa+f7kdAACA3aPJb3/90Y9+lO7du+e73/1unn322Zx77rnZ\nf//9d/m8hoaGv2nbrraXSqV3NjAAAABvq8mj8te//nUGDhyYJHnf+96Xbdu27XT1cM2aNampqUlN\nTU1WrFjRuL2urq5xe319fdq1a/embW9Yu3ZtampqUllZmXXr1jVur6+vT3V19dvONm3atEyfPn03\nrhYAAKBla/LbXw855JAsX748yf8EZGVlZXr27Nm47aGHHkptbW369euXRx55JDt27MhLL72UtWvX\npkePHhkwYEDmzZuXJJk/f35qa2tz8MEH59VXX82aNWuyY8eOPPLIIxkwYEBOOOGExn2feeaZ1NTU\npG3btm8725gxY/K73/1up683ng8AAMCbNfmVyjPOOCMXX3xxRo0alW3btuXyyy/PQQcdlMsuuywN\nDQ3p3bt3+vfvnyQZOnRohg8fniSZNGlSSqVSRo8enXHjxmX+/PnZb7/9cs011yRJvvrVr+ZLX/pS\nkuTkk0/OYYcdlsMOOyw9e/bM8OHDUyqV8rWvfa2plwsAANCilRre7g2IJElWrVqVwYMHZ968eenW\nrVu5xwEAgLf19NNPZ+rDP07HbgeWexSakfWrXswltZ9Lz54998jxm/z2VwAAAFoOUQkAAEBhohIA\nAIDCRCUAAACFiUoAAAAKE5UAAAAUJioBAAAoTFQCAABQmKgEAACgMFEJAABAYaISAACAwkQlAAAA\nhYlKAAAAChOVAAAAFCYqAQAAKExUAgAAUJioBAAAoDBRCQAAQGGiEgAAgMJEJQAAAIWJSgAAAAoT\nlQAAABQmKgEAAChMVAIAAFCYqAQAAKAwUQkAAEBhohIAAIDCRCUAAACFiUoAAAAKE5UAAAAUJioB\nAAAoTFQCAABQmKgEAACgMFEJAABAYaISAACAwkQlAAAAhYlKAAAAChOVAAAAFCYqAQAAKExUAgAA\nUJioBAAAoDBRCQAAQGGiEgAAgMJEJQAAAIWJSgAAAAoTlQAAABQmKgEAAChMVAIAAFCYqAQAAKAw\nUQkAAEBhohIAAIDCRCUAAACFiUoAAAAKE5UAAAAUJioBAAAoTFQCAABQmKgEAACgMFEJAABAYaIS\nAACAwkQlAAAAhYlKAAAAChOVAAAAFCYqAQAAKExUAgAAUJioBAAAoDBRCQAAQGGiEgAAgMJEJQAA\nAIWJSgAAAAoTlQAAABQmKgEAAChMVAIAAFCYqAQAAKAwUQkAAEBhohIAAIDCRCUAAACFiUoAAAAK\nE5UAAAAUJioBAAAoTFQCAABQmKgEAACgsNbleNG77747M2fOzOuvv54LLrgg//AP/5CLLrooSdK5\nc+dcc801qaqqyt13353vfe97adWqVUaMGJFhw4Zl27ZtueSSS/L888+nVCrlyiuvzCGHHJK
nnnoq\nkyZNSkVFRY488shMnjw5STJjxozcf//9KZVKGTNmTAYOHFiOJQMAALRITX6lcv369fn2t7+d2267\nLTNmzMi8efNyww035Oyzz86sWbPSo0ePzJ07N5s2bcqNN96Y2267LbNnz87MmTOzYcOG3Hvvvenc\nuXNmz56dsWPH5rrrrkuSTJkyJZdffnlmz56dzZs35+GHH87KlSvzwAMPZM6cObn11lszderUNDQ0\nNPWSAQAAWqwmj8qFCxfm+OOPz3ve855UV1dnypQpWbJkSeMVxNra2ixcuDBPPPFEjjrqqLRp0yaV\nlZU55phjsnTp0ixatCiDBg1KkvTt2zdLly7Ntm3bsnr16vTq1WunYyxZsiQDBgxIqVRK+/bt07Vr\n1zzzzDNNvWQAAIAWq8mjcvXq1Xnttddy/vnnZ8SIEXn44YezefPmVFZWJkk6deqUtWvXpr6+Pp06\ndWp8XufOnVNXV5f6+vp07Njxf4Zv1SqlUin19fXp0KFD4767OgYAAAC7R5O/p3Lbtm15+eWX861v\nfSsvvPBCRowYke3btzc+/na3p77T7W+3b6lUetvHp02blunTp//NxwMAANjbNXlUVldX5z3veU9K\npVK6du2a/fffP5s2bcrWrVtTVVWVurq61NTUpKamJvX19Y3Pq6urS+/evVNTU5N169YlSbZv356G\nhoZUV1dnw4YNjfuuWbOm8RgrVqzY6Rg1NTVvO9uYMWMyZsyYnbatWrUqgwcP3k2rBwAAaFma/PbX\nfv365fHHH0+SbNiwIX/+85/Tt2/fzJ8/P0kyf/781NbW5uijj86TTz6ZTZs25bXXXsuyZcvSp0+f\nnHDCCZk3b16S5NFHH02/fv3SunXrdO/ePcuXL0+SPPTQQ6mtrU2/fv3yyCOPZMeOHXnppZeydu3a\n9OjRo6mXDAAA0GI1+ZXKQw45JLW1tRkxYkS2bt2aCRMm5Nhjj81FF12U73//+zn44IMzbty4VFRU\n5MILL8xZZ52VJDnvvPPSrl27nHzyyXnssccycuTIVFZW5uqrr06SXHrppbnsssvS0NCQ3r17p3//\n/kmSoUOHZvjw4UmSSZMmNfVyAYAWZuvWrTvdCQVvOPzww1NVVVXuMaDJlRr8jY2/6o3bX+fNm5du\n3bqVexwAoMyefvrpTLjzlnToUl3uUWhGNqypyxWnnZuePXuWdY6nn346Ux/+cTp2O7Csc9C8rF/1\nYi6p/dweOz+b/EolAMC7XYcu1f7RDvC/mvw9lQAAALQcohIAAIDCRCUAAACFiUoAAAAKE5UAAAAU\nJioBAAAoTFQCAABQmKgEAACgMFEJAABAYaISAACAwkQlAAAAhYlKAAAAChOVAAAAFCYqAQAAKExU\nAgAAUJioBAAAoDBRCQAAQGGiEgAAgMJEJQAAAIWJSgAAAAoTlQAAABQmKgEAAChMVAIAAFCYqAQA\nAKAwUQkAAEBhohIAAIDCRCUAAACFiUoAAAAK22VUXnLJJW/adu655+6RYQAAAHh3af12D9x99925\n/fbb8/TTT+eMM85o3P7666+nvr6+SYYDAACgeXvbqPzMZz6T4447LuPGjcvYsWPT0NCQJGnVqlWO\nOOKIJhsQAACA5uttozJJunTpkh/84AdZv359XnnllcbtGzduzP7777/HhwMAAKB5+6tRmSQTJ07M\nfffd96aInD9//h4bCgAAgHeHXUblr3/96yxatChVVVVNMQ8AAADvIrv89Nfu3bsLSgAAAN7SLq9U\nHnDAARk9enSOPfbYlEqlJEmpVMoFF1ywx4cDAACgedtlVHbq1Cl9+/bdadsbcQkAAMDebZdRef75\n5zfFHAAAALwL7TIqP/CBD+z0c6lUSvv27bN48eI9NhQAAADvDruMyqeeeqrx++3bt2fx4sV54okn\n9uhQAAAAvDvs8tNf/1JFRUWOP/74LFq0aE/NAwAAwLv
ILq9U3nnnnTv9vHbt2qxevXqPDQQAAMC7\nxy6j8le/+tVOn/batm3bXH/99Xt0KAAAAN4ddhmVU6dOTZKsWbMmpVIpNTU1e3woAAAA3h12GZUL\nFy7MxRdfnB07diRJqqqqctVVV6VPnz57fDgAAACat11G5XXXXZeZM2eme/fuSZJnnnkml1566Zve\nawkAAMDeZ5ef/tqmTZvGoEySI444Im3atNmjQwEAAPDusMuo3LZtW+bPn58tW7Zk8+bNefDBBxtv\nhQUAAGDvtsvbX7/+9a/ny1/+ci688MKUSqX07t07X//615tiNgAAAJq5XV6pfPTRR9OtW7f85je/\nybJly/L666/nF7/4RVPMBgAAQDO3y6i85557Mm3atMafv/vd7+a+++7bo0MBAADw7rDLqGzdunVa\nt/6/u2QrKipSKpX26FAAAAC8O+zyPZUDBgzIqFGjcuyxx2b79u1ZvHhxBgwY0BSzAQAA0MztMirH\njh2bxYsX5ze/+U1KpVIuvvji9O3btylmAwAAoJnbZVQmyXHHHZfjjjtuT88CAADAu8wu31MJAAAA\nb0dUAgAAUJioBAAAoDBRCQAAQGGiEgAAgMJEJQAAAIWJSgAAAAoTlQAAABQmKgEAAChMVAIAAFCY\nqAQAAKAwUQkAAEBhohIAAIDCRCUAAACFiUoAAAAKE5UAAAAUJioBAAAoTFQCAABQmKgEAACgMFEJ\nAABAYaISAACAwkQlAAAAhYlKAAAAChOVAAAAFFaWqNyyZUuGDBmSu+66Ky+88EJGjRqVUaNGZezY\nsdm6dWuS5O67787QoUMzbNiwzJkzJ0mybdu2jBs3LiNHjswZZ5yRlStXJkmeeuqpnH766Rk5cmQm\nT57c+DozZszIqaeemmHDhmXBggVNvk4AAICWrixRedNNN6Vjx45JkhtuuCFnn312Zs2alR49emTu\n3LnZtGlTbrzxxtx2222ZPXt2Zs6cmQ0bNuTee+9N586dM3v27IwdOzbXXXddkmTKlCm5/PLLM3v2\n7GzevDkPP/xwVq5cmQceeCBz5szJrbfemqlTp6ahoaEcywUAAGixmjwqn3322fzhD3/IwIEDkyRL\nlixp/L62tjYLFy7ME088kaOOOipt2rRJZWVljjnmmCxdujSLFi3KoEGDkiR9+/bN0qVLs23btqxe\nvTq9evXa6RhLlizJgAEDUiqV0r59+3Tt2jXPPPNMUy8XAACgRWvyqLz66qszYcKExp83b96cysrK\nJEmnTp2ydu3a1NfXp1OnTo37dO7cOXV1damvr2+8wtmqVauUSqXU19enQ4cOjfvu6hgAAADsPq2b\n8sV+/OMfp0+fPunatWuSvOl21Le7PfWdbn+7fUul0l/dZ9q0aZk+ffrffEwAAIC9XZNG5YIFC7Jy\n5cr8/Oc/z4svvpiqqqrsu+++2bp1a6qqqlJXV5eamprU1NSkvr6+8Xl1dXXp3bt3ampqsm7duiTJ\n9u3b09DQkOrq6mzYsKFx3zVr1jQeY8WKFTsdo6am5q/ON2bMmIwZM2anbatWrcrgwYN3w+oBAABa\nnia9/fX666/PnDlz8sMf/jCnnXZazj///PTv3z/z589PksyfPz+1tbU5+uij8+STT2bTpk157bXX\nsmzZsvTp0ycnnHBC5s2blyR59NFH069fv7Ru3Trdu3fP8uXLkyQPPfRQamtr069fvzzyyCPZsWNH\nXnrppaxduzY9evRoyuUCAAC0eE16pfL/VyqVMmbMmFx00UX5/ve/n4MPPjjjxo1LRUVFLrzwwpx1\n1llJkvPOOy/t2rXLySefnMceeywjR45MZWVlrr766iTJpZdemssuuywNDQ3p3bt3+vfvnyQZOnRo\nhg8fniSZNGlSeRYJAADQgpUtKr/0pS81fj9z5sw3PX7SSSflpJNO2mlbq1atcsUVV7xp3/e9732Z\nPXv2m7afeeaZOfP
MM3fDtAAAALyVsvydSgAAAFoGUQkAAEBhohIAAIDCRCUAAACFiUoAAAAKE5UA\nAAAUJioBAAAoTFQCAABQmKgEAACgMFEJAABAYaISAACAwkQlAAAAhYlKAAAAChOVAAAAFCYqAQAA\nKExUAgAAUJioBAAAoDBRCQAAQGGiEgAAgMJEJQAAAIWJSgAAAAoTlQAAABQmKgEAAChMVAIAAFCY\nqAQAAKAwUQkAAEBhohIAAIDCRCUAAACFiUoAAAAKE5UAAAAUJioBAAAoTFQCAABQmKgEAACgMFEJ\nAABAYaISAACAwkQlAAAAhYlKAAAAChOVAAAAFCYqAQAAKExUAgAAUJioBAAAoDBRCQAAQGGiEgAA\ngMJEJQAAAIWJSgAAAAoTlQAAABQmKgEAACisdbkHAIC3snXr1qxYsaLcY9AMHX744amqqir3GAD8\nL1EJQLO0YsWKTLjzlnToUl3uUWhGNqypyxWnnZuePXuWexQA/peoBKDZ6tClOh27HVjuMQCAv8J7\nKgEAAChMVAIAAFCYqAQAAKAwUQkAAEBhohIAAIDCRCUAAACFiUoAAAAK83cqm8DWrVuzYsWKco9B\nM3T44Yenqqqq3GMAAEBhorIJrFixIhPuvCUdulSXexSakQ1r6nLFaeemZ8+e5R4FAAAKE5VNpEOX\n6nTsdmC5xwAAANitvKcSAACAwkQlAAAAhYlKAAAAChOVAAAAFCYqAQAAKExUAgAAUJioBAAAoDBR\nCQAAQGGiEgAAgMJEJQAAAIWJSgAAAAoTlQAAABQmKgEAAChMVAIAAFCYqAQAAKAwUQkAAEBhohIA\nAIDCRCUAAACFiUoAAAAKE5UAAAAU1rocLzplypQsW7Ys27dvz+c///l88IMfzEUXXZQk6dy5c665\n5ppUVVXl7rvvzve+9720atUqI0aMyLBhw7Jt27Zccsklef7551MqlXLllVfmkEMOyVNPPZVJkyal\noqIiRx55ZCZPnpwkmTFjRu6///6USqWMGTMmAwcOLMeSAQAAWqQmj8rHHnssq1atyp133pmNGzfm\n5JNPzoABA3L22WdnyJAhueGGGzJ37tx89rOfzY033pgf//jHqaioyNChQ/PRj3408+fPT+fOnXPt\ntddm0aJFue6663L99ddnypQpufzyy9OrV69cfPHFefjhh9O9e/c88MADmTNnTjZu3Jjhw4entrY2\npVKpqZcNAADQIjX57a/9+vXLtddemyRp165dtm/fnl/+8peNVxBra2uzcOHCPPHEEznqqKPSpk2b\nVFZW5pjrya0BAAAMlklEQVRjjsnSpUuzaNGiDBo0KEnSt2/fLF26NNu2bcvq1avTq1evnY6xZMmS\nDBgwIKVSKe3bt0/Xrl3zzDPPNPWSAQAAWqwmj8qKioq0bds2STJ37tz0798/mzZtSmVlZZKkU6dO\nWbt2berr69OpU6fG53Xu3Dl1dXWpr69Px44d/2f4Vq1SKpVSX1+fDh06NO67q2MAAACwe5Ttg3oe\nfPDB3HnnnY3vfXxDQ0PDW+7/Tre/3b5ufQUAANh9yvJBPQsWLMjNN9+cGTNmZL/99kvbtm2zdevW\nVFVVpa6uLjU1NampqUl9fX3jc+rq6tK7d+/U1NRk3bp1SZLt27enoaEh1dXV2bBhQ+O+a9asaTzG\nihUrdjpGTU3N2841bdq0TJ8+ffcvGAAAoIVq8iuV69evz7XXXpvvfOc7ad++fZLk+OOPz/z585Mk\n8+fPT21tbY4++ug8+eST2bRpU1577bUsW7Ysffr0yQknnJB58+YlSR599NH069cvrVu3Tvfu3bN8\n+fIkyUMPPZTa2tr069cvjzzySHbs2JGXXnopa9euTY8ePd52tjFjxuR3v/vdTl9vvBYAAABv1uRX\nKh944IG8/PLLGTt2bJKkVCpl6tSpmTBhQr7//e/n4IMPzrhx41JRUZELL7wwZ511V
pLkvPPOS7t2\n7XLyySfnsccey8iRI1NZWZmrr746SXLppZfmsssuS0NDQ3r37p3+/fsnSYYOHZrhw4cnSSZNmtTU\nywUAAGjRmjwqR44cmZEjR75p+8yZM9+07aSTTspJJ52007ZWrVrliiuueNO+73vf+zJ79uw3bT/z\nzDNz5pln/h0TQ8u2devWnW4ThyQ5/PDDU1VVVe4xAIB3gbK8pxJoPlasWJEJd96SDl2qyz0KzcSG\nNXW54rRz07Nnz3KPAgC8C4hKIB26VKdjtwPLPQYAAO9CZfuTIgAAALz7iUoAAAAKE5UAAAAUJioB\nAAAoTFQCAABQmKgEAACgMFEJAABAYaISAACAwkQlAAAAhYlKAAAAChOVAAAAFCYqAQAAKExUAgAA\nUJioBAAAoDBRCQAAQGGiEgAAgMJEJQAAAIWJSgAAAAoTlQAAABQmKgEAAChMVAIAAFCYqAQAAKAw\nUQkAAEBhohIAAIDCRCUAAACFiUoAAAAKE5UAAAAUJioBAAAoTFQCAABQmKgEAACgMFEJAABAYaIS\nAACAwkQlAAAAhYlKAAAAChOVAAAAFCYqAQAAKExUAgAAUJioBAAAoDBRCQAAQGGiEgAAgMJEJQAA\nAIWJSgAAAAoTlQAAABQmKgEAAChMVAIAAFCYqAQAAKAwUQkAAEBhohIAAIDCRCUAAACFiUoAAAAK\nE5UAAAAUJioBAAAoTFQCAABQmKgEAACgMFEJAABAYaISAACAwkQlAAAAhYlKAAAAChOVAAAAFCYq\nAQAAKExUAgAAUJioBAAAoDBRCQAAQGGiEgAAgMJEJQAAAIWJSgAAAAoTlQAAABQmKgEAAChMVAIA\nAFCYqAQAAKAwUQkAAEBhohIAAIDCRCUAAACFiUoAAAAKE5UAAAAUJioBAAAoTFQCAABQmKgEAACg\nMFEJAABAYXtFVH7zm9/M6aefntNPPz3Lly8v9zgAAAAtRutyD7CnLVmyJKtXr84dd9yR5557LuPH\nj8/tt99e7rEAAABahBZ/pXLRokUZNGhQkuTQQw/Nhg0b8uqrr5Z5KgAAgJahxV+prK+vzwc+8IHG\nnzt16pS6urrsu+++f9Pzt2/fniR58cUXC8+wZs2a1D37p2zZsLHwMWh5Nta/lDVHrknbtm3LOofz\nk/+fc5PmrDmcn85N3kpzODcT5ydvbXednwceeGBat35zQpYaGhoa/q4jN3OTJk3KwIEDM3jw4CTJ\nGWeckSuuuCKHHXbYm/adNm1apk+f3tQjAgAANHvz5s1Lt27d3rS9xV+prKmpybp16xp/rq+vT3V1\n9VvuO2bMmIwZM2anbVu2bMlvf/vbVFdXp6KiYo/OurcYPHhw5s2bV+4x4E2cmzRnzk+aK+cmzZnz\nc/c68MAD33J7i4/KE044Id/+9rdz+umn55lnnklNTc07uuzbpk2b9OnTZw9OuHd6q99wQHPg3KQ5\nc37SXDk3ac6cn3tei4/KY445Jj179szw4cNTKpXyta99rdwjAQAAtBgtPiqTZNy4ceUeAQAAoEVq\n8X9SBAAAgD2nYvLkyZPLPQR7n+OOO67cI8Bbcm7SnDk/aa6cmzRnzs89r8X/SREAAAD2HLe/AgAA\nUJioBAAAoDBRCQAAQGGiEgAAgMJEJQAAAIWJSgAAAAoTlTSpp556KkOGDMmsWbPKPQrsZMqUKTnt\ntNMydOjQ3HvvveUeBxq9+uqr+eIXv5iRI0fmlFNOyc9+9rNyjwQ72bJlS4YMGZK77rqr3KNAkmTx\n4sXp169fRo8endGjR+cb3/hGuUdq8VqXewD2Hps3b86VV16ZAQMGlHsU2Mljjz2WVatW5c4778zG\njRvzyU9+Mp/61KfKPRYkSebPn59jjz0255xzTp5//vmcddZZ+djHPlbusaDRTTfdlP333z+lUqnc\no0Cj4447Lv/2b/9W7jH2GqKSJlNVVZXvfOc7u
fnmm8s9CuykX79+OeaYY5Ik7dq1y/bt27N9+/ZU\nVFSUeTJIPv3pTzd+X19fn4MOOqiM08DOnn322fzhD3/Ihz/84TQ0NJR7HGjkfGxabn+lyVRUVKSq\nqqrcY8CbVFRUpG3btkmSuXPnpn///oKSZmfkyJH54he/mAkTJpR7FGh09dVXOydpdkqlUp555pl8\n4QtfyIgRI7JgwYJyj9TiuVIJ8L8efPDB3HHHHbn11lvLPQq8yezZs/P73/8+F154Ye65555yjwP5\n8Y9/nD59+qRr166uCtGsHHbYYRk7dmw+8YlP5Pnnn8+IESPy4IMPurixB4lKgCQLFizILbfckhkz\nZqRdu3blHgcaLV++PNXV1TnwwAPTo0ePJEldXV2qq6vLPBl7uwULFmTlypX5+c9/nhdffDFVVVU5\n8MAD079//3KPxl6uS5cu+cQnPpEk6dq1aw4++OA8//zzOfzww8s7WAsmKmlyfptJc7N+/fpce+21\nmTlzZtq3b1/ucWAnixcvzrp163LxxRenvr4+L7/8cjp16lTusSDXX3994/fTp09Pt27dBCXNwk9+\n8pOsWrUqX/ziF7N+/fq88MIL6dq1a7nHatFKDf6FTxNZtmxZJk6cmHXr1qWioiL7779/fvCDH6RD\nhw7lHo293OzZs3PTTTflsMMOa9x21VVX+UAUmoUtW7Zk/PjxWbt2bTZt2pQvfelLPv2VZueNqPzc\n5z5X7lEgGzduzJe//OVs3Lgx27Zty3nnnZchQ4aUe6wWTVQCAABQmE9/BQAAoDBRCQAAQGGiEgAA\ngMJEJQAAAIWJSgAAAAoTlQAAABQmKgGgmXj22WfzX//1X+UeAwDeEVEJAM3Ez372szz55JPlHgMA\n3pHW5R4AAPZGzz//fC666KKUSqVs3749H/7whzNr1qy0a9cubdu2Tc+ePTNx4sRUVlbmlVdeydix\nYzNo0KBMmzYtzz33XF588cWMHz8+ixcvzn333Zd27dqlTZs2ufrqq7P//vuXe3kA7EVEJQCUwb33\n3ptevXpl0qRJ2bp1a+64446ceOKJ+eAHP5hPfvKTWbx4ccaNG5cPfehDeeKJJzJx4sQMGjQoSbJ2\n7dr8x3/8R5LkzDPPzEMPPZSOHTvmsccey5o1a0QlAE3K7a8AUAaDBg3KI488kgkTJmTevHkZPnx4\nkqShoSFJ0rFjx9x8880ZNWpUvvGNb+SVV15pfO7RRx/d+P2wYcNyzjnn5KabbspBBx2UI488smkX\nAsBeT1QCQBkcccQR+elPf5rPfOYzWbJkSUaNGpUkKZVKSZKvfvWr+exnP5tZs2blm9/8ZmNslkql\ntGr1f//7njhxYr71rW+lY8eOueCCC/KLX/yi6RcDwF5NVAJAGdx999357W9/m/79+2fixImpr69P\nqVTKtm3bkiSvvPJKDj300CTJ/fffn+3btyf5vyuZSbJ+/fpMnz49Bx54YEaMGJHRo0dn2bJlTb8Y\nAPZq3lMJAGXw3ve+N5MnT8573vOeNDQ05KyzzkqnTp1yzTXXZMeOHTnnnHMyfvz4HHzwwTnzzDNz\n33335eqrr84+++zTeDWzY8eOefnll3PKKaekffv2qayszJQpU8q8MgD2NqWGv/yVJwAAALwDbn8F\nAACgMFEJAABAYaISAACAwkQlAAAAhYlKAAAAChOVAAAAFCYqAQAAKOz/AQ3RvbEGywxqAAAAAElF\nTkSuQmCC\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "ax = sns.countplot(x='stars', data=review_df, hue='type')\n", "# Removing spines\n", "sns.despine()" ] }, { "cell_type": "markdown", 
"metadata": {}, "source": [ "## Example: Plot review star ratings by year" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAA5AAAAH3CAYAAADNIrDvAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzs3X10l+WdJ/73l0DKuDwIaYIg0mRV5JwOR+i4CJUT19Gu\nIzOtDrUFWrHHtnbWqeBYrSiD1HXGtZYqo7judKQ+9GHtVD3Hh2n1nBXWJ45od1iUYYtiy3dqBCEJ\nmsGiJmJ+f/TXbClobjTfJJDX65ycQ+58cn0/142n9u113ddd6uzs7AwAAAB0Y1BfNwAAAMDBQYAE\nAACgEAESAACAQgRIAAAAChEgAQAAKESABAAAoJDBlRr417/+dS677LLs3Lkzb775Zi644IKsXr06\nGzduzOGHH54k+fKXv5yTTz45DzzwQO64444MGjQoc+fOzdlnn52Ojo5cfvnl2bp1a0qlUq677roc\nddRR2bRpU5YuXZqqqqocd9xxueqqq5IkK1euzEMPPZRSqZQFCxbk5JNPrtTUAAAABqRSpd4D+eCD\nD2bHjh350pe+lK1bt2b+/PmZNm1a/uRP/mSvcLd79+7Mnj079913X6qqqjJ79uz84Ac/yOrVq/Pz\nn/88ixcvztq1a/OP//iPWb58eebPn5+//uu/zqRJk7Jo0aL86Z/+aRoaGnLxxRfn7rvvzq5duzJn\nzpz89Kc/TalUqsTUAAAABqSKrUB+8pOf7PpzS0tLxo4dmyT5/bz67LPPZvLkyRk6dGiSZOrUqVm3\nbl3Wrl2bs846K0kybdq0LFq0KB0dHXn55ZczadKkJEljY2OeeuqpNDc3Z+bMmSmVShkxYkTGjRuX\nzZs3Z+LEiZWaHgAAwIBT8Wcg582blwsvvDCLFy9Oknz/+9/P/Pnz89WvfjU7d+5MS0tLRo8e3VVf\nU1OT5ubmtLS0ZNSoUb9pctCglEqltLS0ZOTIkV21o0ePzo4dO951jAP19ttvp6mpKW+//fb7nS4A\nAMAhq2IrkL9111135cUXX8zFF1+cv/7rv87hhx+eSZMm5fbbb8/y5ctz4okn7lX/bjtqD2SnbWdn\nZ7fbV1esWJGbb755vz9btWpVxo8fX/jzAAAABoKKrUBu2LAhr7zySpLkmGOOSZL8+3//77u2n55y\nyinZvHlz6urq0tLS0vV7zc3NqaurS11dXVpbW5Mke/bsSWdnZ2pra9PW1tZVu3379n1qf3eM97Jg\nwYI8//zze32tWrWqZyYPAABwCKpYgHz66adz5513JvnNM5CvvfZarr766rzwwgtJkp/97Gc59thj\nc/zxx2fjxo3ZvXt33nrrraxfvz4nnHBCTjrppK5A9+STT2b69OkZPHhwGhoasmHDhiTJo48+msbG\nxkyfPj1PPPFE3nnnnezcuTM7duzoCq0AAAD0jIptYT3nnHNy2WWXZe7cudm9e3euvPLKDB8+PF//\n+tczfPjwVFdXZ9myZfnQhz6Uiy++OOeee26S5IILLsiwYcMya9asrFmzJvPmzcuQIUOybNmyJMni\nxYuzZMmSdHZ2ZsqUKZkxY0aSZPbs2ZkzZ06SZOnSpZWaFgAAwIBVsdd4HIyamppy6qmnegYSAABg\nPyp+CisAAACHBgESAACAQgRIAAAAChEgAQAAKESABAAAoBABEgAAgEIESADoIz/5yU/ywAMP9HUb\nAFDY4L5uAAAGiueffz7Lly/PqFGjsnv37jQ1NWXEiBH56Ec/mgcffDC//OUv8+abb+ass87KrFmz\ncuaZZ
+ZjH/tYTjrppPzrv/5rNm7cmFKplI9+9KP54he/2NfTAWAAEiABoJc89dRT+aM/+qOcf/75\naWpqyg9+8IN89KMfzdFHH53a2tr81V/9Vdra2vIXf/EXmTVrVnbt2pW/+Iu/yBFHHJHPfOYzufHG\nGzNu3Lhs2rSpr6cCwAAlQAJAL5kzZ07+4R/+Ieeee27q6+szfPjwdHZ2Jkna2tryjW98I9XV1Xnr\nrbeSJIMHD84RRxyRJLn66qvzd3/3d9m2bVvOPvvsTJo0qc/mAcDAJUACQC/55S9/ma985Sv5gz/4\ngyxfvjxDhgxJkmzYsCH/8i//kltuuSUvvfRSfvazn+31e52dndm9e3e+9a1vpaOjI3/+53+eM888\nsy+mAMAAJ0ACQC959dVXs3DhwtTU1OTXv/51zjjjjHz3u9/N5Zdfnl27duUb3/hGxo8fnyFDhuSR\nRx5JqVRKkpRKpTz00EP54Q9/mEGDBuW0007r45kAMFCVOn+7d4Y0NTXl1FNPzapVqzJ+/Pi+bgcA\nAKBf8RoPAAAAChEgAQAAKESABAAAoBABEgAAgEIESAAAAArxGg8A6CHt7e0pl8s9OmZ9fX2qq6t7\ndEwAeL8ESADoIeVyOVfcfWtGjqntkfHatjfn2s+cn4kTJ75n3aZNm3LhhRfmvPPOy+c///ke+WwA\n2B8BEgB60MgxtRk1/ohe+7w33ngj1113XWbOnNlrnwnAwOUZSAA4iFVXV+c73/lOPvzhD/d1KwAM\nAFYgAeAgVlVVlaqqqr5uA4ABwgokAAAAhQiQAAAAFGILKwD0oLbtzX0yVmdnZ499LgC8GwESAHpI\nfX19rv3M+T0+5ntZv359rrzyyrS2tqaqqio/+tGP8oMf/CAjR47s0T4AIBEgAaDHVFdXd/vOxp42\nZcqUPPjgg736mQAMXJ6BBAAAoBABEgAAgEIESAAAAAoRIAEAAChEgAQAAKAQp7ACQA9pb29PuVzu\n0THr6+tTXV3do2MCwPslQAJADymXy/nnFUsyoaZn3sH4q9a2ZMHfdvtqkGuuuSbr16/Pnj178sUv\nfjF/9md/1iOfDwC/T4AEgB40oWZkjh4zutc+b82aNWlqasrdd9+dXbt25U//9E8FSAAqRoAEgIPY\n9OnTM3Xq1CTJsGHDsmfPnuzZsydVVVV93BkAhyKH6ADAQayqqiqHHXZYkuTee+/NjBkzhEcAKsYK\nJAAcAh555JH8+Mc/zm233dbXrQDQBw70ILf3e0ibAAkAB7nHHnsst956a1auXJlhw4b1dTsA9IFy\nuZxbf3pRascN77a2eeuunD/rxm4PadsfARIAetCvWtt6dKzabmpeffXVXH/99bnzzjszYsSIHvts\nAA4+teOGZ+yEnjkJ/N0IkADQQ+rr65MFf9tj49X+dsz38PDDD+e1117LwoULu65961vfytixY3us\nDwD4LQESAHpIdXX1+9oO9EHMmzcv8+bN69XPBGDgcgorAAAAhQiQAAAAFCJAAgAAUIgACQAAQCEC\nJAAAAIU4hRUAekh7e3vK5XKPjllfX5/q6uoeHRMA3i8BEgB6SLlczk0rH0xNbc+8g7G1eVsWfvmT\n7/lqkF//+te57LLLsnPnzrz55pu54IIL8p/+03/qkc8HgN8nQAJAD6qpHZsxYyf02uetXr06H/vY\nx/KlL30pW7duzbnnnitAAlAxAiQAHMQ++clPdv25paUlY8f2zOonAOyPAAkAh4B58+alqakp3/nO\nd/q6FQAOYU5hBYBDwF133ZXbb789ixYt6utWADiECZAAcBDbsGFDXnnllSTJMccckyRpbm7uy5YA\nOIRVbAvr/k6Fmzx5ci699NIkSU1NTb797W+nuro6DzzwQO64444MGjQoc+fOzdlnn52Ojo5cfvnl\n2bp1a0qlUq677rocddRR2bRpU5YuXZqqqqocd9xxueqqq5IkK1euzEM
PPZRSqZQFCxbk5JNPrtTU\nAOBdtTZv69Wxnn766bS2tmbRokVpaWnJa6+9ltGjR/dYDwDwuyoWIH//VLj58+dn2rRpOe+883La\naaflpptuyr333pszzzwzt9xyS+67775UVVVl9uzZ+cQnPpHVq1enpqYm119/fdauXZsbbrghy5cv\nzzXXXJOrr746kyZNyqJFi/L444+noaEhDz/8cO65557s2rUrc+bMSWNjY0qlUqWmBwD7qK+vz8Iv\nf7L7wgMc872cc845ueyyyzJ37tzs3r07V155Zaqqqnq0BwD4rYoFyP2dCvfMM8/k6quvTpI0Njbm\ntttuS0NDQyZPnpyhQ4cmSaZOnZp169Zl7dq1Oeuss5Ik06ZNy6JFi9LR0ZGXX345kyZN6hrjqaee\nSnNzc2bOnJlSqZQRI0Zk3Lhx2bx583u+NwsAelp1dXWv/7tn6NChuemmm3r1MwEYuCr+DOS8efNy\n4YUXZvHixXnjjTcyZMiQJMno0aOzY8eOtLS07LXVpqamJs3NzWlpacmoUaN+0+SgQSmVSmlpacnI\nkSO7arsbAwAAgJ5T8dd43HXXXXnxxRfzV3/1V3ttKe3s7Nxv/YFef7fa7ravrlixIjfffHPhMQEA\nAAa6iq1A/v6pcKVSKYcddlja29uT/OaEuLq6utTV1aWlpaXr9373emtra5Jkz5496ezsTG1tbdra\n2rpqt2/fvk/t747xXhYsWJDnn39+r69Vq1b12PwBAAAONRULkE8//XTuvPPOJOk6Fe4//If/kNWr\nVyf5zSE7jY2NOf7447Nx48bs3r07b731VtavX58TTjghJ510Ulege/LJJzN9+vQMHjw4DQ0N2bBh\nQ5Lk0UcfTWNjY6ZPn54nnngi77zzTnbu3JkdO3Z0HWUOAABAz6jYFtb9nQo3derUXHrppfne976X\nI488Mpdcckmqqqpy8cUX59xzz02SXHDBBRk2bFhmzZqVNWvWZN68eRkyZEiWLVuWJFm8eHGWLFmS\nzs7OTJkyJTNmzEiSzJ49O3PmzEmSLF26tFLTAgAAGLBKnQfycOEhrqmpKaeeempWrVqV8ePH93U7\nABxk2tvbUy6Xe3TM+vr6VFdX9+iYABx6Xnjhhdy3fknGThjZbe22X7XlrCl/+75ODq/4IToAMFCU\ny+WsWXF/xteM7ZHxmlq3JQvOLPQv+DfffDN/9md/lq9+9av58z//8x75fAD4fQIkAPSg8TVj0zDm\nqF7/3P/+3/97Dj/88G5PIQeAD6Li74EEACrrF7/4RX75y1/mP/7H/3hAr70CgAMlQALAQW7ZsmW5\n4oor+roNAAYAARIADmL33XdfTjjhhIwbN87qIwAV5xlIADiIPfbYY3nppZfyP//n/8wrr7yS6urq\nHHHEEV2vuQKAniRAAkAPamrd1qNjfaSbmuXLl3f9+eabb8748eOFRwAqRoAEgB5SX1+fLDizx8b7\nyG/HBIB+QoAEgB5SXV39vl7K3FMuvPDCPvtsAAYGh+gAAABQiAAJAABAIQIkAAAAhQiQAAAAFCJA\nAgAAUIhTWAGgh7S3t6dcLvfomPX19amuru7RMQHg/RIgAaCHlMvl3PrTi1I7bniPjNe8dVfOn3Xj\ne74a5Omnn85FF12UY489Nkly3HHHZcmSJT3y+QDw+wRIAOhBteOGZ+yEkb36mSeeeGJuvPHGXv1M\nAAYmz0ACwEGus7Ozr1sAYIAQIAHgIFYqlbJ58+Z85Stfydy5c/PYY4/1dUsAHMJsYQWAg9hHPvKR\nLFy4MGeccUa2bt2auXPn5pFHHnHwDgAVYQUSAA5iY8aMyRlnnJEkGTduXI488shs3bq1j7sC4FBl\nBRIAelDz1l09O9aU9665//7709TUlK9+9at59dVXs23btowbN67HegCA3yVAAkAPqa+vz/mzevA0\n1Cm/GfO9/PEf/3G+9rWvZe7cuen
o6MiSJUtsXwWgYgRIAOgh1dXV7/nOxkoYPnx4br311l79TAAG\nLs9AAgAAUIgACQAAQCECJAAAAIUIkAAAABQiQAIAAFCIAAkAAEAhAiQAAACFCJAAAAAUIkACAABQ\niAAJAABAIQIkAAAAhQiQAAAAFCJAAgAAUIgACQAAQCECJAAAAIUIkAAAABQiQAIAAFCIAAkAAEAh\nAiQAAACFCJAAAAAUIkACAABQiAAJAABAIQIkAAAAhQiQAAAAFCJAAgAAUIgACQAAQCECJAAAAIUI\nkAAAABQiQAIAAFCIAAkAAEAhAiQAAACFDK7k4Ndcc03Wr1+fPXv25Itf/GKefPLJbNy4MYcffniS\n5Mtf/nJOPvnkPPDAA7njjjsyaNCgzJ07N2effXY6Ojpy+eWXZ+vWrSmVSrnuuuty1FFHZdOmTVm6\ndGmqqqpy3HHH5aqrrkqSrFy5Mg899FBKpVIWLFiQk08+uZJTAwAAGHAqFiDXrFmTpqam3H333dm1\na1dmzZqVmTNn5tJLL90r3O3evTu33HJL7rvvvlRVVWX27Nn5xCc+kdWrV6empibXX3991q5dmxtu\nuCHLly/PNddck6uvvjqTJk3KokWL8vjjj6ehoSEPP/xw7rnnnuzatStz5sxJY2NjSqVSpaYHAAAw\n4FRsC+v06dNz/fXXJ0mGDRuWd955Jx0dHens7Nyr7tlnn83kyZMzdOjQDBkyJFOnTs26deuydu3a\nnHLKKUmSadOmZd26deno6MjLL7+cSZMmJUkaGxvz1FNP5ZlnnsnMmTNTKpUyYsSIjBs3Lps3b67U\n1AAAAAakigXIqqqqHHbYYUmSe++9N9OnT8+QIUPy/e9/P/Pnz89Xv/rV7Ny5My0tLRk9enTX79XU\n1KS5uTktLS0ZNWrUb5ocNCilUiktLS0ZOXJkV+3o0aOzY8eOdx0DAACAnlPRZyCT5JFHHsmPf/zj\n3HbbbdmwYUNGjRqVSZMm5fbbb8/y5ctz4okn7lX/+yuU3V1/t9rutq+uWLEiN998c+ExAQAABrqK\nnsL62GOP5dZbb83KlSszbNiwzJgxo2v76SmnnJLNmzenrq4uLS0tXb/T3Nycurq61NXVpbW1NUmy\nZ8+edHZ2pra2Nm1tbV2127dv36f2d8d4LwsWLMjzzz+/19eqVat6cvoAAACHlIoFyFdffTXXX399\n/v7v/z4jRoxIklx44YV54YUXkiQ/+9nPcuyxx+b444/Pxo0bs3v37rz11ltZv359TjjhhJx00kld\nge7JJ5/M9OnTM3jw4DQ0NGTDhg1JkkcffTSNjY2ZPn16nnjiibzzzjvZuXNnduzYkWOOOaZSUwMA\nABiQKraF9eGHH85rr72WhQsXdl379Kc/na9//esZPnx4qqurs2zZsnzoQx/KxRdfnHPPPTdJcsEF\nF2TYsGGZNWtW1qxZk3nz5mXIkCFZtmxZkmTx4sVZsmRJOjs7M2XKlMyYMSNJMnv27MyZMydJsnTp\n0kpNCwAAYMAqdR7Iw4WHuKamppx66qlZtWpVxo8f39ftAAAAFPLCCy/kvvVLMnbCyG5rt/2qLWdN\n+dtMnDjxgD+nos9AAgAAcOgQIAEAAChEgAQAAKAQARIAAIBCBEgAAAAKESABAAAoRIAEAACgEAES\nAACAQgRIAAAAChEgAQAAKESABAAAoBABEgAAgEIESAAAAAoRIAEAAChEgAQAAKAQARIAAIBCBEgA\nAAAKESABAAAoRIAEAACgEAESAACAQgRIAAAAChEgAQAAKESABAAAoBABEgAAgEIESAAAAAoRIAEA\nAChEgAQAAKAQARIAAIBCBEgAAAAKESABAAAoRIAEAACgEAESAACAQgRIAAAAChEgAQAAKESABAAA\noBABEgAAgEIESAAAAAoRIAEAAChEgAQAAKAQARIAAIBCBvd1AwAAAEW1t7enXC4Xrq+vr091dXXl\
nGhpgBEgAAOCgUS6Xc+tPL0rtuOHd1jZv3ZXzZ92YiRMn9kJnA4MACQAAHFRqxw3P2Akj+7qNAckz\nkAAAABQiQAIAAFCIAAkAAEAhAiQAAACFCJAAAAAUIkACAABQiAAJAABAIQIkAAAAhQiQAAAAFCJA\nAgAAUIgACQAAQCECJAAAAIUMruTg11xzTdavX589e/bki1/8Yv7oj/4ol156aZKkpqYm3/72t1Nd\nXZ0HHnggd9xxRwYNGpS5c+fm7LPPTkdHRy6//PJs3bo1pVIp1113XY466qhs2rQpS5cuTVVVVY47\n7rhcddVVSZKVK1fmoYceSqlUyoIFC3LyySdXcmoAAAADTsUC5Jo1a9LU1JS77747u3btyqxZszJz\n5sycd955Oe2003LTTTfl3nvvzZlnnplbbrkl9913X6qqqjJ79ux84hOfyOrVq1NTU5Prr78+a9eu\nzQ033JDly5fnmmuuydVXX51JkyZl0aJFefzxx9PQ0JCHH34499xzT3bt2pU5c+aksbExpVKpUtMD\nAAAYcCq2hXX69Om5/vrrkyTDhg3Lnj178rOf/axrZbCxsTFPPfVUnnvuuUyePDlDhw7NkCFDMnXq\n1Kxbty5r167NKaeckiSZNm1a1q1bl46Ojrz88suZNGnSXmM888wzmTlzZkqlUkaMGJFx48Zl8+bN\nlZoaAADAgFSxAFlVVZXDDjssSXLvvfdmxowZ2b17d4YMGZIkGT16dHbs2JGWlpaMHj266/dqamrS\n3NyclpaWjBo16jdNDhqUUqmUlpaWjBw5squ2uzEAAADoORV9BjJJHnnkkdx9991ZuXJlTj/99K7r\nnZ2d+60/0OvvVtvd9tUVK1bk5ptvLjwmAADAQFfRU1gfe+yx/MM//ENuvfXWDB8+PIcddlja29uT\nJM3Nzamrq0tdXV1aWlq6fud3r7e2tiZJ9uzZk87OztTW1qatra2rdvv27fvU/u4Y72XBggV5/vnn\n9/patWpVT04fAADgkFKxAPnqq6/m+uuvz3e+852MGDEiSfLxj388q1evTpKsXr06jY2NOf7447Nx\n48bs3r07b731VtavX58TTjghJ510Ulege/LJJzN9+vQMHjw4DQ0N2bBhQ5Lk0UcfTWNjY6ZPn54n\nnngi77zzTnbu3JkdO3bkmGOOqdTUAAAABqSKbWF9+OGH89prr2XhwoVJklKplG9+85u54oor8r3v\nfS9HHnlkLrnkklRVVeXiiy/OueeemyS54IILMmzYsMyaNStr1qzJvHnzMmTIkCxbtixJsnjx4ixZ\nsiSdnZ2ZMmVKZsyYkSSZPXt25syZkyRZunRppaYFAAAwYJU6D+ThwkNcU1NTTj311KxatSrjx4/v\n63YAAIDf88ILL+S+9UsydsLIbmu3/aotZ03520ycOLEXOutbvXVfKvoMJAAAAIcOARIAAIBCBEgA\nAAAKESABAAAoRIAEAACgEAESAACAQgRIAAAAChEgAQAAKESABAAAoBABEgAAgEIESAAAAAoRIAEA\nAChEgAQAAKAQARIAAIBCBEgAAAAKESABAAAopNsAefnll+9z7fzzz69IMwAAAPRfg9/tBw888EB+\n9KMf5YUXXsjnPve5rutvv/12WlpaeqU5AAAA+o93DZCf+tSncuKJJ+aSSy7JwoUL09nZmSQZNGhQ\njj322F5rEAAAgP7hXQNkkowZMyY/+MEP8uqrr+b111/vur5r164cfvjhFW8OAACA/uM9A2SSXHnl\nlfnJT36yT2BcvXp1xZoCAACg/+k2QP6f//N/snbt2lRXV/dGPwAAAPRT3Z7C2tDQIDwCAADQ/Qrk\nhz/84cyfPz8f+9jHUiqVkiSlUikXXXRRxZsDAACg/+g2QI4ePTrTpk3b69pvgyQAAAADR7cB8i//\n8i97ow8AAAD6uW4D5Ec/+tG9vi+VShkxYkSefvrpijUFAABA/
9NtgNy0aVPXn/fs2ZOnn346zz33\nXEWbAgAAoP/p9hTW31VVVZWPf/zjWbt2baX6AQAAoJ/qdgXy7rvv3uv7HTt25OWXX65YQwAAAPRP\n3QbIf/7nf97r1NXDDjssy5cvr2hTAAAA9D/dBshvfvObSZLt27enVCqlrq6u4k0BAADQ/3QbIJ96\n6qksWrQo77zzTpKkuro63/rWt3LCCSdUvDkAAAD6j24D5A033JA777wzDQ0NSZLNmzdn8eLF+zwb\nCQAAwKGt21NYhw4d2hUek+TYY4/N0KFDK9oUAAAA/U+3AbKjoyOrV6/Om2++mTfeeCOPPPJI13ZW\nAAAABo5ut7D+zd/8Tb72ta/l4osvTqlUypQpU/I3f/M3vdEbAAAA/Ui3K5BPPvlkxo8fn2effTbr\n16/P22+/nf/1v/5Xb/QGAABAP9JtgHzwwQezYsWKru9vv/32/OQnP6loUwAAAPQ/3QbIwYMHZ/Dg\n/7fTtaqqKqVSqaJNAQAA0P90+wzkzJkz8/nPfz4f+9jHsmfPnjz99NOZOXNmb/QGAABAP9JtgFy4\ncGGefvrpPPvssymVSlm0aFGmTZvWG70BAADQj3QbIJPkxBNPzIknnljpXgAAAOjHun0GEgAAABIB\nEgAAgIIESAAAAAoRIAEAAChEgAQAAKAQARIAAIBCBEgAAAAKESABAAAoRIAEAACgEAESAACAQgRI\nAAAACql4gNy0aVNOO+20/PCHP0ySXH755fnkJz+Z+fPnZ/78+XnssceSJA888EBmz56ds88+O/fc\nc0+SpKOjI5dccknmzZuXz33uc3nppZe6xvzsZz+befPm5aqrrur6rJUrV+bTn/50zj777K5xAQAA\n6BmDKzn4G2+8keuuuy4zZ87sulYqlXLppZfm5JNP7rq2e/fu3HLLLbnvvvtSVVWV2bNn5xOf+ERW\nr16dmpqaXH/99Vm7dm1uuOGGLF++PNdcc02uvvrqTJo0KYsWLcrjjz+ehoaGPPzww7nnnnuya9eu\nzJkzJ42NjSmVSpWcIgAAwIBR0RXI6urqfOc738mHP/zhva53dnbu9f2zzz6byZMnZ+jQoRkyZEim\nTp2adevWZe3atTnllFOSJNOmTcu6devS0dGRl19+OZMmTUqSNDY25qmnnsozzzyTmTNnplQqZcSI\nERk3blw2b95cyekBAAAMKBUNkFVVVamurt7n+ve///3Mnz8/X/3qV7Nz5860tLRk9OjRXT+vqalJ\nc3NzWlpaMmrUqN80OmhQSqVSWlpaMnLkyK7a0aNHZ8eOHe86BgAAAD2joltY9+dTn/pURo0alUmT\nJuX222/P8uXLc+KJJ+5V8/srlN1df7fa99q+umLFitx8882FxwMAABjoev0U1hkzZnRtPz3llFOy\nefPm1NXVpaWlpaumubk5dXV1qaurS2tra5Jkz5496ezsTG1tbdra2rpqt2/fvk/t747xbhYsWJDn\nn39+r6/PXdSOAAAgAElEQVRVq1b19HQBAAAOGb0SIH935fDCCy/MCy+8kCT52c9+lmOPPTbHH398\nNm7cmN27d+ett97K+vXrc8IJJ+Skk07qCnVPPvlkpk+fnsGDB6ehoSEbNmxIkjz66KNpbGzM9OnT\n88QTT+Sdd97Jzp07s2PHjhxzzDG9MT0AAIABoaJbWNevX58rr7wyra2tqaqqyo9+9KNccMEF+frX\nv57hw4enuro6y5Yty4c+9KFcfPHFOffcc5MkF1xwQYYNG5ZZs2ZlzZo1mTdvXoYMGZJly5YlSRYv\nXpwlS5aks7MzU6ZMyYwZM5Iks2fPzpw5c5IkS5cureTUAAAABpyKBsgpU6bkwQcf3Of6rFmz9rl2\n+umn5/TTT9/r2qBBg3LttdfuU3v00Ufnrrvu2uf6Oeeck3POOecDdAwAAMC76fVnIAEAADg4CZAA\nAAAUIkACAABQiAAJAABAI
QIkAAAAhQiQAAAAFCJAAgAAUIgACQAAQCECJAAAAIUIkAAAABQiQAIA\nAFCIAAkAAEAhAiQAAACFCJAAAAAUIkACAABQiAAJAABAIYP7ugEAAAD21d7ennK5XKh2y5YtlW3m\n/ydAAgAA9EPlcjlrVtyf8TVju61d9+JzqfpU5XsSIAEAAPqp8TVj0zDmqG7rmlq3pTnlivfjGUgA\nAAAKsQIJAAD91IE8A5ck9fX1qa6urlxDDHgCJAAA9FPlcjm3/vSi1I4b3m1t89ZdOX/WjZk4cWIv\ndMZAJUACAEA/VjtueMZOGNnXbUASz0ACAABQkAAJAABAIQIkAAAAhQiQAAAAFCJAAgAAUIgACQAA\nQCECJAAAAIUIkAAAABQiQAIAAFCIAAkAAEAhAiQAAACFCJAAAAAUIkACAABQiAAJAABAIQIkAAAA\nhQiQAAAAFDK4rxsAAAAGtvb29pTL5UK1W7ZsqWwzvCcBEgAA6FPlcjlrVtyf8TVju61d9+JzqfpU\nLzTFfgmQAABAnxtfMzYNY47qtq6pdVuaU658Q+yXZyABAAAoRIAEAACgEAESAACAQgRIAAAAChEg\nAQAAKESABAAAoBABEgAAgEIESAAAAAoRIAEAAChEgAQAAKAQARIAAIBCBEgAAAAKESABAAAopOIB\nctOmTTnttNPywx/+MEmybdu2fP7zn8/nP//5LFy4MO3t7UmSBx54ILNnz87ZZ5+de+65J0nS0dGR\nSy65JPPmzcvnPve5vPTSS11jfvazn828efNy1VVXdX3WypUr8+lPfzpnn312HnvssUpPDQAAYECp\naIB84403ct1112XmzJld12666aacd955+eEPf5hjjjkm9957b3bv3p1bbrkl/+N//I/cddddufPO\nO9PW1pZ/+qd/Sk1NTe66664sXLgwN9xwQ5LkmmuuydVXX5277rorb7zxRh5//PG89NJLefjhh3PP\nPffktttuyze/+c10dnZWcnoAAAADSkUDZHV1db7zne/kwx/+cNe1Z555JieffHKSpLGxMU899VSe\ne+65TJ48OUOHDs2QIUMyderUrFu3LmvXrs0pp5ySJJk2bVrWrVuXjo6OvPzyy5k0adJeYzzzzDOZ\nOXNmSqVSRowYkXHjxmXz5s2VnB4AAMCAUtEAWVVVlerq6r2uvfHGGxkyZEiSZPTo0dmxY0daWloy\nevTorpqampo0NzenpaUlo0aN+k2jgwalVCqlpaUlI0eO7KrtbgwAAAB6xuC+/PB322J6oNffrbZU\nKr3rz1esWJGbb7658HgAAAADXa8HyMMOOyzt7e2prq5Oc3Nz6urqUldXl5aWlq6a5ubmTJkyJXV1\ndWltbU2S7NmzJ52dnamtrU1bW1tX7fbt27vGKJfLe41RV1f3rn0sWLAgCxYs2OtaU1NTTj311B6a\nKQAAcDBqb2/fK1sUUV9fv8/uy0NRrwTI3105/PjHP57Vq1fnT/7kT7J69eo0Njbm+OOPz9KlS7N7\n9+5UVVVl/fr1ueKKK/L6669n1apVOemkk/Lkk09m+vTpGTx4cBoaGrJhw4ZMnjw5jz76aObMmZP6\n+vp8//vfz0UXXZTXXnstO3bsyDHHHNMb0wMAAA4h5XI5V9x9a0aOqS1U37a9Odd+5vxMnDixwp31\nvYoGyPXr1+fKK69Ma2trqqqq8qMf/SgrV67MZZddlu9973s58sgjc8kll6SqqioXX3xxzj333CTJ\nBRdckGHDhmXWrFlZs2ZN5s2blyFDhmTZsmVJksWLF2fJkiXp7OzMlClTMmPGjCTJ7NmzM2fOnCTJ\n0qVLKzk1AADgEDZyTG1GjT+ir9vodyoaIKdMmZIHH3xwn+t33nnnPtdOP/30nH766XtdGzRoUK69\n9tp9ao8++ujcdddd+1w/55xzcs4553yAjgEAAHg3fXqIDgAADCQH+mzdli1bKtcMvA8CJAA
A9JJy\nuZw1K+7P+JqxherXvfhcqj5V4abgAAiQAADQi8bXjE3DmKMK1Ta1bktzypVtCA7AoL5uAAAAgIOD\nAAkAAEAhAiQAAACFCJAAAAAUIkACAABQiAAJAABAIQIkAAAAhXgPJAAAfa69vT3lcrlwfX19faqr\nqyvXELBfAiQAAH2uXC7n1p9elNpxw7utbd66K+fPujETJ07shc6A3yVAAgDQL9SOG56xE0b2dRvA\ne/AMJAAAAIUIkAAAABQiQAIAAFCIAAkAAEAhAiQAAACFCJAAAAAUIkACAABQiAAJAABAIQIkAAAA\nhQiQAAAAFDK4rxsAAODQ1N7ennK5XKh2y5YtlW0G6BECJAAAFVEul7Nmxf0ZXzO229p1Lz6Xqk/1\nQlPAByJAAgBQMeNrxqZhzFHd1jW1bktzypVvCPhAPAMJAABAIQIkAAAAhQiQAAAAFCJAAgAAUIgA\nCQAAQCECJAAAAIV4jQcAwAfU3t6ecrlcuDZJqqurC9XX19cXrgWoNAESAOADKpfLWbPi/oyvGdtt\n7f9+8bnsPGFDascN77a2eeuunD/rxkycOLEn2gT4wARIAIAeML5mbBrGHNVtXVPrtlSNG56xE0b2\nQlcAPcszkAAAABQiQAIAAFCIAAkAAEAhAiQAAACFOEQHAAAGsAN5DU3i1TIDnQAJAAADWLlczhV3\n35qRY2q7rW3b3pxrP3O+V8sMYAIkAAAMcCPH1GbU+CP6uo0B4UBWfLds2dLvnjkUIAEAAHpJuVzO\nTSsfTE3t2G5rf/H8c/nC+D/sha6KEyABAAB6UU3t2IwZO6Hbutbmbb3QzYHpbyuiAAAA9FMCJAAA\nAIUIkAAAABQiQAIAAFCIAAkAAEAhAiQAAACFCJAAAAAUIkACAABQyOC+bgAAADj0tLe3p1wuF6rd\nsmWLla2DhAAJAAD0uHK5nJtWPpia2rHd1v7i+efyhfF/2Atd8UEJkAAAQEXU1I7NmLETuq1rbd7W\nC93QE3o9QD799NO56KKLcuyxxyZJjjvuuHzpS1/KpZdemiSpqanJt7/97VRXV+eBBx7IHXfckUGD\nBmXu3Lk5++yz09HRkcsvvzxbt25NqVTKddddl6OOOiqbNm3K0qVLU1VVleOOOy5XXXVVb08NAADg\nkNYnK5Annnhibrzxxq7vr7jiipx33nk57bTTctNNN+Xee+/NmWeemVtuuSX33XdfqqqqMnv27Hzi\nE5/I6tWrU1NTk+uvvz5r167NDTfckOXLl+eaa67J1VdfnUmTJmXRokV5/PHH09jY2BfTAwAABpA9\nb7+dLVu2FKotWtdf9UmA7Ozs3Ov7Z555JldffXWSpLGxMbfddlsaGhoyefLkDB06NEkyderUrFu3\nLmvXrs1ZZ52VJJk2bVoWLVqUjo6OvPzyy5k0aVLXGE899ZQACQAAVNzrLa9m58bvZljNyG5rN77Y\nlBw/vxe6qoxeD5ClUimbN2/OV77ylfzbv/1b/vN//s954403MmTIkCTJ6NGjs2PHjrS0tGT06NFd\nv1dTU5Pm5ua0tLRk1KhRSZJBgwalVCqlpaUlI0f+v7+s344BAPSsAzlVMUnq6+tTXV1duYYA+okJ\nNSNz9JjR3db9qrUtbb3QT6X0eoD8yEc+koULF+aMM87I1q1bM2fOnL1+/vurk+/n+rvV/q4VK1bk\n5ptvLtAxAPBb5XI5a1bcn/E13Z+q2NS6LVlwZiZOnNgLnQHQG3o9QI4ZMyZnnHFGkmTcuHE58sgj\n8+yzz6a9vT3V1dVpbm5OXV1d6urq0tLS0vV7zc3NmTJlSurq6tLa2pok2bNnTzo7O1NbW5u2tv+X\n43fs2JG6urr37GPBggVZsGDBXteamppy6qmn9tRUAeCQNL5mbBrGHNXXbQDQB3r9fZ33339//tt/\n+29JkldffTWvvPJKPvOZz2T16tVJktWrV6exsTHHH39
8Nm7cmN27d+ett97K+vXrc8IJJ+Skk07K\nqlWrkiRPPvlkpk+fnsGDB6ehoSEbNmxIkjz66KOefwQAAOhhvb4C+cd//Mf52te+lrlz56ajoyNL\nlizJ8ccfn0svvTTf+973cuSRR+aSSy5JVVVVLr744px77rlJkgsuuCDDhg3LrFmzsmbNmsybNy9D\nhgzJsmXLkiSLFy/OkiVL0tnZmSlTpmTGjBm9PTUAAIBDWq8HyOHDh+fWW2/d5/qdd965z7XTTz89\np59++l7XBg0alGuvvXaf2qOPPjp33XVXzzUKAADAXnp9CysAAAAHpz55DyQAABwqDuT1Nlu2bLGC\nw0FNgAQAgA+gXC7nppUPpqa2+9fb/OL55/KF8X/YC11BZQiQAADwAdXUjs2YsRO6rWtt3tYL3UDl\nCJAAQEV07Hk7W7ZsKVxfX1+f6urqCnYEwAclQAIAFbH9teb835//Y2rbhndb27x1V86fdWMmTpzY\nC50B8H4JkABAxdSOG56xE0b2dRtAD9nzdvGdBQeyA4GDhwAJAAAU8nrLq9m58bsZVtP9fxja+GJT\ncvz8XuiK3iRAAgAAhU2oGZmjx4zutu5XrW1p64V+6F1eQwMAAEAhAiQAAACF2MIKAMCA0N7ennK5\nXLjeq2VgXwIkAACFHGgA27JlS7/a7lYul3PF3bdm5Jjabmvbtjfn2s+c79Uy8HsESAAACimXy7lp\n5YOpqR1bqP4Xzz+XL4z/wwp3dWBGjqnNqPFH9HUbcNASIAEAKKymdmzGjJ1QqLa1eVuFuwF6mwAJ\nALAfB7Jds79t1QSoFAESAGA/DmS7Zn/cqglQCQIkAMC7KLpd01ZNYKCw2wIAAIBCBEgAAAAKsYUV\nAAB+z563386WLVsK1Ratg0OBAAkAAL/n9ZZXs3PjdzOsZmS3tRtfbEqOn98LXUHfEyABAGA/JtSM\nzNFjRndb96vWtrT1Qj/QH3gGEgAAgEIESAAAAAqxhRUA9qO9vT3lcvmAfqe+vj7V1dWVaQgA+gEB\nEgD2o1wuZ82K+zO+Zmyh+qbWbcmCMzNx4sQKdwYAfUeABIB3Mb5mbBrGHNXXbVTcgay2btmyxfMv\nB4EDXUG3eg4UJUACwABXLpdz08oHU1Pb/WrrL55/Ll8Y/4e90BUfRLlczhV335qRY2q7rW3b3pxr\nP3O+1XOgEAESAEhN7diMGTuh27rW5m290A09YeSY2owaf0S3dXvefjtbtmwpNGbROuDQJUACAAxg\nr7e8mp0bv5thNSO7rd34YlNy/Pxe6ArorwRIAOgBHXuKr+Iknjmjf5lQMzJHjxndbd2vWtvS1gv9\nAP2XAAkAPWD7a835vz//x9S2De+2tnnrrpw/60bPnAFw0BEgAaCH1I4bnrETut8GSN850BNnAdib\nAAkADBjlcjn/vGJJJnjeD+B9ESABgAHF834A758ACQActA5kS2rym22pwyrXDsAhT4AE4ID/T7gT\nROkvyuVyrrj71owcU1uovmnj8/nGyEEV7grg0CVAApByuZw1K+7P+Jqx3dY2tW5LFpzpBFH6jZFj\najNq/BGFatteaU7efq3CHQEcugRIAJIk42vGpmHMUX3dBgDQj9nDAQAAQCFWIAE4IB173j6g9+P1\np+clD/QdgP4rKwDsTYAE4IBsf605//fn/5jatuHd1jZv3ZXzZ93Yb56XLJfLuWnlg6mp7f5Zz188\n/1y+MP4Pe6GryjjQsAwARQiQAByw2nHDM3ZC9y9i749qasdmzNgJ3da1Nm/rhW4qp1wu559XLMmE\nmu7/nja+2JQcP78XugLgYCdAAsAhakLNyBw9ZnS3db9qbUtbL/QDwMHP4x0AAAAUYgUSAA4SB/pc\n47DKtgPAACRAAhyCDiRoJE4cPViUy+VccfetGTmmttvapo3P5xsj/a0C0LMESIBD0IGcNpoc/CeO\nDiQjx9Rm1Pgjuq1
re6U5efu1XugIgIFEgAQ4RBU9bTQ5uE8c9boKAOg9AiQwoBzo1s76+vpUV1dX\nriE+MK+rAIDeI0ACA0q5XM6aFfdnfE33WzvLO17KljM/loaGhkJjC5s950BXFb2uAgB6hwAJDDjj\na8amYcxR3dY1tW7LIz//u9S2De+2tnnrrpw/68ZMnDixJ1rcr4G0VdNhMQDQPwmQAO+hdtzwjJ3Q\n/dbI3jDQtmo6LAYA+h8BEjjoHejKXH9aq7JVEwA4mBxSAfK//tf/mvXr1ydJrrzyykyePLmPOwJ6\nw4G8sqK/va7CVk0A4GByyATIZ555Ji+//HJ+/OMf51e/+lUuu+yy/OhHP+rrtqDPHMjKVnt7e5IU\nPgCmNw6LOdCVuaKvrOiPr6uwVRMAOFgcMgFy7dq1OeWUU5IkEyZMSFtbW37961/n3/27f9fHnVFp\nB/pahv4WlioV9LZs2ZKt//RcodNG//eLz2XnCRtSO65yh8Uc6N/Tli1bsvOfvjtgnvcDADgYHDIB\nsqWlJR/96Ee7vh89enSam5sPKEDu2bMnSfLKK6/0eH/vpb+cllj0VQW/r6/7f+mll/LsvU/kwyNG\nFap/YWs5rx+7JSNr/qDb2rbWN/KXc771vu5N0fvy0ksv5ef3rkzdiO7/Wd20tSXbaj6WESO7n+vW\npi05Y8zR+YPXu5/nrrd2Z/e/dWTXH7R3W7v73zqyffv2HHbYYd3W/q4tW7Zk6fduyWGHFzuQpuVf\nm/KFEYNz2Otvdlu7662ONP3rC3l9V/erc9u3lvMvQ9vS+nr3tZtf+de0bdqZXa92f192bn8928ce\n+H3Zvn17mn/xr3mzbVe3ta++tDXP7vl1Wgrckxde2ZmmzmL3JHFf9tvDAdyTxH3Zn/52TxL3ZX/8\nb8v+9Yd/VhL3ZX/62z1JDs37csQRR2Tw4H3jYqmzs7Oz2084CCxdujQnn3xyTj311CTJ5z73uVx7\n7bX5yEc+st/6FStW5Oabb+7NFgEAAA4Kq1atyvjx4/e5fsisQNbV1aW1tbXr+5aWltTWvvuhFAsW\nLMiCBQv2uvbmm2/mX/7lX1JbW5uqqqqK9VrUqaeemlWrVvV1G/2O+7J/7su+3JP9c1/2z33ZP/dl\nX+7J/rkv++e+7J/7sq/+dk+OOGL/5zMcMgHypJNOyt///d/ns5/9bDZv3py6uroDXqoeOnRoTjjh\nhAp1+P7sL/Xjvrwb92Vf7sn+uS/7577sn/uyL/dk/9yX/XNf9s992dfBcE8OmQA5derUTJw4MXPm\nzEmpVMp/+S//pa9bAgAAOKT8f+3dfUyV9f/H8ecRIUVxU4flMlvh/IOphDeh4kzsD3ORY2gj05UT\nc/6BqUOP3MwNSyaOSJMNExJtzRwRzKEFLiUtnamboWbrXlaJKeQNIejB4+f3hz/YF8PvztfrOufi\n5vX4j7Nzzt7v1z7nnM/7Ote56DEDJEBqaqrTJYiIiIiIiPRY+o/UIiIiIiIi4pOgrKysLKeLkAeL\niYlxuoQuSbl0Trn8mzLpnHLpnHLpnHL5N2XSOeXSOeXSOeXyb90hkx7zbzxERERERETEv3QKq4iI\niIiIiPhEA6SIiIiIiIj4RAOkiIiIiIiI+EQDpIiIiIiIiPhEA6SIiIiIiIj4RAOkiIiIiIiI+KSv\n0wX0RtnZ2dTU1OD1elm8eDETJkxg9erVAAwdOpR33nmHkJAQKioq2LVrF3369OGVV15h3rx57c/R\n0NDA7NmzKSgoYNKkSU61YisruZSXl7N161aeeOIJAGJjY1m2bJmT7djG6nrZsWMHlZWVeL1e1q9f\nz7hx45xsxzZWcnn//fc5duwYAMYY6uvrOXDggJPt2MJKJnV1dWRkZOD1emltbSU9PZ2oqCiHO7KH\nlVyamppYvXo1jY2NBAUFkZuby2OPPeZwR/bwNZfr16+TmprKgAED2Lp1KwCtra2kp
aVRV1eHy+Vi\n06ZN7e+/3Z2VXOrq6khJSSEmJoa1a9c62YatrGTi8XhIT0/n4sWLeDwe1qxZw5QpU5xsxzZWcrly\n5QputxuPx8Pt27dJT09n4sSJTrZjGyu5tOnN+9zOculy+1wjAXX06FGzbNkyY4wxjY2NZtq0aSYt\nLc188cUXxhhj3nvvPfPxxx+bmzdvmlmzZpmWlhbj8XhMfHy8uX79evvzrFmzxiQmJpqTJ0860ofd\nrOZSXl5uNm3a5GQLfmE1l59++skkJSWZu3fvmvPnz5vNmzc72Y5t7HodGWNMWVmZKS4uDngPdrOa\nyYYNG0xpaakxxpgzZ86YRYsWOdaLnazmsn379vbXzenTp43b7XasFzv5mosxxqxatcps377dLF++\nvP3x5eXlJjs72xhjzPHjx83KlSsD3IF/WM1l8eLFJi8vz+Tk5AS+eD+xmsknn3xi3n77bWOMMRcu\nXDBz5swJcAf+YTWXoqIis3//fmOMMSdPnjSvvfZagDvwD6u5tOmt+1xjHvye25X2uTqFNcAmT55M\nXl4eAAMHDsTr9XLq1Cmee+45AKZPn87x48c5e/YsY8eOpV+/fgQHBxMdHc3p06cBOH78OGFhYYwe\nPRpjjGO92MmOXHpKFv/Jai6HDx/mhRdewOVyERkZycqVK51sxzZ2rBe49y1KSUkJCxYscKQPO1nN\nJDw8nGvXrgFw48YNhgwZ4lgvdrKayx9//MGYMWMAiI6OpqamxrFe7ORrLgAbNmzgmWee6fD4b775\nhri4OACeffbZDq+r7sxqLvn5+Tz11FOBLdrPrGaSkJDQ4VuWf/75J4DV+4/VXJYsWcKLL74IQH19\nPcOHDw9g9f5jNRfo3ftceHAuXSkLDZABFhQURGhoKABlZWVMmTKF5uZmgoODARgyZAhXrlyhoaGh\nwwZu6NCh1NfX4/F42LZtG6tWrQLA5XIFvgk/sJoLwIkTJ0hOTmbhwoWcPXs28E34gdVcLl68yKVL\nl1i6dCkLFy7ku+++c6QPu9mxXgAqKyuZOXMmISEhgW3AD6xm8vrrr1NRUUFiYiIZGRk95mCD1VxG\njRrFV199BcDp06epq6sLfBN+4GsuAKGhof/auPxnXn369MHlcnHnzp0AduAfVnNpe2xPYjWT4OBg\n+vXrB0BxcTEvvfRSAKv3H6u5wL3B8eWXX2bLli2sWLEicMX7kdVcevs+Fx68XrrSPlcDpEMOHjxI\naWkpWVlZHW5/0NGFttsLCwuZP38+AwcO/K/3764eNpeoqChWrFjBjh07SEtLw+12+7vUgHrYXO7c\nucPdu3cpLCzE7XaTmZnp71ID6mFzabN3714SEhL8VZ4jrLy3JCQkUF5ezsaNG8nJyfF3qQH1MLm4\nXC7mz5+Px+NhwYIFnDp1igEDBgSg2sD5X3N50H16+2dRb2A1k927d3Pu3DmWL1/uh+qcYyWX8PBw\nSktLWb9+PWlpaX6q0BkPm4v2uZ3ravtcDZAOOHLkCIWFhRQVFREWFkZoaCgejwe4dzRq2LBhDBs2\njIaGhvbHtN1+7Ngxdu3aRVJSEocPH2b9+vX8+uuvTrViq4fN5dFHH+Xpp59uPw1gzJgxNDc309ra\n6kgfdrOyXoYOHcqECRMAGDduHJcvX3akB3+wsl4AmpqaaGhoaP+7J7CyVr799ltiY2OBe6fanDlz\nxpEe/MFKLiEhIeTk5LB7927mzZvH4MGDnWrDdr7k0ub+bwGGDRvG33//DYDX68UYQ9++PeO6fFZy\n6amsZlJSUsKRI0coKCjoMesErOVy4sQJGhsbAZgyZQo//PBD4Ar3Myu59PZ9bpv7c+lq+1wNkAF2\n7do18vLy2L59O4MGDQJg6tSpVFdXA1BdXc306dOJiori/PnzNDc3c/v2bWpqapg4cSJ79uyhpKSE\nkpISZsyYQVZWFhEREU62ZAsruUyYMIHCwkJKS
0sBuHDhAqGhoe2nBXRnVtdLbGwsX3/9NQC1tbWE\nh4c71oudrK4XgO+//55Ro0Y51oPdrK6VESNGcO7cOQDOnz/PyJEjHevFTlbXypdffkl+fj4AFRUV\nzJw507Fe7ORrLm3uPzoeGxvLoUOHADh69CiTJ08OUOX+ZTWXB93WnVnN5LfffuPTTz8lPz+/R/xc\noI3VXKqqqti3bx8AP/74Y6/7fG5zfy69fZ/b5v5cuto+12V62jtdF7dnzx62bdvGk08+Cdw7wpCT\nk0N6ejqtra08/vjj5OTkEBQUxIEDBygqKgJg0aJFxMfHd3iu9PR0EhMTe8Tlja3mcunSJVJTU3G5\nXO2XDB8/fryTLdnCjvWSm5tLTU0Nt27d6jGXCbcjl88++4wzZ86QkZHhWB92sprJ5cuXSUtLo7W1\nFa/Xy7p164iMjHSyJVtYzaWlpYWUlBRu3rzJoEGDePfdd9tPrerOfM0F7l0Epbm5mRs3bjB8+HDW\nrl3L1KlTyczMpLa2luDgYHJzc3vEt/lWclmyZAkffPABjY2N3L59m/DwcPLy8hg9erSTLVlmJRO3\n283Jkyepqqrq8O9viouLu/1BXquvocjISFavXk1LSwu3bt0iMzOzV30+Q+e5TJs2rf25euM+FzrP\nJSIiokvtczVAioiIiIiIiE90CquIiIiIiIj4RAOkiIiIiIiI+EQDpIiIiIiIiPhEA6SIiIiIiIj4\nRLU9By4AAAJPSURBVAOkiIiIiIiI+EQDpIiIiIiIiPhEA6SIiIiIiIj4RAOkiIiIiIiI+KSv0wWI\niIj0BvPnz2flypXExMQAkJyczPPPP8+hQ4fweDy0tLSQkpLCjBkz+Pnnn1m3bh3BwcE0NTXx5ptv\nEhcXR35+Pr///jt//fUXbrebsWPHOtyViIj0NhogRUREAiApKYmysjJiYmK4evUqtbW1HDx4kJSU\nFMaPH099fT1z586lurqaq1evkpqayqRJkzh79izr1q0jLi4OgCtXrvDRRx853I2IiPRWGiBFREQC\nYPbs2WzevJmmpiaqqqqYM2cOO3fuZPPmze336d+/Pw0NDQwePJjc3Fy2bNlCa2srTU1N7feJiopy\nonwRERFAA6SIiEhAPPLII8yaNYvPP/+cyspKNm7cyM6dOykoKCAsLKzDfVetWsWCBQuIj4/nl19+\nYenSpQC4XC769NHlC0RExDn6FBIREQmQpKQkPvzwQ0JCQhgxYgTR0dFUVVUBcPXqVbKzswFoampi\n5MiRAFRWVuL1egEwxjhTuIiIyP/TACkiIhIgERERhIaGMnfuXACysrLYu3cvr776KsnJyR0usON2\nu0lOTmbMmDH079+f3NxcXC4XLpfLyRZERKSXcxkdzhQREQmIP//8kzfeeIN9+/bRt69+RSIiIt2P\nPr1EREQCYNu2bezfv5+33npLw6OIiHRb+gZSREREREREfKLfQIqIiIiIiIhPNECKiIiIiIiITzRA\nioiIiIiIiE80QIqIiIiIiIhPNECKiIiIiIiIT/4PlolQlgz/ytAAAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "review_df['datetime'] = pd.to_datetime(review_df['date'])\n", "review_df['year'] = review_df['datetime'].dt.year\n", "ax = sns.countplot(x='year', data=review_df, hue='stars')\n", "sns.despine()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Featurize the data\n", "\n", "- Convert date string to 
date delta\n",
"  - For example, business_age\n",
"- Convert strings to categorical features\n",
"  - For example, noise level: quiet, loud, very loud.\n",
"- Drop unused features\n",
"  - For example, business_name" ] },
{ "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [], "source": [
"def calculate_date_delta(df, from_column, to_column):\n",
"    \"\"\"Replace a date column with its age in days, in place.\n",
"\n",
"    Each value in `from_column` is parsed with `pd.to_datetime` and converted to the\n",
"    number of whole days between it and the latest date in that column. The result\n",
"    is stored in `to_column` and `from_column` is dropped from `df`.\n",
"    \"\"\"\n",
"    dates = pd.to_datetime(df[from_column])\n",
"    # Timedelta series expose .dt.days, so no per-row lambda is needed.\n",
"    df[to_column] = (dates.max() - dates).dt.days\n",
"    df.drop(from_column, axis=1, inplace=True)" ] },
{ "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": true }, "outputs": [], "source": [
"def to_length(df, from_column, to_column):\n",
"    \"\"\"Replace `from_column` with the `len()` of each of its values, in place.\n",
"\n",
"    The lengths are stored in `to_column` and `from_column` is dropped from `df`.\n",
"    \"\"\"\n",
"    df[to_column] = df[from_column].apply(len)\n",
"    df.drop(from_column, axis=1, inplace=True)" ] },
{ "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": true }, "outputs": [], "source": [
"def drop_columns(df, columns):\n",
"    \"\"\"Drop every column named in `columns` from `df`, in place.\"\"\"\n",
"    for column in columns:\n",
"        df.drop(column, axis=1, inplace=True)" ] },
{ "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": true }, "outputs": [], "source": [
"def to_boolean(df, columns):\n",
"    \"\"\"Replace each listed column with a `<column>_bool` column of truth values, in place.\"\"\"\n",
"    for column in columns:\n",
"        # NOTE(review): bool(float('nan')) is True, so missing values become True here -- confirm intended.\n",
"        df[column + '_bool'] = df[column].apply(bool)\n",
"        df.drop(column, axis=1, inplace=True)" ] },
{ "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": true }, "outputs": [], "source": [
"# Value used to fill missing business attributes; to_category registers it as a category first.\n",
"FILL_WITH = 0.0" ] },
{ "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": true }, "outputs": [], "source": [
"def to_category(df, columns):\n",
"    \"\"\"Convert each listed column to the pandas `category` dtype, in place.\n",
"\n",
"    FILL_WITH is added to a column's categories when it is not already one of them.\n",
"    \"\"\"\n",
"    for column in columns:\n",
"        df[column] = df[column].astype('category')\n",
"        # add FILL_WITH category for fillna() to work w/o error\n",
"        if FILL_WITH not in df[column].cat.categories:\n",
"            df[column] = df[column].cat.add_categories([FILL_WITH])" ] },
{ "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": true }, "outputs": [], "source": [
"def category_rename_to_int(df, columns):\n",
"    \"\"\"Relabel the categories of each listed categorical column as 1..n, in place.\n",
"\n",
"    Categories that no row uses are removed before the relabelling, so n is the\n",
"    number of categories actually present in the column.\n",
"    \"\"\"\n",
"    for column in columns:\n",
"        # remove_unused_categories() returns a new Series; keep the result instead of discarding it.\n",
"        values = df[column].cat.remove_unused_categories()\n",
"        size = len(values.cat.categories)\n",
"        df[column] = values.cat.rename_categories(list(range(1, size + 1)))" ] },
{ "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": true }, "outputs": [], "source": [
"calculate_date_delta(df=review_df, from_column='date', to_column='date_delta')" ] },
{ "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [], "source": [
"to_length(df=review_df, from_column='text', to_column='text_len')" ] },
{ "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": true }, "outputs": [], "source": [
"drop_columns(df=review_df, columns=['type', 'year', 'datetime'])" ] },
{ "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": true }, "outputs": [], "source": [
"review_df.fillna(value=0.0, inplace=True)" ] },
{ "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false }, "outputs": [], "source": [
"calculate_date_delta(df=user_df, from_column='yelping_since', to_column='date_delta')" ] },
{ "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": true }, "outputs": [], "source": [
"# NOTE(review): `friends` was loaded from CSV; if it holds a serialized list, len() counts characters, not friends -- confirm.\n",
"to_length(df=user_df, from_column='friends', to_column='friends_count')" ] },
{ "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": true }, "outputs": [], "source": [
"# NOTE(review): same caveat as `friends` -- len() of a serialized list counts characters -- confirm.\n",
"to_length(df=user_df, from_column='elite', to_column='elite_count')" ] },
{ "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": false }, "outputs": [], "source": [
"drop_columns(df=user_df, columns=['name', 'type'])" ] },
{ "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": false }, "outputs": [], "source": [
"user_df.fillna(value=0.0, inplace=True)" ] },
{ "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": false }, "outputs": [], "source": [
"# Discard the business columns that are not used as model features.\n",
"drop_columns(\n",
"    df=biz_df,\n",
"    columns=[\n",
"        'type',\n",
"        'name',\n",
"        'city',\n",
"        'full_address',\n",
"        'state',\n",
"        'categories',\n",
"        'longitude',\n",
"        'latitude',\n",
"        'neighborhoods',\n",
"        'hours.Monday.open',\n",
"        'hours.Monday.close',\n",
"        'hours.Tuesday.open',\n",
"        'hours.Tuesday.close',\n",
"        'hours.Wednesday.open',\n",
"        'hours.Wednesday.close',\n",
"        'hours.Thursday.open',\n",
"        'hours.Thursday.close',\n",
"        'hours.Friday.open',\n",
"        'hours.Friday.close',\n",
"        'hours.Saturday.open',\n",
"        'hours.Saturday.close',\n",
"        'hours.Sunday.open',\n",
"        'hours.Sunday.close',\n",
"    ]\n",
")" ] },
{ "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": false }, "outputs": [], "source": [
"# Business attribute columns that are encoded as categoricals.\n",
"to_cat_columns = [\n",
"    'attributes.Ambience.casual',\n",
"    'attributes.Attire',\n",
"    'attributes.Alcohol',\n",
"    'attributes.Noise Level',\n",
"    'attributes.Smoking',\n",
"    'attributes.Wi-Fi',\n",
"    'attributes.Ages Allowed',\n",
"    'attributes.BYOB/Corkage',\n",
"]\n",
"to_category(\n",
"    df=biz_df,\n",
"    columns=to_cat_columns,\n",
")" ] },
{ "cell_type": "code", "execution_count": 32, "metadata": { "collapsed": false }, "outputs": [], "source": [
"# Must run after to_category: FILL_WITH has to be a known category before it can be filled in.\n",
"biz_df.fillna(value=FILL_WITH, inplace=True)" ] },
{ "cell_type": "code", "execution_count": 33, "metadata": { "collapsed": false }, "outputs": [], "source": [
"category_rename_to_int(\n",
"    df=biz_df,\n",
"    columns=to_cat_columns,\n",
")" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [
"# Join tables to populate the features\n",
"\n",
"Join three tables (review, biz, user) to one (review-with-all-info).\n",
"Each join is a many-to-one join." 
] }, { "cell_type": "code", "execution_count": 34, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# `user_df` is indexed by `user_id`, so it goes on the right-hand side of the join:\n", "# DataFrame.join matches the left frame's `on` column against the right frame's index.\n", "# Column names present in both frames get the `_review` / `_user` suffixes.\n", "review_join_user = review_df.join(user_df, on='user_id', lsuffix='_review', rsuffix='_user')" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# `biz_df` is indexed by `business_id`. No lsuffix is given, so on a name clash the\n", "# left column keeps its name and only the business column gets the `_biz` suffix.\n", "review_join_user_join_biz = review_join_user.join(biz_df, on='business_id', rsuffix='_biz')" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [], "source": [ "# The join keys are no longer needed once both joins are done.\n", "drop_columns(df=review_join_user_join_biz, columns=['user_id', 'business_id'])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Identify data X and target y\n", "Data X: all features we gathered from business, user, and review tables.\n", "\n", "Target y: what we'd like to predict: Whether the review is Five-star or not." 
] },
{ "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [], "source": [
"# target y is whether a review is five-star\n",
"y = review_join_user_join_biz.stars == 5\n",
"\n",
"# Uninformative columns were dropped earlier, so every column except the target is a feature.\n",
"# X is the same object as review_join_user_join_biz; the in-place drop below removes the target from it.\n",
"X = review_join_user_join_biz\n",
"review_join_user_join_biz.drop('stars', axis=1, inplace=True)\n",
"\n",
"# get the feature names - this will be useful for the model visualization and feature analysis\n",
"features = X.columns.values" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [
"# Split training set and testing set" ] },
{ "cell_type": "code", "execution_count": 38, "metadata": { "collapsed": false }, "outputs": [], "source": [
"from sklearn.cross_validation import train_test_split\n",
"\n",
"# Split the data into a training set and a test set.\n",
"# A fixed random_state makes the split (and every score derived from it) repeatable across runs.\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)" ] },
{ "cell_type": "code", "execution_count": 39, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
"training data shape (1668909, 99)\n",
"test data shape (556304, 99)\n",
"converted label data shape (1668909,)\n" ] } ], "source": [
"print 'training data shape', X_train.shape\n",
"print 'test data shape', X_test.shape\n",
"print 'converted label data shape', y_train.shape" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [
"# Model the data: Logistic regression\n",
"\n",
"Logistic regression estimates the probability of a binary response based on one or more features.\n",
"\n",
"Here we estimate the probability of a review being five-star." 
] },
{ "cell_type": "code", "execution_count": 40, "metadata": { "collapsed": false }, "outputs": [], "source": [
"from sklearn import preprocessing\n",
"\n",
"# Standardize features by removing the mean and scaling to unit variance.\n",
"# The scaler is fitted on the training split only and then applied to both splits.\n",
"scaler = preprocessing.StandardScaler().fit(X_train)\n",
"\n",
"X_train_scaled = scaler.transform(X_train)\n",
"X_test_scaled = scaler.transform(X_test)" ] },
{ "cell_type": "code", "execution_count": 41, "metadata": { "collapsed": false }, "outputs": [], "source": [
"from sklearn.cross_validation import cross_val_score\n",
"import numpy as np\n",
"\n",
"def training_score(est, X, y, cv):\n",
"    \"\"\"Print the mean and standard deviation of cross-validated accuracy and ROC AUC.\n",
"\n",
"    est: estimator handed to cross_val_score.\n",
"    X, y: data and labels to cross-validate on.\n",
"    cv: cross-validation argument handed to cross_val_score.\n",
"    \"\"\"\n",
"    acc = cross_val_score(est, X, y, cv=cv, scoring='accuracy')\n",
"    roc = cross_val_score(est, X, y, cv=cv, scoring='roc_auc')\n",
"    # The '5-fold' label is fixed text; it is not derived from `cv`.\n",
"    print '5-fold Train CV | Accuracy:', round(np.mean(acc), 3), '+/-', \\\n",
"        round(np.std(acc), 3), '| ROC AUC:', round(np.mean(roc), 3), '+/-', round(np.std(roc), 3)" ] },
{ "cell_type": "code", "execution_count": 42, "metadata": { "collapsed": false }, "outputs": [], "source": [
"from sklearn import linear_model\n",
"\n",
"# Build model using default parameter values\n",
"lrc = linear_model.LogisticRegression()" ] },
{ "cell_type": "code", "execution_count": 43, "metadata": { "collapsed": true }, "outputs": [], "source": [
"from sklearn.cross_validation import StratifiedKFold\n",
"\n",
"# Stratified 5-fold cross-validation; the shuffle is seeded so the folds are the same on every run.\n",
"cv = StratifiedKFold(y_train, n_folds=5, shuffle=True, random_state=42)" ] },
{ "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
"5-fold Train CV | Accuracy: 0.754 +/- 0.0 | ROC AUC: 0.824 +/- 0.001\n" ] } ], "source": [
"# print cross-validation scores\n",
"training_score(est=lrc, X=X_train_scaled, y=y_train, cv=cv)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [
"# Evaluation via Confusion Matrix\n",
"\n",
"False positive (upper right).\n",
"\n",
"False negative (bottom left)." ] },
{ "cell_type": "code", "execution_count": 45, "metadata": { "collapsed": false }, "outputs": [], "source": [
"# Compute confusion matrix\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.cross_validation import train_test_split\n",
"from sklearn.metrics import confusion_matrix\n",
"\n",
"def plot_confusion_matrix(cm, title='Confusion matrix', cmap=plt.cm.Blues):\n",
"    \"\"\"Draw `cm` as a colour-mapped image on the current pyplot figure.\n",
"\n",
"    cm: matrix passed to plt.imshow.\n",
"    title: text used as the plot title.\n",
"    cmap: matplotlib colormap used to colour the cells.\n",
"    \"\"\"\n",
"    plt.imshow(cm, interpolation='nearest', cmap=cmap)\n",
"    plt.title(title)\n",
"    plt.colorbar()\n",
"    plt.ylabel('True label')\n",
"    plt.xlabel('Predicted label')\n",
"    # Called last so the layout accounts for the title and axis labels set above.\n",
"    plt.tight_layout()\n",
"\n",
"# Run classifier\n",
"lrc_fit = lrc.fit(X_train_scaled, y_train)\n",
"y_pred = lrc_fit.predict(X_test_scaled)\n",
"cm = confusion_matrix(y_test, y_pred)" ] },
{ "cell_type": "code", "execution_count": 46, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
"Normalized confusion matrix\n",
"[[ 0.83402455 0.16597545]\n",
" [ 0.36468911 0.63531089]]\n" ] },
{ "name": "stderr", "output_type": "stream", "text": [
"/nail/home/xun/venv/ipynb/local/lib/python2.7/site-packages/matplotlib/collections.py:590: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison\n",
" if self._edgecolors == str('face'):\n" ] },
{ "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAjEAAAI0CAYAAADsonhHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xl8VOW9x/HvmZAYQgIkkJElCUXaxMpFQKlCg0BIBESQ\nChJAjEtuy1VMxLKjAmILsmMFLSiWAtLGIASFukFEXyhEqizFq7YRryxhMRNMhAhmm/tHypSBbCSZ\nyTzyefOaV+fMWZ4nPZnm1+95znMsp9PpFAAAgGFsDd0BAACA2qCIAQAARqKIAQAARqKIAQAARqKI\nAQAARmrU0B0AAAA1V1JSohMnTni1zVatWqlRI98rGXyvRwAAoFInTpxQfHy8V9vMzMxURESEV9us\nCYoYAAAMdPKqbiqxAj3aRiPnOV39w8cebaMuKGIAADBQiS1IpbbGnm2kzLeHzvp27wAAACpBEQMA\nAIzE5SQAAExkSbIsz7fhw0hiAACAkUhiAAAwkWUrf3m6DR/m270DAACoBEkMAAAmsiwvjInx7UEx\nJDEAAMBIJDEAAJiIMTEkMQAAwEwkMQAAGMkLY2J8fKIYkhgAAGAkkhgAAExkWV4YE0MSAwAAUO9I\nYgAAMBHzxJDEAAAAM1HEAAAAI3E5CQAAEzHZHUkMAAAwE0kMAAAmYmAvRQwAAKh/c+bM0b59+yRJ\n06dPV6dOnVzrVq1apTfeeEN+fn6KiYnRrFmzVFxcrKlTp+rYsWOyLEvz5s1TZGRklW1QxAAAYCIf\nHhOze/du5eTkKD09XYcPH9bkyZOVlpYmScrPz9e6dev0zjvvyGaz6Te/+Y0++eQTHT58WC1atNCi\nRYuUlZWlxYsXa8mSJVW2w5gYAABQr7KyshQXFydJioqKUkFBgQoLCyVJgYGB8vf31/fff6/S0lKd\nPXtWzZs3d9vnpptu0p49e6pthyQGAAAT+fCYGIfDoY4dO7qWw8LClJubqyZNmigwMFBjxozRgAED\nFBgYqL59+6pDhw5yOBwKCwuTJNlsNlmWpZKSEjVqVHmpQhEDAAA8yul0yvp3QXT69Gn96U9/0htv\nvKGgoCA98MAD+uyzz1zbXbhPdShiAAAwkRcfABkfH3/JqpSUFKWmpla4m91uV15enmvZ4XAoPDxc\nkvSvf/1LUVFRatq0qSTpxhtv1IEDB9z2KS0tldPprDKFkShiAABANTIzMxUREVHj7WNjY7V8+XIl\nJiYqOztbdrtdQUFBkqSIiAhlZ2erqKhIAQEB+t///V/dfPPNatKkiTIzMxUbG6sPPvhA3bt3r7Yd\nihgAAIzkhbuTann/T9euXRUdHa0RI0bIsizNmjVLGRkZCgkJUUJCgpKSkjRq1Cj5+/vr+uuvV48e\nPVRWVqYPP/zQ9fmCBQuqbcdy1uSiEwAA8AlHjx5VfHy8cloOUWmjYI+25VdyRm0dr112EuMtJDEA\nAJjIZpW/PN2GD2OeGAAAYCSKGAAAYCQuJwEAYCIffuyAt/h27wAAACpBEgMAgIl8+LED3kISAwAA\njEQSAwCAibz42AFfRRIDAACMRBIDAICJGBNDEgMAAMxEEgMAgImYJ4YkBgAAmIkkBgAAI3lhTIwY\nEwMAAFDvSGIAADARY2JIYgAAgJkoYgAAgJG4nAQAgIkseWGyO88evq5IYgAAgJFIYgAAMBEDe0li\nAACAmShicMW49tpr9fjjj7t99tFHHykpKanB+nPy5Elt27ZNjz32WL0c88SJE7r22mvr5VhV2bNn\nj3r27KmHHnqoVvtPmTJF7733Xv12qg7y8vL07rvvVrju5MmTGjx4sJd7BNTA+QdAevrlw7ichCvK\nxx9/rM8//1w///nPa30Mp9Mpqx6/2AkJCUpISKi343nDzp07FRsbq3nz5tVq/9ru5ylZWVnatWuX\n+vbt6/Z5WVmZrr76am3evLmBegagKhQxuKKMHz9ec+bM0dq1a
y9ZV1ZWprlz5+rdd9+VZVnq2LGj\n5syZo6CgICUlJalr167KzMzU7373O6Wnpys8PFz79+/XwYMHddddd6lt27Zat26dTp06pT/84Q+6\n4YYbdPLkSU2aNEknTpxQcXGxRo4cqf/5n/9xa3fjxo3avHmzXnrpJQ0cONBVIBUUFKhVq1bauHGj\nCgoK9MQTT+jzzz+Xn5+fkpKSdM8990iS1q1bp+XLlys8PFy33XZbpT/7e++9p4ULF+rcuXNq166d\nFixYoLCwML3//vuaP3++ysrKFBAQoBkzZujGG2/Uxo0b9d5776lJkybau3evSkpKtHTpUh06dEgv\nv/yySktLNWbMGN122216/fXXtWrVKrefZ9WqVfrwww81d+5clZSUyGazKTU1VQMGDFBSUpKGDx+u\nO+6447Lbv7gA/eijj7Ro0SJ17txZ7733npo2baqZM2dqwYIF+vLLLzV06FBNmjRJkrRkyRL97W9/\nU1lZmdq3b69FixYpJydHTz31lMrKynT27FmNHz9eI0aM0KBBg/Tpp59q3rx5uvXWW/XZZ5/pwQcf\nVPfu3XX//ffr9OnTuv322/Xiiy8qJiam9r+UQG0xJobLSbiy9O/fX06nU2+//fYl6zZu3Kh9+/bp\njTfe0NatW2Wz2bRixQrX+uzsbP3tb3/TDTfcIEnatWuXVq5cqZdfflkrV67UmTNn9Nprr+nOO+/U\nyy+/LEn64x//qGuuuUbvvPOO/vKXv2jp0qU6evRohX2z2Wx666239Oabb+q1115Ty5YtNXbsWEnl\nyUVoaKi2bdum9PR0rVq1SgcOHJDD4dDChQv1l7/8RRs3blR+fn6Fxz5z5oymTJmiZcuWadu2berQ\noYOeeeYZnT59WhMnTtT8+fP15ptvavz48ZowYYKcTqckaceOHbr//vv11ltvqW/fvlq9erUGDBig\ne+65RwMGDNALL7zg2vZC5wux+fPna+bMmXrzzTf10ksvKTMz022b2rRfkc8//1wDBw7U1q1b5e/v\nr9///vd66aWX9Je//EV//vOfde7cOe3fv1+vvvqqNm/erHfffVc2m01r1qxRx44dlZSUpAEDBmjR\nokVyOp3Kz89Xp06d9PLLL7slb08++aT+/Oc/69SpU1q6dKmGDBlCAQM0IIoYXHEee+wxLVy4UEVF\nRW6f79ixQ4MHD1ZAQIAkadCgQdq5c6dr/S233OJ6b1mWYmNjFRAQoPbt28vpdKpPnz6SpGuuuUZ5\neXmSpJkzZ7rGu7Ru3VqtWrXSiRMnqu3j008/rRtuuMF1mem9997TyJEjJUnNmjVTv379tH37dv3j\nH/9Qhw4dFBkZKUmVjt345JNPFBERoZ/85CeSpEmTJmn69Onas2ePIiIi1LFjR0lS7969dfbsWX31\n1VeSpJiYGNcf6ZiYGOXm5koqv6RWUfFy3vl1oaGhev311/XVV1+pVatWWrBggds2tW3/Ys2bN1fX\nrl0lSR06dNDNN9/sdm7y8/PVuXNnZWZmqnHjxpKkzp076/jx4xX+PCUlJYqPj7+knVatWik5OVmT\nJk3Sjh07lJqaWul/B4DHnU9iPP3yYVxOwhXnuuuuU7du3bRq1SrXHz5J+u6779S0aVPXckhIiAoK\nCtyWLxQYGOh6b1mWa9myLJWVlUkqLx6WLFmi3Nxc+fn56cSJE1X+8Zekbdu26eOPP9aGDRtcn+Xn\n5+vRRx+Vn5+fJOmHH35Qv379dPr0aQUHB7u2u7D/F/r222/d+u/v7+867sX7hISEuBKd83/wL/65\namrhwoV6/vnndf/99yswMFC//e1v3S55FRQU1Ev7F2931VVXXbLf6dOnNXv2bO3du1c2m035+fmu\nwvNifn5+buf3QkOHDtXChQv1m9/8xlXwAmgYFDG4Io0fP15Dhw5VRESE67PmzZvr22+/dS3n5+cr\nNDS0Tu1MmDBBjz76qO688
05JqvSP5nknT57U7373O7300ktufyDDwsK0fPlyXXPNNW7bZ2Zm6syZ\nM67lC/t/odDQULd1586dc/18F1+CKigoUGhoqA4dOlSjn9GyLLfCrLCw0PW+ZcuWmjFjhmbMmKGs\nrCyNHTvWlWhZlqVmzZrVuf2acDqdeumll5SXl6fXX39dV111lf7whz/UKBW72HPPPadf/epX2rBh\ng0aMGCG73V5v/QQuCzP2/ngvJy1durShu4AqNPT5CQ8P1+jRo/Xss8+6xjv07t1bb7zxhoqKilRW\nVqbXX3/d7RJSdQlKRb7//nv99Kc/lSRt2bJFZ86ccfsjfyGn06mJEyfqwQcfdO1zXp8+fbR+/XpJ\n5Zc65syZo08//VSdOnXSl19+6RpnU9ldNDfeeKOOHz+uL774QlL5H+Jnn31WN9xwg44fP67PPvtM\nUnkKZFnWJcVSVVq0aKGvv/5aRUVF+uGHH/T222/LsiwVFxfrnnvucV0C+vnPf35JclFR+6GhoZfV\nfk19//33ioyM1FVXXaWTJ0/q/fffd50Lf39/t2KwMl988YUyMzP1xBNP6N5779Xvf//7eu9ndRr6\nu4OqcX6860dbxCxbtqyhu4AqNMT5ufi26OTkZBUXF7uWhwwZohtvvFG33367+vXrp+DgYI0ZM6bS\n/S9cvvj9+eWHH35YDz74oIYMGaJvv/1Ww4cP1xNPPKEjR464tjm//SeffKK///3vWrNmjW677Tbd\ndtttGjhwoEpKSjR58mQdPXpUt956q/r376/i4mJdd911stvtevTRRzVy5EgNHTpUbdu2rfD27+Dg\nYC1atEgTJ0503WkzceJEBQcHa/HixZoyZYoGDBig5cuXuy6hXfhzXLx84fsePXooJiZG8fHx+vWv\nf61evXpJKi8Mhg0b5hoEPHLkSFebF/br4vafeeaZatuv7txevM6yLI0YMUIffvihBgwYoNmzZ2vS\npEnauXOn1qxZo9jYWO3atUuJiYkVtnM+bZoxY4amTJmigIAA3XvvvTp48KC2b99eaduewP+2+TbO\nj3dZztr838taKi4u1tSpU3Xs2DFZlqV58+a5BiSe17FjR9fdH5K0evVq2WyXX2vFxMTon//8Z537\nDM/g/Pguzo1v4/z4Nm+cn6NHjyo+Pl451/y3SgOaebQtv6ICtf2q/M7CCy+/+wqvjonZsmWLWrRo\noUWLFikrK0uLFy/WkiVL3LYJCQmpcA4PAACAC3n1clJWVpbi4uIkSTfddJP27NnjzeYBAPjx4LED\n3k1iHA6HwsLCJJVP7GVZlkpKStSo0X+68cMPP+jRRx/VsWPHlJCQ4DYmoabOnTsnSTp06JDrllT4\nnsomfUPD49z4Ns6PbyotLZVU/jeoslv0Ub88VsSsX79er776qttn+/fvd1uuaDjOtGnTdMcdd8hm\ns2n06NHq1q2b2xiZiy1durTSgVT9+vWrRc/hLRVNJgbfwLnxbZwf39a5c+dLPktJSan/yRF57IDn\nipjhw4dr+PDhbp9NmzZNDodDMTExKi0tldPpdEthJCkxMdH1/pe//KWys7OrLGJSU1Mv+cU4dOiQ\n+vXrpxMB3VRqoxoGLtf+12c1dBcA45w8cUIP3Dta77zzjtq1a9fQ3bkiePVyUmxsrDIzMxUbG6sP\nPvhA3bt3d1t/8OBBLV68WMuWLXNNSd67d+/Lbuf8JaRSW6BKbY2r2RrAxdq29b27EABTeG0YgzfG\nrDAm5j8GDhyoDz/8UKNGjZK/v7/rOSovvPCCbrrpJnXp0kWRkZEaOnSoGjVqpJ49e1aZwgAAgCuX\nV4sYm82mp59++pLPLxy8O3XqVG92CQAAI1U1AWR9tuHLfHvEDgAAQCV4ACQAAAYqHxLj6STGo4ev\nM5IYAABgJJIYAABMZP375ek2fBhJDAAAMBJFDAAAMBKXkwAAMBC3WJPEAAAAQ5HEAABgIEt
eSGJ8\nfGQvSQwAADASSQwAAAZiTAxJDAAAMBRJDAAABiKJIYkBAACGIokBAMBEPHaAJAYAAJiJJAYAABN5\nYUyMfHxMDEUMAACod3PmzNG+ffskSdOnT1enTp0kSSdPntTEiRNd2x09elQTJ07UrbfeqmnTpikn\nJ0dFRUWaNGmSevToUWUbFDEAABjIl+9O2r17t3JycpSenq7Dhw9r8uTJSktLkyRdffXVWrt2rSSp\npKRE9957r+Lj47Vp0yaFhoZq0aJF+vrrrzVu3Di99tprVbZDEQMAAOpVVlaW4uLiJElRUVEqKChQ\nYWGhmjRp4rbdhg0bdNtttykwMFB33nmnSktLJUktWrTQ6dOnq22Hgb0AAKBeORwOhYaGupbDwsKU\nm5vrto3T6dT69es1fPhwSZK/v78CAwMlSX/60580ePDgatshiQEAwEC+fDnpYk6n85JjffLJJ4qJ\niXEVLuetW7dOBw4c0PLly6s9LkUMAACoUnx8/CWfpaSkKDU1tcLt7Xa78vLyXMsOh0Ph4eFu22zf\nvl09e/Z0++yVV17R+++/r+eff16NGlVfolDEAABgIMvy/GMBzh8+MzNTERERNd4vNjZWy5cvV2Ji\norKzs2W32xUUFOS2zYEDB3TXXXe5lr/66iu9+uqrevnllxUQEFCjdihiAABAveratauio6M1YsQI\nWZalWbNmKSMjQyEhIUpISJAkffPNN2rZsqVrn4yMDOXn5+vXv/6167M//elP8vf3r7QdihgAAEzl\nw3PRTZgwwW05JibGbfmtt966ZPuL96kOdycBAAAjkcQAAGAgk+5O8hSSGAAAYCSSGAAADEQSQxID\nAAAMRRIDAICBSGJIYgAAgKEoYgAAgJG4nAQAgIkseX6yO9++mkQSAwAAzEQSAwCAgRjYSxIDAAAM\nRRIDAICJvJDEiCQGAACg/pHEAABgIMuyPJ6UMCYGAADAA0hiAAAwkCUvJDE+PlEMSQwAADASSQwA\nACZixl6SGAAAYCaSGAAADGRZ8sLdSR49fJ2RxAAAACNRxAAAACNxOQkAAAMx2R1JDAAAMBRJDAAA\nBiKJIYkBAACGIokBAMBETHZHEgMAAMxEEgMAgIEYE0MSAwAADEUSAwCAgUhiSGIAAIChSGIAADCS\n5YWkhCQGAACg3pHEAABgIG+MifH48euIJAYAABiJIgYAABiJy0kAAJiIxw6QxAAAADORxAAAYCAG\n9pLEAAAAQ5HEAABgIMuSF5IYzx6+rkhiAACAkUhiAAAwkMXdSSQxAADATCQxAAAYiLuTSGIAAICh\nSGIAADAQY2JIYgAAgKFIYgAAMJAlL4yJ8fEohiQGAAAYiSIGAAAYictJAACYyAtXk5y+fTWJJAYA\nAJiJJAYAAAPZbJYsm2ejEqfNUplHW6gbkhgAAGAkihgAAAx0/qkDnn7V1pw5c5SYmKjExEQdOHDA\nbd3x48d1zz33aPjw4Xr88cfd1p07d04JCQnKyMiotg2KGAAAUK92796tnJwcpaena+HChZo9e7bb\n+rlz5+o3v/mN1q9fr6uuukpHjx51rfvjH/+o5s2blz8bqhoUMQAAGMiyLK+8aiMrK0txcXGSpKio\nKBUUFKiwsFCSVFZWpr1796p3796SpBkzZigiIkKSdPDgQX311Vfq06ePnE5nte1QxAAAgHrlcDgU\nGhrqWg4LC1Nubq4k6dSpU2ratKlmz56tkSNHat68ea7tFixYoGnTptW4He5OAgDAQHUds1KzRsr/\nIz4+/pJVKSkpSk1NrdFhnE6nK9UpKSnRsWPHlJycrFatWumhhx7S1q1bVVhYqG7duqlNmzY1SmEk\nihgAAFCNzMxM1yWfmrDb7crLy3MtOxwOhYeHS5JCQ0PVtm1btW7dWpLUvXt3HTx4UP/85z915MgR\nbd26VSdOnFBAQIBatWqlHj16VNoORQwAAAaqy5iVy2i
kVrvFxsZq+fLlSkxMVHZ2tux2u4KCgiRJ\nV111lcLDw3XkyBFFRkbq008/1a233qoHH3zQtf+yZcsUERFRZQEjUcQAAIB61rVrV0VHR2vEiBGy\nLEuzZs1SRkaGQkJClJCQoMcee0xTp05VaWmp2rdvr379+tWqHYoYAABQ7yZMmOC2HBMT43r/05/+\nVOvWrat035SUlBq1QREDAICRPH85ySlPjxyuG26xBgAARiKJAQDAQN64xdrjt3DXUYMkMVU9T2Hn\nzp0aNmyYEhMT9fzzzzdE9wAAgAG8XsRU9zyF2bNna8WKFUpPT9euXbt08OBBb3cRAACf58uPHfAW\nrxcxVT1P4ciRI2rWrJlatmwpSbrlllu0a9cub3cRAAAYwOtFTEXPU3A4HJKk3NxchYWFua07/6wF\nAADwH+fHxHj65csafGBvVc9HqMmzE5YuXaply5bVZ5cAAKi1uj5nCDXn9SKmqucpXH311a5URpK+\n+eYb2e32Ko+Xmpp6yS/G0aNHK/wlAgDA0y73OUO1VZ6UeDYq8fUkxuuXk2JjY5WZmSlJlzxPoW3b\ntiosLNTJkydVVlamHTt2qGfPnt7uIgAAMIDXk5jqnqcwc+ZM13TDAwcOVLt27bzdRQAAfB7zxDTQ\nmJiqnqfQrVs3rV+/3ttdAgAAhmnwgb0AAODyeWMeF+aJAQAA8ACKGAAAYCQuJwEAYCAG9pLEAAAA\nQ5HEAABgJG88oNG3oxiSGAAAYCSSGAAADMSYGJIYAABgKJIYAAAMxGR3JDEAAMBQJDEAABiIMTEk\nMQAAwFAkMQAAGIgxMSQxAADAUCQxAAAYiDExJDEAAMBQFDEAAMBIXE4CAMBADOwliQEAAIYiiQEA\nwEAkMSQxAADAUCQxAAAYyseDEo8jiQEAAEYiiQEAwECMiSGJAQAAhiKJAQDAQDx2gCQGAAAYiiQG\nAAADlScxnh4T49HD1xlJDAAAMBJJDAAABmJMDEkMAAAwFEUMAAAwEpeTAAAwkM2yZPPw9R5PH7+u\nSGIAAICRSGIAADAQA3tJYgAAgKFIYgAAMJEXHgDp61EMSQwAADASSQwAAAaySbJ5OCjx9aTD1/sH\nAABQIZIYAAAMZHlhTIzHx9zUEUkMAAAwEkkMAAAGYp4YkhgAAGAokhgAAFDv5syZo3379kmSpk+f\nrk6dOrnW9e3bV61bt5bNVp6lLFq0SHa7Xa+//rpWr16tkpISjRs3Tn379q2yDYoYAAAMZP37n6fb\nqI3du3crJydH6enpOnz4sCZPnqy0tDS3bVauXKnGjRu7lr/99lstX75cGRkZ+u677/TMM89UW8Rw\nOQkAANSrrKwsxcXFSZKioqJUUFCgwsJCt22cTqfb8q5du/TLX/5SV111lcLDwzV79uxq26GIAQDA\nQDbLO6/acDgcCg0NdS2HhYUpNzfXbZvp06dr5MiRmjt3rpxOp3JycvTDDz9o7NixGjlypHbs2FFt\nO1xOAgAAVYqPj7/ks5SUFKWmptZof6fT6TbnzLhx49SrVy81a9ZMqamp2rJli0pKSvTtt9/queee\n0/HjxzVq1Cht377dNW6mIhQxAACYyIsPgMzMzFRERESNd7Pb7crLy3MtOxwOhYeHu5aHDBniet+n\nTx9lZ2crMjJSXbt2lWVZatOmjZo3by6HwyG73V5pO1xOAgAA9So2NlaZmZmSpOzsbNntdgUFBUmS\nCgoKlJSUpHPnzkkqHwT8s5/9TN27d9fOnTtd2xQUFKhFixZVtkMSAwCAgXx5sruuXbsqOjpaI0aM\nkGVZmjVrljIyMhQSEqKEhATFxcUpMTFRQUFBio6O1qBBg2RZlnr16qWRI0eqqKhI06ZNk5+fX5Xt\nUMQAAIB6N2HCBLflmJgY1/vk5GQlJydfss99992n++67r8ZtUMQAAGAgm2XJ5uEoxtPHryvGxAAA\nACORxAAAYCBfHhP
jLSQxAADASCQxAAAYyJLn54nx9LOZ6ookBgAAGIkiBgAAGInLSQAAGIiBvSQx\nAADAUCQxAAAYyLI8PxkdSQwAAIAHkMQAAGAg698vT7fhy0hiAACAkUhiAAAwkGV5YbI7Hx8UQxID\nAACMRBIDAICBbFb5y9Nt+DKSGAAAYCSSGAAADMSYGJIYAABgKJIYAAAMxLOTSGIAAIChKk1iysrK\nqtzRZqP+AQAADafSIua6666rdCfLsvT55597pEMAAKB6DOytooj54osvvNkPAACAy1LtNaFTp07p\nd7/7nX77299KkrZt2yaHw+HxjgEAgMpZ+s+Ed556+XYOU4MiZurUqYqKitLx48clScXFxZo8ebLH\nOwYAAFCVaouYM2fO6L777pO/v78k6bbbbtPZs2c93jEAAFC582NiPP3yZdUWMWVlZSopKXEtnzp1\niiIGAAA0uGonuxs5cqSGDRsmh8OhsWPHau/evZo4caI3+gYAACphyfNjVnw7h6lBEfOrX/1Kv/jF\nL/SPf/xDkvTEE0+oTZs2Hu8YAABAVaotYs6cOaOtW7fqyy+/lGVZcjgcGjZsmIKCgrzRPwAAUAGb\nZcnm4TErnj5+XVU7Juahhx5Sdna2rr/+enXs2FGfffaZxo4d642+AQAAVKraJKakpESzZ892LY8c\nOVIjR470aKcAAEDVeABkDZKYdu3aKS8vz7V86tQp/eQnP/FknwAAAKpVaRJz9913S5KKiooUHx+v\nDh06yGaz6csvv9S1117rtQ4CAABUpNIiZty4cZXu5OuT3wAA8GPHAyCrKGJuvvlm1/vvvvtOZ86c\nkVQ+RmbChAlav36953sHAABQiWoH9i5btkxr1qzR2bNnFRISosLCQt1+++3e6BsAAKiMFwb2+vps\nd9UO7H333Xf14YcfqkuXLtq5c6eeffZZtW/f3ht9AwAAqFS1RUxQUJD8/f1VXFwsSerdu7c++OAD\nj3cMAABU7vxkd55++bJqLycFBwdr06ZNat++vWbMmKGoqCjl5ubWqdE5c+Zo3759kqTp06erU6dO\nrnV9+/ZV69atZbOV11cLFy7U1VdfXaf2AADAj0+1RcyiRYt06tQpxcfHa+XKlTpx4oQWL15c6wZ3\n796tnJwcpaen6/Dhw5o8ebLS0tLctlm5cqUaN25c6zYAAPixY7K7KoqYI0eOuC3n5+dr2LBhkup2\ny1VWVpbi4uIkSVFRUSooKFBhYaGaNGni2sbpdNb6+AAA4MpQaRFz3333Vbnju+++W6sGHQ6HOnbs\n6FoOCws/mDQBAAAdVElEQVRTbm6uWxEzffp05eTkqEuXLpoyZYrP36cOAIC3WfL8PC6+/te30iKm\ntkXK5XI6nW4nYdy4cerVq5eaNWum1NRUbdmyRYMHD67VsZc/84ha2FvXV1eBK8ZNT21r6C4AxnEW\n5lW/EepVtWNi6pvdbnd7FpPD4VB4eLhreciQIa73ffr0UXZ2dpXHW7p0qZYtW1b/HQUAoBbi4+Mv\n+SwlJUWpqan12o5NNbjFuB7a8GVe719sbKwyMzMlSdnZ2bLb7QoKCpIkFRQUKCkpSefOnZNUPgj4\nZz/7WZXHS01N1T//+U+31/njAwDgbZmZmZf8XarvAgblvJ7EdO3aVdHR0RoxYoQsy9KsWbOUkZGh\nkJAQJSQkKC4uTomJiQoKClJ0dLQGDRrk7S4CAODzeHZSDYqYQ4cO6emnn1ZBQYH++te/6pVXXtEv\nfvELXXPNNbVudMKECW7LMTExrvfJyclKTk6u9bEBAMCVodrLSU888YTuuusu1+Rz11xzjR5//HGP\ndwwAAKAq1RYxTqdTCQkJriLmF7/4hes9AABoGJYl2Tz88vGrSdUXMaWlpTpz5oxr+f/+7/909uxZ\nj3YKAACgOtWOiXnooYc0dOhQnTp1SkOGDNGJEyc0b948b/QNAABU4nxa4uk2fFm1R
UyvXr30+uuv\n61//+pckKTo6WoGBgR7vGAAAQFWqLWKeeeYZWZblep7R9u3bJZXPrAsAABoGt1jXYEyMn5+f/Pz8\n1KhReb3z97//XadPn/Z4xwAAAKpSbRJz8SyDZWVlSklJ8ViHAABA9RgTU4vHDpSUlOjrr7/2QFcA\nAMCPxZw5c5SYmKjExEQdOHCgwm0WLVqkpKQkSdLp06f10EMP6Z577tHw4cNr9CDqGg3svfCa2Hff\nfaeBAwfW9GcAAAAeYHlhHpfaHn/37t3KyclRenq6Dh8+rMmTJystLc1tmy+//FIff/yx/P39JUkb\nNmzQtddeq3HjxikvL0+jRo1S3759q2yn2iLmr3/9q2tQryQFBQUpLCysNj8TAAC4AmRlZSkuLk6S\nFBUVpYKCAhUWFqpJkyaubebPn6/x48fr2WeflSSFh4e7rvQUFBTUqNaosohxOp2aN2+eqwEAAOAb\nLMuSzUfvTnI4HOrYsaNrOSwsTLm5ua4iZuPGjerevbvatGnj2mbgwIF69dVXNXjwYOXl5em5556r\ntp0qixjLstS2bVtt2rRJnTt3dt2hJEmRkZGX/UMBAADzxMfHX/JZSkrKJTf/VMbpdLoKovz8fG3e\nvFkrV67U8ePHXdtkZGSoffv2WrVqlQ4ePKhJkyZp48aNVR632stJb7/9tt5+++1LPq/JgBsAAOAZ\nNtXi7pxatCFJmZmZioiIqPF+drtdeXl5rmWHw6Hw8HBJ0kcffaTc3FzdfffdKioq0uHDh/X000/r\n+++/V+/evSVJHTp0UF5enoqLi11jZipSaRHz2muvaciQIRQrAADgssTGxmr58uVKTExUdna27Ha7\ngoKCJEn9+/dX//79JUk5OTmaOnWqpk2bphdeeEEHDhxQQkKCTp48KT8/vyoLGKmKIubVV1/VkCFD\n6vFHAgAAV4KuXbsqOjpaI0aMkGVZmjVrljIyMhQSEqKEhATXdhdeZrr77rs1ZcoUjR49WsXFxXrq\nqaeqbafay0kAAMD3WPLCLdZ12HfChAluyzExMZdsExERoTVr1kiSgoODazSY90KVFjH79u1zXZu6\nmGVZeu+99y6rIQAAgPpUaRFz3XXXafHixW5zxAAAAN9g88It1p4+fl1VWsQEBASobdu23uwLAABA\njVVaxFx//fXe7AcAALgMvvzYAW+p9BbzSZMmebMfAAAAl4W7kwAAMJBlSTaSGAAAAPOQxAAAYCDu\nTiKJAQAAhiKJAQDAQNydRBIDAAAMRRIDAICBbF64O8nTx68rkhgAAGAkihgAAGAkLicBAGAg69//\nPN2GLyOJAQAARiKJAQDAQDx2gCQGAAAYiiQGAAAD2eSFW6w9e/g68/X+AQAAVIgkBgAAA1mWJcvD\ng1Y8ffy6IokBAABGIokBAMBAPHaAJAYAABiKJAYAAANZlufncfHxITEkMQAAwEwUMQAAwEhcTgIA\nwEDljx3w9C3WHj18nZHEAAAAI5HEAABgIG6xJokBAACGIokBAMBA3GJNEgMAAAxFEgMAgIFssmST\nZ6MSTx+/rkhiAACAkUhiAAAwEGNiSGIAAIChSGIAADCQJc/P4+LjQQxJDAAAMBNJDAAABrJZlsef\nneTp49cVSQwAADASRQwAADASl5MAADAQt1iTxAAAAEORxAAAYCAG9pLEAAAAQ5HEAABgIMbEkMQA\nAABDkcQAAGAgS55PInw8iCGJAQAAZiKJAQDAQJZlyfLwoBVPH7+uSGIAAICRSGIAADCQJc+PWfHt\nHIYkBgAAGIokBgAAAzFjL0kMAAAwFEUMAAAwEkUMAAAGsrz0qq05c+YoMTFRiYmJOnDgQIXbLFq0\nSElJSZe1z4UYEwMAAOrV7t27lZOTo/T0dB0+fFiTJ09WWlqa2zZffvmlPv74Y/n7+9d4n4uRxAAA\nYKDzD4D09Ks2srKyFBcXJ0mKiopSQUGBCgsL3
baZP3++xo8fL6fTKUnatWtXtftcjCIGAADUK4fD\nodDQUNdyWFiYcnNzXcsbN25U9+7d1aZNG9dneXl5Ve5TES4nAQBgJM8/duD8qJj4+PhL1qSkpCg1\nNbVGR3E6na6+5ufna/PmzVq5cqWOHz9eo30qQxEDAACqlJmZqYiIiBpvb7fblZeX51p2OBwKDw+X\nJH300UfKzc3V3XffraKiIh0+fFhPP/10lftUhstJAAAYyOalV23ExsYqMzNTkpSdnS273a6goCBJ\nUv/+/bVlyxa98sorWrZsma677jpNmzatyn0qQxIDAADqVdeuXRUdHa0RI0bIsizNmjVLGRkZCgkJ\nUUJCgmu7Cy8ZVbRPdShiAAAwkGV5fkxMXY4/YcIEt+WYmJhLtomIiNCaNWsq3ac6XE4CAABGIokB\nAMBAdZ1Rt6Zt+DKSGAAAYCSKGAAAYCQuJwEAYKDyxwJ4emCvRw9fZyQxAADASCQxAAAYqC6T0V1O\nG77M1/sHAABQIZIYAABM5IXJ7nx9UAxJDAAAMBJJDAAABmKyO5IYAABgqAYpYr744gslJCRo3bp1\nl6zbuXOnhg0bpsTERD3//PMN0DsAAHxf+Twxnn/5Mq8XMWfPntW8efPUs2fPCtfPnj1bK1asUHp6\nunbt2qWDBw96uYcAAMAEXi9iAgICtGLFCrVs2fKSdUeOHFGzZs1c62655Rbt2rXL210EAMDn2WR5\n5eXLvF7E+Pn5KSAgoMJ1ubm5CgsLcy2HhYUpNzfXW10DAAAG8am7ky6+393pdFa7z9KlS7Vs2TJP\ndQkAgMsSHx9/yWcpKSlKTU2t13a8MWbF18fE+FQRY7fb5XA4XMvffPON7HZ7lfukpqZe8otx9OjR\nCn+JAADwtMzMTEVERDR0N64IDXaLdUUpS9u2bVVYWKiTJ0+qrKxMO3bsqHQAMAAAuLJ5PYnZt2+f\npk+frry8PPn5+SktLU1Dhw5VZGSkEhISNHPmTKWkpEiSBg4cqHbt2nm7iwAA+Dzr3/883YYv83oR\n06VLF23evLnS9d26ddP69eu92CMAAGAinxoTAwAAaoaBvTx2AAAAGIokBgAAA1lemIzO18fEkMQA\nAAAjkcQAAGAgxsSQxAAAAEORxAAAYCBLXkhiPHv4OiOJAQAARiKJAQDAQMzYSxIDAAAMRRIDAICB\nbFb5y9Nt+DKSGAAAYCSKGAAAYCQuJwEAYCAG9pLEAAAAQ5HEAABgIi88dsDHgxiSGAAAYCaSGAAA\nDMSYGJIYAABgKJIYAAAMxGR3JDEAAMBQJDEAABjIkufHrPh4EEMSAwAAzEQSAwCAgSwvzBPj8Xlo\n6ogkBgAAGIkkBgAAA1ny/JgVHw9iSGIAAICZKGIAAICRuJwEAICBLMuSzcMjby0fH9lLEgMAAIxE\nEgMAgIEY2EsSAwAADEUSAwCAiYhiSGIAAICZSGIAADCQ9e9/nm7Dl1HEAACAejdnzhzt27dPkjR9\n+nR16tTJtW7dunXasGGDbDabYmJi9Pvf/16WZWn27Nnat2+fSktLlZycrEGDBlXZBkUMAAAG8uUH\nQO7evVs5OTlKT0/X4cOHNXnyZKWlpUmSzpw5o3feeUfr16+Xn5+f7rvvPn3yyScqKirS0aNHtX79\nep0+fVq33347RQwAAPCurKwsxcXFSZKioqJUUFCgwsJCNWnSRMHBwVq9erUk6dy5czp79qzCw8MV\nERGhLl26SJKCg4NVWlqq0tJS+fn5VdoOA3sBADCQ5aVXbTgcDoWGhrqWw8LClJub67bNCy+8oH79\n+qlXr15q166d/Pz8FBQUJEnasGGDevToUWUBI1HEAAAAD3M6nZc8wmDMmDHatm2b9u7dq7///e+u\nz7dt26b09HQ9+eST1R6XIgYAAFQpPj5eMTExbq+lS5dWur3dbldeXp5r2eFwKDw8XJKUn5+v3bt3\nS5ICAgLUs
2dP7dmzR5L0/vvv68UXX9TKlSsVHBxcbb8YEwMAgKm8dAd0ZmamIiIiarx9bGysli9f\nrsTERGVnZ8tut7suFf3www967LHHtGXLFgUGBmrv3r0aOHCgvv32Wy1cuFBr1qxR06ZNa9QORQwA\nAKhXXbt2VXR0tEaMGCHLsjRr1ixlZGQoJCRECQkJGjNmjEaNGuW6xbpfv3565ZVXVFBQoEceecR1\nnPnz56t169aVtkMRAwCAgXx9srsJEya4LcfExLjeJyYmKjEx0W39qFGjNGrUqMtqgzExAADASCQx\nAAAYyJcnu/MWkhgAAGAkkhgAAAxUl8noLqcNX0YSAwAAjEQSAwCAiYhiSGIAAICZSGIAADCS5+eJ\n8fUohiQGAAAYiSQGAAADMU8MSQwAADAURQwAADASl5MAADAQd1iTxAAAAEORxAAAYCKiGJIYAABg\nJpIYAAAMZHlhsjvPT6ZXNyQxAADASCQxAAAYiMnuSGIAAIChSGIAADAQNyeRxAAAAEORxAAAYCKi\nGJIYAABgJpIYAAAMxDwxJDEAAMBQFDEAAMBIXE4CAMBATHZHEgMAAAxFEgMAgKF8PCjxOJIYAABg\nJJIYAABMdYVHMSQxAADASCQxAAAYiMnuSGIAAIChSGIAADAQ88SQxAAAAEORxAAAYCBLnr85yceD\nGJIYAABgJpIYAABMRBRDEgMAAMxEEQMAAIzE5SQAAAxUfjXJ05Pd+TaSGAAAYCSSGAAADMRkdyQx\nAADAUCQxAAAYiDusSWIAAIChSGIAADARUQxJDAAAMBNJDAAABrL+/c/TbfiyBklivvjiCyUkJGjd\nunWXrOvbt69Gjx6tpKQkJSUl6eTJkw3QQwAA4Ou8nsScPXtW8+bNU8+ePSvdZuXKlWrcuLEXewUA\ngGG8ME+Mjwcx3k9iAgICtGLFCrVs2bLSbZxOpxd7BAAATOT1JMbPz09+fn5VbjN9+nTl5OSoS5cu\nmjJliqzLLDVLS0slSfmOb2rdT+BK5izMa+guAMZxfv+tpP/8DYLn+dzA3nHjxqlXr15q1qyZUlNT\ntWXLFg0ePLjS7ZcuXaply5ZVvO6JFE91EwCACvXr1++Sz1JSUpSamlqv7XCHtQ8WMUOGDHG979On\nj7Kzs6vcPjU19ZJfjHPnzqlz58565513qk190DDi4+OVmZnZ0N1ABTg3vo3z47tKS0vVr18/7d+/\nX4GBgQ3dnStCgxUxFY17KSgoUEpKil588UUFBgZq9+7d6tWr12Uf+/wvT7t27ercT3hOREREQ3cB\nleDc+DbOj2/zWgHj41HMnDlztG/fPknlw0Q6derkWvfBBx9oyZIl8vf3V2RkpObOnesKHc6dO6dB\ngwbp4Ycf1p133lllG14vYvbt26fp06crLy9Pfn5+SktL09ChQxUZGamEhATFxcUpMTFRQUFBio6O\n1qBBg7zdRQAAUAe7d+9WTk6O0tPTdfjwYU2ePFlpaWmu9U8++aT++te/Kjw8XBMmTND27duVkJAg\nSfrjH/+o5s2b12g8rNeLmC5dumjz5s2Vrk9OTlZycrIXewQAgHl8ebK7rKwsxcXFSZKioqJUUFCg\nwsJCNWnSRJK0adMmBQcHS5JatGihM2fOSJIOHjyor776Sn369KnRnco8dgAAANQrh8Oh0NBQ13JY\nWJhyc3Ndy+cLmNzcXL3//vvq3bu3JGnBggWaNm1ajdvxuYG99SUlhTuTfBnnx3dxbnwb58e3efP8\nWF6Y7K6+ju90Oi+5PORwOPTQQw9p+vTpCg0N1aZNm9StWze1adOmxvPF/WiLmPq+lQ31i/Pjuzg3\nvo3z49t+rOcnPj7+ks+qum3cbrcrL+8/8005HA6Fh4e7lr/77juNGTNG48ePd83g//777+vIkSPa\nunWrTpw4oYCAALVq1Uo9evSotF8/2iIGAIAfM2/enJSZmXlZd8XFxsZq+fL
lSkxMVHZ2tux2u4KC\nglzrZ8+ereTkZLdHEC1ZssT1ftmyZYqIiKiygJEoYgAAQD3r2rWroqOjNWLECFmWpVmzZikjI0Mh\nISGKjY3V3/72Nx07dkyvvPKKJGnw4MFKTEy87HYoYgAAMJAlL4yJqcO+EyZMcFuOiYlxvf/000+r\n3LemY4u4OwkAABiJJAYAACP5+JS9XkASAwAAjPSjSWKKi4s1depUHTt2TJZlad68eYqMjHTbpmPH\njrrhhhtcy6tXr5bNRh3nSVU9O2Pnzp1atGiR/Pz81KdPH40dO7ahunnFqur89O3bV61bt3Z9RxYu\nXKirr766Qfp5pfriiy+UkpKiBx54QKNHj3Zbx/enYVV1bvjueM+PpojZsmWLWrRooUWLFikrK0uL\nFy92u11LkkJCQrR27doG6uGVp7pnZ8yePVurV69Wy5YtlZSUpP79+6tDhw4N2OMrS3XnR5JWrlyp\nxo0bN1APr2xnz57VvHnz3G5BvRDfn4ZT3bmRvPPdMWmyO0/50cQQFz6n4aabbtKePXsauEeo7NkZ\nknTkyBE1a9ZMLVu2lCTdcsst2rVrV4P19UpU1fk5r6azZqL+BQQEaMWKFa7vyIX4/jSsqs7NeXx3\nvONHU8Q4HA6FhYVJkmw2myzLUklJids2P/zwgx599FElJibqhRdeaIhuXlEqenaGw+GQVP68jPPn\n6/y6C5+rAc+r7tkmUvklppEjR2ru3Ln8j7KX+fn5KSAgoMJ1fH8aVlXn5jxvfHcsL718mZGXk9av\nX69XX33V7bP9+/e7LVf0SzNt2jTdcccdstlsGj16tLp16+Y2RgaeVdUXmT+QDe/iZ5uMGzdOvXr1\nUrNmzZSamqotW7Zo8ODBDdhDnHfxM2j4/vgWvjveY2QRM3z4cA0fPtzts2nTpsnhcCgmJkalpaVy\nOp1q1Mj9x7twNsBf/vKXys7OpojxoKqenXH11Ve7UhlJ+uabb2S3273exytZdc82GTJkiOt9nz59\nlJ2d7dX+oXJ2u53vjw/z2nfHC2NifD2K+dFcToqNjVVmZqYk6YMPPlD37t3d1h88eFAPP/ywnE6n\nysrKtGfPHv3sZz9riK5eMS48Jxc/O6Nt27YqLCzUyZMnVVZWph07dlQ5SA71r6rzU1BQoKSkJJ07\nd05S+SBgvi8No6KUhe+Pb6jo3PDd8S4jk5iKDBw4UB9++KFGjRolf39/LViwQJL0wgsv6KabblKX\nLl0UGRmpoUOHqlGjRurZsycpjIdV9eyMhIQEzZw50zW19MCBA9WuXbsG7vGVpbrzExcXp8TERAUF\nBSk6OlqDBg1q6C5fUfbt26fp06crLy9Pfn5+SktL09ChQxUZGcn3p4FVd2689d2x/v3Pkzx9/Lqy\nnFxMBQDAGEePHlV8fLzSNr2t1m3aerSt48dyNPJX/S/7Kdbe8qNJYgAAuKLw1IEfz5gYAABwZSGJ\nAQDAQAQxJDEAAMBQJDEAABiIZyeRxAAAAENRxAAN7OjRo/qv//ovJSUlKSkpScOGDVNqaqry8/Nr\nfcz169dr2rRpkqTx48frm2++qXTbvXv36siRIzU+dklJia699tpLPl+6dKmeeeaZKvft27fvZbU1\ndepUrV+/vsbbA7iyUMQAPqBFixZau3at1q5dqw0bNigyMlJLly5126a2UzotXry4yinpN2zYoKNH\nj9bq2Be6+Hk+lbmcn8OyrBofF7jSWF7658sYEwP4oG7dumndunWSytOL/v37KycnR88++6w2bdqk\ntLQ02Ww2BQcHa+7cuQoLC9OqVau0fv16RUVFqUWLFq5j9e3bV6tXr1abNm00a9Ys/etf/1JRUZGS\nk5MVGBiot99+W59++qmmTZumNm3a6Mknn1RRUZHOnj2rlJQU17Nfxo8fr5YtW+rGG2+stv9r167V\na6+9psDAQDVq1EjPPPOMmjdvLklKT0/
X559/rmPHjunxxx9Xz549deTIkQrblXi4IYDKUcQAPqa0\ntFRbt251eyxGTEyMpkyZopycHK1atUobNmxQo0aNtHr1aj333HN65JFHtHz5cmVmZio4OFiPPPKI\nmjRp4trf6XTqtdde0/fff6+0tDSdPn1a48eP14oVK3Tttddq7Nixuvnmm/XAAw8oNTVVN9xwg3Jz\nczVs2DC9++67Wrp0qe6//34NGzZM27dvr9HP8Oc//1nBwcGaNWuWNm3apPvvv19S+cM/J06cqI8/\n/lhPPfWUevbsqRkzZlTYLoAqcI81RQzgC06dOqWkpCRJUlFRkW688UaNGTPGtb5z586SpP3798vh\ncOiBBx6QJBUXF6t169b6+uuvFRUVpeDgYEnSzTffrE8//dStjf3796tbt26SpJCQEL344ouX9GPP\nnj1asmSJa7lx48bKzc1Vdna2xo0bJ0m66aabqv15AgMD9etf/1r+/v7KycnR0KFDXet69OghSbr+\n+ut18ODBKtsFgKpQxAA+ICwsTGvXrq10faNG5V9Vy7LUuXNnPf/8827r9+7d6zZ2pLS09JJjWJZV\n4ecXb/P8888rJCTE7XOn0+k6fnWXdw4dOqRly5bpjTfeUNOmTV0PY72wjYvfV9YugMoRxDCwFzDK\n9ddfr7179+rUqVOSpDfffFPbtm1T+/btdejQIZ05c0aStGvXrkv27dy5s+vz06dP66677lJRUZFs\nNpuKi4sllT/Z+q233pJUng7Nnj1bktShQwf94x//qPTYFzpz5oyaNWumpk2b6vTp0/rggw9UUlLi\nWp+VlSWp/EnA0dHRVbYLAFUhiQF8QE3vwGnbtq0mTpyo5ORkNW7cWI0bN9b8+fPVvHlzJScnKzEx\nUW3btlVERITOnTvndvw77rhDn3zyiRITE1VcXKwHHnhAAQEBio2N1axZszR16lQ9+eSTeuyxx5SR\nkaGzZ8/q4YcfliSlpKRo0qRJ2rRpk7p16+ZKhir6Oa677jpFRUXprrvuUtu2bfXggw/q6aefVs+e\nPSVJeXl5+u///m8dO3ZMM2fOlKRK272c/26AKw2T3UmWk6H/AAAY4+jRo4qPj9eGLe+odZu2Hm3r\n+LEcDRvUT5mZmYqIiPBoW7VBEgMAgIG8MY+Lr88Tw5gYAABgJJIYAABM5IUxMT4exJDEAAAAM1HE\nAAAAI1HEAAAAI1HEAAAAIzGwFwAAA1nywmR3nj18nZHEAAAAI5HEAABgICa7I4kBAACGIokBAMBA\nPACSJAYAABiKJAYAAANZ8vzdQz4exJDEAAAAM5HEAABgIqIYkhgAAGAmkhgAAAxUHsR4ep4Y30YS\nAwAAjEQRAwAAjMTlJAAADMRkdyQxAADAUCQxAAAYiDusSWIAAIChSGIAADARUQxJDAAAMBNJDAAA\nRrI8Ptmdr0cxJDEAAMBIJDEAABiIeWIoYgAAMNLJEyd+FG3UBUUMAAAGeuDe0Q3dhQZnOZ1OZ0N3\nAgAA1ExJSYlOeDkhadWqlRo18r3cgyIGAAAYibuTAACAkShiAACAkShiAACAkShiAACAkf4f4/IM\nICv9CaMAAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Normalize the confusion matrix by row (i.e by the number of samples in each class)\n", "cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]\n", "print('Normalized confusion matrix')\n", "print(cm_normalized)\n", "plt.figure(figsize=(8, 8))\n", 
"plot_confusion_matrix(cm_normalized, title='Normalized confusion matrix')\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Make prediction with the model\n", "\n", "Randomly pick an anonymous user from the dataset.\n", "\n", "Example: Predict whether the user will give a business a Five-star rating." ] }, { "cell_type": "code", "execution_count": 47, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def predict_given_user_biz(user, biz, biz_name):\n", "    \"\"\"Print the model's Five-star prediction for one user/business pair.\n", "\n", "    Builds a one-row feature frame from the first row of `user` and the first\n", "    row of `biz`, scales it with the notebook-level `scaler`, and prints the\n", "    label predicted by the notebook-level `lrc_fit`.\n", "\n", "    user: DataFrame; only its first row is used.\n", "    biz: DataFrame; only its first row is used.\n", "    biz_name: display name, used only in the printed message.\n", "    \"\"\"\n", "    a_user = user.copy()\n", "    a_biz = biz.copy()\n", "    # Re-append each listed column under a '_biz' suffix and drop the original.\n", "    for column in {\n", "        'review_count',\n", "        'stars',\n", "    }:\n", "        a_biz[column + '_biz'] = a_biz[column]\n", "        a_biz = a_biz.drop(column, axis=1)\n", "    # Same for the user frame, with a '_user' suffix.\n", "    for column in {\n", "        'date_delta',\n", "        'votes.funny',\n", "        'votes.useful',\n", "        'votes.cool',\n", "    }:\n", "        a_user[column + '_user'] = a_user[column]\n", "        a_user = a_user.drop(column, axis=1)\n", "    a_X_test = pd.concat([a_user.iloc[0], a_biz.iloc[0]], axis=0).to_frame().transpose()\n", "    # Fixed placeholder label for the single row's index (same for every call).\n", "    a_X_test['review_id'] = 'xun_on_postino-arcadia-phoenix'\n", "    a_X_test = a_X_test.set_index('review_id')\n", "    # Review-level features are zero-filled, presumably because the review\n", "    # being predicted has not been written yet.\n", "    for column in {\n", "        'votes.cool_review',\n", "        'votes.funny_review',\n", "        'votes.useful_review',\n", "        'date_delta_review',\n", "        'text_len',\n", "    }:\n", "        a_X_test[column] = 0.0\n", "    # XXX(xun): fix this\n", "    a_X_test['attributes.BYOB/Corkage'] = 4\n", "    # NOTE(review): columns are passed on in the order built above; confirm it\n", "    # matches the column order `scaler` and `lrc_fit` were fitted with.\n", "    a_X_test_scaled = scaler.transform(a_X_test)\n", "    a_y_pred = lrc_fit.predict(a_X_test_scaled)\n", "    # Joining with single spaces reproduces the old print-statement output\n", "    # while also running on Python 3.\n", "    print(' '.join(str(part) for part in (\n", "        'prediction for user ', a_user.index.values[0],\n", "        ' on business ', biz_name, ' is ', a_y_pred[0])))" ] }, { "cell_type": "code", "execution_count": 48, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "user_id\n", "HcOguFNyg9jNkNpTBD2D3g 4\n", "Name: review_count, dtype: int64" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a_user = user_df[user_df.index == 
'HcOguFNyg9jNkNpTBD2D3g']\n", "a_user.review_count" ] }, { "cell_type": "code", "execution_count": 49, "metadata": { "collapsed": false }, "outputs": [], "source": [ "a_biz = biz_df[biz_df.index == 'SDwYQ6eSu1htn8vHWv128g']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# https://www.yelp.com/biz/postino-arcadia-phoenix\n", "\n", "\"postino-arcadia-phoenix\"\n" ] }, { "cell_type": "code", "execution_count": 57, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "prediction for user HcOguFNyg9jNkNpTBD2D3g on business Postino Arcadia Phoenix is True\n" ] } ], "source": [ "predict_given_user_biz(user=a_user, biz=a_biz, biz_name=\"Postino Arcadia Phoenix\")" ] }, { "cell_type": "code", "execution_count": 58, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "user_id\n", "o625WyBtvJ_G3s0FRr6RmQ 10\n", "Name: review_count, dtype: int64" ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "source": [ "another_user = user_df[user_df.index == 'o625WyBtvJ_G3s0FRr6RmQ']\n", "another_user.review_count" ] }, { "cell_type": "code", "execution_count": 51, "metadata": { "collapsed": true }, "outputs": [], "source": [ "another_biz = biz_df[biz_df.index == '1n0n_-Iz0e3iVpH8sereiA']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# https://www.yelp.com/biz/port-authority-of-allegheny-county-pittsburgh\n", "\n", "\"port-authority-of-allegheny-county-pittsburgh\"" ] }, { "cell_type": "code", "execution_count": 52, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "prediction for user HcOguFNyg9jNkNpTBD2D3g on business Port Authority of Allegheny County is True\n" ] } ], "source": [ "predict_given_user_biz(user=a_user, biz=another_biz, biz_name=\"Port Authority of Allegheny County\")" ] }, { "cell_type": "code", "execution_count": 54, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", 
"output_type": "stream", "text": [ "prediction for user o625WyBtvJ_G3s0FRr6RmQ on business Port Authority of Allegheny County is False\n" ] } ], "source": [ "predict_given_user_biz(user=another_user, biz=another_biz, biz_name=\"Port Authority of Allegheny County\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.6" } }, "nbformat": 4, "nbformat_minor": 0 }