{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Mô tả dữ liệu\n", "- Dataset: [Dresses_Attribute_Sales](https://archive.ics.uci.edu/ml/datasets/Dresses_Attribute_Sales)\n", "- Associated Tasks: Classification, Clustering" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# import all necessary library\n", "import pandas as pd\n", "import numpy as np\n", "from pyspark.sql import SparkSession\n", "from pyspark.sql import functions as F\n", "from pyspark.sql.types import *\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# initialize Spark\n", "spark = SparkSession.builder \\\n", " .master(\"local\") \\\n", " .appName(\"Data description Spark\") \\\n", " .getOrCreate()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+----------+-------+-------+------+----+------+--------+------------+---------+-------------+----------+----------+------------+--------------+\n", "| Dress_ID| Style| Price|Rating|Size|Season|NeckLine|SleeveLength|waiseline| Material|FabricType|Decoration|Pattern Type|Recommendation|\n", "+----------+-------+-------+------+----+------+--------+------------+---------+-------------+----------+----------+------------+--------------+\n", "|1006032852| Sexy| Low| 4.6| M|Summer| o-neck| sleevless| empire| null| chiffon| ruffles| animal| 1|\n", "|1212192089| Casual| Low| 0.0| L|Summer| o-neck| Petal| natural| microfiber| null| ruffles| animal| 0|\n", "|1190380701|vintage| High| 0.0| L|Automn| o-neck| full| natural| polyster| null| null| print| 0|\n", "| 966005983| Brief|Average| 4.6| L|Spring| o-neck| full| natural| silk| chiffon|embroidary| print| 1|\n", "| 876339541| cute| Low| 4.5| M|Summer| o-neck| butterfly| natural|chiffonfabric| chiffon| bow| dot| 0|\n", 
"+----------+-------+-------+------+----+------+--------+------------+---------+-------------+----------+----------+------------+--------------+\n", "only showing top 5 rows\n", "\n" ] } ], "source": [ "# reading dataset to dataframe\n", "schema = StructType([\n", " StructField(\"Dress_ID\", StringType(), True),\n", " StructField(\"Style\", StringType(), True),\n", " StructField(\"Price\", StringType(), True),\n", " StructField(\"Rating\", FloatType(), True),\n", " StructField(\"Size\", StringType(), True),\n", " StructField(\"Season\", StringType(), True),\n", " StructField(\"NeckLine\", StringType(), True),\n", " StructField(\"SleeveLength\", StringType(), True),\n", " StructField(\"waiseline\", StringType(), True),\n", " StructField(\"Material\", StringType(), True),\n", " StructField(\"FabricType\", StringType(), True),\n", " StructField(\"Decoration\", StringType(), True),\n", " StructField(\"Pattern Type\", StringType(), True),\n", " StructField(\"Recommendation\", IntegerType(), True)])\n", "\n", "df = spark.read \\\n", " .schema(schema) \\\n", " .format(\"com.databricks.spark.csv\") \\\n", " .option(\"header\", \"true\") \\\n", " .load(\"Dresses_Attribute_Sales.csv\")\n", "\n", "df.show(5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Các loại dữ liệu\n", "- Nominal\n", "- Numbers\n", "- Ordinal\n", "- Ratio" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+-------+--------+-------------+------------+\n", "| Style|NeckLine| Material|Pattern Type|\n", "+-------+--------+-------------+------------+\n", "| Sexy| o-neck| null| animal|\n", "| Casual| o-neck| microfiber| animal|\n", "|vintage| o-neck| polyster| print|\n", "| Brief| o-neck| silk| print|\n", "| cute| o-neck|chiffonfabric| dot|\n", "+-------+--------+-------------+------------+\n", "only showing top 5 rows\n", "\n" ] } ], "source": [ "# Nominal\n", "df_nomial = df[[\"Style\", \"NeckLine\", 
\"Material\", \"Pattern Type\"]]\n", "df_nomial.show(5)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+----------+\n", "| Dress_ID|\n", "+----------+\n", "|1006032852|\n", "|1212192089|\n", "|1190380701|\n", "| 966005983|\n", "| 876339541|\n", "+----------+\n", "only showing top 5 rows\n", "\n" ] } ], "source": [ "# Numbers\n", "df_numbers = df[[\"Dress_ID\"]]\n", "df_numbers.show(5)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+----+\n", "|Size|\n", "+----+\n", "| M|\n", "| L|\n", "| L|\n", "| L|\n", "| M|\n", "+----+\n", "only showing top 5 rows\n", "\n" ] } ], "source": [ "# Ordinal\n", "df_ordinal = df[[\"Size\"]]\n", "df_ordinal.show(5)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+------+\n", "|Rating|\n", "+------+\n", "| 4.6|\n", "| 0.0|\n", "| 0.0|\n", "| 4.6|\n", "| 4.5|\n", "+------+\n", "only showing top 5 rows\n", "\n" ] } ], "source": [ "# ratio\n", "df_ratio = df[[\"Rating\"]]\n", "df_ratio.show(5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Trung tâm dữ liệu\n", "- Trung bình (mean)\n", "$$\\mu = \\bar{x} = \\frac{1}{n} \\sum_{i=1}^n x_i = \\frac{1}{n} (x_1 + ... 
+ x_n)$$\n", "- Trung vị (median)\n", "- Mode" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+-------+-----+--------+--------+------------+\n", "|summary|Style|NeckLine|Material|Pattern Type|\n", "+-------+-----+--------+--------+------------+\n", "| count| 500| 499| 499| 499|\n", "| mean| null| null| null| null|\n", "| stddev| null| null| null| null|\n", "| min|Brief| NULL| acrylic| animal|\n", "| max| work| v-neck| wool| striped|\n", "+-------+-----+--------+--------+------------+\n", "\n" ] } ], "source": [ "df_nomial.describe().show()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+-------+--------------------+\n", "|summary| Dress_ID|\n", "+-------+--------------------+\n", "| count| 500|\n", "| mean| 9.0554168105E8|\n", "| stddev|1.7361896065394258E8|\n", "| min| 1000425584|\n", "| max| 999081623|\n", "+-------+--------------------+\n", "\n" ] } ], "source": [ "df_numbers.describe().show()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+-------+-----+\n", "|summary| Size|\n", "+-------+-----+\n", "| count| 500|\n", "| mean| null|\n", "| stddev| null|\n", "| min| L|\n", "| max|small|\n", "+-------+-----+\n", "\n" ] } ], "source": [ "df_ordinal.describe().show()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+-------+------------------+\n", "|summary| Rating|\n", "+-------+------------------+\n", "| count| 500|\n", "| mean|3.5285999937057495|\n", "| stddev| 2.00536405618619|\n", "| min| 0.0|\n", "| max| 5.0|\n", "+-------+------------------+\n", "\n" ] } ], "source": [ "df_ratio.describe().show()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": 
"stream", "text": [ "Mean rating: 3.52859999371\n" ] } ], "source": [ "mean_rating = df_ratio.agg(F.mean(df_ratio.Rating)).first()[0]\n", "print \"Mean rating:\", mean_rating" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Median rating: 4.59999990463\n" ] } ], "source": [ "# Median of Rating via Spark SQL's exact `percentile` aggregate.\n", "# Uses the `spark` session built at the top of the notebook: the `sqlContext`\n", "# global only exists when the kernel is launched through the pyspark shell,\n", "# so relying on it breaks Restart & Run All on a plain Jupyter kernel.\n", "df_ratio.createOrReplaceTempView(\"df_ratio\")\n", "\n", "median_rating = spark.sql(\"\"\"\n", " SELECT percentile(Rating, 0.5) AS median_rating \n", " FROM df_ratio\n", "\"\"\").first()[\"median_rating\"]\n", "\n", "print \"Median rating:\", median_rating" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Mode rating: 0.0\n" ] } ], "source": [ "counts = df_ratio.groupBy(\"Rating\").count()\n", "\n", "mode_rating = counts.join(\n", " counts.agg(F.max('count').alias('count')),\n", " on='count'\n", " ).limit(1).select(\"Rating\").first()[\"Rating\"]\n", "\n", "print \"Mode rating:\", mode_rating" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/hongong/virtualenv/trustingsocial/lib/python2.7/site-packages/matplotlib/axes/_axes.py:545: UserWarning: No labelled objects found. Use label='...' kwarg on individual plots.\n", " warnings.warn(\"No labelled objects found. 
\"\n" ] }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAA34AAAFRCAYAAAArTH/eAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xm4XWV99vHvLRFBQFGJFBkMVtSidYCU2mItirYoSnAs\nFC1WKq+KU22r0VrH6ovzUNGKSAUnRBSlQlVE1OorIFFkFEkFNQgGVATEgsDv/WM9R7ch52Qn2fvs\nc9b5fq5rX1nz/p1zrSTrXs+znpWqQpIkSZLUX7ebdAGSJEmSpPEy+EmSJElSzxn8JEmSJKnnDH6S\nJEmS1HMGP0mSJEnqOYOfJEmSJPWcwW8OSlJJPjwwvyjJVUk+u57HuSzJNhtYwx2SfDzJyiRnJlky\nzXb7JLm4bbd8YPnObb+V7TibbkgdkiRptEZ1nbGRNSxLcm6Sc5KcneRh02z35XadcU773L0tf3aS\n89qyryXZdbZq12iN8Lr3y0mWtulTkmw96lrbse+UZFWSd8+wzfOTfDfJBUneNLD8Ze3a+OIkfzmO\n+mZi8Jubfgk8IMnmbf7RwOWzXMMhwM+r6t7A24E3rrlBkk2AI4DHALsCBw78w/tG4O1t/5+340mS\npMmbC9cZpwEPqqoHA88Ejpph24Oq6sHts7ot+2hV/WHb/03A28Zcr8Zn5OdjVT22qq7Z6MrW7nXA\nV6dbmeQRwDK68/v+wFva8l2BA4D7A/sA72nX0rPG4Dd3nQLs26YPBD42tSLJXZN8ut0pOyPJA9vy\nuyX5Qru7cBSQgX2eluSsdmfsfUOcaMuAY9r0CcDeSbLGNnsAK6vq+1V1E3AcsKxt98i2H+04+6/v\nL0CSJI3NTNcZWyQ5ul03fDvJsrZ8SZL/TvKt9vnTtnyv1tpyQmvl+Mharhl+R1VdX1XVZrcAaqbt\n17L/tQOz672/5pwNOR83T3JckouSnAhsPrDPb3q9tWvmFe36+NCBba5P8vok32nX09uuq8gkuwPb\nAl+YYbPnAIdX1Y0AAzcrlgHHVdWNVXUpsJLuWnrWGPzmruOAA5JsBjwQOHNg3WuAb1fVA4GXA8e2\n5a8CvtbuLpwI7ASQ5A+AvwL2bHfGbgEOauuOmmoWX8P2wI8Aqupm4BfA3abbplnVlt0NuKbtN7hc\nkiTNDTNdZ/wz8KWq2gN4BPDmJFsAq4FHV9VudNcV7xrY5yHAi+h6AN0L2BMgyWuT7Le2ApI8Icl3\ngZPpWv2m8x/txvW/DAbKJIcl+R+6Fr8XrMfPrrlnQ87H5wA3VNUf0F0D7z7NsZ9ZVbsDS4EXJJm6\nnt0COKOqHkTXgvcsgCT7JXntmgdJcjvgrcA/ruNnuQ/wZ+keefpKkj9qy6e7bp41i2bzyxa6dsL8\nKXA/4C50J9yUs6rqlKmZqjo33XN1B9LdBRn0MOBJbbsvtZa+OwEPB57Ylp+c5Odt+73p/jJ8s/17\nuTndP95U1d+N8EeUpFmX5CDg4Kr6iwl9/wXAYVX15Ul8vzSlhaIHt89dgK34be+fS6rqI1PbruM6\n4y+A/ZJMXeBuRncz+cfAu5NM3US+z8A+Z1XVqlbHOcASupvRr5yu3qo6ETgxycPpus89ai2bHVRV\nlyfZCvgk8HTaDe+qOgI4IslfA68ADp7uuzT7WsDaG1gMbA3cvq26BTiiqn42te0Gno8Pp918aPuf\nO00pL0jyhDa9I7AL8FPgJmDqOcIVdF1MqaqTgJPWcpznAqdU1ap1NGgvAu4KPBT4I+D4JPeaaYfZ\nYvAbsxb29gSeAjwZuBo4m+65t+uZuWvCSXT9gvfitq1t61UGcExVvWw99rmc7i/HqiSLgDvT/SVZ\n2zZTdmjLfgpsnWRRa/WbWi5JJLmMrqvMLXT/Dn4OeF5VXT/EvkuAS4HbT/UqaBezH5lht42t9e+q\n6osDy57Rlj2sf
f/9hzjOEtaoWxqVFsSeSnetcTvg63T/F18H3DrDrtNdZwR4UlVdvMb3vBr4CfCg\n9j3/O7D6xoHpW1iPa8yq+mqSeyXZpqquXmPd5e3P65J8lK5r3LFrHOI44L3Dfp/Gp4W9J9Cdiw8F\nvkLXsnUNXdCayfqej8PUsxfdDYU/qaobknyZLjgC/Hqgu/Ew5+yf0LXkPRfYEtg0yfVVtXyN7VYB\nn2rHPivJrcA2TH/dPGsMfmPUnqM7hq5p+cPAI9Y8adfhaLouk+e1E3fKf9N11XxdW351VV2b5KvA\nXwP/muQxdHf7oHuA+jNJ3l5Vq5PcFdiqqn4ww3efRHfn7Bt0gfVLA385pnwT2CXJznQn7gHAX1dV\nJTm97XdcO85n1uPnltR/j6+qLyb5PeDzwMvouvNoAwzcaNMC0lr4XkPXTfLDdP8Pf2st/19PZ7rr\njM8Dz0/y/PZ/+kOq6tt0N4FXVdWtSQ4GNnhgiiT3Bv6nHX834A6scYO53XjeuqquTnJ74HHAF9u6\nXarqkrbpvsAlaKKSPAT4L7puk0cBT6yqX67HIdb3fJy67v1SkgfQdRFd053pBiu8Icn96MLoBqmq\ng6am2w3ApWsJfQCfpuuSenqS+wCb0jX8nAR8NMnbgHvQtTyetaH1bAif8RuTFvqOpburvVtV/et6\nhj6qalVVvWstq14N7N6atA/nt10bXgM8vHU7eiLww3acC+m6QHyh7XMqsF2rc7pn/D4A3C3JSuDF\nwPK2/T2SnNKOezPwPLq/kBcBx1fVBW3/lwIvbvvfrR1Pkn5HVV1J92/Ig6eWJdk33QP81yb5UWtl\nmDI1kto17cH8P0nyjCRfG9i/0g31fkmSa5IcMfVcUJJNkrw1ydVJLk3yvLb9Bt8ITTeIwKPa9B7p\nhqa/NslP2n/w09V9uySvSPKDJKuTHJvkzgPH/Zu27qfpnm0a/J5XpxtI48NJrgWe0b77G+1nviLJ\nuzPwKp32cz63/V6uS/K6JL+f5P+1eo+Pr96ZNwZC3xPorjOWV9WK9Qh9M11nvI6uW9657ZridW35\ne4CDk3yH7rGVdV7UZ/pn/J4EnJ+uW+gRwF9N1d6WQRcGP9+uXc6hu8n8/rbueekG6ziH7jrFbp4T\nNBD6Dquqp1bVJ9Yz9G3I+fheYMskFwGvpeuuuabPAYvaNocDZwzxs6z1Gb917DN4PX00cK8k59Ma\nQKpzAXA8cGGr67CqumV9vmejVZWfMXzo/mE6FbjjpGvx48ePn7n0AS4DHtWmdwDOA945sH4v4A/p\nbk4+kK5r2f5t3RK6LvKLBrZ/Bt2zRFPzRffcxtZ0z4FcBezT1j2b7j/dHeh6RXxxzeNNV+sM3zf4\n83wDeHqb3hJ46Ax1P5NuVLd7tW0/BXyorduVrhvsw+juFr8F+PXA97y6ze/ffk+b0z3L/VC63jxL\n6G7IvWiN38tngDvRDSd+I12PkHvR3RW/kO4CZeLniJ+h/h69pP3dufuka/GzsD/AvYEr6bpjTrwe\nP9N/bPEbg3TDwT4FeEJV3TDpeiRpDvp0kuvoRjhbTTciGwBV9eWqOq+qbq2qc+mG9f7z9Tz+4VV1\nTVX9EDid37YoPpUuZK6qqp/T3QEeptZrpj50rR7T+TVw73TPKl1fVTPdXT4IeFt1r8S5nq676wGt\n9fHJwH9W1deqe13OK7ntM+HfqKpPt9/Tr6pr7Tmjqm6uqsuA93Hb39ubqura6u48nw98oX3/L+ju\n1j9kiN+HJqydI/9Ed0Nk9bq2l8bsWcAHq+qTky5EMzP4jccTgZNriIEKJGmB2r+qtqJr3bsf3YPv\nACT54ySnJ7kqyS/oWum2WfthpnXlwPQNdC1q0D1XMTic9uD0TLVuPfWhG9ltOofQjXT43STfTPK4\nGba9BzD4rPUP6Frrtl2zznYTcc0Btn6n9iT3SfLZJFe27p9v4La/t58MTP9qLfN
bovlgL+DSqvqf\nSReiha11OX4KXZdGzXEGv/F4CvCJSRchSXNdVX0F+CBdV8YpH6V7CH7Hqroz8O/8dkj6jX1J8xV0\n3Tyn7Djdhhuiqi6pqgOBuwNvBE5I976ptdX9Y+CeA/M7ATfThbHfqTPJ5tx2dOc1j/le4LvALlV1\nJ7r3vK572DvNR15naK7YjW5EzO9MuhCtm8FvxJJsTTeK5+cnXYskzRPvAB6d5EFtfivgZ1X1v0n2\noBu1bcpVdMPTb+g7kY4HXphk+/bv9Us3tOi1SfK0JIur6la64cuhq3dtdX8M+PskOyfZkq6F7uPV\nDZx1AvD4JH/aBlx5NesOcVsB1wLXpxu97jmj+rk05yyje6edNGnLgE9W1cbelNMsMPiN3nbAj6vq\nV5MuRJLmg6q6im4U5KkXPT8XeG17BvCVdGFtatsbgNcDX2/P3K3v0NzvB74AnAt8m+5FwTfT3bEe\nhX2AC5JcD7wTOKA9f7e2uo8GPkQ34ueldO9Eez5Aewbv+XTdp66gG+hlNb/7rrQ1/SNdSL6u/Zwf\nH9HPpDkk3WsNtqE7Z6RJ2xFfpTFvxIA+GkkWA28Ftqd7selpwOer6uiJFiZJmla6d57+e1Xdc50b\nT1BrEbyGrhunF/wLUAt87wEWA4+he1fYd6rqDRMtTAtSkqfTvVdxT7rnky8F/qmqrphoYZqRL3Af\nnV/T9bnfrM0/EViv9/ZJksarPSv3CLpWv23pRhM9caJFTSPJ4+luIobuGcjz6F4doYXpVuCxdAP/\nQHfN4XWcJmVHumvdRXSNHkuBF020Iq2TXT1HpKquAd4O3NQW3Qi8bfo9JEkTMPXS65/TdfW8iN92\nMZ1rltENAPNjYBe6bqN201mgqnvR83J++9L0G+kG8JEm4d/oRgKGrvHjiKq6eoL1aAh29RyhJHel\n+w96U+Bfq2quXkxIkqR5pr2/bxVda/WnqupJEy5JC1iSfwZeSxf8dmzPa2sOs8VvhKrqZ3SjbBW2\n9kmSpBFqI77+3zZra58m7V1017ynGPrmB/uGj97zgJNa109JkqRRejdwXVU5joAmqqquS3IA3cjE\nmgfmdVfPbbbZppYsWTLpMn5jxYrbLtt999mvQ5Ik9ZAXGppDVvz4tufj7vfwfJyEFStWXF1Vi9e1\n3bwOfkuXLq2zzz570mX8Rtbyat15/OuVJElziRcamkPymtuej/Uqz8dJSLKiqpauazuf8ZMkSZKk\nnjP4SZIkSVLPGfwkSZIkqecMfpIkSZLUcwY/SZIkSeo5g58kSZIk9ZzBT5IkSZJ6bmzBL8nRSVYn\nOX9g2ZuTfDfJuUlOTLL1wLqXJVmZ5OIkfzmuuiRJkiRpoRlni98HgX3WWHYq8ICqeiDwPeBlAEl2\nBQ4A7t/2eU+STcZYmyRJkiQtGGMLflX1VeBnayz7QlXd3GbPAHZo08uA46rqxqq6FFgJ7DGu2iRJ\nkiRpIZnkM37PBP6rTW8P/Ghg3aq2TJIkSZK0kRZN4kuT/DNwM/CRDdj3UOBQgJ122mnElY3ekuUn\nT7vussP3ncVKJEmSJC1Us97il+QZwOOAg6qq2uLLgR0HNtuhLbuNqjqyqpZW1dLFixePtVZJkiRJ\n6oNZDX5J9gFeAuxXVTcMrDoJOCDJHZLsDOwCnDWbtUmSJElSX42tq2eSjwF7AdskWQW8im4UzzsA\npyYBOKOqnl1VFyQ5HriQrgvoYVV1y7hqkyRJkqSFZGzBr6oOXMviD8yw/euB14+rHkmSJElaqCY5\nqqckSZIkaRYY/CRJkiSp5wx+kiRJktRzBj9JkiRJ6jmDnyRJkiT1nMFPkiRJknrO4CdJkiRJPWfw\nkyRJkqSeM/hJkiRJUs8Z/CRJkiSp5wx+kiRJktRzBj9JkiRJ6jmDnyRJkiT1nMFPkiRJknrO4CdJ\nkiRJPWfwkyRJkqSeM/hJkiRJUs8Z/CRJkiS
p5wx+kiRJktRzBj9JkiRJ6jmDnyRJkiT1nMFPkiRJ\nknrO4CdJkiRJPWfwkyRJkqSeM/hJkiRJUs8Z/CRJkiSp5wx+kiRJktRzBj9JkiRJ6jmDnyRJkiT1\nnMFPkiRJknrO4CdJkiRJPWfwkyRJkqSeM/hJkiRJUs8Z/CRJkiSp58YW/JIcnWR1kvMHlt01yalJ\nLml/3qUtT5J3JVmZ5Nwku42rLkmSJElaaMbZ4vdBYJ81li0HTquqXYDT2jzAY4Bd2udQ4L1jrEuS\nJEmSFpSxBb+q+irwszUWLwOOadPHAPsPLD+2OmcAWyfZbly1SZIkSdJCMtvP+G1bVVe06SuBbdv0\n9sCPBrZb1ZZJkiRJkjbSxAZ3qaoCan33S3JokrOTnH3VVVeNoTJJkiRJ6pfZDn4/merC2f5c3ZZf\nDuw4sN0ObdltVNWRVbW0qpYuXrx4rMVKkiRJUh/MdvA7CTi4TR8MfGZg+d+00T0fCvxioEuoJEmS\nJGkjLBrXgZN8DNgL2CbJKuBVwOHA8UkOAX4APLVtfgrwWGAlcAPwt+OqS5IkSZIWmrEFv6o6cJpV\ne69l2wIOG1ctkiRJkrSQTWxwF0mSJEnS7DD4SZIkSVLPGfwkSZIkqecMfpIkSZLUcwY/SZIkSeo5\ng58kSZIk9ZzBT5IkSZJ6zuAnSZIkST1n8JMkSZKknjP4SZIkSVLPGfwkSZIkqecMfpIkSZLUcwY/\nSZIkSeo5g58kSZIk9ZzBT5IkSZJ6zuAnSZIkST1n8JMkSZKknjP4SZIkSVLPGfwkSZIkqecMfpIk\nSZLUcwY/SZIkSeo5g58kSZIk9ZzBT5IkSZJ6zuAnSZIkST1n8JMkSZKknjP4SZIkSVLPGfwkSZIk\nqecMfpIkSZLUcwY/SZIkSeo5g58kSZIk9dyiSRcgSZIkqZ+WLD95xvWXHb7vLFUiW/wkSZIkqecM\nfpIkSZLUcwY/SZIkSeq5iQS/JH+f5IIk5yf5WJLNkuyc5MwkK5N8PMmmk6hNkiRJkvpm1oNfku2B\nFwBLq+oBwCbAAcAbgbdX1b2BnwOHzHZtkiRJktRHk+rquQjYPMki4I7AFcAjgRPa+mOA/SdUmyRJ\nkiT1yqwHv6q6HHgL8EO6wPcLYAVwTVXd3DZbBWw/27VJkiRJUh9NoqvnXYBlwM7APYAtgH3WY/9D\nk5yd5OyrrrpqTFVKkiRJUn9Moqvno4BLq+qqqvo18ClgT2Dr1vUTYAfg8rXtXFVHVtXSqlq6ePHi\n2alYkiRJkuaxSQS/HwIPTXLHJAH2Bi4ETgee3LY5GPjMBGqTJEmSpN6ZxDN+Z9IN4vIt4LxWw5HA\nS4EXJ1kJ3A34wGzXJkmSJEl9tGjdm4xeVb0KeNUai78P7DGBciRJkiSp1yb1OgdJkiRJ0iwx+EmS\nJElSzxn8JEmSJKnnDH6SJEmS1HMGP0mSJEnqOYOfJEmSJPWcwU+SJEmSem6o4JfkD8ddiCRJkiRp\nPIZt8XtPkrOSPDfJncdakSRJkiRppIYKflX1Z8BBwI7AiiQfTfLosVYmSZIkSRqJoZ/xq6pLgFcA\nLwX+HHhXku8meeK4ipMkSZIkbbxhn/F7YJK3AxcBjwQeX1V/0KbfPsb6JEmSJEkbadGQ2/0bcBTw\n8qr61dTCqvpxkleMpTJJkiRJ0kgMG/z2BX5VVbcAJLkdsFlV3VBVHxpbdZIkSZKkjTbsM35fBDYf\nmL9jWyZJkiRJmuOGDX6bVdX1UzNt+o7jKUmSJEmSNErDBr9fJtltaibJ7sCvZthekiRJkjRHDPuM\n34uATyT5MRDg94C/GltVkiRJkqSRGSr4VdU3k9wPuG9bdHFV/Xp8ZUmSJEmSRmXYFj+APwKWtH12\nS0JVHTuWqiRJkiRJIzNU8EvyIeD3gXOAW9riAgx+kiRJkjTHDdvitxTYtapqnMVIkiRJkkZv2FE9\nz6cb0EW
SJEmSNM8M2+K3DXBhkrOAG6cWVtV+Y6lKkiRJkjQywwa/V4+zCEmSJEnS+Az7OoevJLkn\nsEtVfTHJHYFNxluaJEmSJGkUhnrGL8mzgBOA97VF2wOfHldRkiRJkqTRGXZwl8OAPYFrAarqEuDu\n4ypKkiRJkjQ6wwa/G6vqpqmZJIvo3uMnSZIkSZrjhg1+X0nycmDzJI8GPgH85/jKkiRJkiSNyrDB\nbzlwFXAe8H+AU4BXjKsoSZIkSdLoDDuq563A+9tHkiRJkjSPDBX8klzKWp7pq6p7jbwiSZIkSdJI\nDfsC96UD05sBTwHuOvpyJEmSJEmjNtQzflX104HP5VX1DmDfMdcmSZIkSRqBYbt67jYwezu6FsBh\nWwvXdrytgaOAB9B1IX0mcDHwcWAJcBnw1Kr6+YZ+hyRJkiSpM2x4e+vA9M20YLYR3/tO4HNV9eQk\nmwJ3BF4OnFZVhydZTjeS6Es34jskSZIkSQw/qucjRvWFSe4MPBx4Rjv2TcBNSZYBe7XNjgG+jMFP\nkiRJkjbasF09XzzT+qp623p858507wT8jyQPAlYALwS2raor2jZXAttOU8uhwKEAO+2003p8rSRJ\nkiQtTMO+wH0p8Bxg+/Z5NrAbsFX7rI9Fbd/3VtVDgF/Sdev8jaoq1vL6iLbuyKpaWlVLFy9evJ5f\nLUmSJEkLz7DP+O0A7FZV1wEkeTVwclU9bQO+cxWwqqrObPMn0AW/nyTZrqquSLIdsHoDji1JkiRJ\nWsOwLX7bAjcNzN/ENF0x16WqrgR+lOS+bdHewIXAScDBbdnBwGc25PiSJEmSpN81bIvfscBZSU5s\n8/vTDcCyoZ4PfKSN6Pl94G/pQujxSQ4BfsDGjRoqSZIkSWqGHdXz9Un+C/iztuhvq+rbG/qlVXUO\n3XODa9p7Q48pSZIkSVq7Ybt6QveuvWur6p3AqiQ7j6kmSZIkSdIIDfs6h1fRtdDdF/gP4PbAh4E9\nx1eaJEmSJN3WkuUnT7vussP3ncVK5o9hW/yeAOxH9+oFqurHrP9rHCRJkiRJEzBs8Ltp8N16SbYY\nX0mSJEmSpFEaNvgdn+R9wNZJngV8EXj/+MqSJEmSJI3KsKN6viXJo4Fr6Z7ze2VVnTrWyiRJkiRp\nFvX52cF1Br8kmwBfrKpHAIY9SZIkSZpn1tnVs6puAW5NcudZqEeSJEnSHDZdq9iS5SfP2GKmyRqq\nqydwPXBeklNpI3sCVNULxlKVJEmSJGlkhg1+n2ofSZIkSdI8M2PwS7JTVf2wqo6ZrYIkSZIkSaO1\nrmf8Pj01keSTY65FkiRJkjQG6wp+GZi+1zgLkSRJkiSNx7qCX00zLUmSJEmaJ9Y1uMuDklxL1/K3\neZumzVdV3Wms1UmSJEmSNtqMwa+qNpmtQiRJkiRJ47HOF7hLkiRJkuY3g58kSZIk9ZzBT5IkSZJ6\nzuAnSZIkST1n8JMkSZKknjP4SZIkSVLPGfwkSZIkqecMfpIkSZLUcwY/SZIkSeo5g58kSZIk9ZzB\nT5IkSZJ6zuAnSZIkST1n8JMkSZKknjP4SZIkSVLPGfwkSZIkqecMfpIkSZLUcwY/SZIkSeo5g58k\nSZIk9dzEgl+STZJ8O8ln2/zOSc5MsjLJx5NsOqnaJEmSJKlPJtni90LgooH5NwJvr6p7Az8HDplI\nVZIkSZLUMxMJfkl2APYFjmrzAR4JnNA2OQbYfxK1SZIkSVLfTKrF7x3AS4Bb2/zdgGuq6uY2vwrY\nfhKFSZIkSVLfzHrwS/I4YHVVrdjA/Q9NcnaSs6+66qoRVydJkiRJ/TOJFr89gf2SXAYcR9fF853A\n1kkWtW12AC5f285VdWRVLa2qpYsXL56NeiVJkiRpXpv14FdVL6uqHapqCXAA8KWqOgg4HXhy2+xg\n4DOzXZskSZIk9dFceo/fS4EXJ1lJ98zfByZcjyRJkiT1wqJ1bzI+VfVl4
Mtt+vvAHpOsR5IkSZL6\naC61+EmSJEmSxsDgJ0mSJEk9Z/CTJEmSpJ4z+EmSJElSzxn8JEmSJKnnDH6SJEmS1HMGP0mSJEnq\nuYm+x0+SJEnS5C1ZfvKM6y87fN9ZqkTjYoufJEmSJPWcwU+SJEmSes7gJ0mSJEk9Z/CTJEmSpJ4z\n+EmSJElSzxn8JEmSJKnnDH6SJEmS1HMGP0mSJEnqOYOfJEmSJPWcwU+SJEmSes7gJ0mSJEk9Z/CT\nJEmSpJ4z+EmSJElSzxn8JEmSJKnnDH6SJEmS1HOLJl2AJEmSpOEsWX7yjOsvO3zfWapE840tfpIk\nSZLUcwY/SZIkSeo5g58kSZIk9ZzP+EmSJEnqrZmei1xIz0Ta4idJkiRJPWfwkyRJkqSeM/hJkiRJ\nUs8Z/CRJkiSp5wx+kiRJktRzBj9JkiRJ6rlZD35JdkxyepILk1yQ5IVt+V2TnJrkkvbnXWa7NkmS\nJEnqo0m0+N0M/ENV7Qo8FDgsya7AcuC0qtoFOK3NS5IkSZI20qwHv6q6oqq+1aavAy4CtgeWAce0\nzY4B9p/t2iRJkiSpjyb6jF+SJcBDgDOBbavqirbqSmDbCZUlSZIkSb2yaFJfnGRL4JPAi6rq2iS/\nWVdVlaSm2e9Q4FCAnXbaaTZKlSRJksZmyfKTZ1x/2eH7zlIl6rOJtPgluT1d6PtIVX2qLf5Jku3a\n+u2A1Wvbt6qOrKqlVbV08eLFs1OwJEmSJM1jkxjVM8AHgIuq6m0Dq04CDm7TBwOfme3aJEmSJKmP\nJtHVc0/g6cB5Sc5py14OHA4cn+QQ4AfAUydQmyRJkiT1zqwHv6r6GpBpVu89m7VIkiRJ0kIw0VE9\nJUmSJEnjZ/CTJEmSpJ4z+EmSJElSzxn8JEmSJKnnDH6SJEmS1HMGP0mSJEnqOYOfJEmSJPWcwU+S\nJEmSes7gJ0mSJEk9Z/CTJEmSpJ4z+EmSJElSzxn8JEmSJKnnDH6SJEmS1HMGP0mSJEnqOYOfJEmS\nJPWcwU+SJEmSes7gJ0mSJEk9Z/CTJEmSpJ4z+EmSJElSzxn8JEmSJKnnDH6SJEmS1HMGP0mSJEnq\nOYOfJEmSJPXcokkXIEmSJI3akuUnz7j+ssP3naVKpLnBFj9JkiRJ6jlb/CRJkqQhzdSSaCui5jJb\n/CRJkiSp52zxkyRJ0oJmK54WAlv8JEmSJKnnDH6SJEmS1HMGP0mSJEnqOYOfJEmSJPWcwU+SJEmS\nes5RPSVJkhaomUazBEe0lPpkzrX4JdknycVJViZZPul6JEmSJGm+m1PBL8kmwBHAY4BdgQOT7DrZ\nqiRJkiSmYvvXAAAFz0lEQVRpfptrXT33AFZW1fcBkhwHLAMunGhV0hxk9xxJ85n/ho3G1O9xzd+X\nLySXtKY51eIHbA/8aGB+VVsmSZIkSdpAqapJ1/AbSZ4M7FNVf9fmnw78cVU9b2CbQ4FD2+x9gYtn\nvdB12wa4etJFqNc8xzROnl8aN88xjZPnl8Ztrp1j96yqxevaaK519bwc2HFgfoe27Deq6kjgyNks\nan0lObuqlk66DvWX55jGyfNL4+Y5pnHy/NK4zddzbK519fwmsEuSnZNsChwAnDThmiRJkiRpXptT\nLX5VdXOS5wGfBzYBjq6qCyZcliRJkiTNa3Mq+AFU1SnAKZOuYyPN6a6o6gXPMY2T55fGzXNM4+T5\npXGbl+fYnBrcRZIkSZI0enPtGT9JkiRJ0ogZ/EYsyT5JLk6yMsnySdejfklydJLVSc6fdC3qnyQ7\nJjk9yYVJLkjywknXpP5IslmSs5J8p51fr5l0TeqnJJsk+XaSz066FvVLksuSnJfknCRnT7qe9WVX\nzxFKsgnwPeDRdC+f/yZwYFVdONHC1BtJHg5cDxxbVQ+YdD3qlyTbAdtV1beSbAWsAPb33zCNQpIA\nW1TV9UluD3wNeGFVnTHh0tQzSV4ML
AXuVFWPm3Q96o8klwFLq2ouvcNvaLb4jdYewMqq+n5V3QQc\nByybcE3qkar6KvCzSdehfqqqK6rqW236OuAiYPvJVqW+qM71bfb27ePdZ41Ukh2AfYGjJl2LNNcY\n/EZre+BHA/Or8KJJ0jyUZAnwEODMyVaiPmld8M4BVgOnVpXnl0btHcBLgFsnXYh6qYAvJFmR5NBJ\nF7O+DH6SpN+RZEvgk8CLquraSdej/qiqW6rqwcAOwB5J7LKukUnyOGB1Va2YdC3qrYdV1W7AY4DD\n2iM484bBb7QuB3YcmN+hLZOkeaE9e/VJ4CNV9alJ16N+qqprgNOBfSZdi3plT2C/9hzWccAjk3x4\nsiWpT6rq8vbnauBEuse85g2D32h9E9glyc5JNgUOAE6acE2SNJQ2+MYHgIuq6m2Trkf9kmRxkq3b\n9OZ0A6F9d7JVqU+q6mVVtUNVLaG7BvtSVT1twmWpJ5Js0QY+I8kWwF8A82qUdYPfCFXVzcDzgM/T\nDYpwfFVdMNmq1CdJPgZ8A7hvklVJDpl0TeqVPYGn090lP6d9HjvpotQb2wGnJzmX7kbpqVXlcPuS\n5ottga8l+Q5wFnByVX1uwjWtF1/nIEmSJEk9Z4ufJEmSJPWcwU+SJEmSes7gJ0mSJEk9Z/CTJEmS\npJ4z+EmSJElSzxn8JEkLWpJb2qsrzk/yn1Pvmpth+62TPHdg/h5JThh/pZIkbThf5yBJWtCSXF9V\nW7bpY4DvVdXrZ9h+CfDZqnrA7FQoSdLGs8VPkqTf+gawPUCSLZOcluRbSc5Lsqxtczjw+62V8M1J\nliQ5v+3zjCSfSvK5JJckedPUgZMckuR7Sc5K8v4k7571n06StGAtmnQBkiTNBUk2AfYGPtAW/S/w\nhKq6Nsk2wBlJTgKWAw+oqge3/ZascagHAw8BbgQuTvJvwC3AvwC7AdcBXwK+M9YfSJKkAQY/SdJC\nt3mSc+ha+i4CTm3LA7whycOBW9v6bYc43mlV9QuAJBcC9wS2Ab5SVT9ryz8B3GekP4UkSTOwq6ck\naaH7VWu9uydd2DusLT8IWAzs3tb/BNhsiOPdODB9C95klSTNAQY/SZKAqroBeAHwD0kWAXcGVlfV\nr5M8gi4YQtdVc6v1PPw3gT9Pcpd27CeNqm5JkoZh8JMkqamqbwPnAgcCHwGWJjkP+Bvgu22bnwJf\nb69/ePOQx70ceANwFvB14DLgFyP/ASRJmoavc5AkaRYk2bKqrm8tficCR1fViZOuS5K0MNjiJ0nS\n7Hh1G0TmfOBS4NMTrkeStIDY4idJkiRJPWeLnyRJkiT1nMFPkiRJknrO4CdJkiRJPWfwkyRJkqSe\nM/hJkiRJUs8Z/CRJkiSp5/4/ZSB5a9HkxV8AAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# visualize price column\n", "fig, ax = plt.subplots(figsize=(15, 5))\n", "ax.axvline(mean_rating, color='red', linewidth=5)\n", "ax.axvline(median_rating, color='green', linewidth=5)\n", "ax.axvline(mode_rating, color='blue', linewidth=5)\n", "\n", "# Add arrows annotating the means:\n", "def add_arrow(label, val, align=\"left\"):\n", " ax.annotate(label + ': {:0.2f}'.format(val), xy=(val, 1), xytext=(15, 15),\n", " xycoords=('data', 'axes fraction'), textcoords='offset 
points',\n", " horizontalalignment=align, verticalalignment='center',\n", " arrowprops=dict(arrowstyle='-|>', fc='black', shrinkA=0, shrinkB=0,\n", " connectionstyle='angle,angleA=0,angleB=90,rad=10'),\n", " )\n", "\n", "add_arrow(\"Mean\", mean_rating)\n", "add_arrow(\"Median\", median_rating)\n", "add_arrow(\"Mode\", mode_rating)\n", "ax.legend(loc='upper left')\n", "ax.margins(0.05)\n", "\n", "bins, hist = df_ratio.select(\"Rating\").rdd.flatMap(lambda x: x).histogram(70)\n", "hist = np.asarray(hist)\n", "bins = np.asarray(bins)\n", "width = 0.7 * (bins[1] - bins[0])\n", "center = (bins[:-1] + bins[1:]) / 2\n", "plt.bar(center, hist, align='center', width=width)\n", "plt.title(\"Rating Histogram\")\n", "plt.xlabel(\"Rating\")\n", "plt.ylabel(\"Frequency\")\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Biến đổi của dữ liệu \n", "- Khoảng đoạn (range)\n", "$$range = maxValue - minValue$$\n", "\n", "- Phương sai (variance)\n", "$$\\sigma^2 = \\frac{\\sum_{i=1}^n (x_i - \\mu)^2}{n}$$\n", "\n", "- Độ lệch chuẩn (standard deviation)\n", "$$\\sigma = \\sqrt{\\frac{\\sum_{i=1}^n (x_i - \\mu)^2}{n}}$$\n", "\n", "- Z-score: biến đổi từ sample mean để thực hiện Z-test\n", " * Shift trung bình mẫu về 0 bằng $X - \\mu$\n", " * Nén độ lệch chuẩn của mẫu ban đầu lại bằng cách chia cho $\\sigma$\n", "$$Z = \\frac{X - \\mu}{\\sigma}$$\n", "\n", "- Phân vị (percentile)\n", "$$percentile \\ of \\ x = \\frac{No. 
value \\ below \\ x}{n} * 100\\\\$$\n", "$$quartiles = \\frac{percentile * n}{100}$$" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Min rating: 0.0\n", "Max rating: 5.0\n", "Rating range: 5.0\n" ] } ], "source": [ "# range of rating\n", "min_rating = df_ratio.agg(F.min(df_ratio.Rating)).first()[0]\n", "max_rating = df_ratio.agg(F.max(df_ratio.Rating)).first()[0]\n", "range_rating = max_rating - min_rating\n", "print \"Min rating:\", min_rating\n", "print \"Max rating:\", max_rating\n", "print \"Rating range:\", range_rating" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Rating variance: 4.02148499784\n" ] } ], "source": [ "# variance of rating\n", "var_rating = df_ratio.agg(F.variance(df_ratio.Rating)).first()[0]\n", "print \"Rating variance:\", var_rating" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Rating standard deviation: 2.00536405619\n" ] } ], "source": [ "# standard deviation of rating\n", "std_rating = df_ratio.agg(F.stddev(df_ratio.Rating)).first()[0]\n", "print \"Rating standard deviation:\", std_rating" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Z-score of rating:\n", "+-------------------+\n", "| Rating|\n", "+-------------------+\n", "| 2.840419152789433|\n", "|-1.7595807518431354|\n", "|-1.7595807518431354|\n", "| 2.840419152789433|\n", "| 2.7404192481568646|\n", "+-------------------+\n", "only showing top 5 rows\n", "\n" ] }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA+AAAAFuCAYAAAABAnL8AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAMTQAADE0B0s6tTgAAIABJREFUeJzt3XuU13WBP/7nB0ZABRwFBHUYkbhUgqAcOni/XyITQ9nK\nS2K5gOUxG7egtvWY23opd7xsxzU9Chkdk8LNXTETXa9lBK6geRlBHQbQcZS8QIoKzO+Pvs4vFGEU\n5v2B4fE453PO531/fj6+j/qc1/v9/pSam5ubAwAAALSpDuUOAAAAANsCBRwAAAAKoIADAABAARRw\nAAAAKIACDgAAAAVQwAEAAKAACjgAAAAUQAEHgG1MVVVVpk2bliRpaGhI165d89xzz23WY1x88cU5\n5phjWqYPO+ywfP/739+sx0iSX/ziFxk8ePBm3y8AtAUFHIBt3oMPPpiuXbt+4LX99tunVCrl5z//\nebkjtpnq6uqsXLky/fv33+i6F154YQ466KBW7fd73/te7rrrrk2Nt471lfhTTz01dXV1m/U4ANBW\nKsodAADK7eCDD87KlSvXmbdmzZocf/zx+ctf/pKTTz65TMn+f++88046depU7hgb1dzcnDVr1qSi\nwv9iAMD7GQEHgPX45je/maeeeir//d//ne2333696zQ0NGT06NHZZZddstNOO2XIkCF58MEHW5bP\nmjUro0aNys4775wePXqsU+SfeuqpfPazn03Pnj1TVVWViRMn5vXXX29Zfthhh+Wcc87Jl770pey8\n884599xzkyRPP/10jj/++PTu3Tt77LFHvv71r+evf/3rh36OlStX5mtf+1p69OiRPfbYI1ddddU6\ny+vr61MqlbJo0aIkyYIFC3LooYemsrIyO++8c0aMGJG6urr84he/yMUXX5yHH3645QqBBx98sGX7\nG264IcOGDcsOO+yQefPmrXe0/LXXXsvYsWPTrVu3DBgwIDfddFPLsmnTpqWqqmqd9f9+H5MmTcqD\nDz6YH/3oRy3HX992q1atyuTJk7PXXntl5513zsEHH5w5c+Z84Dg//elP069fv+y0004ZN25c3njj\njQ/9DgFgc1HAAeB9rr766vz85z/P7bffnt69e3/oet/97nezxx575IUXXsirr76amTNntpTB2bNn\n5+STT87555+fl156KcuWLcvZZ5+dJFmxYkWOOuqofPrTn05DQ0Pmzp2bp59+OmecccY6+586dWq+\n8pWvZPny5amtrc0rr7ySgw8+OEceeWQaGhqyYMGCPPPMMznvvPM+NGNNTU0effTRlnXnz5+fxsbG\nD13/61//eo488si88sorefnll3PDDTeksrIyp556ar73ve9l//33z8qVK7Ny5cocfPDBLdvdeOON\nuf3227Ny5crsu+++6933DTfckDPPPDOvvvpqrr766px11ln5/e9//6FZ/t61116bgw8+ON/5znda\njr8+3/72t3PHHXdk9uzZeemll3LiiSfmqKOOytKlS1vWaWxszNNPP52nnnoqTz/9dObPn59///d/\nb1UOANgUCjgA/J077rgj3/nOdzJjxowMGTJkg+t26tQpjY2NefbZZ1MqlTJ48ODstddeSZKrrroq\nX/va1zJu3Lh06tQpXbp0yZFHHpkkuf322/POO+/ksssuyw477JDddtstV155ZW677bZ1yvEJJ5yQ\n0aNHp0OHDtlhhx1y0003ZcCAAfnWt76Vzp07p2fPnvnBD36Qm266KWvWrPlAvrVr1+amm27KD37w\ng1RVVWXHHXfMlVdembVr127wMzU0NGTx4sWpqKjI8OHDN/hHiPdccMEF6du3bzp27JjOnTuvd53R\no0fn85//fCoqKjJ69Oh84QtfyI033rjRfbfW2rVrc8MNN+SHP/xhBgwYkE6dOuX8889P//79M336\n9Jb1Kioq8qMf/Sjbb799dtttt5x44on505/+tNlyAMCHUcAB4
P957LHH8qUvfSlXXnlljj322I2u\nf/nll2fAgAEZO3ZsevfunTPPPDMvvfRSkuT555//0KdzL1myJHvuuec690kPGDAgyd8ua3/Pe2X+\nPQsXLswjjzySysrKltfo0aNTKpXWO6r98ssv5+23315nPzvttFN22WWXD/1M06ZNS6lUyhFHHJGq\nqqqcd955Hzra/Pfen7U16+y1115ZsmTJRrdrrVdeeSVvvfVWPvGJT6wzf8CAAet8rz179sx2223X\nMr3jjjtmxYoVmy0HAHwYBRwA8rfLko8//vj84z/+YyZNmtSqbXr06JErrrgidXV1efTRR1NfX5+a\nmpokSb9+/fLMM8+sd7u+ffumoaEhq1evbpn37LPPJvnbU8nf06HDuv+Z7tOnTw466KC89tprLa/X\nX389q1atyh577PGB4/Tq1SudO3dOfX19y7zXX389r7766od+pj333DPXX399Fi9enPvuuy+zZ8/O\nJZdcst48f29Dy97z9znem37vkv1u3bp94F72F1544SMdo2fPnunSpUvLd/meZ599dp3vFQDKRQEH\nYJv31ltv5YQTTsh+++2XH//4x63e7pe//GWeffbZrF27Nt26dUvnzp1bRrW/+c1v5oYbbsjMmTPz\nzjvvZNWqVbnnnnuSJJ/73OdSUVGR733ve3nrrbfS2NiYb33rW/n85z+fPn36fOjxzjzzzDz66KO5\n5ppr8uabb6a5uTlLlizJb37zm/Wu36FDh5x22mm58MILs2zZsvz1r3/N+eefn1Kp9KHHmDZtWpYu\nXZrm5uZ07949FRUVLZ+pT58+aWhoyKpVq1r9Hf29O+64I7NmzcqaNWty55135r/+679y5plnJkn2\n3XffrFixIrfcckvWrl2b++67L7/61a/W2b5Pnz4f+keN9z7vV7/61VxwwQV57rnn8s477+SKK67I\nokWLcuqpp36szACwOSngAGzzfv3rX2fu3Ln53e9+l+7du3/g98A/bER8wYIFOeKII9KtW7d84hOf\nSGVlZS6//PIkyTHHHJObb745l156aXr16tXy5O0k6d69e2bPnp0FCxakqqoqI0aMyIABA/Kzn/1s\ngzmrq6vz8MMPZ/bs2S3HO/bYY/P4449/6DZXXHFFhg4dmqFDh2bQoEEZOnToBkv+vffem8985jPp\n2rVrhg0blv333z+TJ09Oknzxi1/M4MGDs/vuu6eysjIPPfTQBvO+31e/+tWWh7p94xvfaHmwWpL0\n798/P/nJT/JP//RPqayszE9/+tOWcv6e888/P3V1ddl5551TWVm53mNcfvnlOeaYY3L44Ydn1113\nzcyZMzN79uz07dv3I2UFgLZQam5ubi53CAAAAGjvjIADAABAARRwAAAAKIACDgAAAAVQwAEAAKAA\nCjgAAAAUoKLcATanzp07p1evXuWOAQAAwDbi5Zdfzttvv92qddtVAe/Vq1eWLl1a7hgAAABsI6qq\nqlq9rkvQAQAAoAAKOAAAABRAAQcAAIACKOAAAABQAAUcAAAACqCAAwAAQAEUcAAAACiAAg4AAAAF\nUMABAACgAG1ewM8999z069cvpVIp8+fPT5KsWrUqJ554YgYNGpRhw4bl6KOPzqJFi1q2aWpqynHH\nHZeBAwdmyJAheeCBB9o6JgAAALSpNi/gJ598ch566KHsueee68yfMGFC6urqsmDBgowZMyZnnXVW\ny7IpU6Zk1KhRWbhwYaZOnZpTTjkl7777bltHBQAAgDbT5gX8kEMOSVVV1TrzunTpktGjR6dUKiVJ\nRo0alfr6+pblM2bMyKRJk5IkI0eOzO67757777+/raMCAABAm6kod4AkueqqqzJmzJgkyfLly/Pu\nu++mT58+Lcv79euXhoaGD2xXW1ub2tralumVK1e2fdjNpN+UWRtcXn/p5wpKAgAAQBHKXsAvvvji\nLFq0KPfcc89H3rampiY1N
TUt0+8faQcAAIAtRVmfgn755Zfn1ltvzW9/+9vssMMOSZIePXqkoqIi\njY2NLevV19enurq6XDEBAABgk5WtgNfW1ubmm2/O7NmzU1lZuc6ycePG5dprr02SzJ07N8uWLcuh\nhx5ajpgAAACwWbT5JegTJ07MrFmz0tjYmGOPPTbdunXLfffdl/PPPz/9+/fP4YcfniTp3Llz5syZ\nkyS57LLLcvrpp2fgwIHp1KlTpk+fnu22266towIAAECbafMC/tOf/nS985ubmz90m969e+euu+5q\nq0gAAABQuLLeAw4AAADbCgUcAAAACqCAAwAAQAEUcAAAACiAAg4AAAAFUMABAACgAAo4AAAAFEAB\nBwAAgAIo4AAAAFAABRwAAAAKoIADAABAARRwAAAAKIACDgAAAAVQwAEAAKAACjgAAAAUQAEHAACA\nAijgAAAAUAAFHAAAAAqggAMAAEABFHAAAAAogAIOAAAABVDAAQAAoAAKOAAAABRAAQcAAIACKOAA\nAABQAAUcAAAACqCAAwAAQAEUcAAAACiAAg4AAAAFUMABAACgAAo4AAAAFEABBwAAgAIo4AAAAFAA\nBRwAAAAKoIADAABAARRwAAAAKIACDgAAAAVQwAEAAKAACjgAAAAUoM0L+Lnnnpt+/fqlVCpl/vz5\nLfMXLlyYAw44IIMGDcrIkSPzxBNPtGoZAAAAbI3avICffPLJeeihh7LnnnuuM3/ixImZMGFCnnnm\nmUyePDnjx49v1TIAAADYGrV5AT/kkENSVVW1zrympqbMmzcvp512WpLkpJNOypIlS7Jo0aINLgMA\nAICtVVnuAV+yZEl22223VFRUJElKpVKqq6vT0NCwwWXvV1tbm6qqqpbXypUrC/0cAAAA0Fpb9UPY\nampqsnTp0pZX165dyx0JAAAA1quiHAft27dvXnzxxaxevToVFRVpbm5OQ0NDqqur07179w9dBgAA\nAFursoyA77rrrtlvv/0yffr0JMnMmTNTVVWVAQMGbHAZAAAAbK1Kzc3NzW15gIkTJ2bWrFlpbGxM\njx490q1btyxatCh1dXUZP358li9fnu7du2fq1KkZOnRokmxw2YZUVVVl6dKlbflxNpt+U2ZtcHn9\npZ8rKAkAAAAf10fpoW1ewIukgAMAAFCkj9JDt+qHsAEAAMDWQgEHAACAAijgAAAAUAAFHAAAAAqg\ngAMAAEABFHAAAAAogAIOAAAABVDAAQAAoAAKOAAAABRAAQcAAIACKOAAAABQAAUcAAAACqCAAwAA\nQAEUcAAAACiAAg4AAAAFUMABAACgAAo4AAAAFEABBwAAgAIo4AAAAFAABRwAAAAKoIADAABAARRw\nAAAAKIACDgAAAAVQwAEAAKAACjgAAAAUQAEHAACAAijgAAAAUAAFHAAAAAqggAMAAEABFHAAAAAo\ngAIOAAAABVDAAQAAoAAV5Q4AAADAR9NvyqyNrlN/6ecKSMJHYQQcAAAACqCAAwAAQAEUcAAAACiA\nAg4AAAAFUMABAACgAAo4AAAAFKCsBfyOO+7Ifvvtl+HDh2fIkCH52c9+liRpamrKcccdl4EDB2bI\nkCF54IEHyhkTAAAANlnZfge8ubk5p512Wu67777ss88+qa+vzyc/+cmMHTs2U6ZMyahRo3LnnXdm\n7ty5+cIXvpDnn38+2223XbniAgAAwCYp6wh4qVTKa6+9liR544030qNHj3Tu3DkzZszIpEmTkiQj\nR47M7rvvnvvvv7+cUQEAAGCTlG0EvFQq5ZZbbsnYsWOz44475tVXX82tt96aFStW5N13302fPn1a\n1u3Xr18aGho+sI/a2trU1ta2TK9cubKQ7AAAAPBRlW0EfPXq1fnhD3+YW2+9NYsXL84999yT008/\nPatXr271PmpqarJ06dKWV9euXdswMQAAAHx8ZSvg8+fPzwsvvJBDDjkkyd8uNa+qqspjjz2
WioqK\nNDY2tqxbX1+f6urqckUFAACATVa2At63b9+8+OKLeeqpp5IkixYtyrPPPpvBgwdn3Lhxufbaa5Mk\nc+fOzbJly3LooYeWKyoAAABssrLdA967d+9cd911+Yd/+Id06NAha9euzU9+8pNUV1fnsssuy+mn\nn56BAwemU6dOmT59uiegAwAAsFUrWwFPki9/+cv58pe//IH5vXv3zl133VWGRAAAANA2yvozZAAA\nALCtUMABAACgAAo4AAAAFEABBwAAgAIo4AAAAFAABRwAAAAKoIADAABAARRwAAAAKIACDgAAAAVo\ndQH/3e9+15Y5AAAAoF1rdQG/6KKLMnjw4Fx11VV544032jITAAAAtDutLuC///3v88tf/jJ//vOf\nM2jQoHz961/Pk08+2ZbZAAAAoN34SPeA77vvvrn++utz55135vbbb88+++yTo48+Oo8//nhb5QMA\nAIB24SMV8LvvvjtjxozJ2LFj841vfCONjY2ZOHFivvCFL7RVPgAAAGgXKlq74qc+9an07Nkz5557\nbsaOHZuOHTsmSU4++eTccMMNbRYQAAAA2oNWF/Dp06dnxIgR613229/+drMFAgAAgPao1ZegP/LI\nI/nLX/7SMr18+fJcf/31bRIKAAAA2ptWF/Brrrkmu+yyS8t0jx49cs0117RJKAAAAGhvWl3Am5ub\nPzBvzZo1mzUMAAAAtFetLuC77bZbZsyY0TJ9yy23ZLfddmuTUAAAANDetPohbFdeeWXGjBmT73zn\nO0mSHXbYIbfddlubBQMAAID2pNUF/JOf/GSefPLJ1NXVJUkGDx7c8lNkAAAAwIa1uoAnSalUSmVl\nZVavXp1ly5YlSaqrq9skGAAAALQnrS7g06ZNy7nnnpvtttsuHTr87dbxUqmUpqamNgsHAAAA7UWr\nC/i//uu/Zu7cuRk8eHBb5gEAAIB2qdVPQe/Zs6fyDQAAAB9Tqwv4iSeemCuvvDJNTU154403Wl4A\nAADAxrX6EvR//ud/TpLU1NSkVCqlubk5pVIpa9asabNwAAAA0F60uoCvXbu2LXMAAABAu9bqS9CT\n5JFHHsnPf/7zJMlrr72WF198sU1CAQAAQHvT6gJ+zTXX5Ktf/WouvPDCJMny5ctzyimntFUuAAAA\naFdaXcCvu+66/PGPf0z37t2TJJ/4xCfy8ssvt1kwAAAAaE9aXcA7d+6c7bfffp15FRWtvoUcAAAA\ntmmtLuC9evXKM888k1KplCSZNm1aqqur2ywYAAAAtCetHsK+8sor8+UvfzlPP/10+vbtm+7du+f2\n229vy2wAAADQbrS6gA8YMCBz5sxJXV1dmpubM3jw4HTs2LEtswEAAEC70eoC3tDQkCTZcccdkyTL\nli1LEpehAwAAQCu0uoCPGDEipVIpzc3NWbVqVd5888306NEjTU1NbZkPAAAA2oVWF/D3/+TYrbfe\nmgULFmz2QAAAANAetfop6O83duzYzJo1a5MO/vbbb+ecc87JwIEDM3To0Jx22mlJkoULF+aAAw7I\noEGDMnLkyDzxxBObdBwAAAAot1aPgL/xxhst79esWZM5c+asM+/jmDJlSkqlUsvPmzU2NiZJJk6c\nmAkTJmT8+PH59a9/nfHjx2fu3LmbdCwAAAAop1YX8MrKypZ7wDt27JiBAwfm6quv/tgH/utf/5ob\nbrghS5cubflt8T59+qSpqSnz5s3LXXfdlSQ56aSTcs4552TRokUZMGDAxz4eAAAAlFOrC/jatWs3\n64GfffbZ7LLLLrn44otz9913Z/vtt8+FF16YysrK7Lbbbqmo+Fu0UqmU6urqNDQ0fKCA19bWpra2\ntmV65cqVmzUjAAAAbC4f+x7wTbV69eosXrw4n/70pzNv3rxcffXV+eIXv5jVq1e3eh81NTVZunRp\ny6tr165tmBgAAAA+vlaPgHfo0KHlUvG/19zcnFKplDV
r1nykA1dXV6dDhw459dRTkyT77rtv9tpr\nryxevDgvvvhiVq9enYqKijQ3N6ehocHvjQMAALBVa/UI+EUXXZQpU6Zk8eLFWbx4cb773e/moosu\nyooVKz7Ww9h69uyZI488Mr/73e+SJM8//3yef/75HHjggdlvv/0yffr0JMnMmTNTVVXl/m8AAAC2\naqXm5ubm1qw4YsSIPPLIIxud91E899xz+drXvpZXXnklHTp0yAUXXJCTTjopdXV1GT9+fJYvX57u\n3btn6tSpGTp06Eb3V1VVlaVLl37sPEXqN2XDP+FWf+nnCkoCAABsbTbWJxKdoigfpYe2+hL0FStW\npKmpKbvuumuSpKmpKStWrPh4Cf+f/v3759577/3A/MGDB+fhhx/epH0DAADAlqTVBfz888/PsGHD\nMnr06CTJnXfemQsvvLCtcgEAAEC70uoCPnHixBx44IEtI9Y1NTXZe++92ywYAAAA5eES97bR6gKe\nJD169MjQoUNz2GGHZfXq1XnnnXfSqVOntsoGAAAA7Uarn4L+61//OqNGjcqZZ56ZJHniiSdy4okn\ntlkwAAAAaE9aXcAvueSS/N///V8qKyuTJMOGDcvixYvbLBgAAAC0J60u4B07dkyPHj3WmefycwAA\nAGidVt8D3q1bt7z00ksplUpJknvuuSe77LJLmwUDAACg/drYg97a40PeWl3AL7300nz2s5/Nc889\nl4MOOijPP/98Zs3a+JPxAAAAgI9QwEeOHJl77703f/jDH9Lc3JwDDjig5X5wAAAAPr5tcTR4W9Sq\nAr5mzZoMHTo0Tz75ZD772c+2dSYAAABod1r1ELaOHTumV69eefPNN9s6DwAAALRLrb4EfcCAATnw\nwAMzbty4dO3atWX+ueee2ybBAAAAoD1pdQFfu3Zthg8fnoULF7bMe++J6AAAAMCGbbSAT5gwIddd\nd12mTp2a2267LWPGjCkiFwAAALQrG70HfN68eS3vf/CDH7RpGAAAAGivWvUQtvc0Nze3VQ4AAABo\n1zZ6Cfpbb72Vxx9/PM3NzVm1alXL+/fss88+bRoQAAAA2oNWFfATTjihZfrv35dKpTz33HNtkwwA\nAADakY0W8Pr6+gJiAAAAQPv2ke4BBwAAAD4eBRwAAAAKoIADAABAARRwAAAAKIACDgAAAAVQwAEA\nAKAACjgAAAAUQAEHAACAAijgAAAAUAAFHAAAAAqggAMAAEABFHAAAAAogAIOAAAABVDAAQAAoAAK\nOAAAABRAAQcAAIACKOAAAABQAAUcAAAACqCAAwAAQAEUcAAAACjAFlHAp06dmlKplN/85jdJkqam\nphx33HEZOHBghgwZkgceeKDMCQEAAGDTlL2A19fX5/rrr8+oUaNa5k2ZMiWjRo3KwoULM3Xq1Jxy\nyil59913y5gSAAAANk1ZC/jatWtz1lln5T/+4z/SuXPnlvkzZszIpEmTkiQjR47M7rvvnvvvv79c\nMQEAAGCTlbWA19bW5sADD8yIESNa5i1fvjzvvvtu+vTp0zKvX79+aWhoWO/2VVVVLa+VK1cWkhsA\nAAA+qopyHfjPf/5zZs6cuUn3d9fU1KSmpqZluqqqanNEAwAAgM2ubCPgDz74YOrr6zNw4MD069cv\nf/zjHzNhwoTMmDEjFRUVaWxsbFm3vr4+1dXV5YoKAAAAm6xsBfzss8/Oiy++mPr6+tTX12fUqFG5\n7rrrcvbZZ2fcuHG59tprkyRz587NsmXLcuihh5YrKgAAAGyysl2CviGXXXZZTj/99AwcODCdOnXK\n9OnTs91225U7FgAAAHxsW0wBv++++1re9+7dO3fddVf5wgAAAMBmVvbfAQcAAIBtgQIOAAAABVDA\nAQAAoAAKOAAAABRAAQcAAIACKOAAAABQAAUcAAAACqCAAwAAQAEqyh0AAACgvek3ZdYGl9df+rmC\nkrAlMQIOAAAABVD
AAQAAoAAKOAAAABRAAQcAAIACKOAAAABQAAUcAAAACqCAAwAAQAEUcAAAACiA\nAg4AAAAFUMABAACgAAo4AAAAFEABBwAAgAIo4AAAAFAABRwAAAAKoIADAABAARRwAAAAKEBFuQMA\nAAAUrd+UWRtcXn/p5wpKwrbECDgAAAAUQAEHAACAAijgAAAAUAAFHAAAAArgIWwAAABsko091C7x\nYLvECDgAAAAUQgEHAACAAijgAAAAUAAFHAAAAAqggAMAAEABFHAAAAAogAIOAAAABVDAAQAAoABl\nK+CrVq3KiSeemEGDBmXYsGE5+uijs2jRoiRJU1NTjjvuuAwcODBDhgzJAw88UK6YAAAAsFmUdQR8\nwoQJqaury4IFCzJmzJicddZZSZIpU6Zk1KhRWbhwYaZOnZpTTjkl7777bjmjAgAAwCYpWwHv0qVL\nRo8enVKplCQZNWpU6uvrkyQzZszIpEmTkiQjR47M7rvvnvvvv79cUQEAAGCTVZQ7wHuuuuqqjBkz\nJsuXL8+7776bPn36tCzr169fGhoaPrBNbW1tamtrW6ZXrlxZSFYAAGDL0W/KrI2uU3/p5wpIAhu2\nRTyE7eKLL86iRYtyySWXfKTtampqsnTp0pZX165d2yghAAAAbJqyF/DLL788t956a377299mhx12\nSI8ePVJRUZHGxsaWderr61NdXV3GlAAAALBpylrAa2trc/PNN2f27NmprKxsmT9u3Lhce+21SZK5\nc+dm2bJlOfTQQ8sVEwAAADZZ2e4BX7p0ac4///z0798/hx9+eJKkc+fOmTNnTi677LKcfvrpGThw\nYDp16pTp06dnu+22K1dUAAAA2GRlK+BVVVVpbm5e77LevXvnrrvuKjgRAAAAtJ2y3wMOAAAA2wIF\nHAAAAAqggAMAAEABFHAAAAAogAIOAAAABVDAAQAAoAAKOAAAABRAAQcAAIACKOAAAABQAAUcAAAA\nCqCAAwAAQAEUcAAAACiAAg4AAAAFUMABAACgAAo4AAAAFEABBwAAgAIo4AAAAFAABRwAAAAKoIAD\nAABAARRwAAAAKIACDgAAAAVQwAEAAKAACjgAAAAUQAEHAACAAijgAAAAUICKcgcAAADKq9+UWRtc\nXn/p5wpKAu2bEXAAAAAogBFwAACgTW1shD0xys62wQg4AAAAFEABBwAAgAK4BB0AAPhIXFIOH48R\ncAAAACiAAg4AAAAFUMABAACgAAo4AAAAFEABBwAAgAIo4AAAAFAAP0MGAAB/x09sAW3FCDgAAAAU\nYIsdAV+qAVLdAAAIaklEQVS4cGHOOOOMvPLKK9lpp50ybdq07L333uWOBdsMf/0H2Dz8+3Tz29h3\n+v7v0z8DYEuxxY6AT5w4MRMmTMgzzzyTyZMnZ/z48eWOBAAAAB/bFlnAm5qaMm/evJx22mlJkpNO\nOilLlizJokWLypwMAAAAPp5Sc3Nzc7lDvN8jjzySU045JXV1dS3zPvOZz+TSSy/NEUcc0TKvtrY2\ntbW1LdMvvPBCdt9990Kzwke1cuXKdO3atdwxYIOcp2zpnKNsDZynbA2cp5vu5Zdfzttvv92qdbfY\ne8Bbo6amJjU1NS3TVVVVWbp0aRkTwcY5T9kaOE/Z0jlH2Ro4T9kaOE+LtUVegt63b9+8+OKLWb16\ndZKkubk5DQ0Nqa6uLnMyAAAA+Hi2yAK+6667Zr/99sv06dOTJDNnzkxVVVUGDBhQ5mQAAADw8XS8\n8MILLyx3iPXZf//98y//8i/50Y9+lLlz52bq1Knp3bt3q7aDLZ3zlK2B85QtnXOUrYHzlK2B87Q4\nW+RD2AAAAKC92SIvQQcAAID2RgEHAACAArS7An711VdnyJAhGTp0aPbZZ5+WB7nBlmTWrFkZMWJE\nOnfunPPOO6/ccaDFwoULc8ABB2TQoEEZOXJknnjiiXJHgnWce+656devX0qlUubPn
1/uOLBeq1at\nyoknnphBgwZl2LBhOfroo7No0aJyx4J1HHPMMdlnn30yfPjwHHzwwXn00UfLHWmb0O4K+N57753f\n//73efzxxzNr1qycd955efbZZ8sdC9YxcODA3Hjjjfn2t79d7iiwjokTJ2bChAl55plnMnny5Iwf\nP77ckWAdJ598ch566KHsueee5Y4CGzRhwoTU1dVlwYIFGTNmTM4666xyR4J1zJgxI4899ljmz5+f\nmpoa/80vSLsr4EceeWR22mmnJH/7PfE+ffpkyZIlZU4F63rvL+IVFRXljgItmpqaMm/evJx22mlJ\nkpNOOilLliwxasMW5ZBDDklVVVW5Y8AGdenSJaNHj06pVEqSjBo1KvX19eUNBe9TWVnZ8v71119v\nOV9pW+36//7vvvvuvPrqqxk5cmS5owBs8ZYsWZLddtut5Q9DpVIp1dXVaWhoyIABA8qcDmDrddVV\nV2XMmDHljgEf8JWvfCX33ntvkuSOO+4oc5ptw1ZXwPfff/8sXLhwvcseffTR9O3bN0ny+OOP58wz\nz8wtt9ySHXfcsciI0OrzFABo3y6++OIsWrQo99xzT7mjwAfcdNNNSZKf/exnmTx5shJegK2ugD/8\n8MMbXefJJ5/M8ccfnxtvvDEHHXRQAalgXa05T2FL07dv37z44otZvXp1Kioq0tzcnIaGhlRXV5c7\nGsBW6fLLL8+tt96au+++OzvssEO548CHOuOMMzJp0qQsX748PXr0KHecdq3d3QP+1FNPZfTo0bnu\nuuty9NFHlzsOwFZj1113zX777dfy6xEzZ85MVVWVy88BPoba2trcfPPNmT179jr32sKW4LXXXssL\nL7zQMv2b3/wmPXr0yC677FLGVNuGUnNzc3O5Q2xORx99dObNm7fO01Evu+yyHHvssWVMBeu65557\ncsYZZ+SNN95Ic3Nzdtppp1xzzTU54YQTyh2NbVxdXV3Gjx+f5cuXp3v37pk6dWqGDh1a7ljQYuLE\niZk1a1YaGxvTo0ePdOvWzYMC2eIsXbo0ffv2Tf/+/dOtW7ckSefOnTNnzpwyJ4O/Wbx4ccaNG5e3\n3norHTp0SK9evXL55Zdn+PDh5Y7W7rW7Ag4AAABbonZ3CToAAABsiRRwAAAAKIACDgAAAAVQwAEA\nAKAACjgAAAAUQAEHAACAAijgALCV69evXwYPHpzhw4dn8ODBufTSSze6zfz58/PLX/5ynXnDhw/P\nihUr2iomAGzzFHAAaAduueWWzJ8/P//7v/+bSy65JH/60582uP76Cvj8+fPTrVu3towJANs0BRwA\n2pE99tgjn/zkJ7N48eI0Njbm8MMPz4gRI7L33nvnnHPOydq1a9PU1JQLLrgg9957b4YPH55JkyYl\nSUqlUl577bUkfxtVv+CCC7L//vtnr732yg9/+MOWYzz99NPZf//9s/fee2fs2LE55phjMm3atHJ8\nXADYqijgANCOPP3001m+fHkOO+ywVFZW5n/+53/yyCOP5LHHHkt9fX1mzJiRXXfdNRdddFEOP/zw\nzJ8/P9dee+169/Xaa6/l4Ycfzty5c/PjH/84y5YtS5KcfvrpmTBhQp544on827/9Wx544IEiPyIA\nbLUqyh0AANh0X/ziF9OhQ4fU1dXliiuuSK9evfLmm29m8uTJeeihh9Lc3JympqYMGTIkX/rSl1q1\nz1NOOSVJ0rNnz/Tv3z/PP/98unXrlvnz5+crX/lKkuRTn/pUDjrooDb7XADQnijgANAO3HLLLRk+\nfHjuvvvufP7zn88RRxyR2267LU1NTZkzZ066dOmSmpqarFq1qtX77NKlS8v7jh07ZvXq1etdr1Qq\nbXJ+ANgWuAQdANqRo446KmeffXa+//3v59VXX02fPn3SpUuXNDY25le/+lXLet27d8/rr7/+kfff\nvXv3DBs2LNOnT0+S1NXV5aGHHtps+QGgPVPAA
aCd+Zd/+Zc89NBDOeGEEzJnzpzsvffeOf3003PU\nUUe1rHPkkUfm7bffzj777NPyELbWuummm/Kf//mfGTJkSCZPnpyRI0emsrJyc38MAGh3Ss3Nzc3l\nDgEAbD1WrlyZHXfcMaVSKc8//3z233//zJ07N3379i13NADYorkHHAD4SP7whz/k29/+dpJkzZo1\nueKKK5RvAGgFI+AAAABQAPeAAwAAQAEUcAAAACiAAg4AAAAFUMABAACgAAo4AAAAFEABBwAAgAL8\nfzYvcwDmPlEGAAAAAElFTkSuQmCC\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# z-score of rating: z = (x - mean) / std\n", "# mean_rating and std_rating are computed in an earlier cell; df_ratio is the\n", "# table name queried through sqlContext.\n", "# The subtraction is parenthesised on its own: without that, SQL evaluates\n", "# Rating - (mean / std), which only shifts every rating by a constant.\n", "df_z_score_rating = sqlContext.sql(\"SELECT (Rating - \" + \\\n", "    str(mean_rating) + \") / \" + str(std_rating) + \\\n", "    \" as Rating FROM df_ratio\")\n", "\n", "print \"\\nZ-score of rating:\"\n", "df_z_score_rating.show(5)\n", "\n", "# RDD.histogram(70) returns (bucket edges, counts) for 70 evenly spaced buckets\n", "bins, hist = df_z_score_rating.select(\"Rating\").rdd.flatMap(lambda x: x).histogram(70)\n", "hist = np.asarray(hist)\n", "bins = np.asarray(bins)\n", "# bar width is 70% of one bucket; bars are centred on the bucket midpoints\n", "width = 0.7 * (bins[1] - bins[0])\n", "center = (bins[:-1] + bins[1:]) / 2\n", "\n", "# plotting\n", "fig = plt.figure(figsize=(15, 5), dpi= 80, facecolor='w', edgecolor='k')\n", "plt.bar(center, hist, align='center', width=width)\n", "plt.title(\"Z score distribution\")\n", "plt.xlabel(\"Rating\")\n", "plt.ylabel(\"Frequency\")\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Xác suất\n", "- Xác suất (probability)\n", "$$probability = \\frac{event(s)}{outcome(s)}$$\n", "\n", "- Phép đếm hoán vị (permutation)\n", "$$P(n, r) = \\frac{n!}{(n - r)!}\\\\$$\n", "$$n: distinct\\ object\\ to\\ choose\\ from$$\n", "$$r: spaces\\ to\\ fill.$$\n", "\n", "- Phép đếm tổ hợp (combination)\n", "$$C(n, r) = \\frac{n!}{r!(n - r)!}$$\n", "\n", "- Xác suất có điều kiện (conditional probability)\n", "$$P(B|A) = \\frac{P(A \\cap B)}{P(A)}$$\n", "\n", "- Biến độc lập và biến phụ thuộc (independent/dependent variable)\n", " - Independent variable: \n", " * Dress_ID\n", " * Style\n", " * Price\n", " * Rating\n", " * Size\n", " * Season\n", " * NeckLine\n", " * SleeveLength\n", " * waiseline\n", " * Material\n", " * FabricType\n", " * Decoration\n", " * 
Pattern Type\n", " - Dependent variable: Recommendation\n", "\n", "- Bayes\n", "$$P(A|B) = \\frac{P(B|A) P(A)}{P(B)}\\\\$$\n", "$$Posterior = \\frac{Likelihood * Prior}{Evidence}$$" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "P(Season:summer) 31.8\n", "P(Style:sexy and Season:summer) 4.8\n", "P(Style:sexy|Season:summer) 15.0943396226\n" ] } ], "source": [ "# Conditional probability\n", "# P(Style:sexy|Season:summer) = P(Style:sexy and Season:summer) / P(Season:summer)\n", "# All probabilities in this cell are expressed as percentages (0-100).\n", "# NOTE(review): the filters are case-sensitive and the Style column shows mixed\n", "# casing (e.g. 'Sexy' vs 'cute') - confirm no 'sexy' / 'summer' variants are missed.\n", "num_items = df.select(\"Rating\").count()\n", "df_summer = df.select(\"Season\").where(df[\"Season\"] == \"Summer\")\n", "p_summer = df_summer.count() * 100.0 / num_items\n", "\n", "df_sexy_summer = df.select(df.Style, df.Season).where(\"Style = 'Sexy' and Season = 'Summer'\")\n", "p_sexy_summer = df_sexy_summer.count() * 100.0 / num_items\n", "\n", "# both operands are percentages, so the ratio is scaled by 100 to stay a percentage\n", "p_sexy_given_summer = p_sexy_summer * 100 / p_summer\n", "\n", "print \"P(Season:summer)\", p_summer\n", "print \"P(Style:sexy and Season:summer)\", p_sexy_summer\n", "print \"P(Style:sexy|Season:summer)\", p_sexy_given_summer" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "P(Style:sexy) 13.8\n", "P(Season:summer) 31.8\n", "P(Season:summer|Style:sexy) 34.7826086957\n", "P(Style:sexy|Season:summer) Bayes: 15.0943396226\n" ] } ], "source": [ "# Bayes\n", "# P(Style:sexy|Season:summer) = P(Season:summer|Style:sexy) * P(Style:sexy) / P(Season:summer)\n", "# Reuses p_sexy_summer and p_summer from the conditional-probability cell, so that\n", "# cell must be run first. Values are percentages (0-100).\n", "num_items = df.select(\"Rating\").count()\n", "df_style = df.select(df.Style).where(\"Style = 'Sexy'\")\n", "p_sexy = df_style.count() * 100.0 / num_items\n", "\n", "p_summer_given_sexy = p_sexy_summer * 100 / p_sexy\n", "\n", "# percentage * percentage / percentage -> the result is again a percentage\n", "p_sexy_given_summer_bayes = p_summer_given_sexy * p_sexy / p_summer\n", "\n", "print \"P(Style:sexy)\", p_sexy\n", "print \"P(Season:summer)\", p_summer\n", "print \"P(Season:summer|Style:sexy)\", 
p_summer_given_sexy\n", "print \"P(Style:sexy|Season:summer) Bayes:\", p_sexy_given_summer_bayes" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Central limit theorem\n", "- Dịch thành: Định lý giới hạn trung tâm.\n", "- Cho quần thể có phân bố bất kỳ.\n", "- Ta thực hiện lấy mẫu nhiều lần trên quần thể cho trước với số lượng xác định.\n", "- Mỗi lần lấy mẫu ta đi tính trung bình mẫu\n", "- Tổng hợp các trung bình mẫu này lại thành histogram.\n", "- Quan sát phân phối của trung bình mẫu của các mẫu ngẫu nhiên, ta thấy đây gần giống với phân phối chuẩn dù cho quần thể ban đầu có phân bố bất kỳ." ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "# Simulating Central limit theorem\n", "# Draw num_loop random samples of about num_sample ratings each and record\n", "# the mean of every sample in samp_mean_ls.\n", "num_sample = 30.0\n", "num_loop = 1000\n", "num_items = df.select(\"Rating\").count()\n", "samp_mean_ls = []\n", "\n", "for i in range(0, num_loop):\n", "    # sample() keeps each row independently with the given probability, so the\n", "    # sample size is only approximately num_sample; seed=i makes reruns reproducible.\n", "    df_rating_sample = df.select(\"Rating\").sample(False, num_sample / num_items, seed=i)\n", "\n", "    # Average over the rows actually drawn: dividing the sum by the nominal\n", "    # num_sample would distort the mean whenever the sample size differs from it.\n", "    x_bar = df_rating_sample.agg(F.avg(df_rating_sample.Rating)).first()[0]\n", "\n", "    # the aggregate is None when the sample happens to be empty; skip that draw\n", "    if x_bar is not None:\n", "        samp_mean_ls.append(x_bar)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA+EAAAFuCAYAAADuwBnCAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAMTQAADE0B0s6tTgAAIABJREFUeJzs3X1cVHX+///n6HgNOAkK6oh4AbSJgpotaXmRm5m1q+vF\nWqZBZWituS5W0nZlbou2W5TVp49mBbnu19XUrF23UlsvNys0tbLNIB1BBVGLFBMFfP/+8Of5hAIO\nypxBfNxvt7ndPOd9Ll68PTPvec45c8ZhjDECAAAAAAA+V8/fBQAAAAAAcLkghAMAAAAAYBNCOAAA\nAAAANiGEAwAAAABgE0I4AAAAAAA2IYQDAAAAAGATQjgAAAAAADYhhAP/v+nTp+u66667qG3k5OQo\nICBAu3btqqGqvDN27FglJiZa0126dNGbb75Zo/vYsGGDAgICVFZWJqlm+qsi/upDX6lrfw8A2CUj\nI0Nut9uanjhxosaPH297HW63WxkZGZJ895qempqqQYMGWdP9+/fXY489VqP7kKS//e1vio6OrvHt\nXkgd4eHhCggI0CuvvOLvci4Z/noOoOYRwlEr9O/fXw0bNlRAQICCgoIUExOj119/3d9lVamiEBoe\nHq6ioiJ17NjRT1WdtmPHDiUkJJx3ubVr18rhcKi0tPS8y15//fUqKipS/fr1a6JESbWrDyMiItS4\ncWMFBATI5XKpZ8+eWrZsWbW2UZv+HgDw1u7du3X77berTZs2CggIUJs2bTRkyBDl5eX5u7Ry5syZ\no9dee82vNVTnNb06H1b/4Q9/0MqVKy+2vHIqCvJ33HGHdu7cWaP7qa7S0lIlJSXpueeeU1FRke6/\n/36/1lPTMjIyVK9ePQUEBCggIEBt27bVXXfdpe+++65a23E4HFq9enW5ebXhOYCaQQhHrfHwww+r\nqKhI33//vVJSUjR+/HitXbvW32VBUklJib9LsMXLL7+soqIiHTp0SGPGjNHo0aOVnZ3t77IAwKeG\nDBmiwMBAffnllyoqKtLWrVs1evRoORwOf5dW5xljvPogvC7Jz8/Xjz/+qO7du/u7FJ9p06aNioqK\nVFRUpI0bN+qTTz7R1KlT/V0WahFCOGqd+vXra+zYsQoODtaWLVskSYWFhUpKSpLb7VZISIhuvvnm\ncp/knvm0+ZFHHlGrVq0UFhamhx56yAqPHo9HDoejXKA631ng//mf/1FMTIyCgoIUFhamcePG6dCh\nQ5JOX0aVmpqqTZs2WZ90btiwocL9ZGRkWNuJiYkpd5n4meXffPNNxcbGKjAwUPHx8frqq6+q7KO/\n/OUvCg8Pl8vl0vjx43Xy5Mly7REREdYnpYWFhbrtttsUEhKioKAgRUVFacmSJcrJydHNN98sSXK5\nXAoICFBqaqq1/pNPPqnBgwcrMDBQzz33XKX9daF97u8+rIrT6dSECRNUWlqqbdu2WfNr4pg4c3nl\n3LlzFRERoebNm2vUqFE6cuSItZ+srCwNGDBAQUFB+tnPfqZ58+bJ4XDI4/Fc8N8EABU5fPiwvv76\na02cOFEtWrSQJIWGhiohIUFhYWGSpLy8PN16660KDQ1VYGCgunXrprfeeqvcdhwOh2bPnq1rr71W\nzZo1U2xsrL744gu99dZbio6OVlBQkEaOHKmioiJrnTNjzQ033KCAgADFxMTogw8+qLTWxMREjR07\nttz6M2bMsD5E6NSpk95++22r3RijWbNmlRsvf/Ob35T7+tbZioqKdM899yg4OFht27bV7Nmzy7Wf\n/Zq+fft29evXTy6XS1dccYV69uypnTt3nndMeP311xUbG6umTZtq8+bNFZ41Lyws1PDhwxUYGKjO\nnTtr/vz5VtvZl+pL5c+8T5w4URs2bNCf//xna/8VrVdcXKxp06apQ4cOuuKKK3T99dfrk08+OWc/\nVY1ZZ6tqm6tXr7Yuh4+NjVVAQIBycnLO2caZ9wuLFi1SVFSUm
jZtql/+8pcqLCzU448/rtatWysk\nJERPPvlkufW+/vpr61ht27at7r//fh07dsxqf+KJJxQVFaXAwEC1a9dODzzwgH788UerPTExUbfd\ndpsmTZqk4OBghYaG6vHHH6/0b/VGhw4dNGTIEGVmZlrzzvec6tKliyTpl7/8pQICAqz3a3Y8B2AT\nA9QC/fr1M48++qgxxpiSkhIzf/5843A4zLp164wxxtx6662mf//+Ji8vzxw7dsz87ne/M2632xw9\netQYY8yTTz5pnE6neeyxx0xxcbH573//azp06GCefvppY4wxu3fvNpJMVlaWtc81a9YYSaakpMTa\nRp8+faz2JUuWmJ07d5qysjLj8XjMNddcY2677Tar/ezlK9rPkiVLTGBgoFm9erUpLS01q1atMs2a\nNTNvv/12ueUHDhxo9u/fb44fP25GjBhh+vbtW2lf/e1vfzMul8v85z//MSdPnjRz5841TqfTJCQk\nWMu0b9/ezJs3zxhjzB/+8AczZMgQc+TIEXPq1Cnj8XjMjh07KuyDn64fGhpqPvroI3Pq1Clz7Nix\nCvurpvvcV324YcMG07x5c7Nnz55K+/WnfVZcXGxmzZplJJkvvvjCWqYmjon09HRTv359M2XKFPPj\njz+a/fv3m86dO5snnnjCGHP6+I+KijL33nuvOXbsmNm7d6+Jj483kszu3bsrrR8ALlTXrl3N1Vdf\nbd544w2zfft2U1ZWVq49NzfXLF261Bw9etScPHnSvPbaa8bpdJovv/zSWkaSiYuLM7t27TInTpww\no0aNMh07djSJiYnmyJEjJj8/33Ts2NGkpqZa67Rv396EhISYjRs3mpKSEvPaa6+Zhg0bml27dhlj\nTr9etm3b1lo+ISHB3HHHHeXWb9eundmyZYspKyszzz33nAkMDDQ//PCDMcaYN99807Ro0cJ8/PHH\n1vbPHi/Pdu+995ru3bub3NxcU1RUZBITE039+vVNenq6Mebc1/TevXubp556ypSUlJiSkhKzdetW\nk5+fb4ypekzo3bu3ycnJMaWlpaa4uPicZfv162caN25s3n33XVNSUmJWrFhhGjRoYDZu3Fhh31S0\nv5++tzrj7PUmTZpkYmJiTFZWljlx4oR59tlnTUBAgMnNzbWWr2rMqsj5tlnR+4OznXm/MG7cOHPk\nyBFz4MABExkZaaKiosxLL71kSkpKzKZNm0z9+vXNRx99ZIwx5uDBgyYkJMSkpaWZ4uJic/DgQTNw\n4EAzfvx4a7vz5883e/bsMadOnTJffvml6dSpk0lJSbHaExISTMOGDc3ChQtNaWmp+eijj4zT6TT/\n/ve/rWVuueUWc99991Va+9l9/M0335ioqCgzcuRIa563z6lVq1aV27YdzwHYgxCOWqFfv36mUaNG\npnnz5iY4ONj06NHDZGRkGGOM2b9/v5Fktm3bZi1/8uRJExwcbBYuXGiMOT3wtGrVypSWllrLvPLK\nK6Zjx47GmAsPhD+1bNky06JFC2vam8A1aNAgM2XKlHLLTJ482dx0003llj/zYYMxxvzzn/80TZo0\nqbSOX/ziFyY5ObncvB49elQawqdPn25+/vOfm08//fScN1ZVhfCfDkoVLeurPrejDyvSvn1706RJ\nE9O8eXPjcDhM06ZNzfz586tc50KOifT0dNOoUSNz8uRJa5kHH3zQDB482Bhz+gODevXqmSNHjljt\n//jHPwjhAHzm0KFD5vHHHze9evUyjRo1MldccYWZOnWqKS4urnSdbt26mRdffNGallTuNXP58uVG\nktm/f781b9KkSWbYsGHWdPv27c8Zz6655hozY8YMY4x3Ifypp56ypouKiowk8/HHHxtjjBk4cKB5\n6KGHym2/Z8+elQaQsrIy06hRI/Puu+9a8woLC43D4ag0hPfv39/cc889Jjs7+5ztVTUmvP/++1Uu\n269fPzN8+PByy/zmN78xd
999d4V9U9k2qgrhZWVlpkmTJmb58uXllunWrZuZOXOmtXxVY9bZvNlm\ndUJ4Tk6ONW/KlCkmKiqq3HIxMTHmhRdeMMYY89xzz5n4+Phy7Rs3bjQNGzYs917lp9LS0kyPHj2s\n6YSEBDNgwIByy1x99dVm1qxZldZ6tvT0dONwOEzz5s1NkyZNrBMFBw4cqHK9ip5T3oTwmnwOwD5c\njo5a48EHH1RhYaEOHTqkLVu2WDcWy83NlSR16tTJWrZBgwZq3759uUuY2rVrV+6mYR06dLDWvRDL\nli1T79691apVKwUFBWncuHH67rvvrLuDeyM3N7dc3ZLUuXPncy69atOmjfXvZs2a6fjx45VeJr93\n71516NCh3Lyzp3/qoYce0qBBgzR+/HgFBwdr1KhRXn3PuaptnlHTfV4RX/RhZV588UUVFhaqoKBA\n/fv3P+eGKDVxTEhSSEiIGjRoUK7eo0ePSpL27dunFi1aKDAw0GqPiIio1vYBoDqCg4M1Y8YMffrp\np/rhhx/0xhtvaN68eZo5c6Yk6fvvv9e9996rDh06KCgoSC6XSzt27FBBQUG57bRu3dr6d7NmzSqc\nd+a17oyKxrPqjCNnv/ZLKvd62r59+3LLV/V6evDgQZ04caJcTc2bN7cu069IRkaGHA6HbrjhBrnd\nbk2ZMqXcJfeV8WaMvdi+OZ9Dhw7p+PHj5x1jqxqzLnSb3jr7+Pnp9Nm1ZGVlacuWLXK5XNZjyJAh\ncjgcys/PlyTNnTtXPXr0UHBwsJo3b65HH330nOP4p8fU+f7eyrRp00aFhYUqKirSO++8o88//7zc\njQ69fU55u6+f1ipd+HMA9iGEo9Zr166dJOnbb7+15pWWlionJ0fh4eHWvNzc3HJhyOPxWN97OhNo\nfvq9oP3791e6z71792rUqFF64IEHlJOToyNHjuivf/2rpNPfr5GkevXO//Rp165dubrP/B0/rbu6\n3G73Od8Nruq7wk2bNtWMGTO0fft2ffvtt3I6ndYHHFX9Dd78fRfb5/7qw/MJCQlRRkaG3nnnHb3z\nzjuSau6YOJ+2bdvqu+++Kzfg79mz56K3CwDeaNSokYYNG6Zf/OIX+uyzzyRJKSkp+vrrr7Vu3Tr9\n8MMPKiwsVJcuXazXvotR0Xh29nedL1Tbtm3Pef2s6vW0ZcuWatSoUbmafvjhB33//feVrtO+fXvN\nmzdPe/bs0dq1a7Vq1Srrw4uLHWOr6pvAwMBy46tU/TE2JCREjRs3rtEx1hfb9FZYWJiuu+46FRYW\nWo8ffvhBxcXFatu2rTZt2qRJkybpueeeU35+vn744Qf96U9/qpHjuDL16tXTr371K9177726++67\nrX1585yqiRsjVvc5APsQwlHrtW7dWkOGDNHUqVN14MABHT9+XNOmTVPDhg11yy23WMt99913mjFj\nhk6cOKGdO3fqL3/5i+666y5Jpz/l79Chg1577TWVlpZq165devbZZyvdZ1FRkU6dOmUNJllZWdag\nekZYWJhycnJUXFxc6XbGjx+vN954Q2vXrlVZWZn+/e9/6/XXX1dSUtIF90dCQoLeeOMNffzxxyot\nLdVrr72m7du3V7r8u+++qx07dqi0tFRNmzZVkyZN5HQ6rb9B0gX/XMnF9rm/+tAbLVu21JQpU5SS\nkqKysrIaOybOJz4+Xp06ddLDDz+sH3/8Ufv377dumAcANe3ML5J8/vnnOnHihMrKyvThhx9qzZo1\n6tu3r6TTQbRp06YKDg5WSUmJXnrpJe3YsaNG9j9//nxt2rRJpaWlysjI0NatW3XHHXfUyLbHjRun\nN954Q5mZmSotLVV6enq5m22erV69eho7dqymT5+uffv26dixY5o6dWqVYSgjI0N79+6VMUZBQUFy\nOp3lxtiLGRP+9a9/acWKFSorK9P777+vt99+2xpju3fvrqNHj2rRokU6deqU1q5de87N8sL
CwvTN\nN99U+ffefffdeuKJJ7Rr1y6dPHlSzz//vLKzsy/4/8AX2/TWXXfdpa1bt+qVV17Rjz/+KGOMcnNz\ntXz5ckmnj+P69eurZcuWatCggT777DO9/PLLPq3pjIceeki7du3S//t//8+q5XzPqbCwsIv+Obnq\nPgdgH0I4Lgl//etfFRERoR49esjtdmvHjh1avXp1uUt2f/7zn+vkyZNyu93q27evhg0bppSUFKt9\n/vz5Wrt2rVwul8aOHavx48dXur8rr7xSM2fO1J133qnAwEAlJCSUuxulJI0ePVrR0dFq06aNXC6X\nNm7ceM52Ro0apeeee07333+/XC6XHnjgAc2ePVvDhw+/4L6444479PDDD2vUqFEKCQnRxx9/rF//\n+teVLr97924NGzZMLpdLbdu21YEDB6zfYI+KitIDDzygAQMGyOVyadasWdWq5WL73K4+3LBhQ6V3\nYK3K73//e6u/auqYOB+n06l//OMf+u9//6vQ0FANHDhQY8aMkSQ1bty42tsDgKo0bNhQhw4dssaU\n4OBg/e53v9O0adOsn1R6+umndfz4cYWGhioiIkIHDhxQnz59amT/EydO1KOPPiqXy6W//OUvevvt\nt8+5lPlC3Xnnnfr973+v4cOHKyQkRBs3btStt95a5Wvp888/r65du6pr166KiopS165drQ+sK7Jm\nzRpdc801CggIUGxsrK699lpNmzZN0sWPCXfffbdef/11uVwu/fa3v9WcOXN0/fXXS5I6duyol19+\nWQ8++KBcLpfmzp1rBfQzpk6dqp07d+qKK66Qy+WqcB/PPvusBg0apAEDBqhVq1ZaunSpVq1aZV2F\neCF8sU1vhIeHa9OmTVq1apU6deokl8ulm266SV988YUkadCgQZo4caL69++v5s2b6w9/+IN1ZWB1\n3HzzzZo4cWK11nG5XEpOTtZjjz2mkydPevWcmjlzpp555hm5XC7deuut1a5TurDnAOzhML68BgOw\nyfTp07V69eoLCj1Abbd8+XLddtttOn78OL/bC6DOiIiI0GOPPVblh+I1LS4uTqNHj9Yjjzxi2z6B\n2oTnQO3AmXAAqGU2bdqkb775RsYY7dy5U0888YTGjBlDAAeAalq0aJGOHz+u4uJiPf/88/rqq680\natQof5cF2IbnQO1ECAeAWiYvL0+DBg1Ss2bNNHDgQMXHx+v555/3d1kAcMmZN2+ewsLC1LJlSy1Y\nsEDvvPOOOnfu7O+yANvwHKiduBwdAAAAAACbcCYcAAAAAACbEMIBAAAAALCJ098F1KRGjRqpZcuW\n/i4DAADLwYMHdeLECX+XUacw3gMAapPqjvV1KoS3bNlSe/fu9XcZAABY3G63v0uocxjvAQC1SXXH\nei5HBwAAAADAJoRwAAAAAABsQggHAAAAAMAmhHAAAAAAAGxCCAcAAAAAwCaEcAAAAAAAbEIIBwAA\nAADAJoRwAAAAAABsQggHAAAAAMAmPg/hkydPVkREhBwOh7Zt2yZJKi4u1rBhwxQVFaXY2FjdeOON\nys7OttYpKCjQ4MGDFRkZqZiYGK1fv97XZQIAAAAA4HM+D+EjR47Uxo0b1b59+3Lzk5KStHPnTm3f\nvl1Dhw7V+PHjrbaUlBTFx8crKytL6enpGjNmjEpKSnxdKgAAAAAAPuXzEN63b1+53e5y8xo3bqwh\nQ4bI4XBIkuLj4+XxeKz2xYsXa+LEiZKkXr16qU2bNlq3bp2vSwUAAAAAwKec/i5AkmbPnq2hQ4dK\nkg4fPqySkhKFhYVZ7REREcrJyTlnvbS0NKWlpVnTRUVFNVpXRMoK69+eWbfU6LYBAABwefrpe0w7\n8D4WqF38HsJTU1OVnZ2tDz/8sNrrJicnKzk52Zo++4w7AAAAAAC1iV/vjv7ss89q2bJleu+999S0\naVNJUnBwsJxOp/Lz863lPB6PwsPD/VUmAAAAAAA1wm8
hPC0tTQsXLtSqVavkcrnKtY0aNUpz5syR\nJGVmZmrfvn3q16+fP8oEAAAAAKDG+Pxy9AkTJmjFihXKz8/XTTfdpMDAQK1du1ZTp05Vx44dNWDA\nAElSo0aN9Mknn0iSnnnmGY0bN06RkZFq2LChFixYoAYNGvi6VAAAAAAAfMrnIXzu3LkVzjfGVLpO\naGioVq5c6auSAAAAAADwC79+JxwAAAAAgMsJIRwAAAAAAJsQwgEAAAAAsAkhHAAAAAAAmxDCAQAA\nAACwCSEcAAAAAACbEMIBAAAAALAJIRwAAAAAAJsQwgEAAAAAsAkhHAAAAAAAmxDCAQAAAACwCSEc\nAAAAAACbEMIBAAAAALAJIRwAAAAAAJsQwgEAAAAAsAkhHAAAAAAAmxDCAQAAAACwCSEcAAAAAACb\nEMIBAAAAALAJIRwAAAAAAJsQwgEAAAAAsAkhHAAAAAAAmxDCAQAAAACwCSEcAAAAAACbEMIBAAAA\nALAJIRwAAAAAAJsQwgEAAAAAsAkhHAAAAAAAmxDCAQAAAACwCSEcAAAAAACbEMIBAAAAALAJIRwA\nAFyU4uJiDRs2TFFRUYqNjdWNN96o7OxsSVJBQYEGDx6syMhIxcTEaP369dZ6VbUBAFBXEcIBAMBF\nS0pK0s6dO7V9+3YNHTpU48ePlySlpKQoPj5eWVlZSk9P15gxY1RSUnLeNgAA6ipCOAAAuCiNGzfW\nkCFD5HA4JEnx8fHyeDySpMWLF2vixImSpF69eqlNmzZat27dedsAAKirnP4uAAAA1C2zZ8/W0KFD\ndfjwYZWUlCgsLMxqi4iIUE5OTpVtZ0tLS1NaWpo1XVRU5Ns/AHVCRMoK2/blmXWLbfsCcOnjTDgA\nAKgxqampys7O1syZM2tsm8nJydq7d6/1CAgIqLFtAwBgN0I4AACoEc8++6yWLVum9957T02bNlVw\ncLCcTqfy8/OtZTwej8LDw6tsAwCgLiOEAwCAi5aWlqaFCxdq1apVcrlc1vxRo0Zpzpw5kqTMzEzt\n27dP/fr1O28bAAB1Fd8JBwAAF2Xv3r2aOnWqOnbsqAEDBkiSGjVqpE8++UTPPPOMxo0bp8jISDVs\n2FALFixQgwYNJKnKNgAA6iqfh/DJkyfr3Xff1Z49e7R161bFxcVJkrKyspSQkKBDhw6pefPmysjI\nUJcuXc7bBgAAahe32y1jTIVtoaGhWrlyZbXbAACoq3x+OfrIkSO1ceNGtW/fvtz8CRMmKCkpSd98\n842mTZumxMREr9oAAAAAALhU+TyE9+3bV263u9y8goICbd68WWPHjpUkjRgxQrm5ucrOzq6yDQAA\nAACAS5lfbsyWm5ur1q1by+k8fTW8w+FQeHi4cnJyqmw7W1pamtxut/Xgd0MBAAAAALXZJX13dH43\nFAAAAABwKfHL3dHbtWunvLw8lZaWyul0yhijnJwchYeHKygoqNI2AAAAAAAuZX45E96qVSv16NFD\nCxYskCQtXbpUbrdbnTt3rrINAAAAAIBLmc/PhE+YMEErVqxQfn6+brrpJgUGBio7O1tz585VYmKi\nUlNTFRQUpPT0dGudqtoAAAAAALhU+TyEz507t8L50dHR2rRpU7XbAAAAAAC4VF3SN2YDAAAAAOBS\nQggHAAAAAMAmhHAAAAAAAGxCCAcAAAAAwCaEcAAAAAAAbEIIBwAAAADAJoRwAAAAAABsQggHAAAA\nAMAmhHAAAAAAAGxCCAcAAAAAwCaEcAAAAAAAbOL0dwEAAAAAao+IlBW27s8z6xZb9wf4G2fCAQAA\nAACwCSEcAAAAAACbEMIBAAAAALAJIRwAAAAAAJsQwgEAAAAAsAkhHAAAAAAAmxDCAQAAAACwCSEc\nAAAAAACbEMIBAAAAALAJIRwAAAAAAJsQwgEAAAAAsAkhHAAAAAAAmxDCAQAAAACwCSEcAAAAAACb\nEMIBAAAAALAJIRw
AAAAAAJsQwgEAAAAAsAkhHAAAAAAAmxDCAQAAAACwCSEcAAAAAACbOP1dAAAA\n3opIWVFu2jPrFj9VAgAAcGE4Ew4AAAAAgE0I4QAAAAAA2IQQDgAAAACATQjhAAAAAADYhBAOAAAA\nAIBNCOEAAAAAANjEryH8X//6l3r06KG4uDjFxMTozTfflCQVFBRo8ODBioyMVExMjNavX+/PMgEA\nAAAAqBF++51wY4zGjh2rtWvXqlu3bvJ4PLryyis1fPhwpaSkKD4+Xu+//74yMzP161//Wrt371aD\nBg38VS4AAAAAABfNr2fCHQ6HCgsLJUlHjhxRcHCwGjVqpMWLF2vixImSpF69eqlNmzZat26dP0sF\nAAAAAOCi+e1MuMPh0KJFizR8+HA1a9ZM33//vZYtW6ajR4+qpKREYWFh1rIRERHKyck5ZxtpaWlK\nS0uzpouKimypHQAAAACAC+G3M+GlpaV6+umntWzZMu3Zs0cffvihxo0bp9LSUq+3kZycrL1791qP\ngIAAH1YMAAAAAMDF8VsI37Ztm/bv36++fftKOn3Zudvt1ueffy6n06n8/HxrWY/Ho/DwcH+VCgAA\nAABAjfBbCG/Xrp3y8vL03//+V5KUnZ2tb7/9VtHR0Ro1apTmzJkjScrMzNS+ffvUr18/f5UKAACq\nMHnyZEVERMjhcGjbtm3W/IiICEVHRysuLk5xcXFatGiR1ZaVlaXevXsrKipKvXr10o4dO/xROgAA\ntvPbd8JDQ0P16quv6je/+Y3q1aunU6dO6eWXX1Z4eLieeeYZjRs3TpGRkWrYsKEWLFjAndEBAKil\nRo4cqYcffljXXXfdOW2LFi1SXFzcOfMnTJigpKQkJSYmasmSJUpMTFRmZqYd5QIA4Fd+C+GSdPvt\nt+v2228/Z35oaKhWrlzph4oAAEB1nflqmbcKCgq0efNma6wfMWKEJk2apOzsbHXu3NkXJQIAUGv4\n9SfKAABA3XbnnXeqa9euuueee3Tw4EFJUm5urlq3bi2n8/S5AIfDofDw8Ap/CUU6/WsobrfbevBr\nKACASxkhHAAA+MT69ev1+eef67PPPlNISIgSEhIuaDv8GgoAoC7x6+XoAACg7jrzyyYNGjTQlClT\nFBUVJen/bs5aWloqp9MpY4xycnL4JRQAwGWBM+EAAKDGHTt2TIWFhdb0woUL1b17d0lSq1at1KNH\nDy1YsECStHTpUrndbr4PDgC4LHAmHAAAXJQJEyZoxYoVys/P10033aTAwECtXLlSI0aMUFlZmYwx\n6tixo+ZbOmr3AAAgAElEQVTPn2+tM3fuXCUmJio1NVVBQUFKT0/3418AAIB9COEAAOCizJ07t8L5\nW7durXSd6Ohobdq0yVclAQBQa3E5OgAAAAAANiGEAwAAAABgE0I4AAAAAAA28TqEf/DBB76sAwAA\nAACAOs/rED5jxgxFR0dr9uzZOnLkiC9rAgAAAACgTvI6hP/nP//R3//+d3355ZeKiorS/fffr6++\n+sqXtQEAAAAAUKdU6zvh3bt317x58/T+++/rn//8p7p166Ybb7xRX3zxha/qAwAAAACgzqhWCF+9\nerWGDh2q4cOH67e//a3y8/M1YcIE/frXv/ZVfQAAAAAA1BlObxf82c9+ppCQEE2ePFnDhw9X/fr1\nJUkjR47U66+/7rMCAQAAAACoK7wO4QsWLFDPnj0rbHvvvfdqrCAAAAAAAOoqry9H37Jli7777jtr\n+vDhw5o3b55PigIAAAAAoC7yOoS/8soratGihTUdHBysV155xSdFAQAAAABQF3l9Obox5px5ZWVl\nNVoMAAAALg0RKSts3Z9n1i227g8AfMXrM+GtW7fW4sWLrelFixapdevWPikKAAAAAIC6yOsz4S+8\n8IKGDh2qhx9+WJLUtGlTvfPOOz4rDAAAAACAusbrEH7llVfqq6++0s6dOyVJ0dHR1
s+UAQAAAACA\n8/M6hEuSw+GQy+VSaWmp9u3bJ0kKDw/3SWEAAAAAANQ1XofwjIwMTZ48WQ0aNFC9eqe/Su5wOFRQ\nUOCz4gAAAAAAqEu8DuF//OMflZmZqejoaF/WAwAAAABAneX13dFDQkII4AAAAAAAXASvQ/iwYcP0\nwgsvqKCgQEeOHLEeAAAAAADAO15fjv7oo49KkpKTk+VwOGSMkcPhUFlZmc+KAwAAAACgLvE6hJ86\ndcqXdQAAAAAAUOd5fTm6JG3ZskV//etfJUmFhYXKy8vzSVEAAAAAANRFXofwV155RXfffbemT58u\nSTp8+LDGjBnjq7oAAAAAAKhzvL4c/dVXX9XHH3+s3r17S5I6deqkgwcP+qwwAAAAADhbRMoK2/bl\nmXWLbfvC5cPrM+GNGjVSkyZNys1zOr3O8AAAAAAAXPa8DuEtW7bUN998I4fDIUnKyMhQeHi4zwoD\nAAAAAKCu8fpU9gsvvKDbb79dX3/9tdq1a6egoCD985//9GVtAAAAAADUKV6H8M6dO+uTTz7Rzp07\nZYxRdHS06tev78vaAAAAAACoU7wO4Tk5OZKkZs2aSZL27dsnSVySDgAAAACAl7wO4T179pTD4ZAx\nRsXFxfrxxx8VHBysgoICX9YHAAAAAECd4XUIP/vnyJYtW6bt27fXeEEAAAAAANRVXt8d/WzDhw/X\nihUX9xt9J06c0KRJkxQZGamuXbtq7NixkqSsrCz17t1bUVFR6tWrl3bs2HFR+wEAAAAAoDbw+kz4\nkSNHrH+XlZXpk08+KTfvQqSkpMjhcFg/fZafny9JmjBhgpKSkpSYmKglS5YoMTFRmZmZF7UvAAAA\nAAD8zesQ7nK5rO+E169fX5GRkXrxxRcveMfHjh3T66+/rr1791q/PR4WFqaCggJt3rxZK1eulCSN\nGDFCkyZNUnZ2tjp37nzB+wMAAAAAwN+8DuGnTp2q0R1/++23atGihVJTU7V69Wo1adJE06dPl8vl\nUuvWreV0ni7N4XAoPDxcOTk554TwtLQ0paWlWdNFRUU1WiMAAAAAADXpgr8TfrFKS0u1Z88eXXXV\nVdq8ebNefPFFjR49WqWlpV5vIzk5WXv37rUeAQEBPqwYAAAAAICL4/WZ8Hr16lmXjf+UMUYOh0Nl\nZWXV2nF4eLjq1aunO+64Q5LUvXt3dejQQXv27FFeXp5KS0vldDpljFFOTg6/Rw4AAAAAuOR5fSZ8\nxowZSklJ0Z49e7Rnzx498sgjmjFjho4ePXpBN2gLCQnRwIED9cEHH0iSdu/erd27d6tPnz7q0aOH\nFixYIElaunSp3G433wcHAAAAAFzyvD4T/vbbb2vLli3W9NNPP62ePXvq0UcfveCdz5kzR/fcc4+m\nTZumevXqae7cuWrbtq3mzp2rxMREpaamKigoSOnp6Re8DwAAAAAAaguvQ/jRo0dVUFCgVq1aSZIK\nCgp09OjRi9p5x44dtWbNmnPmR0dHa9OmTRe1bQAAAAAAahuvQ/jUqVMVGxurIUOGSJLef/99TZ8+\n3Vd1AQAAAABQ53gdwidMmKA+ffpYZ66Tk5PVpUsXnxUGAMDFiEhZUW7aM+sWP1UCAADwf7wO4ZIU\nHBysrl27qn///iotLdXJkyfVsGFDX9UGAAAAAECd4vXd0ZcsWaL4+HjdddddkqQdO3Zo2LBhPisM\nAAAAAIC6xusQPnPmTH322WdyuVySpNjYWO3Zs8dnhQEAAAAAUNd4HcLr16+v4ODgcvO4FB0AAAAA\nAO95/Z3wwMBAHThwQA6HQ5L04YcfqkWLFj4rDAAAu3ATNwAAYBevQ/isWbN08803a9euXbruuuu0\ne/durVix4vwrAgAAAAAASdUI4b169dKaNWv00UcfyRij3r17W98PBwCgJnFmGgAA1FVehfCysjJ1\n7dpVX331lW6++WZf1wQAAAAAQJ3k1Y3Z6tevr
5YtW+rHH3/0dT0AAAAAANRZXt8dvXPnzurTp49S\nU1P14osvWg8AAHB5mzx5siIiIuRwOLRt2zZrflZWlnr37q2oqCj16tVLO3bs8KoNAIC6zOsQfurU\nKcXFxSkrK0tbt27V1q1byw20AADg8jRy5Eht3LhR7du3Lzd/woQJSkpK0jfffKNp06YpMTHRqzYA\nAOqy834nPCkpSa+++qrS09P1zjvvaOjQoXbUBQAALhF9+/Y9Z15BQYE2b96slStXSpJGjBihSZMm\nKTs7W0FBQZW2de7c2dbaAQCw23nPhG/evNn691NPPeXTYgAAQN2Qm5ur1q1by+k8/Xm/w+FQeHi4\ncnJyqmyrSFpamtxut/UoKiqy7e8AAKCmeX05uiQZY3xVBwAAQIWSk5O1d+9e6xEQEODvkgAAuGDn\nvRz9+PHj+uKLL2SMUXFxsfXvM7p16+bTAgEAwKWnXbt2ysvLU2lpqZxOp4wxysnJUXh4uIKCgipt\nAwCgrvMqhP/qV7+ypn/6b4fDoV27dvmmMgAAcMlq1aqVevTooQULFigxMVFLly6V2+22vvNdVRsA\nAHXZeUO4x+OxoQwAAHCpmjBhglasWKH8/HzddNNNCgwMVHZ2tubOnavExESlpqYqKChI6enp1jpV\ntQEAUJedN4QDAABUZe7cuRXOj46O1qZNm6rdBgBAXVatG7MBAAAAAIALRwgHAAAAAMAmhHAAAAAA\nAGxCCAcAAAAAwCaEcAAAAAAAbEIIBwAAAADAJoRwAAAAAABsQggHAAAAAMAmhHAAAAAAAGxCCAcA\nAAAAwCaEcAAAAAAAbEIIBwAAAADAJoRwAAAAAABsQggHAAAAAMAmhHAAAAAAAGxCCAcAAAAAwCaE\ncAAAAAAAbEIIBwAAAADAJoRwAAAAAABsUitCeHp6uhwOh5YvXy5JKigo0ODBgxUZGamYmBitX7/e\nzxUCAAAAAHDx/B7CPR6P5s2bp/j4eGteSkqK4uPjlZWVpfT0dI0ZM0YlJSV+rBIAAAAAgIvn1xB+\n6tQpjR8/Xi+99JIaNWpkzV+8eLEmTpwoSerVq5fatGmjdevW+atMAAAAAABqhF9DeFpamvr06aOe\nPXta8w4fPqySkhKFhYVZ8yIiIpSTk1Ph+m6323oUFRXZUjcAAAAAABfC6a8df/nll1q6dOlFfd87\nOTlZycnJ1rTb7a6J0gAAAAAA8Am/nQnfsGGDPB6PIiMjFRERoY8//lhJSUlavHixnE6n8vPzrWU9\nHo/Cw8P9VSoAAAAAADXCbyH8vvvuU15enjwejzwej+Lj4/Xqq6/qvvvu06hRozRnzhxJUmZmpvbt\n26d+/fr5q1QAAAAAAGqE3y5Hr8ozzzyjcePGKTIyUg0bNtSCBQvUoEEDf5cFAAAAAMBFqTUhfO3a\ntda/Q0NDtXLlSv8VAwAAAACAD/j9d8IBAAAAALhcEMIBAAAAALAJIRwAAAAAAJsQwgEAAAAAsAkh\nHAAAAAAAmxDCAQAAAACwCSEcAAAAAACbEMIBAAAAALCJ098FAAAubxEpK8pNe2bd4qdKAAAAfI8z\n4QAAAAAA2IQQDgAAAACATQjhAAAAAADYhBAOAAAAAIBNCOEAAAAAANiEEA4AAAAAgE0I4QAAAAAA\n2IQQDgAAAACATQjhAAAAAADYhBAOAAAAAIBNnP4uAAAAAADqgoiUFbbuzzPrFlv3h5rBmXAAAAAA\nAGxCCAcAAAAAwCaEcAAAAAAAbEIIBwAAAADAJoRwAAAAAABswt3RAQA17uy7w3L31stbRESEGjVq\npCZNmkiSHnnkEY0ePVpZWVlKSEjQoUOH1Lx5c2VkZKhLly5+rhYAAN8ihAMAAJ9btGiR4uLiys2b\nMGGCkpKSlJiYqCVLligxMVGZmZl+qhAAAHtwOToAALBdQUGBNm/erLFjx0qSRowYodzcXGVnZ/u5\nMgAAfIsQD
gAAfO7OO+9U165ddc899+jgwYPKzc1V69at5XSevijP4XAoPDxcOTk556yblpYmt9tt\nPYqKiuwuHwCAGsPl6AAAwKfWr1+v8PBwlZSU6LHHHlNCQoL++Mc/er1+cnKykpOTrWm32+2LMmud\ns++t4GvcuwEA7EEIBwBcNrhhnH+Eh4dLkho0aKApU6YoKipK7dq1U15enkpLS+V0OmWMUU5OjrUs\nAAB1FZejAwAAnzl27JgKCwut6YULF6p79+5q1aqVevTooQULFkiSli5dKrfbrc6dO/urVAAAbMGZ\ncAAA4DMHDhzQiBEjVFZWJmOMOnbsqPnz50uS5s6dq8TERKWmpiooKEjp6el+rhYAAN8jhAMAAJ/p\n2LGjtm7dWmFbdHS0Nm3aZHNFAAD4F5ejAwAAAABgE0I4AAAAAAA2IYQDAAAAAGATQjgAAAAAADYh\nhAMAAAAAYBO/hfDi4mINGzZMUVFRio2N1Y033qjs7GxJUkFBgQYPHqzIyEjFxMRo/fr1/ioTAAAA\nAIAa49cz4UlJSdq5c6e2b9+uoUOHavz48ZKklJQUxcfHKysrS+np6RozZoxKSkr8WSoAAAAAABfN\nbyG8cePGGjJkiBwOhyQpPj5eHo9HkrR48WJNnDhRktSrVy+1adNG69at81epAAAAAADUCKe/Czhj\n9uzZGjp0qA4fPqySkhKFhYVZbREREcrJyTlnnbS0NKWlpVnTRUVFttQKADgtImVFuWnPrFv8VAkA\nAMCloVbcmC01NVXZ2dmaOXNmtdZLTk7W3r17rUdAQICPKgQAAAAA4OL5PYQ/++yzWrZsmd577z01\nbdpUwcHBcjqdys/Pt5bxeDwKDw/3Y5UAAAAAAFw8v4bwtLQ0LVy4UKtWrZLL5bLmjxo1SnPmzJEk\nZWZmat++ferXr5+/ygQAAAAAoEb47Tvhe/fu1dSpU9WxY0cNGDBAktSoUSN98skneuaZZzRu3DhF\nRkaqYcOGWrBggRo0aOCvUgEAAAAAqBF+C+Fut1vGmArbQkNDtXLlSpsrAgAAAADAt/z+nXAAAAAA\nAC4XhHAAAAAAAGxCCAcAAAAAwCaEcAAAAAAAbEIIBwAAAADAJoRwAAAAAABsQggHAAAAAMAmhHAA\nAAAAAGxCCAcAAAAAwCaEcAAAAAAAbEIIBwAAAADAJoRwAAAAAABsQggHAAAAAMAmhHAAAAAAAGxC\nCAcAAAAAwCaEcAAAAAAAbEIIBwAAAADAJoRwAAAAAABsQggHAAAAAMAmhHAAAAAAAGxCCAcAAAAA\nwCaEcAAAAAAAbEIIBwAAAADAJoRwAAAAAABsQggHAAAAAMAmTn8XAADwvYiUFeWmPbNu8VMlAAAA\nlzfOhAMAAAAAYBPOhAMAyuGsOQAAgO9wJhwAAAAAAJtwJhwAAAAALgNnX+3ma1xNVzFCOABcxrj0\nHAAAwF5cjg4AAAAAgE0I4QAAAAAA2IQQDgAAAACATfhOOAAAuGxxkyIAqB0up9djzoQDAAAAAGAT\nQjgAAAAAADbhcnQA8BN+HgwAAODyw5lwAAAAAABsUmvPhGdlZSkhIUGHDh1S8+bNlZGRoS5duvi7\nLJwHZ/Zgh9p8nFVUW22uF/A3xnsAwOWm1p4JnzBhgpKSkvTNN99o2rRpSkxM9HdJAACghjHeAwAu\nN7UyhBcUFGjz5s0aO3asJGnEiBHKzc1Vdna2nysDAAA1hfEeAHA5chhjjL+LONuWLVs0ZswY7dy5\n05p3zTXXaNasWbrhhhuseWlpaUpLS7Om8/PzFRYWViM1FBUVKSAgoEa2dbmgz6qPPqs++qz66LML\nU1P9dvDgQZ04caIGKqp7asN4X5vwXLUH/Wwf+toe9LN9Kuvr6o71tfY74d5ITk5WcnKyT7btdru1\nd+9en2y7rqLPqo8+qz76rProswtDv9UevhzvaxOOOXvQz/ahr+1BP9unpvq
6Vl6O3q5dO+Xl5am0\ntFSSZIxRTk6OwsPD/VwZAACoKYz3AIDLUa0M4a1atVKPHj20YMECSdLSpUvldrvVuXNnP1cGAABq\nCuM9AOByVH/69OnT/V1ERa699lo9/vjj+vOf/6zMzEylp6crNDTU9hpQPfRZ9dFn1UefVR99dmHo\nN9+rDeN9bcIxZw/62T70tT3oZ/vURF/XyhuzAQAAAABQF9XKy9EBAAAAAKiLCOEAAAAAANiEEH6W\nrKws9e7dW1FRUerVq5d27Njh75JqvcmTJysiIkIOh0Pbtm3zdzm1XnFxsYYNG6aoqCjFxsbqxhtv\nVHZ2tr/LuiQMGjRI3bp1U1xcnK6//npt3brV3yVdEtLT0+VwOLR8+XJ/l3JJiIiIUHR0tOLi4hQX\nF6dFixb5uyRc4qp63S8oKNDgwYMVGRmpmJgYrV+/3lqvqjZUrrKxoqr3eLz/u3BnjzEc0zWvsnGJ\nY7pmnThxQpMmTVJkZKS6du2qsWPHSvJRPxuUM2DAAJOenm6MMeatt94yV199tX8LugSsW7fO5Obm\nmvbt25utW7f6u5xa7/jx42bFihXm1KlTxhhjXnrpJdOvXz//FnWJ+P77761/L1u2zHTr1s2P1Vwa\ndu/eba699loTHx9v3n77bX+Xc0ngtQw1rarX/bvuuss8+eSTxhhjPv30U9O2bVtz8uTJ87ahcpWN\nFVW9x+P934WpaIzhmK55lY1LHNM1a8qUKWbSpEnWa3VeXp4xxjf9TAj/iQMHDpjAwEBTUlJijDHm\n1KlTJjQ01GRlZfm5sksDb1wvTGZmpmnfvr2/y7jkpKenm9jYWH+XUauVlZWZgQMHms2bN5t+/foR\nwr3Eaxl87aev+82aNbPe6BljTK9evcyqVavO2wbvnBkrqnqPx/u/C1PZGMMxXfMqGpc4pmtWUVGR\nCQwMND/88EO5+b7qZ6cvTuVfqnJzc9W6dWs5nae7xeFwKDw8XDk5OfxmKXxm9uzZGjp0qL/LuGTc\neeedWrNmjSTpX//6l5+rqd3S0tLUp08f9ezZ09+lXHLuvPNOGWN0zTXXaNasWWrZsqW/S0IdcuZ1\n//DhwyopKVFYWJjVFhERoZycnCrbcH5njxVVvcdr3rw57/8uQEVjDMe075w9LnFM16xvv/1WLVq0\nUGpqqlavXq0mTZpo+vTpcrlcPulnvhMO+FFqaqqys7M1c+ZMf5dyyZg/f75yc3P19NNPa9q0af4u\np9b68ssvtXTpUj322GP+LuWSs379en3++ef67LPPFBISooSEBH+XhDqE1317MFb4FmOMvRiXfK+0\ntFR79uzRVVddpc2bN+vFF1/U6NGjVVpa6pP9EcJ/ol27dsrLy7M62xijnJwchYeH+7ky1EXPPvus\nli1bpvfee09Nmzb1dzmXnISEBK1Zs0aHDx/2dym10oYNG+TxeBQZGamIiAh9/PHHSkpK0v/+7//6\nu7Ra78xrfoMGDTRlyhRt2LDBzxWhrjj7dT84OFhOp1P5+fnWMh6PR+Hh4VW2wXtnxgq3213pezze\n/1VfZWPM4sWLOaZ9oKJxqarjlmO6+sLDw1WvXj3dcccdkqTu3burQ4cO2rNnj0/6mRD+E61atVKP\nHj20YMECSdLSpUvldru5bAM1Li0tTQsXLtSqVavkcrn8Xc4lobCwUPv377emly9fruDgYLVo0cKP\nVdVe9913n/Ly8uTxeOTxeBQfH69XX31V9913n79Lq9WOHTumwsJCa3rhwoXq3r27HytCXVHZ6/6o\nUaM0Z84cSVJmZqb27dunfv36nbcNFatsrKjqPR7v/6qvqjGGY7pmVTYucUzXrJCQEA0cOFAffPCB\nJGn37t3avXu3+vTp45t+rrFvs9cRX3/9tYmPjzeRkZGmZ8+e5vPPP/d3SbVeUlKSadu2ralfv75p\n1aqV6dSpk79LqtVyc3ONJNOxY0cTGxt
rYmNjzTXXXOPvsmo9j8djevXqZWJiYky3bt3MwIEDuXlW\nNXBjNu98++23Ji4uznTt2tXExMSYX/3qV2b37t3+LguXuKpe9/Pz882NN95oOnfubK666irz73//\n21qvqjZUrKqxoqr3eLz/uzg/HWM4pmtWVeMSx3TN+vbbb03//v2t148lS5YYY3zTzw5jjPHRBwoA\nAAAAAOAnuBwdAAAAAACbEMIBAAAAALAJIRwAAAAAAJsQwgEAAAAAsAkhHAAAAAAAmxDCAQAAAACw\nCSEcqIMiIiIUHR2tuLg4RUdHa9asWeddZ9u2bfr73/9ebl5cXJyOHj3qqzIBALgsLFu2TD179lRc\nXJyuvPJK3XDDDTp16pQt+/Z4PHK5XLbsC4B3nP4uAIBvLFq0SHFxcdq3b5+uuuoq3XDDDbrmmmsq\nXX7btm1avny5brvttnLzAADAhcvLy1NSUpK2bNmi9u3bS5I+++wzORwOP1cGwF84Ew7UcW3bttWV\nV16pPXv2KD8/XwMGDFDPnj3VpUsXTZo0SadOnVJBQYGeeOIJrVmzRnFxcZo4caIkyeFwqLCwUNLp\ns+tPPPGErr32WnXo0EFPP/20tY+vv/5a1157rbp06aLhw4dr0KBBysjI8MefCwBArXLgwAHVr19f\nLVq0sOb16NHDCuEPPvigevXqpbi4OPXt21c7d+60lnM4HPrTn/6kn//854qIiNDy5cs1c+ZMXX31\n1YqMjNTatWsl/d/Z7gcffFDdunVTly5dtHr16grryczM1A033KCrr75a3bt311tvvVXhcomJiUpK\nStIvfvELdejQQXfffbc+/fRT9e/f//9r7/5CmmrjOIB//TMaqBMho3/2x0Ust7VdzGFRQSWtiJpI\nZBRlXUVQN0LYhd0EeRGBQUE30aB/gtBFQXSRQcFamYESUonRibCxFjXMlamz73shHlj1vs3++L7u\n/X6udjjPec7zOze//XaeZw/Ky8vR0NBgto3FYti+fTv8fj/cbjeamprMcz+Kr7m5GX6/H4sXL0Yo\nFJr8AxaZjigiWWfhwoXs6uoiST59+pR2u53xeJxDQ0McHBwkSaZSKW7evJmtra0kyVAoxGAwmNYP\nACYSCbPPQ4cOkSTfvn1Lm83G/v5+kqTP5+P58+dJkk+ePOGMGTMYCoX+eJwiIiL/dWNjY6ytrWVJ\nSQlramp44sQJM3+SZDweNz+3trYyEAiYxwB46tQpkmR7ezsLCgrM/NrW1kafz0eSNAyDAHju3DmS\n5P3791laWsoPHz7QMAwWFxeTJBOJBL1eL6PRKMnxfF5WVpY2ngn19fWsqqri0NAQh4eHabfbWVNT\nw5GRESaTSc6aNYs9PT0kyQ0bNvDOnTskydHRUQYCAba1tWUU38mTJ0mOf18pLCzk6OjopJ+xyHSj\n6egiWaqurg65ubno7e1FS0sLSktL8enTJzQ2NiIcDoMk4vE4XC5X2hT0f7Jz504AwMyZM1FeXg7D\nMFBUVITu7m7s2bMHALBs2TKsWrXqj8UlIiIyneTm5uLq1at49uwZ7t69i5s3b+L48eN49OgRlixZ\nglu3buH06dMYHBzEly9f8P79+7Tr6+rqAAA+nw8fP340c7bf70dfX5/ZLj8/H3v37gUAVFVVYe7c\nuejq6sKCBQvMNpFIBC9evMCmTZvS7tHb24t58+Z9M/ZgMAir1QoAcLvdCAQCsFgssFgsqKioQF9f\nHxYtWoTbt2/jzZs35nXJZNJ84/2j+Hbt2gUAcDgcyM/PRywWw/z58zN/wCLTkIpwkSw1sSa8vb0d\nW7Zswbp163Dt2jXE43F0dHTAarWioaEBnz9/zrjPiUQMAHl5eUilUt9tp3VuIiIi6RwOBxwOB/bv\n34+NGzfi+vXr2LZtGw4ePIjOzk7Y7XY8fvwYa9asSbtuIvfm5eV9c/x3eXjC1/mYJJxOJyKRSEZj\n/jr
vf+97AEkAwIMHD9LOA8CrV68yji/TmESygdaEi2S56upqHDhwAE1NTUgkEpg9ezasVitisVja\nOjCbzYaBgYFJ92+z2eDxeHDp0iUA47+mh8Ph3zZ+ERGR6ez169e4d++eeZxIJGAYBux2OwYGBmCx\nWDBnzhyQxJkzZ376PqlUChcvXgQAPHz4ENFoFF6vN63NypUrYRhG2nrx7u5ujIyM/PR9CwsLsXbt\n2rSdWKLRKPr7+39rfCLZREW4yP/A0aNHEQ6HsXXrVnR0dMDpdGL37t2orq4226xfvx7Dw8NYvny5\n+cdsmbpw4QLOnj0Ll8uFxsZGVFZWajsUERERjBfHx44dw9KlS+H1erF69WrU19cjGAzC7XZjx44d\ncDqdqKysTJs6PlnFxcXo6emBx+PBvn37cOXKFRQVFaW1KSkpwY0bN9Dc3AyPx4OKigocOXLkl7dL\nu3z5Mp4/fw6XywW3243a2lq8e/fut8Ynkk1yODGHRETkJyWTSRQUFCAnJweGYWDFihXo7OxEWVnZ\nv/kS0NAAAACCSURBVD00ERGRrPfy5Ut4vV5zRxMR+W/TmnAR+WWRSASHDx8GAIyNjaGlpUUFuIiI\niIjId+hNuIiIiIiIiMgU0ZpwERERERERkSmiIlxERERERERkiqgIFxEREREREZkiKsJFRERERERE\npoiKcBEREREREZEpoiJcREREREREZIr8BTe670xxn55aAAAAAElFTkSuQmCC\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# ploting population\n", "fig = plt.figure(figsize=(15, 5), dpi= 80, facecolor='w', edgecolor='k')\n", "plt.subplot(1, 2, 1)\n", "bins, hist = df_ratio.select(\"Rating\").rdd.flatMap(lambda x: x).histogram(70)\n", "hist = np.asarray(hist)\n", "bins = np.asarray(bins)\n", "width = 0.7 * (bins[1] - bins[0])\n", "center = (bins[:-1] + bins[1:]) / 2\n", "\n", "plt.bar(center, hist, align='center', width=width)\n", "plt.title(\"Population distribution: Rating\")\n", "plt.xlabel(\"Rating\")\n", "plt.ylabel(\"Frequency\")\n", "\n", "# ploting sampling\n", "plt.subplot(1, 2, 2)\n", "hist, bins = np.histogram(samp_mean_ls)\n", "width = 0.7 * (bins[1] - bins[0])\n", "center = (bins[:-1] + bins[1:]) / 2\n", "plt.bar(center, hist, align='center', width=width)\n", "plt.title(\"Sampling distribution of mean: Rating\")\n", "plt.xlabel(\"Sample mean\")\n", "plt.show()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": 
"python", "pygments_lexer": "ipython2", "version": "2.7.10" } }, "nbformat": 4, "nbformat_minor": 2 }