{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os.path\n", "import urllib.request\n", "import gzip\n", "import shutil\n", "\n", "BASE_URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/'\n", "DATA_FILES = ('winequality-red.csv', 'winequality-white.csv', 'winequality.names')\n", "\n", "# Fetch each dataset file once; later runs reuse the local copy.\n", "for filename in DATA_FILES:\n", "    if not os.path.exists(filename):\n", "        # Download under a temporary name and rename on success, so an\n", "        # interrupted transfer is never mistaken for a complete file.\n", "        urllib.request.urlretrieve(BASE_URL + filename, filename + '.part')\n", "        os.replace(filename + '.part', filename)\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "from pyspark.sql import SparkSession\n", "from pyspark.sql.functions import col\n", "\n", "spark = SparkSession.builder.appName('wine-quality').getOrCreate()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "root\n", " |-- fixed acidity: double (nullable = true)\n", " |-- volatile acidity: double (nullable = true)\n", " |-- citric acid: double (nullable = true)\n", " |-- residual sugar: double (nullable = true)\n", " |-- chlorides: double (nullable = true)\n", " |-- free sulfur dioxide: double (nullable = true)\n", " |-- total sulfur dioxide: double (nullable = true)\n", " |-- density: double (nullable = true)\n", " |-- pH: double (nullable = true)\n", " |-- sulphates: double (nullable = true)\n", " |-- alcohol: double (nullable = true)\n", " |-- quality: integer (nullable = true)\n", "\n" ] }, { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: 
top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>fixed acidity</th>\n", " <th>volatile acidity</th>\n", " <th>citric acid</th>\n", " <th>residual sugar</th>\n", " <th>chlorides</th>\n", " <th>free sulfur dioxide</th>\n", " <th>total sulfur dioxide</th>\n", " <th>density</th>\n", " <th>pH</th>\n", " <th>sulphates</th>\n", " <th>alcohol</th>\n", " <th>quality</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>7.4</td>\n", " <td>0.700</td>\n", " <td>0.00</td>\n", " <td>1.9</td>\n", " <td>0.076</td>\n", " <td>11.0</td>\n", " <td>34.0</td>\n", " <td>0.9978</td>\n", " <td>3.51</td>\n", " <td>0.56</td>\n", " <td>9.4</td>\n", " <td>5</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>7.8</td>\n", " <td>0.880</td>\n", " <td>0.00</td>\n", " <td>2.6</td>\n", " <td>0.098</td>\n", " <td>25.0</td>\n", " <td>67.0</td>\n", " <td>0.9968</td>\n", " <td>3.20</td>\n", " <td>0.68</td>\n", " <td>9.8</td>\n", " <td>5</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>7.8</td>\n", " <td>0.760</td>\n", " <td>0.04</td>\n", " <td>2.3</td>\n", " <td>0.092</td>\n", " <td>15.0</td>\n", " <td>54.0</td>\n", " <td>0.9970</td>\n", " <td>3.26</td>\n", " <td>0.65</td>\n", " <td>9.8</td>\n", " <td>5</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>11.2</td>\n", " <td>0.280</td>\n", " <td>0.56</td>\n", " <td>1.9</td>\n", " <td>0.075</td>\n", " <td>17.0</td>\n", " <td>60.0</td>\n", " <td>0.9980</td>\n", " <td>3.16</td>\n", " <td>0.58</td>\n", " <td>9.8</td>\n", " <td>6</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>7.4</td>\n", " <td>0.700</td>\n", " <td>0.00</td>\n", " <td>1.9</td>\n", " <td>0.076</td>\n", " <td>11.0</td>\n", " <td>34.0</td>\n", " <td>0.9978</td>\n", " <td>3.51</td>\n", " <td>0.56</td>\n", " <td>9.4</td>\n", " <td>5</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", 
" <td>7.4</td>\n", " <td>0.660</td>\n", " <td>0.00</td>\n", " <td>1.8</td>\n", " <td>0.075</td>\n", " <td>13.0</td>\n", " <td>40.0</td>\n", " <td>0.9978</td>\n", " <td>3.51</td>\n", " <td>0.56</td>\n", " <td>9.4</td>\n", " <td>5</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>7.9</td>\n", " <td>0.600</td>\n", " <td>0.06</td>\n", " <td>1.6</td>\n", " <td>0.069</td>\n", " <td>15.0</td>\n", " <td>59.0</td>\n", " <td>0.9964</td>\n", " <td>3.30</td>\n", " <td>0.46</td>\n", " <td>9.4</td>\n", " <td>5</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>7.3</td>\n", " <td>0.650</td>\n", " <td>0.00</td>\n", " <td>1.2</td>\n", " <td>0.065</td>\n", " <td>15.0</td>\n", " <td>21.0</td>\n", " <td>0.9946</td>\n", " <td>3.39</td>\n", " <td>0.47</td>\n", " <td>10.0</td>\n", " <td>7</td>\n", " </tr>\n", " <tr>\n", " <th>8</th>\n", " <td>7.8</td>\n", " <td>0.580</td>\n", " <td>0.02</td>\n", " <td>2.0</td>\n", " <td>0.073</td>\n", " <td>9.0</td>\n", " <td>18.0</td>\n", " <td>0.9968</td>\n", " <td>3.36</td>\n", " <td>0.57</td>\n", " <td>9.5</td>\n", " <td>7</td>\n", " </tr>\n", " <tr>\n", " <th>9</th>\n", " <td>7.5</td>\n", " <td>0.500</td>\n", " <td>0.36</td>\n", " <td>6.1</td>\n", " <td>0.071</td>\n", " <td>17.0</td>\n", " <td>102.0</td>\n", " <td>0.9978</td>\n", " <td>3.35</td>\n", " <td>0.80</td>\n", " <td>10.5</td>\n", " <td>5</td>\n", " </tr>\n", " <tr>\n", " <th>10</th>\n", " <td>6.7</td>\n", " <td>0.580</td>\n", " <td>0.08</td>\n", " <td>1.8</td>\n", " <td>0.097</td>\n", " <td>15.0</td>\n", " <td>65.0</td>\n", " <td>0.9959</td>\n", " <td>3.28</td>\n", " <td>0.54</td>\n", " <td>9.2</td>\n", " <td>5</td>\n", " </tr>\n", " <tr>\n", " <th>11</th>\n", " <td>7.5</td>\n", " <td>0.500</td>\n", " <td>0.36</td>\n", " <td>6.1</td>\n", " <td>0.071</td>\n", " <td>17.0</td>\n", " <td>102.0</td>\n", " <td>0.9978</td>\n", " <td>3.35</td>\n", " <td>0.80</td>\n", " <td>10.5</td>\n", " <td>5</td>\n", " </tr>\n", " <tr>\n", " <th>12</th>\n", " <td>5.6</td>\n", " <td>0.615</td>\n", " 
<td>0.00</td>\n", " <td>1.6</td>\n", " <td>0.089</td>\n", " <td>16.0</td>\n", " <td>59.0</td>\n", " <td>0.9943</td>\n", " <td>3.58</td>\n", " <td>0.52</td>\n", " <td>9.9</td>\n", " <td>5</td>\n", " </tr>\n", " <tr>\n", " <th>13</th>\n", " <td>7.8</td>\n", " <td>0.610</td>\n", " <td>0.29</td>\n", " <td>1.6</td>\n", " <td>0.114</td>\n", " <td>9.0</td>\n", " <td>29.0</td>\n", " <td>0.9974</td>\n", " <td>3.26</td>\n", " <td>1.56</td>\n", " <td>9.1</td>\n", " <td>5</td>\n", " </tr>\n", " <tr>\n", " <th>14</th>\n", " <td>8.9</td>\n", " <td>0.620</td>\n", " <td>0.18</td>\n", " <td>3.8</td>\n", " <td>0.176</td>\n", " <td>52.0</td>\n", " <td>145.0</td>\n", " <td>0.9986</td>\n", " <td>3.16</td>\n", " <td>0.88</td>\n", " <td>9.2</td>\n", " <td>5</td>\n", " </tr>\n", " <tr>\n", " <th>15</th>\n", " <td>8.9</td>\n", " <td>0.620</td>\n", " <td>0.19</td>\n", " <td>3.9</td>\n", " <td>0.170</td>\n", " <td>51.0</td>\n", " <td>148.0</td>\n", " <td>0.9986</td>\n", " <td>3.17</td>\n", " <td>0.93</td>\n", " <td>9.2</td>\n", " <td>5</td>\n", " </tr>\n", " <tr>\n", " <th>16</th>\n", " <td>8.5</td>\n", " <td>0.280</td>\n", " <td>0.56</td>\n", " <td>1.8</td>\n", " <td>0.092</td>\n", " <td>35.0</td>\n", " <td>103.0</td>\n", " <td>0.9969</td>\n", " <td>3.30</td>\n", " <td>0.75</td>\n", " <td>10.5</td>\n", " <td>7</td>\n", " </tr>\n", " <tr>\n", " <th>17</th>\n", " <td>8.1</td>\n", " <td>0.560</td>\n", " <td>0.28</td>\n", " <td>1.7</td>\n", " <td>0.368</td>\n", " <td>16.0</td>\n", " <td>56.0</td>\n", " <td>0.9968</td>\n", " <td>3.11</td>\n", " <td>1.28</td>\n", " <td>9.3</td>\n", " <td>5</td>\n", " </tr>\n", " <tr>\n", " <th>18</th>\n", " <td>7.4</td>\n", " <td>0.590</td>\n", " <td>0.08</td>\n", " <td>4.4</td>\n", " <td>0.086</td>\n", " <td>6.0</td>\n", " <td>29.0</td>\n", " <td>0.9974</td>\n", " <td>3.38</td>\n", " <td>0.50</td>\n", " <td>9.0</td>\n", " <td>4</td>\n", " </tr>\n", " <tr>\n", " <th>19</th>\n", " <td>7.9</td>\n", " <td>0.320</td>\n", " <td>0.51</td>\n", " <td>1.8</td>\n", " 
<td>0.341</td>\n", " <td>17.0</td>\n", " <td>56.0</td>\n", " <td>0.9969</td>\n", " <td>3.04</td>\n", " <td>1.08</td>\n", " <td>9.2</td>\n", " <td>6</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " fixed acidity volatile acidity citric acid residual sugar chlorides \\\n", "0 7.4 0.700 0.00 1.9 0.076 \n", "1 7.8 0.880 0.00 2.6 0.098 \n", "2 7.8 0.760 0.04 2.3 0.092 \n", "3 11.2 0.280 0.56 1.9 0.075 \n", "4 7.4 0.700 0.00 1.9 0.076 \n", "5 7.4 0.660 0.00 1.8 0.075 \n", "6 7.9 0.600 0.06 1.6 0.069 \n", "7 7.3 0.650 0.00 1.2 0.065 \n", "8 7.8 0.580 0.02 2.0 0.073 \n", "9 7.5 0.500 0.36 6.1 0.071 \n", "10 6.7 0.580 0.08 1.8 0.097 \n", "11 7.5 0.500 0.36 6.1 0.071 \n", "12 5.6 0.615 0.00 1.6 0.089 \n", "13 7.8 0.610 0.29 1.6 0.114 \n", "14 8.9 0.620 0.18 3.8 0.176 \n", "15 8.9 0.620 0.19 3.9 0.170 \n", "16 8.5 0.280 0.56 1.8 0.092 \n", "17 8.1 0.560 0.28 1.7 0.368 \n", "18 7.4 0.590 0.08 4.4 0.086 \n", "19 7.9 0.320 0.51 1.8 0.341 \n", "\n", " free sulfur dioxide total sulfur dioxide density pH sulphates \\\n", "0 11.0 34.0 0.9978 3.51 0.56 \n", "1 25.0 67.0 0.9968 3.20 0.68 \n", "2 15.0 54.0 0.9970 3.26 0.65 \n", "3 17.0 60.0 0.9980 3.16 0.58 \n", "4 11.0 34.0 0.9978 3.51 0.56 \n", "5 13.0 40.0 0.9978 3.51 0.56 \n", "6 15.0 59.0 0.9964 3.30 0.46 \n", "7 15.0 21.0 0.9946 3.39 0.47 \n", "8 9.0 18.0 0.9968 3.36 0.57 \n", "9 17.0 102.0 0.9978 3.35 0.80 \n", "10 15.0 65.0 0.9959 3.28 0.54 \n", "11 17.0 102.0 0.9978 3.35 0.80 \n", "12 16.0 59.0 0.9943 3.58 0.52 \n", "13 9.0 29.0 0.9974 3.26 1.56 \n", "14 52.0 145.0 0.9986 3.16 0.88 \n", "15 51.0 148.0 0.9986 3.17 0.93 \n", "16 35.0 103.0 0.9969 3.30 0.75 \n", "17 16.0 56.0 0.9968 3.11 1.28 \n", "18 6.0 29.0 0.9974 3.38 0.50 \n", "19 17.0 56.0 0.9969 3.04 1.08 \n", "\n", " alcohol quality \n", "0 9.4 5 \n", "1 9.8 5 \n", "2 9.8 5 \n", "3 9.8 6 \n", "4 9.4 5 \n", "5 9.4 5 \n", "6 9.4 5 \n", "7 10.0 7 \n", "8 9.5 7 \n", "9 10.5 5 \n", "10 9.2 5 \n", "11 10.5 5 \n", "12 9.9 5 \n", "13 9.1 5 \n", "14 
9.2 5 \n", "15 9.2 5 \n", "16 10.5 7 \n", "17 9.3 5 \n", "18 9.0 4 \n", "19 9.2 6 " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "1599" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "red = spark.read.option(\"delimiter\", \";\").csv('./winequality-red.csv', header='true', inferSchema='true')\n", "# white = spark.read.option(\"delimiter\", \";\").csv('./winequality-white.csv', header='true', inferSchema='true')\n", "\n", "red.printSchema()\n", "display(red.limit(20).toPandas())\n", "red.count()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Fixed seed: without it every kernel restart yields a different split,\n", "# so the statistics computed from `train` below could not be reproduced.\n", "train, test = red.randomSplit([0.8, 0.2], seed=42)\n", "# Only a cell's last expression is displayed, so show both counts together.\n", "train.count(), test.count()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Exploratory Data Analysis" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>summary</th>\n", " <th>quality</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>count</td>\n", " <td>1261</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>mean</td>\n", " <td>5.6193497224425055</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>stddev</td>\n", " <td>0.8014311934008284</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>min</td>\n", " <td>3</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>max</td>\n", " <td>8</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], 
"text/plain": [ " summary quality\n", "0 count 1261\n", "1 mean 5.6193497224425055\n", "2 stddev 0.8014311934008284\n", "3 min 3\n", "4 max 8" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>quality</th>\n", " <th>count</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>3</td>\n", " <td>7</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>4</td>\n", " <td>42</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>5</td>\n", " <td>552</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>6</td>\n", " <td>499</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>7</td>\n", " <td>145</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>8</td>\n", " <td>16</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " quality count\n", "0 3 7\n", "1 4 42\n", "2 5 552\n", "3 6 499\n", "4 7 145\n", "5 8 16" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>skewness(quality)</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>0.292623</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " skewness(quality)\n", "0 0.292623" ] }, 
"metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>kurtosis(quality)</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>0.370935</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " kurtosis(quality)\n", "0 0.370935" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>25%</th>\n", " <th>50%</th>\n", " <th>75%</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>5.0</td>\n", " <td>6.0</td>\n", " <td>6.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " 25% 50% 75%\n", "0 5.0 6.0 6.0" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>quality_freqItems</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " 
<th>0</th>\n", " <td>[8, 5, 4, 7, 3, 6]</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " quality_freqItems\n", "0 [8, 5, 4, 7, 3, 6]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from pyspark.sql.functions import kurtosis, skewness\n", "\n", "labelCol = 'quality'\n", "\n", "display(train.select(labelCol).describe().toPandas())\n", "display(train.groupby(labelCol).count().orderBy(labelCol).toPandas())\n", "display(train.agg(skewness(labelCol)).toPandas())\n", "display(train.agg(kurtosis(labelCol)).toPandas())\n", "\n", "# Last parameter is error tolerance\n", "quantile = train.approxQuantile(labelCol, [0.25, 0.50, 0.75], 0.05)\n", "quantileFrame = spark.createDataFrame([quantile], ['25%', '50%', '75%'])\n", "display(quantileFrame.toPandas())\n", "train.stat.freqItems([labelCol], 0.1).toPandas()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The distribution of the `quality` label has small positive skewness and kurtosis, so we can conclude that it roughly follows a normal distribution.
" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "<matplotlib.axes._subplots.AxesSubplot at 0x7f4334ceb860>" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAb8AAAG9CAYAAACF23h5AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzs3Xm8XfO9//HXO5FIEEGRBiWqQUMJkpSaFRdV81hthfaXq6jS0nKpq/pTlP56L6puTDFd862mqqgQQ0yJSCJBjGm5pqpZTMn5/P5Y3yPbzt7n7Jy11zk7e7+feezHWeNnfffZJ+dzvsP6LkUEZmZmraRXTxfAzMysuzn5mZlZy3HyMzOzluPkZ2ZmLcfJz8zMWo6Tn5mZtRwnPzMzazlOfmZm1nKc/MzMrOU4+ZmZWctZoqcLYIvmk9efK2Q+ujEjjisiLOf/YmghcQEe+OlzhcQNVEjcu/r3LiQuwNJRzN+xR5/8+ULiDvjB1YXEBZi22kaFxJ34yXKFxB05f24hcQG++tL/5P5hzvs7p8+KXyzmP1ROTn5mZlZd2/yeLkEh3OxpZmYtxzU/MzOrLtp6ugSFcPIzM7Pq2pz8zMysxYRrfmZm1nKatObnAS9mZtZyXPMzM7PqmrTZc7Go+Uk6StITkq6StJuk4+sU9706xKhanvb4klaRdENaHi5pl7zXNTPrFm3z870a1OJS8zsc2Dkink/r43uyMKUiYjydlCciXgL2SavDgRHALQUXzcwsP9f8eoakC4AvAuMlHSNptKTz0r4/SvpuWv5XSVel5bUk3SrpEUn3Slo3bV9T0gOSJkv6ZQfXvCmdO0vSmJLtO0maKmm6pAlpW2l5KsaXNETSTEl9gVOB/SVNk7S/pKclrZSO6yXpGUkr1ve7aGbWRW1t+V4NquFrfhFxmKSdgG0j4nVJo0t2jwEmSXoe+Amwado+FjgsIp6W9FXgfGA74D+B30fE5ZKO6OCyh0bEG5L6A5Ml3Uj2h8KFwFYR8bykFSqc12H8iPhY0snAiIg4EiAl5oOA/wC2B6ZHxOu1fXfMzKwrGr7m15GIeBU4GbgL+ElKWMsAXwOulzQN+C9gcDplc6B9Rt0rOgh9lKTpwIPAF4ChZIn1nvam14h4o8J5tcYvdQnw3bR8KHBp+QGSxkiaImnKRZcXNyGwmVm5iLZcr0bV8DW/GnwF+CewSlrvBbwVEcOrHN/hDOWStiGrgW0WEXMlTQT6Aers3FriL3RwxAuSXpW0HfBVslpg+TFjyWqzhT3VwcysogZuusxjsa75SRoF7AxsBBwrac2IeAd4XtK+6RhJ2jCdMgk4IC0vlGSSgcCbKfGty4Km1AeArSWtmeJWavasJf67wICybRcBVwLXRUTjDo8ys9YTbfleNUjjKWanMQ8LjZ6XtIakCZJmSJooabW8b2uxTX6SliTrgzs0jab8CXCJJJElnu+lpstZwO7ptB8BR0iaTJbkKrkVWELSDOCXZE2fRMQ/yPoY/yfFvbbCubXEvwsY1j7gJW0bDyxDhSZPM7NmJqk38Duyisww4EBJw8oOOxu4PCI2IBs0eHre6y4WzZ4RMaRkeRwwLq1uWLK99JaD54GdKsR5HtisZNMZFY75iOxDqFSOvwB/Kdv2aXmqxY+IOcD6afkNYGRZ6A3JBro8Wem6ZmY9pvh79UYBz0TEcwCSriGrsDxecsww4Ji
0fBdwU96LLrY1v2aRqvg3Aif0dFnMzBZSfLPnqsALJesvpm2lpgN7p+U9gQGSPpfnbTn59bCIOCMi1oiI+3q6LGZmC8l5n1/paPX0GlN2BVW4avnAvmPJxlw8CmwN/C8wL8/bWiyaPc3MrIfkvF2hdLR6FS+S3VLWbjXgpbIYLwF7AaTb2faOiLfzlMs1PzMz60mTgaFphqy+ZCPmPzNlpKQVJbXnqxPI7o/OxcnPzMyqK3h6s4iYBxwJ3AY8QXbL1yxJp0raLR22DTBb0lPAIOC0vG/LzZ5mZlZVd9x6HBG3UDbZf0ScXLJ8A3BDPa/p5GdmZtU18BRleTj5mZlZdU06vZmT32JmzIjjCok7dspZhcRdc+3dOj+oi37Vv9r0rfmsPC/XCOqq1v24uC72wfM/KSTuTT9/uZC4M4ds2PlBXXT6x0sWEndA72K+xzfEO4XEBZhYWOTFn5OfmZlV52ZPMzNrOcVPb9YjnPzMzKy6Jq35+T4/MzNrOa75mZlZdR7taWZmLadJmz2d/MzMrDrX/MzMrOU0afLzgBczM2s5DZ/8JA2RNLOGY75Vsj5C0jlpebSk8wos36mStq+wfRtJN6fl3dIT25G0h6RhRZXHzKyeIubnejWqZmn2HAJ8C/hvgIiYAkzpjguXzjzewTHjWfB8qj2Am4HHiyyXmVlduNmzPiSdKenwkvVTJP1EmbMkzZT0mKT9K5w7RNK9kqam19fSrjOALSVNk3RMaa2r7PyVJN0oaXJ6bb4I10DST1PZpks6I20bJ2mftLyTpCcl3Ud66nDaPlrSeSnWbsBZqaxrSZpactxQSY904dtqZlaMaMv3alA9UfO7BvgP4Py0vh+wE1myGA5sCKwITJZ0T9m5rwE7RMSHkoYCVwMjgOOBYyNiV8iaHKtc+z+B30bEfZJWJ3t44pdruYaknclqbV+NiLmSVig9SVI/4EJgO+AZ4Nryi0fE/ZLGAzen51Mh6W1JwyNiGnAIMK5K2c3MrE66PflFxKOSVpa0CrAS8GZE/F3SMcDVkTUSvyrpbmAkMKPk9D7AeZKGA/OBtRfx8tsDwyS1ry8raUBEvFvDNbYHLo2Iuel9vFEWe13g+Yh4GkDSlcCYGsp0EXCIpB8D+wOjyg+QNKY91mYrbMQ6A9asIayZWR00abNnT/X53QDsA3yerCYIoOqHf+oY4FWy2mEv4MNFvG4vYLOI+KAL1xAQncTvbH8lNwL/DtwJPBIR/1woaMRYYCzAIUP27so1zMy6poGbLvPoqdGe1wAHkCXA9kfT3wPsL6m3pJWArYCHy84bCLwcEW3Ad4Deafu7wIAarns7cGT7Sqrdlat2jduBQyUtlc5doey8J4E1Ja2V1g+sUobPlDUiPiRrfv09cGkN78HMrPu0teV7NageSX4RMYssAfxvRLQ/LfMPZE2c08lqQT+NiFfKTj0fOFjSg2TNke+n7TOAeWkgyjEdXPoosv67GZIeBw6rcEzFa0TErWQjNqdImgYcW/aePiRrmvxzGvDytypluAY4TtKjJYnyKrJa4+0dlN3MrPs16YAXRbgVradJOhYYGBE/7+zYopo9/ST3BYp6kvvrvYvrZSjqSe6vFVTmjZZ5s5C4AKd/3L+QuAMK6iWaOW+hno66mfjiHbV0J3Xog9vOy/U7p/+/HJm7DEVolvv8FluS/gCsRTZK1MyssTRw02UeTn49LCL27OkymJlV5eRnZmYtp4H77fJw8jMzs+qatObX8BNbm5mZ1ZtrfmZmVp2bPc3MrOU0abOnk5+ZmVXnmp81gvN/MbSQuEXdjP78U+M7P6iLHlz/p4XEHdDv40LizoplC4kL0CeK+a+8w4YvFhJ31OR3CokLcP+wgYXEfXL2SoXEPWal3p0fZHXn5GdmZtW52dPMzFqOk5+ZmbWcJp3/2cnPzMyqa9Kan29yNzOzluOan5mZVdekNT8nPzMzq873+ZmZWctxzc/MzFpOk4729IAXMzNrOS2V/CQ
dJum7aXm0pFU6OPZUSdsXXY6y7UMkzSzimmZmXdLWlu/VoFqq2TMiLihZHQ3MBF4qP05S74g4uZvKYWbWuBo4geXRtDU/Sd+VNEPSdElXpG2nSDpW0j7ACOAqSdMk9Zc0R9LJku4D9pU0Lh2HpJGS7k+xHpY0oOxay0iaIGmqpMck7V5LOdLyJmnfA8AR3fPdMTOrUbTlezWopkx+ktYDTgS2i4gNgR+V7o+IG4ApwEERMTwiPki7PoyILSLimpJYfYFrgR+lWNsDH/BZHwJ7RsTGwLbAb5TpsBzJpcBREbFZB+9njKQpkqZcPHFazd8HMzOrrFmbPbcDboiI1wEi4o0az7u2wrZ1gJcjYnKKVelZLAJ+JWkroA1YFRjUWTkkDQSWi4i706YrgJ3Lg0fEWGAswAeXHd+cQ6/MrCFFW3P+ymnW5CegK5/Y+12MdRCwErBJRHwiaQ7Qr4Zzu1pOM7Pu4T6/xcoEYD9JnwOQtEKFY94FBlTYXu5JYBVJI1OsAZLK/2gYCLyWEt+2wBq1lCMi3gLelrRF2nRQDeUxM+s+Tdrn15Q1v4iYJek04G5J84FHyUZ3lhoHXCDpA6Bqf1tEfCxpf+BcSf3J+vu2B94rOewq4E+SpgDTyBJmreU4BLhE0lzgti68XTOz4rjZc/ESEZcBl5VtO6Vk+UbgxpLdQ8qOHV2yPBnYtINrvU6VBFpDOR4BNizZfQpmZlaopk1+ZmZWB+7zMzOzltMNM7xI2knSbEnPSDq+yjH7SXpc0ixJ/533bbnmZ2Zm1RU8sbWk3sDvgB2AF4HJksZHxOMlxwwFTgA2j4g3Ja2c97pOfmZmVl3xzZ6jgGci4jkASdcAuwOPlxzzf4DfRcSbABHxWt6LutnTzMx60qrACyXrL6ZtpdYG1pY0SdKDknbKe1HX/MzMrLqctzpIGgOMKdk0Ns1a9ekhFU4rv+gSwFBgG2A14F5J66d7pbvEyc/MzKrLeaN66fSMVbwIfKFkfTUWftrOi8CDEfEJ8Lyk2WTJcHJXy+VmTzMzq64t8r06NxkYKmnN9CCBA4DxZcfcRPbQACStSNYM+lyet+Wa32LmgZ/m+ryr+lX/4YXEfXD9nxYSF2DTmb8uJO6t659YSNwd+r1dSFyAXv0Xr1k4Dl9qvcJi3/5sMXH79C7me/ziGysVEhdgrcIi109EzJN0JNkMV72BS9LsWKcCUyJifNq3o6THgfnAcRHxzzzXdfIzM7Oqohtuco+IW4BbyradXLIcwI/Tqy6c/MzMrDrP7WlmZi2ngZ/MkIeTn5mZVdekNT+P9jQzs5bjmp+ZmVXXpE91cPIzM7PqmrTZ08nPzMyq84AXMzNrOU1a81tsB7xIOkzSdytsHyJpZo64EyWNyFc6MzNrZA1R85MkQBG1168j4oICi9SjJC0REfN6uhxmZt0xw0tP6LGaX6qhPSHpfGAq8AVJO0p6QNJUSddLWiYde0Z6fP0MSWenbadIOjYtbyJpuqQHgCNKrjFa0nkl6zdL2iYt/17SFEmzJP2ihvJWKsM4SfuUHPNe+tpL0vkp9s2Sbmk/TtLJkiZLmilpbEr87TXOX0m6G/hRrm+umVm9FD+xdY/o6WbPdYDLI2Ij4H3gJGD7iNgYmAL8WNIKwJ7AehGxAfB/K8S5FDgqIjZbhGufGBEjgA2ArSVtUO3AGstQai9gCPAV4PtAabnOi4iREbE+0B/YtWTfchGxdUT8ZhHeh5lZcZz8CvG3iHgwLW8KDAMmSZoGHAysAbwDfAhcJGkvYG5pAEkDyZLG3WnTFTVeez9JU4FHgfXStavpsAwVbAFcHxFtEfEKcFfJvm0lPSTpMWC7dO1211YKJmlMqqVOufmDYp7qYGbWSnq6z+/9kmUBf42IA8sPkjQK+DrZc56OJEsapedV+/NiHp9N8P1SvDWBY4GREfGmpHHt+ypJj9yoVIZP46fmy74lZVqIpH7A+cCIiHhB0ill132/0nmlD4O8c9B+jfu
nlJk1nya91aGna36lHgQ2l/QlAElLSVo79fsNTI+8OBr4zIPn0mPs35a0Rdp0UMnuOcDw1Af3BWBU2r4sWaJ5W9IgYOeOCtZBGeYAm6Tl3YE+afk+YO903UHANml7e6J7PcX8tL/QzKwhNWmzZ0/X/D4VEf+QNBq4WtKSafNJwLvAH1OtScAxFU4/BLhE0lyyhx62mwQ8DzwGzCQbWENETJf0KDCL7GnAkzop3oAqZbgwbX8YmMCCmtuNZLXEmcBTwEPA2xHxlqQLU3nmkD3B2MysYUUDJ7A8eiz5RcQcYP2ybXcCIyscPqp8Q0ScUrL8CLBhye5T0vbgszXB0vNHV9m+TYVtL1cpw6tkfZXtTkjb2yQdGxHvSfoc8DBZwiMiTiJL6p1e18ysxzn52SK6WdJyZP2Av0wDX8zMrAE4+RXENTkzawpNepO7k5+ZmVXnZk8zM2s5Tn5mZtZqsnGDzaeR7vMzMzPrFq75mZlZdW72NDOzluPkZ40gKk8bmtvK84p5fOCAfh8XEhfg1vVPLCTuTjNPKyTutRucXEhcgAEFDUePJ4v5eZvV64NC4gKM/vLrhcT9+exBhcQdFH06P6gHNesML+7zMzOzluOan5mZVdekNT8nPzMzq645J3hx8jMzs+qatc/Pyc/MzKpr0uTnAS9mZtZyXPMzM7Pq3OdnZmatxn1+ZmbWepq05teSfX6Sxknap8L2IZJmLmKsVSTdUGXfREkjulpOMzMrhmt+OUhaIiJeAhZKpGZmzaBZmz1bouYn6buSZkiaLumKtHkrSfdLeq5KLbCfpEslPSbpUUnbpu2jJV0v6U/A7aW1RUn9JV2TrnUt0L8k3o6SHpA0NZ2/TNp+hqTH0zlnF/7NMDNbFG05Xw2q6Wt+ktYDTgQ2j4jXJa0A/D9gMLAFsC4wHihvujwCICK+ImldskS3dtq3GbBBRLwhaUjJOT8A5kbEBpI2AKamMqwInARsHxHvS/oZ8GNJ5wF7AutGREhart7v38wsj2jgBJZHK9T8tgNuiIjXASLijbT9pohoi4jHgUrTtW8BXJHOeRL4G9Ce/P5aEqfUVsCV6ZwZwIy0fVNgGDBJ0jTgYGAN4B3gQ+AiSXsBcyu9AUljJE2RNOXmD56t/Z2bmeXlmt9iS0ClRuuPyo6pdF4173ewr9K1RJYwD1xohzQK+DpwAHAkWbL+bMCIscBYgAmD9m/OBngzs27UCjW/CcB+kj4HkJo9a3EPcFA6Z21gdWD2IpyzPrBB2v4gsLmkL6V9S0laO/X7DYyIW4CjgeE1vyszs24Qbflejarpa34RMUvSacDdkuYDj9Z46vnABZIeA+YBoyPiI6nDh3v+HrhU0gxgGvBwKsM/JI0Grpa0ZDr2JOBd4I+S+pHVDo9ZtHdnZlawBk5geTR98gOIiMuAyzrYv0z6OgdYPy1/CIyucOw4YFzJeuk5H5A1X1a6xp3AyAq7RtXwFszMekQj197yaInkZ2ZmXdOsya8V+vzMzMw+wzU/MzOrqllrfk5+ZmZWXXQ4yG+x5eRnZmZVNWvNz31+ZmbWoyTtJGm2pGckHV9h/2FpnuVpku6TNCzvNZ38zMysqmhTrldnJPUGfgfsTDYN5IEVktt/R8RXImI48Guy+ZlzcbOnmZlV1Q3NnqOAZyLiOQBJ1wC7A49/WoaId0qOX5rK00guEie/xcxd/XsXEnfdj4tpBJgVyxYSF2CHfm8XEvfaDU4uJO7+M04tJC7AvD9dUEjc6096qZC4+86bV0hcgEnTVy0k7pf6FvN/pG+Dz9YbxQ94WRV4oWT9ReCr5QdJOgL4MdCXCnMgLyo3e5qZWVV55/YsfSpNeo0pu0Sl7LrQnwQR8buIWAv4Gdn0kLm45mdmZoUpfSpNFS8CXyhZXw3oqMnhGrJ5lHNxzc/MzKoqesALMBkYKmlNSX3J5kceX3qApKElq98Ans77vlzzMzOzqqLgPsmImCfpSOA2oDdwSXoaz6nAlIgYDxw
paXvgE+BNsgeC5+LkZ2ZmVdVYe8t3jeyZpreUbTu5ZPlH9b6mk5+ZmVXVHcmvJ7jPz8zMWo5rfmZmVlXRfX49xcnPzMyqcrMnIOkoSU9IuqqoAtVYjm0k3ZyWl5R0R5rwdP86xR8naZ+0fFFXJ1GVdH9n8c3MGlmEcr0a1aLW/A4Hdo6I50s3SloiIoqbr6hjGwF90oSnNVmU8kbE97tasIj4WlfPNTOz4tRc85N0AfBFYLykYySdImmspNuByyX1lnSWpMmSZkj615JzjyvZ/osKsXun2tDM9NiKY9L2iZJGpOUVJc0pO29l4EpgeKr5rSVpjqQV0/4Rkiam5c+UtyyOJJ0n6XFJfwZWLtlXWoYDU/lmSjozbVtD0tOpfL0k3Stpx7TvvRribyLpbkmPSLpN0uBaPxMzs6Llnd6sUdVc84uIwyTtBGwbEa9LOgXYBNgiIj5I87W9HREjJS0JTEqJZmh6jSKbw228pK0i4p6S8MOBVSNifQBJy9VYptckfR84NiJ2Ted2dMqn5S3bviewDvAVYBDZbOKXlB4gaRXgzBTjTeB2SXtExE0pEV4APAQ8HhG31xJfUh/gXGD3iPhHarY9DTi0lvdvZla0tgZuuswj74CX8SWJZEdgg5K+rIFkSW/H9Ho0bV8mbS9Nfs8BX5R0LvBnoDx51Mv4CokPYCvg6oiYD7wk6c4Kx4wEJkbEPwBSv+dWwE0RcZGkfYHDyBJ5rfHXAdYH/pqSdm/g5fKT0x8WYwB2XmEkGw/4Us1v2Mwsj0but8sjb/J7v2RZwA8j4rbSAyT9C3B6RPxXtSAR8aakDYF/AY4A9iOr/cxjQdNsvxrL1NE571NdZwN6q/4ESFqKbDJWyJL7uzXGFzArIjbr6MKlE8OeNORbTTrw2MwakUd7du424AepKQ9Ja0taOm0/VNIyafuqqa/uU6mPrldE3Aj8HNg47ZpD1swIUOvoyNJz9q7xnHuAA1Lf42Bg2wrHPARsnfr2egMHAnenfWcCVwEnAxcuQvzZwEqSNgOQ1EfSejWW2czMuqie9/ldBAwBpiprw/sHsEdE3C7py8ADqWnvPeDbwGsl564KXCqpPRmfkL6eDVwn6TtApabISn4BXCzp38gSVi3+QPZwxMeAp1iQ1D4VES9LOgG4i6zGdktE/FHS1mRNoptHxHxJe0s6JCIu7Sx+RHycmonPkTSQ7PP4D2BWjeU2MytUs97krmjWd9akimr2LOpJ7q8WOI3CDr2KeZL7jI8HFhLXT3JfYMUCn+T+iYr5WX5mMXyS+w9fuDJ3m+Xja30jVwmHPfvnhmw39QwvZmZWlUd7mplZy2nW0Z5+qoOZmbUc1/zMzKyqZh0W4uRnZmZVuc/PzMxajvv8zMzMmoRrfmZmVpX7/KwhLB3FVNYHz/+kkLh9orgfsV79i/lfOaCtmOewFHUjOsAS3zyskLiDjz+h84O64Om+fQqJC7DSvGJ+Lp7t9XEhcZeoPm1wQ3Cfn5mZtZxm7fNz8jMzs6qatebnAS9mZtZyXPMzM7OqmnS8i5OfmZlV16zNnk5+ZmZWlQe8mJlZyynmxp+e5wEvZmbWclzzMzOzqqLBb8Lvqg5rfpKWk3R4Z0EkDZH0rRqPm7koBawS5xRJx6bldSVNk/SopLXyxk4x50haMS3f38UYIySd01l8M7NG1hb5Xo2qs2bP5YBOkx8wBOg0+RVkD+CPEbFRRDxbywmSaq7xRsTXulKoiJgSEUd15Vwzs0bRhnK9GlVnye8MYK1UszpLmbMkzZT0mKT9S47bMh13TKrh3Stpanp1mEAkDZZ0Tzp/pqQt0/b3So7ZR9K4svN2AY4Gvi/prvKapaRjJZ2SlidK+pWku4EflcX5nKTbU+3xv2DBJ9ZehmrvXdKeku5I+wdLekrS5yVtI+nmGuJ/W9LD6b3/l6TenXwmZmaWU2fJ73jg2YgYHhHHAXsBw4ENge2BsyQNTsfdm477LfAasENEbAzsD1Rs/iv
xLeC2iGiPPa2WwkfELcAFwG8jYtsaTlkuIraOiN+Ubf934L6I2AgYD6xe4dyK7z0i/gC8AhwBXAj8e0S8Ukt8SV8m+/5snt77fOCgGt6HmVm3CJTr1agWdcDLFsDVETEfeDXVokYC75Qd1wc4T1L7L/S1O4k7GbhEUh/gpoioKfl1wbVVtm9FltyIiD9LerPCMdXe+3jgh8BM4MGIuHoR4n8d2ASYLAmgP9kfDp8haQwwBmCPFUYxapmhNbxVM7P8fKtDptY0fgzwKlktaQTQt6ODI+IesgTxv8AVkr7bvqvksH41XHcen31P5ee831ExOond0XtflexnZJCkat/TSvEFXJZqzMMjYp2IOGWhEyPGRsSIiBjhxGdm3alZa36dJb93gQEl6/cA+0vqLWklsoT1cIXjBgIvR0Qb8B2gw34sSWsAr0XEhcDFwMZp16uSvpwSyp41vJ9XgZVTH9uSwK41nNP+vg5KZdkZWL7KMQu99zR45lKyptsngB8vQvwJwD6SVk77VkjfCzOzhtCW89WoOmz2jIh/SpqUBpH8BfgpsBkwnawm89OIeEXSP4F5kqYD44DzgRsl7QvcRcc1LoBtgOMkfQK8B7TX/I4HbgZeIGtWXKaT8n4i6VTgIeB54MlOrtvuF8DVkqYCdwN/r3DMH6j83k8m6++8V9I0sibMP9cSPyIel3QScHtK8J+Q9R3+rcZym5lZFyia9Rn1Ter0Nb5dyAc26sNinuT+eq/i5lFYr//bhcR9du6yhcTd8fTVCokLxT3JfeJ6fpJ7u7uWLOb/SJFPcj9nzrW5g98y6IBc39BdXr2mIds+PcOLmZlV1cj9dnk4+ZmZWVVtzZn7nPzMzKy6Rp6lJQ8/1cHMzFqOa35mZlZVsw6JdPIzM7OqGvlevTyc/MzMrKo2uc/PzMysKbjmt5g5+uTPFxL3pp+/XEjcHTZ8sZC4RYoni/lL9/qTXiokLsDg44u5GX2bWacXEve8jYt71OW/qcOphLvszrkvFBJ3z6Uae75e9/mZmVnLadY+Pzd7mplZVW3K96qFpJ0kzZb0jKTjK+xfUtK1af9DkobkfV9OfmZmVlUbyvXqjKTewO+AnYFhwIGShpUd9j3gzYj4EvBb4My878vJz8zMetIo4JmIeC4iPgauAXYvO2Z34LK0fAPwdSnfMFQnPzMzqypyvmqwKtlj69q9mLZVPCYi5gFvA59b9HezgAe8mJlZVXkntpY0BhhTsmlsRIwtPaTCaeV5s5ZjFomTn5mZVZV3tGdKdGM7OORF4Asl66sB5fcFtR/zoqQlgIHAG3nK5WZPMzOrqhuaPScDQyWtKakvcAAwvuyY8cDBaXkf4M7I+SR21/zMzKzHRMQ8SUcCtwG9gUsiYpakU4GUQLsLAAAgAElEQVQpETEeuBi4QtIzZDW+A/Je18nPzMyq6o6H2UbELcAtZdtOLln+ENi3ntd0s2cFkk6RdGwd490iabn0Orxecc3MitaW89WonPy6QUTsEhFvAcsBTn5mtthw8mtykk5M0+vcAayTtq0l6VZJj0i6V9K6afs4SedIul/Sc5L2SdsHS7pH0jRJMyVtmbbPkbQicAawVtp/lqQrJO1eUoarJO3W7W/ezKzFOPkBkjYh60DdCNgLGJl2jQV+GBGbAMcC55ecNhjYAtiVLKkBfAu4LSKGAxsC08oudTzwbEQMj4jjgIuAQ1IZBgJfo6zdO+0bI2mKpCkX3z0j79s1M6tZKN+rUXnAS2ZL4A8RMRdA0nigH1kyur5kFp0lS865KSLagMclDUrbJgOXSOqT9pcnv8+IiLsl/U7SymRJ98Y0e0H5cZ/eJ/PBxcc26xNGzKwBNXLTZR5OfguUJ5VewFupFlfJRyXLAoiIeyRtBXyDbFjuWRFxeSfXvQI4iKzmeeiiF9vMrDjNmvzc7Jm5B9hTUn9JA4BvAnOB5yXtC6DMhh0FkbQG8FpEXEh2X8rGZYe8Cwwo2zYOOBogImblfSN
mZvXUDTe59wgnPyAipgLXkvXR3Qjcm3YdBHxP0nRgFgvPNF5uG2CapEeBvYH/LLvOP4FJaTDMWWnbq8ATwKX1eTdmZtYZN3smEXEacFqFXTtVOHZ02foy6etlLHjsRun+ISXL3yrdJ2kpYChwdReKbWZWqO64yb0nuObXgyRtDzwJnBsRb/d0eczMyjXrfX6u+fWgiLgDWL2ny2FmVk0jJ7A8XPMzM7OW45qfmZlV1cgjNvNw8jMzs6qadcCLk5+ZmVXVrH1+Tn5mZlaVmz2tIQz4QTG3A84c0uHkNV32+rNLs8sbLxcS+/Cl1isk7qxeHxQSlz4fsO8HfQoJ/XTfYuKet/FRhcS9Yeo5hcQFOHHEiYXEva7v5wuJe2t43GFPcPKzQhWV+BZHRSU+syK1NWndz8nPzMyqcp+fmZm1nOas9zn5mZlZB5q15ueeVjMzazmu+ZmZWVW+yd3MzFqOR3uamVnLac7U5z4/MzNrQa75mZlZVR7taXUlaaKkESXrQyTN7MkymZmVayNyvRqVa35mZlZV46avfJz8CiZpCHAr8BCwEfAU8N0eLJKZWc2atdnTya97rAN8LyImSboEODxtv0pS+yME+tK8P2dmZg3FfX7d44WImJSWrwS2SMsHRcTwiBgO7FLtZEljJE2RNKWt7f2iy2pm9qlm7fNz8use5T8Bi/QTERFjI2JERIzo1WvpOhbLzKxjkfPVqJz8usfqkjZLywcC9/VkYczMatWW89WonPy6xxPAwZJmACsAv+/h8piZ1SRy/mtUHvDSPdoi4rCybduUrkTEHGD97iqQmVkrc/IzM7OqGrnpMg8nv4K5Rmdmi7NGHrGZh5OfmZlV1ZypzwNezMysBbnmZ2ZmVbnZ08zMWo4HvJiZWctp5Hv18nDyMzOzqlzzs4YwbbWNCol7+sdLFhL3/mEDC4kLcPuzxcQd/eXXC4k7afqqhcQFWGleMX+d/5v6FhL3xBEnFhIX4LQppxUSd+J6JxQSd7/BrxYS1zrm5GdmZlW52dPMzFqOmz3NzKzltIVrfmZm1mKaM/V5hhczM2tQklaQ9FdJT6evy1c4Zg1Jj0iaJmmWpPIn6FTk5GdmZlW1EbleOR0PTIiIocCEtF7uZeBrETEc+CpwvKRVOgvs5GdmZlX18MNsdwcuS8uXAXssVL6IjyPio7S6JDXmNSc/MzOrqi3nK6dBEfEyQPq6cqWDJH1B0gzgBeDMiHips8Ae8GJmZoWRNAYYU7JpbESMLdl/B/D5CqfWPBNCRLwAbJCaO2+SdENEdDh7QMsnP0mjgRERcWSeYyqcczTZhzw3dyHNzHpI3n67lOjGdrB/+2r7JL0qaXBEvCxpMPBaJ9d6SdIsYEvgho6OdbNncY4GlurpQpiZ5dHDfX7jgYPT8sHAH8sPkLSapP5peXlgc2B2Z4GbMvlJWlrSnyVNlzRT0v6S5khaMe0fIWlihfPGSbpA0r2SnpK0a8nuVSTdmobc/rrknN9LmpKG2P4ibTsKWAW4S9JdaduOkh6QNFXS9ZKWSdvPkPS4pBmSzi7uu2Jmtuh6uM/vDGAHSU8DO6T19t/hF6Vjvgw8JGk6cDdwdkQ81lngZm323Al4KSK+ASBpIHBmjecOAbYG1iJLXl9K24cDGwEfAbMlnZvamU+MiDck9QYmSNogIs6R9GNg24h4PSXdk4DtI+J9ST8DfizpPGBPYN2ICEnL1eXdm5nVSfTgDC8R8U/g6xW2TwG+n5b/CmywqLGbsuYHPAZsL+lMSVtGxNuLcO51EdEWEU8DzwHrpu0TIuLtiPgQeBxYI23fT9JU4FFgPWBYhZibpu2TJE0jq76vAbwDfAhcJGkvoGL/oKQxqXY55fp3/r4Ib8XMzCppyppfRDwlaRNgF+B0SbcD81iQ7Pt1dHqV9Y9Kts0HlpC0JnAsMDIi3pQ0rkpsAX+NiAMX2iGNIvvL5gDgSGC7Cu/n0w7jmV/ctVlnGzKzBlSHG9UbUlPW/NJ
w17kRcSVwNrAxMAfYJB2ydwen7yupl6S1gC/SccfpssD7wNuSBgE7l+x7FxiQlh8ENm9vQpW0lKS1U7/fwIi4hWyAzPBFeJtmZoXr4T6/wjRlzQ/4CnCWpDbgE+AHQH/gYkn/BjzUwbmzyTpNBwGHRcSHkioeGBHTJT0KzCJrIp1Usnss8BdJL0fEtul2iasltT819iSyBPlHSf3IaofHdOndmpkVxM/zW4xExG3AbRV2rV3h2HHAuJJNkyLimI6OiYhdS5ZHVynDucC5Jet3AiMrHDqq0vlmZo3AzZ5mZmZNoilrfl1VrRZnZtaqevJWhyI5+ZmZWVWNPGglDyc/MzOrqlkHvLjPz8zMWo5rfmZmVlWzjvZ08jMzs6o84MXMzFqOa37WECZ+UsyDHwb0/qSQuE/OXqmQuAB9ehfzn/LnswcVEvdLfYvrYn+218eFxL1z7guFxL2ub6UHd9fHxPVOKCTuNrNOLyTuByf+oJC49eIBL2ZmZk3CNT8zM6uqzX1+ZmbWapoz9Tn5mZlZBzzgxczMWk6zJj8PeDEzs5bjmp+ZmVXlm9zNzKzlNGuzp5OfmZlV5ZvcW5SkOZJW7MJ54yTtswjHD5E0c1GvY2Zmi841PzMzq6pZ+/xc8ysh6SZJj0iaJWlMhf3flTRD0nRJV6Rta0iakLZPkLR6ySlbSbpf0nPttUBlzpI0U9JjkvbvprdnZrbI2ohcr0blmt9nHRoRb0jqD0yWdGP7DknrAScCm0fE65JWSLvOAy6PiMskHQqcA+yR9g0GtgDWBcYDNwB7AcOBDYEV03Xu6Yb3Zma2yFzzaw1HSZoOPAh8ARhasm874IaIeB0gIt5I2zcD/jstX0GW7NrdFBFtEfE40P6ogC2AqyNifkS8CtwNjOyoUJLGSJoiacqk957O8fbMzBZNs9b8nPwSSdsA2wObRcSGwKNAv9JDqG2au9JjPio7v/RrzSJibESMiIgRmy8ztPMTzMysQ05+CwwE3oyIuZLWBTYt2z8B2E/S5wBKmj3vBw5IywcB93VynXuA/SX1lrQSsBXwcD3egJlZvUXOf43KfX4L3AocJmkGMJus6fNTETFL0mnA3ZLmk9UMRwNHAZdIOg74B3BIJ9f5A1lT6XSyWuJPI+IVSUPq91bMzOrDjzRqchHxEbBzhV1DSo65DLis7Lw5ZP2B5fFGl60vk74GcFx6lcdZvwtFNzMrTCPX3vJw8jMzs6qatebnPj8zM2s5rvmZmVlVbvY0M7OW06zNnk5+ZmZWVbPW/NznZ2ZmLcc1PzMzq8rNnmZm1nKatdnTyW8xM3L+3ELi3hDvFBL3mJV6FxIX4MU3Viok7qDoU0jcvgX+Dlli0aeMrcmeSxUzl+ytUVyPy36DXy0k7gcn/qCQuP1P+30hcesloq2ni1AIJz8zM6uqkZ/MkIcHvJiZWctxzc/MzKpq1ofZOvmZmVlVzdrs6eRnZmZVNWvNz31+ZmbWclzzMzOzqnyTu5mZtRzf5G5mZi3HfX4NTtIQSTPT8ghJ56TlbSR9rYbzx0l6XtJ0SU9JulzSqjnKM07SPhW2byPp5q7GNTPrTm1ErlceklaQ9FdJT6evy1c5bnVJt0t6QtLjkoZ0Frtpkl+piJgSEUel1W2ATpNfclxEbAisAzwK3CWpbwFFNDOzzh0PTIiIocCEtF7J5cBZEfFlYBTwWmeBGyL5STpR0mxJd0i6WtKxkiZKGpH2ryhpTloeIuleSVPTa6HE1l67Stn/MOAYSdMkbZlqd33ScctKmtO+3i4yvwVeAXZOxx4o6TFJMyWdWXKt90qW95E0riTU9qmsT0natUI5l5Z0iaTJkh6VtHsXv4VmZoWIiFyvnHYHLkvLlwF7lB8gaRiwRET8NZX3vYjodBLkHu/zk7QJcACwEVl5pgKPdHDKa8AOEfGhpKHA1cCISgdGxBxJFwDvRcTZ6XoTgW8AN6Xr3hgRn0gVJwaeCqwraTJwJrAJ8CZwu6Q9IuK
mTt7eEGBrYC2yWuSXyvafCNwZEYdKWg54WNIdEfF+J3HNzLpFD4/2HBQRLwNExMuSVq5wzNrAW5L+B1gTuAM4PiLmdxS4EWp+WwJ/iIi5EfEOML6T4/sAF0p6DLgeGLaI17sIOCQtHwJc2sGx7RlxJDAxIv4REfOAq4CtarjWdRHRFhFPA88B65bt3xE4XtI0YCLQD1h9oUJIYyRNkTTlprnP13BZM7P6yFvzK/39lV5jSuOnFr+ZFV61toQtQZZHjiX7Xf1FYHQtJzWCSn9azGNBcu5Xsv0Y4FVgw7T/w0W6UMSk1HS6NdA7ImZ2cPhGZO3MHf2RUFr2fh3sq7QuYO+ImN1JmccCYwEeWmWv5hx6ZWYNKe+gldLfX1X2b19tn6RXJQ1Otb7BVO7LexF4NCKeS+fcBGwKXNxRuRqh5ncPsKek/pIGAN9M2+eQNTMClI6aHAi8HNlDpr4DdPbAuHeBAWXbLidrLq1Y61PmKGAwcCvwELB16nvsDRwI3J0Of1XSlyX1AvYsC7WvpF6S1iL7a6Q8yd0G/FCpzVXSRp28FzOzVjIeODgtHwz8scIxk4HlJbU/4HM74PHOAvd48ouIqcC1wDTgRuDetOts4AeS7gdWLDnlfOBgSQ+StfV21j/2J7LkOk3SlmnbVcDyZAmw1FmSpgNPkVWft42Ij1Ob8wnAXcB0YGpEtH8IxwM3A3cCL5fFm02WJP8CHBYR5bXUX5I1485It2n8spP3YmbWrXp4wMsZwA6SngZ2SOvtt7NdlMo3n6zJc0LqDhNwYWeB1Wg3MEo6hZIBKgVdYx9g94j4TlHXKEpRzZ4/61XMk9wvXq64J7k/UNCT3J/pU8z/ic+1FfO0dYCne39SSNylO21Y6Zrli3yS+/LFPMl9ha8WU+Yin+TeZ8Uv5v6hW2apNXP9h3hv7vPF/eDn0Ch9ft1G0rlkty/s0tNlMTNrdJ7erJtExCkFx/9hkfHNzKzxNVzyMzOzxuGnOpiZWctptHEh9eLkZ2ZmVbnPz8zMWk6z1vx6/D4/MzOz7uaan5mZVdWsNT8nPzMzq6o5U18DzvBi9SNpTJpUdrGJvbjFLTL24ha3yNiLW9wiYxdZ5lbiPr/mNqbzQxou9uIWt8jYi1vcImMvbnGLjF1kmVuGk5+ZmbUcJz8zM2s5Tn7Nrch+gaJiL25xi4y9uMUtMvbiFrfI2O7vqwMPeDEzs5bjmp+ZmbUcJ78mImmFni5DI5DUW9IxPV0OM2tcTn7N5SFJ10vaRVJdn54saYqkIyQtX8+4RYiI+cDuPV2ORSVpkKSLJf0lrQ+T9L06xJWkb0s6Oa2vLmlU3rhmizMnv+ayNlln+HeAZyT9StLadYp9ALAKMFnSNZL+JU+ClfSYpBnVXnUo7yRJ50naUtLG7a86xEXSryUtK6mPpAmSXpf07TqEHgfcRvZ9BngKOLoOcc8HNgMOTOvvAr+rQ1wk7Sppsfo9UuDnVyhJ/SWt09PlaBYe8NKkJG0LXAksDUwHjo+IB+oQtxewK/B7oA24BPjPiHhjEeOskRaPSF+vSF8PAuZGxKk5y3lXhc0REdvliZtiT4uI4ZL2BPYAjgHuiogNc8adHBEjJT0aERuVXitn3KkRsXFZ3Ol5y5viXEmWWG8ELo2IJ+oQ8zEqz6olss9wg5zx6/r5FV3edI1vAmcDfSNiTUnDgVMjYre8sVuV5/ZsIpI+B3ybrOb3KvBDYDwwHLgeWDNn/A2AQ4BdyH7ZXQVsAdyZrlGziPhbirl5RGxesut4SZOAXMkvIrbNc34n+qSvuwBXR8QbdWplfj99hgEgaVPg7TrE/URS75K4K5H94ZJbRHxb0rJktcpLJQVwKdn35d0uht01fRXwZ7Lvcz3V+/PbtfNDcjsFGAVMBIiIaZKGdMN1m5aTX3N5gKwGtUdEvFiyfYqkC/IElvQI8BZwMVkt8qO06yFJm1c/s1NLS9oiIu5L1/kaWW01N0nfANYD+rVvy1ujTP4
k6UngA+DwlEw+rEPcH5P9sbJW+gNgJWCfOsQ9B/gDsLKk01LMk+oQF4CIeEfSjUB/smbaPYHjJJ0TEed2Id7f2pclfVS6Xid1/fzKyjsIGJlWH46I13KVdIF5EfF2nbvyW5qbPZuIpP0i4rqybftGxPV1iP3FiHiubNuaEfF8zribkDWdDkyb3gIOjYipOeNeACwFbAtcRPYL/+GIyD2AJMVfHngnIuZLWhoYEBGv1CHuEsA6ZLWe2RHxSd6YKe66wNdT3An1aJ5McXcjaw1Yi+wPr8si4jVJSwFPRMQaHQboPP7UiKhLX21Z3Lp/fpL2A84iq50J2BI4LiJuqEN5LwYmAMcDewNHAX0i4rC8sVuVk18TqfSLol6/PKrEfiQiNskbO8ValuznsR7NfEiaEREblHxdBvifiNixDrGXIqulrR4RYyQNBdaJiJu7GG+vjvZHxP90MW6Ht74saj9tlWtcDlwUEfdU2Pf1iJjQhZilP2dXkfUDf6oOfxjV9fMriTsd2KG9tpdqlHfUqW91KeBEYEeyxHob8MuIqEeLQ0tys2cTkLQzWf/FqpLOKdm1LDAvZ+x1yZoOB5b9kl6WkubELsT9dkRcKenHZdsBiIj/19XYyQfp61xJqwD/JGefZ4lLgUeAr6X1F8n6VLv6y/Ob6evKKeadaX1bslpEl5IfWRmD7Jfl6sCbaXk54O/U5/vxcnnik3RmRPysK4kv+U3J8itkAz0gDSAB8g5aqvfn165XWTPnP6nTiPqImEuW/E6sRzxz8msWLwFTgN3I/lO3e5dsJFse65B16C/Hgl/S7bH/T4647f16A3LE6MjNkpYja4aaSvZL86I6xV4rIvaXdCBARHygHJ0xEXEIgKSbgWER8XJaH0yOWxIiYs0U5wJgfETcktZ3BrbvatwyOwA/K9u2c4VtNWsfrCSpP3A42aCqAO4lG2WcV10/vxK3SroNuDqt7w/ckiegpD/RwfNkPdqz69zs2UQkLRERuWp6HcTerB63SvQESUsC/erYpHo/Wf/ZpHQLwVpkowZz3TguaWZErF+y3guYUbqti3EXap6WNCUiRuSI+QOyxLQW8EzJrgFk35fc981Jug54h6zpE7IRpctFxH454xby+aXYewObk9VS74mIP+SMt3VH+yPi7jzxW5mTXxOQdF1E7FftfqM89xlJ+mlE/FrSuVViH9XV2Cn+ZcCPIuKttL488JuIODRn3Er9aG8Dj+UdgSdpB7LRksOA28l+2Y2OiIk5454HDCWrOQTZxALPRMQPc8a9jazWdGWK+21gq4j4lxwxBwLLA6eTDcJo9249+hLTNRa6F7Ee9ydK2pGs+bD08zskIirdG9owJPUF1iX7DGdHxMc9XKTFmpNfE5A0OCJe1oIbxz8jz1BxSd+MiD9JOrhK7Mu6GjvF//TG6462dSHun8luvm7/hbYN8CDZLDinRsQVVU6tNf7ngE3J/sJ/MCJezxOvJO5eZKMEoQ41hxRzBeDfga3a4wK/yJOkJC2bbnGoOKimToNpxgEXRMSDaf2rwMERcXgdYtf980uf3ZlkfbdiwU3uy9Yh9jeAC4BnU9w1gX+NiL/kjd2qnPysR6URcttExJtpfQXg7oj4Ss64fwK+HxGvpvVBZP1F3ydLKovclKhOpkfLOwpxcSLp5ojYVdLzLBhU0y4i4ot1uMYTZH3Of0+bVgeeILtBP7raoiFpQkR8vbNtXYj7DPDNet1GUhb7SWDXiHgmra8F/Dki1q33tVqFB7w0AUnv0nGneJf/8uyGDvffAPdLar8Xal/gtJwxAYa0J77kNWDtyGbz6Oq9c+2jEPsBI8imjROwAfAQ2cCMRSbpvojYosLnmKvmIOk/IuLoap9hns8uInZNX+s1graSneoZTFI/sns/V0zN6+0Je1kWzKeax6tFJL7ktfbElzxH9jNtXeTk1wQiYgCApFPJhoZfQfYf+yDyj6ZsH2a+F/B5sn4jyAYfzMkZm4i4XNnsMdu
SlXmviHg8b1zg3jR6sv0G/72Be9INzW91saztoxCvAcZExGNpfX3g2K4WNCK2SF/rPfK1vWn37A6PykHZ7D7TIuJ9ZZNDbwz8R0T8vZNTO5Wnub6KfyWbgWYVslHR7cnvHXKMqi3pX54i6VrgJqB9BqQu36dZFnuWpFuA68j+kNkXmNzVuOZmz6Yi6aGI+Gpn27oY+56I2KqzbTnir8xnpyHL9cszDV0vHXl3H3Bj1OEHXhUmm660rQtxt4+IO8q2HVyHftWVywf5SFonImbniZvizAA2JKv9XkE2/d1eEdHhKMWeJOmH0YVp1zqId2kHuyPP4K0iY7c6J78mkoZw/w64huyvwwOBIyLiax2eWFvsJ4BvRJriTNKawC0R8eWccXcja05chawZZw2yabHWy1nkwki6Gnifz46eXCYiDuzwxM7j3gPMIqtFLkN2X+JHEZFrfk9Js4GfR5r6TtJPgO9FxLA8cVOs9idGnAz8b0RcrIKmJKunVFsfxmf/4Lq850pk3c3Nns3lW8B/plcAk9K2ejgGmCipfX7PIWTNSHn9kmzU3R0RsZGyRzHlSiKwUD9oX7KZ/N+vx8g7srksfwD8KK3fQ31uvt4a+AkwLa2fHBFXd3B8rbYBxkraFxhENmikXg+zfVfSCaTbJ5Q9PaJPJ+f0KEn/TvY9GUZ2E/rOZC0DuZKfpNWAc8laGyLF/FF8dpL5rsa+lMr9tq75dZGTXxOJiDkU9ATziLhV2RyI7aPLnowFT3bI45OI+KekXpJ6RcRdks7MG7S8/0zSHtTpF35k8yn+Nr3qaXngq2TD2VcD1pCkvE216TaYW4ETyEZKnhAR7+UubWZ/sj+wvhcRr0hanWxWnUa2D1lT7aMRcUgaCVyP2X8uBf6brD8Osj8ILiWbBSev0qnX+pE9OeOlOsRtWW72bAJF3oguabuIuLPKTeO5OvNT/DvIHih6OrAiWdPnyHo01Va41oMRsWmO8wubTCDFfwo4IyIuUTa115nAiLzfC0l/BV4mexLAamRP0bgnIro8SGdxJunhiBhVMtDqXWBm3qb2ovqCq1yrF1lrSe6HM7cq1/yaQ/vw6ikFxN6abKLlb1bYF3R90uV2u5NNQn0M2ejUgeR8kC0sNMNLL7JbE/L+pdfezFnUw0u3bx/oExEfAEdJqseAot9FxE1p+S1lz0w8oQ5xC72xu0BTlM37eiHZqM/3gIfrEPf1NOK1van6QLLJrYswlOy+R+si1/ysKZWNkptHdlvGheWjHrsQtzdwW0TUa2Lo8vi7sWAmlrsj4k91ilvIQ1aLvLG7Oyh7GvqyETGjDrFWB84jm1kogPvJ+vxy37JR0ofd/mSLV8iar2/MG7tVOfk1kdS8tW98dp7MayLHHI4lsX8F/Los9k8iom5PBF9cSBoPfCfqNFF2SdwzyBJU6UTOUyIiVy1NxT5kdVJEbJ43TneTtCrZyOJPW7+iwjMJrXk5+TWRKn0OuefJrBankYe0S/o18H/JmlRvJRvgcHREXNnhibXFvo5shOpfyW55AOoyyfcMYHhEtKX13mSDMv5/e+ceZWdVnvHfM1FAMKC28VIEKiigq3IxRKEgldYronIRULlUUGijFsQKatFS0FZFQFmwRERFiC0icaGICAgVkKQKAQKuKlSXoIIgAtJEEqGJT//Y+8ucmZy5nW9/c27vb61Zme87c96z15mcefd+b0/dXGKTIqtnkoYfFGvsbppcUHUw8GNgbb7tOhNvst1GhrS32G+NClznmuK7w07k/AaLtZK2rPJGSoOuS+1u5kjasKrwzAUZG9Y1mieurG75gz9Ckh9aVdP0q22fIGk/kljpgaQh17WdH/Dt/NUETwOqodCbFbLZmMgqaTTYKpLCeEWJXHCT7EtSbi9RrdzKDpXjA7D9O0m1N57QNipwrKTd60YFhplwfoPFicCNkiqNrz2BowvZ/gpwbUu/0ZFArckjmWtJwqpV6f3GJJmZutWeVa/Z3iSttkd
URK80KVkoyctsm2/dZbvTeaGtfBy4TdL3SOHJPSlTmFJcZLXCWYi3z/g56f9Haec3IunpHjukvdTf2L0ZGxW4ALiNQoVLw0g4vwEi9+K9hFGpluNcSGont1L8iCQCKuCjtq8qYHqj1p4z27+XtHEBu99SmoS/GnhXDvX9oYBdJL2C5PjvIb0XW+QxZLVyRrYvknQdaYcv4AO2H6i3WrB9vMaKrH7eBaSSACRtS2rwf5btv5C0A/BG2x8rYb8kLa1Aq4Dlkq5lbKi2Vtia5oa0VzQRFRhaIuc3YOQ8wwsYO7apZxP5kpYA/+AsByRpPnC27d0K2H46sBN399gAABE3SURBVML22hxenVvCmeT+sLc5z8bMDuAij1NLn4G97W3fqQkkk9zDUkk5ynA8cG6VE9Y4RfpeQRNoUla45gzV/BovAv6atMm41mWGtCPprcAnSKH7dVEB218tYX8YCec3QEh6J6kX7bmkEVm7Av9VohFW0q6k0U0vJI0Lm0OBcWGSFpBmkVbTKp4DHGz7ljp2m0TSHeOLUNrdm4G982wflcOd43Gnvz81JJU07jVutr2gtSCqqcbukqigKromEPStcDll++cwGhX4YYmN3DATYc/B4ljSh+MHtveStD1wciHbZwNvIUkE7QIcDjy/rlHbN+d1bkf6UN9ZKH/WJMskfZFRyaBDSM3SHWH7qPzvXgXW1mq3KamkVh5SElY1gKQ3k6bJ9CyS9gbOpUUVXVIdVfRbGCvoW200qp68WsK+kp5Emj9ajRb8CVAknTHMxMlvgGjZhS8HXmb78VK7cEnLbO/SesKRtLTT0VtqeGxak0jaEHg3SbxWpMHWn+20enCi96Ci0/diNk4kkrYGPk8qUPodcDdwSInG7qZQg6ro+T0fn3a4fuJnTGnvz0ihzvtJBS4Cdia1l+xlO+Z7dkic/AaLe/PYpm8A35X0O8oNv12VQ0XLcw/d/cAmNew1MjZtorzZOsMF8mfZyZ2Rv0pQvQfPJDmR/8zXe5Ea0zvdCLSeSLYkOSeRCid+CXSswi7pfS2XV5D+QI+Q+h4PoNx70wSNqKJPkHZYSioS65R/A86x/Zlxr3UMqTp40jxmMDFx8htQJP0VqSLsyjr5jBZ7WwG/IeX7jsu2Pzvuj8hMbY4Ab3bWmSvBBHmzio7zZ9l224HWLcbrNqNfDhxl+/58/RzSXM5JT4bTsPs54DLbV+Tr15HmiP5jDZsn5W+3I4Xav0lyrG8gDc1+Z501N4mkc0jTXVpV0e8iSYDVOWn/iNG0w05V2sH2wTXWeudEJ1JJd9nerlPbw044v6CrqKAafNPkDcCE1A31ja+SzJuDO+pWTkq6ZXwlahXGrmM327kaOMD2ynw9F7jE9mvr2m4KNaSO3kTaQZNMaJrssWBqIuwZdJvvSno/cDFjR4WVyEcVVeuehTzWdS3N6CYVGE12kp0uD0n6MGOV50upDWwJtEYWniAJHfcsDTbmN5F22GyCnLBI03WCDomTX9BVJN3d5rZt162Qa6vWbfvNdexm243J+CiNY6tOwjeUaEbPRRgnZbsmFeicUmiDcSJwEHBptr0fcLHtj9e1XRpNoHdZUaDJvfW1iqQdpjil9uuEnZ4gnF8wIyRtYvuxqX9y2vY2clJGn/ReB3Z/xKha947Kat222xXYzNR2X8v4lCYXGb08X95g+7ZurmciZqPJvSkkzbG9duqfDKZLhD0HgDZNzGModCL5S+ALwFOBLSXtCPyd7XfVNL0UGF+h2e7eTFlt+4+S1kjalFTNV+s02cJvwvGNkitoe3YKTUUvO7dp8LM8Nu38UlNjhp1wfgNA1cQs6RSSyOUiUijuEKBUg/OngdcAl+XXvF01VMYlPRvYHHiK0uT7qkF4U9Jw67oUV+tuyb0sk3QxfSTjE4ySK4LX2yyWmITUIDuQcsBfyIVQXyJpda7o7rL6lwh7DhCSfmj7ZVPdq2N73Bir292hJlwOQb2dNC1mWctDK4Evl3Q
kKqTW3ZJ/aZ3mUdFxlWC2PQe4wPahndqYxO4xtj9d0m4/ozQ/tmIjUl/iGtsndGlJMyJvOi8i9WsuJg2Z77jlaFiJk99gsVbSIaRZmSYpgZfKE/wqhz6dm92PIY1Z6ogcgrpA0gG2v15ojetodyqVtKdrDPmuigs0gWhpp3az7bWS5knaoERf5ji7byKd3APA68+NXaJRGbCeJG9iXg8cQaqmPZ2k7fdyUkHXthM+OWhLOL/B4m3AmfnLpKbdtxWy/ffZ7uYkcdirSSO+OkLSoU6q6n8+bloIALbrTgg5vuX7jYCXksKfJUJbTYmW3kP6Q3wZY9s+6r4XSySdzfrtJD2fp2uCcWPfRkjRh2d3aTnT5aektpdP2V7acn9xnfTDMBPOb4CwfQ/wpoZsP0TKIZaiGo321II21zG+qlPSFsCphcw3JVr66/w1QrlcLYwKA5/Scs+U2Qj0I61j3/6PtOl4RzcXNA0Ot31j6w0lJfclJVs0honI+Q0QakBYdDZ7o5pEkkjTUl5cwNbhJAXtxaT35iDgX20vmvSJQU8g6SBS/90KSR8hVRZ/tJdPwpJutf2Sqe4F0ydOfoPFeWRhUQDbd0j6D6COqvayqX+kcybKn9UpHsl2Wp32CLATcHsdmxW2L5S0jFHR0v1LlJ83VYUo6Z/b3bd9Srv7Q8CHbX9N0h7Aq0j5s3OA2oVhpZG0G+nkPm9cemBTkqZm0CHh/AaLjW3flA4561hTx+As9EY1lT9rddprSErrSwrYBSA7u9L9Vu9v+X5dFWIBu61DCTYC9qFGsdIAUBWBvR74nO1vSvqXLq5nMjYgpQaexNhQ+Aqg9rSiYSac32BRXFhU0mdsv1fSt2h/KnljHfs0lD/rx4bmpqoQbY+pRJV0Grlfc0i5T9K5wCuBTyrpM450eU1tcdICvF7Sl2dhtuxQEc5vsHg3SVh0e0n3kYRF6/aNVXms02ramYjTgaV5esW6/FmnxtSw7FCTtKlCnE8zVYgbU27aTT9yEPBa4DTbjypJRx0/xXO6QrX5BM6W1MTmc2iJgpcBRNImwEglM1PI5rG2z5zqXoe2X8Ro/uzaOvkzjcoOVW0YlfM+BFjVy3muPOS7qkJcQ9q8nDK+yq8Du60bgjnAvGz37Dp2g+aRNN/2LXlQ9nq4hkr8sBPOb4CQtAh4j+3/zddbAV+yXUdJurLdrtqsZ/XEJC2xvftU93oBSQfavkTS1rZ/XtDu82zfrbE6hGtIs0lL5BKDoG+JsOdgcSPww1wVtjkplNOxWjeApLeSGuWfl5uvK+ZSThOuCTaRtEd1asrTaTaZ4jnd4kPAJaTWiZKl64tJodMiG6Bg9unnMH6vE85vgLB9rqT/Jk2CeAjY2fYDNc0uJRXN/CljR3itBGrNymyYdwBfkrRZvn4UqNU+0SAP5zaH8RsMoFZeZ0RJ13DbhqboBM2zT7cXMKiE8xsgJB0GfAQ4nDQF/gpJR9juuL8tV5j9AtitzCpnh1w5uWOWM1IVCu5RXk868S2i5ozQcbwF2Jf1y+SDPiEqPJsjcn4DhKRvAEfbfjBfvxT4vO2dCtjeFTgLeCGp92gO8FgJrcCSVDND2510oLdPO5Lm2f5tA3ZfZ/s7pe0Gs0e/fP76iTj5DRC29x13fVN2gCU4m3SSuIQ0CPhw4PmFbJekyuv1zUmntYdy3IACoPOwZ+sGQNIL29jt2Y1AsB798vnrG8L5DQCSTrB96iRzOIvM37T9M0lzbK8Fzpe0dMonzTK2q9FuJ3d7LTOgqR7KvtkABFPTD5+/fiKc32BQ9cU1OYdzVdbxWy7pVFIRTK9WT5LX+DFgNXAlsCPw3iyj1FM01avVZxuAYHL66vPXD0TObwCQtMj2YaWazid4ja2AB4EnA8cBmwGf7VUFaUnLbe8kaT9S0cdxwPfcofL8bNDS5D4G27WmsSgp0Lez26vVr8E4+u3z1w/EyW8wmJ8/HEdKupA0IWQdth+p+wItVWergX44UTw
5/7s3aaj1I+3yaT3GLi3fbwQcCDxjgp+dCZePs7sfSTcw6BP68PPX88TJbwCQdAywkDSv8T7GOj/XOTn0a5OtpE+QTnyrSSruTwMut91zsjWTIelG23sUtjkCXFNXKimYPZqKCgwz4fwGCEnn2F5Y2OZWkz3ey31IWRtwhe21ed7p3AJN/40hqXW6ywjpJLiwdKhW0nbAt21HtWCfIOlPWi7XRQVst9VqDKYmnF8wbSQ9C1iQL2+q+gl7EUkbA+8DtrR9tKQXANvZvnyKp3aNPOWlYg1wD0l54K6adlcy9tTwAPAh21+vYzfoLk1EBYaJyPkF00LSQcCngOtIYdWzJB1ve3FXFzYx5wO3kFSwAe4l9Uj1rPOzvVdDdqPloc+ZICoQv9cahPMLpsuJwIKW6THzgGtIw5N7kW1sH5wHc2N7tXq84kXSsSSnvRI4jzTy7IO2r65pd3dgue3HJB2a7Z7ZyyHrYD1OZ/T0XkUFDuzaagaAcH7BdBkZF+Z8mB5Vv848IekpjE5O2QZ4vLtLmpIjbZ8p6TXAM4EjSM6wlvMDziHNOd0ROAH4InAh0FYjLuhJLmdU65H8/T7Vfi6m9cyccH7BdLlS0lXARfn6YOCKLq5nKk4iNbdvIenfgd2Bt3d1RVNT/WHbGzjf9u2FTqtrbFvSm0gnvi9K+tsCdoPZYz4p3/5N0v+TNwA3AL/q5qL6mSh4CaaNpP2BPUgfvhtsX9rlJbUlO4znAquAXUnr/YHth7q6sCnIzeibA88jTaSZA1xne35Nu9eTNgJHAHsCvyWFQV9cb8XBbCHpauAA2yvz9VzgEtuv7e7K+pdwfsG0kHQc6cN2b7fXMh0k3VLXacw2uf9uJ+Dnth/N5e2b266lmyjp2SRB4pttf1/SlsArbF9Yf9XBbCDpTmBH24/n6w2B221v392V9S8R9gymy6bAVZIeAb4KLLb9my6vaTJ+IGmB7Zu7vZDpYvuPwK0t1w+Tcqt17T4AnNFy/UtSzi/oHxYBN0m6lJTv2w+4oLtL6m/i5BfMCEk7kPJ9BwD32n5ll5fUFkk/BrYlCfE+Rgp9ulcn0gTBVOR2h5fnyxts39bN9fQ7cfILZsqDpCbph0kVib3K67q9gCAoie1baYkMBPWIk18wLSQtJJ345pF6+y62/ePJnxXMFEl7AC+wfX7upXyq7bu7va4gGDTi5BdMl61IenjLu72QQUXSSaTJHduR+vueDHyF1KbRib2JhpJHCDgYeuLkFwQ9gqTlwM7ArbZ3zvfu6NRJ9fNQ8iBomjj5BUHv8ERuRq+m0tRS6g7nFgQT08vjqYJg2PiapHOBp0k6ijQ79by6RiXtKulmSb+X9ISktZJW1F5tEPQxEfYMgh5C0quAV5PyclfZ/m4Bm8uAt5BULXYBDgeeb/vEuraDoF+JsGcQ9Bb/QypGuUbSxpLmViOt6mD7Z5Lm2F4LnC9paf2lBkH/Es4vCHqEHOo8GngGsA1pzufngL+paXqVpA2A5ZJOBe4HauUTg6DfiZxfEPQO7ya1NawAsP1TygwSOIz0WX8PadrNFsD+BewGQd8Szi8IeofHbT9RXUh6Eu379GbKvrb/YHuF7ZNtvw/Yp4DdIOhbwvkFQe9wvaR/Ap6SC18uAb5VwG477b63F7AbBH1LVHsGQY+QJY3eQUu1J/AFd/ghlfRWkpTRHsD3Wx7alCRw25NDyYNgNgjnFwQ9gKQ5wAW2Dy1ocyuSMO7HgQ+2PLQSuMP2mlKvFQT9RlR7BkEPYHutpHmSNmjN+9W0+QuSpNNukp4FLMgP/SQcXzDshPMLgt7hHmCJpMtIVZkA2D5jwmdMA0kHAqcB15HCqWdJOt724jp2g6CfCecXBL3Dr/PXCDC3oN0PAwtsPwiQpZKuIUlTBcFQEs4vCLqMpEW2DwMetX1mAy8xUjm+zMNEpXcw5ITzC4LuMz8Xpxwp6UJSaHIdth+paf9KSVcBF+Xrg4Hv1LQZBH1NVHsGQZeRdAy
wENgauI+xzs+2ty7wGvuTWh4E3GD70ro2g6CfCecXBD2CpHNsL2zA7idtf2Cqe0EwTITzC4IBR9Kttl8y7l7HCvFBMAhEzi8IBhRJC4F3AVtLuqPlobnAku6sKgh6gzj5BcGAImkz4Om0mfBSoIgmCPqacH5BEATB0BG9PkEQBMHQEc4vCIIgGDrC+QVBEARDRzi/IAiCYOgI5xcEQRAMHf8PthYLW2iH+jYAAAAASUVORK5CYII=\n", "text/plain": [ "<Figure size 432x432 with 2 Axes>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from pyspark.ml.stat import Correlation\n", "from pyspark.ml.feature import VectorAssembler\n", "\n", "changedTrain = train.withColumn(labelCol+'Double', train[labelCol].cast(\"double\")).drop(labelCol)\n", "\n", "numericCols = changedTrain.columns\n", "\n", "corrAssembler = VectorAssembler(inputCols=numericCols, outputCol='corrFeatures')\n", "corrTrain = corrAssembler.transform(changedTrain)\n", "corrMat = Correlation.corr(corrTrain, 'corrFeatures').head()\n", "\n", "pdf = corrMat[0].toArray()\n", "\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "\n", "plt.figure(figsize=(6,6))\n", "\n", "sns.heatmap(pdf, \n", " xticklabels=numericCols,\n", " yticklabels=numericCols)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "There's some moderate positive correlation between 'citric acid' and 'fixed acidity', 'free sulfur dioxide' and 'total sulfur dioxide', 'fixed acidity' and 'density'" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "-RECORD 0------------------------------------\n", " fixed acidity | 1.7339054079326566 \n", " volatile acidity | 0.18387071977423192 \n", " citric acid | 0.1937298802160502 \n", " residual sugar | 1.4816221021751061 \n", " chlorides | 0.045579661274595056 \n", " free sulfur dioxide | 10.680847219823992 \n", " total sulfur dioxide | 32.517503458328534 \n", " density | 0.00191715082907368 \n", " pH | 0.15339972836093962 \n", " sulphates | 0.15549440282020172 \n", " alcohol | 1.0674132842935937 \n", "\n", "-RECORD 0------------------------------------\n", " fixed 
from pyspark.sql.functions import stddev_pop, var_pop

# Feature columns: every column of `train` except the label.
# list.remove raises ValueError if labelCol is missing, which is the desired
# loud failure for a misconfigured label name.
cols = list(train.columns)
cols.remove(labelCol)

# One aggregate expression per feature, aliased back to the feature name so the
# single-row result is keyed by column.
stddev_pops = [stddev_pop(column).alias(column) for column in cols]
var_pops = [var_pop(column).alias(column) for column in cols]

featureStddevs = train.agg(*stddev_pops)
featureVariances = train.agg(*var_pops)

# truncate=False: the default cuts each value to 20 characters, which dropped
# the exponent of very small numbers (the density variance was printed as
# "3.675467301417898..." and could not be read correctly).
featureStddevs.show(truncate=False, vertical=True)
featureVariances.show(truncate=False, vertical=True)
from pyspark.ml import Pipeline
from pyspark.ml.classification import LogisticRegression, RandomForestClassifier
from pyspark.ml.evaluation import MulticlassClassificationEvaluator, RegressionEvaluator
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.regression import GBTRegressor
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder
import pandas as pd

# Fixed seed so that cross-validation fold assignment and the tree ensembles
# give the same numbers after a kernel restart.
SEED = 42
NUM_FOLDS = 10

# One row per model: score on the training split and on the held-out split.
# NOTE: the GBTRegressor row stores R^2 while the classifier rows store
# accuracy, so that row is not directly comparable with the others.
accuracyDataFrame = pd.DataFrame(columns=['Train', 'Test'])


def fitAndScore(name, estimator, paramGrid, evaluator, metricName):
    """Cross-validate `estimator` on `train`, score it, and record the result.

    The estimator is wrapped in a pipeline behind a VectorAssembler that packs
    every column of `train` except `labelCol` into 'features'. CrossValidator
    picks the best parameter map using `evaluator` as configured (i.e. its own
    metric setting); the reported scores use `metricName` instead.

    Parameters
    ----------
    name : str
        Row label written into `accuracyDataFrame`.
    estimator : pyspark.ml.Estimator
        Model to tune; the params referenced in `paramGrid` must belong to it.
    paramGrid : list
        Output of ParamGridBuilder().build().
    evaluator : pyspark.ml.evaluation.Evaluator
        Evaluator used both for model selection and for the final scores.
    metricName : str
        Metric passed to the evaluator for the reported train/test scores.

    Returns
    -------
    tuple
        (fitted CrossValidatorModel, predictions on `train`, predictions on `test`)
    """
    featureCols = list(train.columns)
    featureCols.remove(labelCol)

    pipeline = Pipeline(stages=[
        VectorAssembler(inputCols=featureCols, outputCol='features'),
        estimator,
    ])
    crossval = CrossValidator(estimator=pipeline,
                              estimatorParamMaps=paramGrid,
                              evaluator=evaluator,
                              numFolds=NUM_FOLDS,
                              seed=SEED)
    cvModel = crossval.fit(train)
    trainPredictions = cvModel.transform(train)
    testPredictions = cvModel.transform(test)

    # Override the metric per call rather than mutating the evaluator.
    metric = {evaluator.metricName: metricName}
    accuracyDataFrame.loc[name] = [evaluator.evaluate(trainPredictions, metric),
                                   evaluator.evaluate(testPredictions, metric)]
    return cvModel, trainPredictions, testPredictions


# --- Regressor -------------------------------------------------------------
regressor = GBTRegressor(labelCol='quality', seed=SEED)
model, gbtRegressorTrain, gbtRegressorTest = fitAndScore(
    'GBTRegressor',
    regressor,
    ParamGridBuilder().addGrid(regressor.maxIter, [5, 10, 15]).build(),
    RegressionEvaluator(labelCol='quality'),
    'r2')

# --- RandomForestClassifier ------------------------------------------------
classifier = RandomForestClassifier(labelCol='quality', seed=SEED)
model, randomForestClassifierTrain, randomForestClassifierTest = fitAndScore(
    'RandomForestClassifier',
    classifier,
    ParamGridBuilder().addGrid(classifier.maxDepth, [3, 5])
                      .addGrid(classifier.numTrees, [20, 50])
                      .build(),
    MulticlassClassificationEvaluator(labelCol='quality'),
    'accuracy')

# --- LogisticRegression ----------------------------------------------------
classifier = LogisticRegression(labelCol='quality')
model, logisticRegressionTrain, logisticRegressionTest = fitAndScore(
    'LogisticRegression',
    classifier,
    ParamGridBuilder().addGrid(classifier.regParam, [0.1, 0.15])
                      .addGrid(classifier.maxIter, [100, 200, 300])
                      .build(),
    MulticlassClassificationEvaluator(labelCol='quality'),
    'accuracy')

accuracyDataFrame
from pyspark.ml.param import Param, Params
from pyspark.ml.param.shared import HasInputCol, HasOutputCol
from pyspark.ml.pipeline import Estimator, Model, Pipeline
# Namespaced import: importing `min` directly shadowed the Python builtin for
# the rest of the notebook.
from pyspark.sql import functions as F


class HasMin(Params):
    """Mixin that adds a `minimum` param with setMin/getMin accessors."""

    minimum = Param(Params._dummy(), "minimum", "minimum")

    def __init__(self):
        super(HasMin, self).__init__()

    def setMin(self, value):
        """Store `value` in the `minimum` param and return self for chaining."""
        return self._set(minimum=value)

    def getMin(self):
        """Return the value of the `minimum` param (or its default)."""
        return self.getOrDefault(self.minimum)


class MinTransformation(Estimator, HasInputCol,
                        HasOutputCol, HasMin):
    """Estimator that learns the minimum of the input column.

    Fitting produces a MinTransformationModel that subtracts that minimum.
    """

    def _fit(self, dataset):
        inputCol = self.getInputCol()
        # Keep the learned value in a local: assigning it to self.minimum would
        # replace the Param attribute on this estimator with a plain number.
        learnedMin = dataset.agg(F.min(inputCol)).first()[0]
        return (MinTransformationModel()
                .setInputCol(inputCol)
                .setMin(learnedMin)
                .setOutputCol(self.getOutputCol()))


class MinTransformationModel(Model, HasInputCol, HasOutputCol, HasMin):
    """Writes `inputCol - minimum` to the output column.

    The subtracted minimum is attached to the output column's metadata under
    the key 'MinTransformation' so a later stage can read it back.
    """

    def _transform(self, dataset):
        inputCol = self.getInputCol()
        outputCol = self.getOutputCol()
        minimum = self.getMin()
        meta = {'MinTransformation': minimum}
        return dataset.withColumn(
            outputCol,
            (F.col(inputCol) - minimum).alias(outputCol, metadata=meta))


class HasConst(Params):
    """Mixin that adds a `const` param with setConst/getConst accessors."""

    const = Param(Params._dummy(), "const", "const")

    def __init__(self):
        super(HasConst, self).__init__()

    def setConst(self, value):
        """Store `value` in the `const` param and return self for chaining."""
        return self._set(const=value)

    def getConst(self):
        """Return the value of the `const` param (or its default)."""
        return self.getOrDefault(self.const)


class ConstTransformation(Estimator, HasInputCol,
                          HasOutputCol, HasConst):
    """Estimator that recovers the offset recorded by MinTransformationModel.

    Fitting scans the dataset schema for the first field whose metadata has a
    'MinTransformation' entry and builds a ConstTransformationModel that adds
    that value.

    Raises
    ------
    ValueError
        If no field in the schema carries 'MinTransformation' metadata.
    """

    def _fit(self, dataset):
        inputCol = self.getInputCol()
        for field in dataset.schema.fields:
            if 'MinTransformation' in field.metadata:
                offset = field.metadata['MinTransformation']
                break
        else:
            # Previously the untouched Param object itself was passed to
            # setConst here, which only failed later and obscurely inside
            # transform().
            raise ValueError(
                "ConstTransformation: no column with 'MinTransformation' "
                "metadata found in the dataset schema")

        return (ConstTransformationModel()
                .setInputCol(inputCol)
                .setConst(offset)
                .setOutputCol(self.getOutputCol()))


class ConstTransformationModel(Model, HasInputCol, HasOutputCol, HasConst):
    """Writes `inputCol + const` to the output column."""

    def _transform(self, dataset):
        inputCol = self.getInputCol()
        outputCol = self.getOutputCol()
        const = self.getConst()

        return dataset.withColumn(outputCol, F.col(inputCol) + const)
from pyspark.ml.classification import MultilayerPerceptronClassifier
from pyspark.sql import functions as F

# Fixed seed so weight initialisation and CV fold assignment are repeatable.
SEED = 42

cols = list(train.columns)
cols.remove(labelCol)

mlpClassifierTrain = train
mlpClassifierTest = test

# Network shape derived from the data instead of the hard-coded 11 and 6:
# the input layer must match the number of assembled features, and the output
# layer must cover every shifted label 0 .. (max - min) seen in training.
numFeatures = len(cols)
qualityMin, qualityMax = train.agg(F.min('quality'), F.max('quality')).first()
numClasses = int(qualityMax - qualityMin + 1)

# The MLP is trained on 'reQuality' (quality shifted down by its minimum so
# labels start at 0); ConstTransformation then shifts predictions back up into
# 'actualPrediction' so they can be compared with the original 'quality'.
minTransformation = MinTransformation().setInputCol('quality').setOutputCol('reQuality')
assembler = VectorAssembler(inputCols=cols, outputCol='features')
classifier = MultilayerPerceptronClassifier(labelCol='reQuality', seed=SEED)
constTransformation = ConstTransformation().setInputCol('prediction').setOutputCol('actualPrediction')

pipeline = Pipeline(stages=[minTransformation, assembler, classifier, constTransformation])
grid = (ParamGridBuilder()
        .addGrid(classifier.maxIter, [100, 200])
        .addGrid(classifier.layers, [[numFeatures, 10, 10, numClasses],
                                     [numFeatures, 20, 10, numClasses]])
        .build())
crossval = CrossValidator(estimator=pipeline,
                          estimatorParamMaps=grid,
                          evaluator=MulticlassClassificationEvaluator(labelCol='quality', predictionCol='actualPrediction'),
                          numFolds=10,
                          seed=SEED)

model = crossval.fit(mlpClassifierTrain)
mlpClassifierTrain = model.transform(mlpClassifierTrain)
mlpClassifierTest = model.transform(mlpClassifierTest)

evaluator = MulticlassClassificationEvaluator(labelCol='quality', predictionCol='actualPrediction')

trainAccuracy = evaluator.evaluate(mlpClassifierTrain, {evaluator.metricName: "accuracy"})
testAccuracy = evaluator.evaluate(mlpClassifierTest, {evaluator.metricName: "accuracy"})

accuracyDataFrame.loc['MultilayerPerceptronClassifier'] = [trainAccuracy, testAccuracy]
accuracyDataFrame