{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Data retrieval" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NameOverallGradeObedientResearchScoreProjectScoreRecommend
0HenryAY9085Yes
1JohnCN8551Yes
2DavidFN1017No
3HolmesBY7571No
4MarvinEN2030No
5SimonAY9279Yes
6RobertBY6059No
7TrentCY7533No
\n", "
" ], "text/plain": [ " Name OverallGrade Obedient ResearchScore ProjectScore Recommend\n", "0 Henry A Y 90 85 Yes\n", "1 John C N 85 51 Yes\n", "2 David F N 10 17 No\n", "3 Holmes B Y 75 71 No\n", "4 Marvin E N 20 30 No\n", "5 Simon A Y 92 79 Yes\n", "6 Robert B Y 60 59 No\n", "7 Trent C Y 75 33 No" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "# turn of warning messages\n", "pd.options.mode.chained_assignment = None # default='warn'\n", "\n", "# get data\n", "df = pd.read_csv('student_records.csv')\n", "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Data preparation" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Feature extraction and engineering" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# get features and corresponding outcomes\n", "feature_names = ['OverallGrade', 'Obedient', 'ResearchScore', 'ProjectScore']\n", "training_features = df[feature_names]\n", "\n", "outcome_name = ['Recommend']\n", "outcome_labels = df[outcome_name]" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OverallGradeObedientResearchScoreProjectScore
0AY9085
1CN8551
2FN1017
3BY7571
4EN2030
5AY9279
6BY6059
7CY7533
\n", "
" ], "text/plain": [ " OverallGrade Obedient ResearchScore ProjectScore\n", "0 A Y 90 85\n", "1 C N 85 51\n", "2 F N 10 17\n", "3 B Y 75 71\n", "4 E N 20 30\n", "5 A Y 92 79\n", "6 B Y 60 59\n", "7 C Y 75 33" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# view features\n", "training_features" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Recommend
0Yes
1Yes
2No
3No
4No
5Yes
6No
7No
\n", "
" ], "text/plain": [ " Recommend\n", "0 Yes\n", "1 Yes\n", "2 No\n", "3 No\n", "4 No\n", "5 Yes\n", "6 No\n", "7 No" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# view outcome labels\n", "outcome_labels" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# list down features based on type\n", "numeric_feature_names = ['ResearchScore', 'ProjectScore']\n", "categoricial_feature_names = ['OverallGrade', 'Obedient']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Numeric Feature Scaling" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OverallGradeObedientResearchScoreProjectScore
0AY0.8995831.376650
1CN0.730648-0.091777
2FN-1.803390-1.560203
3BY0.3927760.772004
4EN-1.465519-0.998746
5AY0.9671581.117516
6BY-0.1140320.253735
7CY0.392776-0.869179
\n", "
" ], "text/plain": [ " OverallGrade Obedient ResearchScore ProjectScore\n", "0 A Y 0.899583 1.376650\n", "1 C N 0.730648 -0.091777\n", "2 F N -1.803390 -1.560203\n", "3 B Y 0.392776 0.772004\n", "4 E N -1.465519 -0.998746\n", "5 A Y 0.967158 1.117516\n", "6 B Y -0.114032 0.253735\n", "7 C Y 0.392776 -0.869179" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.preprocessing import StandardScaler\n", "ss = StandardScaler()\n", "\n", "# fit scaler on numeric features\n", "ss.fit(training_features[numeric_feature_names])\n", "\n", "# scale numeric features now\n", "training_features[numeric_feature_names] = ss.transform(training_features[numeric_feature_names])\n", "\n", "# view updated featureset\n", "training_features" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Engineering Categorical Features" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ResearchScoreProjectScoreOverallGrade_AOverallGrade_BOverallGrade_COverallGrade_EOverallGrade_FObedient_NObedient_Y
00.8995831.3766501000001
10.730648-0.0917770010010
2-1.803390-1.5602030000110
30.3927760.7720040100001
4-1.465519-0.9987460001010
50.9671581.1175161000001
6-0.1140320.2537350100001
70.392776-0.8691790010001
\n", "
" ], "text/plain": [ " ResearchScore ProjectScore OverallGrade_A OverallGrade_B \\\n", "0 0.899583 1.376650 1 0 \n", "1 0.730648 -0.091777 0 0 \n", "2 -1.803390 -1.560203 0 0 \n", "3 0.392776 0.772004 0 1 \n", "4 -1.465519 -0.998746 0 0 \n", "5 0.967158 1.117516 1 0 \n", "6 -0.114032 0.253735 0 1 \n", "7 0.392776 -0.869179 0 0 \n", "\n", " OverallGrade_C OverallGrade_E OverallGrade_F Obedient_N Obedient_Y \n", "0 0 0 0 0 1 \n", "1 1 0 0 1 0 \n", "2 0 0 1 1 0 \n", "3 0 0 0 0 1 \n", "4 0 1 0 1 0 \n", "5 0 0 0 0 1 \n", "6 0 0 0 0 1 \n", "7 1 0 0 0 1 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "training_features = pd.get_dummies(training_features, columns=categoricial_feature_names)\n", "# view newly engineering features\n", "training_features" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# get list of new categorical features\n", "categorical_engineered_features = list(set(training_features.columns) - set(numeric_feature_names))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Modeling" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", " intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n", " penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n", " verbose=0, warm_start=False)" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.linear_model import LogisticRegression\n", "import numpy as np\n", "\n", "# fit the model\n", "lr = LogisticRegression() \n", "model = lr.fit(training_features, np.array(outcome_labels['Recommend']))\n", "# view model parameters\n", "model" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Model Evaluation" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 100.0 %\n", "Classification Stats:\n", " precision recall f1-score support\n", "\n", " No 1.00 1.00 1.00 5\n", " Yes 1.00 1.00 1.00 3\n", "\n", "avg / total 1.00 1.00 1.00 8\n", "\n" ] } ], "source": [ "# simple evaluation on training data\n", "pred_labels = model.predict(training_features)\n", "actual_labels = np.array(outcome_labels['Recommend'])\n", "\n", "# evaluate model performance\n", "from sklearn.metrics import accuracy_score\n", "from sklearn.metrics import classification_report\n", "\n", "print('Accuracy:', float(accuracy_score(actual_labels, pred_labels))*100, '%')\n", "print('Classification Stats:')\n", "print(classification_report(actual_labels, pred_labels))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Model Deployment " ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Scaler/scaler.pickle',\n", " 'Scaler/scaler.pickle_01.npy',\n", " 'Scaler/scaler.pickle_02.npy',\n", " 'Scaler/scaler.pickle_03.npy']" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.externals import joblib\n", "import os\n", "# save models to be deployed on your server\n", "if not os.path.exists('Model'):\n", " os.mkdir('Model')\n", "if not os.path.exists('Scaler'):\n", " os.mkdir('Scaler') \n", " \n", "joblib.dump(model, r'Model/model.pickle') \n", "joblib.dump(ss, r'Scaler/scaler.pickle') " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Model Evaluation" ] },
{ "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 100.0 %\n", "Classification Stats:\n", " precision recall f1-score support\n", "\n", " No 1.00 1.00 1.00 5\n", " Yes 1.00 1.00 1.00 3\n", "\n", "avg / total 1.00 1.00 1.00 8\n", "\n" ] } ], "source": [ "# simple evaluation on training data\n", "pred_labels = model.predict(training_features)\n", "actual_labels = np.array(outcome_labels['Recommend'])\n", "\n", "# evaluate model performance\n", "from sklearn.metrics import accuracy_score\n", "from sklearn.metrics import classification_report\n", "\n", "print('Accuracy:', float(accuracy_score(actual_labels, pred_labels))*100, '%')\n", "print('Classification Stats:')\n", "print(classification_report(actual_labels, pred_labels))" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Model Deployment" ] },
{ "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Scaler/scaler.pickle',\n", " 'Scaler/scaler.pickle_01.npy',\n", " 'Scaler/scaler.pickle_02.npy',\n", " 'Scaler/scaler.pickle_03.npy']" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# note: in scikit-learn >= 0.23, sklearn.externals.joblib is removed; use `import joblib` instead\n", "from sklearn.externals import joblib\n", "import os\n", "\n", "# save models to be deployed on your server\n", "if not os.path.exists('Model'):\n", "    os.mkdir('Model')\n", "if not os.path.exists('Scaler'):\n", "    os.mkdir('Scaler')\n", "\n", "joblib.dump(model, r'Model/model.pickle')\n", "joblib.dump(ss, r'Scaler/scaler.pickle')" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Prediction in Action" ] },
{ "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# load model and scaler objects\n", "model = joblib.load(r'Model/model.pickle')\n", "scaler = joblib.load(r'Scaler/scaler.pickle')" ] },
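{ "cell_type": "markdown", "metadata": {}, "source": [ "Note that the prediction steps below quietly reuse in-memory variables from training (`feature_names`, `numeric_feature_names`, `categorical_feature_names`, `categorical_engineered_features`, and the training column order). In a real deployment those would need to be persisted alongside the model; a minimal sketch, with a hypothetical file name:" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# (added sketch) persist the feature metadata the prediction steps rely on;\n", "# 'Model/feature_columns.pickle' is a hypothetical file name, not part of the original flow\n", "joblib.dump(list(training_features.columns), r'Model/feature_columns.pickle')\n", "training_column_order = joblib.load(r'Model/feature_columns.pickle')\n", "print(training_column_order)" ] },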
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NameOverallGradeObedientResearchScoreProjectScore
0NathanFN3020
1ThomasAY7880
\n", "
" ], "text/plain": [ " Name OverallGrade Obedient ResearchScore ProjectScore\n", "0 Nathan F N 30 20\n", "1 Thomas A Y 78 80" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## data retrieval\n", "new_data = pd.DataFrame([{'Name': 'Nathan', 'OverallGrade': 'F', 'Obedient': 'N', 'ResearchScore': 30, 'ProjectScore': 20},\n", " {'Name': 'Thomas', 'OverallGrade': 'A', 'Obedient': 'Y', 'ResearchScore': 78, 'ProjectScore': 80}])\n", "new_data = new_data[['Name', 'OverallGrade', 'Obedient', 'ResearchScore', 'ProjectScore']]\n", "new_data" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ResearchScoreProjectScoreOverallGrade_AOverallGrade_FObedient_NObedient_Y
0-1.127647-1.4306360110
10.4941371.1607051001
\n", "
" ], "text/plain": [ " ResearchScore ProjectScore OverallGrade_A OverallGrade_F Obedient_N \\\n", "0 -1.127647 -1.430636 0 1 1 \n", "1 0.494137 1.160705 1 0 0 \n", "\n", " Obedient_Y \n", "0 0 \n", "1 1 " ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## data preparation\n", "prediction_features = new_data[feature_names]\n", "\n", "# scaling\n", "prediction_features[numeric_feature_names] = scaler.transform(prediction_features[numeric_feature_names])\n", "\n", "# engineering categorical variables\n", "prediction_features = pd.get_dummies(prediction_features, columns=categoricial_feature_names)\n", "\n", "# view feature set\n", "prediction_features" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ResearchScoreProjectScoreOverallGrade_AOverallGrade_FObedient_NObedient_YOverallGrade_BOverallGrade_EOverallGrade_C
0-1.127647-1.4306360110000
10.4941371.1607051001000
\n", "
" ], "text/plain": [ " ResearchScore ProjectScore OverallGrade_A OverallGrade_F Obedient_N \\\n", "0 -1.127647 -1.430636 0 1 1 \n", "1 0.494137 1.160705 1 0 0 \n", "\n", " Obedient_Y OverallGrade_B OverallGrade_E OverallGrade_C \n", "0 0 0 0 0 \n", "1 1 0 0 0 " ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# add missing categorical feature columns\n", "current_categorical_engineered_features = set(prediction_features.columns) - set(numeric_feature_names)\n", "missing_features = set(categorical_engineered_features) - current_categorical_engineered_features\n", "for feature in missing_features:\n", " # add zeros since feature is absent in these data samples\n", " prediction_features[feature] = [0] * len(prediction_features) \n", "\n", "# view final feature set\n", "prediction_features" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NameOverallGradeObedientResearchScoreProjectScoreRecommend
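{ "cell_type": "markdown", "metadata": {}, "source": [ "Besides hard labels, the classifier can also report how confident it is in each recommendation; the cell below is an added check, with probability columns ordered as in `model.classes_`." ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# (added) class probabilities for the new samples; columns follow model.classes_\n", "print(model.classes_)\n", "print(model.predict_proba(prediction_features))" ] },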
{ "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": {
" ], "text/plain": [ " Name OverallGrade Obedient ResearchScore ProjectScore Recommend\n", "0 Nathan F N 30 20 No\n", "1 Thomas A Y 78 80 Yes" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## predict using model\n", "predictions = model.predict(prediction_features)\n", "\n", "## display results\n", "new_data['Recommend'] = predictions\n", "new_data" ] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python [default]", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 1 }