{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Import necessary dependencies and settings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Transforming Nominal Features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Platform</th>\n",
       "      <th>Year</th>\n",
       "      <th>Genre</th>\n",
       "      <th>Publisher</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Super Mario Bros.</td>\n",
       "      <td>NES</td>\n",
       "      <td>1985.0</td>\n",
       "      <td>Platform</td>\n",
       "      <td>Nintendo</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Mario Kart Wii</td>\n",
       "      <td>Wii</td>\n",
       "      <td>2008.0</td>\n",
       "      <td>Racing</td>\n",
       "      <td>Nintendo</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Wii Sports Resort</td>\n",
       "      <td>Wii</td>\n",
       "      <td>2009.0</td>\n",
       "      <td>Sports</td>\n",
       "      <td>Nintendo</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Pokemon Red/Pokemon Blue</td>\n",
       "      <td>GB</td>\n",
       "      <td>1996.0</td>\n",
       "      <td>Role-Playing</td>\n",
       "      <td>Nintendo</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Tetris</td>\n",
       "      <td>GB</td>\n",
       "      <td>1989.0</td>\n",
       "      <td>Puzzle</td>\n",
       "      <td>Nintendo</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>New Super Mario Bros.</td>\n",
       "      <td>DS</td>\n",
       "      <td>2006.0</td>\n",
       "      <td>Platform</td>\n",
       "      <td>Nintendo</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                       Name Platform    Year         Genre Publisher\n",
       "1         Super Mario Bros.      NES  1985.0      Platform  Nintendo\n",
       "2            Mario Kart Wii      Wii  2008.0        Racing  Nintendo\n",
       "3         Wii Sports Resort      Wii  2009.0        Sports  Nintendo\n",
       "4  Pokemon Red/Pokemon Blue       GB  1996.0  Role-Playing  Nintendo\n",
       "5                    Tetris       GB  1989.0        Puzzle  Nintendo\n",
       "6     New Super Mario Bros.       DS  2006.0      Platform  Nintendo"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vg_df = pd.read_csv('datasets/vgsales.csv', encoding='utf-8')\n",
    "vg_df[['Name', 'Platform', 'Year', 'Genre', 'Publisher']].iloc[1:7]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['Action', 'Adventure', 'Fighting', 'Misc', 'Platform', 'Puzzle',\n",
       "       'Racing', 'Role-Playing', 'Shooter', 'Simulation', 'Sports',\n",
       "       'Strategy'], dtype=object)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "genres = np.unique(vg_df['Genre'])\n",
    "genres"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{0: 'Action',\n",
       " 1: 'Adventure',\n",
       " 2: 'Fighting',\n",
       " 3: 'Misc',\n",
       " 4: 'Platform',\n",
       " 5: 'Puzzle',\n",
       " 6: 'Racing',\n",
       " 7: 'Role-Playing',\n",
       " 8: 'Shooter',\n",
       " 9: 'Simulation',\n",
       " 10: 'Sports',\n",
       " 11: 'Strategy'}"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.preprocessing import LabelEncoder\n",
    "\n",
    "gle = LabelEncoder()\n",
    "genre_labels = gle.fit_transform(vg_df['Genre'])\n",
    "genre_mappings = {index: label for index, label in enumerate(gle.classes_)}\n",
    "genre_mappings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Platform</th>\n",
       "      <th>Year</th>\n",
       "      <th>Genre</th>\n",
       "      <th>GenreLabel</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Super Mario Bros.</td>\n",
       "      <td>NES</td>\n",
       "      <td>1985.0</td>\n",
       "      <td>Platform</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Mario Kart Wii</td>\n",
       "      <td>Wii</td>\n",
       "      <td>2008.0</td>\n",
       "      <td>Racing</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Wii Sports Resort</td>\n",
       "      <td>Wii</td>\n",
       "      <td>2009.0</td>\n",
       "      <td>Sports</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Pokemon Red/Pokemon Blue</td>\n",
       "      <td>GB</td>\n",
       "      <td>1996.0</td>\n",
       "      <td>Role-Playing</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Tetris</td>\n",
       "      <td>GB</td>\n",
       "      <td>1989.0</td>\n",
       "      <td>Puzzle</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>New Super Mario Bros.</td>\n",
       "      <td>DS</td>\n",
       "      <td>2006.0</td>\n",
       "      <td>Platform</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                       Name Platform    Year         Genre  GenreLabel\n",
       "1         Super Mario Bros.      NES  1985.0      Platform           4\n",
       "2            Mario Kart Wii      Wii  2008.0        Racing           6\n",
       "3         Wii Sports Resort      Wii  2009.0        Sports          10\n",
       "4  Pokemon Red/Pokemon Blue       GB  1996.0  Role-Playing           7\n",
       "5                    Tetris       GB  1989.0        Puzzle           5\n",
       "6     New Super Mario Bros.       DS  2006.0      Platform           4"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vg_df['GenreLabel'] = genre_labels\n",
    "vg_df[['Name', 'Platform', 'Year', 'Genre', 'GenreLabel']].iloc[1:7]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Transforming Ordinal Features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['Gen 1', 'Gen 2', 'Gen 3', 'Gen 4', 'Gen 5', 'Gen 6'], dtype=object)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "poke_df = pd.read_csv('datasets/Pokemon.csv', encoding='utf-8')\n",
    "poke_df = poke_df.sample(random_state=1, frac=1).reset_index(drop=True)\n",
    "\n",
    "np.unique(poke_df['Generation'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Generation</th>\n",
       "      <th>GenerationLabel</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Octillery</td>\n",
       "      <td>Gen 2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Helioptile</td>\n",
       "      <td>Gen 6</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Dialga</td>\n",
       "      <td>Gen 4</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>DeoxysDefense Forme</td>\n",
       "      <td>Gen 3</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Rapidash</td>\n",
       "      <td>Gen 1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Swanna</td>\n",
       "      <td>Gen 5</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  Name Generation  GenerationLabel\n",
       "4            Octillery      Gen 2                2\n",
       "5           Helioptile      Gen 6                6\n",
       "6               Dialga      Gen 4                4\n",
       "7  DeoxysDefense Forme      Gen 3                3\n",
       "8             Rapidash      Gen 1                1\n",
       "9               Swanna      Gen 5                5"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gen_ord_map = {'Gen 1': 1, 'Gen 2': 2, 'Gen 3': 3, \n",
    "               'Gen 4': 4, 'Gen 5': 5, 'Gen 6': 6}\n",
    "\n",
    "poke_df['GenerationLabel'] = poke_df['Generation'].map(gen_ord_map)\n",
    "poke_df[['Name', 'Generation', 'GenerationLabel']].iloc[4:10]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Encoding Categorical Features"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## One-hot Encoding Scheme"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Generation</th>\n",
       "      <th>Legendary</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Octillery</td>\n",
       "      <td>Gen 2</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Helioptile</td>\n",
       "      <td>Gen 6</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Dialga</td>\n",
       "      <td>Gen 4</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>DeoxysDefense Forme</td>\n",
       "      <td>Gen 3</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Rapidash</td>\n",
       "      <td>Gen 1</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Swanna</td>\n",
       "      <td>Gen 5</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  Name Generation  Legendary\n",
       "4            Octillery      Gen 2      False\n",
       "5           Helioptile      Gen 6      False\n",
       "6               Dialga      Gen 4       True\n",
       "7  DeoxysDefense Forme      Gen 3       True\n",
       "8             Rapidash      Gen 1      False\n",
       "9               Swanna      Gen 5      False"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "poke_df[['Name', 'Generation', 'Legendary']].iloc[4:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Generation</th>\n",
       "      <th>Gen_Label</th>\n",
       "      <th>Legendary</th>\n",
       "      <th>Lgnd_Label</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Octillery</td>\n",
       "      <td>Gen 2</td>\n",
       "      <td>1</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Helioptile</td>\n",
       "      <td>Gen 6</td>\n",
       "      <td>5</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Dialga</td>\n",
       "      <td>Gen 4</td>\n",
       "      <td>3</td>\n",
       "      <td>True</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>DeoxysDefense Forme</td>\n",
       "      <td>Gen 3</td>\n",
       "      <td>2</td>\n",
       "      <td>True</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Rapidash</td>\n",
       "      <td>Gen 1</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Swanna</td>\n",
       "      <td>Gen 5</td>\n",
       "      <td>4</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  Name Generation  Gen_Label  Legendary  Lgnd_Label\n",
       "4            Octillery      Gen 2          1      False           0\n",
       "5           Helioptile      Gen 6          5      False           0\n",
       "6               Dialga      Gen 4          3       True           1\n",
       "7  DeoxysDefense Forme      Gen 3          2       True           1\n",
       "8             Rapidash      Gen 1          0      False           0\n",
       "9               Swanna      Gen 5          4      False           0"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.preprocessing import OneHotEncoder, LabelEncoder\n",
    "\n",
    "# transform and map pokemon generations\n",
    "gen_le = LabelEncoder()\n",
    "gen_labels = gen_le.fit_transform(poke_df['Generation'])\n",
    "poke_df['Gen_Label'] = gen_labels\n",
    "\n",
    "# transform and map pokemon legendary status\n",
    "leg_le = LabelEncoder()\n",
    "leg_labels = leg_le.fit_transform(poke_df['Legendary'])\n",
    "poke_df['Lgnd_Label'] = leg_labels\n",
    "\n",
    "poke_df_sub = poke_df[['Name', 'Generation', 'Gen_Label', 'Legendary', 'Lgnd_Label']]\n",
    "poke_df_sub.iloc[4:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# encode generation labels using one-hot encoding scheme\n",
    "gen_ohe = OneHotEncoder()\n",
    "gen_feature_arr = gen_ohe.fit_transform(poke_df[['Gen_Label']]).toarray()\n",
    "gen_feature_labels = list(gen_le.classes_)\n",
    "gen_features = pd.DataFrame(gen_feature_arr, columns=gen_feature_labels)\n",
    "\n",
    "# encode legendary status labels using one-hot encoding scheme\n",
    "leg_ohe = OneHotEncoder()\n",
    "leg_feature_arr = leg_ohe.fit_transform(poke_df[['Lgnd_Label']]).toarray()\n",
    "leg_feature_labels = ['Legendary_'+str(cls_label) for cls_label in leg_le.classes_]\n",
    "leg_features = pd.DataFrame(leg_feature_arr, columns=leg_feature_labels)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Generation</th>\n",
       "      <th>Gen_Label</th>\n",
       "      <th>Gen 1</th>\n",
       "      <th>Gen 2</th>\n",
       "      <th>Gen 3</th>\n",
       "      <th>Gen 4</th>\n",
       "      <th>Gen 5</th>\n",
       "      <th>Gen 6</th>\n",
       "      <th>Legendary</th>\n",
       "      <th>Lgnd_Label</th>\n",
       "      <th>Legendary_False</th>\n",
       "      <th>Legendary_True</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Octillery</td>\n",
       "      <td>Gen 2</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Helioptile</td>\n",
       "      <td>Gen 6</td>\n",
       "      <td>5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Dialga</td>\n",
       "      <td>Gen 4</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>True</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>DeoxysDefense Forme</td>\n",
       "      <td>Gen 3</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>True</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Rapidash</td>\n",
       "      <td>Gen 1</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Swanna</td>\n",
       "      <td>Gen 5</td>\n",
       "      <td>4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  Name Generation  Gen_Label  Gen 1  Gen 2  Gen 3  Gen 4  \\\n",
       "4            Octillery      Gen 2          1    0.0    1.0    0.0    0.0   \n",
       "5           Helioptile      Gen 6          5    0.0    0.0    0.0    0.0   \n",
       "6               Dialga      Gen 4          3    0.0    0.0    0.0    1.0   \n",
       "7  DeoxysDefense Forme      Gen 3          2    0.0    0.0    1.0    0.0   \n",
       "8             Rapidash      Gen 1          0    1.0    0.0    0.0    0.0   \n",
       "9               Swanna      Gen 5          4    0.0    0.0    0.0    0.0   \n",
       "\n",
       "   Gen 5  Gen 6  Legendary  Lgnd_Label  Legendary_False  Legendary_True  \n",
       "4    0.0    0.0      False           0              1.0             0.0  \n",
       "5    0.0    1.0      False           0              1.0             0.0  \n",
       "6    0.0    0.0       True           1              0.0             1.0  \n",
       "7    0.0    0.0       True           1              0.0             1.0  \n",
       "8    0.0    0.0      False           0              1.0             0.0  \n",
       "9    1.0    0.0      False           0              1.0             0.0  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "poke_df_ohe = pd.concat([poke_df_sub, gen_features, leg_features], axis=1)\n",
    "columns = sum([['Name', 'Generation', 'Gen_Label'],gen_feature_labels,\n",
    "              ['Legendary', 'Lgnd_Label'],leg_feature_labels], [])\n",
    "poke_df_ohe[columns].iloc[4:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Generation</th>\n",
       "      <th>Legendary</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>PikaZoom</td>\n",
       "      <td>Gen 3</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>CharMyToast</td>\n",
       "      <td>Gen 4</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          Name Generation  Legendary\n",
       "0     PikaZoom      Gen 3       True\n",
       "1  CharMyToast      Gen 4      False"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "new_poke_df = pd.DataFrame([['PikaZoom', 'Gen 3', True], \n",
    "                           ['CharMyToast', 'Gen 4', False]],\n",
    "                           columns=['Name', 'Generation', 'Legendary'])\n",
    "new_poke_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Generation</th>\n",
       "      <th>Gen_Label</th>\n",
       "      <th>Legendary</th>\n",
       "      <th>Lgnd_Label</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>PikaZoom</td>\n",
       "      <td>Gen 3</td>\n",
       "      <td>2</td>\n",
       "      <td>True</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>CharMyToast</td>\n",
       "      <td>Gen 4</td>\n",
       "      <td>3</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          Name Generation  Gen_Label  Legendary  Lgnd_Label\n",
       "0     PikaZoom      Gen 3          2       True           1\n",
       "1  CharMyToast      Gen 4          3      False           0"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "new_gen_labels = gen_le.transform(new_poke_df['Generation'])\n",
    "new_poke_df['Gen_Label'] = new_gen_labels\n",
    "\n",
    "new_leg_labels = leg_le.transform(new_poke_df['Legendary'])\n",
    "new_poke_df['Lgnd_Label'] = new_leg_labels\n",
    "\n",
    "new_poke_df[['Name', 'Generation', 'Gen_Label', 'Legendary', 'Lgnd_Label']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Generation</th>\n",
       "      <th>Gen_Label</th>\n",
       "      <th>Gen 1</th>\n",
       "      <th>Gen 2</th>\n",
       "      <th>Gen 3</th>\n",
       "      <th>Gen 4</th>\n",
       "      <th>Gen 5</th>\n",
       "      <th>Gen 6</th>\n",
       "      <th>Legendary</th>\n",
       "      <th>Lgnd_Label</th>\n",
       "      <th>Legendary_False</th>\n",
       "      <th>Legendary_True</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>PikaZoom</td>\n",
       "      <td>Gen 3</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>True</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>CharMyToast</td>\n",
       "      <td>Gen 4</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          Name Generation  Gen_Label  Gen 1  Gen 2  Gen 3  Gen 4  Gen 5  \\\n",
       "0     PikaZoom      Gen 3          2    0.0    0.0    1.0    0.0    0.0   \n",
       "1  CharMyToast      Gen 4          3    0.0    0.0    0.0    1.0    0.0   \n",
       "\n",
       "   Gen 6  Legendary  Lgnd_Label  Legendary_False  Legendary_True  \n",
       "0    0.0       True           1              0.0             1.0  \n",
       "1    0.0      False           0              1.0             0.0  "
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "new_gen_feature_arr = gen_ohe.transform(new_poke_df[['Gen_Label']]).toarray()\n",
    "new_gen_features = pd.DataFrame(new_gen_feature_arr, columns=gen_feature_labels)\n",
    "\n",
    "new_leg_feature_arr = leg_ohe.transform(new_poke_df[['Lgnd_Label']]).toarray()\n",
    "new_leg_features = pd.DataFrame(new_leg_feature_arr, columns=leg_feature_labels)\n",
    "\n",
    "new_poke_ohe = pd.concat([new_poke_df, new_gen_features, new_leg_features], axis=1)\n",
    "columns = sum([['Name', 'Generation', 'Gen_Label'], gen_feature_labels,\n",
    "               ['Legendary', 'Lgnd_Label'], leg_feature_labels], [])\n",
    "new_poke_ohe[columns]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Generation</th>\n",
       "      <th>Gen 1</th>\n",
       "      <th>Gen 2</th>\n",
       "      <th>Gen 3</th>\n",
       "      <th>Gen 4</th>\n",
       "      <th>Gen 5</th>\n",
       "      <th>Gen 6</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Octillery</td>\n",
       "      <td>Gen 2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Helioptile</td>\n",
       "      <td>Gen 6</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Dialga</td>\n",
       "      <td>Gen 4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>DeoxysDefense Forme</td>\n",
       "      <td>Gen 3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Rapidash</td>\n",
       "      <td>Gen 1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Swanna</td>\n",
       "      <td>Gen 5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  Name Generation  Gen 1  Gen 2  Gen 3  Gen 4  Gen 5  Gen 6\n",
       "4            Octillery      Gen 2      0      1      0      0      0      0\n",
       "5           Helioptile      Gen 6      0      0      0      0      0      1\n",
       "6               Dialga      Gen 4      0      0      0      1      0      0\n",
       "7  DeoxysDefense Forme      Gen 3      0      0      1      0      0      0\n",
       "8             Rapidash      Gen 1      1      0      0      0      0      0\n",
       "9               Swanna      Gen 5      0      0      0      0      1      0"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gen_onehot_features = pd.get_dummies(poke_df['Generation'])\n",
    "pd.concat([poke_df[['Name', 'Generation']], gen_onehot_features], axis=1).iloc[4:10]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Dummy Coding Scheme"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Generation</th>\n",
       "      <th>Gen 2</th>\n",
       "      <th>Gen 3</th>\n",
       "      <th>Gen 4</th>\n",
       "      <th>Gen 5</th>\n",
       "      <th>Gen 6</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Octillery</td>\n",
       "      <td>Gen 2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Helioptile</td>\n",
       "      <td>Gen 6</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Dialga</td>\n",
       "      <td>Gen 4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>DeoxysDefense Forme</td>\n",
       "      <td>Gen 3</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Rapidash</td>\n",
       "      <td>Gen 1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Swanna</td>\n",
       "      <td>Gen 5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  Name Generation  Gen 2  Gen 3  Gen 4  Gen 5  Gen 6\n",
       "4            Octillery      Gen 2      1      0      0      0      0\n",
       "5           Helioptile      Gen 6      0      0      0      0      1\n",
       "6               Dialga      Gen 4      0      0      1      0      0\n",
       "7  DeoxysDefense Forme      Gen 3      0      1      0      0      0\n",
       "8             Rapidash      Gen 1      0      0      0      0      0\n",
       "9               Swanna      Gen 5      0      0      0      1      0"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gen_dummy_features = pd.get_dummies(poke_df['Generation'], drop_first=True)\n",
    "pd.concat([poke_df[['Name', 'Generation']], gen_dummy_features], axis=1).iloc[4:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Generation</th>\n",
       "      <th>Gen 1</th>\n",
       "      <th>Gen 2</th>\n",
       "      <th>Gen 3</th>\n",
       "      <th>Gen 4</th>\n",
       "      <th>Gen 5</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Octillery</td>\n",
       "      <td>Gen 2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Helioptile</td>\n",
       "      <td>Gen 6</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Dialga</td>\n",
       "      <td>Gen 4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>DeoxysDefense Forme</td>\n",
       "      <td>Gen 3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Rapidash</td>\n",
       "      <td>Gen 1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Swanna</td>\n",
       "      <td>Gen 5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  Name Generation  Gen 1  Gen 2  Gen 3  Gen 4  Gen 5\n",
       "4            Octillery      Gen 2      0      1      0      0      0\n",
       "5           Helioptile      Gen 6      0      0      0      0      0\n",
       "6               Dialga      Gen 4      0      0      0      1      0\n",
       "7  DeoxysDefense Forme      Gen 3      0      0      1      0      0\n",
       "8             Rapidash      Gen 1      1      0      0      0      0\n",
       "9               Swanna      Gen 5      0      0      0      0      1"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gen_onehot_features = pd.get_dummies(poke_df['Generation'])\n",
    "gen_dummy_features = gen_onehot_features.iloc[:,:-1]\n",
    "pd.concat([poke_df[['Name', 'Generation']], gen_dummy_features], axis=1).iloc[4:10]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Effect Coding Scheme"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Program Files\\Anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py:517: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  self.obj[item] = s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Generation</th>\n",
       "      <th>Gen 1</th>\n",
       "      <th>Gen 2</th>\n",
       "      <th>Gen 3</th>\n",
       "      <th>Gen 4</th>\n",
       "      <th>Gen 5</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Octillery</td>\n",
       "      <td>Gen 2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Helioptile</td>\n",
       "      <td>Gen 6</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Dialga</td>\n",
       "      <td>Gen 4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>DeoxysDefense Forme</td>\n",
       "      <td>Gen 3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Rapidash</td>\n",
       "      <td>Gen 1</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Swanna</td>\n",
       "      <td>Gen 5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  Name Generation  Gen 1  Gen 2  Gen 3  Gen 4  Gen 5\n",
       "4            Octillery      Gen 2    0.0    1.0    0.0    0.0    0.0\n",
       "5           Helioptile      Gen 6   -1.0   -1.0   -1.0   -1.0   -1.0\n",
       "6               Dialga      Gen 4    0.0    0.0    0.0    1.0    0.0\n",
       "7  DeoxysDefense Forme      Gen 3    0.0    0.0    1.0    0.0    0.0\n",
       "8             Rapidash      Gen 1    1.0    0.0    0.0    0.0    0.0\n",
       "9               Swanna      Gen 5    0.0    0.0    0.0    0.0    1.0"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gen_onehot_features = pd.get_dummies(poke_df['Generation'])\n",
    "gen_effect_features = gen_onehot_features.iloc[:,:-1]\n",
    "gen_effect_features.loc[np.all(gen_effect_features == 0, axis=1)] = -1.\n",
    "pd.concat([poke_df[['Name', 'Generation']], gen_effect_features], axis=1).iloc[4:10]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Feature Hashing scheme"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total game genres: 12\n",
      "['Action' 'Adventure' 'Fighting' 'Misc' 'Platform' 'Puzzle' 'Racing'\n",
      " 'Role-Playing' 'Shooter' 'Simulation' 'Sports' 'Strategy']\n"
     ]
    }
   ],
   "source": [
    "unique_genres = np.unique(vg_df[['Genre']])\n",
    "print(\"Total game genres:\", len(unique_genres))\n",
    "print(unique_genres)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Genre</th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Super Mario Bros.</td>\n",
       "      <td>Platform</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Mario Kart Wii</td>\n",
       "      <td>Racing</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Wii Sports Resort</td>\n",
       "      <td>Sports</td>\n",
       "      <td>-2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Pokemon Red/Pokemon Blue</td>\n",
       "      <td>Role-Playing</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Tetris</td>\n",
       "      <td>Puzzle</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>New Super Mario Bros.</td>\n",
       "      <td>Platform</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                       Name         Genre    0    1    2    3    4    5\n",
       "1         Super Mario Bros.      Platform  0.0  2.0  2.0 -1.0  1.0  0.0\n",
       "2            Mario Kart Wii        Racing -1.0  0.0  0.0  0.0  0.0 -1.0\n",
       "3         Wii Sports Resort        Sports -2.0  2.0  0.0 -2.0  0.0  0.0\n",
       "4  Pokemon Red/Pokemon Blue  Role-Playing -1.0  1.0  2.0  0.0  1.0 -1.0\n",
       "5                    Tetris        Puzzle  0.0  1.0  1.0 -2.0  1.0 -1.0\n",
       "6     New Super Mario Bros.      Platform  0.0  2.0  2.0 -1.0  1.0  0.0"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.feature_extraction import FeatureHasher\n",
    "\n",
    "fh = FeatureHasher(n_features=6, input_type='string')\n",
    "hashed_features = fh.fit_transform(vg_df['Genre'])\n",
    "hashed_features = hashed_features.toarray()\n",
    "pd.concat([vg_df[['Name', 'Genre']], pd.DataFrame(hashed_features)], axis=1).iloc[1:7]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'dtype': numpy.float64,\n",
       " 'input_type': 'string',\n",
       " 'n_features': 6,\n",
       " 'non_negative': False}"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fh.get_params()"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [conda root]",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}