{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 315,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 316,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(\"Train.csv\")\n",
    "test = pd.read_csv(\"Test.csv\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 317,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = [df,test]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 318,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>realtionship_status</th>\n",
       "      <th>industry</th>\n",
       "      <th>genre</th>\n",
       "      <th>targeted_sex</th>\n",
       "      <th>average_runtime(minutes_per_week)</th>\n",
       "      <th>airtime</th>\n",
       "      <th>airlocation</th>\n",
       "      <th>ratings</th>\n",
       "      <th>expensive</th>\n",
       "      <th>money_back_guarantee</th>\n",
       "      <th>netgain</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>19717</td>\n",
       "      <td>Married-spouse-absent</td>\n",
       "      <td>Auto</td>\n",
       "      <td>Comedy</td>\n",
       "      <td>Male</td>\n",
       "      <td>45</td>\n",
       "      <td>Primetime</td>\n",
       "      <td>United-States</td>\n",
       "      <td>0.027465</td>\n",
       "      <td>High</td>\n",
       "      <td>No</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>31593</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Pharma</td>\n",
       "      <td>Comedy</td>\n",
       "      <td>Male</td>\n",
       "      <td>45</td>\n",
       "      <td>Primetime</td>\n",
       "      <td>United-States</td>\n",
       "      <td>0.027465</td>\n",
       "      <td>Low</td>\n",
       "      <td>No</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>5681</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>Entertainment</td>\n",
       "      <td>Comedy</td>\n",
       "      <td>Female</td>\n",
       "      <td>45</td>\n",
       "      <td>Primetime</td>\n",
       "      <td>United-States</td>\n",
       "      <td>0.027465</td>\n",
       "      <td>High</td>\n",
       "      <td>Yes</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>15491</td>\n",
       "      <td>Separated</td>\n",
       "      <td>Political</td>\n",
       "      <td>Infomercial</td>\n",
       "      <td>Female</td>\n",
       "      <td>40</td>\n",
       "      <td>Primetime</td>\n",
       "      <td>United-States</td>\n",
       "      <td>0.027465</td>\n",
       "      <td>Low</td>\n",
       "      <td>No</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>23587</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Pharma</td>\n",
       "      <td>Comedy</td>\n",
       "      <td>Male</td>\n",
       "      <td>48</td>\n",
       "      <td>Primetime</td>\n",
       "      <td>United-States</td>\n",
       "      <td>0.027465</td>\n",
       "      <td>High</td>\n",
       "      <td>No</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      id    realtionship_status       industry        genre targeted_sex  \\\n",
       "0  19717  Married-spouse-absent           Auto       Comedy         Male   \n",
       "1  31593     Married-civ-spouse         Pharma       Comedy         Male   \n",
       "2   5681               Divorced  Entertainment       Comedy       Female   \n",
       "3  15491              Separated      Political  Infomercial       Female   \n",
       "4  23587     Married-civ-spouse         Pharma       Comedy         Male   \n",
       "\n",
       "   average_runtime(minutes_per_week)    airtime    airlocation   ratings  \\\n",
       "0                                 45  Primetime  United-States  0.027465   \n",
       "1                                 45  Primetime  United-States  0.027465   \n",
       "2                                 45  Primetime  United-States  0.027465   \n",
       "3                                 40  Primetime  United-States  0.027465   \n",
       "4                                 48  Primetime  United-States  0.027465   \n",
       "\n",
       "  expensive money_back_guarantee  netgain  \n",
       "0      High                   No    False  \n",
       "1       Low                   No    False  \n",
       "2      High                  Yes    False  \n",
       "3       Low                   No    False  \n",
       "4      High                   No     True  "
      ]
     },
     "execution_count": 318,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 319,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>realtionship_status</th>\n",
       "      <th>industry</th>\n",
       "      <th>genre</th>\n",
       "      <th>targeted_sex</th>\n",
       "      <th>average_runtime(minutes_per_week)</th>\n",
       "      <th>airtime</th>\n",
       "      <th>airlocation</th>\n",
       "      <th>ratings</th>\n",
       "      <th>expensive</th>\n",
       "      <th>money_back_guarantee</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>Widowed</td>\n",
       "      <td>Auto</td>\n",
       "      <td>Comedy</td>\n",
       "      <td>Female</td>\n",
       "      <td>10</td>\n",
       "      <td>Daytime</td>\n",
       "      <td>United-States</td>\n",
       "      <td>0.027465</td>\n",
       "      <td>Low</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Pharma</td>\n",
       "      <td>Comedy</td>\n",
       "      <td>Male</td>\n",
       "      <td>40</td>\n",
       "      <td>Morning</td>\n",
       "      <td>United-States</td>\n",
       "      <td>0.056262</td>\n",
       "      <td>High</td>\n",
       "      <td>Yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>Entertainment</td>\n",
       "      <td>Comedy</td>\n",
       "      <td>Female</td>\n",
       "      <td>50</td>\n",
       "      <td>Morning</td>\n",
       "      <td>United-States</td>\n",
       "      <td>0.027465</td>\n",
       "      <td>Low</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>9</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Pharma</td>\n",
       "      <td>Infomercial</td>\n",
       "      <td>Male</td>\n",
       "      <td>40</td>\n",
       "      <td>Primetime</td>\n",
       "      <td>United-States</td>\n",
       "      <td>0.027465</td>\n",
       "      <td>Low</td>\n",
       "      <td>No</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>10</td>\n",
       "      <td>Married-civ-spouse</td>\n",
       "      <td>Pharma</td>\n",
       "      <td>Comedy</td>\n",
       "      <td>Male</td>\n",
       "      <td>40</td>\n",
       "      <td>Primetime</td>\n",
       "      <td>United-States</td>\n",
       "      <td>0.027465</td>\n",
       "      <td>Low</td>\n",
       "      <td>Yes</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id realtionship_status       industry        genre targeted_sex  \\\n",
       "0   1             Widowed           Auto       Comedy       Female   \n",
       "1   4  Married-civ-spouse         Pharma       Comedy         Male   \n",
       "2   5            Divorced  Entertainment       Comedy       Female   \n",
       "3   9  Married-civ-spouse         Pharma  Infomercial         Male   \n",
       "4  10  Married-civ-spouse         Pharma       Comedy         Male   \n",
       "\n",
       "   average_runtime(minutes_per_week)    airtime    airlocation   ratings  \\\n",
       "0                                 10    Daytime  United-States  0.027465   \n",
       "1                                 40    Morning  United-States  0.056262   \n",
       "2                                 50    Morning  United-States  0.027465   \n",
       "3                                 40  Primetime  United-States  0.027465   \n",
       "4                                 40  Primetime  United-States  0.027465   \n",
       "\n",
       "  expensive money_back_guarantee  \n",
       "0       Low                   No  \n",
       "1      High                  Yes  \n",
       "2       Low                   No  \n",
       "3       Low                   No  \n",
       "4       Low                  Yes  "
      ]
     },
     "execution_count": 319,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 320,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 26048 entries, 0 to 26047\n",
      "Data columns (total 12 columns):\n",
      "id                                   26048 non-null int64\n",
      "realtionship_status                  26048 non-null object\n",
      "industry                             26048 non-null object\n",
      "genre                                26048 non-null object\n",
      "targeted_sex                         26048 non-null object\n",
      "average_runtime(minutes_per_week)    26048 non-null int64\n",
      "airtime                              26048 non-null object\n",
      "airlocation                          26048 non-null object\n",
      "ratings                              26048 non-null float64\n",
      "expensive                            26048 non-null object\n",
      "money_back_guarantee                 26048 non-null object\n",
      "netgain                              26048 non-null bool\n",
      "dtypes: bool(1), float64(1), int64(2), object(8)\n",
      "memory usage: 2.2+ MB\n"
     ]
    }
   ],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 321,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Pharma           10339\n",
       "Auto              6801\n",
       "Political         4014\n",
       "Entertainment     2765\n",
       "Other             1333\n",
       "ClassAction        796\n",
       "Name: industry, dtype: int64"
      ]
     },
     "execution_count": 321,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.industry.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 322,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Comedy         22258\n",
       "Infomercial     2516\n",
       "Drama            803\n",
       "Direct           247\n",
       "Other            224\n",
       "Name: genre, dtype: int64"
      ]
     },
     "execution_count": 322,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.genre.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 323,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Low       15693\n",
       "High       7279\n",
       "Medium     3076\n",
       "Name: expensive, dtype: int64"
      ]
     },
     "execution_count": 323,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.expensive.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 324,
   "metadata": {},
   "outputs": [],
   "source": [
    "for x in dataset:\n",
    "    x['industry'] = x['industry'].map({\"Pharma\":5,\"Auto\":4,\"Political\":3,\"Entertainment\":2,\"Other\":1,\"ClassAction\":0})\n",
    "    x['genre'] = x['genre'].map({\"Comedy\":4,\"Infomercial\":3,\"Drama\":2,\"Direct\":1,\"Other\":0})\n",
    "    x['targeted_sex']=x['targeted_sex'].map({\"Male\":1,\"Female\":0})\n",
    "    x['airtime']=x.airtime.map({\"Primetime\":2,\"Morning\":1,\"Daytime\":0})\n",
    "    x['expensive']=x.expensive.map({\"High\":2,\"Medium\":1,\"Low\":0})\n",
    "    x['money_back_guarantee']=x.money_back_guarantee.map({\"Yes\":1,\"No\":0})\n",
    "    x['realtionship_status']=x.realtionship_status.map({\"Married-civ-spouse\":6,\"Never-married\":5,\"Divorced\":4,\"Widowed\":3,\"Separated\":2,\"Married-spouse-absent\":1,\"Married-AF-spouse\":0})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 325,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['ratings']=(df['ratings']*100000).astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 368,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>realtionship_status</th>\n",
       "      <th>industry</th>\n",
       "      <th>genre</th>\n",
       "      <th>targeted_sex</th>\n",
       "      <th>average_runtime(minutes_per_week)</th>\n",
       "      <th>airtime</th>\n",
       "      <th>airlocation</th>\n",
       "      <th>ratings</th>\n",
       "      <th>expensive</th>\n",
       "      <th>money_back_guarantee</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>10</td>\n",
       "      <td>0</td>\n",
       "      <td>38</td>\n",
       "      <td>2746</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>1</td>\n",
       "      <td>38</td>\n",
       "      <td>5626</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>50</td>\n",
       "      <td>1</td>\n",
       "      <td>38</td>\n",
       "      <td>2746</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>9</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>2</td>\n",
       "      <td>38</td>\n",
       "      <td>2746</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>10</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>2</td>\n",
       "      <td>38</td>\n",
       "      <td>2746</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id  realtionship_status  industry  genre  targeted_sex  \\\n",
       "0   1                    3         4      4             0   \n",
       "1   4                    6         5      4             1   \n",
       "2   5                    4         2      4             0   \n",
       "3   9                    6         5      3             1   \n",
       "4  10                    6         5      4             1   \n",
       "\n",
       "   average_runtime(minutes_per_week)  airtime  airlocation  ratings  \\\n",
       "0                                 10        0           38     2746   \n",
       "1                                 40        1           38     5626   \n",
       "2                                 50        1           38     2746   \n",
       "3                                 40        2           38     2746   \n",
       "4                                 40        2           38     2746   \n",
       "\n",
       "   expensive  money_back_guarantee  \n",
       "0          0                     0  \n",
       "1          2                     1  \n",
       "2          0                     0  \n",
       "3          0                     0  \n",
       "4          0                     1  "
      ]
     },
     "execution_count": 368,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 326,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>realtionship_status</th>\n",
       "      <th>industry</th>\n",
       "      <th>genre</th>\n",
       "      <th>targeted_sex</th>\n",
       "      <th>average_runtime(minutes_per_week)</th>\n",
       "      <th>airtime</th>\n",
       "      <th>airlocation</th>\n",
       "      <th>ratings</th>\n",
       "      <th>expensive</th>\n",
       "      <th>money_back_guarantee</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>10</td>\n",
       "      <td>0</td>\n",
       "      <td>United-States</td>\n",
       "      <td>0.027465</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>1</td>\n",
       "      <td>United-States</td>\n",
       "      <td>0.056262</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>50</td>\n",
       "      <td>1</td>\n",
       "      <td>United-States</td>\n",
       "      <td>0.027465</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>9</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>2</td>\n",
       "      <td>United-States</td>\n",
       "      <td>0.027465</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>10</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>2</td>\n",
       "      <td>United-States</td>\n",
       "      <td>0.027465</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id  realtionship_status  industry  genre  targeted_sex  \\\n",
       "0   1                    3         4      4             0   \n",
       "1   4                    6         5      4             1   \n",
       "2   5                    4         2      4             0   \n",
       "3   9                    6         5      3             1   \n",
       "4  10                    6         5      4             1   \n",
       "\n",
       "   average_runtime(minutes_per_week)  airtime    airlocation   ratings  \\\n",
       "0                                 10        0  United-States  0.027465   \n",
       "1                                 40        1  United-States  0.056262   \n",
       "2                                 50        1  United-States  0.027465   \n",
       "3                                 40        2  United-States  0.027465   \n",
       "4                                 40        2  United-States  0.027465   \n",
       "\n",
       "   expensive  money_back_guarantee  \n",
       "0          0                     0  \n",
       "1          2                     1  \n",
       "2          0                     0  \n",
       "3          0                     0  \n",
       "4          0                     1  "
      ]
     },
     "execution_count": 326,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 327,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import LabelEncoder\n",
    "le = LabelEncoder()\n",
    "for x in dataset:\n",
    "    x['airlocation'] = le.fit_transform(x['airlocation'])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 328,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>realtionship_status</th>\n",
       "      <th>industry</th>\n",
       "      <th>genre</th>\n",
       "      <th>targeted_sex</th>\n",
       "      <th>average_runtime(minutes_per_week)</th>\n",
       "      <th>airtime</th>\n",
       "      <th>airlocation</th>\n",
       "      <th>ratings</th>\n",
       "      <th>expensive</th>\n",
       "      <th>money_back_guarantee</th>\n",
       "      <th>netgain</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>19717</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>45</td>\n",
       "      <td>2</td>\n",
       "      <td>39</td>\n",
       "      <td>2746</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>31593</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>45</td>\n",
       "      <td>2</td>\n",
       "      <td>39</td>\n",
       "      <td>2746</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>5681</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>45</td>\n",
       "      <td>2</td>\n",
       "      <td>39</td>\n",
       "      <td>2746</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>15491</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>2</td>\n",
       "      <td>39</td>\n",
       "      <td>2746</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>23587</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>48</td>\n",
       "      <td>2</td>\n",
       "      <td>39</td>\n",
       "      <td>2746</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      id  realtionship_status  industry  genre  targeted_sex  \\\n",
       "0  19717                    1         4      4             1   \n",
       "1  31593                    6         5      4             1   \n",
       "2   5681                    4         2      4             0   \n",
       "3  15491                    2         3      3             0   \n",
       "4  23587                    6         5      4             1   \n",
       "\n",
       "   average_runtime(minutes_per_week)  airtime  airlocation  ratings  \\\n",
       "0                                 45        2           39     2746   \n",
       "1                                 45        2           39     2746   \n",
       "2                                 45        2           39     2746   \n",
       "3                                 40        2           39     2746   \n",
       "4                                 48        2           39     2746   \n",
       "\n",
       "   expensive  money_back_guarantee  netgain  \n",
       "0          2                     0    False  \n",
       "1          0                     0    False  \n",
       "2          2                     1    False  \n",
       "3          0                     0    False  \n",
       "4          2                     0     True  "
      ]
     },
     "execution_count": 328,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 329,
   "metadata": {},
   "outputs": [],
   "source": [
    "# def gain(x):\n",
    "#     if x==\"True\":\n",
    "#         return 1\n",
    "#     if x==\"False\":\n",
    "#         return 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 330,
   "metadata": {},
   "outputs": [],
   "source": [
    "# df['netgain_e'] = df['netgain'].apply(gain)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 331,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['netgain'] = le.fit_transform(df.netgain)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 332,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "6    11844\n",
       "5     8547\n",
       "4     3649\n",
       "3      818\n",
       "2      793\n",
       "1      378\n",
       "0       19\n",
       "Name: realtionship_status, dtype: int64"
      ]
     },
     "execution_count": 332,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.realtionship_status.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 333,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>realtionship_status</th>\n",
       "      <th>industry</th>\n",
       "      <th>genre</th>\n",
       "      <th>targeted_sex</th>\n",
       "      <th>average_runtime(minutes_per_week)</th>\n",
       "      <th>airtime</th>\n",
       "      <th>airlocation</th>\n",
       "      <th>ratings</th>\n",
       "      <th>expensive</th>\n",
       "      <th>money_back_guarantee</th>\n",
       "      <th>netgain</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>19717</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>45</td>\n",
       "      <td>2</td>\n",
       "      <td>39</td>\n",
       "      <td>2746</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>31593</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>45</td>\n",
       "      <td>2</td>\n",
       "      <td>39</td>\n",
       "      <td>2746</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>5681</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>45</td>\n",
       "      <td>2</td>\n",
       "      <td>39</td>\n",
       "      <td>2746</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>15491</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>2</td>\n",
       "      <td>39</td>\n",
       "      <td>2746</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>23587</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>48</td>\n",
       "      <td>2</td>\n",
       "      <td>39</td>\n",
       "      <td>2746</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      id  realtionship_status  industry  genre  targeted_sex  \\\n",
       "0  19717                    1         4      4             1   \n",
       "1  31593                    6         5      4             1   \n",
       "2   5681                    4         2      4             0   \n",
       "3  15491                    2         3      3             0   \n",
       "4  23587                    6         5      4             1   \n",
       "\n",
       "   average_runtime(minutes_per_week)  airtime  airlocation  ratings  \\\n",
       "0                                 45        2           39     2746   \n",
       "1                                 45        2           39     2746   \n",
       "2                                 45        2           39     2746   \n",
       "3                                 40        2           39     2746   \n",
       "4                                 48        2           39     2746   \n",
       "\n",
       "   expensive  money_back_guarantee  netgain  \n",
       "0          2                     0        0  \n",
       "1          0                     0        0  \n",
       "2          2                     1        0  \n",
       "3          0                     0        0  \n",
       "4          2                     0        1  "
      ]
     },
     "execution_count": 333,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 334,
   "metadata": {},
   "outputs": [],
   "source": [
    "y = df['netgain'].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 335,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = df.drop(['netgain'],axis=1).values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 336,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 337,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
       "                       max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
       "                       min_impurity_decrease=0.0, min_impurity_split=None,\n",
       "                       min_samples_leaf=1, min_samples_split=2,\n",
       "                       min_weight_fraction_leaf=0.0, n_estimators=100,\n",
       "                       n_jobs=None, oob_score=False, random_state=None,\n",
       "                       verbose=0, warm_start=False)"
      ]
     },
     "execution_count": 337,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.ensemble import RandomForestClassifier\n",
    "clf=RandomForestClassifier(n_estimators=100)\n",
    "clf.fit(x_train,y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 338,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 26048 entries, 0 to 26047\n",
      "Data columns (total 12 columns):\n",
      "id                                   26048 non-null int64\n",
      "realtionship_status                  26048 non-null int64\n",
      "industry                             26048 non-null int64\n",
      "genre                                26048 non-null int64\n",
      "targeted_sex                         26048 non-null int64\n",
      "average_runtime(minutes_per_week)    26048 non-null int64\n",
      "airtime                              26048 non-null int64\n",
      "airlocation                          26048 non-null int64\n",
      "ratings                              26048 non-null int64\n",
      "expensive                            26048 non-null int64\n",
      "money_back_guarantee                 26048 non-null int64\n",
      "netgain                              26048 non-null int64\n",
      "dtypes: int64(12)\n",
      "memory usage: 2.4 MB\n"
     ]
    }
   ],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 339,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 339,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.expensive.isna().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 340,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = clf.predict(x_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 341,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 1, 0, ..., 0, 1, 0])"
      ]
     },
     "execution_count": 341,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 342,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 0, 0, ..., 0, 1, 0])"
      ]
     },
     "execution_count": 342,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_pred"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 343,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn import metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 344,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 0.7869481765834933\n"
     ]
    }
   ],
   "source": [
    "print(\"Accuracy:\",metrics.accuracy_score(y_test, y_pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 345,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>realtionship_status</th>\n",
       "      <th>industry</th>\n",
       "      <th>genre</th>\n",
       "      <th>targeted_sex</th>\n",
       "      <th>average_runtime(minutes_per_week)</th>\n",
       "      <th>airtime</th>\n",
       "      <th>airlocation</th>\n",
       "      <th>ratings</th>\n",
       "      <th>expensive</th>\n",
       "      <th>money_back_guarantee</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>10</td>\n",
       "      <td>0</td>\n",
       "      <td>38</td>\n",
       "      <td>0.027465</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>1</td>\n",
       "      <td>38</td>\n",
       "      <td>0.056262</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>50</td>\n",
       "      <td>1</td>\n",
       "      <td>38</td>\n",
       "      <td>0.027465</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>9</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>2</td>\n",
       "      <td>38</td>\n",
       "      <td>0.027465</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>10</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>2</td>\n",
       "      <td>38</td>\n",
       "      <td>0.027465</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id  realtionship_status  industry  genre  targeted_sex  \\\n",
       "0   1                    3         4      4             0   \n",
       "1   4                    6         5      4             1   \n",
       "2   5                    4         2      4             0   \n",
       "3   9                    6         5      3             1   \n",
       "4  10                    6         5      4             1   \n",
       "\n",
       "   average_runtime(minutes_per_week)  airtime  airlocation   ratings  \\\n",
       "0                                 10        0           38  0.027465   \n",
       "1                                 40        1           38  0.056262   \n",
       "2                                 50        1           38  0.027465   \n",
       "3                                 40        2           38  0.027465   \n",
       "4                                 40        2           38  0.027465   \n",
       "\n",
       "   expensive  money_back_guarantee  \n",
       "0          0                     0  \n",
       "1          2                     1  \n",
       "2          0                     0  \n",
       "3          0                     0  \n",
       "4          0                     1  "
      ]
     },
     "execution_count": 345,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 346,
   "metadata": {},
   "outputs": [],
   "source": [
    "test['ratings']=(test['ratings']*100000).astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 347,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 6513 entries, 0 to 6512\n",
      "Data columns (total 11 columns):\n",
      "id                                   6513 non-null int64\n",
      "realtionship_status                  6513 non-null int64\n",
      "industry                             6513 non-null int64\n",
      "genre                                6513 non-null int64\n",
      "targeted_sex                         6513 non-null int64\n",
      "average_runtime(minutes_per_week)    6513 non-null int64\n",
      "airtime                              6513 non-null int64\n",
      "airlocation                          6513 non-null int64\n",
      "ratings                              6513 non-null int64\n",
      "expensive                            6513 non-null int64\n",
      "money_back_guarantee                 6513 non-null int64\n",
      "dtypes: int64(11)\n",
      "memory usage: 559.8 KB\n"
     ]
    }
   ],
   "source": [
    "test.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 348,
   "metadata": {},
   "outputs": [],
   "source": [
    "    x = test.values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 349,
   "metadata": {},
   "outputs": [],
   "source": [
    "Y_pred = clf.predict(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 350,
   "metadata": {},
   "outputs": [],
   "source": [
    "dft = test['id']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 351,
   "metadata": {},
   "outputs": [],
   "source": [
    "dft = pd.DataFrame(dft)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 354,
   "metadata": {},
   "outputs": [],
   "source": [
    "dft['netgain'] = Y_pred"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 355,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>netgain</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>9</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>10</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>20</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>28</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>31</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>32</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>9</td>\n",
       "      <td>34</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id  netgain\n",
       "0   1        0\n",
       "1   4        1\n",
       "2   5        0\n",
       "3   9        1\n",
       "4  10        1\n",
       "5  20        0\n",
       "6  28        0\n",
       "7  31        0\n",
       "8  32        0\n",
       "9  34        0"
      ]
     },
     "execution_count": 355,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dft.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 356,
   "metadata": {},
   "outputs": [],
   "source": [
    "# dft['netgain'] = df['netgain'].apply(bool)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 357,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>netgain</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>9</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>10</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id  netgain\n",
       "0   1        0\n",
       "1   4        1\n",
       "2   5        0\n",
       "3   9        1\n",
       "4  10        1"
      ]
     },
     "execution_count": 357,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dft.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 358,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    5096\n",
       "1    1417\n",
       "Name: netgain, dtype: int64"
      ]
     },
     "execution_count": 358,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dft.netgain.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 359,
   "metadata": {},
   "outputs": [],
   "source": [
    "# for x in range(len(dft['netgain'])):\n",
    "#     if x==0:\n",
    "#         dft['netgain'][x] = 'false'\n",
    "#     elif x==1:\n",
    "#         dft['netgain'][x] = 'true'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 363,
   "metadata": {},
   "outputs": [],
   "source": [
    "dft.loc[dft['netgain']==0,'netgain']='false'\n",
    "dft.loc[dft['netgain']==1,'netgain']='true'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 364,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>netgain</th>\n",
       "      <th>netgain_e</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>false</td>\n",
       "      <td>false</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>true</td>\n",
       "      <td>true</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>false</td>\n",
       "      <td>false</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>9</td>\n",
       "      <td>true</td>\n",
       "      <td>true</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>10</td>\n",
       "      <td>true</td>\n",
       "      <td>true</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id netgain netgain_e\n",
       "0   1   false     false\n",
       "1   4    true      true\n",
       "2   5   false     false\n",
       "3   9    true      true\n",
       "4  10    true      true"
      ]
     },
     "execution_count": 364,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dft.head()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 365,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "false    5096\n",
       "true     1417\n",
       "Name: netgain_e, dtype: int64"
      ]
     },
     "execution_count": 365,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dft.netgain_e.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 366,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>netgain</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>false</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>true</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>false</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>9</td>\n",
       "      <td>true</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>10</td>\n",
       "      <td>true</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6508</td>\n",
       "      <td>32538</td>\n",
       "      <td>false</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6509</td>\n",
       "      <td>32542</td>\n",
       "      <td>false</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6510</td>\n",
       "      <td>32549</td>\n",
       "      <td>false</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6511</td>\n",
       "      <td>32558</td>\n",
       "      <td>true</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6512</td>\n",
       "      <td>32560</td>\n",
       "      <td>false</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>6513 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         id netgain\n",
       "0         1   false\n",
       "1         4    true\n",
       "2         5   false\n",
       "3         9    true\n",
       "4        10    true\n",
       "...     ...     ...\n",
       "6508  32538   false\n",
       "6509  32542   false\n",
       "6510  32549   false\n",
       "6511  32558    true\n",
       "6512  32560   false\n",
       "\n",
       "[6513 rows x 2 columns]"
      ]
     },
     "execution_count": 366,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dft.drop(['netgain_e'],axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 367,
   "metadata": {},
   "outputs": [],
   "source": [
    "dft.to_csv(\"First_submission.csv\",index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}