{ "cells": [ { "cell_type": "code", "execution_count": 315, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 316, "metadata": {}, "outputs": [], "source": [ "# Read the training CSV (has the `netgain` label) and the test CSV (no label)\n", "# from the current working directory.\n", "df, test = (pd.read_csv(csv_name) for csv_name in (\"Train.csv\", \"Test.csv\"))\n" ] }, { "cell_type": "code", "execution_count": 317, "metadata": {}, "outputs": [], "source": [ "# The list holds references to the same two frames, so per-column assignments\n", "# made while looping over `dataset` modify `df` and `test` themselves.\n", "dataset = [df, test]" ] }, { "cell_type": "code", "execution_count": 318, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idrealtionship_statusindustrygenretargeted_sexaverage_runtime(minutes_per_week)airtimeairlocationratingsexpensivemoney_back_guaranteenetgain
019717Married-spouse-absentAutoComedyMale45PrimetimeUnited-States0.027465HighNoFalse
131593Married-civ-spousePharmaComedyMale45PrimetimeUnited-States0.027465LowNoFalse
25681DivorcedEntertainmentComedyFemale45PrimetimeUnited-States0.027465HighYesFalse
315491SeparatedPoliticalInfomercialFemale40PrimetimeUnited-States0.027465LowNoFalse
423587Married-civ-spousePharmaComedyMale48PrimetimeUnited-States0.027465HighNoTrue
\n", "
" ], "text/plain": [ " id realtionship_status industry genre targeted_sex \\\n", "0 19717 Married-spouse-absent Auto Comedy Male \n", "1 31593 Married-civ-spouse Pharma Comedy Male \n", "2 5681 Divorced Entertainment Comedy Female \n", "3 15491 Separated Political Infomercial Female \n", "4 23587 Married-civ-spouse Pharma Comedy Male \n", "\n", " average_runtime(minutes_per_week) airtime airlocation ratings \\\n", "0 45 Primetime United-States 0.027465 \n", "1 45 Primetime United-States 0.027465 \n", "2 45 Primetime United-States 0.027465 \n", "3 40 Primetime United-States 0.027465 \n", "4 48 Primetime United-States 0.027465 \n", "\n", " expensive money_back_guarantee netgain \n", "0 High No False \n", "1 Low No False \n", "2 High Yes False \n", "3 Low No False \n", "4 High No True " ] }, "execution_count": 318, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 319, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idrealtionship_statusindustrygenretargeted_sexaverage_runtime(minutes_per_week)airtimeairlocationratingsexpensivemoney_back_guarantee
01WidowedAutoComedyFemale10DaytimeUnited-States0.027465LowNo
14Married-civ-spousePharmaComedyMale40MorningUnited-States0.056262HighYes
25DivorcedEntertainmentComedyFemale50MorningUnited-States0.027465LowNo
39Married-civ-spousePharmaInfomercialMale40PrimetimeUnited-States0.027465LowNo
410Married-civ-spousePharmaComedyMale40PrimetimeUnited-States0.027465LowYes
\n", "
" ], "text/plain": [ " id realtionship_status industry genre targeted_sex \\\n", "0 1 Widowed Auto Comedy Female \n", "1 4 Married-civ-spouse Pharma Comedy Male \n", "2 5 Divorced Entertainment Comedy Female \n", "3 9 Married-civ-spouse Pharma Infomercial Male \n", "4 10 Married-civ-spouse Pharma Comedy Male \n", "\n", " average_runtime(minutes_per_week) airtime airlocation ratings \\\n", "0 10 Daytime United-States 0.027465 \n", "1 40 Morning United-States 0.056262 \n", "2 50 Morning United-States 0.027465 \n", "3 40 Primetime United-States 0.027465 \n", "4 40 Primetime United-States 0.027465 \n", "\n", " expensive money_back_guarantee \n", "0 Low No \n", "1 High Yes \n", "2 Low No \n", "3 Low No \n", "4 Low Yes " ] }, "execution_count": 319, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test.head()" ] }, { "cell_type": "code", "execution_count": 320, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 26048 entries, 0 to 26047\n", "Data columns (total 12 columns):\n", "id 26048 non-null int64\n", "realtionship_status 26048 non-null object\n", "industry 26048 non-null object\n", "genre 26048 non-null object\n", "targeted_sex 26048 non-null object\n", "average_runtime(minutes_per_week) 26048 non-null int64\n", "airtime 26048 non-null object\n", "airlocation 26048 non-null object\n", "ratings 26048 non-null float64\n", "expensive 26048 non-null object\n", "money_back_guarantee 26048 non-null object\n", "netgain 26048 non-null bool\n", "dtypes: bool(1), float64(1), int64(2), object(8)\n", "memory usage: 2.2+ MB\n" ] } ], "source": [ "df.info()" ] }, { "cell_type": "code", "execution_count": 321, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Pharma 10339\n", "Auto 6801\n", "Political 4014\n", "Entertainment 2765\n", "Other 1333\n", "ClassAction 796\n", "Name: industry, dtype: int64" ] }, "execution_count": 321, "metadata": {}, "output_type": "execute_result" } ], "source": [ 
"df.industry.value_counts()" ] }, { "cell_type": "code", "execution_count": 322, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Comedy 22258\n", "Infomercial 2516\n", "Drama 803\n", "Direct 247\n", "Other 224\n", "Name: genre, dtype: int64" ] }, "execution_count": 322, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.genre.value_counts()" ] }, { "cell_type": "code", "execution_count": 323, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Low 15693\n", "High 7279\n", "Medium 3076\n", "Name: expensive, dtype: int64" ] }, "execution_count": 323, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.expensive.value_counts()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 324, "metadata": {}, "outputs": [], "source": [ "# Hand-written lookup tables: one {category label: integer code} dict per text column.\n", "# 'realtionship_status' is spelled exactly as the column appears in the CSV header.\n", "CATEGORY_CODES = {\n", "    'industry': {\"Pharma\":5,\"Auto\":4,\"Political\":3,\"Entertainment\":2,\"Other\":1,\"ClassAction\":0},\n", "    'genre': {\"Comedy\":4,\"Infomercial\":3,\"Drama\":2,\"Direct\":1,\"Other\":0},\n", "    'targeted_sex': {\"Male\":1,\"Female\":0},\n", "    'airtime': {\"Primetime\":2,\"Morning\":1,\"Daytime\":0},\n", "    'expensive': {\"High\":2,\"Medium\":1,\"Low\":0},\n", "    'money_back_guarantee': {\"Yes\":1,\"No\":0},\n", "    'realtionship_status': {\"Married-civ-spouse\":6,\"Never-married\":5,\"Divorced\":4,\"Widowed\":3,\"Separated\":2,\"Married-spouse-absent\":1,\"Married-AF-spouse\":0},\n", "}\n", "# Apply every table to both frames; Series.map yields NaN for a label missing from its table.\n", "for frame in dataset:\n", "    for column, codes in CATEGORY_CODES.items():\n", "        frame[column] = frame[column].map(codes)" ] }, { "cell_type": "code", "execution_count": 325, "metadata": {}, "outputs": [], "source": [ "# Scale the training ratings by 100000 and cast to int (the cast truncates any fraction).\n", "# Only `df` is rescaled here; `test` gets the same treatment in a later cell.\n", "df['ratings'] = df['ratings'].mul(100000).astype(int)" ] }, { "cell_type": "code", "execution_count": 368, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idrealtionship_statusindustrygenretargeted_sexaverage_runtime(minutes_per_week)airtimeairlocationratingsexpensivemoney_back_guarantee
01344010038274600
14654140138562621
25424050138274600
39653140238274600
410654140238274601
\n", "
" ], "text/plain": [ " id realtionship_status industry genre targeted_sex \\\n", "0 1 3 4 4 0 \n", "1 4 6 5 4 1 \n", "2 5 4 2 4 0 \n", "3 9 6 5 3 1 \n", "4 10 6 5 4 1 \n", "\n", " average_runtime(minutes_per_week) airtime airlocation ratings \\\n", "0 10 0 38 2746 \n", "1 40 1 38 5626 \n", "2 50 1 38 2746 \n", "3 40 2 38 2746 \n", "4 40 2 38 2746 \n", "\n", " expensive money_back_guarantee \n", "0 0 0 \n", "1 2 1 \n", "2 0 0 \n", "3 0 0 \n", "4 0 1 " ] }, "execution_count": 368, "metadata": {}, "output_type": "execute_result" } ], "source": [] }, { "cell_type": "code", "execution_count": 326, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idrealtionship_statusindustrygenretargeted_sexaverage_runtime(minutes_per_week)airtimeairlocationratingsexpensivemoney_back_guarantee
013440100United-States0.02746500
146541401United-States0.05626221
254240501United-States0.02746500
396531402United-States0.02746500
4106541402United-States0.02746501
\n", "
" ], "text/plain": [ " id realtionship_status industry genre targeted_sex \\\n", "0 1 3 4 4 0 \n", "1 4 6 5 4 1 \n", "2 5 4 2 4 0 \n", "3 9 6 5 3 1 \n", "4 10 6 5 4 1 \n", "\n", " average_runtime(minutes_per_week) airtime airlocation ratings \\\n", "0 10 0 United-States 0.027465 \n", "1 40 1 United-States 0.056262 \n", "2 50 1 United-States 0.027465 \n", "3 40 2 United-States 0.027465 \n", "4 40 2 United-States 0.027465 \n", "\n", " expensive money_back_guarantee \n", "0 0 0 \n", "1 2 1 \n", "2 0 0 \n", "3 0 0 \n", "4 0 1 " ] }, "execution_count": 326, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test.head()" ] }, { "cell_type": "code", "execution_count": 327, "metadata": {}, "outputs": [], "source": [ "from sklearn.preprocessing import LabelEncoder\n", "# Fit ONE encoder on the locations of both frames so that a given location gets the\n", "# same integer code in train and test. Fitting one encoder per frame assigns codes\n", "# independently (e.g. United-States came out as 39 in `df` but 38 in `test`), which\n", "# makes the model see a different feature value for the same category.\n", "le = LabelEncoder()\n", "le.fit(pd.concat([frame['airlocation'] for frame in dataset]))\n", "for frame in dataset:\n", "    frame['airlocation'] = le.transform(frame['airlocation'])\n" ] }, { "cell_type": "code", "execution_count": 328, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idrealtionship_statusindustrygenretargeted_sexaverage_runtime(minutes_per_week)airtimeairlocationratingsexpensivemoney_back_guaranteenetgain
019717144145239274620False
131593654145239274600False
25681424045239274621False
315491233040239274600False
423587654148239274620True
\n", "
" ], "text/plain": [ " id realtionship_status industry genre targeted_sex \\\n", "0 19717 1 4 4 1 \n", "1 31593 6 5 4 1 \n", "2 5681 4 2 4 0 \n", "3 15491 2 3 3 0 \n", "4 23587 6 5 4 1 \n", "\n", " average_runtime(minutes_per_week) airtime airlocation ratings \\\n", "0 45 2 39 2746 \n", "1 45 2 39 2746 \n", "2 45 2 39 2746 \n", "3 40 2 39 2746 \n", "4 48 2 39 2746 \n", "\n", " expensive money_back_guarantee netgain \n", "0 2 0 False \n", "1 0 0 False \n", "2 2 1 False \n", "3 0 0 False \n", "4 2 0 True " ] }, "execution_count": 328, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 329, "metadata": {}, "outputs": [], "source": [ "# def gain(x):\n", "# if x==\"True\":\n", "# return 1\n", "# if x==\"False\":\n", "# return 0" ] }, { "cell_type": "code", "execution_count": 330, "metadata": {}, "outputs": [], "source": [ "# df['netgain_e'] = df['netgain'].apply(gain)" ] }, { "cell_type": "code", "execution_count": 331, "metadata": {}, "outputs": [], "source": [ "# Re-fit the shared encoder `le` on the boolean target column; in the output of the\n", "# following df.head() cell False appears as 0 and True as 1.\n", "netgain_codes = le.fit_transform(df['netgain'])\n", "df['netgain'] = netgain_codes" ] }, { "cell_type": "code", "execution_count": 332, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "6 11844\n", "5 8547\n", "4 3649\n", "3 818\n", "2 793\n", "1 378\n", "0 19\n", "Name: realtionship_status, dtype: int64" ] }, "execution_count": 332, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['realtionship_status'].value_counts()" ] }, { "cell_type": "code", "execution_count": 333, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idrealtionship_statusindustrygenretargeted_sexaverage_runtime(minutes_per_week)airtimeairlocationratingsexpensivemoney_back_guaranteenetgain
0197171441452392746200
1315936541452392746000
256814240452392746210
3154912330402392746000
4235876541482392746201
\n", "
" ], "text/plain": [ " id realtionship_status industry genre targeted_sex \\\n", "0 19717 1 4 4 1 \n", "1 31593 6 5 4 1 \n", "2 5681 4 2 4 0 \n", "3 15491 2 3 3 0 \n", "4 23587 6 5 4 1 \n", "\n", " average_runtime(minutes_per_week) airtime airlocation ratings \\\n", "0 45 2 39 2746 \n", "1 45 2 39 2746 \n", "2 45 2 39 2746 \n", "3 40 2 39 2746 \n", "4 48 2 39 2746 \n", "\n", " expensive money_back_guarantee netgain \n", "0 2 0 0 \n", "1 0 0 0 \n", "2 2 1 0 \n", "3 0 0 0 \n", "4 2 0 1 " ] }, "execution_count": 333, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 334, "metadata": {}, "outputs": [], "source": [ "# Target vector taken from the `netgain` column.\n", "y = df['netgain'].values" ] }, { "cell_type": "code", "execution_count": 335, "metadata": {}, "outputs": [], "source": [ "# Feature matrix: every remaining column, in DataFrame column order.\n", "# NOTE(review): this keeps the `id` column as a model input. The prediction cell\n", "# feeds `test.values` (which also contains `id`), so removing it requires changing\n", "# both places together.\n", "x = df.drop(['netgain'],axis=1).values" ] }, { "cell_type": "code", "execution_count": 336, "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", "# Fixed seed so the 80/20 hold-out split, and therefore the reported accuracy,\n", "# is the same on every Restart & Run All.\n", "SEED = 42\n", "x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=SEED)" ] }, { "cell_type": "code", "execution_count": 337, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n", " max_depth=None, max_features='auto', max_leaf_nodes=None,\n", " min_impurity_decrease=0.0, min_impurity_split=None,\n", " min_samples_leaf=1, min_samples_split=2,\n", " min_weight_fraction_leaf=0.0, n_estimators=100,\n", " n_jobs=None, oob_score=False, random_state=None,\n", " verbose=0, warm_start=False)" ] }, "execution_count": 337, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.ensemble import RandomForestClassifier\n", "# Seed the forest as well: its bootstrap samples and per-split feature subsets are\n", "# random, so without random_state the fitted model differs between runs.\n", "clf=RandomForestClassifier(n_estimators=100,random_state=SEED)\n", "clf.fit(x_train,y_train)" ] }, { "cell_type": "code", "execution_count": 338, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 26048 entries, 0 to 26047\n", "Data 
columns (total 12 columns):\n", "id 26048 non-null int64\n", "realtionship_status 26048 non-null int64\n", "industry 26048 non-null int64\n", "genre 26048 non-null int64\n", "targeted_sex 26048 non-null int64\n", "average_runtime(minutes_per_week) 26048 non-null int64\n", "airtime 26048 non-null int64\n", "airlocation 26048 non-null int64\n", "ratings 26048 non-null int64\n", "expensive 26048 non-null int64\n", "money_back_guarantee 26048 non-null int64\n", "netgain 26048 non-null int64\n", "dtypes: int64(12)\n", "memory usage: 2.4 MB\n" ] } ], "source": [ "df.info()" ] }, { "cell_type": "code", "execution_count": 339, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 339, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Count of missing values left in `expensive` after the category mapping.\n", "df['expensive'].isna().sum()" ] }, { "cell_type": "code", "execution_count": 340, "metadata": {}, "outputs": [], "source": [ "# Predict labels for the held-out rows produced by train_test_split.\n", "y_pred = clf.predict(x_test)" ] }, { "cell_type": "code", "execution_count": 341, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0, 1, 0, ..., 0, 1, 0])" ] }, "execution_count": 341, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y_test" ] }, { "cell_type": "code", "execution_count": 342, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0, 0, 0, ..., 0, 1, 0])" ] }, "execution_count": 342, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y_pred" ] }, { "cell_type": "code", "execution_count": 343, "metadata": {}, "outputs": [], "source": [ "from sklearn import metrics" ] }, { "cell_type": "code", "execution_count": 344, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 0.7869481765834933\n" ] } ], "source": [ "# Share of held-out rows whose predicted label equals the true label.\n", "holdout_accuracy = metrics.accuracy_score(y_test, y_pred)\n", "print(\"Accuracy:\", holdout_accuracy)" ] }, { "cell_type": "code", "execution_count": 345, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idrealtionship_statusindustrygenretargeted_sexaverage_runtime(minutes_per_week)airtimeairlocationratingsexpensivemoney_back_guarantee
013440100380.02746500
146541401380.05626221
254240501380.02746500
396531402380.02746500
4106541402380.02746501
\n", "
" ], "text/plain": [ " id realtionship_status industry genre targeted_sex \\\n", "0 1 3 4 4 0 \n", "1 4 6 5 4 1 \n", "2 5 4 2 4 0 \n", "3 9 6 5 3 1 \n", "4 10 6 5 4 1 \n", "\n", " average_runtime(minutes_per_week) airtime airlocation ratings \\\n", "0 10 0 38 0.027465 \n", "1 40 1 38 0.056262 \n", "2 50 1 38 0.027465 \n", "3 40 2 38 0.027465 \n", "4 40 2 38 0.027465 \n", "\n", " expensive money_back_guarantee \n", "0 0 0 \n", "1 2 1 \n", "2 0 0 \n", "3 0 0 \n", "4 0 1 " ] }, "execution_count": 345, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test.head()" ] }, { "cell_type": "code", "execution_count": 346, "metadata": {}, "outputs": [], "source": [ "# Give the test ratings the same x100000 + int-cast treatment the training ratings\n", "# received before the model was fitted. Run this cell once: it rescales in place.\n", "test['ratings']=(test['ratings']*100000).astype(int)" ] }, { "cell_type": "code", "execution_count": 347, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 6513 entries, 0 to 6512\n", "Data columns (total 11 columns):\n", "id 6513 non-null int64\n", "realtionship_status 6513 non-null int64\n", "industry 6513 non-null int64\n", "genre 6513 non-null int64\n", "targeted_sex 6513 non-null int64\n", "average_runtime(minutes_per_week) 6513 non-null int64\n", "airtime 6513 non-null int64\n", "airlocation 6513 non-null int64\n", "ratings 6513 non-null int64\n", "expensive 6513 non-null int64\n", "money_back_guarantee 6513 non-null int64\n", "dtypes: int64(11)\n", "memory usage: 559.8 KB\n" ] } ], "source": [ "test.info()" ] }, { "cell_type": "code", "execution_count": 348, "metadata": {}, "outputs": [], "source": [ "# Test-set feature matrix, all columns in the same order used for training.\n", "# Kept under its own name so the training matrix `x` is not overwritten (re-running\n", "# the split cell would otherwise pair test features with training labels).\n", "x_submit = test.values" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 349, "metadata": {}, "outputs": [], "source": [ "# Predicted 0/1 netgain for every test row.\n", "Y_pred = clf.predict(x_submit)" ] }, { "cell_type": "code", "execution_count": 350, "metadata": {}, "outputs": [], "source": [ "# Start the submission from the test ids (a Series at this point).\n", "dft = test['id']" ] }, { "cell_type": "code", "execution_count": 351, "metadata": {}, "outputs": 
[], "source": [ "# Wrap the ids in a DataFrame so further columns can be attached.\n", "dft = pd.DataFrame(dft)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 354, "metadata": {}, "outputs": [], "source": [ "# Attach the test-set predictions as the `netgain` column.\n", "dft = dft.assign(netgain=Y_pred)" ] }, { "cell_type": "code", "execution_count": 355, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idnetgain
010
141
250
391
4101
5200
6280
7310
8320
9340
\n", "
" ], "text/plain": [ " id netgain\n", "0 1 0\n", "1 4 1\n", "2 5 0\n", "3 9 1\n", "4 10 1\n", "5 20 0\n", "6 28 0\n", "7 31 0\n", "8 32 0\n", "9 34 0" ] }, "execution_count": 355, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dft.head(10)" ] }, { "cell_type": "code", "execution_count": 356, "metadata": {}, "outputs": [], "source": [ "# dft['netgain'] = df['netgain'].apply(bool)" ] }, { "cell_type": "code", "execution_count": 357, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idnetgain
010
141
250
391
4101
\n", "
" ], "text/plain": [ " id netgain\n", "0 1 0\n", "1 4 1\n", "2 5 0\n", "3 9 1\n", "4 10 1" ] }, "execution_count": 357, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dft.head()" ] }, { "cell_type": "code", "execution_count": 358, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 5096\n", "1 1417\n", "Name: netgain, dtype: int64" ] }, "execution_count": 358, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dft.netgain.value_counts()" ] }, { "cell_type": "code", "execution_count": 359, "metadata": {}, "outputs": [], "source": [ "# for x in range(len(dft['netgain'])):\n", "# if x==0:\n", "# dft['netgain'][x] = 'false'\n", "# elif x==1:\n", "# dft['netgain'][x] = 'true'" ] }, { "cell_type": "code", "execution_count": 363, "metadata": {}, "outputs": [], "source": [ "# Turn the 0/1 predictions into the strings 'false'/'true' written to the submission.\n", "# A single replace() builds a new object column instead of writing strings into an\n", "# int64 column through .loc, which relies on implicit upcasting that recent pandas\n", "# versions deprecate. Values other than 0 and 1 are left untouched, so re-running\n", "# the cell after the conversion changes nothing.\n", "dft['netgain'] = dft['netgain'].replace({0: 'false', 1: 'true'})" ] }, { "cell_type": "code", "execution_count": 364, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idnetgainnetgain_e
01falsefalse
14truetrue
25falsefalse
39truetrue
410truetrue
\n", "
" ], "text/plain": [ " id netgain netgain_e\n", "0 1 false false\n", "1 4 true true\n", "2 5 false false\n", "3 9 true true\n", "4 10 true true" ] }, "execution_count": 364, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dft.head()\n" ] }, { "cell_type": "code", "execution_count": 365, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "false 5096\n", "true 1417\n", "Name: netgain_e, dtype: int64" ] }, "execution_count": 365, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Class balance of the submitted labels. This inspects `netgain`: no cell in the\n", "# notebook creates a `netgain_e` column on `dft`, so reading it fails on a fresh kernel.\n", "dft['netgain'].value_counts()" ] }, { "cell_type": "code", "execution_count": 366, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idnetgain
01false
14true
25false
39true
410true
.........
650832538false
650932542false
651032549false
651132558true
651232560false
\n", "

6513 rows × 2 columns

\n", "
" ], "text/plain": [ " id netgain\n", "0 1 false\n", "1 4 true\n", "2 5 false\n", "3 9 true\n", "4 10 true\n", "... ... ...\n", "6508 32538 false\n", "6509 32542 false\n", "6510 32549 false\n", "6511 32558 true\n", "6512 32560 false\n", "\n", "[6513 rows x 2 columns]" ] }, "execution_count": 366, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Make sure only `id` and `netgain` reach the CSV. drop() returns a new frame, so\n", "# the result must be assigned back; errors='ignore' keeps the cell working on a\n", "# fresh kernel, where no cell creates `netgain_e` (it appears to be left over from\n", "# a deleted cell in the original session).\n", "dft = dft.drop(columns=['netgain_e'], errors='ignore')\n", "dft" ] }, { "cell_type": "code", "execution_count": 367, "metadata": {}, "outputs": [], "source": [ "# Write the submission without the pandas row index.\n", "dft.to_csv(\"First_submission.csv\",index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" } }, "nbformat": 4, "nbformat_minor": 2 }