{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Standard library; used in this notebook only to report the number of cores.\n", "import multiprocessing" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "16" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Core count of this machine, for context on the parallel (n_jobs=-1) fit further down.\n", "multiprocessing.cpu_count()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Third-party: tabular data loading and the classifier.\n", "import pandas as pd\n", "from sklearn.ensemble import RandomForestClassifier" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Load the training table, using the 'Id' column as the row index.\n", "train_df = pd.read_csv(\n", "    'forest_train.csv',\n", "    index_col='Id',\n", ")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ElevationAspectSlopeHorizontal_Distance_To_HydrologyVertical_Distance_To_HydrologyHorizontal_Distance_To_RoadwaysHillshade_9amHillshade_NoonHillshade_3pmHorizontal_Distance_To_Fire_Points...Soil_Type32Soil_Type33Soil_Type34Soil_Type35Soil_Type36Soil_Type37Soil_Type38Soil_Type39Soil_Type40Cover_Type
Id
1259651325805102212321486279...0000000005
22590562212-63902202351516225...0000000005
3280413992686531802342381356121...0000000002
427851551824211830902382381226211...0000000002
52595452153-13912202341506172...0000000005
\n", "

5 rows × 55 columns

\n", "
" ], "text/plain": [ " Elevation Aspect Slope Horizontal_Distance_To_Hydrology \\\n", "Id \n", "1 2596 51 3 258 \n", "2 2590 56 2 212 \n", "3 2804 139 9 268 \n", "4 2785 155 18 242 \n", "5 2595 45 2 153 \n", "\n", " Vertical_Distance_To_Hydrology Horizontal_Distance_To_Roadways \\\n", "Id \n", "1 0 510 \n", "2 -6 390 \n", "3 65 3180 \n", "4 118 3090 \n", "5 -1 391 \n", "\n", " Hillshade_9am Hillshade_Noon Hillshade_3pm \\\n", "Id \n", "1 221 232 148 \n", "2 220 235 151 \n", "3 234 238 135 \n", "4 238 238 122 \n", "5 220 234 150 \n", "\n", " Horizontal_Distance_To_Fire_Points ... Soil_Type32 Soil_Type33 \\\n", "Id ... \n", "1 6279 ... 0 0 \n", "2 6225 ... 0 0 \n", "3 6121 ... 0 0 \n", "4 6211 ... 0 0 \n", "5 6172 ... 0 0 \n", "\n", " Soil_Type34 Soil_Type35 Soil_Type36 Soil_Type37 Soil_Type38 \\\n", "Id \n", "1 0 0 0 0 0 \n", "2 0 0 0 0 0 \n", "3 0 0 0 0 0 \n", "4 0 0 0 0 0 \n", "5 0 0 0 0 0 \n", "\n", " Soil_Type39 Soil_Type40 Cover_Type \n", "Id \n", "1 0 0 5 \n", "2 0 0 5 \n", "3 0 0 2 \n", "4 0 0 2 \n", "5 0 0 5 \n", "\n", "[5 rows x 55 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_df.head()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Configure the forest: 1000 trees of depth <= 15, built on all cores (n_jobs=-1).\n", "# random_state pins the seed; oob_score=True is what makes forest.oob_score_ available after fit.\n", "forest = RandomForestClassifier(\n", "    n_estimators=1000,\n", "    max_depth=15,\n", "    oob_score=True,\n", "    random_state=42,\n", "    n_jobs=-1,\n", ")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 29.2 s, sys: 386 ms, total: 29.6 s\n", "Wall time: 5.48 s\n" ] }, { "data": { "text/plain": [ "RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n", " max_depth=15, max_features='auto', max_leaf_nodes=None,\n", " min_samples_leaf=1, min_samples_split=2,\n", " min_weight_fraction_leaf=0.0, n_estimators=1000, n_jobs=-1,\n", " oob_score=True, random_state=42, verbose=0, warm_start=False)" ] },
"execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "# Fit on every column except the target; 'Cover_Type' is the label.\n", "# (Comments must stay below %%time: a cell magic has to be the first line of its cell.)\n", "forest.fit(\n", "    train_df.drop('Cover_Type', axis=1),\n", "    train_df['Cover_Type'],\n", ")" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0.84656084656084651" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Out-of-bag score of the fitted forest (populated because oob_score=True was set).\n", "forest.oob_score_" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.11" } }, "nbformat": 4, "nbformat_minor": 0 }