{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# BIKES" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": true }, "outputs": [], "source": [ "day = pd.read_csv(\"data/day.csv\")" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [], "source": [ "data = day.drop([\"dteday\", \"instant\", \"casual\", 'registered', 'cnt', 'yr'], axis=1)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Index(['season', 'mnth', 'holiday', 'weekday', 'workingday', 'weathersit',\n", " 'temp', 'atemp', 'hum', 'windspeed'],\n", " dtype='object')" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.columns" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [], "source": [ "data_raw = data.copy()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [], "source": [ "# Replace integer category codes with readable labels before one-hot encoding.\n", "data.season = data.season.map({1: \"spring\", 2: \"summer\", 3: \"fall\", 4: 'winter'})\n", "data.weathersit = data.weathersit.map({1: \"clear, partly cloudy\", 2: 'mist, cloudy', 3: 'light snow, light rain', 4:'heavy rain, snow and fog'})\n", "data.mnth = pd.to_datetime(data.mnth, format=\"%m\").dt.strftime(\"%b\")\n", "# Weekday codes follow the %w convention (0 = Sunday). A lone %w directive does not\n", "# determine a calendar date, so parsing with format=\"%w\" gave every row the same\n", "# default date and hence the label \"Mon\"; map the codes to names explicitly instead.\n", "data.weekday = data.weekday.map({0: \"Sun\", 1: \"Mon\", 2: \"Tue\", 3: \"Wed\", 4: \"Thu\", 5: \"Fri\", 6: \"Sat\"})" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [], "source": [ "data_dummies = pd.get_dummies(data, columns=['season', 'mnth', 'weekday', 'weathersit'])" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false, 
"scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", " | holiday | \n", "workingday | \n", "temp | \n", "atemp | \n", "hum | \n", "windspeed | \n", "season_fall | \n", "season_spring | \n", "season_summer | \n", "season_winter | \n", "... | \n", "mnth_Mar | \n", "mnth_May | \n", "mnth_Nov | \n", "mnth_Oct | \n", "mnth_Sep | \n", "weekday_Mon | \n", "weathersit_clear, partly cloudy | \n", "weathersit_light snow, light rain | \n", "weathersit_mist, cloudy | \n", "cnt | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0 | \n", "0 | \n", "0.344167 | \n", "0.363625 | \n", "0.805833 | \n", "0.160446 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "985 | \n", "
1 | \n", "0 | \n", "0 | \n", "0.363478 | \n", "0.353739 | \n", "0.696087 | \n", "0.248539 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "801 | \n", "
2 | \n", "0 | \n", "1 | \n", "0.196364 | \n", "0.189405 | \n", "0.437273 | \n", "0.248309 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "1349 | \n", "
3 | \n", "0 | \n", "1 | \n", "0.200000 | \n", "0.212122 | \n", "0.590435 | \n", "0.160296 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "1562 | \n", "
4 | \n", "0 | \n", "1 | \n", "0.226957 | \n", "0.229270 | \n", "0.436957 | \n", "0.186900 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "1600 | \n", "
5 rows × 27 columns
\n", "\n", " | id | \n", "member_id | \n", "loan_amnt | \n", "funded_amnt | \n", "funded_amnt_inv | \n", "term | \n", "int_rate | \n", "installment | \n", "grade | \n", "sub_grade | \n", "... | \n", "total_bal_il | \n", "il_util | \n", "open_rv_12m | \n", "open_rv_24m | \n", "max_bal_bc | \n", "all_util | \n", "total_rev_hi_lim | \n", "inq_fi | \n", "total_cu_tl | \n", "inq_last_12m | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1077501 | \n", "1296599 | \n", "5000.0 | \n", "5000.0 | \n", "4975.0 | \n", "36 months | \n", "10.65 | \n", "162.87 | \n", "B | \n", "B2 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
23 | \n", "1069700 | \n", "1304810 | \n", "10000.0 | \n", "10000.0 | \n", "10000.0 | \n", "36 months | \n", "11.71 | \n", "330.76 | \n", "B | \n", "B3 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
46 | \n", "1069465 | \n", "1304521 | \n", "5000.0 | \n", "5000.0 | \n", "5000.0 | \n", "36 months | \n", "8.90 | \n", "158.77 | \n", "A | \n", "A5 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
69 | \n", "1069073 | \n", "1303718 | \n", "15000.0 | \n", "15000.0 | \n", "15000.0 | \n", "36 months | \n", "14.65 | \n", "517.42 | \n", "C | \n", "C3 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
92 | \n", "1065674 | \n", "1299785 | \n", "8200.0 | \n", "8200.0 | \n", "8200.0 | \n", "60 months | \n", "19.42 | \n", "214.62 | \n", "E | \n", "E3 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
5 rows × 74 columns
\n", "\n", " | out_prncp | \n", "total_rec_late_fee | \n", "total_rec_prncp | \n", "total_pymnt_inv | \n", "total_pymnt | \n", "out_prncp_inv | \n", "initial_list_status | \n", "revol_bal | \n", "dti | \n", "addr_state | \n", "... | \n", "pub_rec | \n", "open_acc | \n", "title | \n", "collections_12_mths_ex_med | \n", "revol_util | \n", "last_pymnt_d | \n", "emp_title | \n", "total_rev_hi_lim | \n", "tot_cur_bal | \n", "tot_coll_amt | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
42550 | \n", "4145.19 | \n", "15.0 | \n", "3854.81 | \n", "4990.03 | \n", "4990.030000 | \n", "4145.19 | \n", "f | \n", "7203.0 | \n", "15.75 | \n", "CO | \n", "... | \n", "1.0 | \n", "9.0 | \n", "My Life Saving Loan :) | \n", "0.0 | \n", "34.6 | \n", "Oct-2015 | \n", "PARTS MANAGER | \n", "20800.0 | \n", "15949.0 | \n", "0.0 | \n", "
42573 | \n", "0.00 | \n", "0.0 | \n", "16000.00 | \n", "16900.62 | \n", "16900.617682 | \n", "0.00 | \n", "w | \n", "6868.0 | \n", "7.39 | \n", "MA | \n", "... | \n", "0.0 | \n", "12.0 | \n", "Debt Consolidation | \n", "0.0 | \n", "10.3 | \n", "Nov-2014 | \n", "Senior Software Engineer | \n", "66800.0 | \n", "369614.0 | \n", "0.0 | \n", "
42596 | \n", "13859.72 | \n", "0.0 | \n", "6140.28 | \n", "11734.08 | \n", "11734.080000 | \n", "13859.72 | \n", "w | \n", "13772.0 | \n", "17.03 | \n", "TX | \n", "... | \n", "0.0 | \n", "14.0 | \n", "Credit card refinancing | \n", "0.0 | \n", "41.6 | \n", "Jan-2016 | \n", "Project Manager | \n", "33100.0 | \n", "75993.0 | \n", "3712.0 | \n", "
42619 | \n", "0.00 | \n", "0.0 | \n", "4631.49 | \n", "12732.64 | \n", "12732.640000 | \n", "0.00 | \n", "w | \n", "27597.0 | \n", "8.58 | \n", "NY | \n", "... | \n", "3.0 | \n", "19.0 | \n", "Consolidation for Self-Publication | \n", "0.0 | \n", "69.9 | \n", "May-2015 | \n", "Administrator | \n", "39500.0 | \n", "181757.0 | \n", "0.0 | \n", "
42642 | \n", "0.00 | \n", "0.0 | \n", "30000.00 | \n", "34218.71 | \n", "34218.711966 | \n", "0.00 | \n", "f | \n", "17756.0 | \n", "9.41 | \n", "MN | \n", "... | \n", "0.0 | \n", "12.0 | \n", "Credit card refinancing | \n", "0.0 | \n", "61.0 | \n", "Mar-2015 | \n", "Engineer/Conductor | \n", "29100.0 | \n", "161166.0 | \n", "0.0 | \n", "
5 rows × 52 columns
\n", "\n", " | AnimalID | \n", "Name | \n", "DateTime | \n", "OutcomeType | \n", "OutcomeSubtype | \n", "AnimalType | \n", "SexuponOutcome | \n", "AgeuponOutcome | \n", "Breed | \n", "Color | \n", "
---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "A671945 | \n", "Hambone | \n", "2014-02-12 18:22:00 | \n", "Return_to_owner | \n", "NaN | \n", "Dog | \n", "Neutered Male | \n", "1 year | \n", "Shetland Sheepdog Mix | \n", "Brown/White | \n", "
1 | \n", "A656520 | \n", "Emily | \n", "2013-10-13 12:44:00 | \n", "Euthanasia | \n", "Suffering | \n", "Cat | \n", "Spayed Female | \n", "1 year | \n", "Domestic Shorthair Mix | \n", "Cream Tabby | \n", "
2 | \n", "A686464 | \n", "Pearce | \n", "2015-01-31 12:28:00 | \n", "Adoption | \n", "Foster | \n", "Dog | \n", "Neutered Male | \n", "2 years | \n", "Pit Bull Mix | \n", "Blue/White | \n", "
3 | \n", "A683430 | \n", "NaN | \n", "2014-07-11 19:09:00 | \n", "Transfer | \n", "Partner | \n", "Cat | \n", "Intact Male | \n", "3 weeks | \n", "Domestic Shorthair Mix | \n", "Blue Cream | \n", "
4 | \n", "A667013 | \n", "NaN | \n", "2013-11-15 12:52:00 | \n", "Transfer | \n", "Partner | \n", "Dog | \n", "Neutered Male | \n", "2 years | \n", "Lhasa Apso/Miniature Poodle | \n", "Tan | \n", "
\n", " | age | \n", "job | \n", "marital | \n", "education | \n", "default | \n", "housing | \n", "loan | \n", "contact | \n", "month | \n", "day_of_week | \n", "... | \n", "campaign | \n", "pdays | \n", "previous | \n", "poutcome | \n", "emp.var.rate | \n", "cons.price.idx | \n", "cons.conf.idx | \n", "euribor3m | \n", "nr.employed | \n", "y | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "56 | \n", "housemaid | \n", "married | \n", "basic.4y | \n", "no | \n", "no | \n", "no | \n", "telephone | \n", "may | \n", "mon | \n", "... | \n", "1 | \n", "999 | \n", "0 | \n", "nonexistent | \n", "1.1 | \n", "93.994 | \n", "-36.4 | \n", "4.857 | \n", "5191.0 | \n", "no | \n", "
1 | \n", "57 | \n", "services | \n", "married | \n", "high.school | \n", "unknown | \n", "no | \n", "no | \n", "telephone | \n", "may | \n", "mon | \n", "... | \n", "1 | \n", "999 | \n", "0 | \n", "nonexistent | \n", "1.1 | \n", "93.994 | \n", "-36.4 | \n", "4.857 | \n", "5191.0 | \n", "no | \n", "
2 | \n", "37 | \n", "services | \n", "married | \n", "high.school | \n", "no | \n", "yes | \n", "no | \n", "telephone | \n", "may | \n", "mon | \n", "... | \n", "1 | \n", "999 | \n", "0 | \n", "nonexistent | \n", "1.1 | \n", "93.994 | \n", "-36.4 | \n", "4.857 | \n", "5191.0 | \n", "no | \n", "
3 | \n", "40 | \n", "admin. | \n", "married | \n", "basic.6y | \n", "no | \n", "no | \n", "no | \n", "telephone | \n", "may | \n", "mon | \n", "... | \n", "1 | \n", "999 | \n", "0 | \n", "nonexistent | \n", "1.1 | \n", "93.994 | \n", "-36.4 | \n", "4.857 | \n", "5191.0 | \n", "no | \n", "
4 | \n", "56 | \n", "services | \n", "married | \n", "high.school | \n", "no | \n", "no | \n", "yes | \n", "telephone | \n", "may | \n", "mon | \n", "... | \n", "1 | \n", "999 | \n", "0 | \n", "nonexistent | \n", "1.1 | \n", "93.994 | \n", "-36.4 | \n", "4.857 | \n", "5191.0 | \n", "no | \n", "
5 rows × 21 columns
\n", "