{ "cells": [ { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd" ] },
{ "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "dataset = pd.read_csv('~/Documents/ML/forestfires.csv') " ] },
{ "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [
 "# Lookup tables: three-letter abbreviation -> integer code.\n",
 "MONTH_TO_NUMBER = {\n",
 "    'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'may': 5, 'jun': 6,\n",
 "    'jul': 7, 'aug': 8, 'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12,\n",
 "}\n",
 "DAY_TO_NUMBER = {\n",
 "    'mon': 1, 'tue': 2, 'wed': 3, 'thu': 4, 'fri': 5, 'sat': 6, 'sun': 7,\n",
 "}\n",
 "\n",
 "\n",
 "def data_transform(dataset):\n",
 "    \"\"\"Encode the 'month' and 'day' columns of `dataset` as integers.\n",
 "\n",
 "    Month abbreviations 'jan'..'dec' become 1..12 and day abbreviations\n",
 "    'mon'..'sun' become 1..7. Any value that is not one of those\n",
 "    abbreviations (including an already-encoded integer) is left as it\n",
 "    is, so calling the function twice is harmless.\n",
 "\n",
 "    Parameters\n",
 "    ----------\n",
 "    dataset : pandas.DataFrame\n",
 "        Frame with 'month' and 'day' columns. Any number of rows and any\n",
 "        index are accepted.\n",
 "\n",
 "    Returns\n",
 "    -------\n",
 "    pandas.DataFrame\n",
 "        The same object that was passed in. The two columns are replaced\n",
 "        on that object, so callers that ignore the return value still\n",
 "        see the encoded columns.\n",
 "    \"\"\"\n",
 "    # Assign whole columns instead of writing dataset['month'][i] = ...:\n",
 "    # element-wise chained assignment raises SettingWithCopyWarning and is\n",
 "    # not guaranteed to write to the original frame.\n",
 "    dataset['month'] = dataset['month'].map(lambda m: MONTH_TO_NUMBER.get(m, m))\n",
 "    dataset['day'] = dataset['day'].map(lambda d: DAY_TO_NUMBER.get(d, d))\n",
 "    return dataset" ] },
{ "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", 
"output_type": "stream", "text": [ "/home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:8: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", " \n", "/home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:22: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", "/home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:18: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", "/home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:20: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", "/home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:10: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", " # Remove the CWD from sys.path while we load stuff.\n", "/home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:14: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", " \n", 
"/home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:16: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", " app.launch_new_instance()\n", "/home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:6: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", " \n", "/home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:4: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", " after removing the cwd from sys.path.\n", "/home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:26: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", "/home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:12: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", " if sys.path[0] == '':\n", "/home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:24: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", 
"/home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:39: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", "/home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:33: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", "/home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:41: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", "/home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:43: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", "/home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:31: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", "/home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:35: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", "/home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:37: SettingWithCopyWarning: \n", "A value is 
trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
XYmonthdayFFMCDMCDCISItempRHwindrainarea
0753586.226.294.35.18.2516.70.00.00
17410290.635.4669.16.718.0330.90.00.00
27410690.643.7686.96.714.6331.30.00.00
3863591.733.377.59.08.3974.00.20.00
4863789.351.3102.29.611.4991.80.00.00
5868792.385.3488.014.722.2295.40.00.00
6868192.388.9495.68.524.1273.10.00.00
7868191.5145.4608.210.78.0862.20.00.00
8869291.0129.5692.67.013.1635.40.00.00
9759692.588.0698.67.122.8404.00.00.00
10759692.588.0698.67.117.8517.20.00.00
11759692.873.2713.022.619.3384.00.00.00
12658563.570.8665.30.817.0726.70.00.00
13659190.9126.5686.57.021.3422.20.00.00
14659392.9133.3699.69.226.4214.50.00.00
15659593.3141.2713.913.922.9445.40.00.00
16553691.735.880.87.815.1275.40.00.00
178510184.932.8664.23.016.7474.90.00.00
18643389.227.970.86.315.9354.00.00.00
19644686.327.497.15.19.3444.50.00.00
20649291.0129.5692.67.018.3402.70.00.00
21549191.878.5724.39.219.1382.70.00.00
22746794.396.3200.056.121.0444.50.00.00
23748690.2110.9537.46.219.5435.80.00.00
24748693.5139.4594.220.323.7325.80.00.00
25748791.4142.4601.410.616.3605.40.00.00
26749592.4117.9668.012.219.0345.80.00.00
27749190.9126.5686.57.019.4481.30.00.00
28639693.4145.4721.48.130.2242.70.00.00
29639793.5149.3728.68.122.8393.60.00.00
..........................................
487548295.1141.3605.817.726.4343.60.016.40
488448295.1141.3605.817.719.4717.60.046.70
489448395.1141.3605.817.720.6581.30.00.00
490448395.1141.3605.817.728.7334.00.00.00
491448495.8152.0624.113.832.4214.50.00.00
492138595.9158.0633.611.332.4272.20.00.00
493138595.9158.0633.611.327.5294.50.043.32
494668696.0164.0643.014.030.8304.90.08.59
495668196.2175.5661.816.823.9422.20.00.00
496458196.2175.5661.816.832.6263.10.02.77
497348296.1181.1671.214.332.3272.20.014.68
498658296.1181.1671.214.333.3262.70.040.54
499758296.1181.1671.214.327.3634.96.410.82
500868296.1181.1671.214.321.6654.90.80.00
501758296.1181.1671.214.321.6654.90.80.00
502448296.1181.1671.214.320.7694.90.40.00
503248394.5139.4689.120.029.2304.90.01.95
504438394.5139.4689.120.028.9294.90.049.59
505128491.0163.2744.410.126.7351.80.05.80
506128591.0166.9752.67.118.5738.50.00.00
507248591.0166.9752.67.125.9413.60.00.00
508128591.0166.9752.67.125.9413.60.00.00
509548591.0166.9752.67.121.1717.61.42.17
510658591.0166.9752.67.118.2625.40.00.43
511868781.656.7665.61.927.8352.70.00.00
512438781.656.7665.61.927.8322.70.06.44
513248781.656.7665.61.921.9715.80.054.29
514748781.656.7665.61.921.2706.70.011.16
515148694.4146.0614.711.325.6424.00.00.00
5166311279.53.0106.71.111.8314.50.00.00
\n", "

517 rows × 13 columns

\n", "
" ], "text/plain": [ " X Y month day FFMC DMC DC ISI temp RH wind rain area\n", "0 7 5 3 5 86.2 26.2 94.3 5.1 8.2 51 6.7 0.0 0.00\n", "1 7 4 10 2 90.6 35.4 669.1 6.7 18.0 33 0.9 0.0 0.00\n", "2 7 4 10 6 90.6 43.7 686.9 6.7 14.6 33 1.3 0.0 0.00\n", "3 8 6 3 5 91.7 33.3 77.5 9.0 8.3 97 4.0 0.2 0.00\n", "4 8 6 3 7 89.3 51.3 102.2 9.6 11.4 99 1.8 0.0 0.00\n", "5 8 6 8 7 92.3 85.3 488.0 14.7 22.2 29 5.4 0.0 0.00\n", "6 8 6 8 1 92.3 88.9 495.6 8.5 24.1 27 3.1 0.0 0.00\n", "7 8 6 8 1 91.5 145.4 608.2 10.7 8.0 86 2.2 0.0 0.00\n", "8 8 6 9 2 91.0 129.5 692.6 7.0 13.1 63 5.4 0.0 0.00\n", "9 7 5 9 6 92.5 88.0 698.6 7.1 22.8 40 4.0 0.0 0.00\n", "10 7 5 9 6 92.5 88.0 698.6 7.1 17.8 51 7.2 0.0 0.00\n", "11 7 5 9 6 92.8 73.2 713.0 22.6 19.3 38 4.0 0.0 0.00\n", "12 6 5 8 5 63.5 70.8 665.3 0.8 17.0 72 6.7 0.0 0.00\n", "13 6 5 9 1 90.9 126.5 686.5 7.0 21.3 42 2.2 0.0 0.00\n", "14 6 5 9 3 92.9 133.3 699.6 9.2 26.4 21 4.5 0.0 0.00\n", "15 6 5 9 5 93.3 141.2 713.9 13.9 22.9 44 5.4 0.0 0.00\n", "16 5 5 3 6 91.7 35.8 80.8 7.8 15.1 27 5.4 0.0 0.00\n", "17 8 5 10 1 84.9 32.8 664.2 3.0 16.7 47 4.9 0.0 0.00\n", "18 6 4 3 3 89.2 27.9 70.8 6.3 15.9 35 4.0 0.0 0.00\n", "19 6 4 4 6 86.3 27.4 97.1 5.1 9.3 44 4.5 0.0 0.00\n", "20 6 4 9 2 91.0 129.5 692.6 7.0 18.3 40 2.7 0.0 0.00\n", "21 5 4 9 1 91.8 78.5 724.3 9.2 19.1 38 2.7 0.0 0.00\n", "22 7 4 6 7 94.3 96.3 200.0 56.1 21.0 44 4.5 0.0 0.00\n", "23 7 4 8 6 90.2 110.9 537.4 6.2 19.5 43 5.8 0.0 0.00\n", "24 7 4 8 6 93.5 139.4 594.2 20.3 23.7 32 5.8 0.0 0.00\n", "25 7 4 8 7 91.4 142.4 601.4 10.6 16.3 60 5.4 0.0 0.00\n", "26 7 4 9 5 92.4 117.9 668.0 12.2 19.0 34 5.8 0.0 0.00\n", "27 7 4 9 1 90.9 126.5 686.5 7.0 19.4 48 1.3 0.0 0.00\n", "28 6 3 9 6 93.4 145.4 721.4 8.1 30.2 24 2.7 0.0 0.00\n", "29 6 3 9 7 93.5 149.3 728.6 8.1 22.8 39 3.6 0.0 0.00\n", ".. .. .. ... .. ... ... ... ... ... .. ... ... 
...\n", "487 5 4 8 2 95.1 141.3 605.8 17.7 26.4 34 3.6 0.0 16.40\n", "488 4 4 8 2 95.1 141.3 605.8 17.7 19.4 71 7.6 0.0 46.70\n", "489 4 4 8 3 95.1 141.3 605.8 17.7 20.6 58 1.3 0.0 0.00\n", "490 4 4 8 3 95.1 141.3 605.8 17.7 28.7 33 4.0 0.0 0.00\n", "491 4 4 8 4 95.8 152.0 624.1 13.8 32.4 21 4.5 0.0 0.00\n", "492 1 3 8 5 95.9 158.0 633.6 11.3 32.4 27 2.2 0.0 0.00\n", "493 1 3 8 5 95.9 158.0 633.6 11.3 27.5 29 4.5 0.0 43.32\n", "494 6 6 8 6 96.0 164.0 643.0 14.0 30.8 30 4.9 0.0 8.59\n", "495 6 6 8 1 96.2 175.5 661.8 16.8 23.9 42 2.2 0.0 0.00\n", "496 4 5 8 1 96.2 175.5 661.8 16.8 32.6 26 3.1 0.0 2.77\n", "497 3 4 8 2 96.1 181.1 671.2 14.3 32.3 27 2.2 0.0 14.68\n", "498 6 5 8 2 96.1 181.1 671.2 14.3 33.3 26 2.7 0.0 40.54\n", "499 7 5 8 2 96.1 181.1 671.2 14.3 27.3 63 4.9 6.4 10.82\n", "500 8 6 8 2 96.1 181.1 671.2 14.3 21.6 65 4.9 0.8 0.00\n", "501 7 5 8 2 96.1 181.1 671.2 14.3 21.6 65 4.9 0.8 0.00\n", "502 4 4 8 2 96.1 181.1 671.2 14.3 20.7 69 4.9 0.4 0.00\n", "503 2 4 8 3 94.5 139.4 689.1 20.0 29.2 30 4.9 0.0 1.95\n", "504 4 3 8 3 94.5 139.4 689.1 20.0 28.9 29 4.9 0.0 49.59\n", "505 1 2 8 4 91.0 163.2 744.4 10.1 26.7 35 1.8 0.0 5.80\n", "506 1 2 8 5 91.0 166.9 752.6 7.1 18.5 73 8.5 0.0 0.00\n", "507 2 4 8 5 91.0 166.9 752.6 7.1 25.9 41 3.6 0.0 0.00\n", "508 1 2 8 5 91.0 166.9 752.6 7.1 25.9 41 3.6 0.0 0.00\n", "509 5 4 8 5 91.0 166.9 752.6 7.1 21.1 71 7.6 1.4 2.17\n", "510 6 5 8 5 91.0 166.9 752.6 7.1 18.2 62 5.4 0.0 0.43\n", "511 8 6 8 7 81.6 56.7 665.6 1.9 27.8 35 2.7 0.0 0.00\n", "512 4 3 8 7 81.6 56.7 665.6 1.9 27.8 32 2.7 0.0 6.44\n", "513 2 4 8 7 81.6 56.7 665.6 1.9 21.9 71 5.8 0.0 54.29\n", "514 7 4 8 7 81.6 56.7 665.6 1.9 21.2 70 6.7 0.0 11.16\n", "515 1 4 8 6 94.4 146.0 614.7 11.3 25.6 42 4.0 0.0 0.00\n", "516 6 3 11 2 79.5 3.0 106.7 1.1 11.8 31 4.5 0.0 0.00\n", "\n", "[517 rows x 13 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data_transform(dataset)" ] }, { "cell_type": "code", "execution_count": 
8, "metadata": {}, "outputs": [], "source": [ "X = dataset.iloc[:,0:12] \n", "y = dataset.iloc[:,-1]" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.33, random_state = 0)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "from sklearn.linear_model import LinearRegression\n", "model = LinearRegression()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.fit(X_train,y_train)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "y_pred = model.predict(X_test)" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [], "source": [ "from sklearn.metrics import mean_absolute_error, mean_squared_error" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "16.441192733314047" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mean_absolute_error(y_test, y_pred)" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": [
 "# Fractions of the data held out for testing; one MSE is recorded per fraction.\n",
 "percentage = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99])\n",
 "error = np.zeros(len(percentage))\n",
 "for i, percent in enumerate(percentage):\n",
 "    # Use separate names and a fresh estimator so the 0.33 split, `model`\n",
 "    # and `y_pred` from the cells above are not overwritten by the sweep.\n",
 "    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=percent, random_state=0)\n",
 "    split_model = LinearRegression().fit(X_tr, y_tr)\n",
 "    error[i] = mean_squared_error(y_te, split_model.predict(X_te))" ] }, { "cell_type": "code", "execution_count": 46, 
"metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0,0.5,'Error')" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
 "# One point per held-out fraction; the trailing ';' hides the Text(...) repr.\n",
 "fig, ax = plt.subplots()\n",
 "ax.plot(percentage, error, marker='o')\n",
 "ax.set_xlabel('Fraction of data held out for testing')\n",
 "ax.set_ylabel('Mean squared error')\n",
 "ax.set_title('Linear regression test error vs. test-set size');" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.12" } }, "nbformat": 4, "nbformat_minor": 2 }