{ "cells": [
{ "cell_type": "markdown", "metadata": {}, "source": [ "# Part 1:" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# All imports live in this first cell so the notebook runs top to bottom on a fresh kernel.\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "import statsmodels.formula.api as smf\n", "from sklearn.metrics import mean_squared_error\n", "\n", "%matplotlib inline\n", "\n", "# Load the data\n", "df = pd.read_csv('advertising.csv')" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "# Preview the first rows instead of dumping the whole frame\n", "df.head()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Summary statistics of the data\n", "\n", "df.describe()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Distribution of the Radio budget\n", "\n", "fig, ax = plt.subplots(1, 1)\n", "df['Radio'].hist(bins=100, ax=ax)\n", "ax.set_title('Radio')\n", "plt.show()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Distribution of every variable (histograms)\n", "\n", "df.hist(bins=100)\n", "plt.show()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Outlier detection (boxplots)\n", "\n", "df.boxplot()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# 99.5th percentile of Newspaper, to see where the extreme values start\n", "\n", "np.percentile(df.Newspaper, 99.5)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Outlier filter: keep the rows with Newspaper below 100 and check how many remain\n", "\n", "condition = df['Newspaper'] < 100\n", "df[condition].shape" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# .copy() gives df its own data, so later cells can add or overwrite columns\n", "# without pandas raising SettingWithCopyWarning.\n", "df = df[condition].copy()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Check: boxplots after removing the outliers\n", "\n", "df.boxplot()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Correlation matrix\n", "\n", "corr = df.corr()\n", "fig, ax = plt.subplots(1, 1, figsize=(9, 9))\n", "sns.heatmap(corr,\n", "            xticklabels=corr.columns.values,\n", "            yticklabels=corr.columns.values,\n", "            ax=ax)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "# Scatter plot of each pair of neighbouring columns.\n", "# i = -1 relies on negative indexing to wrap around (last column against the first), so with the\n", "# column order TV, Radio, Newspaper, Sales the pairs are:\n", "# (Sales, TV), (TV, Radio), (Radio, Newspaper), (Newspaper, Sales).\n", "\n", "for i in range(-1, 3):\n", "    plt.figure()\n", "    plt.scatter(df.iloc[:, i], df.iloc[:, i + 1])\n", "    plt.xlabel('{}'.format(df.columns[i]))\n", "    plt.ylabel('{}'.format(df.columns[i + 1]))" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "# Remaining pairs, two columns apart: (Sales, Radio) and (TV, Newspaper).\n", "\n", "for i in range(-1, 1):\n", "    plt.figure()\n", "    plt.scatter(df.iloc[:, i], df.iloc[:, i + 2])\n", "    plt.xlabel('{}'.format(df.columns[i]))\n", "    plt.ylabel('{}'.format(df.columns[i + 2]))" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "# Part 2:" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "# Simple linear regression of Sales on each of the 3 media, one model per medium:\n", "\n", "lm = smf.ols(formula='Sales ~ Radio ', data=df).fit()\n", "lm.summary()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "lm.pvalues" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "lm_2 = smf.ols(formula='Sales ~ TV ', data=df).fit()\n", "lm_2.summary()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "lm_3 = smf.ols(formula='Sales ~ Newspaper ', data=df).fit()\n", "lm_3.summary()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "# The coefficient of the Sales ~ Radio regression is larger than the coefficient of the Sales ~ TV\n", "# regression even though TV is more correlated with Sales than Radio is. This is because the two\n", "# variables TV and Radio are not on the same scale, so their raw slopes are not comparable." ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "# MSE of each single-variable model (computed on the data the model was fitted on)\n", "\n", "mse = mean_squared_error(df.Sales, lm.fittedvalues)\n", "mse" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "mse_2 = mean_squared_error(df.Sales, lm_2.fittedvalues)\n", "mse_2" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "mse_3 = mean_squared_error(df.Sales, lm_3.fittedvalues)\n", "mse_3" ] },
{ "cell_type": "code", "execution_count": 88, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TVRadioNewspaperSales
count198.000000198.000000198.000000198.000000
mean0.49962023.13080829.77727313.980808
std0.29101914.86211120.4463035.196097
min0.0023840.0000000.3000001.600000
25%0.2547689.92500012.65000010.325000
50%0.51004822.40000025.60000012.900000
75%0.74412536.32500044.05000017.375000
max1.00000049.60000089.40000027.000000
\n", "
" ], "text/plain": [ " TV Radio Newspaper Sales\n", "count 198.000000 198.000000 198.000000 198.000000\n", "mean 0.499620 23.130808 29.777273 13.980808\n", "std 0.291019 14.862111 20.446303 5.196097\n", "min 0.002384 0.000000 0.300000 1.600000\n", "25% 0.254768 9.925000 12.650000 10.325000\n", "50% 0.510048 22.400000 25.600000 12.900000\n", "75% 0.744125 36.325000 44.050000 17.375000\n", "max 1.000000 49.600000 89.400000 27.000000" ] }, "execution_count": 88, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.describe()" ] }, { "cell_type": "code", "execution_count": 89, "metadata": { "scrolled": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\ASUS N752V\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", " \"\"\"Entry point for launching an IPython kernel.\n", "C:\\Users\\ASUS N752V\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", " \n", "C:\\Users\\ASUS N752V\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:3: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", " This is separate from the ipykernel package so we can avoid doing imports until\n" ] } ], "source": [ "# 
Scale the three media budgets: divide each column by its maximum so that every column has a maximum of 1\n", "# and the regression coefficients can be compared across media.\n", "# DataFrame.assign builds a new frame instead of writing column by column into a slice of the\n", "# loaded data, which is what raised SettingWithCopyWarning.\n", "\n", "media_cols = ['TV', 'Radio', 'Newspaper']\n", "df = df.assign(**{col: df[col] / df[col].max() for col in media_cols})" ] }, { "cell_type": "code", "execution_count": 90, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TVRadioNewspaperSales
00.7837190.7620970.77404922.1
10.1515670.7923390.50447410.4
20.0585830.9254030.7751689.3
30.5160080.8326610.65436218.5
40.6158040.2177420.65324412.9
50.0296320.9858870.8389267.2
60.1958450.6612900.26286411.8
70.4094010.3951610.12975413.2
80.0292920.0423390.0111864.8
90.6805180.0524190.23713610.6
100.2251360.1169350.2706948.6
110.7312670.4838710.04474317.4
120.0810630.7076610.7371369.2
130.3320840.1532260.0805379.7
140.6951630.6633060.51454119.0
150.6655310.9616940.59172322.4
170.9584470.7983870.62416124.4
180.2356950.4133060.20469811.3
190.5017030.4818550.21364714.6
200.7438690.5584680.59731518.0
210.8085830.1028230.26286412.5
220.0449590.3205650.5548105.6
230.7775890.3407260.29306515.5
240.2121930.2540320.2046989.7
250.8954360.0705650.21812112.0
260.4867170.5907260.14094015.0
270.8177790.3366940.25615215.9
280.8474110.5463710.25615218.9
290.2404630.3225810.45637610.5
300.9976160.5705650.48322121.4
...............
1700.1703000.2338710.2058178.4
1710.5602860.4213710.53020114.5
1720.0667570.4052420.1901577.6
1730.5735690.1431450.14317711.7
1740.7574930.0685480.14653211.5
1750.9431200.9858870.46756227.0
1760.8460490.6088710.22706920.2
1770.5797000.1572580.39373611.7
1780.9424390.0463710.26510111.8
1790.5640330.2016130.19686812.6
1800.5333790.0524190.09284110.5
1810.7442100.1088710.30648812.2
1820.1914170.1149190.3322158.7
1830.9795640.8669350.80313226.2
1840.8644410.4294350.33557017.6
1850.6982290.9092740.21923922.6
1860.4751360.0423390.29753910.3
1870.6508860.5786290.20357917.3
1880.9741140.2802420.04138715.9
1890.0636920.2439520.2617456.7
1900.1345370.8286290.06487710.8
1910.2571530.2177420.0671149.9
1920.0585830.0826610.3534685.9
1930.5681200.8467740.04026819.6
1940.5098770.7177420.06711417.3
1950.1301090.0745970.1543627.6
1960.3208450.0987900.0906049.7
1970.6028610.1875000.07158812.8
1980.9659400.8467740.74049225.5
1990.7905310.1733870.09731513.4
\n", "

198 rows × 4 columns

\n", "
" ], "text/plain": [ " TV Radio Newspaper Sales\n", "0 0.783719 0.762097 0.774049 22.1\n", "1 0.151567 0.792339 0.504474 10.4\n", "2 0.058583 0.925403 0.775168 9.3\n", "3 0.516008 0.832661 0.654362 18.5\n", "4 0.615804 0.217742 0.653244 12.9\n", "5 0.029632 0.985887 0.838926 7.2\n", "6 0.195845 0.661290 0.262864 11.8\n", "7 0.409401 0.395161 0.129754 13.2\n", "8 0.029292 0.042339 0.011186 4.8\n", "9 0.680518 0.052419 0.237136 10.6\n", "10 0.225136 0.116935 0.270694 8.6\n", "11 0.731267 0.483871 0.044743 17.4\n", "12 0.081063 0.707661 0.737136 9.2\n", "13 0.332084 0.153226 0.080537 9.7\n", "14 0.695163 0.663306 0.514541 19.0\n", "15 0.665531 0.961694 0.591723 22.4\n", "17 0.958447 0.798387 0.624161 24.4\n", "18 0.235695 0.413306 0.204698 11.3\n", "19 0.501703 0.481855 0.213647 14.6\n", "20 0.743869 0.558468 0.597315 18.0\n", "21 0.808583 0.102823 0.262864 12.5\n", "22 0.044959 0.320565 0.554810 5.6\n", "23 0.777589 0.340726 0.293065 15.5\n", "24 0.212193 0.254032 0.204698 9.7\n", "25 0.895436 0.070565 0.218121 12.0\n", "26 0.486717 0.590726 0.140940 15.0\n", "27 0.817779 0.336694 0.256152 15.9\n", "28 0.847411 0.546371 0.256152 18.9\n", "29 0.240463 0.322581 0.456376 10.5\n", "30 0.997616 0.570565 0.483221 21.4\n", ".. ... ... ... 
...\n", "170 0.170300 0.233871 0.205817 8.4\n", "171 0.560286 0.421371 0.530201 14.5\n", "172 0.066757 0.405242 0.190157 7.6\n", "173 0.573569 0.143145 0.143177 11.7\n", "174 0.757493 0.068548 0.146532 11.5\n", "175 0.943120 0.985887 0.467562 27.0\n", "176 0.846049 0.608871 0.227069 20.2\n", "177 0.579700 0.157258 0.393736 11.7\n", "178 0.942439 0.046371 0.265101 11.8\n", "179 0.564033 0.201613 0.196868 12.6\n", "180 0.533379 0.052419 0.092841 10.5\n", "181 0.744210 0.108871 0.306488 12.2\n", "182 0.191417 0.114919 0.332215 8.7\n", "183 0.979564 0.866935 0.803132 26.2\n", "184 0.864441 0.429435 0.335570 17.6\n", "185 0.698229 0.909274 0.219239 22.6\n", "186 0.475136 0.042339 0.297539 10.3\n", "187 0.650886 0.578629 0.203579 17.3\n", "188 0.974114 0.280242 0.041387 15.9\n", "189 0.063692 0.243952 0.261745 6.7\n", "190 0.134537 0.828629 0.064877 10.8\n", "191 0.257153 0.217742 0.067114 9.9\n", "192 0.058583 0.082661 0.353468 5.9\n", "193 0.568120 0.846774 0.040268 19.6\n", "194 0.509877 0.717742 0.067114 17.3\n", "195 0.130109 0.074597 0.154362 7.6\n", "196 0.320845 0.098790 0.090604 9.7\n", "197 0.602861 0.187500 0.071588 12.8\n", "198 0.965940 0.846774 0.740492 25.5\n", "199 0.790531 0.173387 0.097315 13.4\n", "\n", "[198 rows x 4 columns]" ] }, "execution_count": 90, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 91, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
OLS Regression Results
Dep. Variable: Sales R-squared: 0.333
Model: OLS Adj. R-squared: 0.329
Method: Least Squares F-statistic: 97.69
Date: Wed, 26 Sep 2018 Prob (F-statistic): 5.99e-19
Time: 16:15:16 Log-Likelihood: -566.70
No. Observations: 198 AIC: 1137.
Df Residuals: 196 BIC: 1144.
Df Model: 1
Covariance Type: nonrobust
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
coef std err t P>|t| [0.025 0.975]
Intercept 9.3166 0.560 16.622 0.000 8.211 10.422
Radio 10.0015 1.012 9.884 0.000 8.006 11.997
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Omnibus: 20.193 Durbin-Watson: 1.923
Prob(Omnibus): 0.000 Jarque-Bera (JB): 23.115
Skew: -0.785 Prob(JB): 9.56e-06
Kurtosis: 3.582 Cond. No. 4.13


Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified." ], "text/plain": [ "\n", "\"\"\"\n", " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: Sales R-squared: 0.333\n", "Model: OLS Adj. R-squared: 0.329\n", "Method: Least Squares F-statistic: 97.69\n", "Date: Wed, 26 Sep 2018 Prob (F-statistic): 5.99e-19\n", "Time: 16:15:16 Log-Likelihood: -566.70\n", "No. Observations: 198 AIC: 1137.\n", "Df Residuals: 196 BIC: 1144.\n", "Df Model: 1 \n", "Covariance Type: nonrobust \n", "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "Intercept 9.3166 0.560 16.622 0.000 8.211 10.422\n", "Radio 10.0015 1.012 9.884 0.000 8.006 11.997\n", "==============================================================================\n", "Omnibus: 20.193 Durbin-Watson: 1.923\n", "Prob(Omnibus): 0.000 Jarque-Bera (JB): 23.115\n", "Skew: -0.785 Prob(JB): 9.56e-06\n", "Kurtosis: 3.582 Cond. No. 4.13\n", "==============================================================================\n", "\n", "Warnings:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", "\"\"\"" ] }, "execution_count": 91, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lm = smf.ols(formula='Sales ~ Radio ', data=df).fit()\n", "lm.summary()" ] }, { "cell_type": "code", "execution_count": 92, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
OLS Regression Results
Dep. Variable: Sales R-squared: 0.607
Model: OLS Adj. R-squared: 0.605
Method: Least Squares F-statistic: 302.8
Date: Wed, 26 Sep 2018 Prob (F-statistic): 1.29e-41
Time: 16:15:35 Log-Likelihood: -514.27
No. Observations: 198 AIC: 1033.
Df Residuals: 196 BIC: 1039.
Df Model: 1
Covariance Type: nonrobust
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
coef std err t P>|t| [0.025 0.975]
Intercept 7.0306 0.462 15.219 0.000 6.120 7.942
TV 13.9111 0.799 17.400 0.000 12.334 15.488
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Omnibus: 0.404 Durbin-Watson: 1.872
Prob(Omnibus): 0.817 Jarque-Bera (JB): 0.551
Skew: -0.062 Prob(JB): 0.759
Kurtosis: 2.774 Cond. No. 4.37


Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified." ], "text/plain": [ "\n", "\"\"\"\n", " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: Sales R-squared: 0.607\n", "Model: OLS Adj. R-squared: 0.605\n", "Method: Least Squares F-statistic: 302.8\n", "Date: Wed, 26 Sep 2018 Prob (F-statistic): 1.29e-41\n", "Time: 16:15:35 Log-Likelihood: -514.27\n", "No. Observations: 198 AIC: 1033.\n", "Df Residuals: 196 BIC: 1039.\n", "Df Model: 1 \n", "Covariance Type: nonrobust \n", "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "Intercept 7.0306 0.462 15.219 0.000 6.120 7.942\n", "TV 13.9111 0.799 17.400 0.000 12.334 15.488\n", "==============================================================================\n", "Omnibus: 0.404 Durbin-Watson: 1.872\n", "Prob(Omnibus): 0.817 Jarque-Bera (JB): 0.551\n", "Skew: -0.062 Prob(JB): 0.759\n", "Kurtosis: 2.774 Cond. No. 4.37\n", "==============================================================================\n", "\n", "Warnings:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", "\"\"\"" ] }, "execution_count": 92, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lm_2 = smf.ols(formula='Sales ~ TV ', data=df).fit()\n", "lm_2.summary()" ] }, { "cell_type": "code", "execution_count": 93, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
OLS Regression Results
Dep. Variable: Sales R-squared: 0.048
Model: OLS Adj. R-squared: 0.043
Method: Least Squares F-statistic: 9.927
Date: Wed, 26 Sep 2018 Prob (F-statistic): 0.00188
Time: 16:15:57 Log-Likelihood: -601.84
No. Observations: 198 AIC: 1208.
Df Residuals: 196 BIC: 1214.
Df Model: 1
Covariance Type: nonrobust
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
coef std err t P>|t| [0.025 0.975]
Intercept 12.3193 0.639 19.274 0.000 11.059 13.580
Newspaper 4.9882 1.583 3.151 0.002 1.866 8.111
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Omnibus: 5.835 Durbin-Watson: 1.916
Prob(Omnibus): 0.054 Jarque-Bera (JB): 5.303
Skew: 0.333 Prob(JB): 0.0706
Kurtosis: 2.555 Cond. No. 4.89


Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified." ], "text/plain": [ "\n", "\"\"\"\n", " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: Sales R-squared: 0.048\n", "Model: OLS Adj. R-squared: 0.043\n", "Method: Least Squares F-statistic: 9.927\n", "Date: Wed, 26 Sep 2018 Prob (F-statistic): 0.00188\n", "Time: 16:15:57 Log-Likelihood: -601.84\n", "No. Observations: 198 AIC: 1208.\n", "Df Residuals: 196 BIC: 1214.\n", "Df Model: 1 \n", "Covariance Type: nonrobust \n", "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "Intercept 12.3193 0.639 19.274 0.000 11.059 13.580\n", "Newspaper 4.9882 1.583 3.151 0.002 1.866 8.111\n", "==============================================================================\n", "Omnibus: 5.835 Durbin-Watson: 1.916\n", "Prob(Omnibus): 0.054 Jarque-Bera (JB): 5.303\n", "Skew: 0.333 Prob(JB): 0.0706\n", "Kurtosis: 2.555 Cond. No. 4.89\n", "==============================================================================\n", "\n", "Warnings:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", "\"\"\"" ] }, "execution_count": 93, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lm_3 = smf.ols(formula='Sales ~ Newspaper ', data=df).fit()\n", "lm_3.summary()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Dans le modèle Sales~TV, l'intercept représente le gain même sans dépenser de l'argent à la publicité télévisée." 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Part 3" ] }, { "cell_type": "code", "execution_count": 94, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
OLS Regression Results
Dep. Variable: Sales R-squared: 0.895
Model: OLS Adj. R-squared: 0.894
Method: Least Squares F-statistic: 553.5
Date: Wed, 26 Sep 2018 Prob (F-statistic): 8.35e-95
Time: 16:18:24 Log-Likelihood: -383.24
No. Observations: 198 AIC: 774.5
Df Residuals: 194 BIC: 787.6
Df Model: 3
Covariance Type: nonrobust
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
coef std err t P>|t| [0.025 0.975]
Intercept 2.9523 0.318 9.280 0.000 2.325 3.580
Radio 9.3521 0.430 21.772 0.000 8.505 10.199
TV 13.4147 0.415 32.293 0.000 12.595 14.234
Newspaper -0.1053 0.563 -0.187 0.852 -1.215 1.005
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Omnibus: 59.593 Durbin-Watson: 2.041
Prob(Omnibus): 0.000 Jarque-Bera (JB): 147.654
Skew: -1.324 Prob(JB): 8.66e-33
Kurtosis: 6.299 Cond. No. 6.35


Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified." ], "text/plain": [ "\n", "\"\"\"\n", " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: Sales R-squared: 0.895\n", "Model: OLS Adj. R-squared: 0.894\n", "Method: Least Squares F-statistic: 553.5\n", "Date: Wed, 26 Sep 2018 Prob (F-statistic): 8.35e-95\n", "Time: 16:18:24 Log-Likelihood: -383.24\n", "No. Observations: 198 AIC: 774.5\n", "Df Residuals: 194 BIC: 787.6\n", "Df Model: 3 \n", "Covariance Type: nonrobust \n", "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "Intercept 2.9523 0.318 9.280 0.000 2.325 3.580\n", "Radio 9.3521 0.430 21.772 0.000 8.505 10.199\n", "TV 13.4147 0.415 32.293 0.000 12.595 14.234\n", "Newspaper -0.1053 0.563 -0.187 0.852 -1.215 1.005\n", "==============================================================================\n", "Omnibus: 59.593 Durbin-Watson: 2.041\n", "Prob(Omnibus): 0.000 Jarque-Bera (JB): 147.654\n", "Skew: -1.324 Prob(JB): 8.66e-33\n", "Kurtosis: 6.299 Cond. No. 6.35\n", "==============================================================================\n", "\n", "Warnings:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", "\"\"\"" ] }, "execution_count": 94, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# modele multi variables\n", "\n", "lm_4 = smf.ols(formula='Sales ~ Radio + TV + Newspaper', data=df).fit()\n", "lm_4.summary()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# This model is fitted on the scaled TV column (TV divided by its maximum), so the TV coefficient is the\n", "# change in predicted Sales when scaled TV increases by 1, with Radio and Newspaper held constant.\n", "# Raising the TV budget by 50 in its original units changes scaled TV by 50 / (maximum TV budget), so\n", "# predicted Sales increase by (50 / maximum TV budget) times the TV coefficient, not by 50 times the coefficient."
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# The Newspaper coefficient is almost zero and is not statistically significant\n", "# (p-value 0.852; the 95% confidence interval [-1.215, 1.005] contains 0).\n", "# Its negative sign is therefore not evidence that newspaper advertising reduces sales: once TV and Radio\n", "# are in the same model, Newspaper has no detectable effect of its own and the sign of such a small\n", "# estimate is within the noise." ] }, { "cell_type": "code", "execution_count": 95, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
OLS Regression Results
Dep. Variable: Sales R-squared: 0.895
Model: OLS Adj. R-squared: 0.894
Method: Least Squares F-statistic: 834.4
Date: Wed, 26 Sep 2018 Prob (F-statistic): 2.60e-96
Time: 16:24:40 Log-Likelihood: -383.26
No. Observations: 198 AIC: 772.5
Df Residuals: 195 BIC: 782.4
Df Model: 2
Covariance Type: nonrobust
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
coef std err t P>|t| [0.025 0.975]
Intercept 2.9315 0.297 9.861 0.000 2.345 3.518
Radio 9.3244 0.402 23.182 0.000 8.531 10.118
TV 13.4120 0.414 32.385 0.000 12.595 14.229
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Omnibus: 59.228 Durbin-Watson: 2.038
Prob(Omnibus): 0.000 Jarque-Bera (JB): 145.127
Skew: -1.321 Prob(JB): 3.06e-32
Kurtosis: 6.257 Cond. No. 4.97


Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified." ], "text/plain": [ "\n", "\"\"\"\n", " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: Sales R-squared: 0.895\n", "Model: OLS Adj. R-squared: 0.894\n", "Method: Least Squares F-statistic: 834.4\n", "Date: Wed, 26 Sep 2018 Prob (F-statistic): 2.60e-96\n", "Time: 16:24:40 Log-Likelihood: -383.26\n", "No. Observations: 198 AIC: 772.5\n", "Df Residuals: 195 BIC: 782.4\n", "Df Model: 2 \n", "Covariance Type: nonrobust \n", "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "Intercept 2.9315 0.297 9.861 0.000 2.345 3.518\n", "Radio 9.3244 0.402 23.182 0.000 8.531 10.118\n", "TV 13.4120 0.414 32.385 0.000 12.595 14.229\n", "==============================================================================\n", "Omnibus: 59.228 Durbin-Watson: 2.038\n", "Prob(Omnibus): 0.000 Jarque-Bera (JB): 145.127\n", "Skew: -1.321 Prob(JB): 3.06e-32\n", "Kurtosis: 6.257 Cond. No. 
4.97\n", "==============================================================================\n", "\n", "Warnings:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", "\"\"\"" ] }, "execution_count": 95, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# The model without Newspaper:\n", "\n", "lm_5 = smf.ols(formula='Sales ~ Radio + TV', data=df).fit()\n", "lm_5.summary()" ] },
{ "cell_type": "code", "execution_count": 97, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2.8100991768174137\n", "2.8106062513305865\n" ] } ], "source": [ "# MSE on the fitted data for the full model (lm_4) and for the model without Newspaper (lm_5)\n", "mse_4 = mean_squared_error(df.Sales, lm_4.fittedvalues)\n", "mse_5 = mean_squared_error(df.Sales, lm_5.fittedvalues)\n", "print(mse_4)\n", "print(mse_5)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Removing Newspaper leaves the MSE on the fitted data practically unchanged (2.8101 with it,\n", "# 2.8106 without): the model is neither better nor worse, so Newspaper adds almost nothing\n", "# once Radio and TV are in the model." ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "# Part 4:" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Add the new TV x Radio interaction variable.\n", "# assign() returns a new frame, so nothing is written into a slice of the loaded data\n", "# (the direct column assignment used here before raised SettingWithCopyWarning).\n", "\n", "df = df.assign(TV_Radio=df.TV * df.Radio)" ] },
{ "cell_type": "code", "execution_count": 99, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TVRadioNewspaperSalesTV_Radio
00.7837190.7620970.77404922.10.597270
10.1515670.7923390.50447410.40.120092
20.0585830.9254030.7751689.30.054213
30.5160080.8326610.65436218.50.429660
40.6158040.2177420.65324412.90.134086
50.0296320.9858870.8389267.20.029214
60.1958450.6612900.26286411.80.129510
70.4094010.3951610.12975413.20.161779
80.0292920.0423390.0111864.80.001240
90.6805180.0524190.23713610.60.035672
100.2251360.1169350.2706948.60.026326
110.7312670.4838710.04474317.40.353839
120.0810630.7076610.7371369.20.057365
130.3320840.1532260.0805379.70.050884
140.6951630.6633060.51454119.00.461106
150.6655310.9616940.59172322.40.640037
170.9584470.7983870.62416124.40.765212
180.2356950.4133060.20469811.30.097414
190.5017030.4818550.21364714.60.241748
200.7438690.5584680.59731518.00.415427
210.8085830.1028230.26286412.50.083141
220.0449590.3205650.5548105.60.014412
230.7775890.3407260.29306515.50.264944
240.2121930.2540320.2046989.70.053904
250.8954360.0705650.21812112.00.063186
260.4867170.5907260.14094015.00.287516
270.8177790.3366940.25615215.90.275341
280.8474110.5463710.25615218.90.463001
290.2404630.3225810.45637610.50.077569
300.9976160.5705650.48322121.40.569204
..................
1700.1703000.2338710.2058178.40.039828
1710.5602860.4213710.53020114.50.236088
1720.0667570.4052420.1901577.60.027053
1730.5735690.1431450.14317711.70.082104
1740.7574930.0685480.14653211.50.051925
1750.9431200.9858870.46756227.00.929810
1760.8460490.6088710.22706920.20.515135
1770.5797000.1572580.39373611.70.091163
1780.9424390.0463710.26510111.80.043702
1790.5640330.2016130.19686812.60.113716
1800.5333790.0524190.09284110.50.027959
1810.7442100.1088710.30648812.20.081023
1820.1914170.1149190.3322158.70.021998
1830.9795640.8669350.80313226.20.849219
1840.8644410.4294350.33557017.60.371222
1850.6982290.9092740.21923922.60.634882
1860.4751360.0423390.29753910.30.020117
1870.6508860.5786290.20357917.30.376621
1880.9741140.2802420.04138715.90.272988
1890.0636920.2439520.2617456.70.015538
1900.1345370.8286290.06487710.80.111481
1910.2571530.2177420.0671149.90.055993
1920.0585830.0826610.3534685.90.004843
1930.5681200.8467740.04026819.60.481069
1940.5098770.7177420.06711417.30.365960
1950.1301090.0745970.1543627.60.009706
1960.3208450.0987900.0906049.70.031696
1970.6028610.1875000.07158812.80.113036
1980.9659400.8467740.74049225.50.817933
1990.7905310.1733870.09731513.40.137068
\n", "

198 rows × 5 columns

\n", "
" ], "text/plain": [ " TV Radio Newspaper Sales TV_Radio\n", "0 0.783719 0.762097 0.774049 22.1 0.597270\n", "1 0.151567 0.792339 0.504474 10.4 0.120092\n", "2 0.058583 0.925403 0.775168 9.3 0.054213\n", "3 0.516008 0.832661 0.654362 18.5 0.429660\n", "4 0.615804 0.217742 0.653244 12.9 0.134086\n", "5 0.029632 0.985887 0.838926 7.2 0.029214\n", "6 0.195845 0.661290 0.262864 11.8 0.129510\n", "7 0.409401 0.395161 0.129754 13.2 0.161779\n", "8 0.029292 0.042339 0.011186 4.8 0.001240\n", "9 0.680518 0.052419 0.237136 10.6 0.035672\n", "10 0.225136 0.116935 0.270694 8.6 0.026326\n", "11 0.731267 0.483871 0.044743 17.4 0.353839\n", "12 0.081063 0.707661 0.737136 9.2 0.057365\n", "13 0.332084 0.153226 0.080537 9.7 0.050884\n", "14 0.695163 0.663306 0.514541 19.0 0.461106\n", "15 0.665531 0.961694 0.591723 22.4 0.640037\n", "17 0.958447 0.798387 0.624161 24.4 0.765212\n", "18 0.235695 0.413306 0.204698 11.3 0.097414\n", "19 0.501703 0.481855 0.213647 14.6 0.241748\n", "20 0.743869 0.558468 0.597315 18.0 0.415427\n", "21 0.808583 0.102823 0.262864 12.5 0.083141\n", "22 0.044959 0.320565 0.554810 5.6 0.014412\n", "23 0.777589 0.340726 0.293065 15.5 0.264944\n", "24 0.212193 0.254032 0.204698 9.7 0.053904\n", "25 0.895436 0.070565 0.218121 12.0 0.063186\n", "26 0.486717 0.590726 0.140940 15.0 0.287516\n", "27 0.817779 0.336694 0.256152 15.9 0.275341\n", "28 0.847411 0.546371 0.256152 18.9 0.463001\n", "29 0.240463 0.322581 0.456376 10.5 0.077569\n", "30 0.997616 0.570565 0.483221 21.4 0.569204\n", ".. ... ... ... ... 
...\n", "170 0.170300 0.233871 0.205817 8.4 0.039828\n", "171 0.560286 0.421371 0.530201 14.5 0.236088\n", "172 0.066757 0.405242 0.190157 7.6 0.027053\n", "173 0.573569 0.143145 0.143177 11.7 0.082104\n", "174 0.757493 0.068548 0.146532 11.5 0.051925\n", "175 0.943120 0.985887 0.467562 27.0 0.929810\n", "176 0.846049 0.608871 0.227069 20.2 0.515135\n", "177 0.579700 0.157258 0.393736 11.7 0.091163\n", "178 0.942439 0.046371 0.265101 11.8 0.043702\n", "179 0.564033 0.201613 0.196868 12.6 0.113716\n", "180 0.533379 0.052419 0.092841 10.5 0.027959\n", "181 0.744210 0.108871 0.306488 12.2 0.081023\n", "182 0.191417 0.114919 0.332215 8.7 0.021998\n", "183 0.979564 0.866935 0.803132 26.2 0.849219\n", "184 0.864441 0.429435 0.335570 17.6 0.371222\n", "185 0.698229 0.909274 0.219239 22.6 0.634882\n", "186 0.475136 0.042339 0.297539 10.3 0.020117\n", "187 0.650886 0.578629 0.203579 17.3 0.376621\n", "188 0.974114 0.280242 0.041387 15.9 0.272988\n", "189 0.063692 0.243952 0.261745 6.7 0.015538\n", "190 0.134537 0.828629 0.064877 10.8 0.111481\n", "191 0.257153 0.217742 0.067114 9.9 0.055993\n", "192 0.058583 0.082661 0.353468 5.9 0.004843\n", "193 0.568120 0.846774 0.040268 19.6 0.481069\n", "194 0.509877 0.717742 0.067114 17.3 0.365960\n", "195 0.130109 0.074597 0.154362 7.6 0.009706\n", "196 0.320845 0.098790 0.090604 9.7 0.031696\n", "197 0.602861 0.187500 0.071588 12.8 0.113036\n", "198 0.965940 0.846774 0.740492 25.5 0.817933\n", "199 0.790531 0.173387 0.097315 13.4 0.137068\n", "\n", "[198 rows x 5 columns]" ] }, "execution_count": 99, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 100, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
OLS Regression Results
Dep. Variable: Sales R-squared: 0.968
Model: OLS Adj. R-squared: 0.967
Method: Least Squares F-statistic: 1934.
Date: Wed, 26 Sep 2018 Prob (F-statistic): 3.19e-144
Time: 16:41:33 Log-Likelihood: -267.07
No. Observations: 198 AIC: 542.1
Df Residuals: 194 BIC: 555.3
Df Model: 3
Covariance Type: nonrobust
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
coef std err t P>|t| [0.025 0.975]
Intercept 6.7577 0.247 27.304 0.000 6.270 7.246
Radio 1.3688 0.443 3.089 0.002 0.495 2.243
TV 5.5919 0.441 12.682 0.000 4.722 6.462
TV_Radio 15.9617 0.767 20.817 0.000 14.449 17.474
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Omnibus: 126.182 Durbin-Watson: 2.241
Prob(Omnibus): 0.000 Jarque-Bera (JB): 1151.060
Skew: -2.306 Prob(JB): 1.12e-250
Kurtosis: 13.875 Cond. No. 18.1


Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified." ], "text/plain": [ "\n", "\"\"\"\n", " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: Sales R-squared: 0.968\n", "Model: OLS Adj. R-squared: 0.967\n", "Method: Least Squares F-statistic: 1934.\n", "Date: Wed, 26 Sep 2018 Prob (F-statistic): 3.19e-144\n", "Time: 16:41:33 Log-Likelihood: -267.07\n", "No. Observations: 198 AIC: 542.1\n", "Df Residuals: 194 BIC: 555.3\n", "Df Model: 3 \n", "Covariance Type: nonrobust \n", "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "Intercept 6.7577 0.247 27.304 0.000 6.270 7.246\n", "Radio 1.3688 0.443 3.089 0.002 0.495 2.243\n", "TV 5.5919 0.441 12.682 0.000 4.722 6.462\n", "TV_Radio 15.9617 0.767 20.817 0.000 14.449 17.474\n", "==============================================================================\n", "Omnibus: 126.182 Durbin-Watson: 2.241\n", "Prob(Omnibus): 0.000 Jarque-Bera (JB): 1151.060\n", "Skew: -2.306 Prob(JB): 1.12e-250\n", "Kurtosis: 13.875 Cond. No. 
18.1\n", "==============================================================================\n", "\n", "Warnings:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", "\"\"\"" ] }, "execution_count": 100, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Rajoutez au modèle la variable multiplicative:\n", "\n", "lm_6 = smf.ols(formula='Sales ~ Radio + TV + TV_Radio', data=df).fit()\n", "lm_6.summary()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# On peut expliquer la grande influence de cette variable sur le modèle par le fait qu'investir dans les deux à la fois,\n", "# en même temps, est plus efficace qu'investir dans l'un des deux ou dans les deux séparément.\n", "\n", "# C'est-à-dire que si la personne voit la publicité à la fois à la télé et à la radio, cela augmente les chances de vente." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }