{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Part 1:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# All imports used by this notebook are gathered in this first cell so that later\n",
"# cells do not depend on import statements scattered through the notebook.\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"import statsmodels.formula.api as smf\n",
"from sklearn.metrics import mean_squared_error\n",
"\n",
"# Load the advertising dataset (path is relative to the working directory).\n",
"df = pd.read_csv('advertising.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Preview the first rows instead of rendering the entire frame.\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Description de la data\n",
"\n",
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"\n",
"# Histogram of the Radio column, drawn on the explicitly created axes.\n",
"fig, ax = plt.subplots(1, 1)\n",
"df['Radio'].hist(bins=100, ax=ax)\n",
"ax.set_title('Radio')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# La distribution des variables (Histogrammes)\n",
"\n",
"df.hist(bins = 100)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Détection des outliers (Boxplots)\n",
"\n",
"df.boxplot()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Locate the outlier threshold: 99.5th percentile of the Newspaper column.\n",
"\n",
"import numpy as np\n",
"\n",
"np.percentile(df['Newspaper'], q=99.5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Supprimer les outliers\n",
"\n",
"condition = df['Newspaper'] < 100\n",
"df[condition].shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the rows that pass the outlier filter. The explicit .copy() makes df\n",
"# an independent frame, so the column assignments in later cells no longer raise\n",
"# SettingWithCopyWarning.\n",
"df = df[condition].copy()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Vérification:\n",
"\n",
"df.boxplot()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Correlation matrix of the columns, rendered as a heatmap.\n",
"\n",
"import seaborn as sns\n",
"\n",
"corr = df.corr()\n",
"fig, ax = plt.subplots(1, 1, figsize=(9, 9))\n",
"sns.heatmap(\n",
"    corr,\n",
"    xticklabels=corr.columns.values,\n",
"    yticklabels=corr.columns.values,\n",
"    ax=ax,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Scatter plot of each column against the next one. Index -1 is the last column,\n",
"# so the first figure pairs the last column with the first one.\n",
"\n",
"for i in range(-1, 3):\n",
"    x_name, y_name = df.columns[i], df.columns[i + 1]\n",
"    figure = plt.figure()\n",
"    plt.scatter(df.iloc[:, i], df.iloc[:, i + 1])\n",
"    plt.xlabel(str(x_name))\n",
"    plt.ylabel(str(y_name))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Scatter plot of each column against the one two positions after it\n",
"# (index -1 is the last column).\n",
"for i in range(-1, 1):\n",
"    x_name, y_name = df.columns[i], df.columns[i + 2]\n",
"    figure = plt.figure()\n",
"    plt.scatter(df.iloc[:, i], df.iloc[:, i + 2])\n",
"    plt.xlabel(str(x_name))\n",
"    plt.ylabel(str(y_name))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Part 2:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Linear Regression pour les 3 variables:\n",
"\n",
"lm = smf.ols(formula='Sales ~ Radio ', data=df).fit()\n",
"lm.summary()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"lm.pvalues"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"lm_2 = smf.ols(formula='Sales ~ TV ', data=df).fit()\n",
"lm_2.summary()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"lm_3 = smf.ols(formula='Sales ~ Newspaper ', data=df).fit()\n",
"lm_3.summary()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Le coefficient de la regression Sales ~ Radio est plus grand que le coefficient de la regression Sales ~ TV\n",
"# alors que TV est plus corrélé à Sales que Radio, ceci revient au fait que les 2 variables TV et Radio ne sont pas\n",
"# normées de la même façon !"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# calcul de la MSE\n",
"\n",
"from sklearn.metrics import mean_squared_error\n",
"mse = mean_squared_error(df.Sales, lm.fittedvalues)\n",
"mse"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"mse_2 = mean_squared_error(df.Sales, lm_2.fittedvalues)\n",
"mse_2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"mse_3 = mean_squared_error(df.Sales, lm_3.fittedvalues)\n",
"mse_3"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" TV | \n",
" Radio | \n",
" Newspaper | \n",
" Sales | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 198.000000 | \n",
" 198.000000 | \n",
" 198.000000 | \n",
" 198.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 0.499620 | \n",
" 23.130808 | \n",
" 29.777273 | \n",
" 13.980808 | \n",
"
\n",
" \n",
" std | \n",
" 0.291019 | \n",
" 14.862111 | \n",
" 20.446303 | \n",
" 5.196097 | \n",
"
\n",
" \n",
" min | \n",
" 0.002384 | \n",
" 0.000000 | \n",
" 0.300000 | \n",
" 1.600000 | \n",
"
\n",
" \n",
" 25% | \n",
" 0.254768 | \n",
" 9.925000 | \n",
" 12.650000 | \n",
" 10.325000 | \n",
"
\n",
" \n",
" 50% | \n",
" 0.510048 | \n",
" 22.400000 | \n",
" 25.600000 | \n",
" 12.900000 | \n",
"
\n",
" \n",
" 75% | \n",
" 0.744125 | \n",
" 36.325000 | \n",
" 44.050000 | \n",
" 17.375000 | \n",
"
\n",
" \n",
" max | \n",
" 1.000000 | \n",
" 49.600000 | \n",
" 89.400000 | \n",
" 27.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" TV Radio Newspaper Sales\n",
"count 198.000000 198.000000 198.000000 198.000000\n",
"mean 0.499620 23.130808 29.777273 13.980808\n",
"std 0.291019 14.862111 20.446303 5.196097\n",
"min 0.002384 0.000000 0.300000 1.600000\n",
"25% 0.254768 9.925000 12.650000 10.325000\n",
"50% 0.510048 22.400000 25.600000 12.900000\n",
"75% 0.744125 36.325000 44.050000 17.375000\n",
"max 1.000000 49.600000 89.400000 27.000000"
]
},
"execution_count": 88,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Normalization of the variables:\n",
"# rescale each media column by its maximum (so the maximum becomes 1), which puts\n",
"# the linear-regression coefficients on a comparable scale across media.\n",
"#\n",
"# df.assign returns a new frame instead of writing into a filtered slice, which is\n",
"# what triggered SettingWithCopyWarning with column-by-column assignments.\n",
"df = df.assign(\n",
"    TV=df['TV'] / df['TV'].max(),\n",
"    Radio=df['Radio'] / df['Radio'].max(),\n",
"    Newspaper=df['Newspaper'] / df['Newspaper'].max(),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" TV | \n",
" Radio | \n",
" Newspaper | \n",
" Sales | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.783719 | \n",
" 0.762097 | \n",
" 0.774049 | \n",
" 22.1 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.151567 | \n",
" 0.792339 | \n",
" 0.504474 | \n",
" 10.4 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.058583 | \n",
" 0.925403 | \n",
" 0.775168 | \n",
" 9.3 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.516008 | \n",
" 0.832661 | \n",
" 0.654362 | \n",
" 18.5 | \n",
"
\n",
" \n",
" 4 | \n",
" 0.615804 | \n",
" 0.217742 | \n",
" 0.653244 | \n",
" 12.9 | \n",
"
\n",
" \n",
" 5 | \n",
" 0.029632 | \n",
" 0.985887 | \n",
" 0.838926 | \n",
" 7.2 | \n",
"
\n",
" \n",
" 6 | \n",
" 0.195845 | \n",
" 0.661290 | \n",
" 0.262864 | \n",
" 11.8 | \n",
"
\n",
" \n",
" 7 | \n",
" 0.409401 | \n",
" 0.395161 | \n",
" 0.129754 | \n",
" 13.2 | \n",
"
\n",
" \n",
" 8 | \n",
" 0.029292 | \n",
" 0.042339 | \n",
" 0.011186 | \n",
" 4.8 | \n",
"
\n",
" \n",
" 9 | \n",
" 0.680518 | \n",
" 0.052419 | \n",
" 0.237136 | \n",
" 10.6 | \n",
"
\n",
" \n",
" 10 | \n",
" 0.225136 | \n",
" 0.116935 | \n",
" 0.270694 | \n",
" 8.6 | \n",
"
\n",
" \n",
" 11 | \n",
" 0.731267 | \n",
" 0.483871 | \n",
" 0.044743 | \n",
" 17.4 | \n",
"
\n",
" \n",
" 12 | \n",
" 0.081063 | \n",
" 0.707661 | \n",
" 0.737136 | \n",
" 9.2 | \n",
"
\n",
" \n",
" 13 | \n",
" 0.332084 | \n",
" 0.153226 | \n",
" 0.080537 | \n",
" 9.7 | \n",
"
\n",
" \n",
" 14 | \n",
" 0.695163 | \n",
" 0.663306 | \n",
" 0.514541 | \n",
" 19.0 | \n",
"
\n",
" \n",
" 15 | \n",
" 0.665531 | \n",
" 0.961694 | \n",
" 0.591723 | \n",
" 22.4 | \n",
"
\n",
" \n",
" 17 | \n",
" 0.958447 | \n",
" 0.798387 | \n",
" 0.624161 | \n",
" 24.4 | \n",
"
\n",
" \n",
" 18 | \n",
" 0.235695 | \n",
" 0.413306 | \n",
" 0.204698 | \n",
" 11.3 | \n",
"
\n",
" \n",
" 19 | \n",
" 0.501703 | \n",
" 0.481855 | \n",
" 0.213647 | \n",
" 14.6 | \n",
"
\n",
" \n",
" 20 | \n",
" 0.743869 | \n",
" 0.558468 | \n",
" 0.597315 | \n",
" 18.0 | \n",
"
\n",
" \n",
" 21 | \n",
" 0.808583 | \n",
" 0.102823 | \n",
" 0.262864 | \n",
" 12.5 | \n",
"
\n",
" \n",
" 22 | \n",
" 0.044959 | \n",
" 0.320565 | \n",
" 0.554810 | \n",
" 5.6 | \n",
"
\n",
" \n",
" 23 | \n",
" 0.777589 | \n",
" 0.340726 | \n",
" 0.293065 | \n",
" 15.5 | \n",
"
\n",
" \n",
" 24 | \n",
" 0.212193 | \n",
" 0.254032 | \n",
" 0.204698 | \n",
" 9.7 | \n",
"
\n",
" \n",
" 25 | \n",
" 0.895436 | \n",
" 0.070565 | \n",
" 0.218121 | \n",
" 12.0 | \n",
"
\n",
" \n",
" 26 | \n",
" 0.486717 | \n",
" 0.590726 | \n",
" 0.140940 | \n",
" 15.0 | \n",
"
\n",
" \n",
" 27 | \n",
" 0.817779 | \n",
" 0.336694 | \n",
" 0.256152 | \n",
" 15.9 | \n",
"
\n",
" \n",
" 28 | \n",
" 0.847411 | \n",
" 0.546371 | \n",
" 0.256152 | \n",
" 18.9 | \n",
"
\n",
" \n",
" 29 | \n",
" 0.240463 | \n",
" 0.322581 | \n",
" 0.456376 | \n",
" 10.5 | \n",
"
\n",
" \n",
" 30 | \n",
" 0.997616 | \n",
" 0.570565 | \n",
" 0.483221 | \n",
" 21.4 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 170 | \n",
" 0.170300 | \n",
" 0.233871 | \n",
" 0.205817 | \n",
" 8.4 | \n",
"
\n",
" \n",
" 171 | \n",
" 0.560286 | \n",
" 0.421371 | \n",
" 0.530201 | \n",
" 14.5 | \n",
"
\n",
" \n",
" 172 | \n",
" 0.066757 | \n",
" 0.405242 | \n",
" 0.190157 | \n",
" 7.6 | \n",
"
\n",
" \n",
" 173 | \n",
" 0.573569 | \n",
" 0.143145 | \n",
" 0.143177 | \n",
" 11.7 | \n",
"
\n",
" \n",
" 174 | \n",
" 0.757493 | \n",
" 0.068548 | \n",
" 0.146532 | \n",
" 11.5 | \n",
"
\n",
" \n",
" 175 | \n",
" 0.943120 | \n",
" 0.985887 | \n",
" 0.467562 | \n",
" 27.0 | \n",
"
\n",
" \n",
" 176 | \n",
" 0.846049 | \n",
" 0.608871 | \n",
" 0.227069 | \n",
" 20.2 | \n",
"
\n",
" \n",
" 177 | \n",
" 0.579700 | \n",
" 0.157258 | \n",
" 0.393736 | \n",
" 11.7 | \n",
"
\n",
" \n",
" 178 | \n",
" 0.942439 | \n",
" 0.046371 | \n",
" 0.265101 | \n",
" 11.8 | \n",
"
\n",
" \n",
" 179 | \n",
" 0.564033 | \n",
" 0.201613 | \n",
" 0.196868 | \n",
" 12.6 | \n",
"
\n",
" \n",
" 180 | \n",
" 0.533379 | \n",
" 0.052419 | \n",
" 0.092841 | \n",
" 10.5 | \n",
"
\n",
" \n",
" 181 | \n",
" 0.744210 | \n",
" 0.108871 | \n",
" 0.306488 | \n",
" 12.2 | \n",
"
\n",
" \n",
" 182 | \n",
" 0.191417 | \n",
" 0.114919 | \n",
" 0.332215 | \n",
" 8.7 | \n",
"
\n",
" \n",
" 183 | \n",
" 0.979564 | \n",
" 0.866935 | \n",
" 0.803132 | \n",
" 26.2 | \n",
"
\n",
" \n",
" 184 | \n",
" 0.864441 | \n",
" 0.429435 | \n",
" 0.335570 | \n",
" 17.6 | \n",
"
\n",
" \n",
" 185 | \n",
" 0.698229 | \n",
" 0.909274 | \n",
" 0.219239 | \n",
" 22.6 | \n",
"
\n",
" \n",
" 186 | \n",
" 0.475136 | \n",
" 0.042339 | \n",
" 0.297539 | \n",
" 10.3 | \n",
"
\n",
" \n",
" 187 | \n",
" 0.650886 | \n",
" 0.578629 | \n",
" 0.203579 | \n",
" 17.3 | \n",
"
\n",
" \n",
" 188 | \n",
" 0.974114 | \n",
" 0.280242 | \n",
" 0.041387 | \n",
" 15.9 | \n",
"
\n",
" \n",
" 189 | \n",
" 0.063692 | \n",
" 0.243952 | \n",
" 0.261745 | \n",
" 6.7 | \n",
"
\n",
" \n",
" 190 | \n",
" 0.134537 | \n",
" 0.828629 | \n",
" 0.064877 | \n",
" 10.8 | \n",
"
\n",
" \n",
" 191 | \n",
" 0.257153 | \n",
" 0.217742 | \n",
" 0.067114 | \n",
" 9.9 | \n",
"
\n",
" \n",
" 192 | \n",
" 0.058583 | \n",
" 0.082661 | \n",
" 0.353468 | \n",
" 5.9 | \n",
"
\n",
" \n",
" 193 | \n",
" 0.568120 | \n",
" 0.846774 | \n",
" 0.040268 | \n",
" 19.6 | \n",
"
\n",
" \n",
" 194 | \n",
" 0.509877 | \n",
" 0.717742 | \n",
" 0.067114 | \n",
" 17.3 | \n",
"
\n",
" \n",
" 195 | \n",
" 0.130109 | \n",
" 0.074597 | \n",
" 0.154362 | \n",
" 7.6 | \n",
"
\n",
" \n",
" 196 | \n",
" 0.320845 | \n",
" 0.098790 | \n",
" 0.090604 | \n",
" 9.7 | \n",
"
\n",
" \n",
" 197 | \n",
" 0.602861 | \n",
" 0.187500 | \n",
" 0.071588 | \n",
" 12.8 | \n",
"
\n",
" \n",
" 198 | \n",
" 0.965940 | \n",
" 0.846774 | \n",
" 0.740492 | \n",
" 25.5 | \n",
"
\n",
" \n",
" 199 | \n",
" 0.790531 | \n",
" 0.173387 | \n",
" 0.097315 | \n",
" 13.4 | \n",
"
\n",
" \n",
"
\n",
"
198 rows × 4 columns
\n",
"
"
],
"text/plain": [
" TV Radio Newspaper Sales\n",
"0 0.783719 0.762097 0.774049 22.1\n",
"1 0.151567 0.792339 0.504474 10.4\n",
"2 0.058583 0.925403 0.775168 9.3\n",
"3 0.516008 0.832661 0.654362 18.5\n",
"4 0.615804 0.217742 0.653244 12.9\n",
"5 0.029632 0.985887 0.838926 7.2\n",
"6 0.195845 0.661290 0.262864 11.8\n",
"7 0.409401 0.395161 0.129754 13.2\n",
"8 0.029292 0.042339 0.011186 4.8\n",
"9 0.680518 0.052419 0.237136 10.6\n",
"10 0.225136 0.116935 0.270694 8.6\n",
"11 0.731267 0.483871 0.044743 17.4\n",
"12 0.081063 0.707661 0.737136 9.2\n",
"13 0.332084 0.153226 0.080537 9.7\n",
"14 0.695163 0.663306 0.514541 19.0\n",
"15 0.665531 0.961694 0.591723 22.4\n",
"17 0.958447 0.798387 0.624161 24.4\n",
"18 0.235695 0.413306 0.204698 11.3\n",
"19 0.501703 0.481855 0.213647 14.6\n",
"20 0.743869 0.558468 0.597315 18.0\n",
"21 0.808583 0.102823 0.262864 12.5\n",
"22 0.044959 0.320565 0.554810 5.6\n",
"23 0.777589 0.340726 0.293065 15.5\n",
"24 0.212193 0.254032 0.204698 9.7\n",
"25 0.895436 0.070565 0.218121 12.0\n",
"26 0.486717 0.590726 0.140940 15.0\n",
"27 0.817779 0.336694 0.256152 15.9\n",
"28 0.847411 0.546371 0.256152 18.9\n",
"29 0.240463 0.322581 0.456376 10.5\n",
"30 0.997616 0.570565 0.483221 21.4\n",
".. ... ... ... ...\n",
"170 0.170300 0.233871 0.205817 8.4\n",
"171 0.560286 0.421371 0.530201 14.5\n",
"172 0.066757 0.405242 0.190157 7.6\n",
"173 0.573569 0.143145 0.143177 11.7\n",
"174 0.757493 0.068548 0.146532 11.5\n",
"175 0.943120 0.985887 0.467562 27.0\n",
"176 0.846049 0.608871 0.227069 20.2\n",
"177 0.579700 0.157258 0.393736 11.7\n",
"178 0.942439 0.046371 0.265101 11.8\n",
"179 0.564033 0.201613 0.196868 12.6\n",
"180 0.533379 0.052419 0.092841 10.5\n",
"181 0.744210 0.108871 0.306488 12.2\n",
"182 0.191417 0.114919 0.332215 8.7\n",
"183 0.979564 0.866935 0.803132 26.2\n",
"184 0.864441 0.429435 0.335570 17.6\n",
"185 0.698229 0.909274 0.219239 22.6\n",
"186 0.475136 0.042339 0.297539 10.3\n",
"187 0.650886 0.578629 0.203579 17.3\n",
"188 0.974114 0.280242 0.041387 15.9\n",
"189 0.063692 0.243952 0.261745 6.7\n",
"190 0.134537 0.828629 0.064877 10.8\n",
"191 0.257153 0.217742 0.067114 9.9\n",
"192 0.058583 0.082661 0.353468 5.9\n",
"193 0.568120 0.846774 0.040268 19.6\n",
"194 0.509877 0.717742 0.067114 17.3\n",
"195 0.130109 0.074597 0.154362 7.6\n",
"196 0.320845 0.098790 0.090604 9.7\n",
"197 0.602861 0.187500 0.071588 12.8\n",
"198 0.965940 0.846774 0.740492 25.5\n",
"199 0.790531 0.173387 0.097315 13.4\n",
"\n",
"[198 rows x 4 columns]"
]
},
"execution_count": 90,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"OLS Regression Results\n",
"\n",
" Dep. Variable: | Sales | R-squared: | 0.333 | \n",
"
\n",
"\n",
" Model: | OLS | Adj. R-squared: | 0.329 | \n",
"
\n",
"\n",
" Method: | Least Squares | F-statistic: | 97.69 | \n",
"
\n",
"\n",
" Date: | Wed, 26 Sep 2018 | Prob (F-statistic): | 5.99e-19 | \n",
"
\n",
"\n",
" Time: | 16:15:16 | Log-Likelihood: | -566.70 | \n",
"
\n",
"\n",
" No. Observations: | 198 | AIC: | 1137. | \n",
"
\n",
"\n",
" Df Residuals: | 196 | BIC: | 1144. | \n",
"
\n",
"\n",
" Df Model: | 1 | | | \n",
"
\n",
"\n",
" Covariance Type: | nonrobust | | | \n",
"
\n",
"
\n",
"\n",
"\n",
" | coef | std err | t | P>|t| | [0.025 | 0.975] | \n",
"
\n",
"\n",
" Intercept | 9.3166 | 0.560 | 16.622 | 0.000 | 8.211 | 10.422 | \n",
"
\n",
"\n",
" Radio | 10.0015 | 1.012 | 9.884 | 0.000 | 8.006 | 11.997 | \n",
"
\n",
"
\n",
"\n",
"\n",
" Omnibus: | 20.193 | Durbin-Watson: | 1.923 | \n",
"
\n",
"\n",
" Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 23.115 | \n",
"
\n",
"\n",
" Skew: | -0.785 | Prob(JB): | 9.56e-06 | \n",
"
\n",
"\n",
" Kurtosis: | 3.582 | Cond. No. | 4.13 | \n",
"
\n",
"
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
],
"text/plain": [
"\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: Sales R-squared: 0.333\n",
"Model: OLS Adj. R-squared: 0.329\n",
"Method: Least Squares F-statistic: 97.69\n",
"Date: Wed, 26 Sep 2018 Prob (F-statistic): 5.99e-19\n",
"Time: 16:15:16 Log-Likelihood: -566.70\n",
"No. Observations: 198 AIC: 1137.\n",
"Df Residuals: 196 BIC: 1144.\n",
"Df Model: 1 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"Intercept 9.3166 0.560 16.622 0.000 8.211 10.422\n",
"Radio 10.0015 1.012 9.884 0.000 8.006 11.997\n",
"==============================================================================\n",
"Omnibus: 20.193 Durbin-Watson: 1.923\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 23.115\n",
"Skew: -0.785 Prob(JB): 9.56e-06\n",
"Kurtosis: 3.582 Cond. No. 4.13\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 91,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lm = smf.ols(formula='Sales ~ Radio ', data=df).fit()\n",
"lm.summary()"
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"OLS Regression Results\n",
"\n",
" Dep. Variable: | Sales | R-squared: | 0.607 | \n",
"
\n",
"\n",
" Model: | OLS | Adj. R-squared: | 0.605 | \n",
"
\n",
"\n",
" Method: | Least Squares | F-statistic: | 302.8 | \n",
"
\n",
"\n",
" Date: | Wed, 26 Sep 2018 | Prob (F-statistic): | 1.29e-41 | \n",
"
\n",
"\n",
" Time: | 16:15:35 | Log-Likelihood: | -514.27 | \n",
"
\n",
"\n",
" No. Observations: | 198 | AIC: | 1033. | \n",
"
\n",
"\n",
" Df Residuals: | 196 | BIC: | 1039. | \n",
"
\n",
"\n",
" Df Model: | 1 | | | \n",
"
\n",
"\n",
" Covariance Type: | nonrobust | | | \n",
"
\n",
"
\n",
"\n",
"\n",
" | coef | std err | t | P>|t| | [0.025 | 0.975] | \n",
"
\n",
"\n",
" Intercept | 7.0306 | 0.462 | 15.219 | 0.000 | 6.120 | 7.942 | \n",
"
\n",
"\n",
" TV | 13.9111 | 0.799 | 17.400 | 0.000 | 12.334 | 15.488 | \n",
"
\n",
"
\n",
"\n",
"\n",
" Omnibus: | 0.404 | Durbin-Watson: | 1.872 | \n",
"
\n",
"\n",
" Prob(Omnibus): | 0.817 | Jarque-Bera (JB): | 0.551 | \n",
"
\n",
"\n",
" Skew: | -0.062 | Prob(JB): | 0.759 | \n",
"
\n",
"\n",
" Kurtosis: | 2.774 | Cond. No. | 4.37 | \n",
"
\n",
"
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
],
"text/plain": [
"\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: Sales R-squared: 0.607\n",
"Model: OLS Adj. R-squared: 0.605\n",
"Method: Least Squares F-statistic: 302.8\n",
"Date: Wed, 26 Sep 2018 Prob (F-statistic): 1.29e-41\n",
"Time: 16:15:35 Log-Likelihood: -514.27\n",
"No. Observations: 198 AIC: 1033.\n",
"Df Residuals: 196 BIC: 1039.\n",
"Df Model: 1 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"Intercept 7.0306 0.462 15.219 0.000 6.120 7.942\n",
"TV 13.9111 0.799 17.400 0.000 12.334 15.488\n",
"==============================================================================\n",
"Omnibus: 0.404 Durbin-Watson: 1.872\n",
"Prob(Omnibus): 0.817 Jarque-Bera (JB): 0.551\n",
"Skew: -0.062 Prob(JB): 0.759\n",
"Kurtosis: 2.774 Cond. No. 4.37\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 92,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lm_2 = smf.ols(formula='Sales ~ TV ', data=df).fit()\n",
"lm_2.summary()"
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"OLS Regression Results\n",
"\n",
" Dep. Variable: | Sales | R-squared: | 0.048 | \n",
"
\n",
"\n",
" Model: | OLS | Adj. R-squared: | 0.043 | \n",
"
\n",
"\n",
" Method: | Least Squares | F-statistic: | 9.927 | \n",
"
\n",
"\n",
" Date: | Wed, 26 Sep 2018 | Prob (F-statistic): | 0.00188 | \n",
"
\n",
"\n",
" Time: | 16:15:57 | Log-Likelihood: | -601.84 | \n",
"
\n",
"\n",
" No. Observations: | 198 | AIC: | 1208. | \n",
"
\n",
"\n",
" Df Residuals: | 196 | BIC: | 1214. | \n",
"
\n",
"\n",
" Df Model: | 1 | | | \n",
"
\n",
"\n",
" Covariance Type: | nonrobust | | | \n",
"
\n",
"
\n",
"\n",
"\n",
" | coef | std err | t | P>|t| | [0.025 | 0.975] | \n",
"
\n",
"\n",
" Intercept | 12.3193 | 0.639 | 19.274 | 0.000 | 11.059 | 13.580 | \n",
"
\n",
"\n",
" Newspaper | 4.9882 | 1.583 | 3.151 | 0.002 | 1.866 | 8.111 | \n",
"
\n",
"
\n",
"\n",
"\n",
" Omnibus: | 5.835 | Durbin-Watson: | 1.916 | \n",
"
\n",
"\n",
" Prob(Omnibus): | 0.054 | Jarque-Bera (JB): | 5.303 | \n",
"
\n",
"\n",
" Skew: | 0.333 | Prob(JB): | 0.0706 | \n",
"
\n",
"\n",
" Kurtosis: | 2.555 | Cond. No. | 4.89 | \n",
"
\n",
"
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
],
"text/plain": [
"\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: Sales R-squared: 0.048\n",
"Model: OLS Adj. R-squared: 0.043\n",
"Method: Least Squares F-statistic: 9.927\n",
"Date: Wed, 26 Sep 2018 Prob (F-statistic): 0.00188\n",
"Time: 16:15:57 Log-Likelihood: -601.84\n",
"No. Observations: 198 AIC: 1208.\n",
"Df Residuals: 196 BIC: 1214.\n",
"Df Model: 1 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"Intercept 12.3193 0.639 19.274 0.000 11.059 13.580\n",
"Newspaper 4.9882 1.583 3.151 0.002 1.866 8.111\n",
"==============================================================================\n",
"Omnibus: 5.835 Durbin-Watson: 1.916\n",
"Prob(Omnibus): 0.054 Jarque-Bera (JB): 5.303\n",
"Skew: 0.333 Prob(JB): 0.0706\n",
"Kurtosis: 2.555 Cond. No. 4.89\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 93,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lm_3 = smf.ols(formula='Sales ~ Newspaper ', data=df).fit()\n",
"lm_3.summary()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Dans le modèle Sales~TV, l'intercept représente le gain même sans dépenser de l'argent à la publicité télévisée."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Part 3"
]
},
{
"cell_type": "code",
"execution_count": 94,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"OLS Regression Results\n",
"\n",
" Dep. Variable: | Sales | R-squared: | 0.895 | \n",
"
\n",
"\n",
" Model: | OLS | Adj. R-squared: | 0.894 | \n",
"
\n",
"\n",
" Method: | Least Squares | F-statistic: | 553.5 | \n",
"
\n",
"\n",
" Date: | Wed, 26 Sep 2018 | Prob (F-statistic): | 8.35e-95 | \n",
"
\n",
"\n",
" Time: | 16:18:24 | Log-Likelihood: | -383.24 | \n",
"
\n",
"\n",
" No. Observations: | 198 | AIC: | 774.5 | \n",
"
\n",
"\n",
" Df Residuals: | 194 | BIC: | 787.6 | \n",
"
\n",
"\n",
" Df Model: | 3 | | | \n",
"
\n",
"\n",
" Covariance Type: | nonrobust | | | \n",
"
\n",
"
\n",
"\n",
"\n",
" | coef | std err | t | P>|t| | [0.025 | 0.975] | \n",
"
\n",
"\n",
" Intercept | 2.9523 | 0.318 | 9.280 | 0.000 | 2.325 | 3.580 | \n",
"
\n",
"\n",
" Radio | 9.3521 | 0.430 | 21.772 | 0.000 | 8.505 | 10.199 | \n",
"
\n",
"\n",
" TV | 13.4147 | 0.415 | 32.293 | 0.000 | 12.595 | 14.234 | \n",
"
\n",
"\n",
" Newspaper | -0.1053 | 0.563 | -0.187 | 0.852 | -1.215 | 1.005 | \n",
"
\n",
"
\n",
"\n",
"\n",
" Omnibus: | 59.593 | Durbin-Watson: | 2.041 | \n",
"
\n",
"\n",
" Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 147.654 | \n",
"
\n",
"\n",
" Skew: | -1.324 | Prob(JB): | 8.66e-33 | \n",
"
\n",
"\n",
" Kurtosis: | 6.299 | Cond. No. | 6.35 | \n",
"
\n",
"
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
],
"text/plain": [
"\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: Sales R-squared: 0.895\n",
"Model: OLS Adj. R-squared: 0.894\n",
"Method: Least Squares F-statistic: 553.5\n",
"Date: Wed, 26 Sep 2018 Prob (F-statistic): 8.35e-95\n",
"Time: 16:18:24 Log-Likelihood: -383.24\n",
"No. Observations: 198 AIC: 774.5\n",
"Df Residuals: 194 BIC: 787.6\n",
"Df Model: 3 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"Intercept 2.9523 0.318 9.280 0.000 2.325 3.580\n",
"Radio 9.3521 0.430 21.772 0.000 8.505 10.199\n",
"TV 13.4147 0.415 32.293 0.000 12.595 14.234\n",
"Newspaper -0.1053 0.563 -0.187 0.852 -1.215 1.005\n",
"==============================================================================\n",
"Omnibus: 59.593 Durbin-Watson: 2.041\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 147.654\n",
"Skew: -1.324 Prob(JB): 8.66e-33\n",
"Kurtosis: 6.299 Cond. No. 6.35\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 94,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# modele multi variables\n",
"\n",
"lm_4 = smf.ols(formula='Sales ~ Radio + TV + Newspaper', data=df).fit()\n",
"lm_4.summary()"
]
},
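{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# With Radio and Newspaper held fixed, raising the TV budget by 50 (in the original units) raises the predicted\n",
"# sales by (50 / max TV budget) times the TV coefficient, because TV was divided by its maximum before this model\n",
"# was fitted. On unscaled data the increase would be 50 times the TV coefficient of that model."
]
},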
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The Newspaper coefficient is almost zero because Newspaper is not a significant variable in this model\n",
"# (p-value 0.852, and its 95% confidence interval [-1.215, 1.005] contains 0).\n",
"# It became slightly negative once TV and Radio are included in the same model, but with such an interval\n",
"# the sign should not be read as Newspaper advertising reducing sales."
]
},
{
"cell_type": "code",
"execution_count": 95,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"OLS Regression Results\n",
"\n",
" Dep. Variable: | Sales | R-squared: | 0.895 | \n",
"
\n",
"\n",
" Model: | OLS | Adj. R-squared: | 0.894 | \n",
"
\n",
"\n",
" Method: | Least Squares | F-statistic: | 834.4 | \n",
"
\n",
"\n",
" Date: | Wed, 26 Sep 2018 | Prob (F-statistic): | 2.60e-96 | \n",
"
\n",
"\n",
" Time: | 16:24:40 | Log-Likelihood: | -383.26 | \n",
"
\n",
"\n",
" No. Observations: | 198 | AIC: | 772.5 | \n",
"
\n",
"\n",
" Df Residuals: | 195 | BIC: | 782.4 | \n",
"
\n",
"\n",
" Df Model: | 2 | | | \n",
"
\n",
"\n",
" Covariance Type: | nonrobust | | | \n",
"
\n",
"
\n",
"\n",
"\n",
" | coef | std err | t | P>|t| | [0.025 | 0.975] | \n",
"
\n",
"\n",
" Intercept | 2.9315 | 0.297 | 9.861 | 0.000 | 2.345 | 3.518 | \n",
"
\n",
"\n",
" Radio | 9.3244 | 0.402 | 23.182 | 0.000 | 8.531 | 10.118 | \n",
"
\n",
"\n",
" TV | 13.4120 | 0.414 | 32.385 | 0.000 | 12.595 | 14.229 | \n",
"
\n",
"
\n",
"\n",
"\n",
" Omnibus: | 59.228 | Durbin-Watson: | 2.038 | \n",
"
\n",
"\n",
" Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 145.127 | \n",
"
\n",
"\n",
" Skew: | -1.321 | Prob(JB): | 3.06e-32 | \n",
"
\n",
"\n",
" Kurtosis: | 6.257 | Cond. No. | 4.97 | \n",
"
\n",
"
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
],
"text/plain": [
"\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: Sales R-squared: 0.895\n",
"Model: OLS Adj. R-squared: 0.894\n",
"Method: Least Squares F-statistic: 834.4\n",
"Date: Wed, 26 Sep 2018 Prob (F-statistic): 2.60e-96\n",
"Time: 16:24:40 Log-Likelihood: -383.26\n",
"No. Observations: 198 AIC: 772.5\n",
"Df Residuals: 195 BIC: 782.4\n",
"Df Model: 2 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"Intercept 2.9315 0.297 9.861 0.000 2.345 3.518\n",
"Radio 9.3244 0.402 23.182 0.000 8.531 10.118\n",
"TV 13.4120 0.414 32.385 0.000 12.595 14.229\n",
"==============================================================================\n",
"Omnibus: 59.228 Durbin-Watson: 2.038\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 145.127\n",
"Skew: -1.321 Prob(JB): 3.06e-32\n",
"Kurtosis: 6.257 Cond. No. 4.97\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 95,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Le modèle sans Newspaper:\n",
"\n",
"lm_5 = smf.ols(formula='Sales ~ Radio + TV', data=df).fit()\n",
"lm_5.summary()"
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2.8100991768174137\n",
"2.8106062513305865\n"
]
}
],
"source": [
"mse_4 = mean_squared_error(df.Sales, lm_4.fittedvalues)\n",
"mse_5 = mean_squared_error(df.Sales, lm_5.fittedvalues)\n",
"print(mse_4)\n",
"print(mse_5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Même si on a enlevé la variable Newspaper le modèle ne s'améliore pas, il est pratiquement le même."
]
},
{
"cell_type": "code",
"execution_count": 96,
"metadata": {},
"outputs": [],
"source": [
"# Part 4:"
]
},
{
"cell_type": "code",
"execution_count": 98,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\ASUS N752V\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:3: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
" This is separate from the ipykernel package so we can avoid doing imports until\n"
]
}
],
"source": [
"# Ajouter la nouvelle variable tv_radio:\n",
"\n",
"df['TV_Radio'] = df.TV * df.Radio"
]
},
{
"cell_type": "code",
"execution_count": 99,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" TV | \n",
" Radio | \n",
" Newspaper | \n",
" Sales | \n",
" TV_Radio | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.783719 | \n",
" 0.762097 | \n",
" 0.774049 | \n",
" 22.1 | \n",
" 0.597270 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.151567 | \n",
" 0.792339 | \n",
" 0.504474 | \n",
" 10.4 | \n",
" 0.120092 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.058583 | \n",
" 0.925403 | \n",
" 0.775168 | \n",
" 9.3 | \n",
" 0.054213 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.516008 | \n",
" 0.832661 | \n",
" 0.654362 | \n",
" 18.5 | \n",
" 0.429660 | \n",
"
\n",
" \n",
" 4 | \n",
" 0.615804 | \n",
" 0.217742 | \n",
" 0.653244 | \n",
" 12.9 | \n",
" 0.134086 | \n",
"
\n",
" \n",
" 5 | \n",
" 0.029632 | \n",
" 0.985887 | \n",
" 0.838926 | \n",
" 7.2 | \n",
" 0.029214 | \n",
"
\n",
" \n",
" 6 | \n",
" 0.195845 | \n",
" 0.661290 | \n",
" 0.262864 | \n",
" 11.8 | \n",
" 0.129510 | \n",
"
\n",
" \n",
" 7 | \n",
" 0.409401 | \n",
" 0.395161 | \n",
" 0.129754 | \n",
" 13.2 | \n",
" 0.161779 | \n",
"
\n",
" \n",
" 8 | \n",
" 0.029292 | \n",
" 0.042339 | \n",
" 0.011186 | \n",
" 4.8 | \n",
" 0.001240 | \n",
"
\n",
" \n",
" 9 | \n",
" 0.680518 | \n",
" 0.052419 | \n",
" 0.237136 | \n",
" 10.6 | \n",
" 0.035672 | \n",
"
\n",
" \n",
" 10 | \n",
" 0.225136 | \n",
" 0.116935 | \n",
" 0.270694 | \n",
" 8.6 | \n",
" 0.026326 | \n",
"
\n",
" \n",
" 11 | \n",
" 0.731267 | \n",
" 0.483871 | \n",
" 0.044743 | \n",
" 17.4 | \n",
" 0.353839 | \n",
"
\n",
" \n",
" 12 | \n",
" 0.081063 | \n",
" 0.707661 | \n",
" 0.737136 | \n",
" 9.2 | \n",
" 0.057365 | \n",
"
\n",
" \n",
" 13 | \n",
" 0.332084 | \n",
" 0.153226 | \n",
" 0.080537 | \n",
" 9.7 | \n",
" 0.050884 | \n",
"
\n",
" \n",
" 14 | \n",
" 0.695163 | \n",
" 0.663306 | \n",
" 0.514541 | \n",
" 19.0 | \n",
" 0.461106 | \n",
"
\n",
" \n",
" 15 | \n",
" 0.665531 | \n",
" 0.961694 | \n",
" 0.591723 | \n",
" 22.4 | \n",
" 0.640037 | \n",
"
\n",
" \n",
" 17 | \n",
" 0.958447 | \n",
" 0.798387 | \n",
" 0.624161 | \n",
" 24.4 | \n",
" 0.765212 | \n",
"
\n",
" \n",
" 18 | \n",
" 0.235695 | \n",
" 0.413306 | \n",
" 0.204698 | \n",
" 11.3 | \n",
" 0.097414 | \n",
"
\n",
" \n",
" 19 | \n",
" 0.501703 | \n",
" 0.481855 | \n",
" 0.213647 | \n",
" 14.6 | \n",
" 0.241748 | \n",
"
\n",
" \n",
" 20 | \n",
" 0.743869 | \n",
" 0.558468 | \n",
" 0.597315 | \n",
" 18.0 | \n",
" 0.415427 | \n",
"
\n",
" \n",
" 21 | \n",
" 0.808583 | \n",
" 0.102823 | \n",
" 0.262864 | \n",
" 12.5 | \n",
" 0.083141 | \n",
"
\n",
" \n",
" 22 | \n",
" 0.044959 | \n",
" 0.320565 | \n",
" 0.554810 | \n",
" 5.6 | \n",
" 0.014412 | \n",
"
\n",
" \n",
" 23 | \n",
" 0.777589 | \n",
" 0.340726 | \n",
" 0.293065 | \n",
" 15.5 | \n",
" 0.264944 | \n",
"
\n",
" \n",
" 24 | \n",
" 0.212193 | \n",
" 0.254032 | \n",
" 0.204698 | \n",
" 9.7 | \n",
" 0.053904 | \n",
"
\n",
" \n",
" 25 | \n",
" 0.895436 | \n",
" 0.070565 | \n",
" 0.218121 | \n",
" 12.0 | \n",
" 0.063186 | \n",
"
\n",
" \n",
" 26 | \n",
" 0.486717 | \n",
" 0.590726 | \n",
" 0.140940 | \n",
" 15.0 | \n",
" 0.287516 | \n",
"
\n",
" \n",
" 27 | \n",
" 0.817779 | \n",
" 0.336694 | \n",
" 0.256152 | \n",
" 15.9 | \n",
" 0.275341 | \n",
"
\n",
" \n",
" 28 | \n",
" 0.847411 | \n",
" 0.546371 | \n",
" 0.256152 | \n",
" 18.9 | \n",
" 0.463001 | \n",
"
\n",
" \n",
" 29 | \n",
" 0.240463 | \n",
" 0.322581 | \n",
" 0.456376 | \n",
" 10.5 | \n",
" 0.077569 | \n",
"
\n",
" \n",
" 30 | \n",
" 0.997616 | \n",
" 0.570565 | \n",
" 0.483221 | \n",
" 21.4 | \n",
" 0.569204 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 170 | \n",
" 0.170300 | \n",
" 0.233871 | \n",
" 0.205817 | \n",
" 8.4 | \n",
" 0.039828 | \n",
"
\n",
" \n",
" 171 | \n",
" 0.560286 | \n",
" 0.421371 | \n",
" 0.530201 | \n",
" 14.5 | \n",
" 0.236088 | \n",
"
\n",
" \n",
" 172 | \n",
" 0.066757 | \n",
" 0.405242 | \n",
" 0.190157 | \n",
" 7.6 | \n",
" 0.027053 | \n",
"
\n",
" \n",
" 173 | \n",
" 0.573569 | \n",
" 0.143145 | \n",
" 0.143177 | \n",
" 11.7 | \n",
" 0.082104 | \n",
"
\n",
" \n",
" 174 | \n",
" 0.757493 | \n",
" 0.068548 | \n",
" 0.146532 | \n",
" 11.5 | \n",
" 0.051925 | \n",
"
\n",
" \n",
" 175 | \n",
" 0.943120 | \n",
" 0.985887 | \n",
" 0.467562 | \n",
" 27.0 | \n",
" 0.929810 | \n",
"
\n",
" \n",
" 176 | \n",
" 0.846049 | \n",
" 0.608871 | \n",
" 0.227069 | \n",
" 20.2 | \n",
" 0.515135 | \n",
"
\n",
" \n",
" 177 | \n",
" 0.579700 | \n",
" 0.157258 | \n",
" 0.393736 | \n",
" 11.7 | \n",
" 0.091163 | \n",
"
\n",
" \n",
" 178 | \n",
" 0.942439 | \n",
" 0.046371 | \n",
" 0.265101 | \n",
" 11.8 | \n",
" 0.043702 | \n",
"
\n",
" \n",
" 179 | \n",
" 0.564033 | \n",
" 0.201613 | \n",
" 0.196868 | \n",
" 12.6 | \n",
" 0.113716 | \n",
"
\n",
" \n",
" 180 | \n",
" 0.533379 | \n",
" 0.052419 | \n",
" 0.092841 | \n",
" 10.5 | \n",
" 0.027959 | \n",
"
\n",
" \n",
" 181 | \n",
" 0.744210 | \n",
" 0.108871 | \n",
" 0.306488 | \n",
" 12.2 | \n",
" 0.081023 | \n",
"
\n",
" \n",
" 182 | \n",
" 0.191417 | \n",
" 0.114919 | \n",
" 0.332215 | \n",
" 8.7 | \n",
" 0.021998 | \n",
"
\n",
" \n",
" 183 | \n",
" 0.979564 | \n",
" 0.866935 | \n",
" 0.803132 | \n",
" 26.2 | \n",
" 0.849219 | \n",
"
\n",
" \n",
" 184 | \n",
" 0.864441 | \n",
" 0.429435 | \n",
" 0.335570 | \n",
" 17.6 | \n",
" 0.371222 | \n",
"
\n",
" \n",
" 185 | \n",
" 0.698229 | \n",
" 0.909274 | \n",
" 0.219239 | \n",
" 22.6 | \n",
" 0.634882 | \n",
"
\n",
" \n",
" 186 | \n",
" 0.475136 | \n",
" 0.042339 | \n",
" 0.297539 | \n",
" 10.3 | \n",
" 0.020117 | \n",
"
\n",
" \n",
" 187 | \n",
" 0.650886 | \n",
" 0.578629 | \n",
" 0.203579 | \n",
" 17.3 | \n",
" 0.376621 | \n",
"
\n",
" \n",
" 188 | \n",
" 0.974114 | \n",
" 0.280242 | \n",
" 0.041387 | \n",
" 15.9 | \n",
" 0.272988 | \n",
"
\n",
" \n",
" 189 | \n",
" 0.063692 | \n",
" 0.243952 | \n",
" 0.261745 | \n",
" 6.7 | \n",
" 0.015538 | \n",
"
\n",
" \n",
" 190 | \n",
" 0.134537 | \n",
" 0.828629 | \n",
" 0.064877 | \n",
" 10.8 | \n",
" 0.111481 | \n",
"
\n",
" \n",
" 191 | \n",
" 0.257153 | \n",
" 0.217742 | \n",
" 0.067114 | \n",
" 9.9 | \n",
" 0.055993 | \n",
"
\n",
" \n",
" 192 | \n",
" 0.058583 | \n",
" 0.082661 | \n",
" 0.353468 | \n",
" 5.9 | \n",
" 0.004843 | \n",
"
\n",
" \n",
" 193 | \n",
" 0.568120 | \n",
" 0.846774 | \n",
" 0.040268 | \n",
" 19.6 | \n",
" 0.481069 | \n",
"
\n",
" \n",
" 194 | \n",
" 0.509877 | \n",
" 0.717742 | \n",
" 0.067114 | \n",
" 17.3 | \n",
" 0.365960 | \n",
"
\n",
" \n",
" 195 | \n",
" 0.130109 | \n",
" 0.074597 | \n",
" 0.154362 | \n",
" 7.6 | \n",
" 0.009706 | \n",
"
\n",
" \n",
" 196 | \n",
" 0.320845 | \n",
" 0.098790 | \n",
" 0.090604 | \n",
" 9.7 | \n",
" 0.031696 | \n",
"
\n",
" \n",
" 197 | \n",
" 0.602861 | \n",
" 0.187500 | \n",
" 0.071588 | \n",
" 12.8 | \n",
" 0.113036 | \n",
"
\n",
" \n",
" 198 | \n",
" 0.965940 | \n",
" 0.846774 | \n",
" 0.740492 | \n",
" 25.5 | \n",
" 0.817933 | \n",
"
\n",
" \n",
" 199 | \n",
" 0.790531 | \n",
" 0.173387 | \n",
" 0.097315 | \n",
" 13.4 | \n",
" 0.137068 | \n",
"
\n",
" \n",
"
\n",
"
198 rows × 5 columns
\n",
"
"
],
"text/plain": [
" TV Radio Newspaper Sales TV_Radio\n",
"0 0.783719 0.762097 0.774049 22.1 0.597270\n",
"1 0.151567 0.792339 0.504474 10.4 0.120092\n",
"2 0.058583 0.925403 0.775168 9.3 0.054213\n",
"3 0.516008 0.832661 0.654362 18.5 0.429660\n",
"4 0.615804 0.217742 0.653244 12.9 0.134086\n",
"5 0.029632 0.985887 0.838926 7.2 0.029214\n",
"6 0.195845 0.661290 0.262864 11.8 0.129510\n",
"7 0.409401 0.395161 0.129754 13.2 0.161779\n",
"8 0.029292 0.042339 0.011186 4.8 0.001240\n",
"9 0.680518 0.052419 0.237136 10.6 0.035672\n",
"10 0.225136 0.116935 0.270694 8.6 0.026326\n",
"11 0.731267 0.483871 0.044743 17.4 0.353839\n",
"12 0.081063 0.707661 0.737136 9.2 0.057365\n",
"13 0.332084 0.153226 0.080537 9.7 0.050884\n",
"14 0.695163 0.663306 0.514541 19.0 0.461106\n",
"15 0.665531 0.961694 0.591723 22.4 0.640037\n",
"17 0.958447 0.798387 0.624161 24.4 0.765212\n",
"18 0.235695 0.413306 0.204698 11.3 0.097414\n",
"19 0.501703 0.481855 0.213647 14.6 0.241748\n",
"20 0.743869 0.558468 0.597315 18.0 0.415427\n",
"21 0.808583 0.102823 0.262864 12.5 0.083141\n",
"22 0.044959 0.320565 0.554810 5.6 0.014412\n",
"23 0.777589 0.340726 0.293065 15.5 0.264944\n",
"24 0.212193 0.254032 0.204698 9.7 0.053904\n",
"25 0.895436 0.070565 0.218121 12.0 0.063186\n",
"26 0.486717 0.590726 0.140940 15.0 0.287516\n",
"27 0.817779 0.336694 0.256152 15.9 0.275341\n",
"28 0.847411 0.546371 0.256152 18.9 0.463001\n",
"29 0.240463 0.322581 0.456376 10.5 0.077569\n",
"30 0.997616 0.570565 0.483221 21.4 0.569204\n",
".. ... ... ... ... ...\n",
"170 0.170300 0.233871 0.205817 8.4 0.039828\n",
"171 0.560286 0.421371 0.530201 14.5 0.236088\n",
"172 0.066757 0.405242 0.190157 7.6 0.027053\n",
"173 0.573569 0.143145 0.143177 11.7 0.082104\n",
"174 0.757493 0.068548 0.146532 11.5 0.051925\n",
"175 0.943120 0.985887 0.467562 27.0 0.929810\n",
"176 0.846049 0.608871 0.227069 20.2 0.515135\n",
"177 0.579700 0.157258 0.393736 11.7 0.091163\n",
"178 0.942439 0.046371 0.265101 11.8 0.043702\n",
"179 0.564033 0.201613 0.196868 12.6 0.113716\n",
"180 0.533379 0.052419 0.092841 10.5 0.027959\n",
"181 0.744210 0.108871 0.306488 12.2 0.081023\n",
"182 0.191417 0.114919 0.332215 8.7 0.021998\n",
"183 0.979564 0.866935 0.803132 26.2 0.849219\n",
"184 0.864441 0.429435 0.335570 17.6 0.371222\n",
"185 0.698229 0.909274 0.219239 22.6 0.634882\n",
"186 0.475136 0.042339 0.297539 10.3 0.020117\n",
"187 0.650886 0.578629 0.203579 17.3 0.376621\n",
"188 0.974114 0.280242 0.041387 15.9 0.272988\n",
"189 0.063692 0.243952 0.261745 6.7 0.015538\n",
"190 0.134537 0.828629 0.064877 10.8 0.111481\n",
"191 0.257153 0.217742 0.067114 9.9 0.055993\n",
"192 0.058583 0.082661 0.353468 5.9 0.004843\n",
"193 0.568120 0.846774 0.040268 19.6 0.481069\n",
"194 0.509877 0.717742 0.067114 17.3 0.365960\n",
"195 0.130109 0.074597 0.154362 7.6 0.009706\n",
"196 0.320845 0.098790 0.090604 9.7 0.031696\n",
"197 0.602861 0.187500 0.071588 12.8 0.113036\n",
"198 0.965940 0.846774 0.740492 25.5 0.817933\n",
"199 0.790531 0.173387 0.097315 13.4 0.137068\n",
"\n",
"[198 rows x 5 columns]"
]
},
"execution_count": 99,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"OLS Regression Results\n",
"\n",
" Dep. Variable: | Sales | R-squared: | 0.968 | \n",
"
\n",
"\n",
" Model: | OLS | Adj. R-squared: | 0.967 | \n",
"
\n",
"\n",
" Method: | Least Squares | F-statistic: | 1934. | \n",
"
\n",
"\n",
" Date: | Wed, 26 Sep 2018 | Prob (F-statistic): | 3.19e-144 | \n",
"
\n",
"\n",
" Time: | 16:41:33 | Log-Likelihood: | -267.07 | \n",
"
\n",
"\n",
" No. Observations: | 198 | AIC: | 542.1 | \n",
"
\n",
"\n",
" Df Residuals: | 194 | BIC: | 555.3 | \n",
"
\n",
"\n",
" Df Model: | 3 | | | \n",
"
\n",
"\n",
" Covariance Type: | nonrobust | | | \n",
"
\n",
"
\n",
"\n",
"\n",
" | coef | std err | t | P>|t| | [0.025 | 0.975] | \n",
"
\n",
"\n",
" Intercept | 6.7577 | 0.247 | 27.304 | 0.000 | 6.270 | 7.246 | \n",
"
\n",
"\n",
" Radio | 1.3688 | 0.443 | 3.089 | 0.002 | 0.495 | 2.243 | \n",
"
\n",
"\n",
" TV | 5.5919 | 0.441 | 12.682 | 0.000 | 4.722 | 6.462 | \n",
"
\n",
"\n",
" TV_Radio | 15.9617 | 0.767 | 20.817 | 0.000 | 14.449 | 17.474 | \n",
"
\n",
"
\n",
"\n",
"\n",
" Omnibus: | 126.182 | Durbin-Watson: | 2.241 | \n",
"
\n",
"\n",
" Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 1151.060 | \n",
"
\n",
"\n",
" Skew: | -2.306 | Prob(JB): | 1.12e-250 | \n",
"
\n",
"\n",
" Kurtosis: | 13.875 | Cond. No. | 18.1 | \n",
"
\n",
"
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
],
"text/plain": [
"\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: Sales R-squared: 0.968\n",
"Model: OLS Adj. R-squared: 0.967\n",
"Method: Least Squares F-statistic: 1934.\n",
"Date: Wed, 26 Sep 2018 Prob (F-statistic): 3.19e-144\n",
"Time: 16:41:33 Log-Likelihood: -267.07\n",
"No. Observations: 198 AIC: 542.1\n",
"Df Residuals: 194 BIC: 555.3\n",
"Df Model: 3 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"Intercept 6.7577 0.247 27.304 0.000 6.270 7.246\n",
"Radio 1.3688 0.443 3.089 0.002 0.495 2.243\n",
"TV 5.5919 0.441 12.682 0.000 4.722 6.462\n",
"TV_Radio 15.9617 0.767 20.817 0.000 14.449 17.474\n",
"==============================================================================\n",
"Omnibus: 126.182 Durbin-Watson: 2.241\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 1151.060\n",
"Skew: -2.306 Prob(JB): 1.12e-250\n",
"Kurtosis: 13.875 Cond. No. 18.1\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 100,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rajoutez au modèle la variable multiplicative:\n",
"\n",
"lm_6 = smf.ols(formula='Sales ~ Radio + TV + TV_Radio', data=df).fit()\n",
"lm_6.summary()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# On peut expliquer que cette variable ait une grande influence sur le modèle par le fait d'investir sur les deux à la fois\n",
"# et en même temps est mieux qu'investir sur l'un des deux ou les deux séparément.\n",
"\n",
"# C'est à dire si la personne voit la publication à la fois à la télé et au radio, ceci augmente les chances de ventes."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}