{"cells":[{"cell_type":"markdown","source":"REGRESIÓN LINEAL SIMPLE","metadata":{"id":"HKkOFiEa_mGL","cell_id":"48ffa218527842a5a54e9f131ef853f3","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"#Importacion ded librerias\nimport pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\n%matplotlib inline","metadata":{"id":"vyxhkJ1n_mGT","cell_id":"c0ff7dcced74431ba838ba6637c1f582","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":697,"user_tz":240,"timestamp":1652018861311},"deepnote_cell_type":"code"},"outputs":[],"execution_count":1},{"cell_type":"code","source":"from google.colab import drive\nimport os\ndrive.mount('/content/drive')\n# Establecer ruta de acceso en drive\nimport os\nprint(os.getcwd())\nos.chdir(\"/content/drive/My Drive\")\nprint(os.getcwd())","metadata":{"id":"_5hMAX62ACFa","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"931400d4f0434d8db721013f03b7f96e","outputId":"19d5069d-b22d-43fd-b88e-4f350f3fe36c","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":55713,"user_tz":240,"timestamp":1652018921895},"deepnote_cell_type":"code"},"outputs":[{"output_type":"stream","name":"stdout","text":"Mounted at /content/drive\n/content\n/content/drive/My Drive\n"}],"execution_count":2},{"cell_type":"code","source":"#Importacion de los datos\ndataset = pd.read_csv(\"student_scores.csv\", sep = \",\")","metadata":{"id":"zpakVxDg_mGW","cell_id":"31f87b41013a42bfa8e506dfd16b670c","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":650,"user_tz":240,"timestamp":1652018924614},"deepnote_cell_type":"code"},"outputs":[],"execution_count":3},{"cell_type":"code","source":"#Vemos el 
dataset\ndataset.head()","metadata":{"id":"NytTMwWZ_mGX","colab":{"height":206,"base_uri":"https://localhost:8080/"},"cell_id":"beff8ad77d134b7facb9f2f328b8c987","outputId":"9c5e832a-b5e1-4063-e28a-0707716fe568","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":319,"user_tz":240,"timestamp":1652018925950},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":" Hours Scores\n0 2.5 21\n1 5.1 47\n2 3.2 27\n3 8.5 75\n4 3.5 30","text/html":"\n
\n
\n
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
HoursScores
02.521
15.147
23.227
38.575
43.530
\n
\n \n \n \n\n \n
\n
\n "},"metadata":{},"execution_count":4}],"execution_count":4},{"cell_type":"code","source":"#Shape\ndataset.shape","metadata":{"id":"xXKdsaVh_mGY","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"718ec860cdcf494a8cc8244e1bb55724","outputId":"cd49d20a-16c0-41c1-ce4a-35cf5c51a5a4","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":4,"user_tz":240,"timestamp":1652018927328},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"(25, 2)"},"metadata":{},"execution_count":5}],"execution_count":5},{"cell_type":"code","source":"#Analisis estadistico basico\ndataset.describe()","metadata":{"id":"QNaczzIO_mGZ","colab":{"height":300,"base_uri":"https://localhost:8080/"},"cell_id":"8a983d02f2544023af2a188d7c9599ae","outputId":"0d83e77e-d654-4113-8b16-615658cda252","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":7,"user_tz":240,"timestamp":1652018928751},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":" Hours Scores\ncount 25.000000 25.000000\nmean 5.012000 51.480000\nstd 2.525094 25.286887\nmin 1.100000 17.000000\n25% 2.700000 30.000000\n50% 4.800000 47.000000\n75% 7.400000 75.000000\nmax 9.200000 95.000000","text/html":"\n
\n
\n
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
HoursScores
count25.00000025.000000
mean5.01200051.480000
std2.52509425.286887
min1.10000017.000000
25%2.70000030.000000
50%4.80000047.000000
75%7.40000075.000000
max9.20000095.000000
\n
\n \n \n \n\n \n
\n
\n "},"metadata":{},"execution_count":6}],"execution_count":6},{"cell_type":"code","source":"#Ploteamos el dataset\ndataset.plot(x='Hours', y='Scores', style=\"o\")\nplt.title('Hours vs Percentage')\nplt.xlabel('Hours Studied')\nplt.ylabel('Percentage Score')\nplt.show()","metadata":{"id":"jOEjWiG-_mGZ","colab":{"height":295,"base_uri":"https://localhost:8080/"},"cell_id":"95de59b089f44bce9f0f721986089c83","outputId":"c8afc1a1-7360-4daf-dbfe-89676b7ca5ba","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":18,"user_tz":240,"timestamp":1652018929883},"deepnote_cell_type":"code"},"outputs":[{"output_type":"display_data","data":{"text/plain":"
","image/png":"\n"},"metadata":{"needs_background":"light"}}],"execution_count":7},{"cell_type":"code","source":"#Preparacion de datos\nX = dataset.iloc[:, :-1].values\ny = dataset.iloc[:, 1].values","metadata":{"id":"k2tC6gdw_mGa","cell_id":"35c2ff78d304424983a87a551ea77dde","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":419,"user_tz":240,"timestamp":1652018932299},"deepnote_cell_type":"code"},"outputs":[],"execution_count":8},{"cell_type":"code","source":"X","metadata":{"id":"doxozwNs8M8f","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"d336dd6bfcd14f4eb66c73b4c0ec1e56","outputId":"51c42ad9-eb1c-45cc-a8f0-6553226270da","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":4,"user_tz":240,"timestamp":1652018932564},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"array([[2.5],\n [5.1],\n [3.2],\n [8.5],\n [3.5],\n [1.5],\n [9.2],\n [5.5],\n [8.3],\n [2.7],\n [7.7],\n [5.9],\n [4.5],\n [3.3],\n [1.1],\n [8.9],\n [2.5],\n [1.9],\n [6.1],\n [7.4],\n [2.7],\n [4.8],\n [3.8],\n [6.9],\n [7.8]])"},"metadata":{},"execution_count":9}],"execution_count":9},{"cell_type":"code","source":"y","metadata":{"id":"3Icfnen98OEm","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"656f0962acb741acbc1dec0afc80a66d","outputId":"caa22c0d-e5c4-4f15-bef3-2ba51fecac44","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":249,"user_tz":240,"timestamp":1652018934232},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"array([21, 47, 27, 75, 30, 20, 88, 60, 81, 25, 85, 62, 41, 42, 17, 95, 30,\n 24, 67, 69, 30, 54, 35, 76, 86])"},"metadata":{},"execution_count":10}],"execution_count":10},{"cell_type":"code","source":"#Empezamos a crear nuestro modelo\nfrom 
sklearn.model_selection import train_test_split\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)","metadata":{"id":"8t7RBXQ2_mGb","cell_id":"699d323c290d48018e53a12bd291f242","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":704,"user_tz":240,"timestamp":1652018936050},"deepnote_cell_type":"code"},"outputs":[],"execution_count":11},{"cell_type":"code","source":"#Entrenando el modelo\nfrom sklearn.linear_model import LinearRegression\nregressor = LinearRegression()\nregressor.fit(X_train, y_train)","metadata":{"id":"AJuaNfa5_mGc","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"79b6e39ad68145eba96b03289684561e","outputId":"73329adc-67fb-43c9-9b33-71615816d48a","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":319,"user_tz":240,"timestamp":1652018936365},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"LinearRegression()"},"metadata":{},"execution_count":12}],"execution_count":12},{"cell_type":"code","source":"#Recuperamos la intersección\nprint(regressor.intercept_)","metadata":{"id":"d0oOGMwA_mGd","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"873837bc76164d31b0140080ad3090bf","outputId":"ddcf5396-24af-43f8-d497-f1b5225ba8e0","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":5,"user_tz":240,"timestamp":1652018936980},"deepnote_cell_type":"code"},"outputs":[{"output_type":"stream","name":"stdout","text":"2.826892353899737\n"}],"execution_count":13},{"cell_type":"code","source":"#La 
pendiente\nprint(regressor.coef_)","metadata":{"id":"GBFQN7Wk_mGe","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"b711b39fc6a54640a8f025016710c94f","outputId":"42d30e62-e8a4-498d-f34c-7c37b7341a46","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":3,"user_tz":240,"timestamp":1652018937234},"deepnote_cell_type":"code"},"outputs":[{"output_type":"stream","name":"stdout","text":"[9.68207815]\n"}],"execution_count":14},{"cell_type":"code","source":"#Hacemos nuestras predicciones\ny_pred = regressor.predict(X_test)\ny_pred","metadata":{"id":"HMoBm0ik_mGe","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"f3726476697c415b9e50914361f60594","outputId":"99deb9d6-4b6e-4133-b631-240ad00b174f","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":424,"user_tz":240,"timestamp":1652018939058},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"array([83.18814104, 27.03208774, 27.03208774, 69.63323162, 59.95115347])"},"metadata":{},"execution_count":15}],"execution_count":15},{"cell_type":"markdown","source":"`y_pred` es un arreglo unidimensional de NumPy que contiene un valor predicho por cada fila de entrada de `X_test`.","metadata":{"id":"a-iEdMf4_mGf","cell_id":"9862f39a733b448cb0ed19e05f8c0551","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"#Convertimos en df la salida\ndf = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})\ndf","metadata":{"id":"FLY8Eix4_mGf","colab":{"height":206,"base_uri":"https://localhost:8080/"},"cell_id":"1c2aef6b3b2144b1b256f156f22c848a","outputId":"b622e9d7-dac0-4e87-b1ae-89e9537c34db","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos 
Usta"},"status":"ok","elapsed":9,"user_tz":240,"timestamp":1652018939699},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":" Actual Predicted\n0 81 83.188141\n1 30 27.032088\n2 21 27.032088\n3 76 69.633232\n4 62 59.951153","text/html":"\n
\n
\n
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
ActualPredicted
08183.188141
13027.032088
22127.032088
37669.633232
46259.951153
\n
\n \n \n \n\n \n
\n
\n "},"metadata":{},"execution_count":16}],"execution_count":16},{"cell_type":"markdown","source":"**Evaluación del modelo**:\n\nEl último paso es evaluar el rendimiento del algoritmo. Este paso es particularmente importante para comparar qué tan bien funcionan los diferentes algoritmos en un conjunto de datos en particular. Para los algoritmos de regresión, se utilizan comúnmente tres métricas de evaluación:\n\n* El error absoluto medio (MAE)\n* El error cuadrático medio (MSE)\n* La raíz del error cuadrático medio (RMSE)\n\nAdemás de estas tres, en este cuaderno se calculan el error porcentual absoluto medio (MAPE) y el coeficiente de determinación ($R^2$).","metadata":{"id":"Lu1WnDJ0_mGg","cell_id":"f1ee47561b61473eb28f0641ae4afd70","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"import numpy as np\ndef mse(actual, predicted):\n return np.mean(np.square(actual-predicted))","metadata":{"id":"bqTRWFCR9Nki","cell_id":"04dd2051ab5f4057b8804a2475b95543","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":235,"user_tz":240,"timestamp":1652018941695},"deepnote_cell_type":"code"},"outputs":[],"execution_count":17},{"cell_type":"code","source":"def mape(actual, predicted):\n return np.mean(np.abs((actual - predicted) / actual)) * 100","metadata":{"id":"i8ZPIvUr-Nkb","cell_id":"245957edda3c4e01ad11a72f5407f5d4","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":333,"user_tz":240,"timestamp":1652018942324},"deepnote_cell_type":"code"},"outputs":[],"execution_count":18},{"cell_type":"code","source":"mape(y_test, y_pred)","metadata":{"id":"CgSmTVrk-OgM","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"1dc3f174d0444796816d800402d2da92","outputId":"47ccf52d-b38d-4d90-9eaf-e3afede2f02a","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos 
Usta"},"status":"ok","elapsed":3,"user_tz":240,"timestamp":1652018942861},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"10.600118977553539"},"metadata":{},"execution_count":19}],"execution_count":19},{"cell_type":"code","source":"from sklearn import metrics \nprint('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred)) # MAE\nprint('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred)) # MSE\nprint('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred))) # RMSE","metadata":{"id":"kihmR7mp_mGh","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"9c38eb56419442128a7beecae0bf2427","outputId":"c3a0c7d4-ac1d-4e8d-f22c-39b689440274","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":4,"user_tz":240,"timestamp":1652018943307},"deepnote_cell_type":"code"},"outputs":[{"output_type":"stream","name":"stdout","text":"Mean Absolute Error: 3.9207511902099244\nMean Squared Error: 18.943211722315272\nRoot Mean Squared Error: 4.352380006653288\n"}],"execution_count":20},{"cell_type":"code","source":"from sklearn.metrics import r2_score\nprint('El r^2 es:',r2_score(y_test,y_pred))","metadata":{"id":"FEIzufCoEzHH","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"036ade47422d4c38b05c47614d2dfb3e","outputId":"4bcb304f-e427-433f-c91c-84a981787565","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":283,"user_tz":240,"timestamp":1652018944915},"deepnote_cell_type":"code"},"outputs":[{"output_type":"stream","name":"stdout","text":"El r^2 es: 0.9678055545167994\n"}],"execution_count":21},{"cell_type":"markdown","source":"REGRESIÓN LINEAL MÚLTIPLE","metadata":{"id":"4u4y8Of1_mGi","cell_id":"d06ee10f152046c39d14cf927f4674a2","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"dataset = 
pd.read_csv(\"petrol_consumption.csv\", sep = \",\")","metadata":{"id":"2SUk9in9_mGi","cell_id":"ca54ca76c7e34cee9757f5e9a3d2d4d9","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":605,"user_tz":240,"timestamp":1652018946655},"deepnote_cell_type":"code"},"outputs":[],"execution_count":22},{"cell_type":"code","source":"#Vemos el head\ndataset.head()","metadata":{"id":"FfydBx_7_mGi","colab":{"height":206,"base_uri":"https://localhost:8080/"},"cell_id":"ddba6a5551204a55a69cf97e413b0493","outputId":"265f4d8e-7575-4ea1-c393-14ee01380392","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":7,"user_tz":240,"timestamp":1652018947322},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":" Petrol_tax Average_income Paved_Highways Population_Driver_licence(%) \\\n0 9.0 3571 1976 0.525 \n1 9.0 4092 1250 0.572 \n2 9.0 3865 1586 0.580 \n3 7.5 4870 2351 0.529 \n4 8.0 4399 431 0.544 \n\n Petrol_Consumption \n0 541 \n1 524 \n2 561 \n3 414 \n4 410 ","text/html":"\n
\n
\n
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Petrol_taxAverage_incomePaved_HighwaysPopulation_Driver_licence(%)Petrol_Consumption
09.0357119760.525541
19.0409212500.572524
29.0386515860.580561
37.5487023510.529414
48.043994310.544410
\n
\n \n \n \n\n \n
\n
\n "},"metadata":{},"execution_count":23}],"execution_count":23},{"cell_type":"code","source":"#Estadisticas\ndataset.describe()","metadata":{"id":"UEP-5QNY_mGj","colab":{"height":300,"base_uri":"https://localhost:8080/"},"cell_id":"64401f8b2af4428fa35428f7b67fcf15","outputId":"bfb7b6d0-c119-4535-b9d7-b5b3a64209ec","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":417,"user_tz":240,"timestamp":1652018948966},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":" Petrol_tax Average_income Paved_Highways \\\ncount 48.000000 48.000000 48.000000 \nmean 7.668333 4241.833333 5565.416667 \nstd 0.950770 573.623768 3491.507166 \nmin 5.000000 3063.000000 431.000000 \n25% 7.000000 3739.000000 3110.250000 \n50% 7.500000 4298.000000 4735.500000 \n75% 8.125000 4578.750000 7156.000000 \nmax 10.000000 5342.000000 17782.000000 \n\n Population_Driver_licence(%) Petrol_Consumption \ncount 48.000000 48.000000 \nmean 0.570333 576.770833 \nstd 0.055470 111.885816 \nmin 0.451000 344.000000 \n25% 0.529750 509.500000 \n50% 0.564500 568.500000 \n75% 0.595250 632.750000 \nmax 0.724000 968.000000 ","text/html":"\n
\n
\n
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Petrol_taxAverage_incomePaved_HighwaysPopulation_Driver_licence(%)Petrol_Consumption
count48.00000048.00000048.00000048.00000048.000000
mean7.6683334241.8333335565.4166670.570333576.770833
std0.950770573.6237683491.5071660.055470111.885816
min5.0000003063.000000431.0000000.451000344.000000
25%7.0000003739.0000003110.2500000.529750509.500000
50%7.5000004298.0000004735.5000000.564500568.500000
75%8.1250004578.7500007156.0000000.595250632.750000
max10.0000005342.00000017782.0000000.724000968.000000
\n
\n \n \n \n\n \n
\n
\n "},"metadata":{},"execution_count":24}],"execution_count":24},{"cell_type":"code","source":"#Preparación de datos\nX = dataset[['Petrol_tax', 'Average_income', 'Paved_Highways','Population_Driver_licence(%)']]\ny = dataset['Petrol_Consumption']","metadata":{"id":"DTJNUM1V_mGj","cell_id":"a0797b7991384103aa1cc31c0a3ddb1d","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":263,"user_tz":240,"timestamp":1652018950194},"deepnote_cell_type":"code"},"outputs":[],"execution_count":25},{"cell_type":"code","source":"#Separacion en train y test\nfrom sklearn.model_selection import train_test_split\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)","metadata":{"id":"YPXF2XE-_mGk","cell_id":"f3242ab85349497c930563e31356ce53","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":2,"user_tz":240,"timestamp":1652018950448},"deepnote_cell_type":"code"},"outputs":[],"execution_count":26},{"cell_type":"code","source":"#Entrenamiento del modelo\nfrom sklearn.linear_model import LinearRegression\nregressor = LinearRegression()\nregressor.fit(X_train, y_train)","metadata":{"id":"SdguVHmv_mGk","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"9f951dd008524dd4981fb6c335b77ad7","outputId":"5b1bf609-4a8b-463e-b0f8-11808cde71ab","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":255,"user_tz":240,"timestamp":1652018952114},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"LinearRegression()"},"metadata":{},"execution_count":27}],"execution_count":27},{"cell_type":"markdown","source":"Como se dijo anteriormente, en caso de regresión lineal multivariable, el modelo de regresión tiene que encontrar los coeficientes más óptimos para todos los atributos. 
Para ver qué coeficientes ha elegido nuestro modelo de regresión, podemos ejecutar el siguiente script:","metadata":{"id":"gGLxsmr5_mGl","cell_id":"e547c2deee264c5fad77b5672bb127d7","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"regressor.coef_","metadata":{"id":"IarAbjyeGeFi","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"75bdbd5aeaa143a2ae2007147cfb2d12","outputId":"19a922d1-0aa8-4ee4-a74f-12be63c11676","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":4,"user_tz":240,"timestamp":1652018953361},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"array([-3.69937459e+01, -5.65355145e-02, -4.38217137e-03, 1.34686930e+03])"},"metadata":{},"execution_count":28}],"execution_count":28},{"cell_type":"code","source":"regressor.intercept_","metadata":{"id":"rYkVwLqyG4AT","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"8a3861f7d1464b8fa354bf8ed4f51f86","outputId":"f8f82cff-52e4-43c3-e6b9-dca6bdbe0843","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":5,"user_tz":240,"timestamp":1652018954320},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"361.45087906653225"},"metadata":{},"execution_count":29}],"execution_count":29},{"cell_type":"code","source":"coeff_df = pd.DataFrame(regressor.coef_, X.columns, columns=['Coefficient'])\ncoeff_df","metadata":{"id":"ma_SlQkj_mGl","colab":{"height":175,"base_uri":"https://localhost:8080/"},"cell_id":"29888a85f9824f9ea6efd58e2a1a7bf7","outputId":"8e418789-d4bc-4954-d314-57b382b969ea","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":404,"user_tz":240,"timestamp":1652018955603},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":" Coefficient\nPetrol_tax 
-36.993746\nAverage_income -0.056536\nPaved_Highways -0.004382\nPopulation_Driver_licence(%) 1346.869298","text/html":"\n
\n
\n
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Coefficient
Petrol_tax-36.993746
Average_income-0.056536
Paved_Highways-0.004382
Population_Driver_licence(%)1346.869298
\n
\n \n \n \n\n \n
\n
\n "},"metadata":{},"execution_count":30}],"execution_count":30},{"cell_type":"code","source":"#Realizando las predicciones\ny_pred = regressor.predict(X_test)","metadata":{"id":"W6FSJHQt_mGl","cell_id":"b361635179844f09930cfe1ff49ed211","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":4,"user_tz":240,"timestamp":1652018956813},"deepnote_cell_type":"code"},"outputs":[],"execution_count":31},{"cell_type":"markdown","source":"Para comparar los valores de salida reales (y_test) con los valores predichos (y_pred), los reunimos en un DataFrame:","metadata":{"id":"wGWILlu6_mGl","cell_id":"63477d47c39a40308dd140b29fa28808","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})\ndf","metadata":{"id":"WZiMEKDc_mGm","colab":{"height":363,"base_uri":"https://localhost:8080/"},"cell_id":"da44e05bdad24da9b59629d53a8a8d20","outputId":"eb077006-3ca7-4f92-ee09-e01e360d5cd1","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":8,"user_tz":240,"timestamp":1652018957536},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":" Actual Predicted\n27 631 606.692665\n40 587 673.779442\n26 577 584.991490\n43 591 563.536910\n24 460 519.058672\n37 704 643.461003\n12 525 572.897614\n19 640 687.077036\n4 410 547.609366\n25 566 530.037630","text/html":"\n
\n
\n
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
ActualPredicted
27631606.692665
40587673.779442
26577584.991490
43591563.536910
24460519.058672
37704643.461003
12525572.897614
19640687.077036
4410547.609366
25566530.037630
\n
\n \n \n \n\n \n
\n
\n "},"metadata":{},"execution_count":32}],"execution_count":32},{"cell_type":"code","source":"#Evaluación de Modelos\nfrom sklearn import metrics\nprint('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))\nprint('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))\nprint('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))","metadata":{"id":"hU4inS_o_mGm","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"9fece2aa030f476db3bd99677aaf2430","outputId":"98a1e325-4b84-4253-b3b2-5af412da237b","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":246,"user_tz":240,"timestamp":1652018959363},"deepnote_cell_type":"code"},"outputs":[{"output_type":"stream","name":"stdout","text":"Mean Absolute Error: 53.468541282916625\nMean Squared Error: 4083.2558717453767\nRoot Mean Squared Error: 63.90035893283681\n"}],"execution_count":33},{"cell_type":"code","source":"mape(y_test, y_pred)","metadata":{"id":"mG8QOMnd_uM-","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"ed43aacd830e41798d71ddccb5585b42","outputId":"19b1cb3f-1544-45a9-87a6-454f96c10ded","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":7,"user_tz":240,"timestamp":1652018960064},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"10.250194382138336"},"metadata":{},"execution_count":34}],"execution_count":34},{"cell_type":"code","source":"from sklearn.metrics import r2_score\nr2_score(y_test,y_pred)","metadata":{"id":"ehK7XGr8Htxd","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"ac323735720447e7956f42e88802e677","outputId":"8b640701-3f72-44b7-f0d9-20e99eeb355c","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos 
Usta"},"status":"ok","elapsed":240,"user_tz":240,"timestamp":1652018961348},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"0.3913664001428886"},"metadata":{},"execution_count":35}],"execution_count":35},{"cell_type":"markdown","source":"\nCreated in deepnote.com \nCreated in Deepnote","metadata":{"created_in_deepnote_cell":true,"deepnote_cell_type":"markdown"}}],"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Regresión - Ejemplo 1 .ipynb","provenance":[],"collapsed_sections":[]},"deepnote":{},"kernelspec":{"name":"python3","language":"python","display_name":"Python 3"},"varInspector":{"cols":{"lenVar":40,"lenName":16,"lenType":16},"kernels_config":{"r":{"library":"var_list.r","varRefreshCmd":"cat(var_dic_list()) ","delete_cmd_prefix":"rm(","delete_cmd_postfix":") "},"python":{"library":"var_list.py","varRefreshCmd":"print(var_dic_list())","delete_cmd_prefix":"del ","delete_cmd_postfix":""}},"window_display":false,"types_to_exclude":["module","function","builtin_function_or_method","instance","_Feature"]},"language_info":{"name":"python","version":"3.8.5","mimetype":"text/x-python","file_extension":".py","pygments_lexer":"ipython3","codemirror_mode":{"name":"ipython","version":3},"nbconvert_exporter":"python"},"deepnote_notebook_id":"f7c0b14a851c4cf2a13f0bd08fd73d9f","deepnote_execution_queue":[]}}