{"cells":[{"cell_type":"code","source":"# librerias\nfrom numpy import mean\nfrom numpy import std\nfrom pandas import read_csv\nfrom sklearn.model_selection import LeaveOneOut\nfrom sklearn.model_selection import StratifiedKFold\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.ensemble import RandomForestClassifier","metadata":{"id":"kO15uZdopvtz","cell_id":"82e0d29c90624ac693a187c6ef7ea1ec","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":1586,"user_tz":240,"timestamp":1652617056366},"deepnote_cell_type":"code"},"outputs":[],"execution_count":7},{"cell_type":"markdown","source":"**Descripcion de datos**\nhttps://raw.githubusercontent.com/jbrownlee/Datasets/master/housing.names\n\n**Enlace con datos**\nhttps://raw.githubusercontent.com/jbrownlee/Datasets/master/housing.csv\n\n1. CRIM per capita crime rate by town\n2. ZN proportion of residential land zoned for lots over 25,000 sq.ft.\n3. INDUS proportion of non-retail business acres per town\n4. CHAS Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)\n5. NOX nitric oxides concentration (parts per 10 million)\n6. RM average number of rooms per dwelling\n7. AGE proportion of owner-occupied units built prior to 1940\n8. DIS weighted distances to five Boston employment centres\n9. RAD index of accessibility to radial highways\n10. TAX full-value property-tax rate per 10,000\n11. PTRATIO pupil teacher ratio by town\n12. B 1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town\n13. LSTAT % lower status of the population\n14. 
MEDV Median value of owner-occupied homes in $1000's","metadata":{"id":"_mKNx3NeYutA","cell_id":"b5f263f1954c4799a28130c4a27608a0","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"# librerias\nfrom pandas import read_csv\n# cargar datos\nurl = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/housing.csv'\ndataframe = read_csv(url, header=None)\n# shape\nprint(dataframe.shape)","metadata":{"id":"u_0oIqz5ZVZb","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"b8193cbf10e849f1b0030671e5b5ec8b","outputId":"a040a7d8-314b-4e1e-8dea-71a81c436dcc","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":968,"user_tz":240,"timestamp":1652616992857},"deepnote_cell_type":"code"},"outputs":[{"output_type":"stream","name":"stdout","text":"(506, 14)\n"}],"execution_count":1},{"cell_type":"code","source":"dataframe","metadata":{"id":"LE0y7f4PZdHk","colab":{"height":423,"base_uri":"https://localhost:8080/"},"cell_id":"d3cf352430c7496d9b77fedfaca0458b","outputId":"d19bf9d4-f4fd-4206-86ed-07f98e5173fe","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":355,"user_tz":240,"timestamp":1652616994831},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":" 0 1 2 3 4 5 6 7 8 9 10 \\\n0 0.00632 18.0 2.31 0 0.538 6.575 65.2 4.0900 1 296.0 15.3 \n1 0.02731 0.0 7.07 0 0.469 6.421 78.9 4.9671 2 242.0 17.8 \n2 0.02729 0.0 7.07 0 0.469 7.185 61.1 4.9671 2 242.0 17.8 \n3 0.03237 0.0 2.18 0 0.458 6.998 45.8 6.0622 3 222.0 18.7 \n4 0.06905 0.0 2.18 0 0.458 7.147 54.2 6.0622 3 222.0 18.7 \n.. ... ... ... .. ... ... ... ... .. ... ... 
\n501 0.06263 0.0 11.93 0 0.573 6.593 69.1 2.4786 1 273.0 21.0 \n502 0.04527 0.0 11.93 0 0.573 6.120 76.7 2.2875 1 273.0 21.0 \n503 0.06076 0.0 11.93 0 0.573 6.976 91.0 2.1675 1 273.0 21.0 \n504 0.10959 0.0 11.93 0 0.573 6.794 89.3 2.3889 1 273.0 21.0 \n505 0.04741 0.0 11.93 0 0.573 6.030 80.8 2.5050 1 273.0 21.0 \n\n 11 12 13 \n0 396.90 4.98 24.0 \n1 396.90 9.14 21.6 \n2 392.83 4.03 34.7 \n3 394.63 2.94 33.4 \n4 396.90 5.33 36.2 \n.. ... ... ... \n501 391.99 9.67 22.4 \n502 396.90 9.08 20.6 \n503 396.90 5.64 23.9 \n504 393.45 6.48 22.0 \n505 396.90 7.88 11.9 \n\n[506 rows x 14 columns]","text/html":"\n
\n
\n
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
012345678910111213
00.0063218.02.3100.5386.57565.24.09001296.015.3396.904.9824.0
10.027310.07.0700.4696.42178.94.96712242.017.8396.909.1421.6
20.027290.07.0700.4697.18561.14.96712242.017.8392.834.0334.7
30.032370.02.1800.4586.99845.86.06223222.018.7394.632.9433.4
40.069050.02.1800.4587.14754.26.06223222.018.7396.905.3336.2
.............................................
5010.062630.011.9300.5736.59369.12.47861273.021.0391.999.6722.4
5020.045270.011.9300.5736.12076.72.28751273.021.0396.909.0820.6
5030.060760.011.9300.5736.97691.02.16751273.021.0396.905.6423.9
5040.109590.011.9300.5736.79489.32.38891273.021.0393.456.4822.0
5050.047410.011.9300.5736.03080.82.50501273.021.0396.907.8811.9
\n

506 rows × 14 columns

\n
\n \n \n \n\n \n
\n
\n "},"metadata":{},"execution_count":2}],"execution_count":2},{"cell_type":"code","source":"dataframe.dtypes","metadata":{"id":"BAOnINNWbxhV","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"e4dd56061579419eb3b23f6c1f3dbd24","outputId":"7637266f-7d5e-4db6-947c-a672c042a37e","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":7,"user_tz":240,"timestamp":1652616995873},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"0 float64\n1 float64\n2 float64\n3 int64\n4 float64\n5 float64\n6 float64\n7 float64\n8 int64\n9 float64\n10 float64\n11 float64\n12 float64\n13 float64\ndtype: object"},"metadata":{},"execution_count":3}],"execution_count":3},{"cell_type":"code","source":"dataframe.isnull().sum()","metadata":{"id":"0jTYRZU-Zd6J","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"4de5280f6f12494da8c49cb4bcefa023","outputId":"5179df6f-01a9-4018-9cda-d0ebc76dd30b","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":4,"user_tz":240,"timestamp":1652616997142},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"0 0\n1 0\n2 0\n3 0\n4 0\n5 0\n6 0\n7 0\n8 0\n9 0\n10 0\n11 0\n12 0\n13 0\ndtype: int64"},"metadata":{},"execution_count":4}],"execution_count":4},{"cell_type":"code","source":"# separar en X y y\ndata= dataframe.values\nX, y = data[:, :-1], data[:, -1]\nprint(X.shape, y.shape)","metadata":{"id":"COZGHu1YZf9Y","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"d647c79fc67647579da43fe7a82227c8","outputId":"7f645398-cbd3-4c2b-a99b-9be68933873d","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":7,"user_tz":240,"timestamp":1652616998405},"deepnote_cell_type":"code"},"outputs":[{"output_type":"stream","name":"stdout","text":"(506, 13) 
(506,)\n"}],"execution_count":5},{"cell_type":"code","source":"from sklearn.ensemble import RandomForestRegressor\nfrom sklearn.model_selection import KFold, cross_val_score\n\n# Build the model: a small, shallow forest with a fixed seed so reruns give the same trees.\nmodel = RandomForestRegressor(random_state=42, n_estimators=10, max_depth=4)\n\n# Passing an integer as cv to a regressor selects KFold without shuffling, i.e. seven\n# contiguous row blocks. Adjacent rows of this dataset share the same town-level feature\n# values, so contiguous blocks are not representative of the whole set.\n# Shuffle once, with a fixed seed, before splitting.\nfolds = KFold(n_splits=7, shuffle=True, random_state=42)\nscores = cross_val_score(model, X, y, cv=folds, verbose=1, scoring='r2')","metadata":{"id":"oxihow3SqgYI","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"c045e3d459ea4738923f6d5359235434","outputId":"eeb97737-f3ab-42d5-ed77-8bb4626a84ff","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":702,"user_tz":240,"timestamp":1652617397725},"deepnote_cell_type":"code"},"outputs":[],"execution_count":null},{"cell_type":"code","source":"print(\"%0.2f de r2 promedio con una desviacion estandar de %0.2f\" % (scores.mean(), scores.std()))","metadata":{"id":"g3CRDsNTqscO","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"ba81d4ca8cd3472c8c430ce14a49318b","outputId":"b133c5b5-bc74-4ac0-827d-8a5cf5c2c7df","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":334,"user_tz":240,"timestamp":1652617420786},"deepnote_cell_type":"code"},"outputs":[],"execution_count":null},{"cell_type":"markdown","source":"\nCreated in deepnote.com \nCreated in Deepnote","metadata":{"created_in_deepnote_cell":true,"deepnote_cell_type":"markdown"}}],"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Ejemplo 3- Stratified- K fold 
(regresion).ipynb","provenance":[],"authorship_tag":"ABX9TyMDv8kJc3lWrNLitVsQeU3R","collapsed_sections":[]},"deepnote":{},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"deepnote_notebook_id":"ea2925ae9c4f40d2ac1753e7dc196395","deepnote_execution_queue":[]}}