{"cells":[{"cell_type":"code","source":"#Carga de las librerías\nimport urllib.request\nimport pandas as pd\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.tree import DecisionTreeClassifier","metadata":{"id":"15ZZJw9xZfE_","cell_id":"d4f8ffcb37524f32b5d7992b90a08eca","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":1663,"user_tz":240,"timestamp":1650987551421},"deepnote_cell_type":"code"},"outputs":[],"execution_count":1},{"cell_type":"code","source":"#Descargamos los datos desde internet!\nfile_name = 'dataR2.csv'\n\ndef download_file(file_name):\n print('Descargando el dataset')\n url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00451/dataR2.csv'\n urllib.request.urlretrieve(url, file_name)\n\n\ndownload_file(file_name)","metadata":{"id":"ejBtmT-zZfFM","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"966b84e6a40b4772919a7da5d924d556","outputId":"daa51b3a-b89f-42fe-8410-61de2eb9a6d1","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":824,"user_tz":240,"timestamp":1650987552235},"deepnote_cell_type":"code"},"outputs":[{"output_type":"stream","name":"stdout","text":"Descargando el dataset\n"}],"execution_count":2},{"cell_type":"code","source":"#Ahora si cargamos los datos en jupyter!\ndata = pd.read_csv(file_name)\ndata.head()","metadata":{"id":"SLcy82TpZfFO","colab":{"height":207,"base_uri":"https://localhost:8080/"},"cell_id":"102076d742e240268c7082bc273ac310","outputId":"3a6804be-2269-455a-b7dd-a32d412e930a","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":17,"user_tz":240,"timestamp":1650987552237},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":" Age BMI Glucose Insulin HOMA Leptin Adiponectin 
Resistin \\\n0 48 23.500000 70 2.707 0.467409 8.8071 9.702400 7.99585 \n1 83 20.690495 92 3.115 0.706897 8.8438 5.429285 4.06405 \n2 82 23.124670 91 4.498 1.009651 17.9393 22.432040 9.27715 \n3 68 21.367521 77 3.226 0.612725 9.8827 7.169560 12.76600 \n4 86 21.111111 92 3.549 0.805386 6.6994 4.819240 10.57635 \n\n MCP.1 Classification \n0 417.114 1 \n1 468.786 1 \n2 554.697 1 \n3 928.220 1 \n4 773.920 1 ","text/html":"\n
\n
\n
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
AgeBMIGlucoseInsulinHOMALeptinAdiponectinResistinMCP.1Classification
04823.500000702.7070.4674098.80719.7024007.99585417.1141
18320.690495923.1150.7068978.84385.4292854.06405468.7861
28223.124670914.4981.00965117.939322.4320409.27715554.6971
36821.367521773.2260.6127259.88277.16956012.76600928.2201
48621.111111923.5490.8053866.69944.81924010.57635773.9201
\n
\n \n \n \n\n \n
\n
\n "},"metadata":{},"execution_count":3}],"execution_count":3},{"cell_type":"markdown","source":"**Insights**:\n\n* El dataset se compone de 10 columnas de las cuales 9 corresponden a variables independientes, que usaremos para predecir el target. \n\n* Classification es la variable a predecir. Todas las variables son numéricas, ya sea enteras o reales y no tiene valores nulos.","metadata":{"id":"nItaq_GiZfFP","cell_id":"368e71dbee144190aef6e458ca357ddf","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"#Separamos en X e y\nX = data.drop([\"Classification\"], axis=1)\ny = data[\"Classification\"]","metadata":{"id":"K2E_MRCuZfFQ","cell_id":"d39f51ba33984676b8090f13670302f7","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":3,"user_tz":240,"timestamp":1650987553645},"deepnote_cell_type":"code"},"outputs":[],"execution_count":4},{"cell_type":"code","source":"#Separamos en train y test!\n(X_train, X_test,y_train, y_test) = train_test_split(X,y,stratify=y,test_size=0.30,random_state=42)","metadata":{"id":"llUfX0h4ZfFS","cell_id":"0fda7684a0764fc5b1a8002e4f9aea66","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":452,"user_tz":240,"timestamp":1650987554825},"deepnote_cell_type":"code"},"outputs":[],"execution_count":5},{"cell_type":"code","source":"#Creamos un arbol de decisión sencillo y lo fiteamos\ntree = DecisionTreeClassifier(random_state=42)\ntree.fit(X_train, y_train)","metadata":{"id":"zorOnQFsZfFU","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"40794bcfed8a40f48a9d86975a4b4439","outputId":"31ac8a7f-7013-4468-a211-a91daca45eef","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos 
Usta"},"status":"ok","elapsed":6,"user_tz":240,"timestamp":1650987555275},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"DecisionTreeClassifier(random_state=42)"},"metadata":{},"execution_count":6}],"execution_count":6},{"cell_type":"code","source":"y_test_pred = tree.predict(X_test) #Prediccion en Test","metadata":{"id":"iYsjzAYZZfFW","cell_id":"88848738f5d94ecb9a5befd0a679f4a6","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":3,"user_tz":240,"timestamp":1650987556888},"deepnote_cell_type":"code"},"outputs":[],"execution_count":7},{"cell_type":"markdown","source":"A lo largo de este notebook, se solicita calcular las métricas requeridas como así también su correspondiente interpretación: \n\n1. Calcular la métrica Accuracy.","metadata":{"id":"UX-EZp8WZfFX","cell_id":"87b081d0461a4a4d8f4f3a562935fb57","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"###Completar\nfrom sklearn.metrics import accuracy_score\naccuracy_score(y_test,y_test_pred)","metadata":{"id":"5r5MtxEcZfFY","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"3fa7892f57754af7a6cb8371dd6ddd32","outputId":"3bcf8178-399d-487a-a84f-1b61a9a8a36d","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":7,"user_tz":240,"timestamp":1650987557347},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"0.7714285714285715"},"metadata":{},"execution_count":8}],"execution_count":8},{"cell_type":"markdown","source":"2. 
Crear la Matriz de Confusión","metadata":{"id":"nDxtusl7ZfFe","cell_id":"b851c96ba5b54189b41e9a8251092c77","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"###Completar\nfrom sklearn.metrics import confusion_matrix\nconfusion_matrix(y_test, y_test_pred) ","metadata":{"id":"fItST2_oZfFe","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"2eff613e22f64e90a59004bab1cbff98","outputId":"c6ee9145-6390-455c-bb07-bce2e809b3d5","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":5,"user_tz":240,"timestamp":1650987558494},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"array([[13, 3],\n [ 5, 14]])"},"metadata":{},"execution_count":9}],"execution_count":9},{"cell_type":"markdown","source":"3. Calcular la métrica Precision","metadata":{"id":"PmSE98PmZfFf","cell_id":"1d1c08517bdc40e9bfa7dd83ac5492d8","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"###Completar\nfrom sklearn.metrics import precision_score\nprecision_score(y_test, y_test_pred) ","metadata":{"id":"Umw82YDMZfFf","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"a71663a9de13401d85a84b249fdf1280","outputId":"a790de07-9c99-46b7-f4e5-445c4d77fb7a","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":8,"user_tz":240,"timestamp":1650987560899},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"0.7222222222222222"},"metadata":{},"execution_count":10}],"execution_count":10},{"cell_type":"markdown","source":"4. Calcular las métricas Recall y F1 score","metadata":{"id":"P8smbNJCZfFg","cell_id":"fa85c58e99a5434f907ef964fd6e38bd","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"###Completar\nfrom sklearn.metrics import recall_score\nrecall_score(y_test, y_test_pred) 
","metadata":{"id":"I25C2oeqZfFg","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"8b0d820d4a6d4ec5980384e4b5e0b998","outputId":"55f5bd07-16cc-4e01-d68c-5cd1f47d43ce","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":414,"user_tz":240,"timestamp":1650987561858},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"0.8125"},"metadata":{},"execution_count":11}],"execution_count":11},{"cell_type":"code","source":"from sklearn.metrics import f1_score\nf1_score(y_test, y_test_pred) ","metadata":{"id":"-7t-mNeVZ0LP","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"03109e3dfd454735a36c91c8e7b9b8cf","outputId":"f7b7a86d-d54e-47bb-9acb-ddc8ae250b68","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":3,"user_tz":240,"timestamp":1650987562288},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"0.7647058823529411"},"metadata":{},"execution_count":12}],"execution_count":12},{"cell_type":"markdown","source":"# Random Forest","metadata":{"id":"XrKdMeNhZfFg","cell_id":"8ab4b19aab1e4b8ba0e3d4a13ea9bfd9","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"#Creamos un random forest!\nmodel = RandomForestClassifier(random_state=42, n_estimators=100,\n class_weight=\"balanced\", max_features=\"log2\")\nmodel.fit(X_train, y_train)","metadata":{"id":"rvPWppZ3ZfFh","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"7bdeb4ef85e144e9b28dc66608a89aa2","outputId":"b4e6e240-70b7-46ee-fe98-2655bba78a67","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":474,"user_tz":240,"timestamp":1650987564819},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"RandomForestClassifier(class_weight='balanced', 
max_features='log2',\n random_state=42)"},"metadata":{},"execution_count":13}],"execution_count":13},{"cell_type":"code","source":"y_test_pred = model.predict(X_test) #Prediccion en Test","metadata":{"id":"kMkzm2s6ZfFh","cell_id":"36a9cd12e2124a38a5b2b2f0207bb79f","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":3,"user_tz":240,"timestamp":1650987567868},"deepnote_cell_type":"code"},"outputs":[],"execution_count":14},{"cell_type":"markdown","source":"1. Calcular la métrica Accuracy.","metadata":{"id":"wSoFd6EwZfFi","cell_id":"4bb48d8b52a04f7987958729704445ea","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"###Completar\nfrom sklearn.metrics import accuracy_score\naccuracy_score(y_test,y_test_pred)","metadata":{"id":"eqmKo_biZfFi","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"22cb0bb604d4415db9e954b63bf424d1","outputId":"8aad9863-425a-44fb-df3a-8faf2b86321d","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":10,"user_tz":240,"timestamp":1650987569031},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"0.7714285714285715"},"metadata":{},"execution_count":15}],"execution_count":15},{"cell_type":"markdown","source":"2. 
Calcular la métrica Precision","metadata":{"id":"67wJXaM9ZfFi","cell_id":"3c20edf8edd94e3da1513e50a0ced4ed","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"###Completar\nfrom sklearn.metrics import precision_score\nprecision_score(y_test, y_test_pred) ","metadata":{"id":"ghzhWdFKZfFj","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"95fc77baff954ffda2f8abb321be2d77","outputId":"d0f0c9eb-66da-4d36-cec6-587fac4652ff","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":5,"user_tz":240,"timestamp":1650987569888},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"0.7"},"metadata":{},"execution_count":16}],"execution_count":16},{"cell_type":"markdown","source":"3. Calcular la métrica Recall","metadata":{"id":"OAMYGqQ5ZfFj","cell_id":"5e336563098848a797a6b47b75e24a0d","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"###Completar\nfrom sklearn.metrics import recall_score\nrecall_score(y_test, y_test_pred) ","metadata":{"id":"x-SPudrjZfFj","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"8606d02468f64d75a7a9546a1f6ddfeb","outputId":"5c3a8bdb-1ccf-4e9f-960d-05cab1a39b2a","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":430,"user_tz":240,"timestamp":1650987571184},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"0.875"},"metadata":{},"execution_count":17}],"execution_count":17},{"cell_type":"markdown","source":"4. 
Calcular la métrica F1 score","metadata":{"id":"P-OYhtk4ZfFk","cell_id":"161c994b768745299bf1c7845c7e55b3","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"###Completar\nfrom sklearn.metrics import f1_score\nf1_score(y_test, y_test_pred) ","metadata":{"id":"jvBtOaZiZfFk","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"bfebfec0f3cf43f988b852314e77b9f0","outputId":"04cf9487-be3a-42d5-d515-fb5471ecdca2","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":4,"user_tz":240,"timestamp":1650987572283},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"0.7777777777777777"},"metadata":{},"execution_count":18}],"execution_count":18},{"cell_type":"markdown","source":"\nCreated in deepnote.com \nCreated in Deepnote","metadata":{"created_in_deepnote_cell":true,"deepnote_cell_type":"markdown"}}],"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"RF y Arboles (Ejemplo 2 - Alumnos).ipynb","provenance":[],"collapsed_sections":[]},"deepnote":{},"kernelspec":{"name":"python3","language":"python","display_name":"Python 3"},"varInspector":{"cols":{"lenVar":40,"lenName":16,"lenType":16},"kernels_config":{"r":{"library":"var_list.r","varRefreshCmd":"cat(var_dic_list()) ","delete_cmd_prefix":"rm(","delete_cmd_postfix":") "},"python":{"library":"var_list.py","varRefreshCmd":"print(var_dic_list())","delete_cmd_prefix":"del ","delete_cmd_postfix":""}},"window_display":false,"types_to_exclude":["module","function","builtin_function_or_method","instance","_Feature"]},"language_info":{"name":"python","version":"3.8.5","mimetype":"text/x-python","file_extension":".py","pygments_lexer":"ipython3","codemirror_mode":{"name":"ipython","version":3},"nbconvert_exporter":"python"},"deepnote_notebook_id":"a6fdd3d5ed1f4b0eb430b9fd4345c414","deepnote_execution_queue":[]}}