{"cells":[{"cell_type":"markdown","source":"# Xgboost - Clasificación","metadata":{"id":"aDll5YpdFlNF","cell_id":"a37dd8d1e70844e185cc7237f52b3dbd","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"import os\n\nfrom google.colab import drive\n\n# Mount Google Drive inside the Colab runtime.\ndrive.mount('/content/drive')\n\n# Work from the Drive root so relative paths (such as the CSV read further down)\n# resolve against it; the working directory is printed before and after the change.\nprint(os.getcwd())\nos.chdir(\"/content/drive/My Drive\")\nprint(os.getcwd())","metadata":{"id":"SUC155scFoc0","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"4d565104c74045729aabd0a42c0309c6","outputId":"85939bbd-dce9-401b-da0c-3759ddd5af51","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":2145,"user_tz":240,"timestamp":1652654065938},"deepnote_cell_type":"code"},"outputs":[{"output_type":"stream","name":"stdout","text":"Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n/content/drive/My Drive\n/content/drive/My Drive\n"}],"execution_count":7},{"cell_type":"code","source":"# %pip (rather than !pip) installs into the environment of the running kernel.\n%pip install xgboost","metadata":{"id":"q6dwM_FPLLkX","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"c678a5ee9c5b455f96a6fb7595c92ffa","outputId":"9ce03532-3b3c-4cd8-f4a5-a4873c01c0ce","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":3697,"user_tz":240,"timestamp":1652654036966},"deepnote_cell_type":"code"},"outputs":[{"output_type":"stream","name":"stdout","text":"Requirement already satisfied: xgboost in /usr/local/lib/python3.7/dist-packages (0.90)\nRequirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from xgboost) (1.21.6)\nRequirement already satisfied: scipy in /usr/local/lib/python3.7/dist-packages (from xgboost) (1.4.1)\n"}],"execution_count":2},{"cell_type":"code","source":"import numpy as np\nimport pandas as pd\nimport xgboost as xgb  # provided by the install cell above\nfrom sklearn.linear_model import LinearRegression as LR\nfrom sklearn.model_selection import train_test_split\n\n# Read the wine-quality CSV (path is relative to the current working directory)\n# and end the cell with the frame so the notebook renders it.\ndata = pd.read_csv('winequality-red.csv')\ndata","metadata":{"id":"GIdK0mGJFlNT","colab":{"height":423,"base_uri":"https://localhost:8080/"},"cell_id":"cbaf98dad5524a948fe61df0746d31f8","outputId":"45f28b10-ebce-485e-8f7d-0165dbc6ff8d","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":647,"user_tz":240,"timestamp":1652654072498},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":" fixed acidity volatile acidity citric acid residual sugar chlorides \\\n0 7.4 0.700 0.00 1.9 0.076 \n1 7.8 0.880 0.00 2.6 0.098 \n2 7.8 0.760 0.04 2.3 0.092 \n3 11.2 0.280 0.56 1.9 0.075 \n4 7.4 0.700 0.00 1.9 0.076 \n... ... ... ... ... ... \n1594 6.2 0.600 0.08 2.0 0.090 \n1595 5.9 0.550 0.10 2.2 0.062 \n1596 6.3 0.510 0.13 2.3 0.076 \n1597 5.9 0.645 0.12 2.0 0.075 \n1598 6.0 0.310 0.47 3.6 0.067 \n\n free sulfur dioxide total sulfur dioxide density pH sulphates \\\n0 11.0 34.0 0.99780 3.51 0.56 \n1 25.0 67.0 0.99680 3.20 0.68 \n2 15.0 54.0 0.99700 3.26 0.65 \n3 17.0 60.0 0.99800 3.16 0.58 \n4 11.0 34.0 0.99780 3.51 0.56 \n... ... ... ... ... ... \n1594 32.0 44.0 0.99490 3.45 0.58 \n1595 39.0 51.0 0.99512 3.52 0.76 \n1596 29.0 40.0 0.99574 3.42 0.75 \n1597 32.0 44.0 0.99547 3.57 0.71 \n1598 18.0 42.0 0.99549 3.39 0.66 \n\n alcohol quality \n0 9.4 5 \n1 9.8 5 \n2 9.8 5 \n3 9.8 6 \n4 9.4 5 \n... ... ... \n1594 10.5 5 \n1595 11.2 6 \n1596 11.0 6 \n1597 10.2 5 \n1598 11.0 6 \n\n[1599 rows x 12 columns]","text/html":"\n
\n | fixed acidity | \nvolatile acidity | \ncitric acid | \nresidual sugar | \nchlorides | \nfree sulfur dioxide | \ntotal sulfur dioxide | \ndensity | \npH | \nsulphates | \nalcohol | \nquality | \n
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n7.4 | \n0.700 | \n0.00 | \n1.9 | \n0.076 | \n11.0 | \n34.0 | \n0.99780 | \n3.51 | \n0.56 | \n9.4 | \n5 | \n
1 | \n7.8 | \n0.880 | \n0.00 | \n2.6 | \n0.098 | \n25.0 | \n67.0 | \n0.99680 | \n3.20 | \n0.68 | \n9.8 | \n5 | \n
2 | \n7.8 | \n0.760 | \n0.04 | \n2.3 | \n0.092 | \n15.0 | \n54.0 | \n0.99700 | \n3.26 | \n0.65 | \n9.8 | \n5 | \n
3 | \n11.2 | \n0.280 | \n0.56 | \n1.9 | \n0.075 | \n17.0 | \n60.0 | \n0.99800 | \n3.16 | \n0.58 | \n9.8 | \n6 | \n
4 | \n7.4 | \n0.700 | \n0.00 | \n1.9 | \n0.076 | \n11.0 | \n34.0 | \n0.99780 | \n3.51 | \n0.56 | \n9.4 | \n5 | \n
... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n
1594 | \n6.2 | \n0.600 | \n0.08 | \n2.0 | \n0.090 | \n32.0 | \n44.0 | \n0.99490 | \n3.45 | \n0.58 | \n10.5 | \n5 | \n
1595 | \n5.9 | \n0.550 | \n0.10 | \n2.2 | \n0.062 | \n39.0 | \n51.0 | \n0.99512 | \n3.52 | \n0.76 | \n11.2 | \n6 | \n
1596 | \n6.3 | \n0.510 | \n0.13 | \n2.3 | \n0.076 | \n29.0 | \n40.0 | \n0.99574 | \n3.42 | \n0.75 | \n11.0 | \n6 | \n
1597 | \n5.9 | \n0.645 | \n0.12 | \n2.0 | \n0.075 | \n32.0 | \n44.0 | \n0.99547 | \n3.57 | \n0.71 | \n10.2 | \n5 | \n
1598 | \n6.0 | \n0.310 | \n0.47 | \n3.6 | \n0.067 | \n18.0 | \n42.0 | \n0.99549 | \n3.39 | \n0.66 | \n11.0 | \n6 | \n
1599 rows × 12 columns
\n\n | fixed acidity | \nvolatile acidity | \ncitric acid | \nresidual sugar | \nchlorides | \nfree sulfur dioxide | \ntotal sulfur dioxide | \ndensity | \npH | \nsulphates | \nalcohol | \nquality | \n
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n7.4 | \n0.700 | \n0.00 | \n1.9 | \n0.076 | \n11.0 | \n34.0 | \n0.99780 | \n3.51 | \n0.56 | \n9.4 | \n0 | \n
1 | \n7.8 | \n0.880 | \n0.00 | \n2.6 | \n0.098 | \n25.0 | \n67.0 | \n0.99680 | \n3.20 | \n0.68 | \n9.8 | \n0 | \n
2 | \n7.8 | \n0.760 | \n0.04 | \n2.3 | \n0.092 | \n15.0 | \n54.0 | \n0.99700 | \n3.26 | \n0.65 | \n9.8 | \n0 | \n
3 | \n11.2 | \n0.280 | \n0.56 | \n1.9 | \n0.075 | \n17.0 | \n60.0 | \n0.99800 | \n3.16 | \n0.58 | \n9.8 | \n1 | \n
4 | \n7.4 | \n0.700 | \n0.00 | \n1.9 | \n0.076 | \n11.0 | \n34.0 | \n0.99780 | \n3.51 | \n0.56 | \n9.4 | \n0 | \n
... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n
1594 | \n6.2 | \n0.600 | \n0.08 | \n2.0 | \n0.090 | \n32.0 | \n44.0 | \n0.99490 | \n3.45 | \n0.58 | \n10.5 | \n0 | \n
1595 | \n5.9 | \n0.550 | \n0.10 | \n2.2 | \n0.062 | \n39.0 | \n51.0 | \n0.99512 | \n3.52 | \n0.76 | \n11.2 | \n1 | \n
1596 | \n6.3 | \n0.510 | \n0.13 | \n2.3 | \n0.076 | \n29.0 | \n40.0 | \n0.99574 | \n3.42 | \n0.75 | \n11.0 | \n1 | \n
1597 | \n5.9 | \n0.645 | \n0.12 | \n2.0 | \n0.075 | \n32.0 | \n44.0 | \n0.99547 | \n3.57 | \n0.71 | \n10.2 | \n0 | \n
1598 | \n6.0 | \n0.310 | \n0.47 | \n3.6 | \n0.067 | \n18.0 | \n42.0 | \n0.99549 | \n3.39 | \n0.66 | \n11.0 | \n1 | \n
1599 rows × 12 columns
\n\n | CRIM | \nZN | \nINDUS | \nCHAS | \nNOX | \nRM | \nAGE | \nDIS | \nRAD | \nTAX | \nPTRATIO | \nB | \nLSTAT | \n
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n0.00632 | \n18.0 | \n2.31 | \n0.0 | \n0.538 | \n6.575 | \n65.2 | \n4.0900 | \n1.0 | \n296.0 | \n15.3 | \n396.90 | \n4.98 | \n
1 | \n0.02731 | \n0.0 | \n7.07 | \n0.0 | \n0.469 | \n6.421 | \n78.9 | \n4.9671 | \n2.0 | \n242.0 | \n17.8 | \n396.90 | \n9.14 | \n
2 | \n0.02729 | \n0.0 | \n7.07 | \n0.0 | \n0.469 | \n7.185 | \n61.1 | \n4.9671 | \n2.0 | \n242.0 | \n17.8 | \n392.83 | \n4.03 | \n
3 | \n0.03237 | \n0.0 | \n2.18 | \n0.0 | \n0.458 | \n6.998 | \n45.8 | \n6.0622 | \n3.0 | \n222.0 | \n18.7 | \n394.63 | \n2.94 | \n
4 | \n0.06905 | \n0.0 | \n2.18 | \n0.0 | \n0.458 | \n7.147 | \n54.2 | \n6.0622 | \n3.0 | \n222.0 | \n18.7 | \n396.90 | \n5.33 | \n