{"cells":[{"cell_type":"code","source":"import os\n\nfrom google.colab import drive\n\n# Mount Google Drive, then make its root the working directory so the\n# relative data path used when loading the CSV resolves against Drive.\ndrive.mount('/content/drive')\nprint(os.getcwd())\nos.chdir(\"/content/drive/My Drive\")\nprint(os.getcwd())","metadata":{"id":"N-aXaawsE735","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"3eebee2f5f934d6ea0591b979614234d","outputId":"62eb308d-02aa-42f9-a92f-db04e4cc36c2","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":2470,"user_tz":240,"timestamp":1652654218422},"deepnote_cell_type":"code"},"outputs":[{"output_type":"stream","name":"stdout","text":"Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n/content/drive/My Drive\n/content/drive/My Drive\n"}],"execution_count":10},{"cell_type":"code","source":"import matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\nimport seaborn as sns\n\n# Load the dataset from the current working directory and preview the first rows.\ndf = pd.read_csv('Breast_cancer_data.csv')\ndf.head()","metadata":{"id":"NXgGXyKtE3AF","colab":{"height":206,"base_uri":"https://localhost:8080/"},"cell_id":"93d06bdddfdb4b93b9ba8e8a5482105c","outputId":"6ab28d83-46d3-4952-85ce-8e64ec853c6b","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":664,"user_tz":240,"timestamp":1652654220114},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":" mean_radius mean_texture mean_perimeter mean_area mean_smoothness \\\n0 17.99 10.38 122.80 1001.0 0.11840 \n1 20.57 17.77 132.90 1326.0 0.08474 \n2 19.69 21.25 130.00 1203.0 0.10960 \n3 11.42 20.38 77.58 386.1 0.14250 \n4 20.29 14.34 135.10 1297.0 0.10030 \n\n diagnosis \n0 0 \n1 0 \n2 0 \n3 0 \n4 0 ","text/html":"\n
\n
\n
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
mean_radiusmean_texturemean_perimetermean_areamean_smoothnessdiagnosis
017.9910.38122.801001.00.118400
120.5717.77132.901326.00.084740
219.6921.25130.001203.00.109600
311.4220.3877.58386.10.142500
420.2914.34135.101297.00.100300
\n
\n \n \n \n\n \n
\n
\n "},"metadata":{},"execution_count":11}],"execution_count":11},{"cell_type":"code","source":"# Summarize column dtypes and non-null counts\ndf.info()","metadata":{"id":"jZCsSqjXE3AL","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"99d2d9c0f6be40f1ad3cfe6ee071050a","outputId":"ddefcb89-ff37-49e1-a36d-23c5b3c2964d","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":268,"user_tz":240,"timestamp":1652654222572},"deepnote_cell_type":"code"},"outputs":[{"output_type":"stream","name":"stdout","text":"\nRangeIndex: 569 entries, 0 to 568\nData columns (total 6 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 mean_radius 569 non-null float64\n 1 mean_texture 569 non-null float64\n 2 mean_perimeter 569 non-null float64\n 3 mean_area 569 non-null float64\n 4 mean_smoothness 569 non-null float64\n 5 diagnosis 569 non-null int64 \ndtypes: float64(5), int64(1)\nmemory usage: 26.8 KB\n"}],"execution_count":12},{"cell_type":"code","source":"# Inspect the target variable and how often each class occurs\ndf['diagnosis'].value_counts()","metadata":{"id":"24okdbeFE3AM","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"621f894fedd742a798ec4b6375f78a88","outputId":"611b6c89-9d4d-4700-c581-c602f5297cbc","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":7,"user_tz":240,"timestamp":1652654222817},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"1 357\n0 212\nName: diagnosis, dtype: int64"},"metadata":{},"execution_count":13}],"execution_count":13},{"cell_type":"code","source":"# Define the feature matrix X and the target vector y\nfeature_columns = [\n    'mean_radius',\n    'mean_texture',\n    'mean_perimeter',\n    'mean_area',\n    'mean_smoothness',\n]\nX = df[feature_columns]\ny = df['diagnosis']","metadata":{"id":"zMgPm_xLE3AO","cell_id":"7e630a04d83f4ae5a68841ab2b071cca","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos 
Usta"},"status":"ok","elapsed":6,"user_tz":240,"timestamp":1652654225092},"deepnote_cell_type":"code"},"outputs":[],"execution_count":14},{"cell_type":"code","source":"X","metadata":{"id":"3X49vw_4FZRb","colab":{"height":423,"base_uri":"https://localhost:8080/"},"cell_id":"28799dab13994fd28ba477a2769b028e","outputId":"ce83d672-ec09-481b-874e-bbc5df337acf","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":7,"user_tz":240,"timestamp":1652654226392},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":" mean_radius mean_texture mean_perimeter mean_area mean_smoothness\n0 17.99 10.38 122.80 1001.0 0.11840\n1 20.57 17.77 132.90 1326.0 0.08474\n2 19.69 21.25 130.00 1203.0 0.10960\n3 11.42 20.38 77.58 386.1 0.14250\n4 20.29 14.34 135.10 1297.0 0.10030\n.. ... ... ... ... ...\n564 21.56 22.39 142.00 1479.0 0.11100\n565 20.13 28.25 131.20 1261.0 0.09780\n566 16.60 28.08 108.30 858.1 0.08455\n567 20.60 29.33 140.10 1265.0 0.11780\n568 7.76 24.54 47.92 181.0 0.05263\n\n[569 rows x 5 columns]","text/html":"\n
\n
\n
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
mean_radiusmean_texturemean_perimetermean_areamean_smoothness
017.9910.38122.801001.00.11840
120.5717.77132.901326.00.08474
219.6921.25130.001203.00.10960
311.4220.3877.58386.10.14250
420.2914.34135.101297.00.10030
..................
56421.5622.39142.001479.00.11100
56520.1328.25131.201261.00.09780
56616.6028.08108.30858.10.08455
56720.6029.33140.101265.00.11780
5687.7624.5447.92181.00.05263
\n

569 rows × 5 columns

\n
\n \n \n \n\n \n
\n
\n "},"metadata":{},"execution_count":15}],"execution_count":15},{"cell_type":"code","source":"y","metadata":{"id":"cNWH1eX6FaKv","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"ccf49a8bff0545c4b108cd3e801cd801","outputId":"a06c743d-fd41-4f9c-b450-90d7afb571b2","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":5,"user_tz":240,"timestamp":1652654228745},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"0 0\n1 0\n2 0\n3 0\n4 0\n ..\n564 0\n565 0\n566 0\n567 0\n568 1\nName: diagnosis, Length: 569, dtype: int64"},"metadata":{},"execution_count":16}],"execution_count":16},{"cell_type":"code","source":"# Split into train and test sets (70/30) with a fixed seed for reproducibility\nfrom sklearn.model_selection import train_test_split\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)","metadata":{"id":"Wqt3BFEqE3AP","cell_id":"497556e69aa9467d81c90d84911cd57e","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":305,"user_tz":240,"timestamp":1652654230613},"deepnote_cell_type":"code"},"outputs":[],"execution_count":17},{"cell_type":"code","source":"%pip install lightgbm","metadata":{"id":"BbGunKHmKB-9","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"c91a3c14b3454bffb928f00b836a12ac","outputId":"2b0edaa9-fe89-4331-ba0d-6a80c572d4c6","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":3779,"user_tz":240,"timestamp":1652654235087},"deepnote_cell_type":"code"},"outputs":[{"output_type":"stream","name":"stdout","text":"Requirement already satisfied: lightgbm in /usr/local/lib/python3.7/dist-packages (2.2.3)\nRequirement already satisfied: scikit-learn in /usr/local/lib/python3.7/dist-packages (from lightgbm) (1.0.2)\nRequirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from 
lightgbm) (1.21.6)\nRequirement already satisfied: scipy in /usr/local/lib/python3.7/dist-packages (from lightgbm) (1.4.1)\nRequirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/dist-packages (from scikit-learn->lightgbm) (1.1.0)\nRequirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn->lightgbm) (3.1.0)\n"}],"execution_count":18},{"cell_type":"code","source":"import lightgbm as lgb\n\n# Fit a LightGBM classifier with library-default hyperparameters,\n# then predict labels for the held-out test set.\nclf = lgb.LGBMClassifier()\nclf.fit(X_train, y_train)\ny_pred = clf.predict(X_test)","metadata":{"id":"lM4KMo16E3AQ","cell_id":"4ad7f912dfa74668ad18e32a94f8ae13","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":355,"user_tz":240,"timestamp":1652654235433},"deepnote_cell_type":"code"},"outputs":[],"execution_count":19},{"cell_type":"code","source":"# Accuracy on the test set. scikit-learn's signature is\n# accuracy_score(y_true, y_pred), so the true labels go first.\nfrom sklearn.metrics import accuracy_score\naccuracy = accuracy_score(y_test, y_pred)\nprint('LightGBM Model accuracy score: {0:0.4f}'.format(accuracy))","metadata":{"id":"D9J_HLf8E3AQ","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"71f6c11502da476fb8120c0b456f48dd","outputId":"548c5fac-a623-4e38-de3c-4d64bcbe96d0","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":4,"user_tz":240,"timestamp":1652654239488},"deepnote_cell_type":"code"},"outputs":[{"output_type":"stream","name":"stdout","text":"LightGBM Model accuracy score: 0.9591\n"}],"execution_count":20},{"cell_type":"markdown","source":"\nCreated in deepnote.com \nCreated in Deepnote","metadata":{"created_in_deepnote_cell":true,"deepnote_cell_type":"markdown"}}],"nbformat":4,"nbformat_minor":0,"metadata":{"toc":{"sideBar":true,"nav_menu":{},"toc_cell":false,"title_cell":"Table of 
Contents","toc_position":{},"skip_h1_title":false,"title_sidebar":"Contents","base_numbering":1,"number_sections":true,"toc_window_display":false,"toc_section_display":true},"colab":{"name":"LightGBM - CoderHouse (Ejemplo 3).ipynb","provenance":[],"collapsed_sections":[]},"deepnote":{},"kernelspec":{"name":"python3","language":"python","display_name":"Python 3"},"varInspector":{"cols":{"lenVar":40,"lenName":16,"lenType":16},"kernels_config":{"r":{"library":"var_list.r","varRefreshCmd":"cat(var_dic_list()) ","delete_cmd_prefix":"rm(","delete_cmd_postfix":") "},"python":{"library":"var_list.py","varRefreshCmd":"print(var_dic_list())","delete_cmd_prefix":"del ","delete_cmd_postfix":""}},"window_display":false,"types_to_exclude":["module","function","builtin_function_or_method","instance","_Feature"]},"language_info":{"name":"python","version":"3.8.5","mimetype":"text/x-python","file_extension":".py","pygments_lexer":"ipython3","codemirror_mode":{"name":"ipython","version":3},"nbconvert_exporter":"python"},"deepnote_notebook_id":"cf373978742b4e28969514c568425a89","deepnote_execution_queue":[]}}