{"cells":[{"cell_type":"code","source":"#Importacion de las librerias\nimport numpy as np\nimport pandas as pd","metadata":{"id":"kHT2KF6uEzIi","cell_id":"48e138b094a64d6e83590c84322ced92","deepnote_cell_type":"code"},"outputs":[],"execution_count":null},{"cell_type":"code","source":"from google.colab import drive\nimport os\ndrive.mount('/content/gdrive')\n# Establecer ruta de acceso en dr\nimport os\nprint(os.getcwd())\nos.chdir(\"/content/gdrive/My Drive\")","metadata":{"id":"XJKp1PUsE0sA","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"d5b9df7977754c5e9816b1a97e21637a","outputId":"75a4f9f1-f670-4a88-c049-a2b433e6e2b0","executionInfo":{"user":{"userId":"04741209928239412574","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gi4e7mWJaOA2l-1KUn-omyigRGSrm83lG6XLzS5=s64","displayName":"david francisco bustos usta"},"status":"ok","elapsed":18837,"user_tz":300,"timestamp":1633566709696},"deepnote_cell_type":"code"},"outputs":[{"output_type":"stream","name":"stdout","text":"Mounted at /content/gdrive\n/content\n"}],"execution_count":null},{"cell_type":"code","source":"#Importacion del conjunto de datos\nnames = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'Class']\ndataset = pd.read_csv(\"iris.data\", names=names)","metadata":{"id":"qGlPuzcOEzIs","cell_id":"434e8003916441608adcbcaa517db40d","deepnote_cell_type":"code"},"outputs":[],"execution_count":null},{"cell_type":"code","source":"#Veamos el dataset\ndataset.head()","metadata":{"id":"tAtldIbJEzIt","colab":{"height":205,"base_uri":"https://localhost:8080/"},"cell_id":"2dbde7776ba241249cb1ed2433a31d85","outputId":"1056561d-02cb-4cbb-bb56-18e47682bf80","executionInfo":{"user":{"userId":"04741209928239412574","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gi4e7mWJaOA2l-1KUn-omyigRGSrm83lG6XLzS5=s64","displayName":"david francisco bustos 
usta"},"status":"ok","elapsed":242,"user_tz":300,"timestamp":1633566738974},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
sepal-lengthsepal-widthpetal-lengthpetal-widthClass
05.13.51.40.2Iris-setosa
14.93.01.40.2Iris-setosa
24.73.21.30.2Iris-setosa
34.63.11.50.2Iris-setosa
45.03.61.40.2Iris-setosa
\n
","text/plain":" sepal-length sepal-width petal-length petal-width Class\n0 5.1 3.5 1.4 0.2 Iris-setosa\n1 4.9 3.0 1.4 0.2 Iris-setosa\n2 4.7 3.2 1.3 0.2 Iris-setosa\n3 4.6 3.1 1.5 0.2 Iris-setosa\n4 5.0 3.6 1.4 0.2 Iris-setosa"},"metadata":{},"execution_count":4}],"execution_count":null},{"cell_type":"code","source":"#Preprocesamiento\nX = dataset.drop('Class', 1)\ny = dataset['Class']","metadata":{"id":"UspSOYGkEzIu","cell_id":"1115e6befb2b480da10220edbc58d659","deepnote_cell_type":"code"},"outputs":[],"execution_count":null},{"cell_type":"code","source":"#Separamos en train y test\nfrom sklearn.model_selection import train_test_split\n\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)","metadata":{"id":"Xzp1-sOXEzIu","cell_id":"0f48411d94fc4860a5609e75edd890d3","deepnote_cell_type":"code"},"outputs":[],"execution_count":null},{"cell_type":"code","source":"#Normalizamos los datos para que PCA funcione mejor!\nfrom sklearn.preprocessing import StandardScaler\n\nsc = StandardScaler()\nX_train = sc.fit_transform(X_train)\nX_test = sc.transform(X_test)","metadata":{"id":"tgg3dY8KEzIv","cell_id":"8cb7685b01e24dd1bce4db44726433e1","deepnote_cell_type":"code"},"outputs":[],"execution_count":null},{"cell_type":"code","source":"#Aplicacion de PCA\nfrom sklearn.decomposition import PCA\n\npca = PCA()\nX_train = pca.fit_transform(X_train)\nX_test = pca.transform(X_test)","metadata":{"id":"TjgPP6YzEzIw","cell_id":"9a57e172cfce4008bc36837459308845","deepnote_cell_type":"code"},"outputs":[],"execution_count":null},{"cell_type":"code","source":"#Análisis de la varianza explicada para cada componente\nexplained_variance = 
pca.explained_variance_ratio_\nexplained_variance","metadata":{"id":"Zqefiv9YEzIx","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"8149ee7faa544b8db72f25b14d84ee74","outputId":"af87ef5b-cdb7-493c-adac-47f6cd45e62e","executionInfo":{"user":{"userId":"04741209928239412574","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gi4e7mWJaOA2l-1KUn-omyigRGSrm83lG6XLzS5=s64","displayName":"david francisco bustos usta"},"status":"ok","elapsed":230,"user_tz":300,"timestamp":1633566888659},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"array([0.72317687, 0.23215265, 0.03983465, 0.00483583])"},"metadata":{},"execution_count":10}],"execution_count":null},{"cell_type":"markdown","source":"**Insights**:\n\nEl primer componente principal explica el 72,32% de la varianza. De manera similar, el segundo componente principal explica el 23,22% de la varianza del conjunto de datos. En conjunto, podemos decir que (72,32 + 23,22) el 95,53% de la varianza contenida en el conjunto de características es capturada por los dos primeros componentes principales.","metadata":{"id":"42KqzK0jEzIy","cell_id":"4674d68b267a434aaafa33d2e0bcc559","deepnote_cell_type":"markdown"}},{"cell_type":"markdown","source":"Para finalizar, usemos una sola componente para entrenar un modelo de Random Forest y evaluar qué tan bien funciona! 
","metadata":{"id":"SvAqsGqfEzI0","cell_id":"3eccbaba58b34ae797e01c579c04e054","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"from sklearn.decomposition import PCA\n\npca = PCA(n_components=1)\nX_train = pca.fit_transform(X_train)\nX_test = pca.transform(X_test)","metadata":{"id":"Gfn4XfKEEzI1","cell_id":"c51d7c1f10c4439d93178214ea57a3fd","deepnote_cell_type":"code"},"outputs":[],"execution_count":null},{"cell_type":"code","source":"from sklearn.ensemble import RandomForestClassifier\n\n# Creación del modelo\nmodel_rf = RandomForestClassifier(max_depth=2, random_state=0)\nmodel_rf.fit(X_train, y_train)\n\n# Predicción\ny_pred = model_rf.predict(X_test)","metadata":{"id":"QvNUiA6cEzI1","cell_id":"4304fba7239e49e287b920496eefad62","deepnote_cell_type":"code"},"outputs":[],"execution_count":null},{"cell_type":"code","source":"#Evaluación básica del modelo\nfrom sklearn.metrics import confusion_matrix\nfrom sklearn.metrics import accuracy_score\n\naccuracy = accuracy_score(y_test, y_pred)\nprint('El accuracy del modelo es:', accuracy)","metadata":{"id":"phj_73NBEzI2","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"fbe94863de7c409fa3bf6567c35540ac","outputId":"e659258e-6c1f-4b6d-9115-db619cbd4dfa","executionInfo":{"user":{"userId":"04741209928239412574","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gi4e7mWJaOA2l-1KUn-omyigRGSrm83lG6XLzS5=s64","displayName":"david francisco bustos usta"},"status":"ok","elapsed":409,"user_tz":300,"timestamp":1633044866566},"deepnote_cell_type":"code"},"outputs":[{"output_type":"stream","name":"stdout","text":"El accuracy del modelo es: 0.9333333333333333\n"}],"execution_count":null},{"cell_type":"markdown","source":"**Insights**: Únicamente utilizando una componente tenemos un accuracy muy bueno para nuestro modelo 😉","metadata":{"id":"S_9zhSQPEzI3","cell_id":"2beea8bf38004ae5b636987288a47eb0","deepnote_cell_type":"markdown"}},{"cell_type":"markdown","source":"\nCreated in deepnote.com \nCreated in 
Deepnote","metadata":{"created_in_deepnote_cell":true,"deepnote_cell_type":"markdown"}}],"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"PCA - CoderHouse (Ejemplo 2).ipynb","provenance":[],"collapsed_sections":[]},"deepnote":{},"kernelspec":{"name":"python3","language":"python","display_name":"Python 3"},"varInspector":{"cols":{"lenVar":40,"lenName":16,"lenType":16},"kernels_config":{"r":{"library":"var_list.r","varRefreshCmd":"cat(var_dic_list()) ","delete_cmd_prefix":"rm(","delete_cmd_postfix":") "},"python":{"library":"var_list.py","varRefreshCmd":"print(var_dic_list())","delete_cmd_prefix":"del ","delete_cmd_postfix":""}},"window_display":false,"types_to_exclude":["module","function","builtin_function_or_method","instance","_Feature"]},"language_info":{"name":"python","version":"3.8.5","mimetype":"text/x-python","file_extension":".py","pygments_lexer":"ipython3","codemirror_mode":{"name":"ipython","version":3},"nbconvert_exporter":"python"},"deepnote_notebook_id":"7f569fbd2bde4a358ec4dcf3425f3c63","deepnote_execution_queue":[]}}