{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "ca555af9", "metadata": { "execution": { "iopub.execute_input": "2022-04-20T18:28:17.790372Z", "iopub.status.busy": "2022-04-20T18:28:17.789010Z", "iopub.status.idle": "2022-04-20T18:28:20.247943Z", "shell.execute_reply": "2022-04-20T18:28:20.248470Z", "shell.execute_reply.started": "2022-04-20T09:21:59.649817Z" }, "id": "jfZyocATxtuY", "outputId": "241ce5e4-6e24-4b24-ea2f-fb71ec44c241", "papermill": { "duration": 2.481663, "end_time": "2022-04-20T18:28:20.248767", "exception": false, "start_time": "2022-04-20T18:28:17.767104", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[nltk_data] Downloading package stopwords to /usr/share/nltk_data...\n", "[nltk_data] Package stopwords is already up-to-date!\n", "[nltk_data] Downloading package punkt to /usr/share/nltk_data...\n", "[nltk_data] Package punkt is already up-to-date!\n" ] }, { "data": { "text/plain": [ "True" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#imports\n", "import pandas as pd\n", "from imblearn.pipeline import Pipeline, make_pipeline\n", "from sklearn.feature_extraction.text import CountVectorizer\n", "from sklearn.feature_extraction.text import TfidfVectorizer\n", "from imblearn.under_sampling import RandomUnderSampler\n", "from sklearn.preprocessing import MultiLabelBinarizer\n", "\n", "from sklearn.metrics import coverage_error\n", "from sklearn.metrics import label_ranking_average_precision_score\n", "\n", "\n", "from sklearn.tree import DecisionTreeClassifier\n", "from sklearn.svm import LinearSVC\n", "from sklearn import metrics\n", "from nltk import word_tokenize\n", "from sklearn.metrics import confusion_matrix\n", "from sklearn.metrics import multilabel_confusion_matrix\n", "import matplotlib.pyplot as plt\n", "\n", "import nltk\n", "nltk.download('stopwords')\n", "from nltk.corpus import stopwords\n", "nltk.download('punkt')" ] }, { "cell_type": "markdown", "id": "43c1970f", "metadata": { "id": "_V7ars6WxrbQ", "papermill": { "duration": 0.016705, "end_time": "2022-04-20T18:28:20.282691", "exception": false, "start_time": "2022-04-20T18:28:20.265986", "status": "completed" }, "tags": [] }, "source": [ "# Data preparation" ] }, { "cell_type": "code", "execution_count": 2, "id": "4530543e", "metadata": { "execution": { "iopub.execute_input": "2022-04-20T18:28:20.325972Z", "iopub.status.busy": "2022-04-20T18:28:20.325191Z", "iopub.status.idle": "2022-04-20T18:28:21.450647Z", "shell.execute_reply": "2022-04-20T18:28:21.450097Z", "shell.execute_reply.started": "2022-04-20T09:22:00.035051Z" }, "papermill": { "duration": 1.15018, "end_time": "2022-04-20T18:28:21.450806", "exception": false, "start_time": "2022-04-20T18:28:20.300626", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", " | descripcion | \n", "03 | \n", "09 | \n", "14 | \n", "15 | \n", "16 | \n", "18 | \n", "19 | \n", "22 | \n", "24 | \n", "... | \n", "73 | \n", "75 | \n", "76 | \n", "77 | \n", "79 | \n", "80 | \n", "85 | \n", "90 | \n", "92 | \n", "98 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "Contrato Administrativo de Servicios de diseño... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
1 | \n", "2019(Y)1535 Construcción escalera de emergenci... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
2 | \n", "Suministro de energía electrica de diversas in... | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
3 | \n", "Servicio desplazamiento del personal operativo... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
4 | \n", "Contrato de suministro de gas natural en los ... | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
5 rows × 46 columns
\n", "\n", " | 03 | \n", "09 | \n", "14 | \n", "15 | \n", "16 | \n", "18 | \n", "19 | \n", "22 | \n", "24 | \n", "30 | \n", "... | \n", "73 | \n", "75 | \n", "76 | \n", "77 | \n", "79 | \n", "80 | \n", "85 | \n", "90 | \n", "92 | \n", "98 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
2 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
3 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
4 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
72424 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
72425 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
72426 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
72427 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
72428 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
72429 rows × 45 columns
\n", "