{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import importlib\n",
    "import importlib.util\n",
    "import os\n",
    "import pickle\n",
    "import re\n",
    "import sys\n",
    "import warnings\n",
    "from pathlib import Path\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import spacy\n",
    "from IPython.display import clear_output, display\n",
    "from ipywidgets import widgets, interactive, Layout\n",
    "from spacy.lang.es.stop_words import STOP_WORDS\n",
    "\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "# Everything a reader might need to adapt lives here.\n",
    "SPACY_MODEL = 'es_core_news_sm'\n",
    "EXAMPLES_PATH = Path('data/ejemplos.csv')\n",
    "MODEL_DIR = Path('src/Final_Models')\n",
    "\n",
    "# Bookkeeping for blockPrint()/enablePrint(): the stream to restore and the\n",
    "# devnull handle that has to be closed afterwards.\n",
    "_stdout_state = {'saved': None, 'devnull': None}\n",
    "\n",
    "\n",
    "def blockPrint():\n",
    "    \"\"\"Silence printed output by pointing sys.stdout at os.devnull.\n",
    "\n",
    "    Calling it again while output is already blocked is a no-op, so the\n",
    "    stream saved by the first call is never overwritten.\n",
    "    \"\"\"\n",
    "    if _stdout_state['saved'] is None:\n",
    "        _stdout_state['saved'] = sys.stdout\n",
    "        _stdout_state['devnull'] = open(os.devnull, 'w')\n",
    "        sys.stdout = _stdout_state['devnull']\n",
    "\n",
    "\n",
    "def enablePrint():\n",
    "    \"\"\"Undo blockPrint(): restore the saved stream and close the devnull handle.\n",
    "\n",
    "    The stream that was active before blockPrint() is restored rather than\n",
    "    sys.__stdout__, because inside a Jupyter kernel sys.__stdout__ is the\n",
    "    process terminal and not the notebook output area.\n",
    "    \"\"\"\n",
    "    if _stdout_state['saved'] is not None:\n",
    "        sys.stdout = _stdout_state['saved']\n",
    "        _stdout_state['devnull'].close()\n",
    "        _stdout_state['saved'] = None\n",
    "        _stdout_state['devnull'] = None\n",
    "\n",
    "\n",
    "def load_pickle(path):\n",
    "    \"\"\"Return the object unpickled from `path`.\n",
    "\n",
    "    NOTE: unpickling can execute arbitrary code; only load files you trust.\n",
    "    \"\"\"\n",
    "    with open(path, 'rb') as f:\n",
    "        return pickle.load(f)\n",
    "\n",
    "\n",
    "# Download the spaCy model only when it is not installed yet, using the\n",
    "# interpreter of this kernel so the package lands in the right environment.\n",
    "if importlib.util.find_spec(SPACY_MODEL) is None:\n",
    "    blockPrint()\n",
    "    try:\n",
    "        !\"{sys.executable}\" -m spacy download {SPACY_MODEL}\n",
    "    finally:\n",
    "        enablePrint()\n",
    "    # make the freshly installed package visible to the import system\n",
    "    importlib.invalidate_caches()\n",
    "\n",
    "import es_core_news_sm\n",
    "\n",
    "nlp = spacy.load(SPACY_MODEL)\n",
    "\n",
    "ejemplos = pd.read_csv(EXAMPLES_PATH)\n",
    "\n",
    "W2V = load_pickle(MODEL_DIR / 'W2Vmodel.pickle')\n",
    "model_est_Ao = load_pickle(MODEL_DIR / 'ML_model_est_Ao_SVM.pickle')\n",
    "model_est_Mv = load_pickle(MODEL_DIR / 'ML_model_est_Mv_SVM.pickle')\n",
    "model_insf_Ao = load_pickle(MODEL_DIR / 'ML_model_insf_Ao_SVM.pickle')\n",
    "model_insf_Mv = load_pickle(MODEL_DIR / 'ML_model_insf_Mv_SVM.pickle')\n",
    "model_prot_Ao = load_pickle(MODEL_DIR / 'ML_model_prot_Ao_SVM.pickle')\n",
    "model_prot_Mv = load_pickle(MODEL_DIR / 'ML_model_prot_Mv_SVM.pickle')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Literal substitutions applied by multiple_replace(): the first four remove the\n",
    "# space in 'cia a', 'cia m', 'sis a' and 'sis m'; the rest strip the acute\n",
    "# accent from vowels.\n",
    "_REPLACEMENTS = {'cia a': 'ciaa', 'cia m': 'ciam', 'sis a': 'sisa', 'sis m': 'sism',\n",
    "                 'á': 'a', 'é': 'e', 'í': 'i', 'ó': 'o', 'ú': 'u'}\n",
    "# One alternation over all keys, compiled once instead of on every call.\n",
    "_REPLACEMENTS_RE = re.compile('(%s)' % '|'.join(map(re.escape, _REPLACEMENTS)))\n",
    "\n",
    "# Part-of-speech tags whose tokens are dropped by tokenize_and_lemm_spacy().\n",
    "# NOTE(review): 'CONJ' is kept exactly as in the original filter; recent spaCy\n",
    "# models may tag conjunctions as 'CCONJ'/'SCONJ' instead. Confirm before\n",
    "# changing it, since the classifiers were presumably trained on this filter.\n",
    "EXCLUDED_POS = ('CONJ', 'ADP', 'DET')\n",
    "\n",
    "\n",
    "def multiple_replace(text):\n",
    "    \"\"\"Lower-case `text` and apply every substitution of _REPLACEMENTS in one pass.\"\"\"\n",
    "    return _REPLACEMENTS_RE.sub(lambda mo: _REPLACEMENTS[mo.group(0)], text.lower())\n",
    "\n",
    "\n",
    "def clean_text(text):\n",
    "    \"\"\"Split a report into a list of cleaned sentence strings.\n",
    "\n",
    "    Characters that are neither word characters nor dots become spaces, ASCII\n",
    "    digits are removed, the result goes through multiple_replace() and is split\n",
    "    on dots. Only fragments containing at least one letter a-z are returned.\n",
    "    Non-string input yields an empty list.\n",
    "    \"\"\"\n",
    "    if not isinstance(text, str):\n",
    "        return []\n",
    "    without_punctuation = re.sub(r'[^\\w.]', ' ', text)\n",
    "    without_digits = re.sub(r'[0-9]', '', without_punctuation)\n",
    "    return [el for el in multiple_replace(without_digits).split('.') if re.search('[a-z]', el)]\n",
    "\n",
    "\n",
    "def tokenize_and_lemm_spacy(text):\n",
    "    \"\"\"Return the lower-cased lemmas of the relevant tokens of `text`.\n",
    "\n",
    "    A token is kept when it is not a stop word, is purely alphabetic and its\n",
    "    part-of-speech tag is not listed in EXCLUDED_POS.\n",
    "    \"\"\"\n",
    "    return [token.lemma_.lower()\n",
    "            for token in nlp(text)\n",
    "            if not token.is_stop and token.is_alpha and token.pos_ not in EXCLUDED_POS]\n",
    "\n",
    "\n",
    "def tokenize_and_lemm_spacy_lst(lst):\n",
    "    \"\"\"Apply tokenize_and_lemm_spacy() to every string in `lst`.\"\"\"\n",
    "    return [tokenize_and_lemm_spacy(el) for el in lst]\n",
    "\n",
    "\n",
    "def get_w2v_features(w2v_model, sentence_group):\n",
    "    \"\"\"Average the word vectors of all known words in `sentence_group`.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    w2v_model : word2vec model exposing `wv` and `vector_size`.\n",
    "    sentence_group : iterable of word lists (one list per sentence).\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    float32 array of length `w2v_model.vector_size`. It is all zeros when\n",
    "    `sentence_group` is empty or none of its words is in the vocabulary.\n",
    "    \"\"\"\n",
    "    keyed_vectors = w2v_model.wv\n",
    "    # gensim >= 4 exposes the vocabulary as key_to_index, older releases as vocab\n",
    "    if hasattr(keyed_vectors, 'key_to_index'):\n",
    "        vocab = keyed_vectors.key_to_index\n",
    "    else:\n",
    "        vocab = keyed_vectors.vocab\n",
    "\n",
    "    # flatten the sentences and keep only words the model knows\n",
    "    known_words = [word for sentence in sentence_group for word in sentence if word in vocab]\n",
    "\n",
    "    feature_vec = np.zeros(w2v_model.vector_size, dtype='float32')\n",
    "    for word in known_words:\n",
    "        feature_vec = np.add(feature_vec, keyed_vectors[word])\n",
    "\n",
    "    # divide by the number of contributing words to get the average\n",
    "    if known_words:\n",
    "        feature_vec = np.divide(feature_vec, float(len(known_words)))\n",
    "    return feature_vec"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_resp(X, model, cut_off, valv):\n",
    "    \"\"\"Turn the classifier probability for one report into 'Sí' or 'No'.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    X : 2-D feature array; only its first row is evaluated.\n",
    "    model : classifier exposing predict_proba().\n",
    "    cut_off : dict mapping each label to its probability threshold.\n",
    "    valv : label whose threshold is used.\n",
    "\n",
    "    For labels containing 'Nativa' the answer is inverted: a probability above\n",
    "    the threshold gives 'No'.\n",
    "    \"\"\"\n",
    "    # probability of the second class for the first (only) row\n",
    "    above_cut_off = model.predict_proba(X)[0, 1] > cut_off[valv]\n",
    "    if 'Nativa' in valv:\n",
    "        # NOTE(review): presumably these models score prosthetic valves, so a high\n",
    "        # probability means the valve is not native - confirm with the training code.\n",
    "        return 'No' if above_cut_off else 'Sí'\n",
    "    return 'Sí' if above_cut_off else 'No'\n",
    "\n",
    "\n",
    "def get_dropdown(resp, valv):\n",
    "    \"\"\"Build a 'Sí'/'No' dropdown preselected with `resp`.\n",
    "\n",
    "    `valv` is accepted for call compatibility but not used.\n",
    "    \"\"\"\n",
    "    return widgets.Dropdown(\n",
    "        options=['Sí', 'No'],\n",
    "        value=resp,\n",
    "        layout=Layout(width='90%')\n",
    "    )\n",
    "\n",
    "\n",
    "# Probability threshold applied to each label by get_resp().\n",
    "cut_off = {'Válvula Aórtica Nativa' : 0.7, 'Válvula Mitral Nativa' : 0.7,\n",
    "           'Estenosis Aórtica' : 0.9, 'Estenosis Mitral' : 0.7,\n",
    "           'Insuficiencia Aórtica' : 0.7, 'Insuficiencia Mitral' : 0.7}\n",
    "\n",
    "# Classifier used for each label (same keys as cut_off).\n",
    "models = {'Válvula Aórtica Nativa' : model_prot_Ao, 'Válvula Mitral Nativa' : model_prot_Mv,\n",
    "          'Estenosis Aórtica' : model_est_Ao, 'Estenosis Mitral' : model_est_Mv,\n",
    "          'Insuficiencia Aórtica' : model_insf_Ao, 'Insuficiencia Mitral' : model_insf_Mv}\n",
    "\n",
    "# Dropdown currently shown for each label; filled by callback().\n",
    "dropdowns = {}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def callback(w):\n",
    "    \"\"\"Classify the text of the `conclusion` widget and show one dropdown per label.\n",
    "\n",
    "    Registered with submit.on_click(); `w` is the argument supplied by the\n",
    "    click event and is not used. Everything is rendered inside `output`, which\n",
    "    is cleared first. Problems are reported in `output` instead of being\n",
    "    silently discarded.\n",
    "    \"\"\"\n",
    "    with output:\n",
    "        clear_output()\n",
    "        try:\n",
    "            txt = tokenize_and_lemm_spacy_lst(clean_text(conclusion.value))\n",
    "            if not any(txt):\n",
    "                # nothing left after cleaning: a prediction on an all-zero\n",
    "                # vector would be meaningless\n",
    "                print('No hay texto para analizar.')\n",
    "                return\n",
    "\n",
    "            # a single report -> feature matrix with one row\n",
    "            X_w2v = np.array([get_w2v_features(W2V, txt)])\n",
    "            for valv in cut_off:\n",
    "                dropdowns[valv] = get_dropdown(get_resp(X_w2v, models[valv], cut_off, valv), valv)\n",
    "\n",
    "            vboxes = [widgets.VBox([widgets.HTML('%s' % valv), dropdown],\n",
    "                                   layout=Layout(width='200px'))\n",
    "                      for valv, dropdown in dropdowns.items()]\n",
    "\n",
    "            display(widgets.HBox(vboxes, layout=widgets.Layout(width='70%')),\n",
    "                    widgets.Button(description='Commit'))\n",
    "        except Exception as err:\n",
    "            print('No se pudo procesar el texto: %r' % (err,))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def callback_ej(w):\n",
    "    \"\"\"Put the first column of a randomly sampled row of `ejemplos` into `conclusion`.\n",
    "\n",
    "    Registered with ejemplo.on_click(); `w` is not used.\n",
    "    \"\"\"\n",
    "    conclusion.value = ejemplos.sample(n=1).iat[0, 0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# App (Demo)\n",
    "\n",
    "[comment]: <> ( )\n",
    "\n",
    "## Conclusión Estudio Ecocardiográfico\n",
    "\n",
    "El botón ejemplo genera ejemplos aleatorios de reportes ecocardiográficos."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b1a016b8b3374872bf47cdeff1ce9f3b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "VBox(children=(Button(description='Ejemplo', style=ButtonStyle()), VBox(children=(Textarea(value='', layout=La…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Free-text area holding the report conclusion to classify.\n",
    "conclusion = widgets.Textarea(description='', placeholder='Type something',\n",
    "                              continuous_update=True, disabled=False,\n",
    "                              layout= Layout(width='80%', height='140px'))\n",
    "\n",
    "tab2a = widgets.VBox(children=[conclusion])\n",
    "\n",
    "# 'Ejemplo' fills the text area with a random example, 'Submit' runs the classifiers.\n",
    "ejemplo = widgets.Button(description='Ejemplo')\n",
    "ejemplo.on_click(callback_ej)\n",
    "submit = widgets.Button(description='Submit')\n",
    "submit.on_click(callback)\n",
    "\n",
    "# Area where callback() renders its results and messages.\n",
    "output = widgets.Output()\n",
    "gui = widgets.VBox(children=[ejemplo,tab2a, submit, output])\n",
    "gui"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Environment (conda_nlp)",
   "language": "python",
   "name": "conda_nlp"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}