{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys, os\n",
    "\n",
    "# Disable print\n",
    "def blockPrint():\n",
    "    sys.stdout = open(os.devnull, 'w')\n",
    "    \n",
    "# Restore print\n",
    "def enablePrint():\n",
    "    sys.stdout = sys.__stdout__\n",
    "\n",
    "blockPrint()\n",
    "\n",
    "!python -m spacy download es_core_news_sm\n",
    "\n",
    "enablePrint()\n",
    "\n",
    "import pickle\n",
    "import re\n",
    "import numpy as np\n",
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")\n",
    "from spacy.lang.es.stop_words import STOP_WORDS\n",
    "import es_core_news_sm\n",
    "import spacy\n",
    "nlp = spacy.load('es_core_news_sm')\n",
    "import pandas as pd\n",
    "from ipywidgets import widgets, interactive, Layout\n",
    "from IPython.display import clear_output\n",
    "\n",
    "ejemplos = pd.read_csv('data/ejemplos.csv')\n",
    "\n",
    "with open('src/Final_Models/W2Vmodel.pickle', 'rb') as f:\n",
    "    W2V = pickle.load(f)\n",
    "with open('src/Final_Models/ML_model_est_Ao_SVM.pickle', 'rb') as f:\n",
    "    model_est_Ao = pickle.load(f)\n",
    "with open('src/Final_Models/ML_model_est_Mv_SVM.pickle', 'rb') as f:\n",
    "    model_est_Mv = pickle.load(f)\n",
    "with open('src/Final_Models/ML_model_insf_Ao_SVM.pickle', 'rb') as f:\n",
    "    model_insf_Ao = pickle.load(f)\n",
    "with open('src/Final_Models/ML_model_insf_Mv_SVM.pickle', 'rb') as f:\n",
    "    model_insf_Mv = pickle.load(f)\n",
    "with open('src/Final_Models/ML_model_prot_Ao_SVM.pickle', 'rb') as f:\n",
    "    model_prot_Ao = pickle.load(f)\n",
    "with open('src/Final_Models/ML_model_prot_Mv_SVM.pickle', 'rb') as f:\n",
    "    model_prot_Mv = pickle.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def multiple_replace(text):\n",
    "    dic = {r\"cia a\" : \"ciaa\",\"cia m\" : \"ciam\",\"sis a\" : \"sisa\", \"sis m\" : \"sism\",\n",
    "           'á' : 'a', 'é' : 'e', 'í' : 'i', 'ó' : 'o', 'ú' : 'u' }\n",
    "    # Create a regular expression  from the dictionary keys\n",
    "    regex = re.compile(\"(%s)\" % \"|\".join(map(re.escape, dic.keys())))\n",
    "    # For each match, look-up corresponding value in dictionary\n",
    "    return regex.sub(lambda mo: dic[mo.string[mo.start():mo.end()]], text.lower())\n",
    "\n",
    "def clean_text(text):\n",
    "    # Create a regular expression  from the dictionary keys\n",
    "    if isinstance(text, str):\n",
    "        dic = { r'[^\\w.]' : ' ', '[ 0-9 ]' : ''}\n",
    "        regex = re.compile(r'(%s)' % \"|\".join(dic.keys()))\n",
    "        lst = regex.sub(lambda mo: dic[[ k for k in dic if re.search(k, mo.string[mo.start():mo.end()])][0]], text).lower()\n",
    "    else:\n",
    "        lst = ' '\n",
    "    return [el for el in multiple_replace(lst).split('.') if re.search(\"[a-z]\", el)]\n",
    "\n",
    "def tokenize_and_lemm_spacy(text):\n",
    "    doc = nlp(text)\n",
    "    lemmas = []\n",
    "    for token in doc:\n",
    "        if token.is_stop== False: # aprovechamos para eliminar ya las stopwords\n",
    "            if token.is_alpha== True: # Nos quedamos solo con los tokens que contienen letras \n",
    "                if token.pos_ not in ['CONJ', 'ADP', 'DET']: # eliminamos nombres propios, conjunciones, determinantes\n",
    "                    lemmas.append(token.lemma_.lower())\n",
    "    return lemmas\n",
    "\n",
    "def tokenize_and_lemm_spacy_lst(lst):\n",
    "    return [tokenize_and_lemm_spacy(el) for el in lst]\n",
    "\n",
    "def get_w2v_features(w2v_model, sentence_group):\n",
    "    \"\"\" Transform a sentence_group (containing multiple lists\n",
    "    of words) into a feature vector. It averages out all the\n",
    "    word vectors of the sentence_group.\n",
    "    \"\"\"\n",
    "    words = np.concatenate(sentence_group)  # words in text\n",
    "    index2word_set = set(w2v_model.wv.vocab.keys())  # words known to model\n",
    "    \n",
    "    featureVec = np.zeros(w2v_model.vector_size, dtype=\"float32\")\n",
    "    \n",
    "    # Initialize a counter for number of words in a review\n",
    "    nwords = 0\n",
    "    # Loop over each word in the comment and, if it is in the model's vocabulary, add its feature vector to the total\n",
    "    for word in words:\n",
    "        if word in index2word_set: \n",
    "            featureVec = np.add(featureVec, w2v_model[word])\n",
    "            nwords += 1.\n",
    "\n",
    "    # Divide the result by the number of words to get the average\n",
    "    if nwords > 0:\n",
    "        featureVec = np.divide(featureVec, nwords)\n",
    "    return featureVec"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_resp(X, model, cut_off, valv):\n",
    "    prob = model.predict_proba(X)\n",
    "    if 'Nativa' in valv:\n",
    "        if prob[:,1] > cut_off[valv]:\n",
    "            respuesta = 'No'\n",
    "        else:\n",
    "            respuesta = 'Sí'\n",
    "    else:\n",
    "        if prob[:,1] > cut_off[valv]:\n",
    "            respuesta = 'Sí'\n",
    "        else:\n",
    "            respuesta = 'No'\n",
    "    return respuesta\n",
    "\n",
    "def get_dropdown(resp, valv):\n",
    "    dropdown = widgets.Dropdown(\n",
    "        options=['Sí', 'No'],\n",
    "        value = resp,\n",
    "        layout= Layout(width='90%')\n",
    "    )\n",
    "    return dropdown\n",
    "\n",
    "cut_off = {'Válvula Aórtica Nativa' : 0.7, 'Válvula Mitral Nativa' : 0.7, \n",
    "           'Estenosis Aórtica' : 0.9, 'Estenosis Mitral' : 0.7, \n",
    "           'Insuficiencia Aórtica' : 0.7, 'Insuficiencia Mitral' : 0.7}\n",
    "\n",
    "models = {'Válvula Aórtica Nativa' : model_prot_Ao, 'Válvula Mitral Nativa' : model_prot_Mv, \n",
    "          'Estenosis Aórtica' : model_est_Ao, 'Estenosis Mitral' : model_est_Mv, \n",
    "          'Insuficiencia Aórtica' : model_insf_Ao, 'Insuficiencia Mitral' : model_insf_Mv}\n",
    "\n",
    "dropdowns = {}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def callback(w):\n",
    "    with output:\n",
    "        clear_output()\n",
    "        try:\n",
    "            txt = tokenize_and_lemm_spacy_lst(clean_text(conclusion.value))\n",
    "            w2v_features = list(map(lambda sen_group: get_w2v_features(W2V, sen_group), [txt]))\n",
    "            X_w2v = np.array(list(map(np.array, w2v_features)))\n",
    "            for valv in cut_off.keys():\n",
    "                dropdowns[valv] = get_dropdown(get_resp(X_w2v, models[valv],cut_off, valv),valv)\n",
    "            \n",
    "            cols = [(valv, [dropdowns[valv]]) for i, valv in enumerate(dropdowns.keys())]\n",
    "        \n",
    "            vboxes = []\n",
    "            for valv, data in cols:\n",
    "                vboxes.append(widgets.VBox([widgets.HTML('<b>%s</b>' % valv)] + data, layout= Layout(width='200px')))\n",
    "            \n",
    "            display(widgets.HBox(vboxes, layout=widgets.Layout(width='70%')), widgets.Button(description='Commit'))\n",
    "        except:\n",
    "            pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def callback_ej(w):\n",
    "    conclusion.value = ejemplos.sample(n=1).values[0][0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# App (Demo)\n",
    "\n",
    "[comment]: <> (<iframe src=\"https://giphy.com/embed/urZZ5RET1pfR6\" width=\"420\" height=\"300\" frameBorder=\"0\" class=\"giphy-embed\" allowFullScreen></iframe><p><a href=\"https://giphy.com/gifs/urZZ5RET1pfR6\"></a></p>)\n",
    "\n",
    "## Conclusión Estudio Ecocardiográfico\n",
    "\n",
    "El botón ejemplo genera ejemplos aleatorios de reportes ecocardiográficos."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b1a016b8b3374872bf47cdeff1ce9f3b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "VBox(children=(Button(description='Ejemplo', style=ButtonStyle()), VBox(children=(Textarea(value='', layout=La…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "conclusion = widgets.Textarea(description='', placeholder='Type something',\n",
    "                               continuous_update=True, disabled=False,\n",
    "                               layout= Layout(width='80%', height='140px'))\n",
    "\n",
    "tab2a = widgets.VBox(children=[conclusion])\n",
    "ejemplo = widgets.Button(description='Ejemplo')\n",
    "ejemplo.on_click(callback_ej)\n",
    "submit = widgets.Button(description='Submit')\n",
    "submit.on_click(callback)\n",
    "\n",
    "output = widgets.Output()\n",
    "gui = widgets.VBox(children=[ejemplo,tab2a, submit, output])\n",
    "gui"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Environment (conda_nlp)",
   "language": "python",
   "name": "conda_nlp"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}