{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# ¿Qué hacer cuando se produce un error?"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Install the Transformers, Datasets, and Evaluate libraries to run this notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install datasets evaluate transformers[sentencepiece]\n",
    "!apt install git-lfs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You will need to setup git, adapt your email and name in the following cell."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!git config --global user.email \"you@example.com\"\n",
    "!git config --global user.name \"Your Name\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You will also need to be logged in to the Hugging Face Hub. Execute the following and enter your credentials."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from huggingface_hub import notebook_login\n",
    "\n",
    "notebook_login()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from huggingface_hub import notebook_login\n",
    "\n",
    "notebook_login()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from distutils.dir_util import copy_tree\n",
    "from huggingface_hub import Repository, snapshot_download, create_repo, get_full_repo_name\n",
    "\n",
    "\n",
    "def copy_repository_template():\n",
    "    # Clona el repo y extrae la ruta local\n",
    "    template_repo_id = \"lewtun/distilbert-base-uncased-finetuned-squad-d5716d28\"\n",
    "    commit_hash = \"be3eaffc28669d7932492681cd5f3e8905e358b4\"\n",
    "    template_repo_dir = snapshot_download(template_repo_id, revision=commit_hash)\n",
    "    # Crea un repo vacío en el Hub\n",
    "    model_name = template_repo_id.split(\"/\")[1]\n",
    "    create_repo(model_name, exist_ok=True)\n",
    "    # Clona el repo vacío\n",
    "    new_repo_id = get_full_repo_name(model_name)\n",
    "    new_repo_dir = model_name\n",
    "    repo = Repository(local_dir=new_repo_dir, clone_from=new_repo_id)\n",
    "    # Copia los archivos\n",
    "    copy_tree(template_repo_dir, new_repo_dir)\n",
    "    # Envia (push) al Hub\n",
    "    repo.push_to_hub()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"\"\"\n",
       "OSError: Can't load config for 'lewtun/distillbert-base-uncased-finetuned-squad-d5716d28'. Make sure that:\n",
       "\n",
       "- 'lewtun/distillbert-base-uncased-finetuned-squad-d5716d28' is a correct model identifier listed on 'https://huggingface.co/models'\n",
       "\n",
       "- or 'lewtun/distillbert-base-uncased-finetuned-squad-d5716d28' is the correct path to a directory containing a config.json file\n",
       "\"\"\""
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from transformers import pipeline\n",
    "\n",
    "model_checkpoint = get_full_repo_name(\"distillbert-base-uncased-finetuned-squad-d5716d28\")\n",
    "reader = pipeline(\"question-answering\", model=model_checkpoint)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"\"\"\n",
       "OSError: Can't load config for 'lewtun/distilbert-base-uncased-finetuned-squad-d5716d28'. Make sure that:\n",
       "\n",
       "- 'lewtun/distilbert-base-uncased-finetuned-squad-d5716d28' is a correct model identifier listed on 'https://huggingface.co/models'\n",
       "\n",
       "- or 'lewtun/distilbert-base-uncased-finetuned-squad-d5716d28' is the correct path to a directory containing a config.json file\n",
       "\"\"\""
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_checkpoint = get_full_repo_name(\"distilbert-base-uncased-finetuned-squad-d5716d28\")\n",
    "reader = pipeline(\"question-answering\", model=model_checkpoint)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['.gitattributes', 'README.md', 'pytorch_model.bin', 'special_tokens_map.json', 'tokenizer_config.json', 'training_args.bin', 'vocab.txt']"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from huggingface_hub import list_repo_files\n",
    "\n",
    "list_repo_files(repo_id=model_checkpoint)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import AutoConfig\n",
    "\n",
    "pretrained_checkpoint = \"distilbert-base-uncased\"\n",
    "config = AutoConfig.from_pretrained(pretrained_checkpoint)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "config.push_to_hub(model_checkpoint, commit_message=\"Add config.json\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'score': 0.38669535517692566,\n",
       " 'start': 34,\n",
       " 'end': 95,\n",
       " 'answer': 'the task of extracting an answer from a text given a question'}\n",
       " # la tarea de extraer una respuesta de un texto a una pregunta dada"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reader = pipeline(\"question-answering\", model=model_checkpoint, revision=\"main\")\n",
    "\n",
    "context = r\"\"\"\n",
    "Extractive Question Answering is the task of extracting an answer from a text\n",
    "given a question. An example of a question answering dataset is the SQuAD\n",
    "dataset, which is entirely based on that task. If you would like to fine-tune a\n",
    "model on a SQuAD task, you may leverage the\n",
    "examples/pytorch/question-answering/run_squad.py script.\n",
    "\n",
    "🤗 Transformers is interoperable with the PyTorch, TensorFlow, and JAX\n",
    "frameworks, so you can use your favourite tools for a wide variety of tasks!\n",
    "\"\"\"\n",
    "\n",
    "context_es = r\"\"\"\n",
    "La respuesta a preguntas es la extracción de una respuesta textual a partir de \n",
    "una pregunta. Un ejemplo de conjunto de datos de respuesta a preguntas es el \n",
    "dataset SQuAD, que se basa por completo en esta tarea. Si deseas afinar un modelo \n",
    "en una tarea SQuAD, puedes aprovechar el script\n",
    " examples/pytorch/question-answering/run_squad.py\n",
    "\n",
    "🤗 Transformers es interoperable con los frameworks PyTorch, TensorFlow y JAX, \n",
    "así que ¡puedes utilizar tus herramientas favoritas para una gran variedad de tareas!\n",
    "\"\"\"\n",
    "\n",
    "question = \"What is extractive question answering?\"\n",
    "# ¿Qué es la respuesta extractiva a preguntas?\n",
    "reader(question=question, context=context)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tokenizer = reader.tokenizer\n",
    "model = reader.model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "question = \"Which frameworks can I use?\"  # ¿Qué frameworks puedo usar?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"\"\"\n",
       "---------------------------------------------------------------------------\n",
       "AttributeError                            Traceback (most recent call last)\n",
       "/var/folders/28/k4cy5q7s2hs92xq7_h89_vgm0000gn/T/ipykernel_75743/2725838073.py in <module>\n",
       "      1 inputs = tokenizer(question, text, add_special_tokens=True)\n",
       "      2 input_ids = inputs[\"input_ids\"]\n",
       "----> 3 outputs = model(**inputs)\n",
       "      4 answer_start_scores = outputs.start_logits\n",
       "      5 answer_end_scores = outputs.end_logits\n",
       "\n",
       "~/miniconda3/envs/huggingface/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)\n",
       "   1049         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks\n",
       "   1050                 or _global_forward_hooks or _global_forward_pre_hooks):\n",
       "-> 1051             return forward_call(*input, **kwargs)\n",
       "   1052         # Do not call functions when jit is used\n",
       "   1053         full_backward_hooks, non_full_backward_hooks = [], []\n",
       "\n",
       "~/miniconda3/envs/huggingface/lib/python3.8/site-packages/transformers/models/distilbert/modeling_distilbert.py in forward(self, input_ids, attention_mask, head_mask, inputs_embeds, start_positions, end_positions, output_attentions, output_hidden_states, return_dict)\n",
       "    723         return_dict = return_dict if return_dict is not None else self.config.use_return_dict\n",
       "    724\n",
       "--> 725         distilbert_output = self.distilbert(\n",
       "    726             input_ids=input_ids,\n",
       "    727             attention_mask=attention_mask,\n",
       "\n",
       "~/miniconda3/envs/huggingface/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)\n",
       "   1049         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks\n",
       "   1050                 or _global_forward_hooks or _global_forward_pre_hooks):\n",
       "-> 1051             return forward_call(*input, **kwargs)\n",
       "   1052         # Do not call functions when jit is used\n",
       "   1053         full_backward_hooks, non_full_backward_hooks = [], []\n",
       "\n",
       "~/miniconda3/envs/huggingface/lib/python3.8/site-packages/transformers/models/distilbert/modeling_distilbert.py in forward(self, input_ids, attention_mask, head_mask, inputs_embeds, output_attentions, output_hidden_states, return_dict)\n",
       "    471             raise ValueError(\"You cannot specify both input_ids and inputs_embeds at the same time\")\n",
       "    472         elif input_ids is not None:\n",
       "--> 473             input_shape = input_ids.size()\n",
       "    474         elif inputs_embeds is not None:\n",
       "    475             input_shape = inputs_embeds.size()[:-1]\n",
       "\n",
       "AttributeError: 'list' object has no attribute 'size'\n",
       "\"\"\""
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import torch\n",
    "\n",
    "inputs = tokenizer(question, context, add_special_tokens=True)\n",
    "input_ids = inputs[\"input_ids\"][0]\n",
    "outputs = model(**inputs)\n",
    "answer_start_scores = outputs.start_logits\n",
    "answer_end_scores = outputs.end_logits\n",
    "# Obtiene el comienzo más probable de la respuesta con el argmax de la puntuación\n",
    "answer_start = torch.argmax(answer_start_scores)\n",
    "# Obtiene el final más probable de la respuesta con el argmax de la puntuación\n",
    "answer_end = torch.argmax(answer_end_scores) + 1\n",
    "answer = tokenizer.convert_tokens_to_string(\n",
    "    tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end])\n",
    ")\n",
    "print(f\"Question: {question}\")\n",
    "print(f\"Answer: {answer}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[101, 2029, 7705, 2015, 2064]"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "inputs[\"input_ids\"][:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "list"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(inputs[\"input_ids\"])"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "name": "¿Qué hacer cuando se produce un error?",
   "provenance": []
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}