{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Install"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "conda install pytorch torchvision -c soumith"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Import"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-07-19T11:41:19.018807",
     "start_time": "2017-07-19T11:41:18.554327"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import torch"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Tutorial"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "http://pytorch.org/tutorials/beginner/blitz/tensor_tutorial.html\n",
    "\n",
    "http://pytorch.org/tutorials/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-07-19T11:41:38.380807",
     "start_time": "2017-07-19T11:41:38.375868"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "1.00000e-44 *\n",
      "  0.0000  0.0000  0.0000\n",
      "  0.0000  1.6816  0.0000\n",
      "  0.0000  0.0000  0.0000\n",
      "  0.0000  0.0000  0.0000\n",
      "  0.0000  0.0000  0.0000\n",
      "[torch.FloatTensor of size 5x3]\n",
      "\n"
     ]
    }
   ],
   "source": [
    "x = torch.Tensor(5, 3)\n",
    "print(x)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-07-19T11:50:33.223870",
     "start_time": "2017-07-19T11:50:33.219677"
    }
   },
   "source": [
    "# nn module\n",
    "\n",
    "http://pytorch.org/tutorials/beginner/pytorch_with_examples.html#nn-module"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-07-19T11:49:26.062175",
     "start_time": "2017-07-19T11:49:25.556717"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(0, 717.2719116210938)\n",
      "(50, 35.097198486328125)\n",
      "(100, 1.8821511268615723)\n",
      "(150, 0.1728428155183792)\n",
      "(200, 0.02194761298596859)\n",
      "(250, 0.0034840735606849194)\n",
      "(300, 0.0006572074489668012)\n",
      "(350, 0.00014404028479475528)\n",
      "(400, 3.580378324841149e-05)\n",
      "(450, 9.810625670070294e-06)\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "from torch.autograd import Variable\n",
    "\n",
    "# N is batch size; D_in is input dimension;\n",
    "# H is hidden dimension; D_out is output dimension.\n",
    "N, D_in, H, D_out = 64, 1000, 100, 10\n",
    "\n",
    "# Create random Tensors to hold inputs and outputs, and wrap them in Variables.\n",
    "x = Variable(torch.randn(N, D_in))\n",
    "y = Variable(torch.randn(N, D_out), requires_grad=False)\n",
    "\n",
    "# Use the nn package to define our model as a sequence of layers. nn.Sequential\n",
    "# is a Module which contains other Modules, and applies them in sequence to\n",
    "# produce its output. Each Linear Module computes output from input using a\n",
    "# linear function, and holds internal Variables for its weight and bias.\n",
    "model = torch.nn.Sequential(\n",
    "    torch.nn.Linear(D_in, H),\n",
    "    torch.nn.ReLU(),\n",
    "    torch.nn.Linear(H, D_out),\n",
    ")\n",
    "\n",
    "# The nn package also contains definitions of popular loss functions; in this\n",
    "# case we will use Mean Squared Error (MSE) as our loss function.\n",
    "loss_fn = torch.nn.MSELoss(size_average=False)\n",
    "\n",
    "learning_rate = 1e-4\n",
    "for t in range(500):\n",
    "    # Forward pass: compute predicted y by passing x to the model. Module objects\n",
    "    # override the __call__ operator so you can call them like functions. When\n",
    "    # doing so you pass a Variable of input data to the Module and it produces\n",
    "    # a Variable of output data.\n",
    "    y_pred = model(x)\n",
    "\n",
    "    # Compute and print loss. We pass Variables containing the predicted and true\n",
    "    # values of y, and the loss function returns a Variable containing the\n",
    "    # loss.\n",
    "    loss = loss_fn(y_pred, y)\n",
    "    if t%50 == 0:\n",
    "        print(t, loss.data[0])\n",
    "\n",
    "    # Zero the gradients before running the backward pass.\n",
    "    model.zero_grad()\n",
    "\n",
    "    # Backward pass: compute gradient of the loss with respect to all the learnable\n",
    "    # parameters of the model. Internally, the parameters of each Module are stored\n",
    "    # in Variables with requires_grad=True, so this call will compute gradients for\n",
    "    # all learnable parameters in the model.\n",
    "    loss.backward()\n",
    "\n",
    "    # Update the weights using gradient descent. Each parameter is a Variable, so\n",
    "    # we can access its data and gradients like we did before.\n",
    "    for param in model.parameters():\n",
    "        param.data -= learning_rate * param.grad.data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "https://github.com/huggingface/pytorch-transformers"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Pytorch transformers\n",
    "\n",
    "A library of state-of-the-art pretrained models for Natural Language Processing (NLP) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-24T03:21:58.231454Z",
     "start_time": "2019-07-24T03:21:31.477599Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting pytorch-transformers\n",
      "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/40/b5/2d78e74001af0152ee61d5ad4e290aec9a1e43925b21df2dc74ec100f1ab/pytorch_transformers-1.0.0-py3-none-any.whl (137kB)\n",
      "\u001b[K    100% |████████████████████████████████| 143kB 488kB/s ta 0:00:01\n",
      "\u001b[?25hCollecting sentencepiece (from pytorch-transformers)\n",
      "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/99/8c/ca2c3ab61848526e85146aef40bfb7b399c7e70b1686a43b82d44cf1690f/sentencepiece-0.1.82-cp37-cp37m-macosx_10_6_x86_64.whl (1.1MB)\n",
      "\u001b[K    100% |████████████████████████████████| 1.1MB 11.9MB/s ta 0:00:01\n",
      "\u001b[?25hRequirement already satisfied: torch>=0.4.1 in /Users/datalab/anaconda3/lib/python3.7/site-packages (from pytorch-transformers) (1.1.0)\n",
      "Requirement already satisfied: numpy in /Users/datalab/anaconda3/lib/python3.7/site-packages (from pytorch-transformers) (1.16.2)\n",
      "Requirement already satisfied: tqdm in /Users/datalab/anaconda3/lib/python3.7/site-packages (from pytorch-transformers) (4.31.1)\n",
      "Collecting boto3 (from pytorch-transformers)\n",
      "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/39/82/608bb4a689dc543d09555e70ffc0e180bd72df76d53b68bf8891d7cbba91/boto3-1.9.194-py2.py3-none-any.whl (128kB)\n",
      "\u001b[K    100% |████████████████████████████████| 133kB 15.4MB/s ta 0:00:01\n",
      "\u001b[?25hRequirement already satisfied: requests in /Users/datalab/anaconda3/lib/python3.7/site-packages (from pytorch-transformers) (2.21.0)\n",
      "Collecting regex (from pytorch-transformers)\n",
      "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/6f/4e/1b178c38c9a1a184288f72065a65ca01f3154df43c6ad898624149b8b4e0/regex-2019.06.08.tar.gz (651kB)\n",
      "\u001b[K    100% |████████████████████████████████| 655kB 14.1MB/s ta 0:00:01\n",
      "\u001b[?25hCollecting jmespath<1.0.0,>=0.7.1 (from boto3->pytorch-transformers)\n",
      "  Downloading https://files.pythonhosted.org/packages/83/94/7179c3832a6d45b266ddb2aac329e101367fbdb11f425f13771d27f225bb/jmespath-0.9.4-py2.py3-none-any.whl\n",
      "Collecting botocore<1.13.0,>=1.12.194 (from boto3->pytorch-transformers)\n",
      "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/7b/9f/f7206b658d764f1258bd8af056c71fd0d9792973f88c3045ab2faefd2362/botocore-1.12.194-py2.py3-none-any.whl (5.6MB)\n",
      "\u001b[K    100% |████████████████████████████████| 5.6MB 5.3MB/s eta 0:00:01\n",
      "\u001b[?25hCollecting s3transfer<0.3.0,>=0.2.0 (from boto3->pytorch-transformers)\n",
      "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/16/8a/1fc3dba0c4923c2a76e1ff0d52b305c44606da63f718d14d3231e21c51b0/s3transfer-0.2.1-py2.py3-none-any.whl (70kB)\n",
      "\u001b[K    100% |████████████████████████████████| 71kB 17.4MB/s ta 0:00:01\n",
      "\u001b[?25hRequirement already satisfied: urllib3<1.25,>=1.21.1 in /Users/datalab/anaconda3/lib/python3.7/site-packages (from requests->pytorch-transformers) (1.24.1)\n",
      "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /Users/datalab/anaconda3/lib/python3.7/site-packages (from requests->pytorch-transformers) (3.0.4)\n",
      "Requirement already satisfied: idna<2.9,>=2.5 in /Users/datalab/anaconda3/lib/python3.7/site-packages (from requests->pytorch-transformers) (2.8)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /Users/datalab/anaconda3/lib/python3.7/site-packages (from requests->pytorch-transformers) (2019.3.9)\n",
      "Requirement already satisfied: python-dateutil<3.0.0,>=2.1; python_version >= \"2.7\" in /Users/datalab/anaconda3/lib/python3.7/site-packages (from botocore<1.13.0,>=1.12.194->boto3->pytorch-transformers) (2.8.0)\n",
      "Requirement already satisfied: docutils<0.15,>=0.10 in /Users/datalab/anaconda3/lib/python3.7/site-packages (from botocore<1.13.0,>=1.12.194->boto3->pytorch-transformers) (0.14)\n",
      "Requirement already satisfied: six>=1.5 in /Users/datalab/anaconda3/lib/python3.7/site-packages (from python-dateutil<3.0.0,>=2.1; python_version >= \"2.7\"->botocore<1.13.0,>=1.12.194->boto3->pytorch-transformers) (1.12.0)\n",
      "Building wheels for collected packages: regex\n",
      "  Building wheel for regex (setup.py) ... \u001b[?25ldone\n",
      "\u001b[?25h  Stored in directory: /Users/datalab/Library/Caches/pip/wheels/35/e4/80/abf3b33ba89cf65cd262af8a22a5a999cc28fbfabea6b38473\n",
      "Successfully built regex\n",
      "Installing collected packages: sentencepiece, jmespath, botocore, s3transfer, boto3, regex, pytorch-transformers\n",
      "Successfully installed boto3-1.9.194 botocore-1.12.194 jmespath-0.9.4 pytorch-transformers-1.0.0 regex-2019.6.8 s3transfer-0.2.1 sentencepiece-0.1.82\n"
     ]
    }
   ],
   "source": [
    "!pip install pytorch-transformers\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-25T07:55:16.859028Z",
     "start_time": "2019-07-25T07:55:16.353340Z"
    }
   },
   "outputs": [],
   "source": [
    "import torch\n",
    "from pytorch_transformers import *\n",
    "\n",
    "# PyTorch-Transformers has a unified API\n",
    "# for 6 transformer architectures and 27 pretrained weights.\n",
    "#          Model          | Tokenizer          | Pretrained weights shortcut\n",
    "MODELS = [(BertModel,       BertTokenizer,      'bert-base-uncased'),\n",
    "          (OpenAIGPTModel,  OpenAIGPTTokenizer, 'openai-gpt'),\n",
    "          (GPT2Model,       GPT2Tokenizer,      'gpt2'),\n",
    "          (TransfoXLModel,  TransfoXLTokenizer, 'transfo-xl-wt103'),\n",
    "          (XLNetModel,      XLNetTokenizer,     'xlnet-base-cased'),\n",
    "          (XLMModel,        XLMTokenizer,       'xlm-mlm-enfr-1024')]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Quickstart"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-30T06:53:12.542237Z",
     "start_time": "2019-07-30T06:53:11.205291Z"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:pytorch_transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /Users/datalab/.cache/torch/pytorch_transformers/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "from pytorch_transformers import BertTokenizer, BertModel, BertForMaskedLM\n",
    "\n",
    "# OPTIONAL: if you want to have more information on what's happening under the hood, activate the logger as follows\n",
    "import logging\n",
    "logging.basicConfig(level=logging.INFO)\n",
    "\n",
    "# Load pre-trained model tokenizer (vocabulary)\n",
    "tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')\n",
    "\n",
    "# Tokenize input\n",
    "text = \"[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]\"\n",
    "tokenized_text = tokenizer.tokenize(text)\n",
    "\n",
    "# Mask a token that we will try to predict back with `BertForMaskedLM`\n",
    "masked_index = 8\n",
    "tokenized_text[masked_index] = '[MASK]'\n",
    "assert tokenized_text == ['[CLS]', 'who', 'was', 'jim', 'henson', '?', '[SEP]', 'jim', '[MASK]', 'was', 'a', 'puppet', '##eer', '[SEP]']\n",
    "\n",
    "# Convert token to vocabulary indices\n",
    "indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)\n",
    "# Define sentence A and B indices associated to 1st and 2nd sentences (see paper)\n",
    "segments_ids = [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]\n",
    "\n",
    "# Convert inputs to PyTorch tensors\n",
    "tokens_tensor = torch.tensor([indexed_tokens])\n",
    "segments_tensors = torch.tensor([segments_ids])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-30T06:53:15.779534Z",
     "start_time": "2019-07-30T06:53:15.769669Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['[CLS]',\n",
       " 'who',\n",
       " 'was',\n",
       " 'jim',\n",
       " 'henson',\n",
       " '?',\n",
       " '[SEP]',\n",
       " 'jim',\n",
       " '[MASK]',\n",
       " 'was',\n",
       " 'a',\n",
       " 'puppet',\n",
       " '##eer',\n",
       " '[SEP]']"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tokenized_text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-30T06:59:13.241708Z",
     "start_time": "2019-07-30T06:59:13.236345Z"
    }
   },
   "outputs": [],
   "source": [
    "?BertModel.from_pretrained"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-30T07:01:57.930107Z",
     "start_time": "2019-07-30T07:01:57.423263Z"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:pytorch_transformers.modeling_utils:loading configuration file /Users/datalab/bigdata/bert-base-uncased.bin\n"
     ]
    },
    {
     "ename": "UnicodeDecodeError",
     "evalue": "'utf-8' codec can't decode byte 0x80 in position 0: invalid start byte",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mUnicodeDecodeError\u001b[0m                        Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-13-f9dd09faa64c>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mBertModel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_pretrained\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mu\"/Users/datalab/bigdata/bert-base-uncased.bin\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pytorch_transformers/modeling_utils.py\u001b[0m in \u001b[0;36mfrom_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[1;32m    360\u001b[0m         \u001b[0;31m# Load config\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    361\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mconfig\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 362\u001b[0;31m             \u001b[0mconfig\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfig_class\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_pretrained\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpretrained_model_name_or_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    363\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    364\u001b[0m         \u001b[0;31m# Load model\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pytorch_transformers/modeling_utils.py\u001b[0m in \u001b[0;36mfrom_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *input, **kwargs)\u001b[0m\n\u001b[1;32m    137\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    138\u001b[0m         \u001b[0;31m# Load config\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 139\u001b[0;31m         \u001b[0mconfig\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_json_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresolved_config_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    140\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    141\u001b[0m         \u001b[0;31m# Update config with kwargs if needed\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pytorch_transformers/modeling_utils.py\u001b[0m in \u001b[0;36mfrom_json_file\u001b[0;34m(cls, json_file)\u001b[0m\n\u001b[1;32m    163\u001b[0m         \u001b[0;34m\"\"\"Constructs a `BertConfig` from a json file of parameters.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    164\u001b[0m         \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjson_file\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"r\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencoding\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'utf-8'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mreader\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 165\u001b[0;31m             \u001b[0mtext\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mreader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    166\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloads\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    167\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/lib/python3.7/codecs.py\u001b[0m in \u001b[0;36mdecode\u001b[0;34m(self, input, final)\u001b[0m\n\u001b[1;32m    320\u001b[0m         \u001b[0;31m# decode input (taking the buffer into account)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    321\u001b[0m         \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuffer\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 322\u001b[0;31m         \u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconsumed\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_buffer_decode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfinal\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    323\u001b[0m         \u001b[0;31m# keep undecoded input until the next call\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    324\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuffer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mconsumed\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mUnicodeDecodeError\u001b[0m: 'utf-8' codec can't decode byte 0x80 in position 0: invalid start byte"
     ]
    }
   ],
   "source": [
    "model = BertModel.from_pretrained(u\"/Users/datalab/bigdata/bert-base-uncased.bin\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-30T06:53:27.103659Z",
     "start_time": "2019-07-30T06:53:26.228049Z"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:pytorch_transformers.modeling_utils:loading configuration file /Users/datalab/bigdata/bert-base-uncased-pytorch_model.bin\n"
     ]
    },
    {
     "ename": "UnicodeDecodeError",
     "evalue": "'utf-8' codec can't decode byte 0x80 in position 0: invalid start byte",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mUnicodeDecodeError\u001b[0m                        Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-3-a62f7f60e32b>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m# Load pre-trained model (weights)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mBertModel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_pretrained\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'/Users/datalab/bigdata/bert-base-uncased-pytorch_model.bin'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0;31m# Set the model in evaluation mode to desactivate the DropOut modules\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0;31m# This is IMPORTANT to have reproductible results during evaluation!\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pytorch_transformers/modeling_utils.py\u001b[0m in \u001b[0;36mfrom_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[1;32m    360\u001b[0m         \u001b[0;31m# Load config\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    361\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mconfig\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 362\u001b[0;31m             \u001b[0mconfig\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfig_class\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_pretrained\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpretrained_model_name_or_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    363\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    364\u001b[0m         \u001b[0;31m# Load model\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pytorch_transformers/modeling_utils.py\u001b[0m in \u001b[0;36mfrom_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *input, **kwargs)\u001b[0m\n\u001b[1;32m    137\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    138\u001b[0m         \u001b[0;31m# Load config\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 139\u001b[0;31m         \u001b[0mconfig\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_json_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresolved_config_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    140\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    141\u001b[0m         \u001b[0;31m# Update config with kwargs if needed\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pytorch_transformers/modeling_utils.py\u001b[0m in \u001b[0;36mfrom_json_file\u001b[0;34m(cls, json_file)\u001b[0m\n\u001b[1;32m    163\u001b[0m         \u001b[0;34m\"\"\"Constructs a `BertConfig` from a json file of parameters.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    164\u001b[0m         \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjson_file\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"r\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencoding\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'utf-8'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mreader\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 165\u001b[0;31m             \u001b[0mtext\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mreader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    166\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloads\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    167\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/lib/python3.7/codecs.py\u001b[0m in \u001b[0;36mdecode\u001b[0;34m(self, input, final)\u001b[0m\n\u001b[1;32m    320\u001b[0m         \u001b[0;31m# decode input (taking the buffer into account)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    321\u001b[0m         \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuffer\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 322\u001b[0;31m         \u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconsumed\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_buffer_decode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfinal\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    323\u001b[0m         \u001b[0;31m# keep undecoded input until the next call\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    324\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuffer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mconsumed\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mUnicodeDecodeError\u001b[0m: 'utf-8' codec can't decode byte 0x80 in position 0: invalid start byte"
     ]
    }
   ],
   "source": [
    "# Load pre-trained model (weights)\n",
    "model = BertModel.from_pretrained('/Users/datalab/bigdata/bert-base-uncased-pytorch_model.bin')\n",
    "\n",
    "# Set the model in evaluation mode to desactivate the DropOut modules\n",
    "# This is IMPORTANT to have reproductible results during evaluation!\n",
    "model.eval()\n",
    "\n",
    "# If you have a GPU, put everything on cuda\n",
    "# tokens_tensor = tokens_tensor.to('cuda')\n",
    "# segments_tensors = segments_tensors.to('cuda')\n",
    "# model.to('cuda') \n",
    "\n",
    "# Predict hidden states features for each layer\n",
    "with torch.no_grad():\n",
    "    # See the models docstrings for the detail of the inputs\n",
    "    outputs = model(tokens_tensor, token_type_ids=segments_tensors)\n",
    "    # PyTorch-Transformers models always output tuples.\n",
    "    # See the models docstrings for the detail of all the outputs\n",
    "    # In our case, the first element is the hidden state of the last layer of the Bert model\n",
    "    encoded_layers = outputs[0]\n",
    "# We have encoded our input sequence in a FloatTensor of shape (batch size, sequence length, model hidden dimension)\n",
    "assert tuple(encoded_layers.shape) == (1, len(indexed_tokens), model.config.hidden_size)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load pre-trained model (weights)\n",
    "model = BertForMaskedLM.from_pretrained('bert-base-uncased')\n",
    "model.eval()\n",
    "\n",
    "# If you have a GPU, put everything on cuda\n",
    "tokens_tensor = tokens_tensor.to('cuda')\n",
    "segments_tensors = segments_tensors.to('cuda')\n",
    "model.to('cuda')\n",
    "\n",
    "# Predict all tokens\n",
    "with torch.no_grad():\n",
    "    outputs = model(tokens_tensor, token_type_ids=segments_tensors)\n",
    "    predictions = outputs[0]\n",
    "\n",
    "# confirm we were able to predict 'henson'\n",
    "predicted_index = torch.argmax(predictions[0, masked_index]).item()\n",
    "predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]\n",
    "assert predicted_token == 'henson'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  },
  "latex_envs": {
   "LaTeX_envs_menu_present": true,
   "autoclose": false,
   "autocomplete": true,
   "bibliofile": "biblio.bib",
   "cite_by": "apalike",
   "current_citInitial": 1,
   "eqLabelWithNumbers": true,
   "eqNumInitial": 0,
   "hotkeys": {
    "equation": "Ctrl-E",
    "itemize": "Ctrl-I"
   },
   "labels_anchors": false,
   "latex_user_defs": false,
   "report_style_numbering": false,
   "user_envs_cfg": false
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": false,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}