{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Install" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "conda install pytorch torchvision -c soumith" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Import" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2017-07-19T11:41:19.018807", "start_time": "2017-07-19T11:41:18.554327" }, "collapsed": true }, "outputs": [], "source": [ "import torch" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Tutorial" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "http://pytorch.org/tutorials/beginner/blitz/tensor_tutorial.html\n", "\n", "http://pytorch.org/tutorials/" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2017-07-19T11:41:38.380807", "start_time": "2017-07-19T11:41:38.375868" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "1.00000e-44 *\n", " 0.0000 0.0000 0.0000\n", " 0.0000 1.6816 0.0000\n", " 0.0000 0.0000 0.0000\n", " 0.0000 0.0000 0.0000\n", " 0.0000 0.0000 0.0000\n", "[torch.FloatTensor of size 5x3]\n", "\n" ] } ], "source": [ "x = torch.Tensor(5, 3)\n", "print(x)" ] }, { "cell_type": "markdown", "metadata": { "ExecuteTime": { "end_time": "2017-07-19T11:50:33.223870", "start_time": "2017-07-19T11:50:33.219677" } }, "source": [ "# nn module\n", "\n", "http://pytorch.org/tutorials/beginner/pytorch_with_examples.html#nn-module" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": "2017-07-19T11:49:26.062175", "start_time": "2017-07-19T11:49:25.556717" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(0, 717.2719116210938)\n", "(50, 35.097198486328125)\n", "(100, 1.8821511268615723)\n", "(150, 0.1728428155183792)\n", "(200, 0.02194761298596859)\n", "(250, 0.0034840735606849194)\n", "(300, 0.0006572074489668012)\n", "(350, 0.00014404028479475528)\n", "(400, 3.580378324841149e-05)\n", "(450, 9.810625670070294e-06)\n" ] } ], "source": [ "import torch\n", "from torch.autograd import Variable\n", "\n", "# N is batch size; D_in is input dimension;\n", "# H is hidden dimension; D_out is output dimension.\n", "N, D_in, H, D_out = 64, 1000, 100, 10\n", "\n", "# Create random Tensors to hold inputs and outputs, and wrap them in Variables.\n", "x = Variable(torch.randn(N, D_in))\n", "y = Variable(torch.randn(N, D_out), requires_grad=False)\n", "\n", "# Use the nn package to define our model as a sequence of layers. nn.Sequential\n", "# is a Module which contains other Modules, and applies them in sequence to\n", "# produce its output. Each Linear Module computes output from input using a\n", "# linear function, and holds internal Variables for its weight and bias.\n", "model = torch.nn.Sequential(\n", " torch.nn.Linear(D_in, H),\n", " torch.nn.ReLU(),\n", " torch.nn.Linear(H, D_out),\n", ")\n", "\n", "# The nn package also contains definitions of popular loss functions; in this\n", "# case we will use Mean Squared Error (MSE) as our loss function.\n", "loss_fn = torch.nn.MSELoss(size_average=False)\n", "\n", "learning_rate = 1e-4\n", "for t in range(500):\n", " # Forward pass: compute predicted y by passing x to the model. Module objects\n", " # override the __call__ operator so you can call them like functions. When\n", " # doing so you pass a Variable of input data to the Module and it produces\n", " # a Variable of output data.\n", " y_pred = model(x)\n", "\n", " # Compute and print loss. We pass Variables containing the predicted and true\n", " # values of y, and the loss function returns a Variable containing the\n", " # loss.\n", " loss = loss_fn(y_pred, y)\n", " if t%50 == 0:\n", " print(t, loss.data[0])\n", "\n", " # Zero the gradients before running the backward pass.\n", " model.zero_grad()\n", "\n", " # Backward pass: compute gradient of the loss with respect to all the learnable\n", " # parameters of the model. Internally, the parameters of each Module are stored\n", " # in Variables with requires_grad=True, so this call will compute gradients for\n", " # all learnable parameters in the model.\n", " loss.backward()\n", "\n", " # Update the weights using gradient descent. Each parameter is a Variable, so\n", " # we can access its data and gradients like we did before.\n", " for param in model.parameters():\n", " param.data -= learning_rate * param.grad.data" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "https://github.com/huggingface/pytorch-transformers" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Pytorch transformers\n", "\n", "A library of state-of-the-art pretrained models for Natural Language Processing (NLP) " ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2019-07-24T03:21:58.231454Z", "start_time": "2019-07-24T03:21:31.477599Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting pytorch-transformers\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/40/b5/2d78e74001af0152ee61d5ad4e290aec9a1e43925b21df2dc74ec100f1ab/pytorch_transformers-1.0.0-py3-none-any.whl (137kB)\n", "\u001b[K 100% |████████████████████████████████| 143kB 488kB/s ta 0:00:01\n", "\u001b[?25hCollecting sentencepiece (from pytorch-transformers)\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/99/8c/ca2c3ab61848526e85146aef40bfb7b399c7e70b1686a43b82d44cf1690f/sentencepiece-0.1.82-cp37-cp37m-macosx_10_6_x86_64.whl (1.1MB)\n", "\u001b[K 100% |████████████████████████████████| 1.1MB 11.9MB/s ta 0:00:01\n", "\u001b[?25hRequirement already satisfied: torch>=0.4.1 in /Users/datalab/anaconda3/lib/python3.7/site-packages (from pytorch-transformers) (1.1.0)\n", "Requirement already satisfied: numpy in /Users/datalab/anaconda3/lib/python3.7/site-packages (from pytorch-transformers) (1.16.2)\n", "Requirement already satisfied: tqdm in /Users/datalab/anaconda3/lib/python3.7/site-packages (from pytorch-transformers) (4.31.1)\n", "Collecting boto3 (from pytorch-transformers)\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/39/82/608bb4a689dc543d09555e70ffc0e180bd72df76d53b68bf8891d7cbba91/boto3-1.9.194-py2.py3-none-any.whl (128kB)\n", "\u001b[K 100% |████████████████████████████████| 133kB 15.4MB/s ta 0:00:01\n", "\u001b[?25hRequirement already satisfied: requests in /Users/datalab/anaconda3/lib/python3.7/site-packages (from pytorch-transformers) (2.21.0)\n", "Collecting regex (from pytorch-transformers)\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/6f/4e/1b178c38c9a1a184288f72065a65ca01f3154df43c6ad898624149b8b4e0/regex-2019.06.08.tar.gz (651kB)\n", "\u001b[K 100% |████████████████████████████████| 655kB 14.1MB/s ta 0:00:01\n", "\u001b[?25hCollecting jmespath<1.0.0,>=0.7.1 (from boto3->pytorch-transformers)\n", " Downloading https://files.pythonhosted.org/packages/83/94/7179c3832a6d45b266ddb2aac329e101367fbdb11f425f13771d27f225bb/jmespath-0.9.4-py2.py3-none-any.whl\n", "Collecting botocore<1.13.0,>=1.12.194 (from boto3->pytorch-transformers)\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/7b/9f/f7206b658d764f1258bd8af056c71fd0d9792973f88c3045ab2faefd2362/botocore-1.12.194-py2.py3-none-any.whl (5.6MB)\n", "\u001b[K 100% |████████████████████████████████| 5.6MB 5.3MB/s eta 0:00:01\n", "\u001b[?25hCollecting s3transfer<0.3.0,>=0.2.0 (from boto3->pytorch-transformers)\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/16/8a/1fc3dba0c4923c2a76e1ff0d52b305c44606da63f718d14d3231e21c51b0/s3transfer-0.2.1-py2.py3-none-any.whl (70kB)\n", "\u001b[K 100% |████████████████████████████████| 71kB 17.4MB/s ta 0:00:01\n", "\u001b[?25hRequirement already satisfied: urllib3<1.25,>=1.21.1 in /Users/datalab/anaconda3/lib/python3.7/site-packages (from requests->pytorch-transformers) (1.24.1)\n", "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /Users/datalab/anaconda3/lib/python3.7/site-packages (from requests->pytorch-transformers) (3.0.4)\n", "Requirement already satisfied: idna<2.9,>=2.5 in /Users/datalab/anaconda3/lib/python3.7/site-packages (from requests->pytorch-transformers) (2.8)\n", "Requirement already satisfied: certifi>=2017.4.17 in /Users/datalab/anaconda3/lib/python3.7/site-packages (from requests->pytorch-transformers) (2019.3.9)\n", "Requirement already satisfied: python-dateutil<3.0.0,>=2.1; python_version >= \"2.7\" in /Users/datalab/anaconda3/lib/python3.7/site-packages (from botocore<1.13.0,>=1.12.194->boto3->pytorch-transformers) (2.8.0)\n", "Requirement already satisfied: docutils<0.15,>=0.10 in /Users/datalab/anaconda3/lib/python3.7/site-packages (from botocore<1.13.0,>=1.12.194->boto3->pytorch-transformers) (0.14)\n", "Requirement already satisfied: six>=1.5 in /Users/datalab/anaconda3/lib/python3.7/site-packages (from python-dateutil<3.0.0,>=2.1; python_version >= \"2.7\"->botocore<1.13.0,>=1.12.194->boto3->pytorch-transformers) (1.12.0)\n", "Building wheels for collected packages: regex\n", " Building wheel for regex (setup.py) ... \u001b[?25ldone\n", "\u001b[?25h Stored in directory: /Users/datalab/Library/Caches/pip/wheels/35/e4/80/abf3b33ba89cf65cd262af8a22a5a999cc28fbfabea6b38473\n", "Successfully built regex\n", "Installing collected packages: sentencepiece, jmespath, botocore, s3transfer, boto3, regex, pytorch-transformers\n", "Successfully installed boto3-1.9.194 botocore-1.12.194 jmespath-0.9.4 pytorch-transformers-1.0.0 regex-2019.6.8 s3transfer-0.2.1 sentencepiece-0.1.82\n" ] } ], "source": [ "!pip install pytorch-transformers\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2019-07-25T07:55:16.859028Z", "start_time": "2019-07-25T07:55:16.353340Z" } }, "outputs": [], "source": [ "import torch\n", "from pytorch_transformers import *\n", "\n", "# PyTorch-Transformers has a unified API\n", "# for 6 transformer architectures and 27 pretrained weights.\n", "# Model | Tokenizer | Pretrained weights shortcut\n", "MODELS = [(BertModel, BertTokenizer, 'bert-base-uncased'),\n", " (OpenAIGPTModel, OpenAIGPTTokenizer, 'openai-gpt'),\n", " (GPT2Model, GPT2Tokenizer, 'gpt2'),\n", " (TransfoXLModel, TransfoXLTokenizer, 'transfo-xl-wt103'),\n", " (XLNetModel, XLNetTokenizer, 'xlnet-base-cased'),\n", " (XLMModel, XLMTokenizer, 'xlm-mlm-enfr-1024')]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Quickstart" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2019-07-30T06:53:12.542237Z", "start_time": "2019-07-30T06:53:11.205291Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:pytorch_transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /Users/datalab/.cache/torch/pytorch_transformers/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084\n" ] } ], "source": [ "import torch\n", "from pytorch_transformers import BertTokenizer, BertModel, BertForMaskedLM\n", "\n", "# OPTIONAL: if you want to have more information on what's happening under the hood, activate the logger as follows\n", "import logging\n", "logging.basicConfig(level=logging.INFO)\n", "\n", "# Load pre-trained model tokenizer (vocabulary)\n", "tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')\n", "\n", "# Tokenize input\n", "text = \"[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]\"\n", "tokenized_text = tokenizer.tokenize(text)\n", "\n", "# Mask a token that we will try to predict back with `BertForMaskedLM`\n", "masked_index = 8\n", "tokenized_text[masked_index] = '[MASK]'\n", "assert tokenized_text == ['[CLS]', 'who', 'was', 'jim', 'henson', '?', '[SEP]', 'jim', '[MASK]', 'was', 'a', 'puppet', '##eer', '[SEP]']\n", "\n", "# Convert token to vocabulary indices\n", "indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)\n", "# Define sentence A and B indices associated to 1st and 2nd sentences (see paper)\n", "segments_ids = [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]\n", "\n", "# Convert inputs to PyTorch tensors\n", "tokens_tensor = torch.tensor([indexed_tokens])\n", "segments_tensors = torch.tensor([segments_ids])" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2019-07-30T06:53:15.779534Z", "start_time": "2019-07-30T06:53:15.769669Z" } }, "outputs": [ { "data": { "text/plain": [ "['[CLS]',\n", " 'who',\n", " 'was',\n", " 'jim',\n", " 'henson',\n", " '?',\n", " '[SEP]',\n", " 'jim',\n", " '[MASK]',\n", " 'was',\n", " 'a',\n", " 'puppet',\n", " '##eer',\n", " '[SEP]']" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tokenized_text" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "ExecuteTime": { "end_time": "2019-07-30T06:59:13.241708Z", "start_time": "2019-07-30T06:59:13.236345Z" } }, "outputs": [], "source": [ "?BertModel.from_pretrained" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "ExecuteTime": { "end_time": "2019-07-30T07:01:57.930107Z", "start_time": "2019-07-30T07:01:57.423263Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:pytorch_transformers.modeling_utils:loading configuration file /Users/datalab/bigdata/bert-base-uncased.bin\n" ] }, { "ename": "UnicodeDecodeError", "evalue": "'utf-8' codec can't decode byte 0x80 in position 0: invalid start byte", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mUnicodeDecodeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mBertModel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_pretrained\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mu\"/Users/datalab/bigdata/bert-base-uncased.bin\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pytorch_transformers/modeling_utils.py\u001b[0m in \u001b[0;36mfrom_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[1;32m 360\u001b[0m \u001b[0;31m# Load config\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 361\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mconfig\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 362\u001b[0;31m \u001b[0mconfig\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfig_class\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_pretrained\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpretrained_model_name_or_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 363\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 364\u001b[0m \u001b[0;31m# Load model\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pytorch_transformers/modeling_utils.py\u001b[0m in \u001b[0;36mfrom_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *input, **kwargs)\u001b[0m\n\u001b[1;32m 137\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 138\u001b[0m \u001b[0;31m# Load config\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 139\u001b[0;31m \u001b[0mconfig\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_json_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresolved_config_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 140\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 141\u001b[0m \u001b[0;31m# Update config with kwargs if needed\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pytorch_transformers/modeling_utils.py\u001b[0m in \u001b[0;36mfrom_json_file\u001b[0;34m(cls, json_file)\u001b[0m\n\u001b[1;32m 163\u001b[0m \u001b[0;34m\"\"\"Constructs a `BertConfig` from a json file of parameters.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 164\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjson_file\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"r\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencoding\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'utf-8'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mreader\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 165\u001b[0;31m \u001b[0mtext\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mreader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 166\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloads\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 167\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.7/codecs.py\u001b[0m in \u001b[0;36mdecode\u001b[0;34m(self, input, final)\u001b[0m\n\u001b[1;32m 320\u001b[0m \u001b[0;31m# decode input (taking the buffer into account)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuffer\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 322\u001b[0;31m \u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconsumed\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_buffer_decode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfinal\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 323\u001b[0m \u001b[0;31m# keep undecoded input until the next call\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 324\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuffer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mconsumed\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mUnicodeDecodeError\u001b[0m: 'utf-8' codec can't decode byte 0x80 in position 0: invalid start byte" ] } ], "source": [ "model = BertModel.from_pretrained(u\"/Users/datalab/bigdata/bert-base-uncased.bin\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2019-07-30T06:53:27.103659Z", "start_time": "2019-07-30T06:53:26.228049Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:pytorch_transformers.modeling_utils:loading configuration file /Users/datalab/bigdata/bert-base-uncased-pytorch_model.bin\n" ] }, { "ename": "UnicodeDecodeError", "evalue": "'utf-8' codec can't decode byte 0x80 in position 0: invalid start byte", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mUnicodeDecodeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# Load pre-trained model (weights)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mBertModel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_pretrained\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'/Users/datalab/bigdata/bert-base-uncased-pytorch_model.bin'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;31m# Set the model in evaluation mode to desactivate the DropOut modules\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;31m# This is IMPORTANT to have reproductible results during evaluation!\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pytorch_transformers/modeling_utils.py\u001b[0m in \u001b[0;36mfrom_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[1;32m 360\u001b[0m \u001b[0;31m# Load config\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 361\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mconfig\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 362\u001b[0;31m \u001b[0mconfig\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfig_class\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_pretrained\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpretrained_model_name_or_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 363\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 364\u001b[0m \u001b[0;31m# Load model\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pytorch_transformers/modeling_utils.py\u001b[0m in \u001b[0;36mfrom_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *input, **kwargs)\u001b[0m\n\u001b[1;32m 137\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 138\u001b[0m \u001b[0;31m# Load config\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 139\u001b[0;31m \u001b[0mconfig\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_json_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresolved_config_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 140\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 141\u001b[0m \u001b[0;31m# Update config with kwargs if needed\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pytorch_transformers/modeling_utils.py\u001b[0m in \u001b[0;36mfrom_json_file\u001b[0;34m(cls, json_file)\u001b[0m\n\u001b[1;32m 163\u001b[0m \u001b[0;34m\"\"\"Constructs a `BertConfig` from a json file of parameters.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 164\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjson_file\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"r\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencoding\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'utf-8'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mreader\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 165\u001b[0;31m \u001b[0mtext\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mreader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 166\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloads\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 167\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.7/codecs.py\u001b[0m in \u001b[0;36mdecode\u001b[0;34m(self, input, final)\u001b[0m\n\u001b[1;32m 320\u001b[0m \u001b[0;31m# decode input (taking the buffer into account)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuffer\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 322\u001b[0;31m \u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconsumed\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_buffer_decode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfinal\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 323\u001b[0m \u001b[0;31m# keep undecoded input until the next call\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 324\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuffer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mconsumed\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mUnicodeDecodeError\u001b[0m: 'utf-8' codec can't decode byte 0x80 in position 0: invalid start byte" ] } ], "source": [ "# Load pre-trained model (weights)\n", "model = BertModel.from_pretrained('/Users/datalab/bigdata/bert-base-uncased-pytorch_model.bin')\n", "\n", "# Set the model in evaluation mode to desactivate the DropOut modules\n", "# This is IMPORTANT to have reproductible results during evaluation!\n", "model.eval()\n", "\n", "# If you have a GPU, put everything on cuda\n", "# tokens_tensor = tokens_tensor.to('cuda')\n", "# segments_tensors = segments_tensors.to('cuda')\n", "# model.to('cuda') \n", "\n", "# Predict hidden states features for each layer\n", "with torch.no_grad():\n", " # See the models docstrings for the detail of the inputs\n", " outputs = model(tokens_tensor, token_type_ids=segments_tensors)\n", " # PyTorch-Transformers models always output tuples.\n", " # See the models docstrings for the detail of all the outputs\n", " # In our case, the first element is the hidden state of the last layer of the Bert model\n", " encoded_layers = outputs[0]\n", "# We have encoded our input sequence in a FloatTensor of shape (batch size, sequence length, model hidden dimension)\n", "assert tuple(encoded_layers.shape) == (1, len(indexed_tokens), model.config.hidden_size)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Load pre-trained model (weights)\n", "model = BertForMaskedLM.from_pretrained('bert-base-uncased')\n", "model.eval()\n", "\n", "# If you have a GPU, put everything on cuda\n", "tokens_tensor = tokens_tensor.to('cuda')\n", "segments_tensors = segments_tensors.to('cuda')\n", "model.to('cuda')\n", "\n", "# Predict all tokens\n", "with torch.no_grad():\n", " outputs = model(tokens_tensor, token_type_ids=segments_tensors)\n", " predictions = outputs[0]\n", "\n", "# confirm we were able to predict 'henson'\n", "predicted_index = torch.argmax(predictions[0, masked_index]).item()\n", "predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]\n", "assert predicted_token == 'henson'" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" }, "latex_envs": { "LaTeX_envs_menu_present": true, "autoclose": false, "autocomplete": true, "bibliofile": "biblio.bib", "cite_by": "apalike", "current_citInitial": 1, "eqLabelWithNumbers": true, "eqNumInitial": 0, "hotkeys": { "equation": "Ctrl-E", "itemize": "Ctrl-I" }, "labels_anchors": false, "latex_user_defs": false, "report_style_numbering": false, "user_envs_cfg": false }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": false, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 1 }