{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2022-10-22T21:58:13.927758Z", "iopub.status.busy": "2022-10-22T21:58:13.927382Z", "iopub.status.idle": "2022-10-22T21:58:16.109866Z", "shell.execute_reply": "2022-10-22T21:58:16.108310Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: tokenizer in /usr/local/lib/python3.9/dist-packages (3.4.2)\n", "Collecting datasets\n", " Downloading datasets-2.7.1-py3-none-any.whl (451 kB)\n", "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m451.7/451.7 KB\u001B[0m \u001B[31m34.3 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", "\u001B[?25hRequirement already satisfied: sentencepiece in /usr/local/lib/python3.9/dist-packages (0.1.97)\n", "Requirement already satisfied: protobuf==3.20.0 in /usr/local/lib/python3.9/dist-packages (3.20.0)\n", "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.9/dist-packages (from datasets) (1.23.5)\n", "Collecting fsspec[http]>=2021.11.1\n", " Downloading fsspec-2022.11.0-py3-none-any.whl (139 kB)\n", "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m139.5/139.5 KB\u001B[0m \u001B[31m38.6 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", "\u001B[?25hRequirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.9/dist-packages (from datasets) (4.64.1)\n", "Collecting xxhash\n", " Downloading xxhash-3.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)\n", "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m212.0/212.0 KB\u001B[0m \u001B[31m46.3 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", "\u001B[?25hCollecting multiprocess\n", " Downloading multiprocess-0.70.14-py39-none-any.whl (132 kB)\n", "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m132.9/132.9 KB\u001B[0m \u001B[31m37.1 MB/s\u001B[0m eta 
\u001B[36m0:00:00\u001B[0m\n", "\u001B[?25hCollecting pyarrow>=6.0.0\n", " Downloading pyarrow-10.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (35.9 MB)\n", "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m35.9/35.9 MB\u001B[0m \u001B[31m71.6 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m00:01\u001B[0m00:01\u001B[0m\n", "\u001B[?25hCollecting pandas\n", " Downloading pandas-1.5.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.2 MB)\n", "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m12.2/12.2 MB\u001B[0m \u001B[31m127.0 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m00:01\u001B[0m00:01\u001B[0m\n", "\u001B[?25hRequirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.9/dist-packages (from datasets) (6.0)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.9/dist-packages (from datasets) (22.0)\n", "Requirement already satisfied: huggingface-hub<1.0.0,>=0.2.0 in /usr/local/lib/python3.9/dist-packages (from datasets) (0.11.1)\n", "Collecting aiohttp\n", " Downloading aiohttp-3.8.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)\n", "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m1.0/1.0 MB\u001B[0m \u001B[31m84.3 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", "\u001B[?25hCollecting dill<0.3.7\n", " Downloading dill-0.3.6-py3-none-any.whl (110 kB)\n", "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m110.5/110.5 KB\u001B[0m \u001B[31m27.7 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", "\u001B[?25hRequirement already satisfied: requests>=2.19.0 in /usr/lib/python3/dist-packages (from datasets) (2.22.0)\n", "Collecting responses<0.19\n", " Downloading responses-0.18.0-py3-none-any.whl (38 kB)\n", "Collecting aiosignal>=1.1.2\n", " Downloading aiosignal-1.3.1-py3-none-any.whl (7.6 kB)\n", "Collecting yarl<2.0,>=1.0\n", " Downloading 
yarl-1.8.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (264 kB)\n", "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m264.6/264.6 KB\u001B[0m \u001B[31m62.1 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", "\u001B[?25hCollecting frozenlist>=1.1.1\n", " Downloading frozenlist-1.3.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (158 kB)\n", "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m158.8/158.8 KB\u001B[0m \u001B[31m31.4 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", "\u001B[?25hRequirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.9/dist-packages (from aiohttp->datasets) (22.1.0)\n", "Collecting async-timeout<5.0,>=4.0.0a3\n", " Downloading async_timeout-4.0.2-py3-none-any.whl (5.8 kB)\n", "Collecting charset-normalizer<3.0,>=2.0\n", " Downloading charset_normalizer-2.1.1-py3-none-any.whl (39 kB)\n", "Collecting multidict<7.0,>=4.5\n", " Downloading multidict-6.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (114 kB)\n", "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m114.2/114.2 KB\u001B[0m \u001B[31m28.1 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", "\u001B[?25hRequirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.9/dist-packages (from huggingface-hub<1.0.0,>=0.2.0->datasets) (4.4.0)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.9/dist-packages (from huggingface-hub<1.0.0,>=0.2.0->datasets) (3.8.2)\n", "Collecting urllib3>=1.25.10\n", " Downloading urllib3-1.26.13-py2.py3-none-any.whl (140 kB)\n", "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m140.6/140.6 KB\u001B[0m \u001B[31m21.1 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", "\u001B[?25hRequirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.9/dist-packages (from pandas->datasets) (2.8.2)\n", "Collecting 
pytz>=2020.1\n", " Downloading pytz-2022.6-py2.py3-none-any.whl (498 kB)\n", "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m498.1/498.1 KB\u001B[0m \u001B[31m55.9 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", "\u001B[?25hRequirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.8.1->pandas->datasets) (1.14.0)\n", "Requirement already satisfied: idna>=2.0 in /usr/lib/python3/dist-packages (from yarl<2.0,>=1.0->aiohttp->datasets) (2.8)\n", "Installing collected packages: pytz, xxhash, urllib3, pyarrow, multidict, fsspec, frozenlist, dill, charset-normalizer, async-timeout, yarl, responses, pandas, multiprocess, aiosignal, aiohttp, datasets\n", " Attempting uninstall: urllib3\n", " Found existing installation: urllib3 1.25.8\n", " Uninstalling urllib3-1.25.8:\n", " Successfully uninstalled urllib3-1.25.8\n", "Successfully installed aiohttp-3.8.3 aiosignal-1.3.1 async-timeout-4.0.2 charset-normalizer-2.1.1 datasets-2.7.1 dill-0.3.6 frozenlist-1.3.3 fsspec-2022.11.0 multidict-6.0.3 multiprocess-0.70.14 pandas-1.5.2 pyarrow-10.0.1 pytz-2022.6 responses-0.18.0 urllib3-1.26.13 xxhash-3.1.0 yarl-1.8.2\n", "\u001B[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001B[0m\u001B[33m\n", "\u001B[0m\u001B[33mWARNING: You are using pip version 22.0.4; however, version 22.3.1 is available.\n", "You should consider upgrading via the '/usr/bin/python3.9 -m pip install --upgrade pip' command.\u001B[0m\u001B[33m\n", "\u001B[0mThu Dec 15 13:32:01 2022 \n", "+-----------------------------------------------------------------------------+\n", "| NVIDIA-SMI 470.57.02 Driver Version: 470.57.02 CUDA Version: 11.6 |\n", "|-------------------------------+----------------------+----------------------+\n", "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", "| | | MIG M. |\n", "|===============================+======================+======================|\n", "| 0 NVIDIA A10G On | 00000000:00:1E.0 Off | 0 |\n", "| 0% 29C P0 69W / 300W | 2154MiB / 22731MiB | 0% Default |\n", "| | | N/A |\n", "+-------------------------------+----------------------+----------------------+\n", " \n", "+-----------------------------------------------------------------------------+\n", "| Processes: |\n", "| GPU GI CI PID Type Process name GPU Memory |\n", "| ID ID Usage |\n", "|=============================================================================|\n", "+-----------------------------------------------------------------------------+\n" ] } ], "source": [ "# Install the notebook's dependencies into the environment of the running kernel.\n", "# %pip is used instead of a `! pip` shell escape, which runs whichever pip is first on PATH.\n", "# transformers is listed explicitly because the import cell that follows uses it.\n", "# NOTE(review): 'tokenizer' is kept unchanged; confirm it is wanted and is not a typo for 'tokenizers'.\n", "%pip install transformers tokenizer datasets sentencepiece protobuf==3.20.0\n", "! 
nvidia-smi" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2022-10-22T21:58:16.116662Z", "iopub.status.busy": "2022-10-22T21:58:16.116143Z", "iopub.status.idle": "2022-10-22T21:58:17.279364Z", "shell.execute_reply": "2022-10-22T21:58:17.278660Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (1.26.13) or chardet (3.0.4) doesn't match a supported version!\n", " warnings.warn(\"urllib3 ({}) or chardet ({}) doesn't match a supported \"\n" ] } ], "source": [ "from transformers import AutoModelForSequenceClassification, AutoTokenizer\n", "from datasets import load_dataset\n", "import time\n", "import torch" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2022-10-22T21:58:17.284240Z", "iopub.status.busy": "2022-10-22T21:58:17.283941Z", "iopub.status.idle": "2022-10-22T21:58:24.270745Z", "shell.execute_reply": "2022-10-22T21:58:24.269721Z" } }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "fd37851a08234e43b091d332a8ab7348", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading: 0%| | 0.00/882 [00:00