{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Behind the pipeline (PyTorch)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Install the Transformers, Datasets, and Evaluate libraries to run this notebook." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!pip install datasets evaluate transformers[sentencepiece]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'label': 'POSITIVE', 'score': 0.9598047137260437},\n", " {'label': 'NEGATIVE', 'score': 0.9994558095932007}]" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers import pipeline\n", "\n", "classifier = pipeline(\"sentiment-analysis\")\n", "classifier(\n", " [\n", " \"I've been waiting for a HuggingFace course my whole life.\",\n", " \"I hate this so much!\",\n", " ]\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from transformers import AutoTokenizer\n", "\n", "checkpoint = \"distilbert-base-uncased-finetuned-sst-2-english\"\n", "tokenizer = AutoTokenizer.from_pretrained(checkpoint)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{\n", " 'input_ids': tensor([\n", " [ 101, 1045, 1005, 2310, 2042, 3403, 2005, 1037, 17662, 12172, 2607, 2026, 2878, 2166, 1012, 102],\n", " [ 101, 1045, 5223, 2023, 2061, 2172, 999, 102, 0, 0, 0, 0, 0, 0, 0, 0]\n", " ]), \n", " 'attention_mask': tensor([\n", " [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],\n", " [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]\n", " ])\n", "}" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "raw_inputs = [\n", " \"I've been waiting for a HuggingFace course my whole life.\",\n", " \"I hate this so much!\",\n", "]\n", "inputs = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors=\"pt\")\n", "print(inputs)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from transformers import AutoModel\n", "\n", "checkpoint = \"distilbert-base-uncased-finetuned-sst-2-english\"\n", "model = AutoModel.from_pretrained(checkpoint)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([2, 16, 768])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "outputs = model(**inputs)\n", "print(outputs.last_hidden_state.shape)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from transformers import AutoModelForSequenceClassification\n", "\n", "checkpoint = \"distilbert-base-uncased-finetuned-sst-2-english\"\n", "model = AutoModelForSequenceClassification.from_pretrained(checkpoint)\n", "outputs = model(**inputs)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([2, 2])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(outputs.logits.shape)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[-1.5607, 1.6123],\n", " [ 4.1692, -3.3464]], grad_fn=)" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(outputs.logits)" ] }, { "cell_type": "code", "execution_count": 
null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[4.0195e-02, 9.5980e-01],\n", " [9.9946e-01, 5.4418e-04]], grad_fn=)" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import torch\n", "\n", "predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)\n", "print(predictions)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{0: 'NEGATIVE', 1: 'POSITIVE'}" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.config.id2label" ] } ], "metadata": { "colab": { "name": "Behind the pipeline (PyTorch)", "provenance": [] } }, "nbformat": 4, "nbformat_minor": 4 }