{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook contains the code samples from the video below, which is part of the [Hugging Face course](https://huggingface.co/course)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "cellView": "form"
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       ""
      ],
      "text/plain": [
       ""
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#@title\n",
    "from IPython.display import HTML\n",
    "\n",
    "HTML('')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Install the Transformers, Datasets and PyTorch libraries to run this notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install datasets torch \"transformers[sentencepiece]\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
    "\n",
    "model = AutoModelForCausalLM.from_pretrained(\"gpt2\")\n",
    "tokenizer = AutoTokenizer.from_pretrained(\"gpt2\")\n",
    "\n",
    "inputs = tokenizer(\"Hugging Face is a startup based in New York City and Paris\",\n",
    "                   return_tensors=\"pt\")\n",
    "\n",
    "# Evaluation only: no gradients are needed, so skip autograd bookkeeping.\n",
    "with torch.no_grad():\n",
    "    # Supplying the input ids as labels makes the model return a loss for this text.\n",
    "    loss = model(input_ids=inputs[\"input_ids\"],\n",
    "                 labels=inputs[\"input_ids\"]).loss\n",
    "\n",
    "# Perplexity is computed as the exponential of that loss.\n",
    "ppl = torch.exp(loss)\n",
    "\n",
    "print(f\"Perplexity: {ppl.item():.2f}\")"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "name": "What is perplexity?",
   "provenance": []
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}