{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Uncomment and run this cell if you're on Colab or Kaggle\n", "# !git clone https://github.com/nlp-with-transformers/notebooks.git\n", "# %cd notebooks\n", "# from install import *\n", "# install_requirements()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#hide\n", "# Import only the helper this notebook calls instead of a wildcard import\n", "from utils import setup_chapter\n", "setup_chapter()" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "# Hello Transformers" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "\"transformer-timeline\"" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## The Encoder-Decoder Framework" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "\"rnn\"" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "\"enc-dec\"" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Attention Mechanisms" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "\"enc-dec-attn\" " ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "\"attention-alignment\" " ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "\"transformer-self-attn\" " ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Transfer Learning in NLP" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "\"transfer-learning\" " ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "\"ulmfit\"" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Hugging Face Transformers: Bridging the Gap" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## A Tour of Transformer Applications" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Sample customer complaint that every pipeline in this tour is run on\n", "text = \"\"\"Dear Amazon, last week I ordered an Optimus Prime action figure \\\n", "from your online store in Germany. Unfortunately, when I opened the package, \\\n", "I discovered to my horror that I had been sent an action figure of Megatron \\\n", "instead! As a lifelong enemy of the Decepticons, I hope you can understand my \\\n", "dilemma. To resolve the issue, I demand an exchange of Megatron for the \\\n", "Optimus Prime figure I ordered. Enclosed are copies of my records concerning \\\n", "this purchase. I expect to hear from you soon. Sincerely, Bumblebee.\"\"\"" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Text Classification" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#hide_output\n", "from transformers import pipeline\n", "\n", "# Pin the checkpoint explicitly rather than relying on the task's default model\n", "classifier = pipeline(\"text-classification\",\n", "                      model=\"distilbert-base-uncased-finetuned-sst-2-english\")" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelscore
0NEGATIVE0.901546
\n", "
" ], "text/plain": [ " label score\n", "0 NEGATIVE 0.901546" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "\n", "# Classify the complaint and show the label/score pairs as a table\n", "outputs = classifier(text)\n", "pd.DataFrame(outputs)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Named Entity Recognition" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
entity_groupscorewordstartend
0ORG0.879010Amazon511
1MISC0.990859Optimus Prime3649
2LOC0.999755Germany9097
3MISC0.556569Mega208212
4PER0.590256##tron212216
5ORG0.669692Decept253259
6MISC0.498350##icons259264
7MISC0.775361Megatron350358
8MISC0.987854Optimus Prime367380
9PER0.812096Bumblebee502511
\n", "
" ], "text/plain": [ " entity_group score word start end\n", "0 ORG 0.879010 Amazon 5 11\n", "1 MISC 0.990859 Optimus Prime 36 49\n", "2 LOC 0.999755 Germany 90 97\n", "3 MISC 0.556569 Mega 208 212\n", "4 PER 0.590256 ##tron 212 216\n", "5 ORG 0.669692 Decept 253 259\n", "6 MISC 0.498350 ##icons 259 264\n", "7 MISC 0.775361 Megatron 350 358\n", "8 MISC 0.987854 Optimus Prime 367 380\n", "9 PER 0.812096 Bumblebee 502 511" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Pin the checkpoint explicitly rather than relying on the task's default model\n", "ner_tagger = pipeline(\"ner\",\n", "                      model=\"dbmdz/bert-large-cased-finetuned-conll03-english\",\n", "                      aggregation_strategy=\"simple\")\n", "outputs = ner_tagger(text)\n", "pd.DataFrame(outputs)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Question Answering " ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
scorestartendanswer
00.631291335358an exchange of Megatron
\n", "
" ], "text/plain": [ " score start end answer\n", "0 0.631291 335 358 an exchange of Megatron" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Pin the checkpoint explicitly rather than relying on the task's default model\n", "reader = pipeline(\"question-answering\",\n", "                  model=\"distilbert-base-cased-distilled-squad\")\n", "question = \"What does the customer want?\"\n", "outputs = reader(question=question, context=text)\n", "# Wrap the single result in a list so it renders as a one-row table\n", "pd.DataFrame([outputs])" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Summarization" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Bumblebee ordered an Optimus Prime action figure from your online store in\n", "Germany. Unfortunately, when I opened the package, I discovered to my horror\n", "that I had been sent an action figure of Megatron instead.\n" ] } ], "source": [ "# Pin the checkpoint explicitly rather than relying on the task's default model\n", "summarizer = pipeline(\"summarization\", model=\"sshleifer/distilbart-cnn-12-6\")\n", "outputs = summarizer(text, max_length=45, clean_up_tokenization_spaces=True)\n", "print(outputs[0]['summary_text'])" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Translation" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Sehr geehrter Amazon, letzte Woche habe ich eine Optimus Prime Action Figur aus\n", "Ihrem Online-Shop in Deutschland bestellt. Leider, als ich das Paket öffnete,\n", "entdeckte ich zu meinem Entsetzen, dass ich stattdessen eine Action Figur von\n", "Megatron geschickt worden war! Als lebenslanger Feind der Decepticons, Ich\n", "hoffe, Sie können mein Dilemma verstehen. Um das Problem zu lösen, Ich fordere\n", "einen Austausch von Megatron für die Optimus Prime Figur habe ich bestellt.\n", "Anbei sind Kopien meiner Aufzeichnungen über diesen Kauf. Ich erwarte, bald von\n", "Ihnen zu hören. Aufrichtig, Bumblebee.\n" ] } ], "source": [ "translator = pipeline(\"translation_en_to_de\",\n", "                      model=\"Helsinki-NLP/opus-mt-en-de\")\n", "outputs = translator(text, clean_up_tokenization_spaces=True, min_length=100)\n", "print(outputs[0]['translation_text'])" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Text Generation" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#hide\n", "from transformers import set_seed\n", "set_seed(42)  # Set the seed to get reproducible results" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Dear Amazon, last week I ordered an Optimus Prime action figure from your online\n", "store in Germany. Unfortunately, when I opened the package, I discovered to my\n", "horror that I had been sent an action figure of Megatron instead! As a lifelong\n", "enemy of the Decepticons, I hope you can understand my dilemma. To resolve the\n", "issue, I demand an exchange of Megatron for the Optimus Prime figure I ordered.\n", "Enclosed are copies of my records concerning this purchase. I expect to hear\n", "from you soon. Sincerely, Bumblebee.\n", "\n", "Customer service response:\n", "Dear Bumblebee, I am sorry to hear that your order was mixed up. The order was\n", "completely mislabeled, which is very common in our online store, but I can\n", "appreciate it because it was my understanding from this site and our customer\n", "service of the previous day that your order was not made correct in our mind and\n", "that we are in a process of resolving this matter. We can assure you that your\n", "order\n" ] } ], "source": [ "# Pin the checkpoint explicitly rather than relying on the task's default model\n", "generator = pipeline(\"text-generation\", model=\"gpt2\")\n", "response = \"Dear Bumblebee, I am sorry to hear that your order was mixed up.\"\n", "# Seed the model with the complaint plus the opening line of the reply\n", "prompt = text + \"\\n\\nCustomer service response:\\n\" + response\n", "outputs = generator(prompt, max_length=200)\n", "print(outputs[0]['generated_text'])" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## The Hugging Face Ecosystem" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "\"ecosystem\"" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### The Hugging Face Hub" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "\"hub-overview\" " ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "\"hub-model-card\" " ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Hugging Face Tokenizers" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Hugging Face Datasets" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Hugging Face Accelerate" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Main Challenges with Transformers" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Conclusion" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 4 }