{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "2b6e79bc",
   "metadata": {},
   "source": [
    "<div style=\"width: 100%; overflow: hidden;\">\n",
    "    <div style=\"width: 150px; float: left;\"> <img src=\"https://raw.githubusercontent.com/DataForScience/Networks/master/data/D4Sci_logo_ball.png\" alt=\"Data For Science, Inc\" align=\"left\" border=\"0\" width=150px> </div>\n",
    "    <div style=\"float: left; margin-left: 10px;\"> <h1>ChatGPT And Friends</h1>\n",
    "<h1>HuggingFace BERT</h1>\n",
    "        <p>Bruno Gonçalves<br/>\n",
    "        <a href=\"http://www.data4sci.com/\">www.data4sci.com</a><br/>\n",
    "            @bgoncalves, @data4sci</p></div>\n",
    "</div>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "d9c10129",
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import Counter\n",
    "from pprint import pprint\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt \n",
    "\n",
    "from ipywidgets import interact\n",
    "\n",
    "import transformers\n",
    "from transformers import pipeline\n",
    "from transformers import set_seed\n",
    "set_seed(42) # Set the seed to get reproducible results\n",
    "\n",
    "import os\n",
    "import gzip\n",
    "\n",
    "import tqdm as tq\n",
    "from tqdm.notebook import tqdm\n",
    "tqdm.pandas()\n",
    "\n",
    "import networkx as nx\n",
    "\n",
    "import watermark\n",
    "\n",
    "%load_ext watermark\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8e9080b1",
   "metadata": {},
   "source": [
    "We start by printing out the versions of the libraries we're using for future reference"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "f403bf31",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Python implementation: CPython\n",
      "Python version       : 3.11.7\n",
      "IPython version      : 8.12.3\n",
      "\n",
      "Compiler    : Clang 14.0.6 \n",
      "OS          : Darwin\n",
      "Release     : 23.6.0\n",
      "Machine     : arm64\n",
      "Processor   : arm\n",
      "CPU cores   : 16\n",
      "Architecture: 64bit\n",
      "\n",
      "Git hash: b44900a26f10de8fbaf559b307e69185828b77b4\n",
      "\n",
      "watermark   : 2.4.3\n",
      "numpy       : 1.26.4\n",
      "pandas      : 2.2.3\n",
      "transformers: 4.41.1\n",
      "matplotlib  : 3.8.0\n",
      "tqdm        : 4.66.4\n",
      "networkx    : 3.3\n",
      "json        : 2.0.9\n",
      "\n"
     ]
    }
   ],
   "source": [
    "%watermark -n -v -m -g -iv"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a25b4af6",
   "metadata": {},
   "source": [
    "Load default figure style"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "f20a41d1",
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.style.use('d4sci.mplstyle')\n",
    "colors = plt.rcParams['axes.prop_cycle'].by_key()['color']"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "18721cee",
   "metadata": {},
   "source": [
    "# Code"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b88ce2aa",
   "metadata": {},
   "source": [
    "### Unmasking"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "630bb007",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']\n",
      "- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
      "- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
     ]
    }
   ],
   "source": [
    "unmasker = pipeline('fill-mask', model='bert-base-uncased')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "bfd9dd97",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'score': 0.31824299693107605,\n",
       "  'token': 2064,\n",
       "  'token_str': 'can',\n",
       "  'sequence': 'artificial intelligence can take over the world.'},\n",
       " {'score': 0.18299730122089386,\n",
       "  'token': 2097,\n",
       "  'token_str': 'will',\n",
       "  'sequence': 'artificial intelligence will take over the world.'},\n",
       " {'score': 0.0560012087225914,\n",
       "  'token': 2000,\n",
       "  'token_str': 'to',\n",
       "  'sequence': 'artificial intelligence to take over the world.'},\n",
       " {'score': 0.045194774866104126,\n",
       "  'token': 2015,\n",
       "  'token_str': '##s',\n",
       "  'sequence': 'artificial intelligences take over the world.'},\n",
       " {'score': 0.045152731239795685,\n",
       "  'token': 2052,\n",
       "  'token_str': 'would',\n",
       "  'sequence': 'artificial intelligence would take over the world.'}]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "unmasker(\"Artificial Intelligence [MASK] take over the world.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "b18f9f0e",
   "metadata": {},
   "outputs": [],
   "source": [
    "?unmasker"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "faa7638f",
   "metadata": {},
   "source": [
    "### Model Bias"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "934fe9a6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'score': 0.09747567027807236,\n",
       "  'token': 10533,\n",
       "  'token_str': 'carpenter',\n",
       "  'sequence': 'the man worked as a carpenter.'},\n",
       " {'score': 0.05238327011466026,\n",
       "  'token': 15610,\n",
       "  'token_str': 'waiter',\n",
       "  'sequence': 'the man worked as a waiter.'},\n",
       " {'score': 0.04962737113237381,\n",
       "  'token': 13362,\n",
       "  'token_str': 'barber',\n",
       "  'sequence': 'the man worked as a barber.'},\n",
       " {'score': 0.03788601979613304,\n",
       "  'token': 15893,\n",
       "  'token_str': 'mechanic',\n",
       "  'sequence': 'the man worked as a mechanic.'},\n",
       " {'score': 0.037680596113204956,\n",
       "  'token': 18968,\n",
       "  'token_str': 'salesman',\n",
       "  'sequence': 'the man worked as a salesman.'}]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "unmasker(\"The man worked as a [MASK].\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "64c7a87a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'score': 0.21981653571128845,\n",
       "  'token': 6821,\n",
       "  'token_str': 'nurse',\n",
       "  'sequence': 'the woman worked as a nurse.'},\n",
       " {'score': 0.1597415953874588,\n",
       "  'token': 13877,\n",
       "  'token_str': 'waitress',\n",
       "  'sequence': 'the woman worked as a waitress.'},\n",
       " {'score': 0.11547262966632843,\n",
       "  'token': 10850,\n",
       "  'token_str': 'maid',\n",
       "  'sequence': 'the woman worked as a maid.'},\n",
       " {'score': 0.03796852380037308,\n",
       "  'token': 19215,\n",
       "  'token_str': 'prostitute',\n",
       "  'sequence': 'the woman worked as a prostitute.'},\n",
       " {'score': 0.030423782765865326,\n",
       "  'token': 5660,\n",
       "  'token_str': 'cook',\n",
       "  'sequence': 'the woman worked as a cook.'}]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "unmasker(\"The woman worked as a [MASK].\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "dd933eee",
   "metadata": {},
   "source": [
    "### Named Entity Recognition"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "0d1b4a6d",
   "metadata": {},
   "outputs": [],
   "source": [
    "text = \"\"\"Dear Amazon, last week I ordered an Optimus Prime action figure \\\n",
    "from your online store in Germany. Unfortunately, when I opened the package, \\\n",
    "I discovered to my horror that I had been sent an action figure of Megatron \\\n",
    "instead! As a lifelong enemy of the Decepticons, I hope you can understand my \\\n",
    "dilemma. To resolve the issue, I demand an exchange of Megatron for the \\\n",
    "Optimus Prime figure I ordered. Enclosed are copies of my records concerning \\\n",
    "this purchase. I expect to hear from you soon. Sincerely, Bumblebee.\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "28ec1272",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "No model was supplied, defaulted to dbmdz/bert-large-cased-finetuned-conll03-english and revision f2482bf (https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english).\n",
      "Using a pipeline without specifying a model name and revision in production is not recommended.\n",
      "/opt/anaconda3/lib/python3.11/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
      "  warnings.warn(\n",
      "Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']\n",
      "- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
      "- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
     ]
    }
   ],
   "source": [
    "ner_tagger = pipeline(\"ner\", aggregation_strategy=\"simple\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "29f3837b",
   "metadata": {},
   "outputs": [],
   "source": [
    "?ner_tagger"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "7e99987c",
   "metadata": {},
   "outputs": [],
   "source": [
    "outputs = ner_tagger(text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "66fbe51d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'entity_group': 'ORG',\n",
       "  'score': 0.8790102,\n",
       "  'word': 'Amazon',\n",
       "  'start': 5,\n",
       "  'end': 11},\n",
       " {'entity_group': 'MISC',\n",
       "  'score': 0.9908588,\n",
       "  'word': 'Optimus Prime',\n",
       "  'start': 36,\n",
       "  'end': 49},\n",
       " {'entity_group': 'LOC',\n",
       "  'score': 0.9997547,\n",
       "  'word': 'Germany',\n",
       "  'start': 90,\n",
       "  'end': 97},\n",
       " {'entity_group': 'MISC',\n",
       "  'score': 0.5565716,\n",
       "  'word': 'Mega',\n",
       "  'start': 208,\n",
       "  'end': 212},\n",
       " {'entity_group': 'PER',\n",
       "  'score': 0.59025526,\n",
       "  'word': '##tron',\n",
       "  'start': 212,\n",
       "  'end': 216},\n",
       " {'entity_group': 'ORG',\n",
       "  'score': 0.66969275,\n",
       "  'word': 'Decept',\n",
       "  'start': 253,\n",
       "  'end': 259},\n",
       " {'entity_group': 'MISC',\n",
       "  'score': 0.4983484,\n",
       "  'word': '##icons',\n",
       "  'start': 259,\n",
       "  'end': 264},\n",
       " {'entity_group': 'MISC',\n",
       "  'score': 0.7753625,\n",
       "  'word': 'Megatron',\n",
       "  'start': 350,\n",
       "  'end': 358},\n",
       " {'entity_group': 'MISC',\n",
       "  'score': 0.98785394,\n",
       "  'word': 'Optimus Prime',\n",
       "  'start': 367,\n",
       "  'end': 380},\n",
       " {'entity_group': 'PER',\n",
       "  'score': 0.8120968,\n",
       "  'word': 'Bumblebee',\n",
       "  'start': 502,\n",
       "  'end': 511}]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "outputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "cdd13436",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>entity_group</th>\n",
       "      <th>score</th>\n",
       "      <th>word</th>\n",
       "      <th>start</th>\n",
       "      <th>end</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>ORG</td>\n",
       "      <td>0.879010</td>\n",
       "      <td>Amazon</td>\n",
       "      <td>5</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>MISC</td>\n",
       "      <td>0.990859</td>\n",
       "      <td>Optimus Prime</td>\n",
       "      <td>36</td>\n",
       "      <td>49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>LOC</td>\n",
       "      <td>0.999755</td>\n",
       "      <td>Germany</td>\n",
       "      <td>90</td>\n",
       "      <td>97</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>MISC</td>\n",
       "      <td>0.556572</td>\n",
       "      <td>Mega</td>\n",
       "      <td>208</td>\n",
       "      <td>212</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>PER</td>\n",
       "      <td>0.590255</td>\n",
       "      <td>##tron</td>\n",
       "      <td>212</td>\n",
       "      <td>216</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>ORG</td>\n",
       "      <td>0.669693</td>\n",
       "      <td>Decept</td>\n",
       "      <td>253</td>\n",
       "      <td>259</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>MISC</td>\n",
       "      <td>0.498348</td>\n",
       "      <td>##icons</td>\n",
       "      <td>259</td>\n",
       "      <td>264</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>MISC</td>\n",
       "      <td>0.775362</td>\n",
       "      <td>Megatron</td>\n",
       "      <td>350</td>\n",
       "      <td>358</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>MISC</td>\n",
       "      <td>0.987854</td>\n",
       "      <td>Optimus Prime</td>\n",
       "      <td>367</td>\n",
       "      <td>380</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>PER</td>\n",
       "      <td>0.812097</td>\n",
       "      <td>Bumblebee</td>\n",
       "      <td>502</td>\n",
       "      <td>511</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  entity_group     score           word  start  end\n",
       "0          ORG  0.879010         Amazon      5   11\n",
       "1         MISC  0.990859  Optimus Prime     36   49\n",
       "2          LOC  0.999755        Germany     90   97\n",
       "3         MISC  0.556572           Mega    208  212\n",
       "4          PER  0.590255         ##tron    212  216\n",
       "5          ORG  0.669693         Decept    253  259\n",
       "6         MISC  0.498348        ##icons    259  264\n",
       "7         MISC  0.775362       Megatron    350  358\n",
       "8         MISC  0.987854  Optimus Prime    367  380\n",
       "9          PER  0.812097      Bumblebee    502  511"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.DataFrame(outputs)    "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "94a0f4a5",
   "metadata": {},
   "source": [
    "### Question Answering "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "fb57546d",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "No model was supplied, defaulted to distilbert/distilbert-base-cased-distilled-squad and revision 626af31 (https://huggingface.co/distilbert/distilbert-base-cased-distilled-squad).\n",
      "Using a pipeline without specifying a model name and revision in production is not recommended.\n",
      "/opt/anaconda3/lib/python3.11/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "reader = pipeline(\"question-answering\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "0d533a7a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>score</th>\n",
       "      <th>start</th>\n",
       "      <th>end</th>\n",
       "      <th>answer</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.631292</td>\n",
       "      <td>335</td>\n",
       "      <td>358</td>\n",
       "      <td>an exchange of Megatron</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      score  start  end                   answer\n",
       "0  0.631292    335  358  an exchange of Megatron"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "question = \"What does the customer want?\"\n",
    "outputs = reader(question=question, context=text)\n",
    "pd.DataFrame([outputs])    "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b4341d24",
   "metadata": {},
   "source": [
    "### Translation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "a93866a5",
   "metadata": {},
   "outputs": [],
   "source": [
    "translator = pipeline(\"translation_en_to_it\", \n",
    "                      model=\"Helsinki-NLP/opus-mt-en-it\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "1b9c9ff0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Cara Amazon, la scorsa settimana ho ordinato una figura d'azione Optimus Prime dal tuo negozio online in Germania. Purtroppo, quando ho aperto il pacchetto, ho scoperto al mio orrore che ero stato inviato una figura d'azione di Megatron invece! Come un nemico per tutta la vita dei Decepticon, spero che si può capire il mio dilemma. Per risolvere il problema, chiedo uno scambio di Megatron per la figura di Optimus Prime ho ordinato. In allegato sono copie dei miei record riguardanti questo acquisto. Mi aspetto di sentire da voi presto. Cordialmente, Bumblebee.\n"
     ]
    }
   ],
   "source": [
    "outputs = translator(text, clean_up_tokenization_spaces=True, min_length=100)\n",
    "print(outputs[0]['translation_text'])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3711dc05",
   "metadata": {},
   "source": [
    "Caro Amazon, la settimana scorsa ho ordinato un action figure di Optimus Prime dal tuo negozio online in Germania. Sfortunatamente, quando ho aperto il pacco, ho scoperto con orrore che mi era stata invece inviata una action figure di Megatron! Essendo un nemico da sempre dei Decepticon, spero che tu possa capire il mio dilemma. Per risolvere il problema, chiedo uno scambio di Megatron con la figura di Optimus Prime che ho ordinato. In allegato sono presenti copie dei miei documenti relativi a questo acquisto. Mi aspetto di sentirti presto. Cordiali saluti, Bombo."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "32aa3ebe",
   "metadata": {},
   "source": [
    "### Text Generation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "c53f2314",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "No model was supplied, defaulted to openai-community/gpt2 and revision 6c0e608 (https://huggingface.co/openai-community/gpt2).\n",
      "Using a pipeline without specifying a model name and revision in production is not recommended.\n",
      "/opt/anaconda3/lib/python3.11/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "generator = pipeline(\"text-generation\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "39c5bfe0",
   "metadata": {},
   "outputs": [],
   "source": [
    "response = \"Dear Bumblebee, I am sorry to hear that your order was mixed up.\"\n",
    "prompt = text + \"\\n\\nCustomer service response:\\n\" + response"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "160b8f28",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dear Amazon, last week I ordered an Optimus Prime action figure from your online store in Germany. Unfortunately, when I opened the package, I discovered to my horror that I had been sent an action figure of Megatron instead! As a lifelong enemy of the Decepticons, I hope you can understand my dilemma. To resolve the issue, I demand an exchange of Megatron for the Optimus Prime figure I ordered. Enclosed are copies of my records concerning this purchase. I expect to hear from you soon. Sincerely, Bumblebee.\n",
      "\n",
      "Customer service response:\n",
      "Dear Bumblebee, I am sorry to hear that your order was mixed up.\n"
     ]
    }
   ],
   "source": [
    "print(prompt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "edfeb274",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dear Amazon, last week I ordered an Optimus Prime action figure from your online store in Germany. Unfortunately, when I opened the package, I discovered to my horror that I had been sent an action figure of Megatron instead! As a lifelong enemy of the Decepticons, I hope you can understand my dilemma. To resolve the issue, I demand an exchange of Megatron for the Optimus Prime figure I ordered. Enclosed are copies of my records concerning this purchase. I expect to hear from you soon. Sincerely, Bumblebee.\n",
      "\n",
      "Customer service response:\n",
      "Dear Bumblebee, I am sorry to hear that your order was mixed up. This isn't a problem for me. My order simply came with an incorrect name. I ordered as \"Megatron, Bumblebee\". I was able to get back all the Transformers figures I already purchased from your online store. The instructions on the front of the package says this. All I can say is that I'm sure Megatron was not included with the Optimus Prime figure but just one (not a single) part. It's quite a shock when you know as you are one of us. Thank you.\n",
      "\n",
      "Bumblebee! I am hoping that the shipping will be less than $5.00. Please contact me on the comments section below. I'm sure that you will be getting the exact items I received. I am simply told that I received the correct order. I think that you are a bit mistaken!\n",
      "\n",
      "Unfortunately you are not receiving the packaging we have in our possession. Please contact me after you find out why this is.\n"
     ]
    }
   ],
   "source": [
    "outputs = generator(prompt, max_length=1000)\n",
    "print(outputs[0]['generated_text'])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "18d3e7ce",
   "metadata": {},
   "source": [
    "<center>\n",
    "     <img src=\"https://raw.githubusercontent.com/DataForScience/Networks/master/data/D4Sci_logo_full.png\" alt=\"Data For Science, Inc\" align=\"center\" border=\"0\" width=300px> \n",
    "</center>"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}