{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Subword Tokenization\n", "\n", "Implementation from [Neural Machine Translation of Rare Words with Subword Units](https://aclanthology.org/P16-1162/) (Sennrich et al., ACL 2016)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "('e', 's')\n", "('es', 't')\n", "('est', '')\n", "('l', 'o')\n", "('lo', 'w')\n", "{'low ': 5, 'low e r ': 2, 'n e w est': 6, 'w i d est': 3}\n" ] } ], "source": [ "import re, collections\n", "\n", "# Count number of pairs\n", "def get_stats(vocab):\n", " pairs = collections.defaultdict(int)\n", " for word, freq in vocab.items():\n", " symbols = word.split()\n", " for i in range(len(symbols)-1):\n", " pairs[symbols[i],symbols[i+1]] += freq\n", " return pairs\n", "\n", "# Merge most frequent pairs in the vocabulary\n", "def merge_vocab(pair, v_in):\n", " v_out = {}\n", " bigram = re.escape(' '.join(pair))\n", " p = re.compile(r'(?' : 5, 'l o w e r ' : 2,\n", "'n e w e s t ':6, 'w i d e s t ' : 3}\n", "\n", "num_merges = 5\n", "for i in range(num_merges):\n", " pairs = get_stats(vocab)\n", " best = max(pairs, key=pairs.get)\n", " vocab = merge_vocab(best, vocab)\n", " print(best)\n", "print(vocab)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Using a Pre-trained Tokenizer" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import tiktoken\n", "enc = tiktoken.encoding_for_model(\"gpt-3.5-turbo\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[3617, 2448, 79706, 3576, 269, 273, 84314]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "token_ids = enc.encode('Prüfungsvorleistung')\n", "token_ids" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Pr', 'ü', 'fung', 'sv', 'or', 'le', 'istung']" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "[enc.decode([i]) for i in token_ids]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Phenotyping with LLMs\n", "\n", "We will show how to use ChatGPT through the OpenAPI API for zero-shot and few-shot smoking status classification, which is a kind of phenotyping task. 
Note: if you want to run the notebook yourself, make sure to provide an API key: https://github.com/openai/openai-python#usage" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "executionInfo": { "elapsed": 314, "status": "ok", "timestamp": 1695628755333, "user": { "displayName": "Florian Borchert", "userId": "04915685144402388535" }, "user_tz": -120 }, "id": "7KXiGjwnjiUt" }, "outputs": [], "source": [ "from openai import OpenAI\n", "import os\n", "\n", "# The client reads the API key from the OPENAI_API_KEY environment variable\n", "client = OpenAI()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "executionInfo": { "elapsed": 209, "status": "ok", "timestamp": 1695628769597, "user": { "displayName": "Florian Borchert", "userId": "04915685144402388535" }, "user_tz": -120 }, "id": "7uAVp_-Fk85I" }, "outputs": [], "source": [ "# Helper function to send a prompt to the OpenAI API (ChatGPT model)\n", "def get_completion(prompt, model=\"gpt-3.5-turbo\"):\n", "    messages = [{\"role\": \"user\", \"content\": prompt}]\n", "    response = client.chat.completions.create(\n", "        messages=messages,\n", "        model=model,\n", "        temperature=0, # this is the degree of randomness of the model's output\n", "    )\n", "    return response.choices[0].message.content.replace('```', '')" ] }, { "cell_type": "markdown", "metadata": { "id": "5LGDhQR3lpd-" }, "source": [ "## Zero-Shot Inference" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "text = \"Social History: No alcohol use and quit tobacco greater than 25 years ago with a 10-pack year smoking history.\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Prompt 1\n", "\n", "Describes the task" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "executionInfo": { "elapsed": 2, "status": "ok", "timestamp": 1695628771499, "user": { "displayName": "Florian Borchert", "userId": "04915685144402388535" }, "user_tz": -120 }, "id": "zcnhvq-Zlh62" }, "outputs": [], "source": [ "prompt1 = \"What is the smoking status of the person described in this clinical note? ```{}```\"" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 35 }, "executionInfo": { "elapsed": 3, "status": "ok", "timestamp": 1695628771816, "user": { "displayName": "Florian Borchert", "userId": "04915685144402388535" }, "user_tz": -120 }, "id": "p3RLmCCJl8G4", "outputId": "3aec847a-42de-4272-bfbe-7b458883bc99" }, "outputs": [ { "data": { "text/plain": [ "'What is the smoking status of the person described in this clinical note? 
```Social History: No alcohol use and quit tobacco greater than 25 years ago with a 10-pack year smoking history.```'" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "prompt1.format(text)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 35 }, "executionInfo": { "elapsed": 2321, "status": "ok", "timestamp": 1695628774408, "user": { "displayName": "Florian Borchert", "userId": "04915685144402388535" }, "user_tz": -120 }, "id": "3MZBDOS2l9yJ", "outputId": "6419a982-b798-4981-86f9-62ebdf07f1a9" }, "outputs": [ { "data": { "text/plain": [ "'The smoking status of the person described in this clinical note is that they quit tobacco greater than 25 years ago.'" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "get_completion(prompt1.format(text))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Prompt 2\n", "\n", "Describes the task and valid response options (for classification)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "executionInfo": { "elapsed": 2, "status": "ok", "timestamp": 1695628774408, "user": { "displayName": "Florian Borchert", "userId": "04915685144402388535" }, "user_tz": -120 }, "id": "w_A8dWBBmA54" }, "outputs": [], "source": [ "prompt2 = (\"What is the smoking status of the person described in this clinical note?\"\n", "\" The valid options are: smoker, non-smoker, ex-smoker \"\n", "\" Input: ```{}```\")" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 35 }, "executionInfo": { "elapsed": 2501, "status": "ok", "timestamp": 1695628776908, "user": { "displayName": "Florian Borchert", "userId": "04915685144402388535" }, "user_tz": -120 }, "id": "UA71CCE4m2w_", "outputId": "540fce65-dbf6-434c-8017-017a40e2713b" }, "outputs": [ { "data": { "text/plain": [ "'The smoking status of the person described in this clinical note is \"ex-smoker\".'" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "get_completion(prompt2.format(text))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Prompt 3\n", "\n", "Describes the task, valid response options, and output format (JSON)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "executionInfo": { "elapsed": 5, "status": "ok", "timestamp": 1695628776909, "user": { "displayName": "Florian Borchert", "userId": "04915685144402388535" }, "user_tz": -120 }, "id": "zvAhQpPZm4y1" }, "outputs": [], "source": [ "prompt3 = (\"What is the smoking status of the person described in this clinical note?\"\n", "\" The valid options are: current smoker, non-smoker, ex-smoker \"\n", "\" Please return the answer as a JSON of the format {{ label :