{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%reload_ext autoreload\n", "%autoreload 2\n", "%matplotlib inline\n", "import os\n", "os.environ[\"CUDA_DEVICE_ORDER\"]=\"PCI_BUS_ID\";\n", "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"\" # Enforce CPU usage\n", "from psutil import cpu_count # Do \"pip install psutil\" if not already installed\n", "import tensorflow as tf\n", "import numpy as np\n", "\n", "# Constants from the performance optimization available in onnxruntime\n", "# It needs to be done before importing onnxruntime\n", "os.environ[\"OMP_NUM_THREADS\"] = str(cpu_count(logical=True))\n", "os.environ[\"OMP_WAIT_POLICY\"] = 'ACTIVE'" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## ONNX and TensorFlow Lite Support in `ktrain`\n", "\n", "As of v0.24.x, `predictors` in **ktrain** provide built-in support for exports to [ONNX](https://github.com/onnx/onnx) and [TensorFlow Lite](https://www.tensorflow.org/lite) formats. This allows you to more easily take a **ktrain**-trained model and use it to make predictions *outside* of **ktrain** (or even TensorFlow) in deployment scenarios. In this notebook, we will show a text classification example of this.\n", "\n", "Let us begin by loading a previously trained `Predictor` instance, which consists of both the **DistilBert** model and its associated `Preprocessor` instance. " ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "\n" ] } ], "source": [ "import ktrain\n", "predictor = ktrain.load_predictor('/tmp/my_distilbert_predictor')\n", "print(predictor.model)\n", "print(predictor.preproc)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The cell above assumes that the model was previously trained on the 20 Newsgroup corpus using a GPU (e.g., on Google Colab). The files in question can be easily created with **ktrain**:\n", "\n", "```python\n", "# install ktrain\n", "!pip install ktrain\n", "\n", "# load text data\n", "categories = ['alt.atheism', 'comp.graphics', 'sci.med', 'soc.religion.christian']\n", "from sklearn.datasets import fetch_20newsgroups\n", "train_b = fetch_20newsgroups(subset='train', categories=categories, shuffle=True)\n", "test_b = fetch_20newsgroups(subset='test',categories=categories, shuffle=True)\n", "(x_train, y_train) = (train_b.data, train_b.target)\n", "(x_test, y_test) = (test_b.data, test_b.target)\n", "\n", "# build, train, and validate model (Transformer is wrapper around transformers library)\n", "import ktrain\n", "from ktrain import text\n", "MODEL_NAME = 'distilbert-base-uncased'\n", "t = text.Transformer(MODEL_NAME, maxlen=500, class_names=train_b.target_names)\n", "trn = t.preprocess_train(x_train, y_train)\n", "val = t.preprocess_test(x_test, y_test)\n", "model = t.get_classifier()\n", "learner = ktrain.get_learner(model, train_data=trn, val_data=val, batch_size=6)\n", "learner.fit_onecycle(5e-5, 1)\n", "\n", "# save predictor\n", "predictor = ktrain.get_predictor(learner.model, t)\n", "predictor.save('/tmp/my_distilbert_predictor')\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## TensorFlow Lite Inferences\n", "\n", "Here, we export our model to TensorFlow LITE and use it to make predictions *without* **ktrain**." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "converting to TFLite format ... 
this may take a few moments...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "INFO:absl:Using experimental converter: If you encountered a problem please file a bug. You can opt-out by setting experimental_new_converter=False\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "done.\n", "\n", "text input: I received a chest x-ray at the hospital.\n", "\n", "predicted logits: [[-1.7620689 -0.70500606 3.0325098 -1.3406671 ]]\n", "\n", "predicted class: sci.med\n" ] } ], "source": [ "# export TensorFlow Lite model\n", "tflite_model_path = '/tmp/model.tflite'\n", "tflite_model_path = predictor.export_model_to_tflite(tflite_model_path)\n", "\n", "# load interpreter\n", "interpreter = tf.lite.Interpreter(model_path=tflite_model_path)\n", "interpreter.allocate_tensors()\n", "input_details = interpreter.get_input_details()\n", "output_details = interpreter.get_output_details()\n", "\n", "# set maxlen, class_names, and tokenizer (use settings employed when training the model - see above)\n", "maxlen = 500 # from above\n", "class_names = ['alt.atheism', 'comp.graphics', 'sci.med', 'soc.religion.christian'] # from above\n", "from transformers import AutoTokenizer\n", "tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')\n", "\n", "# preprocess and predict outside of ktrain\n", "doc = 'I received a chest x-ray at the hospital.'\n", "inputs = tokenizer(doc, max_length=maxlen, padding='max_length', truncation=True, return_tensors=\"tf\")\n", "interpreter.set_tensor(input_details[0]['index'], inputs['attention_mask'])\n", "interpreter.set_tensor(input_details[1]['index'], inputs['input_ids'])\n", "interpreter.invoke()\n", "output_tflite = interpreter.get_tensor(output_details[0]['index'])\n", "print()\n", "print('text input: %s' % (doc))\n", "print()\n", "print('predicted logits: %s' % (output_tflite))\n", "print()\n", "print(\"predicted class: %s\" % (class_names[np.argmax(output_tflite[0])]))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## ONNX Inferences\n", "\n", "Here, we will export our trained model to ONNX and make predictions *outside* of both **ktrain** and **TensorFlow** using the ONNX runtime. Before proceeding, please ensure the ONNX libraries are installed:\n", "```\n", "pip install -q --upgrade onnxruntime==1.5.1 onnxruntime-tools onnx keras2onnx\n", "```\n", "\n", "It is possible to convert a TensorFlow model directly to ONNX with `predictor.export_model_to_onnx(onnx_model_path)`, similar to what was done for TFLite above. However, for **transformers** models like the **DistilBERT** text classifier used in this example, it is recommended that the model first be converted to PyTorch and then to ONNX for better performance of the final ONNX model.\n", "\n", "In the cell below, we use `AutoModelForSequenceClassification.from_pretrained` to load our classifier as a PyTorch model before converting it to ONNX. We then use the ONNX model to make predictions **without** needing ktrain, TensorFlow, or PyTorch. This is well-suited for deployments that require smaller footprints (e.g., Heroku)."
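, "\n", "\n", "As an aside, the code below is a rough, untested sketch of what the direct TensorFlow-to-ONNX route mentioned above might look like, reusing the `tokenizer`, `maxlen`, and `class_names` defined in the TensorFlow Lite cell. The output path is only an example, and the input names and dtypes exposed by the exported graph are assumptions (as is `export_model_to_onnx` returning the path it wrote, like `export_model_to_tflite` above), so treat it purely as a starting point. The next cell uses the recommended PyTorch route instead.\n", "\n", "```python\n", "import numpy as np\n", "from onnxruntime import InferenceSession\n", "\n", "# export directly from the TensorFlow model (assumed to return the path it wrote)\n", "onnx_model_path = predictor.export_model_to_onnx('/tmp/model.onnx')\n", "sess = InferenceSession(onnx_model_path)\n", "\n", "# tokenize, then build the feed dict from whatever inputs the exported graph exposes\n", "tokens = tokenizer('I received a chest x-ray at the hospital.', max_length=maxlen, truncation=True)\n", "feed = {}\n", "for inp in sess.get_inputs():\n", "    if inp.name in tokens:\n", "        dtype = np.int32 if 'int32' in inp.type else np.int64\n", "        feed[inp.name] = np.atleast_2d(np.array(tokens[inp.name], dtype=dtype))\n", "\n", "print(class_names[np.argmax(sess.run(None, feed)[0])])\n", "```"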
] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "ONNX opset version set to: 11\n", "Loading pipeline (model: /tmp/my_distilbert_predictor_pt, tokenizer: distilbert-base-uncased)\n", "Creating folder /tmp/my_distilbert_predictor_pt_onnx\n", "Using framework PyTorch: 1.8.0\n", "Found input input_ids with shape: {0: 'batch', 1: 'sequence'}\n", "Found input attention_mask with shape: {0: 'batch', 1: 'sequence'}\n", "Found output output_0 with shape: {0: 'batch'}\n", "Ensuring inputs are in correct order\n", "head_mask is not present in the generated input list.\n", "Generated inputs order: ['input_ids', 'attention_mask']\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/amaiya/venv-tf23/lib/python3.6/site-packages/transformers/modeling_utils.py:1760: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n", " input_tensor.shape[chunk_dim] == tensor_shape for input_tensor in input_tensors\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Optimized model has been written at /tmp/my_distilbert_predictor_pt_onnx/model-optimized.onnx: ✔\n", "/!\\ Optimized model contains hardware specific operators which might not be portable. /!\\\n", "As of onnxruntime 1.4.0, models larger than 2GB will fail to quantize due to protobuf constraint.\n", "This limitation will be removed in the next release of onnxruntime.\n", "Warning: onnxruntime.quantization.quantize is deprecated.\n", " Please use quantize_static for static quantization, quantize_dynamic for dynamic quantization.\n", "Quantized model has been written at /tmp/my_distilbert_predictor_pt_onnx/model-optimized-quantized.onnx: ✔\n", "\n", "\n", "predicted class: sci.med\n" ] } ], "source": [ "# set maxlen, class_names, and tokenizer (use settings employed when training the model - see above)\n", "model_name = 'distilbert-base-uncased'\n", "maxlen = 500 # from above\n", "class_names = ['alt.atheism', 'comp.graphics', 'sci.med', 'soc.religion.christian'] # from above\n", "from transformers import AutoTokenizer\n", "tokenizer = AutoTokenizer.from_pretrained(model_name)\n", "\n", "\n", "# imports\n", "import numpy as np\n", "from transformers.convert_graph_to_onnx import convert, optimize, quantize\n", "from transformers import AutoModelForSequenceClassification\n", "from pathlib import Path\n", "\n", "# paths\n", "predictor_path = '/tmp/my_distilbert_predictor'\n", "pt_path = predictor_path+'_pt'\n", "pt_onnx_path = pt_path +'_onnx/model.onnx'\n", "\n", "# convert to ONNX\n", "AutoModelForSequenceClassification.from_pretrained(predictor_path, \n", " from_tf=True).save_pretrained(pt_path)\n", "convert(framework='pt', model=pt_path,output=Path(pt_onnx_path), opset=11, \n", " tokenizer=model_name, pipeline_name='sentiment-analysis')\n", "pt_onnx_quantized_path = quantize(optimize(Path(pt_onnx_path)))\n", "\n", "# create ONNX session\n", "def create_onnx_session(onnx_model_path, provider='CPUExecutionProvider'):\n", " \"\"\"\n", " Creates ONNX inference session from provided onnx_model_path\n", " \"\"\"\n", "\n", " from onnxruntime import GraphOptimizationLevel, InferenceSession, SessionOptions, get_all_providers\n", " assert provider in get_all_providers(), f\"provider {provider} not found, {get_all_providers()}\"\n", "\n", " # Few 
properties that might have an impact on performances (provided by MS)\n", " options = SessionOptions()\n", " options.intra_op_num_threads = 0\n", " options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL\n", "\n", " # Load the model as a graph and prepare the CPU backend \n", " session = InferenceSession(onnx_model_path, options, providers=[provider])\n", " session.disable_fallback()\n", " return session\n", "sess = create_onnx_session(pt_onnx_quantized_path.as_posix())\n", "\n", "# tokenize document and make prediction\n", "tokens = tokenizer.encode_plus('I received a chest x-ray at the hospital.', max_length=maxlen, truncation=True)\n", "tokens = {name: np.atleast_2d(value) for name, value in tokens.items()}\n", "print()\n", "print()\n", "print(\"predicted class: %s\" % (class_names[np.argmax(sess.run(None, tokens)[0])]))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.9" } }, "nbformat": 4, "nbformat_minor": 4 }