{ "cells": [ { "cell_type": "markdown", "id": "01412caf", "metadata": {}, "source": [ "# Hello Object Detection\n", "\n", "A very basic introduction to using object detection models with OpenVINO™.\n", "\n", "The [horizontal-text-detection-0001](https://docs.openvino.ai/latest/omz_models_model_horizontal_text_detection_0001.html) model from [Open Model Zoo](https://github.com/openvinotoolkit/open_model_zoo/) is used. It detects horizontal text in images and returns a blob of data in the shape of `[100, 5]`. Each detected text box is stored in the `[x_min, y_min, x_max, y_max, conf]` format, where the\n", "`(x_min, y_min)` are the coordinates of the top left bounding box corner, `(x_max, y_max)` are the coordinates of the bottom right bounding box corner and `conf` is the confidence for the predicted class." ] }, { "cell_type": "markdown", "id": "740bfdd8", "metadata": {}, "source": [ "## Imports" ] }, { "cell_type": "code", "execution_count": null, "id": "73d7aedb", "metadata": {}, "outputs": [], "source": [ "import cv2\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "from openvino.runtime import Core" ] }, { "cell_type": "markdown", "id": "85b48949", "metadata": {}, "source": [ "## Load the Model" ] }, { "cell_type": "code", "execution_count": null, "id": "99737c61", "metadata": {}, "outputs": [], "source": [ "ie = Core()\n", "\n", "model = ie.read_model(model=\"model/horizontal-text-detection-0001.xml\")\n", "compiled_model = ie.compile_model(model=model, device_name=\"CPU\")\n", "\n", "input_layer_ir = compiled_model.input(0)\n", "output_layer_ir = compiled_model.output(\"boxes\")" ] }, { "cell_type": "markdown", "id": "705ce668", "metadata": {}, "source": [ "## Load an Image" ] }, { "cell_type": "code", "execution_count": null, "id": "dc1cfeaf", "metadata": {}, "outputs": [], "source": [ "# Text detection models expect an image in BGR format.\n", "image = cv2.imread(\"data/intel_rnb.jpg\")\n", "\n", "# N,C,H,W = batch size, number of channels, height, width.\n", "N, C, H, W = input_layer_ir.shape\n", "\n", "# Resize the image to meet network expected input sizes.\n", "resized_image = cv2.resize(image, (W, H))\n", "\n", "# Reshape to the network input shape.\n", "input_image = np.expand_dims(resized_image.transpose(2, 0, 1), 0)\n", "\n", "plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB));" ] }, { "cell_type": "markdown", "id": "f9fcaba9", "metadata": {}, "source": [ "## Do Inference" ] }, { "cell_type": "code", "execution_count": null, "id": "363ca630", "metadata": {}, "outputs": [], "source": [ "# Create an inference request.\n", "boxes = compiled_model([input_image])[output_layer_ir]\n", "\n", "# Remove zero only boxes.\n", "boxes = boxes[~np.all(boxes == 0, axis=1)]" ] }, { "cell_type": "markdown", "id": "09dfac5d", "metadata": {}, "source": [ "## Visualize Results" ] }, { "cell_type": "code", "execution_count": null, "id": "0c6a52b3", "metadata": {}, "outputs": [], "source": [ "# For each detection, the description is in the [x_min, y_min, x_max, y_max, conf] format:\n", "# The image passed here is in BGR format with changed width and height. 
{ "cell_type": "markdown", "id": "09dfac5d", "metadata": {}, "source": [ "## Visualize Results" ] }, { "cell_type": "code", "execution_count": null, "id": "0c6a52b3", "metadata": {}, "outputs": [], "source": [ "# Each detection is in the [x_min, y_min, x_max, y_max, conf] format.\n", "# The image passed here is in BGR format with changed width and height.\n", "# To display it in the colors expected by matplotlib, use the cvtColor function.\n", "def convert_result_to_image(bgr_image, resized_image, boxes, threshold=0.3, conf_labels=True):\n", "    # Define colors for boxes and descriptions.\n", "    colors = {\"red\": (255, 0, 0), \"green\": (0, 255, 0)}\n", "\n", "    # Fetch the image shapes to calculate a ratio.\n", "    (real_y, real_x), (resized_y, resized_x) = bgr_image.shape[:2], resized_image.shape[:2]\n", "    ratio_x, ratio_y = real_x / resized_x, real_y / resized_y\n", "\n", "    # Convert the base image from BGR to RGB format.\n", "    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)\n", "\n", "    # Iterate through non-zero boxes.\n", "    for box in boxes:\n", "        # Read the confidence score from the last element of the box.\n", "        conf = box[-1]\n", "        if conf > threshold:\n", "            # Convert floats to ints and scale each corner position by the x and y ratios.\n", "            # If a bounding box starts at the very top of the image,\n", "            # move its upper edge a little lower so that it remains visible.\n", "            (x_min, y_min, x_max, y_max) = [\n", "                int(max(corner_position * ratio_y, 10)) if idx % 2\n", "                else int(corner_position * ratio_x)\n", "                for idx, corner_position in enumerate(box[:-1])\n", "            ]\n", "\n", "            # Draw a box based on the position. Parameters of the rectangle function: image, start_point, end_point, color, thickness.\n", "            rgb_image = cv2.rectangle(rgb_image, (x_min, y_min), (x_max, y_max), colors[\"green\"], 3)\n", "\n", "            # Add the confidence score to the image above the box.\n", "            # Parameters of the putText function: image, text, bottom-left corner of the text, font, font_scale, color, thickness, line_type.\n", "            if conf_labels:\n", "                rgb_image = cv2.putText(\n", "                    rgb_image,\n", "                    f\"{conf:.2f}\",\n", "                    (x_min, y_min - 10),\n", "                    cv2.FONT_HERSHEY_SIMPLEX,\n", "                    0.8,\n", "                    colors[\"red\"],\n", "                    1,\n", "                    cv2.LINE_AA,\n", "                )\n", "\n", "    return rgb_image" ] }, { "cell_type": "code", "execution_count": null, "id": "14476f74", "metadata": {}, "outputs": [], "source": [ "plt.figure(figsize=(10, 6))\n", "plt.axis(\"off\")\n", "plt.imshow(convert_result_to_image(image, resized_image, boxes, conf_labels=False));" ] } ], "metadata": { "interpreter": { "hash": "ae617ccb002f72b3ab6d0069d721eac67ac2a969e83c083c4321cfcab0437cd1" }, "kernelspec": { "display_name": "openvino_env", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.9" } }, "nbformat": 4, "nbformat_minor": 5 }