{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Custom Panels for Object Detection.ipynb",
      "provenance": [],
      "collapsed_sections": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "metadata": {
        "id": "DWrniJ0sSrwP",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "126c0c65-0781-4a7c-e3f1-6b00af076838"
      },
      "source": [
        "# Install the Comet ML SDK used below for experiment tracking.\n",
        "!pip install comet_ml"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Requirement already satisfied: comet_ml in /usr/local/lib/python3.6/dist-packages (3.2.5)\n",
            "Requirement already satisfied: dulwich>=0.20.6; python_version >= \"3.0\" in /usr/local/lib/python3.6/dist-packages (from comet_ml) (0.20.11)\n",
            "Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from comet_ml) (1.15.0)\n",
            "Requirement already satisfied: websocket-client>=0.55.0 in /usr/local/lib/python3.6/dist-packages (from comet_ml) (0.57.0)\n",
            "Requirement already satisfied: netifaces>=0.10.7 in /usr/local/lib/python3.6/dist-packages (from comet_ml) (0.10.9)\n",
            "Requirement already satisfied: jsonschema!=3.1.0,>=2.6.0 in /usr/local/lib/python3.6/dist-packages (from comet_ml) (2.6.0)\n",
            "Requirement already satisfied: requests>=2.18.4 in /usr/local/lib/python3.6/dist-packages (from comet_ml) (2.23.0)\n",
            "Requirement already satisfied: everett[ini]>=1.0.1; python_version >= \"3.0\" in /usr/local/lib/python3.6/dist-packages (from comet_ml) (1.0.3)\n",
            "Requirement already satisfied: wurlitzer>=1.0.2 in /usr/local/lib/python3.6/dist-packages (from comet_ml) (2.0.1)\n",
            "Requirement already satisfied: nvidia-ml-py3>=7.352.0 in /usr/local/lib/python3.6/dist-packages (from comet_ml) (7.352.0)\n",
            "Requirement already satisfied: wrapt>=1.11.2 in /usr/local/lib/python3.6/dist-packages (from comet_ml) (1.12.1)\n",
            "Requirement already satisfied: certifi in /usr/local/lib/python3.6/dist-packages (from dulwich>=0.20.6; python_version >= \"3.0\"->comet_ml) (2020.6.20)\n",
            "Requirement already satisfied: urllib3>=1.24.1 in /usr/local/lib/python3.6/dist-packages (from dulwich>=0.20.6; python_version >= \"3.0\"->comet_ml) (1.24.3)\n",
            "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests>=2.18.4->comet_ml) (2.10)\n",
            "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests>=2.18.4->comet_ml) (3.0.4)\n",
            "Requirement already satisfied: configobj; extra == \"ini\" in /usr/local/lib/python3.6/dist-packages (from everett[ini]>=1.0.1; python_version >= \"3.0\"->comet_ml) (5.0.6)\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "HTnv-b3_iXuR"
      },
      "source": [
        ""
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "SWK2SCVhhyOg",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "2ffa8f7f-d42a-4b80-fc51-34f4c422c21a"
      },
      "source": [
        "import comet_ml\n",
        "import getpass, os\n",
        "# Prompt for the key with getpass so it is not echoed into the notebook,\n",
        "# then store it in the COMET_API_KEY environment variable for the Comet SDK.\n",
        "os.environ[\"COMET_API_KEY\"] = getpass.getpass(\"Paste your COMET API KEY: \")"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Paste your COMET API KEY: ··········\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "AquTBLqgh11e",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "fe964b27-044d-4117-80b1-7ef265802b79"
      },
      "source": [
        "# Create a Comet experiment in the 'object-detection' project; the images and\n",
        "# the annotation asset logged further down are attached to it.\n",
        "experiment = comet_ml.Experiment(project_name='object-detection')"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "COMET INFO: Experiment is live on comet.ml https://www.comet.ml/team-comet-ml/object-detection/d85481761aab443ea99d82a2d2e07b02\n",
            "\n"
          ],
          "name": "stderr"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Osc8cW-KFyrZ",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "42121a40-0716-4e42-e5eb-3167ff9f64b5"
      },
      "source": [
        "# Download the Penn-Fudan dataset into the current folder.\n",
        "# -nc skips the download when the archive is already present, so re-running\n",
        "# this cell does not pile up PennFudanPed.zip.1, .2, ... copies.\n",
        "!wget -nc https://www.cis.upenn.edu/~jshi/ped_html/PennFudanPed.zip\n",
        "# Extract it in the current folder. -o overwrites existing files without\n",
        "# asking (an interactive prompt would stall a re-run); -q keeps the log short.\n",
        "!unzip -o -q PennFudanPed.zip"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "--2020-11-16 17:28:46--  https://www.cis.upenn.edu/~jshi/ped_html/PennFudanPed.zip\n",
            "Resolving www.cis.upenn.edu (www.cis.upenn.edu)... 158.130.69.163, 2607:f470:8:64:5ea5::d\n",
            "Connecting to www.cis.upenn.edu (www.cis.upenn.edu)|158.130.69.163|:443... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 53723336 (51M) [application/zip]\n",
            "Saving to: ‘PennFudanPed.zip.5’\n",
            "\n",
            "PennFudanPed.zip.5  100%[===================>]  51.23M  88.8MB/s    in 0.6s    \n",
            "\n",
            "2020-11-16 17:28:47 (88.8 MB/s) - ‘PennFudanPed.zip.5’ saved [53723336/53723336]\n",
            "\n",
            "--2020-11-16 17:28:47--  http://./\n",
            "Resolving . (.)... failed: No address associated with hostname.\n",
            "wget: unable to resolve host address ‘.’\n",
            "FINISHED --2020-11-16 17:28:47--\n",
            "Total wall clock time: 0.7s\n",
            "Downloaded: 1 files, 51M in 0.6s (88.8 MB/s)\n",
            "Archive:  PennFudanPed.zip\n",
            "replace PennFudanPed/added-object-list.txt? [y]es, [n]o, [A]ll, [N]one, [r]ename: "
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "v7FLHFtPJQH1"
      },
      "source": [
        "import torch, torchvision\n",
        "from torchvision import datasets, transforms\n",
        "\n",
        "# Preprocessing pipeline handed to the dataset: a single ToTensor step.\n",
        "preprocess = transforms.Compose([transforms.ToTensor()])"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "gENmSr7YGfu3"
      },
      "source": [
        "import os\n",
        "import numpy as np\n",
        "import torch\n",
        "import torch.utils.data\n",
        "from PIL import Image\n",
        "\n",
        "class PennFudanDataset(torch.utils.data.Dataset):\n",
        "    \"\"\"Penn-Fudan images paired with per-instance detection targets.\n",
        "\n",
        "    `root` must contain a `PNGImages` folder (pictures) and a `PedMasks`\n",
        "    folder (instance masks); files are paired by their sorted position.\n",
        "    \"\"\"\n",
        "\n",
        "    def __init__(self, root, transforms=None):\n",
        "        \"\"\"Index the image and mask files found under `root`.\n",
        "\n",
        "        Args:\n",
        "            root: Dataset directory holding `PNGImages` and `PedMasks`.\n",
        "            transforms: Optional callable applied to the RGB image only;\n",
        "                the target dict is returned untouched.\n",
        "        \"\"\"\n",
        "        self.root = root\n",
        "        self.transforms = transforms\n",
        "        # Sort both listings so that entry i of one list lines up with\n",
        "        # entry i of the other.\n",
        "        self.imgs = sorted(os.listdir(os.path.join(root, \"PNGImages\")))\n",
        "        self.masks = sorted(os.listdir(os.path.join(root, \"PedMasks\")))\n",
        "\n",
        "    def __getitem__(self, idx):\n",
        "        \"\"\"Return `(img, target)` for the sample at position `idx`.\n",
        "\n",
        "        `target` is a dict with the keys \"boxes\" (float32, N x 4 rows of\n",
        "        xmin, ymin, xmax, ymax), \"labels\" (all ones), \"masks\" (uint8,\n",
        "        one binary mask per instance), \"image_id\", \"area\" and \"iscrowd\"\n",
        "        (all zeros).\n",
        "        \"\"\"\n",
        "        # load images and masks\n",
        "        img_path = os.path.join(self.root, \"PNGImages\", self.imgs[idx])\n",
        "        mask_path = os.path.join(self.root, \"PedMasks\", self.masks[idx])\n",
        "        img = Image.open(img_path).convert(\"RGB\")\n",
        "        # The mask is deliberately not converted to RGB: each pixel value\n",
        "        # identifies one instance, with 0 being background.\n",
        "        mask = np.array(Image.open(mask_path))\n",
        "\n",
        "        # Instance ids are the distinct non-zero values. Filtering by value\n",
        "        # (rather than dropping the first entry) stays correct when a mask\n",
        "        # contains no background pixel at all.\n",
        "        obj_ids = np.unique(mask)\n",
        "        obj_ids = obj_ids[obj_ids != 0]\n",
        "\n",
        "        # Split the id-encoded mask into one binary mask per instance.\n",
        "        masks = mask == obj_ids[:, None, None]\n",
        "\n",
        "        # Bounding box of each instance = extent of its mask pixels.\n",
        "        num_objs = len(obj_ids)\n",
        "        boxes = []\n",
        "        for i in range(num_objs):\n",
        "            pos = np.where(masks[i])\n",
        "            xmin = np.min(pos[1])\n",
        "            xmax = np.max(pos[1])\n",
        "            ymin = np.min(pos[0])\n",
        "            ymax = np.max(pos[0])\n",
        "            boxes.append([xmin, ymin, xmax, ymax])\n",
        "\n",
        "        # reshape keeps the (N, 4) layout even when there are no instances,\n",
        "        # so the column indexing in the area computation cannot fail.\n",
        "        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)\n",
        "        # there is only one class\n",
        "        labels = torch.ones((num_objs,), dtype=torch.int64)\n",
        "        masks = torch.as_tensor(masks, dtype=torch.uint8)\n",
        "\n",
        "        image_id = torch.tensor([idx])\n",
        "        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])\n",
        "        # suppose all instances are not crowd\n",
        "        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)\n",
        "\n",
        "        target = {\n",
        "            \"boxes\": boxes,\n",
        "            \"labels\": labels,\n",
        "            \"masks\": masks,\n",
        "            \"image_id\": image_id,\n",
        "            \"area\": area,\n",
        "            \"iscrowd\": iscrowd,\n",
        "        }\n",
        "\n",
        "        if self.transforms is not None:\n",
        "            img = self.transforms(img)\n",
        "\n",
        "        return img, target\n",
        "\n",
        "    def __len__(self):\n",
        "        \"\"\"Number of images indexed under `PNGImages`.\"\"\"\n",
        "        return len(self.imgs)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "5klC67mNTcuQ"
      },
      "source": [
        "def format_predictions_and_labels(img_ids, predictions, labels, label_map):\n",
        "  \"\"\"Build the per-image annotation dict that is logged to Comet.\n",
        "\n",
        "  Args:\n",
        "    img_ids: Image identifiers, aligned by position with `predictions`\n",
        "      and `labels`.\n",
        "    predictions: One dict per image holding the tensors \"boxes\"\n",
        "      (rows of x, y, x2, y2), \"scores\" and \"labels\".\n",
        "    labels: One ground-truth dict per image holding a \"boxes\" tensor.\n",
        "    label_map: Lookup indexed with `class_id - 1` that yields the class name.\n",
        "\n",
        "  Returns:\n",
        "    Dict mapping each image id to a list of annotations. Ground-truth boxes\n",
        "    come first (label \"ground-truth\", score 100), followed by the predicted\n",
        "    boxes with their score multiplied by 100.\n",
        "  \"\"\"\n",
        "  def to_list(tensor):\n",
        "    # detach() and cpu() first: a bare .numpy() raises for tensors that\n",
        "    # require grad or live on an accelerator.\n",
        "    return tensor.detach().cpu().tolist()\n",
        "\n",
        "  data = {}\n",
        "\n",
        "  for idx, img_id in enumerate(img_ids):\n",
        "    prediction = predictions[idx]\n",
        "    label = labels[idx]\n",
        "\n",
        "    predicted_boxes = to_list(prediction[\"boxes\"])\n",
        "    predicted_scores = to_list(prediction[\"scores\"])\n",
        "    predicted_classes = to_list(prediction[\"labels\"])\n",
        "    label_boxes = to_list(label[\"boxes\"])\n",
        "\n",
        "    # setdefault: a repeated image id extends its existing list.\n",
        "    annotations = data.setdefault(img_id, [])\n",
        "\n",
        "    for x, y, x2, y2 in label_boxes:\n",
        "      annotations.append({\n",
        "          \"label\": \"ground-truth\",\n",
        "          \"score\": 100,\n",
        "          \"box\": {\"x\": x, \"y\": y, \"x2\": x2, \"y2\": y2},\n",
        "      })\n",
        "\n",
        "    for (x, y, x2, y2), score, class_id in zip(predicted_boxes, predicted_scores, predicted_classes):\n",
        "      annotations.append({\n",
        "          # label_map is indexed from 0, the predicted class ids from 1.\n",
        "          \"label\": label_map[class_id - 1],\n",
        "          \"box\": {\"x\": x, \"y\": y, \"x2\": x2, \"y2\": y2},\n",
        "          \"score\": score * 100,\n",
        "      })\n",
        "\n",
        "  return data"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "xTSDHef3GkF4"
      },
      "source": [
        "# Build the dataset from the extracted archive; each image is passed through\n",
        "# `preprocess` before it is returned.\n",
        "dataset = PennFudanDataset('./PennFudanPed', transforms=preprocess)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Cwp2jyH9BH6i"
      },
      "source": [
        "# Instantiate torchvision's Mask R-CNN (ResNet-50 FPN) detection model,\n",
        "# requesting pretrained weights.\n",
        "model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Tj0JFR8q6e9s",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "15ede7be-f390-480a-e56f-29b9c8072218"
      },
      "source": [
        "# Fetch the COCO label list and save it locally as coco-labels.txt.\n",
        "!curl 'https://raw.githubusercontent.com/amikelive/coco-labels/master/coco-labels-paper.txt' -o coco-labels.txt"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n",
            "                                 Dload  Upload   Total   Spent    Left  Speed\n",
            "\r  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0\r100   702  100   702    0     0   4943      0 --:--:-- --:--:-- --:--:--  4978\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "c5GOzuML6jEc"
      },
      "source": [
        "# Map zero-based line number -> class name, one entry per line of the file.\n",
        "# The with-block guarantees the file handle is closed once the map is built.\n",
        "with open(\"./coco-labels.txt\", 'r') as labels_file:\n",
        "  label_map = {idx: line.replace('\\n', '') for idx, line in enumerate(labels_file)}"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "99lTJLfTBNB7"
      },
      "source": [
        "from torchvision import transforms\n",
        "# Inference only: switch the model to evaluation mode.\n",
        "model.eval()\n",
        "\n",
        "# Slice of the dataset to score: indices start_id .. end_id - 1.\n",
        "start_id = 100\n",
        "end_id = 110\n",
        "\n",
        "img_ids = list(range(start_id, end_id))\n",
        "labels = []\n",
        "predictions = []\n",
        "to_pil = transforms.ToPILImage()\n",
        "\n",
        "for img_id in img_ids:\n",
        "  image, target = dataset[img_id]\n",
        "  labels.append(target)\n",
        "\n",
        "  # Gradients are not needed here; the model is called with a one-image\n",
        "  # list and the first entry of its result is kept.\n",
        "  with torch.no_grad():\n",
        "    outputs = model([image])\n",
        "    predictions.append(outputs[0])\n",
        "\n",
        "  # Log the image under its dataset index, the same id that keys the\n",
        "  # annotation dict built afterwards.\n",
        "  experiment.log_image(image_data=to_pil(image), name=str(img_id))"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "4aTxB2qcghMY"
      },
      "source": [
        "# Combine ground truth and predictions into one annotation dict keyed by image id.\n",
        "metadata = format_predictions_and_labels(img_ids, predictions, labels, label_map)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "U00wCqS4mE5M",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "05cd778b-9092-48c5-8eb7-ccf6da7d201c"
      },
      "source": [
        "# Log the annotation dict to the experiment as the asset \"image-metadata.json\".\n",
        "experiment.log_asset_data(metadata, \"image-metadata.json\")"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "{'api': 'https://www.comet.ml/api/rest/v2/experiment/asset/get-asset?assetId=1843c310ac2e4a698eb37d95c8cf11f7&experimentKey=d85481761aab443ea99d82a2d2e07b02',\n",
              " 'assetId': '1843c310ac2e4a698eb37d95c8cf11f7',\n",
              " 'web': 'https://www.comet.ml/api/asset/download?assetId=1843c310ac2e4a698eb37d95c8cf11f7&experimentKey=d85481761aab443ea99d82a2d2e07b02'}"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 11
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "u9vo2fWyoLlX"
      },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    }
  ]
}