{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "Custom Panels for Object Detection.ipynb", "provenance": [], "collapsed_sections": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" } }, "cells": [ { "cell_type": "code", "metadata": { "id": "DWrniJ0sSrwP", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "126c0c65-0781-4a7c-e3f1-6b00af076838" }, "source": [ "!pip install comet_ml" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "Requirement already satisfied: comet_ml in /usr/local/lib/python3.6/dist-packages (3.2.5)\n", "Requirement already satisfied: dulwich>=0.20.6; python_version >= \"3.0\" in /usr/local/lib/python3.6/dist-packages (from comet_ml) (0.20.11)\n", "Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from comet_ml) (1.15.0)\n", "Requirement already satisfied: websocket-client>=0.55.0 in /usr/local/lib/python3.6/dist-packages (from comet_ml) (0.57.0)\n", "Requirement already satisfied: netifaces>=0.10.7 in /usr/local/lib/python3.6/dist-packages (from comet_ml) (0.10.9)\n", "Requirement already satisfied: jsonschema!=3.1.0,>=2.6.0 in /usr/local/lib/python3.6/dist-packages (from comet_ml) (2.6.0)\n", "Requirement already satisfied: requests>=2.18.4 in /usr/local/lib/python3.6/dist-packages (from comet_ml) (2.23.0)\n", "Requirement already satisfied: everett[ini]>=1.0.1; python_version >= \"3.0\" in /usr/local/lib/python3.6/dist-packages (from comet_ml) (1.0.3)\n", "Requirement already satisfied: wurlitzer>=1.0.2 in /usr/local/lib/python3.6/dist-packages (from comet_ml) (2.0.1)\n", "Requirement already satisfied: nvidia-ml-py3>=7.352.0 in /usr/local/lib/python3.6/dist-packages (from comet_ml) (7.352.0)\n", "Requirement already satisfied: wrapt>=1.11.2 in /usr/local/lib/python3.6/dist-packages (from comet_ml) (1.12.1)\n", "Requirement already satisfied: certifi in /usr/local/lib/python3.6/dist-packages (from dulwich>=0.20.6; python_version >= \"3.0\"->comet_ml) (2020.6.20)\n", "Requirement already satisfied: urllib3>=1.24.1 in /usr/local/lib/python3.6/dist-packages (from dulwich>=0.20.6; python_version >= \"3.0\"->comet_ml) (1.24.3)\n", "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests>=2.18.4->comet_ml) (2.10)\n", "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests>=2.18.4->comet_ml) (3.0.4)\n", "Requirement already satisfied: configobj; extra == \"ini\" in /usr/local/lib/python3.6/dist-packages (from everett[ini]>=1.0.1; python_version >= \"3.0\"->comet_ml) (5.0.6)\n" ], "name": "stdout" } ] }, { "cell_type": "markdown", "metadata": { "id": "HTnv-b3_iXuR" }, "source": [ "" ] }, { "cell_type": "code", "metadata": { "id": "SWK2SCVhhyOg", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "2ffa8f7f-d42a-4b80-fc51-34f4c422c21a" }, "source": [ "import comet_ml\n", "import getpass, os\n", "os.environ[\"COMET_API_KEY\"] = getpass.getpass(\"Paste your COMET API KEY: \")" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "Paste your COMET API KEY: ··········\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "AquTBLqgh11e", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "fe964b27-044d-4117-80b1-7ef265802b79" }, "source": [ "experiment = comet_ml.Experiment(project_name='object-detection')" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "COMET INFO: Experiment is live on comet.ml https://www.comet.ml/team-comet-ml/object-detection/d85481761aab443ea99d82a2d2e07b02\n", "\n" ], "name": "stderr" } ] }, { "cell_type": "code", "metadata": { "id": "Osc8cW-KFyrZ", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "42121a40-0716-4e42-e5eb-3167ff9f64b5" }, "source": [ "# download the Penn-Fudan dataset\n", "!wget https://www.cis.upenn.edu/~jshi/ped_html/PennFudanPed.zip .\n", "# extract it in the current folder\n", "!unzip PennFudanPed.zip" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "--2020-11-16 17:28:46-- https://www.cis.upenn.edu/~jshi/ped_html/PennFudanPed.zip\n", "Resolving www.cis.upenn.edu (www.cis.upenn.edu)... 158.130.69.163, 2607:f470:8:64:5ea5::d\n", "Connecting to www.cis.upenn.edu (www.cis.upenn.edu)|158.130.69.163|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 53723336 (51M) [application/zip]\n", "Saving to: ‘PennFudanPed.zip.5’\n", "\n", "PennFudanPed.zip.5 100%[===================>] 51.23M 88.8MB/s in 0.6s \n", "\n", "2020-11-16 17:28:47 (88.8 MB/s) - ‘PennFudanPed.zip.5’ saved [53723336/53723336]\n", "\n", "--2020-11-16 17:28:47-- http://./\n", "Resolving . (.)... failed: No address associated with hostname.\n", "wget: unable to resolve host address ‘.’\n", "FINISHED --2020-11-16 17:28:47--\n", "Total wall clock time: 0.7s\n", "Downloaded: 1 files, 51M in 0.6s (88.8 MB/s)\n", "Archive: PennFudanPed.zip\n", "replace PennFudanPed/added-object-list.txt? [y]es, [n]o, [A]ll, [N]one, [r]ename: " ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "v7FLHFtPJQH1" }, "source": [ "import torch, torchvision\n", "from torchvision import datasets, transforms\n", "\n", "preprocess = transforms.Compose([\n", " transforms.ToTensor(),\n", "])" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "gENmSr7YGfu3" }, "source": [ "import os\n", "import numpy as np\n", "import torch\n", "import torch.utils.data\n", "from PIL import Image\n", "\n", "class PennFudanDataset(torch.utils.data.Dataset):\n", " def __init__(self, root, transforms=None):\n", " self.root = root\n", " self.transforms = transforms\n", " # load all image files, sorting them to\n", " # ensure that they are aligned\n", " self.imgs = list(sorted(os.listdir(os.path.join(root, \"PNGImages\"))))\n", " self.masks = list(sorted(os.listdir(os.path.join(root, \"PedMasks\"))))\n", "\n", " def __getitem__(self, idx):\n", " # load images ad masks\n", " img_path = os.path.join(self.root, \"PNGImages\", self.imgs[idx])\n", " mask_path = os.path.join(self.root, \"PedMasks\", self.masks[idx])\n", " img = Image.open(img_path).convert(\"RGB\")\n", " # note that we haven't converted the mask to RGB,\n", " # because each color corresponds to a different instance\n", " # with 0 being background\n", " mask = Image.open(mask_path)\n", "\n", " mask = np.array(mask)\n", " # instances are encoded as different colors\n", " obj_ids = np.unique(mask)\n", " # first id is the background, so remove it\n", " obj_ids = obj_ids[1:]\n", "\n", " # split the color-encoded mask into a set\n", " # of binary masks\n", " masks = mask == obj_ids[:, None, None]\n", "\n", " # get bounding box coordinates for each mask\n", " num_objs = len(obj_ids)\n", " boxes = []\n", " for i in range(num_objs):\n", " pos = np.where(masks[i])\n", " xmin = np.min(pos[1])\n", " xmax = np.max(pos[1])\n", " ymin = np.min(pos[0])\n", " ymax = np.max(pos[0])\n", " boxes.append([xmin, ymin, xmax, ymax])\n", "\n", " boxes = torch.as_tensor(boxes, dtype=torch.float32)\n", " # there is only one class\n", " labels = torch.ones((num_objs,), dtype=torch.int64)\n", " masks = torch.as_tensor(masks, dtype=torch.uint8)\n", "\n", " image_id = torch.tensor([idx])\n", " area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])\n", " # suppose all instances are not crowd\n", " iscrowd = torch.zeros((num_objs,), dtype=torch.int64)\n", "\n", " target = {}\n", " target[\"boxes\"] = boxes\n", " target[\"labels\"] = labels\n", " target[\"masks\"] = masks\n", " target[\"image_id\"] = image_id\n", " target[\"area\"] = area\n", " target[\"iscrowd\"] = iscrowd\n", "\n", " if self.transforms is not None:\n", " img = self.transforms(img)\n", "\n", " return img, target\n", "\n", " def __len__(self):\n", " return len(self.imgs)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "5klC67mNTcuQ" }, "source": [ "def format_predictions_and_labels(img_ids, predictions, labels, label_map):\n", " data = {}\n", " \n", " for idx, img_id in enumerate(img_ids):\n", " prediction = predictions[idx]\n", " label = labels[idx]\n", " \n", " predicted_boxes = prediction[\"boxes\"].numpy().tolist()\n", " predicted_scores = prediction[\"scores\"].numpy().tolist()\n", " predicted_classes = prediction[\"labels\"].numpy().tolist()\n", "\n", " label_boxes = label[\"boxes\"].numpy().tolist()\n", " \n", " data.setdefault(img_id, []) \n", " for label_box in label_boxes:\n", " x, y, x2, y2 = label_box\n", " data[img_id].append({\n", " \"label\": \"ground-truth\",\n", " \"score\": 100, \n", " \"box\": {\"x\": x, \"y\": y, \"x2\": x2, \"y2\": y2},\n", " })\n", " \n", " for predicted_box, predicted_score, predicted_class in zip(predicted_boxes, predicted_scores, predicted_classes):\n", " x, y, x2, y2 = predicted_box\n", " data[img_id].append({\n", " \"label\": label_map[predicted_class - 1],\n", " \"box\": {\"x\": x, \"y\": y, \"x2\": x2, \"y2\": y2},\n", " \"score\": predicted_score * 100 \n", " })\n", "\n", " return data" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "xTSDHef3GkF4" }, "source": [ "dataset = PennFudanDataset('./PennFudanPed', transforms=preprocess)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "Cwp2jyH9BH6i" }, "source": [ "model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "Tj0JFR8q6e9s", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "15ede7be-f390-480a-e56f-29b9c8072218" }, "source": [ "!curl 'https://raw.githubusercontent.com/amikelive/coco-labels/master/coco-labels-paper.txt' -o coco-labels.txt" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ " % Total % Received % Xferd Average Speed Time Time Time Current\n", " Dload Upload Total Spent Left Speed\n", "\r 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\r100 702 100 702 0 0 4943 0 --:--:-- --:--:-- --:--:-- 4978\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "c5GOzuML6jEc" }, "source": [ "label_map = {}\n", "file = open(\"./coco-labels.txt\", 'r') \n", "lines = file.readlines() \n", "for idx, line in enumerate(lines):\n", " label_map[idx] = line.replace('\\n', '')" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "99lTJLfTBNB7" }, "source": [ "from torchvision import transforms\n", "model.eval()\n", "\n", "start_id = 100\n", "end_id = 110\n", "\n", "img_ids = [i for i in range(start_id, end_id)]\n", "labels = []\n", "predictions = []\n", "\n", "for img_id in img_ids:\n", " img, label = dataset[img_id]\n", " \n", " labels.append(label)\n", " with torch.no_grad():\n", " prediction = model([img])\n", " predictions.append(prediction[0])\n", "\n", " experiment.log_image(image_data=transforms.ToPILImage()(img), name=str(img_id))" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "4aTxB2qcghMY" }, "source": [ "metadata = format_predictions_and_labels(img_ids, predictions, labels, label_map)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "U00wCqS4mE5M", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "05cd778b-9092-48c5-8eb7-ccf6da7d201c" }, "source": [ "# Log the annotation JSON:\n", "experiment.log_asset_data(metadata, \"image-metadata.json\")" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "{'api': 'https://www.comet.ml/api/rest/v2/experiment/asset/get-asset?assetId=1843c310ac2e4a698eb37d95c8cf11f7&experimentKey=d85481761aab443ea99d82a2d2e07b02',\n", " 'assetId': '1843c310ac2e4a698eb37d95c8cf11f7',\n", " 'web': 'https://www.comet.ml/api/asset/download?assetId=1843c310ac2e4a698eb37d95c8cf11f7&experimentKey=d85481761aab443ea99d82a2d2e07b02'}" ] }, "metadata": { "tags": [] }, "execution_count": 11 } ] }, { "cell_type": "code", "metadata": { "id": "u9vo2fWyoLlX" }, "source": [ "" ], "execution_count": null, "outputs": [] } ] }