{ "cells": [ { "cell_type": "markdown", "id": "2f4gei2uzYR_", "metadata": { "id": "2f4gei2uzYR_" }, "source": [ "# Analyzing Object Detection Dataset\n", "In this tutorial, we will analyze an object detection dataset with bounding boxes and identify potential issues." ] }, { "cell_type": "markdown", "id": "3c6dffd7", "metadata": { "id": "3c6dffd7" }, "source": [ "## Installation & Setting Up\n", "This notebook is written to be run on Google Colab. If you're running fastdup locally, view the installation instructions for your operating system here." ] }, { "cell_type": "code", "execution_count": 2, "id": "ab4d5ab5", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ab4d5ab5", "outputId": "b06a9af0-3270-4736-8a70-b726bc2f0ad6" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", "Requirement already satisfied: pip in /usr/local/lib/python3.10/dist-packages (23.1.2)\n", "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", "Requirement already satisfied: fastdup in /usr/local/lib/python3.10/dist-packages (1.2)\n", "Collecting fastdup\n", " Downloading fastdup-1.3-cp310-cp310-manylinux_2_31_x86_64.whl (75.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.1/75.1 MB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: plotly in /usr/local/lib/python3.10/dist-packages (5.13.1)\n", "Collecting plotly\n", " Downloading plotly-5.14.1-py2.py3-none-any.whl (15.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.3/15.3 MB\u001b[0m \u001b[31m68.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: gdown in /usr/local/lib/python3.10/dist-packages (4.6.6)\n", "Collecting gdown\n", " Downloading 
gdown-4.7.1-py3-none-any.whl (15 kB)\n", "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from fastdup) (1.22.4)\n", "Requirement already satisfied: opencv-python-headless in /usr/local/lib/python3.10/dist-packages (from fastdup) (4.7.0.72)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from fastdup) (23.1)\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from fastdup) (1.5.3)\n", "Requirement already satisfied: pillow in /usr/local/lib/python3.10/dist-packages (from fastdup) (8.4.0)\n", "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from fastdup) (6.0)\n", "Requirement already satisfied: requests==2.28.1 in /usr/local/lib/python3.10/dist-packages (from fastdup) (2.28.1)\n", "Requirement already satisfied: sentry-sdk in /usr/local/lib/python3.10/dist-packages (from fastdup) (1.23.1)\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from fastdup) (4.65.0)\n", "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.10/dist-packages (from requests==2.28.1->fastdup) (2.0.12)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests==2.28.1->fastdup) (3.4)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests==2.28.1->fastdup) (1.26.15)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests==2.28.1->fastdup) (2022.12.7)\n", "Requirement already satisfied: tenacity>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from plotly) (8.2.2)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from gdown) (3.12.0)\n", "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from gdown) (1.16.0)\n", "Requirement already satisfied: beautifulsoup4 
in /usr/local/lib/python3.10/dist-packages (from gdown) (4.11.2)\n", "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->gdown) (2.4.1)\n", "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->fastdup) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->fastdup) (2022.7.1)\n", "Requirement already satisfied: PySocks!=1.5.7,>=1.5.6 in /usr/local/lib/python3.10/dist-packages (from requests==2.28.1->fastdup) (1.7.1)\n", "Installing collected packages: plotly, gdown, fastdup\n", " Attempting uninstall: plotly\n", " Found existing installation: plotly 5.13.1\n", " Uninstalling plotly-5.13.1:\n", " Successfully uninstalled plotly-5.13.1\n", " Attempting uninstall: gdown\n", " Found existing installation: gdown 4.6.6\n", " Uninstalling gdown-4.6.6:\n", " Successfully uninstalled gdown-4.6.6\n", " Attempting uninstall: fastdup\n", " Found existing installation: fastdup 1.2\n", " Uninstalling fastdup-1.2:\n", " Successfully uninstalled fastdup-1.2\n", "Successfully installed fastdup-1.3 gdown-4.7.1 plotly-5.14.1\n" ] } ], "source": [ "!pip install -U fastdup plotly gdown" ] }, { "cell_type": "markdown", "id": "e2f993a2", "metadata": { "id": "e2f993a2" }, "source": [ "## Download Dataset\n", "We will be using the mini-coco dataset for this tutorial." 
] }, { "cell_type": "code", "execution_count": 2, "id": "c2a8065c", "metadata": { "id": "c2a8065c", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "c09e4af9-f4ed-4bc9-8dba-283081babfff" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Downloading...\n", "From: https://drive.google.com/uc?id=1iSXVTlkV1_DhdYpVDqsjlT4NJFQ7OkyK\n", "To: /content/coco_minitrain_25k.zip\n", "100% 4.90G/4.90G [00:48<00:00, 100MB/s] \n", "Downloading...\n", "From: https://drive.google.com/uc?id=1i12p23cXlqp1QrXjAD_vu467r4q67Mq9\n", "To: /content/coco_minitrain_25k/annotations/coco_minitrain2017.csv\n", "100% 9.43M/9.43M [00:00<00:00, 11.7MB/s]\n" ] } ], "source": [ "# Download images from mini-coco\n", "!gdown --fuzzy https://drive.google.com/file/d/1iSXVTlkV1_DhdYpVDqsjlT4NJFQ7OkyK/view\n", "!unzip -qq coco_minitrain_25k.zip\n", "\n", "# Download csv annotations\n", "!cd coco_minitrain_25k/annotations && gdown --fuzzy https://drive.google.com/file/d/1i12p23cXlqp1QrXjAD_vu467r4q67Mq9/view" ] }, { "cell_type": "markdown", "id": "8391ef5e", "metadata": { "id": "8391ef5e" }, "source": [ "## Load annotations\n", "We will use a simple converter to convert the COCO format JSON annotation file into the fastdup annotation dataframe. This converter is applicable to any dataset which uses COCO format." 
] }, { "cell_type": "code", "execution_count": 3, "id": "99be6880", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 35 }, "id": "99be6880", "outputId": "6517edac-9b73-4229-c44e-01c5aef3b41b" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "'1.3'" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" } }, "metadata": {}, "execution_count": 3 } ], "source": [ "import fastdup\n", "fastdup.__version__" ] }, { "cell_type": "code", "execution_count": 4, "id": "c5ab5207-601d-437a-a719-5d3a2bde5340", "metadata": { "id": "c5ab5207-601d-437a-a719-5d3a2bde5340" }, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 5, "id": "989deea7", "metadata": { "id": "989deea7" }, "outputs": [], "source": [ "coco_csv = 'coco_minitrain_25k/annotations/coco_minitrain2017.csv'\n", "coco_annotations = pd.read_csv(coco_csv, header=None, names=['filename', 'col_x', 'row_y',\n", " 'width', 'height', 'label', 'ext'])\n", "\n", "coco_annotations['split'] = 'train' # Only train files were loaded\n", "coco_annotations['filename'] = coco_annotations['filename'].apply(lambda x: 'coco_minitrain_25k/images/train2017/'+x)\n", "coco_annotations = coco_annotations.drop_duplicates()\n" ] }, { "cell_type": "code", "execution_count": 6, "id": "3a55e773", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 143 }, "id": "3a55e773", "outputId": "d9cb3bcf-38e4-4b03-ff34-c2fa984d1fcd" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " filename col_x row_y width height label ext split\n", "0 coco_minitrain_25k/images/train2017/000000131075.jpg 20.23 55.98 313.49 326.50 tv 0 train\n", "1 coco_minitrain_25k/images/train2017/000000131075.jpg 176.90 381.12 286.20 136.63 laptop 0 train\n", "2 coco_minitrain_25k/images/train2017/000000131075.jpg 369.96 361.35 72.76 73.91 laptop 0 train" ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
filenamecol_xrow_ywidthheightlabelextsplit
0coco_minitrain_25k/images/train2017/000000131075.jpg20.2355.98313.49326.50tv0train
1coco_minitrain_25k/images/train2017/000000131075.jpg176.90381.12286.20136.63laptop0train
2coco_minitrain_25k/images/train2017/000000131075.jpg369.96361.3572.7673.91laptop0train
\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 6 } ], "source": [ "coco_annotations.head(3)" ] }, { "cell_type": "markdown", "id": "1149696e", "metadata": { "id": "1149696e" }, "source": [ "## Run fastdup" ] }, { "cell_type": "code", "execution_count": 7, "id": "604e19f2", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "604e19f2", "outputId": "2c5cbb8e-3310-402a-82b9-497dd1897388", "scrolled": true }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "FastDup Software, (C) copyright 2022 Dr. Amir Alush and Dr. Danny Bickson.\n", "fastdup C++ info received: 2023-05-20 04:46:25 [INFO] Going to loop over dir /tmp/tmpaeboyuub.csv\n", "2023-05-20 04:46:26 [INFO] Found total 10000 images to run on, 10000 train, 0 test, name list 10000, counter 10000 \n", "2023-05-20 04:48:59 [ERROR] Error: found invalid bounding box for image coco_minitrain_25k/images/train2017/000000528201.jpg. Please check bounding box file 264 341 0 5\n", "Error: found invalid bounding box for image coco_minitrain_25k/images/train2017/000000528201.jpg. Please check bounding box file 264 341 0 5\n", " \n", "\n", "FastDup Software, (C) copyright 2022 Dr. Amir Alush and Dr. 
Danny Bickson.\n", "fastdup C++ info received: 2023-05-20 04:50:46 [INFO] Going to loop over dir /tmp/crops_input.csv\n", "2023-05-20 04:50:46 [INFO] Found total 9999 images to run on, 9999 train, 0 test, name list 9999, counter 9999 \n", "2023-05-20 04:50:46 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:47 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:47 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:47 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:47 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:47 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:47 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:47 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:47 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:48 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:48 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:48 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:48 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:48 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:48 [ERROR] Missing file 
missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:48 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:48 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:48 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:48 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:48 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:48 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:48 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:48 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:48 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:48 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:48 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:48 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:48 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:48 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:48 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:48 [ERROR] Missing file missing_file - file does not existMissing file 
missing_file - file does not exist2023-05-20 04:50:48 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:48 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:48 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:48 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:48 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:48 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:48 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:48 [ERROR] Missing file missing_file - file does not existMissing file missing_file - file does not exist2023-05-20 04:50:48 [ERROR] Missing file missing_file - fil \n", "\n", "\n", " ########################################################################################\n", "\n", "Dataset Analysis Summary: \n", "\n", " Dataset contains 183544 images\n", " Valid images are 4.94% (9,067) of the data, invalid are 95.06% (174,477) of the data\n", " For a detailed analysis, use `.invalid_instances()`.\n", "\n", " Similarity: 0.26% (476) belong to 5 similarity clusters (components).\n", " 99.74% (183,068) images do not belong to any similarity cluster.\n", " Largest cluster has 1,940 (1.06%) images.\n", " For a detailed analysis, use `.connected_components()`\n", "(similarity threshold used is 0.9, connected component threshold used is 0.96).\n", "\n", " Outliers: 0.67% (1,228) of images are possible outliers, and fall in the bottom 5.00% of similarity values.\n", " For a detailed list of outliers, use `.outliers()`.\n" ] } ], "source": [ "# Run fastdup with annotations\n", "# This 
may take a while on a colab node with 2 cores..\n", "input_dir = '.'\n", "work_dir = 'fastdup_minicoco'\n", "\n", "fd = fastdup.create(work_dir=work_dir, input_dir=input_dir)\n", "fd.run(annotations=coco_annotations, overwrite=True, num_images=10000)" ] }, { "cell_type": "markdown", "id": "3b4f5823", "metadata": { "id": "3b4f5823" }, "source": [ "## Class distribution\n", "The dataset contains 25k images and 183k objects, an average of 7.3 objects per image. \n", "\n", "Interestingly, we see a highly unbalanced class distribution: all 80 COCO classes are present, but there is a strong skew towards the person class, which accounts for over 56k instances (30.6%). The car and chair classes also contain over 8k instances each, while at the bottom of the list the toaster and hair drier classes contain as few as 40 instances. \n", "\n", "Using `Plotly` we get a useful interactive histogram. " ] }, { "cell_type": "code", "execution_count": 8, "id": "f87b7057", "metadata": { "id": "f87b7057", "outputId": "fd417b92-da68-4e00-982a-2f44f780b9e9", "colab": { "base_uri": "https://localhost:8080/", "height": 542 } }, "outputs": [ { "output_type": "display_data", "data": { "text/html": [ "\n", "\n", "\n", "
\n", "
\n", "\n", "" ] }, "metadata": {} } ], "source": [ "import plotly.express as px\n", "fig = px.histogram(coco_annotations, x=\"label\")\n", "fig.show()" ] }, { "cell_type": "markdown", "id": "953af7a0", "metadata": { "id": "953af7a0" }, "source": [ "## Component gallery" ] }, { "cell_type": "code", "execution_count": 16, "id": "2e3bff96", "metadata": { "id": "2e3bff96", "scrolled": true, "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "outputId": "f2c620cf-06ca-49c3-d37b-5bcc280eb89e" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "laptop\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ "100%|██████████| 20/20 [00:00<00:00, 25.52it/s]\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "Finished OK. Components are stored as image files fastdup_minicoco/galleries/components_[index].jpg\n", "Stored components visual view in fastdup_minicoco/galleries/components.html\n", "Execution time in seconds 3.1\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " Components Report\n", " \n", " \n", "\n", "\n", "\n", "
\n", "
\n", "
\n", " \n", " \"logo\"\n", " \n", "
\n", " \n", "
\n", "
\n", "
\n", "

Components Report

, Sorted by size descending

\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component7112
num_images2
mean_distance1.0
size374544.0
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
dining table1
pizza1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component9578
num_images2
mean_distance0.9889
size325440.0
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
bed1
dog1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component2495
num_images2
mean_distance1.0
size307200.0
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
bus1
truck1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component6395
num_images2
mean_distance1.0
size307200.0
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
person1
sandwich1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component8156
num_images2
mean_distance0.9753
size305280.0
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
couch1
dog1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component526
num_images2
mean_distance0.9831
size304320.0
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
bottle1
refrigerator1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component1546
num_images2
mean_distance0.9771
size300000.0
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
chair1
person1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component4038
num_images2
mean_distance0.9706
size300000.0
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
dining table1
pizza1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component4852
num_images2
mean_distance0.9692
size292031.0
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
person1
umbrella1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component891
num_images2
mean_distance0.9853
size283200.0
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
hot dog1
sandwich1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component1245
num_images2
mean_distance0.9906
size279680.0
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
cat1
sink1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component4244
num_images2
mean_distance1.0
size279680.0
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
dining table1
pizza1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component7021
num_images2
mean_distance1.0
size274560.0
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
bowl1
dining table1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component1358
num_images2
mean_distance0.9716
size273520.0
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
orange1
person1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component8428
num_images2
mean_distance1.0
size273280.0
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
dining table1
knife1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component7228
num_images2
mean_distance1.0
size273280.0
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
bed1
person1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component4633
num_images2
mean_distance0.9762
size270400.0
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
laptop1
person1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component6632
num_images2
mean_distance1.0
size267520.0
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
person1
skis1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component5727
num_images2
mean_distance0.9827
size266240.0
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
cake1
dining table1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component4745
num_images2
mean_distance0.9735
size264718.0
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
bed1
dog1
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", " \n", "
\n", " \n", " " ] }, "metadata": {} } ], "source": [ "# sorting by largest objects\n", "fd.vis.component_gallery(metric='size', max_width=900)" ] }, { "cell_type": "markdown", "id": "b0de3e2b", "metadata": { "id": "b0de3e2b" }, "source": [ "## Outliers" ] }, { "cell_type": "code", "execution_count": 19, "id": "b656ac35", "metadata": { "id": "b656ac35", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "outputId": "d126a3ae-d357-49fc-b51d-2d67b93a5bfe" }, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "100%|██████████| 20/20 [00:00<00:00, 3189.46it/s]\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "Stored outliers visual view in fastdup_minicoco/galleries/outliers.html\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " Outliers Report\n", " \n", " \n", "\n", "\n", "\n", "
\n", "
\n", "
\n", " \n", " \"logo\"\n", " \n", "
\n", " \n", "
\n", "
\n", "
\n", "

Outliers Report

Showing image outliers, one per row

\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.578825
Path/crops/coco_minitrain_25kimagestrain2017000000527858jpg_153_82_301_343jpg
labelperson
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.59066
Path/crops/coco_minitrain_25kimagestrain2017000000109532jpg_545_313_61_79jpg
labelchair
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.5913
Path/crops/coco_minitrain_25kimagestrain2017000000394197jpg_0_337_69_106jpg
labelcouch
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.591662
Path/crops/coco_minitrain_25kimagestrain2017000000525646jpg_513_365_126_59jpg
labeldining table
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.592576
Path/crops/coco_minitrain_25kimagestrain2017000000132262jpg_1_1_152_257jpg
labelparking meter
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.593357
Path/crops/coco_minitrain_25kimagestrain2017000000530289jpg_113_313_82_57jpg
labelclock
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.600772
Path/crops/coco_minitrain_25kimagestrain2017000000137420jpg_269_19_219_108jpg
labelpotted plant
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.606285
Path/crops/coco_minitrain_25kimagestrain2017000000524476jpg_0_1_201_140jpg
labelbed
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.607465
Path/crops/coco_minitrain_25kimagestrain2017000000526057jpg_1_407_76_72jpg
labelbench
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.61094
Path/crops/coco_minitrain_25kimagestrain2017000000398571jpg_241_134_110_52jpg
labelkite
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.61095
Path/crops/coco_minitrain_25kimagestrain2017000000328069jpg_0_2_639_417jpg
labelcat
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.61257
Path/crops/coco_minitrain_25kimagestrain2017000000131330jpg_137_156_210_423jpg
labelpotted plant
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.619667
Path/crops/coco_minitrain_25kimagestrain2017000000004576jpg_256_373_136_50jpg
labelbicycle
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.620645
Path/crops/coco_minitrain_25kimagestrain2017000000527529jpg_164_71_242_298jpg
labelhandbag
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.620674
Path/crops/coco_minitrain_25kimagestrain2017000000132816jpg_0_481_426_158jpg
labeldining table
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.622403
Path/crops/coco_minitrain_25kimagestrain2017000000395283jpg_186_107_52_81jpg
labelkite
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.624153
Path/crops/coco_minitrain_25kimagestrain2017000000262786jpg_170_18_92_29jpg
labelsink
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.624903
Path/crops/coco_minitrain_25kimagestrain2017000000135878jpg_290_159_347_178jpg
labelbird
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.625989
Path/crops/coco_minitrain_25kimagestrain2017000000268071jpg_2_26_589_426jpg
labelelephant
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.626685
Path/crops/coco_minitrain_25kimagestrain2017000000525675jpg_342_61_58_52jpg
labelchair
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", " \n", "
\n", " " ] }, "metadata": {} } ], "source": [ "# visualize outliers\n", "fd.vis.outliers_gallery()" ] }, { "cell_type": "markdown", "id": "c0f1fade", "metadata": { "id": "c0f1fade" }, "source": [ "## Size and shape issues\n", "Objects come in various shapes and sizes, and sometimes objects might be incorrectly labeled or too small to be useful. We will now find the smallest, narrowest and widest objects, and assess their usefulness. " ] }, { "cell_type": "code", "execution_count": 22, "id": "a2d00424", "metadata": { "id": "a2d00424", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "ba812fe8-0f9f-4e14-ad38-96b4bb751ac9" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ " col_x_x row_y_x width_x height_x label ext split index filename crop_filename col_x_y row_y_y width_y height_y error_code is_valid fd_index\n", "0 20.23 55.98 313.49 326.50 tv 0 train 0 coco_minitrain_25k/images/train2017/000000131075.jpg fastdup_minicoco/crops/coco_minitrain_25kimagestrain2017000000131075.jpg_20_55_313_326.jpg NaN NaN NaN NaN VALID True 0\n", "1 176.90 381.12 286.20 136.63 laptop 0 train 1 coco_minitrain_25k/images/train2017/000000131075.jpg fastdup_minicoco/crops/coco_minitrain_25kimagestrain2017000000131075.jpg_176_381_286_136.jpg NaN NaN NaN NaN VALID True 1\n", "2 369.96 361.35 72.76 73.91 laptop 0 train 2 coco_minitrain_25k/images/train2017/000000131075.jpg fastdup_minicoco/crops/coco_minitrain_25kimagestrain2017000000131075.jpg_369_361_72_73.jpg NaN NaN NaN NaN VALID True 2\n", "3 411.68 417.87 66.32 129.44 chair 0 train 3 coco_minitrain_25k/images/train2017/000000131075.jpg fastdup_minicoco/crops/coco_minitrain_25kimagestrain2017000000131075.jpg_411_417_66_129.jpg NaN NaN NaN NaN VALID True 3\n", "4 367.31 363.25 72.27 67.01 tv 0 train 4 coco_minitrain_25k/images/train2017/000000131075.jpg fastdup_minicoco/crops/coco_minitrain_25kimagestrain2017000000131075.jpg_367_363_72_67.jpg NaN NaN NaN NaN VALID True 4\n" ] }, { "output_type": 
"stream", "name": "stderr", "text": [ ":3: SettingWithCopyWarning:\n", "\n", "\n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", "\n", ":4: SettingWithCopyWarning:\n", "\n", "\n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", "\n" ] } ], "source": [ "annot = fd.annotations()\n", "print(annot.head())\n", "annot['area'] = annot['width_x'] * annot['height_x']\n", "annot['aspect'] = annot['width_x'] / annot['height_x']" ] }, { "cell_type": "code", "execution_count": 23, "id": "3298e003", "metadata": { "id": "3298e003" }, "outputs": [], "source": [ "# Smallest 5% of objects:\n", "smallest_objects = annot[annot['area'] < annot['area'].quantile(0.05)].sort_values(by=['area'])\n", "\n", "# 5% of extreme aspect ratios\n", "aspect_ratio_objects = annot[(annot['aspect'] < annot['aspect'].quantile(0.05))\n", " | (annot['aspect'] > annot['aspect'].quantile(0.95))].sort_values(by=['aspect'])\n" ] }, { "cell_type": "code", "execution_count": 24, "id": "a4470f45", "metadata": { "id": "a4470f45", "outputId": "51e772c2-9306-4510-defb-71f21a98757a", "colab": { "base_uri": "https://localhost:8080/", "height": 207 } }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " col_x_x row_y_x width_x height_x label ext split index filename crop_filename col_x_y row_y_y width_y height_y error_code is_valid fd_index area aspect\n", "7882 510.70 100.68 10.13 10.13 cup 0 train 7882 coco_minitrain_25k/images/train2017/000000267216.jpg 
fastdup_minicoco/crops/coco_minitrain_25kimagestrain2017000000267216.jpg_510_100_10_10.jpg NaN NaN NaN NaN VALID True 7882 102.6169 1.000000\n", "3856 203.70 339.19 10.51 10.01 car 0 train 3856 coco_minitrain_25k/images/train2017/000000002529.jpg fastdup_minicoco/crops/coco_minitrain_25kimagestrain2017000000002529.jpg_203_339_10_10.jpg NaN NaN NaN NaN VALID True 3856 105.2051 1.049950\n", "1003 511.08 171.01 10.44 10.12 person 0 train 1003 coco_minitrain_25k/images/train2017/000000393978.jpg fastdup_minicoco/crops/coco_minitrain_25kimagestrain2017000000393978.jpg_511_171_10_10.jpg NaN NaN NaN NaN VALID True 1003 105.6528 1.031621" ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
col_x_xrow_y_xwidth_xheight_xlabelextsplitindexfilenamecrop_filenamecol_x_yrow_y_ywidth_yheight_yerror_codeis_validfd_indexareaaspect
7882510.70100.6810.1310.13cup0train7882coco_minitrain_25k/images/train2017/000000267216.jpgfastdup_minicoco/crops/coco_minitrain_25kimagestrain2017000000267216.jpg_510_100_10_10.jpgNaNNaNNaNNaNVALIDTrue7882102.61691.000000
3856203.70339.1910.5110.01car0train3856coco_minitrain_25k/images/train2017/000000002529.jpgfastdup_minicoco/crops/coco_minitrain_25kimagestrain2017000000002529.jpg_203_339_10_10.jpgNaNNaNNaNNaNVALIDTrue3856105.20511.049950
1003511.08171.0110.4410.12person0train1003coco_minitrain_25k/images/train2017/000000393978.jpgfastdup_minicoco/crops/coco_minitrain_25kimagestrain2017000000393978.jpg_511_171_10_10.jpgNaNNaNNaNNaNVALIDTrue1003105.65281.031621
\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 24 } ], "source": [ "# let's see the smallest objects\n", "smallest_objects.head(3)" ] }, { "cell_type": "code", "execution_count": 25, "id": "6d634d18", "metadata": { "id": "6d634d18", "outputId": "00b680f2-f783-41b6-8232-e99c749d019d", "colab": { "base_uri": "https://localhost:8080/", "height": 207 } }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " col_x_x row_y_x width_x height_x label ext split index filename crop_filename col_x_y row_y_y width_y height_y error_code is_valid fd_index area aspect\n", "3642 1.92 136.50 11.51 263.87 person 0 train 3642 coco_minitrain_25k/images/train2017/000000002444.jpg fastdup_minicoco/crops/coco_minitrain_25kimagestrain2017000000002444.jpg_1_136_11_263.jpg NaN NaN NaN NaN VALID True 3642 3037.1437 0.043620\n", "8332 626.64 135.36 13.15 159.64 umbrella 0 train 8332 coco_minitrain_25k/images/train2017/000000136415.jpg fastdup_minicoco/crops/coco_minitrain_25kimagestrain2017000000136415.jpg_626_135_13_159.jpg NaN NaN NaN NaN VALID True 8332 2099.2660 0.082373\n", "3591 0.00 133.02 19.37 219.55 person 0 train 3591 coco_minitrain_25k/images/train2017/000000002415.jpg fastdup_minicoco/crops/coco_minitrain_25kimagestrain2017000000002415.jpg_0_133_19_219.jpg NaN NaN NaN NaN VALID True 3591 4252.6835 0.088226" ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
col_x_xrow_y_xwidth_xheight_xlabelextsplitindexfilenamecrop_filenamecol_x_yrow_y_ywidth_yheight_yerror_codeis_validfd_indexareaaspect
36421.92136.5011.51263.87person0train3642coco_minitrain_25k/images/train2017/000000002444.jpgfastdup_minicoco/crops/coco_minitrain_25kimagestrain2017000000002444.jpg_1_136_11_263.jpgNaNNaNNaNNaNVALIDTrue36423037.14370.043620
8332626.64135.3613.15159.64umbrella0train8332coco_minitrain_25k/images/train2017/000000136415.jpgfastdup_minicoco/crops/coco_minitrain_25kimagestrain2017000000136415.jpg_626_135_13_159.jpgNaNNaNNaNNaNVALIDTrue83322099.26600.082373
35910.00133.0219.37219.55person0train3591coco_minitrain_25k/images/train2017/000000002415.jpgfastdup_minicoco/crops/coco_minitrain_25kimagestrain2017000000002415.jpg_0_133_19_219.jpgNaNNaNNaNNaNVALIDTrue35914252.68350.088226
\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 25 } ], "source": [ "aspect_ratio_objects.head(3)" ] }, { "cell_type": "code", "execution_count": 26, "id": "fde725d7", "metadata": { "id": "fde725d7", "outputId": "febb9c13-29a4-4653-d982-74b280c7aa62", "colab": { "base_uri": "https://localhost:8080/", "height": 207 } }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " col_x_x row_y_x width_x height_x label ext split index filename crop_filename col_x_y row_y_y width_y height_y error_code is_valid fd_index area aspect\n", "6006 89.05 212.44 486.91 24.63 train 0 train 6006 coco_minitrain_25k/images/train2017/000000397173.jpg fastdup_minicoco/crops/coco_minitrain_25kimagestrain2017000000397173.jpg_89_212_486_24.jpg NaN NaN NaN NaN VALID True 6006 11992.5933 19.768981\n", "2021 221.00 180.00 305.00 15.00 car 0 train 2021 coco_minitrain_25k/images/train2017/000000001408.jpg fastdup_minicoco/crops/coco_minitrain_25kimagestrain2017000000001408.jpg_221_180_305_15.jpg NaN NaN NaN NaN VALID True 2021 4575.0000 20.333333\n", "4261 33.00 216.00 602.00 18.00 boat 0 train 4261 coco_minitrain_25k/images/train2017/000000527098.jpg fastdup_minicoco/crops/coco_minitrain_25kimagestrain2017000000527098.jpg_33_216_602_18.jpg NaN NaN NaN NaN VALID True 4261 10836.0000 33.444444" ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
col_x_xrow_y_xwidth_xheight_xlabelextsplitindexfilenamecrop_filenamecol_x_yrow_y_ywidth_yheight_yerror_codeis_validfd_indexareaaspect
600689.05212.44486.9124.63train0train6006coco_minitrain_25k/images/train2017/000000397173.jpgfastdup_minicoco/crops/coco_minitrain_25kimagestrain2017000000397173.jpg_89_212_486_24.jpgNaNNaNNaNNaNVALIDTrue600611992.593319.768981
2021221.00180.00305.0015.00car0train2021coco_minitrain_25k/images/train2017/000000001408.jpgfastdup_minicoco/crops/coco_minitrain_25kimagestrain2017000000001408.jpg_221_180_305_15.jpgNaNNaNNaNNaNVALIDTrue20214575.000020.333333
426133.00216.00602.0018.00boat0train4261coco_minitrain_25k/images/train2017/000000527098.jpgfastdup_minicoco/crops/coco_minitrain_25kimagestrain2017000000527098.jpg_33_216_602_18.jpgNaNNaNNaNNaNVALIDTrue426110836.000033.444444
\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 26 } ], "source": [ "aspect_ratio_objects.tail(3)" ] }, { "cell_type": "markdown", "id": "9af6979b", "metadata": { "id": "9af6979b" }, "source": [ "Look at that! The slices reveal many items that are either tiny (10x10 pixels) or have extreme aspect ratios - as extreme as 33:1 - an object 602 pixels wide by only 18 pixels high. " ] }, { "cell_type": "markdown", "id": "5f4d7cc1", "metadata": { "id": "5f4d7cc1" }, "source": [ "## Objects that didn't make the cut:\n", "Let's look at objects deemed invalid by fastdup. These are either objects that are too small to be useful in our analysis (smaller than 10px), have bounding boxes with illegal values (negative or beyond image boundaries), or are part of images that are missing. We can tell which is which by the `error_code` column in our dataframe." ] }, { "cell_type": "code", "execution_count": 27, "id": "6b030732", "metadata": { "id": "6b030732", "outputId": "04f80f66-aaeb-4563-e428-87d5dbcb818c", "colab": { "base_uri": "https://localhost:8080/", "height": 187 } }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " col_x_x row_y_x width_x height_x label ext split index filename crop_filename col_x_y row_y_y width_y height_y error_code is_valid fd_index\n", "0 437.17 244.79 19.52 9.93 mouse 0 train 16 NaN NaN NaN NaN NaN NaN ERROR_BAD_BOUNDING_BOX False 16\n", "1 137.84 332.22 8.92 11.50 person 0 train 60 NaN NaN NaN NaN NaN NaN ERROR_BAD_BOUNDING_BOX False 60\n", "2 177.35 294.13 5.32 11.92 person 0 train 65 NaN NaN NaN NaN NaN NaN ERROR_BAD_BOUNDING_BOX False 65" ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
col_x_xrow_y_xwidth_xheight_xlabelextsplitindexfilenamecrop_filenamecol_x_yrow_y_ywidth_yheight_yerror_codeis_validfd_index
0437.17244.7919.529.93mouse0train16NaNNaNNaNNaNNaNNaNERROR_BAD_BOUNDING_BOXFalse16
1137.84332.228.9211.50person0train60NaNNaNNaNNaNNaNNaNERROR_BAD_BOUNDING_BOXFalse60
2177.35294.135.3211.92person0train65NaNNaNNaNNaNNaNNaNERROR_BAD_BOUNDING_BOXFalse65
\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 27 } ], "source": [ "fd.invalid_instances().head(3)" ] }, { "cell_type": "markdown", "id": "6d1196e3", "metadata": { "id": "6d1196e3" }, "source": [ "## Distribution of error codes:\n", "A simple `value_counts` will tell us the distribution of the errors. We have found 18,592 (!) bounding boxes that are either too small or go beyond image boundaries. This is 10% of the data! Filtering them would both save us gruesome debugging of training errors and failures and help us provide the model with usefully sized objects. " ] }, { "cell_type": "code", "execution_count": 28, "id": "3d5350cf", "metadata": { "id": "3d5350cf", "outputId": "5b8b41f4-3227-4ed5-aaba-5624f4c3f433", "colab": { "base_uri": "https://localhost:8080/" } }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "ERROR_MISSING_FILE 173544\n", "ERROR_BAD_BOUNDING_BOX 933\n", "Name: error_code, dtype: int64" ] }, "metadata": {}, "execution_count": 28 } ], "source": [ "fd.invalid_instances()['error_code'].value_counts()" ] }, { "cell_type": "markdown", "id": "39e4ee9b", "metadata": { "id": "39e4ee9b" }, "source": [ "## Find possible mislabels\n", "The fastdup similarity search and gallery are a strong tool for finding objects that are possibly mislabeled. By finding each object's nearest neighbors and their classes, we can find objects with classes contradicting their neighbors' - a strong sign of mislabels." ] }, { "cell_type": "code", "execution_count": 29, "id": "f5dea401", "metadata": { "id": "f5dea401", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "outputId": "50d8eb14-1cbd-4ae4-a0aa-fcbd360936dc" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "laptop\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ "100%|██████████| 25/25 [00:00<00:00, 77.16it/s]\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "Finished OK. 
Components are stored as image files fastdup_minicoco/galleries/components_[index].jpg\n", "Stored components visual view in fastdup_minicoco/galleries/components.html\n", "Execution time in seconds 1.9\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " Components Report\n", " \n", " \n", "\n", "\n", "\n", "
\n", "
\n", "
\n", " \n", " \"logo\"\n", " \n", "
\n", " \n", "
\n", "
\n", "
\n", "

Components Report

Showing groups of similar images, from different classes

\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component4244
num_images2
mean_distance1.0
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
dining table1
pizza1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component7021
num_images2
mean_distance1.0
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
bowl1
dining table1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component5016
num_images2
mean_distance1.0
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
couch1
dog1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component8428
num_images2
mean_distance1.0
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
dining table1
knife1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component2495
num_images2
mean_distance1.0
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
bus1
truck1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component6210
num_images2
mean_distance1.0
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
chair1
couch1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component6395
num_images2
mean_distance1.0
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
person1
sandwich1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component6632
num_images2
mean_distance1.0
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
person1
skis1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component7228
num_images2
mean_distance1.0
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
bed1
person1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component3191
num_images2
mean_distance1.0
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
remote1
wine glass1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component7112
num_images2
mean_distance1.0
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
dining table1
pizza1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component4647
num_images2
mean_distance0.9908
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
fork1
spoon1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component1245
num_images2
mean_distance0.9906
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
cat1
sink1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component9578
num_images2
mean_distance0.9889
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
bed1
dog1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component7706
num_images2
mean_distance0.987
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
cow1
horse1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component7129
num_images2
mean_distance0.9863
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
hot dog1
sandwich1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component7399
num_images2
mean_distance0.986
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
cat1
tv1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component891
num_images2
mean_distance0.9853
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
hot dog1
sandwich1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component8309
num_images2
mean_distance0.9846
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
hot dog1
sandwich1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component526
num_images2
mean_distance0.9831
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
bottle1
refrigerator1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component348
num_images2
mean_distance0.9829
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
bench1
dining table1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component5727
num_images2
mean_distance0.9827
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
cake1
dining table1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component6040
num_images2
mean_distance0.9817
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
bowl1
cup1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component4834
num_images2
mean_distance0.9794
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
apple1
orange1
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component1041
num_images2
mean_distance0.9794
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Label
bed1
couch1
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", " \n", "
\n", " \n", " " ] }, "metadata": {} } ], "source": [ "fd.vis.component_gallery(num_images=25, slice='diff')" ] }, { "cell_type": "code", "source": [], "metadata": { "id": "HERGxWSMSDh0" }, "id": "HERGxWSMSDh0", "execution_count": 21, "outputs": [] } ], "metadata": { "colab": { "provenance": [] }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.9" }, "vscode": { "interpreter": { "hash": "5b6e8fba36db23bc4d54e0302cd75fdd75c29d9edcbab68d6cfc74e7e4b30305" } } }, "nbformat": 4, "nbformat_minor": 5 }