{ "cells": [ { "cell_type": "markdown", "id": "SwSYWR4vzk_e", "metadata": { "id": "SwSYWR4vzk_e", "tags": [] }, "source": [ "# Analysing Image Classification Dataset" ] }, { "cell_type": "markdown", "id": "bbed0117-e8d1-4df6-b8b7-7bcce10b8655", "metadata": { "tags": [] }, "source": [ "## Installation & Setting Up" ] }, { "cell_type": "code", "execution_count": null, "id": "506e82b4-a1c2-4262-a326-d0924bb018b6", "metadata": { "id": "506e82b4-a1c2-4262-a326-d0924bb018b6" }, "outputs": [], "source": [ "# use %pip (not !pip) so packages are installed into the running kernel's environment\n", "%pip install -U pip\n", "%pip install fastdup" ] }, { "cell_type": "markdown", "id": "8a79fb1b-b089-4d4d-8fa8-3e2b2ef7f886", "metadata": { "id": "8a79fb1b-b089-4d4d-8fa8-3e2b2ef7f886", "tags": [] }, "source": [ "## Download Imagenette Dataset" ] }, { "cell_type": "code", "execution_count": null, "id": "be5b7ca5-34f5-4a0f-b081-2e78be6a425a", "metadata": {}, "outputs": [], "source": [ "!wget https://s3.amazonaws.com/fast-ai-imageclas/imagenette2-160.tgz\n", "!tar -xf imagenette2-160.tgz" ] }, { "cell_type": "markdown", "id": "f01586fe-db75-4154-aa15-9ea2709c9461", "metadata": { "id": "f01586fe-db75-4154-aa15-9ea2709c9461" }, "source": [ "## Load and Format Annotations" ] }, { "cell_type": "code", "execution_count": 1, "id": "ff90fe31-7c39-46c5-8c58-3ae349fbcc91", "metadata": { "executionInfo": { "elapsed": 949, "status": "ok", "timestamp": 1677666765166, "user": { "displayName": "Tom Shani", "userId": "00667426488827942961" }, "user_tz": -120 }, "id": "ff90fe31-7c39-46c5-8c58-3ae349fbcc91", "tags": [] }, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 2, "id": "21d2474d-3fa5-4148-a0f1-ea8d55d63b85", "metadata": { "executionInfo": { "elapsed": 2, "status": "ok", "timestamp": 1677666768281, "user": { "displayName": "Tom Shani", "userId": "00667426488827942961" }, "user_tz": -120 }, "id": "21d2474d-3fa5-4148-a0f1-ea8d55d63b85", "tags": [] }, "outputs": [], "source": [ "data_dir = 'imagenette2-160/'\n", "csv_path = 
'imagenette2-160/noisy_imagenette.csv'" ] }, { "cell_type": "code", "execution_count": 3, "id": "2cb91ccb-9cb6-42ba-9489-96182eccc583", "metadata": { "executionInfo": { "elapsed": 2, "status": "ok", "timestamp": 1677666769859, "user": { "displayName": "Tom Shani", "userId": "00667426488827942961" }, "user_tz": -120 }, "id": "2cb91ccb-9cb6-42ba-9489-96182eccc583", "tags": [] }, "outputs": [], "source": [ "label_map = {\n", " 'n02979186': 'cassette_player', \n", " 'n03417042': 'garbage_truck', \n", " 'n01440764': 'tench', \n", " 'n02102040': 'English_springer', \n", " 'n03028079': 'church',\n", " 'n03888257': 'parachute', \n", " 'n03394916': 'French_horn', \n", " 'n03000684': 'chain_saw', \n", " 'n03445777': 'golf_ball', \n", " 'n03425413': 'gas_pump'\n", "}" ] }, { "cell_type": "markdown", "id": "8aba34e1", "metadata": {}, "source": [ "Load the annotation provided with the dataset." ] }, { "cell_type": "code", "execution_count": 4, "id": "e2e90600-b02d-4a2a-a348-7b67157f9129", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 143 }, "executionInfo": { "elapsed": 2, "status": "ok", "timestamp": 1677666769859, "user": { "displayName": "Tom Shani", "userId": "00667426488827942961" }, "user_tz": -120 }, "id": "e2e90600-b02d-4a2a-a348-7b67157f9129", "outputId": "f9f72c0d-f613-4aac-d29c-3646b2301dcb", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pathnoisy_labels_0noisy_labels_1noisy_labels_5noisy_labels_25noisy_labels_50is_valid
0train/n02979186/n02979186_9036.JPEGn02979186n02979186n02979186n02979186n02979186False
1train/n02979186/n02979186_11957.JPEGn02979186n02979186n02979186n02979186n03000684False
2train/n02979186/n02979186_9715.JPEGn02979186n02979186n02979186n03417042n03000684False
\n", "
" ], "text/plain": [ " path noisy_labels_0 noisy_labels_1 \n", "0 train/n02979186/n02979186_9036.JPEG n02979186 n02979186 \\\n", "1 train/n02979186/n02979186_11957.JPEG n02979186 n02979186 \n", "2 train/n02979186/n02979186_9715.JPEG n02979186 n02979186 \n", "\n", " noisy_labels_5 noisy_labels_25 noisy_labels_50 is_valid \n", "0 n02979186 n02979186 n02979186 False \n", "1 n02979186 n02979186 n03000684 False \n", "2 n02979186 n03417042 n03000684 False " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_annot = pd.read_csv(csv_path)\n", "df_annot.head(3)" ] }, { "cell_type": "markdown", "id": "dfc957bf", "metadata": {}, "source": [ "Transform the annotations into the format fastdup expects." ] }, { "cell_type": "code", "execution_count": 5, "id": "473185d1-89f5-4746-b87b-f2b3ef7c445b", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 424 }, "executionInfo": { "elapsed": 1012, "status": "ok", "timestamp": 1677666771201, "user": { "displayName": "Tom Shani", "userId": "00667426488827942961" }, "user_tz": -120 }, "id": "473185d1-89f5-4746-b87b-f2b3ef7c445b", "outputId": "c09c986d-bcef-4545-8ceb-ee5196b40ee6", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
filenamelabelsplit
0imagenette2-160/train/n02979186/n02979186_9036...cassette_playerimagenette2-160
1imagenette2-160/train/n02979186/n02979186_1195...cassette_playerimagenette2-160
2imagenette2-160/train/n02979186/n02979186_9715...cassette_playerimagenette2-160
3imagenette2-160/train/n02979186/n02979186_2173...cassette_playerimagenette2-160
4imagenette2-160/train/n02979186/ILSVRC2012_val...cassette_playerimagenette2-160
............
13389imagenette2-160/val/n03425413/n03425413_17521....gas_pumpimagenette2-160
13390imagenette2-160/val/n03425413/n03425413_20711....gas_pumpimagenette2-160
13391imagenette2-160/val/n03425413/n03425413_19050....gas_pumpimagenette2-160
13392imagenette2-160/val/n03425413/n03425413_13831....gas_pumpimagenette2-160
13393imagenette2-160/val/n03425413/n03425413_1242.JPEGgas_pumpimagenette2-160
\n", "

13394 rows × 3 columns

\n", "
" ], "text/plain": [ " filename label \n", "0 imagenette2-160/train/n02979186/n02979186_9036... cassette_player \\\n", "1 imagenette2-160/train/n02979186/n02979186_1195... cassette_player \n", "2 imagenette2-160/train/n02979186/n02979186_9715... cassette_player \n", "3 imagenette2-160/train/n02979186/n02979186_2173... cassette_player \n", "4 imagenette2-160/train/n02979186/ILSVRC2012_val... cassette_player \n", "... ... ... \n", "13389 imagenette2-160/val/n03425413/n03425413_17521.... gas_pump \n", "13390 imagenette2-160/val/n03425413/n03425413_20711.... gas_pump \n", "13391 imagenette2-160/val/n03425413/n03425413_19050.... gas_pump \n", "13392 imagenette2-160/val/n03425413/n03425413_13831.... gas_pump \n", "13393 imagenette2-160/val/n03425413/n03425413_1242.JPEG gas_pump \n", "\n", " split \n", "0 imagenette2-160 \n", "1 imagenette2-160 \n", "2 imagenette2-160 \n", "3 imagenette2-160 \n", "4 imagenette2-160 \n", "... ... \n", "13389 imagenette2-160 \n", "13390 imagenette2-160 \n", "13391 imagenette2-160 \n", "13392 imagenette2-160 \n", "13393 imagenette2-160 \n", "\n", "[13394 rows x 3 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# take relevant columns\n", "df_annot = df_annot[['path', 'noisy_labels_0']]\n", "\n", "# rename columns to fastdup's column names\n", "df_annot = df_annot.rename({'noisy_labels_0': 'label', 'path': 'filename'}, axis='columns')\n", "\n", "# create split column ('train' / 'val') from the dataset-relative path;\n", "# this must happen before data_dir is prepended, otherwise the first path\n", "# component is the dataset directory and every row gets the same split\n", "df_annot['split'] = df_annot['filename'].apply(lambda x: x.split(\"/\")[0])\n", "\n", "# append datadir\n", "df_annot['filename'] = df_annot['filename'].apply(lambda x: data_dir + x)\n", "\n", "# map label ids to regular labels\n", "df_annot['label'] = df_annot['label'].map(label_map)\n", "\n", "# show formatted annotations\n", "df_annot" ] }, { "cell_type": "markdown", "id": "0c648ed1-5016-4230-9873-546eb510b764", "metadata": { "id": "0c648ed1-5016-4230-9873-546eb510b764" }, "source": [ "## Import & Run fastdup\n", "\n", 
"In this example we run fastdup by providing the annotations." ] }, { "cell_type": "code", "execution_count": 6, "id": "7f69d8b2", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'0.922'" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import fastdup\n", "fastdup.__version__" ] }, { "cell_type": "code", "execution_count": 6, "id": "92a6e2f9-e60c-44c0-b48a-f7413f7594ae", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "FastDup Software, (C) copyright 2022 Dr. Amir Alush and Dr. Danny Bickson.\n", "2023-03-20 17:57:26 [INFO] Going to loop over dir imagenette2-160\n", "2023-03-20 17:57:26 [INFO] Found total 13394 images to run on\n", "2023-03-20 17:57:54 [INFO] Found total 13394 images to run onimated: 0 Minutes 0 Features\n", "2023-03-20 17:57:55 [INFO] 1657) Finished write_index() NN model\n", "2023-03-20 17:57:55 [INFO] Stored nn model index file fastdup_imagenette/nnf.index\n", "2023-03-20 17:57:56 [INFO] Total time took 30624 ms\n", "2023-03-20 17:57:56 [INFO] Found a total of 0 fully identical images (d>0.990), which are 0.00 %\n", "2023-03-20 17:57:56 [INFO] Found a total of 0 nearly identical images(d>0.980), which are 0.00 %\n", "2023-03-20 17:57:56 [INFO] Found a total of 16741 above threshold images (d>0.800), which are 41.66 %\n", "2023-03-20 17:57:56 [INFO] Found a total of 1339 outlier images (d<0.050), which are 3.33 %\n", "2023-03-20 17:57:56 [INFO] Min distance found 0.470 max distance 0.969\n", "2023-03-20 17:57:56 [INFO] Running connected components for ccthreshold 0.900000 \n", ".0\n", " ########################################################################################\n", "\n", "Dataset Analysis Summary: \n", "\n", " Dataset contains 13394 images\n", " Valid images are 100.00% (13,394) of the data, invalid are 0.00% (0) of the data\n", " Similarity: 2.73% (366) belong to 20 similarity clusters (components).\n", " 97.27% (13,028) images do not belong to 
any similarity cluster.\n", " Largest cluster has 40 (0.30%) images.\n", " For a detailed analysis, use `.connected_components()`\n", "(similarity threshold used is 0.8, connected component threshold used is 0.9).\n", "\n", " Outliers: 6.21% (832) of images are possible outliers, and fall in the bottom 5.00% of similarity values.\n", " For a detailed list of outliers, use `.outliers()`.\n" ] } ], "source": [ "work_dir = 'fastdup_imagenette'\n", "\n", "fd = fastdup.create(work_dir=work_dir, input_dir=data_dir) \n", "fd.run(annotations=df_annot, ccthreshold=0.9, threshold=0.8)" ] }, { "cell_type": "markdown", "id": "62e35a12-fadd-4b3f-bcab-69e6e67862a4", "metadata": {}, "source": [ "## Outliers\n", "\n", "Visualize outliers from the dataset." ] }, { "cell_type": "code", "execution_count": 7, "id": "b39ec702-3ea1-4afe-a948-f026ba8fcb47", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "executionInfo": { "elapsed": 2658, "status": "ok", "timestamp": 1677667336302, "user": { "displayName": "Tom Shani", "userId": "00667426488827942961" }, "user_tz": -120 }, "id": "b39ec702-3ea1-4afe-a948-f026ba8fcb47", "outputId": "caa992d2-5267-408c-b44a-3a4a66e1ab5f", "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 29767.95it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Stored outliers visual view in fastdup_imagenette/galleries/outliers.html\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " Outliers Report\n", "

Showing image outliers, one per row


\n", " \n", "\n", "\n", "\n", "
\n", "
\n", "
\n", " \n", " \"logo\"\n", " \n", "
\n", " \n", "
\n", "
\n", "
\n", "

Outliers Report

\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.469904
Pathval/n03417042/n03417042_29412.JPEG
labelgarbage_truck
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.476124
Pathtrain/n02979186/n02979186_3967.JPEG
labelcassette_player
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.47929
Pathval/n03417042/n03417042_91.JPEG
labelgarbage_truck
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.48977
Pathval/n03417042/n03417042_7422.JPEG
labelgarbage_truck
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.505358
Pathtrain/n03417042/n03417042_15485.JPEG
labelgarbage_truck
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.510293
Pathtrain/n03417042/n03417042_19447.JPEG
labelgarbage_truck
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.514679
Pathtrain/n03445777/n03445777_5218.JPEG
labelgolf_ball
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.515321
Pathval/n03417042/n03417042_27581.JPEG
labelgarbage_truck
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.536679
Pathtrain/n03417042/n03417042_24856.JPEG
labelgarbage_truck
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.541046
Pathtrain/n03417042/n03417042_15198.JPEG
labelgarbage_truck
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.544796
Pathtrain/n03888257/n03888257_34639.JPEG
labelparachute
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.548765
Pathval/n03417042/n03417042_6081.JPEG
labelgarbage_truck
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.555266
Pathtrain/n03445777/n03445777_3254.JPEG
labelgolf_ball
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.569853
Pathtrain/n03445777/n03445777_13576.JPEG
labelgolf_ball
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.579928
Pathval/n02102040/n02102040_7670.JPEG
labelEnglish_springer
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.583889
Pathval/n03445777/n03445777_5932.JPEG
labelgolf_ball
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.590159
Pathtrain/n03888257/n03888257_79145.JPEG
labelparachute
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.607759
Pathtrain/n03394916/n03394916_37544.JPEG
labelFrench_horn
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.608525
Pathtrain/n03394916/n03394916_33663.JPEG
labelFrench_horn
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.609526
Pathtrain/n03888257/n03888257_7793.JPEG
labelparachute
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", " \n", "
\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fd.vis.outliers_gallery()" ] }, { "cell_type": "markdown", "id": "67378b58", "metadata": {}, "source": [ "Show outliers image data." ] }, { "cell_type": "code", "execution_count": 8, "id": "aa1c0e5d-6038-491b-8a91-1d76a87590d4", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 270 }, "executionInfo": { "elapsed": 429, "status": "ok", "timestamp": 1677667331251, "user": { "displayName": "Tom Shani", "userId": "00667426488827942961" }, "user_tz": -120 }, "id": "aa1c0e5d-6038-491b-8a91-1d76a87590d4", "outputId": "b38332f8-7e4e-45de-f7d3-828a52757ec2", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexoutliernearestdistanceimg_filename_outlierlabel_outliersplit_outliererror_code_outlieris_valid_outlierimg_filename_nearestlabel_nearestsplit_nearesterror_code_nearestis_valid_nearest
013381200917570.469904val/n03417042/n03417042_29412.JPEGgarbage_truckvalVALIDTruetrain/n02102040/n02102040_7256.JPEGEnglish_springertrainVALIDTrue
11336266497630.476124train/n02979186/n02979186_3967.JPEGcassette_playertrainVALIDTrueval/n01440764/n01440764_710.JPEGtenchvalVALIDTrue
213351217218170.479290val/n03417042/n03417042_91.JPEGgarbage_truckvalVALIDTruetrain/n02102040/n02102040_7868.JPEGEnglish_springertrainVALIDTrue
313321213115220.489770val/n03417042/n03417042_7422.JPEGgarbage_truckvalVALIDTruetrain/n02102040/n02102040_4884.JPEGEnglish_springertrainVALIDTrue
41330589813920.505358train/n03417042/n03417042_15485.JPEGgarbage_trucktrainVALIDTruetrain/n02102040/n02102040_3719.JPEGEnglish_springertrainVALIDTrue
\n", "
" ], "text/plain": [ " index outlier nearest distance img_filename_outlier \\\n", "0 1338 12009 1757 0.469904 val/n03417042/n03417042_29412.JPEG \n", "1 1336 2664 9763 0.476124 train/n02979186/n02979186_3967.JPEG \n", "2 1335 12172 1817 0.479290 val/n03417042/n03417042_91.JPEG \n", "3 1332 12131 1522 0.489770 val/n03417042/n03417042_7422.JPEG \n", "4 1330 5898 1392 0.505358 train/n03417042/n03417042_15485.JPEG \n", "\n", " label_outlier split_outlier error_code_outlier is_valid_outlier \\\n", "0 garbage_truck val VALID True \n", "1 cassette_player train VALID True \n", "2 garbage_truck val VALID True \n", "3 garbage_truck val VALID True \n", "4 garbage_truck train VALID True \n", "\n", " img_filename_nearest label_nearest split_nearest \\\n", "0 train/n02102040/n02102040_7256.JPEG English_springer train \n", "1 val/n01440764/n01440764_710.JPEG tench val \n", "2 train/n02102040/n02102040_7868.JPEG English_springer train \n", "3 train/n02102040/n02102040_4884.JPEG English_springer train \n", "4 train/n02102040/n02102040_3719.JPEG English_springer train \n", "\n", " error_code_nearest is_valid_nearest \n", "0 VALID True \n", "1 VALID True \n", "2 VALID True \n", "3 VALID True \n", "4 VALID True " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fd.outliers().head(5)" ] }, { "cell_type": "markdown", "id": "bc16596d-899a-45eb-87ca-1d2b96a6ad96", "metadata": {}, "source": [ "## Comparing Labels of Similar Images\n", "Find possible mislabels by comparing a query image to other images in the dataset." 
] }, { "cell_type": "code", "execution_count": 9, "id": "4d7cf1b9-c6c0-4b90-b7bb-59ca7bdbdcd7", "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 180.17it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Stored similar images visual view in fastdup_imagenette/galleries/similarity.html\n" ] }, { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " Similarity Report\n", " \n", " \n", "\n", "\n", "\n", "
\n", "
\n", "
\n", " \n", " \"logo\"\n", " \n", "
\n", " \n", "
\n", "
\n", "
\n", "

Similarity Report

\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info From
labelFrench_horn
from/train/n03394916/n03394916_44127.JPEG
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", "\n", " \n", "
Info To
0.968786/val/n03394916/n03394916_30631.JPEGFrench_horn
0.918324/train/n03394916/n03394916_36016.JPEGFrench_horn
\n", "
\n", "
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Query Image
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Similar
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info From
labelFrench_horn
from/val/n03394916/n03394916_30631.JPEG
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", "\n", " \n", "
Info To
0.968786/train/n03394916/n03394916_44127.JPEGFrench_horn
0.903754/train/n03394916/n03394916_29969.JPEGFrench_horn
\n", "
\n", "
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Query Image
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Similar
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info From
labelgolf_ball
from/val/n03445777/n03445777_6882.JPEG
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", "\n", " \n", "
Info To
0.962459/train/n03445777/n03445777_13918.JPEGgolf_ball
0.918005/val/n03445777/n03445777_5912.JPEGgolf_ball
\n", "
\n", "
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Query Image
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Similar
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info From
labelgolf_ball
from/train/n03445777/n03445777_13918.JPEG
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", "\n", " \n", "
Info To
0.962459/val/n03445777/n03445777_6882.JPEGgolf_ball
0.91704/val/n03445777/n03445777_8820.JPEGgolf_ball
\n", "
\n", "
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Query Image
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Similar
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info From
labelEnglish_springer
from/train/n02102040/n02102040_1564.JPEG
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", "\n", " \n", "
Info To
0.953837/train/n02102040/n02102040_3837.JPEGEnglish_springer
0.908732/train/n02102040/n02102040_3586.JPEGEnglish_springer
\n", "
\n", "
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Query Image
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Similar
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info From
labelEnglish_springer
from/train/n02102040/n02102040_3837.JPEG
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", "\n", " \n", "
Info To
0.953837/train/n02102040/n02102040_1564.JPEGEnglish_springer
0.893944/train/n02102040/n02102040_3027.JPEGEnglish_springer
\n", "
\n", "
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Query Image
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Similar
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info From
labeltench
from/train/n01440764/n01440764_7457.JPEG
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", "\n", " \n", "
Info To
0.953413/train/n01440764/n01440764_11339.JPEGtench
0.918778/train/n01440764/n01440764_9315.JPEGtench
\n", "
\n", "
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Query Image
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Similar
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info From
labeltench
from/train/n01440764/n01440764_11339.JPEG
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", "\n", " \n", "
Info To
0.953413/train/n01440764/n01440764_7457.JPEGtench
0.889166/train/n01440764/n01440764_12279.JPEGtench
\n", "
\n", "
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Query Image
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Similar
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info From
labelgarbage_truck
from/train/n03417042/n03417042_1578.JPEG
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", "\n", " \n", "
Info To
0.952239/train/n03417042/n03417042_12906.JPEGgarbage_truck
0.837864/val/n03417042/n03417042_9610.JPEGgarbage_truck
\n", "
\n", "
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Query Image
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Similar
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info From
labelgarbage_truck
from/train/n03417042/n03417042_12906.JPEG
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", "\n", " \n", "
Info To
0.952239/train/n03417042/n03417042_1578.JPEGgarbage_truck
0.828749/train/n03417042/n03417042_27686.JPEGgarbage_truck
\n", "
\n", "
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Query Image
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Similar
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info From
labelFrench_horn
from/val/n03394916/n03394916_6830.JPEG
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", "\n", " \n", "
Info To
0.951679/val/n03394916/n03394916_21092.JPEGFrench_horn
0.893079/train/n03394916/n03394916_35469.JPEGFrench_horn
\n", "
\n", "
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Query Image
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Similar
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info From
labelFrench_horn
from/val/n03394916/n03394916_21092.JPEG
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", "\n", " \n", "
Info To
0.951679/val/n03394916/n03394916_6830.JPEGFrench_horn
0.865771/train/n03394916/n03394916_35469.JPEGFrench_horn
\n", "
\n", "
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Query Image
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Similar
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info From
labelparachute
from/train/n03888257/n03888257_21027.JPEG
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", "\n", " \n", "
Info To
0.950477/val/n03888257/n03888257_11210.JPEGparachute
0.92043/val/n03888257/n03888257_12491.JPEGparachute
\n", "
\n", "
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Query Image
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Similar
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info From
labelparachute
from/val/n03888257/n03888257_11210.JPEG
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", "\n", " \n", "
Info To
0.950477/train/n03888257/n03888257_21027.JPEGparachute
0.865155/val/n03888257/n03888257_12491.JPEGparachute
\n", "
\n", "
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Query Image
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Similar
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info From
labelEnglish_springer
from/train/n02102040/n02102040_6313.JPEG
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", "\n", " \n", "
Info To
0.950173/train/n02102040/n02102040_3767.JPEGEnglish_springer
0.947323/val/n02102040/n02102040_350.JPEGEnglish_springer
\n", "
\n", "
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Query Image
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Similar
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info From
labelEnglish_springer
from/train/n02102040/n02102040_3767.JPEG
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", "\n", " \n", "
Info To
0.950173/train/n02102040/n02102040_6313.JPEGEnglish_springer
0.914056/val/n02102040/n02102040_350.JPEGEnglish_springer
\n", "
\n", "
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Query Image
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Similar
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info From
labelEnglish_springer
from/train/n02102040/ILSVRC2012_val_00032959.JPEG
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", "\n", " \n", "
Info To
0.949877/val/n02102040/n02102040_662.JPEGEnglish_springer
0.933115/train/n02102040/n02102040_3114.JPEGEnglish_springer
\n", "
\n", "
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Query Image
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Similar
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info From
labelEnglish_springer
from/val/n02102040/n02102040_662.JPEG
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", "\n", " \n", "
Info To
0.949877/train/n02102040/ILSVRC2012_val_00032959.JPEGEnglish_springer
0.927345/val/n02102040/n02102040_3502.JPEGEnglish_springer
\n", "
\n", "
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Query Image
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Similar
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info From
labelEnglish_springer
from/train/n02102040/n02102040_3114.JPEG
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", "\n", " \n", "
Info To
0.949252/train/n02102040/n02102040_1306.JPEGEnglish_springer
0.941953/train/n02102040/n02102040_1055.JPEGEnglish_springer
\n", "
\n", "
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Query Image
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Similar
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info From
labelEnglish_springer
from/train/n02102040/n02102040_1306.JPEG
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", "\n", " \n", "
Info To
0.949252/train/n02102040/n02102040_3114.JPEGEnglish_springer
0.936799/train/n02102040/n02102040_876.JPEGEnglish_springer
\n", "
\n", "
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Query Image
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t\t\n", "\t\t\t\t\t\t\t
Similar
\n", "\t\t\t\t\t\t
\n", "\t\t\t\t\t
\n", "
\n", " \n", "
\n", "
\n", " \n", "
\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
fromtolabellabel2distance
3630imagenette2-160/train/n03394916/n03394916_44127.JPEG[imagenette2-160/val/n03394916/n03394916_30631.JPEG, imagenette2-160/train/n03394916/n03394916_36016.JPEG][French_horn, French_horn][French_horn, French_horn][0.968786, 0.918324]
7819imagenette2-160/val/n03394916/n03394916_30631.JPEG[imagenette2-160/train/n03394916/n03394916_44127.JPEG, imagenette2-160/train/n03394916/n03394916_29969.JPEG][French_horn, French_horn][French_horn, French_horn][0.968786, 0.903754]
8751imagenette2-160/val/n03445777/n03445777_6882.JPEG[imagenette2-160/train/n03445777/n03445777_13918.JPEG, imagenette2-160/val/n03445777/n03445777_5912.JPEG][golf_ball, golf_ball][golf_ball, golf_ball][0.962459, 0.918005]
5358imagenette2-160/train/n03445777/n03445777_13918.JPEG[imagenette2-160/val/n03445777/n03445777_6882.JPEG, imagenette2-160/val/n03445777/n03445777_8820.JPEG][golf_ball, golf_ball][golf_ball, golf_ball][0.962459, 0.91704]
896imagenette2-160/train/n02102040/n02102040_1564.JPEG[imagenette2-160/train/n02102040/n02102040_3837.JPEG, imagenette2-160/train/n02102040/n02102040_3586.JPEG][English_springer, English_springer][English_springer, English_springer][0.953837, 0.908732]
..................
5911imagenette2-160/train/n03888257/n03888257_12816.JPEG[imagenette2-160/train/n03888257/n03888257_38633.JPEG][parachute][parachute][0.800073]
6219imagenette2-160/train/n03888257/n03888257_38633.JPEG[imagenette2-160/train/n03888257/n03888257_12816.JPEG][parachute][parachute][0.800073]
4320imagenette2-160/train/n03417042/n03417042_3236.JPEG[imagenette2-160/train/n03417042/n03417042_12297.JPEG][garbage_truck][garbage_truck][0.800024]
3429imagenette2-160/train/n03394916/n03394916_32478.JPEG[imagenette2-160/train/n03394916/n03394916_35573.JPEG][French_horn][French_horn][0.800012]
7497imagenette2-160/val/n03028079/n03028079_13002.JPEG[imagenette2-160/train/n03028079/n03028079_3839.JPEG][church][church][0.800002]
\n", "

9057 rows × 5 columns

\n", "
" ], "text/plain": [ " from \\\n", "3630 imagenette2-160/train/n03394916/n03394916_44127.JPEG \n", "7819 imagenette2-160/val/n03394916/n03394916_30631.JPEG \n", "8751 imagenette2-160/val/n03445777/n03445777_6882.JPEG \n", "5358 imagenette2-160/train/n03445777/n03445777_13918.JPEG \n", "896 imagenette2-160/train/n02102040/n02102040_1564.JPEG \n", "... ... \n", "5911 imagenette2-160/train/n03888257/n03888257_12816.JPEG \n", "6219 imagenette2-160/train/n03888257/n03888257_38633.JPEG \n", "4320 imagenette2-160/train/n03417042/n03417042_3236.JPEG \n", "3429 imagenette2-160/train/n03394916/n03394916_32478.JPEG \n", "7497 imagenette2-160/val/n03028079/n03028079_13002.JPEG \n", "\n", " to \\\n", "3630 [imagenette2-160/val/n03394916/n03394916_30631.JPEG, imagenette2-160/train/n03394916/n03394916_36016.JPEG] \n", "7819 [imagenette2-160/train/n03394916/n03394916_44127.JPEG, imagenette2-160/train/n03394916/n03394916_29969.JPEG] \n", "8751 [imagenette2-160/train/n03445777/n03445777_13918.JPEG, imagenette2-160/val/n03445777/n03445777_5912.JPEG] \n", "5358 [imagenette2-160/val/n03445777/n03445777_6882.JPEG, imagenette2-160/val/n03445777/n03445777_8820.JPEG] \n", "896 [imagenette2-160/train/n02102040/n02102040_3837.JPEG, imagenette2-160/train/n02102040/n02102040_3586.JPEG] \n", "... ... \n", "5911 [imagenette2-160/train/n03888257/n03888257_38633.JPEG] \n", "6219 [imagenette2-160/train/n03888257/n03888257_12816.JPEG] \n", "4320 [imagenette2-160/train/n03417042/n03417042_12297.JPEG] \n", "3429 [imagenette2-160/train/n03394916/n03394916_35573.JPEG] \n", "7497 [imagenette2-160/train/n03028079/n03028079_3839.JPEG] \n", "\n", " label \\\n", "3630 [French_horn, French_horn] \n", "7819 [French_horn, French_horn] \n", "8751 [golf_ball, golf_ball] \n", "5358 [golf_ball, golf_ball] \n", "896 [English_springer, English_springer] \n", "... ... 
\n", "5911 [parachute] \n", "6219 [parachute] \n", "4320 [garbage_truck] \n", "3429 [French_horn] \n", "7497 [church] \n", "\n", " label2 distance \n", "3630 [French_horn, French_horn] [0.968786, 0.918324] \n", "7819 [French_horn, French_horn] [0.968786, 0.903754] \n", "8751 [golf_ball, golf_ball] [0.962459, 0.918005] \n", "5358 [golf_ball, golf_ball] [0.962459, 0.91704] \n", "896 [English_springer, English_springer] [0.953837, 0.908732] \n", "... ... ... \n", "5911 [parachute] [0.800073] \n", "6219 [parachute] [0.800073] \n", "4320 [garbage_truck] [0.800024] \n", "3429 [French_horn] [0.800012] \n", "7497 [church] [0.800002] \n", "\n", "[9057 rows x 5 columns]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fd.vis.similarity_gallery() " ] }, { "cell_type": "markdown", "id": "c2c393be-2b42-4814-8688-03d2be9e8998", "metadata": {}, "source": [ "## Similar Image Pairs\n", "\n", "Find similar image pairs within and across the train and validation subfolders. Pairs may include train-train, train-val, val-train, and val-val." ] }, { "cell_type": "code", "execution_count": 16, "id": "9e065403-582b-4f94-855b-33fd8f4826a1", "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 343.69it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Stored similarity visual view in fastdup_imagenette/galleries/duplicates.html\n" ] }, { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " Duplicates Report\n", " \n", " \n", "\n", "\n", "\n", "
\n", "
\n", "
\n", " \n", " \"logo\"\n", " \n", "
\n", " \n", "
\n", "
\n", "
\n", "

Duplicates Report

\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.968786
FromFrench_horn
ToFrench_horn
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.962459
Fromgolf_ball
Togolf_ball
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.953837
FromEnglish_springer
ToEnglish_springer
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.953413
Fromtench
Totench
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.952239
Fromgarbage_truck
Togarbage_truck
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.951679
FromFrench_horn
ToFrench_horn
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.950477
Fromparachute
Toparachute
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.950173
FromEnglish_springer
ToEnglish_springer
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.949877
FromEnglish_springer
ToEnglish_springer
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
Distance0.949252
FromEnglish_springer
ToEnglish_springer
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", " \n", "
\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fd.vis.duplicates_gallery()" ] }, { "cell_type": "markdown", "id": "e10989e1", "metadata": {}, "source": [ "Show similar image pairs." ] }, { "cell_type": "code", "execution_count": 10, "id": "3ea590e9-d221-4202-b03b-e5fef4487c89", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 270 }, "executionInfo": { "elapsed": 499, "status": "ok", "timestamp": 1677667342908, "user": { "displayName": "Tom Shani", "userId": "00667426488827942961" }, "user_tz": -120 }, "id": "3ea590e9-d221-4202-b03b-e5fef4487c89", "outputId": "3c5f4cc0-0ba5-42a0-e01b-f165e9cf655c", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
fromtodistanceimg_filename_fromlabel_fromsplit_fromerror_code_fromis_valid_fromimg_filename_tolabel_tosplit_toerror_code_tois_valid_to
01152153900.968786val/n03394916/n03394916_30631.JPEGFrench_hornvalVALIDTruetrain/n03394916/n03394916_44127.JPEGFrench_horntrainVALIDTrue
15390115210.968786train/n03394916/n03394916_44127.JPEGFrench_horntrainVALIDTrueval/n03394916/n03394916_30631.JPEGFrench_hornvalVALIDTrue
21291477150.962459val/n03445777/n03445777_6882.JPEGgolf_ballvalVALIDTruetrain/n03445777/n03445777_13918.JPEGgolf_balltrainVALIDTrue
37715129140.962459train/n03445777/n03445777_13918.JPEGgolf_balltrainVALIDTrueval/n03445777/n03445777_6882.JPEGgolf_ballvalVALIDTrue
4111714040.953837train/n02102040/n02102040_1564.JPEGEnglish_springertrainVALIDTruetrain/n02102040/n02102040_3837.JPEGEnglish_springertrainVALIDTrue
\n", "
" ], "text/plain": [ " from to distance img_filename_from \\\n", "0 11521 5390 0.968786 val/n03394916/n03394916_30631.JPEG \n", "1 5390 11521 0.968786 train/n03394916/n03394916_44127.JPEG \n", "2 12914 7715 0.962459 val/n03445777/n03445777_6882.JPEG \n", "3 7715 12914 0.962459 train/n03445777/n03445777_13918.JPEG \n", "4 1117 1404 0.953837 train/n02102040/n02102040_1564.JPEG \n", "\n", " label_from split_from error_code_from is_valid_from \\\n", "0 French_horn val VALID True \n", "1 French_horn train VALID True \n", "2 golf_ball val VALID True \n", "3 golf_ball train VALID True \n", "4 English_springer train VALID True \n", "\n", " img_filename_to label_to split_to \\\n", "0 train/n03394916/n03394916_44127.JPEG French_horn train \n", "1 val/n03394916/n03394916_30631.JPEG French_horn val \n", "2 train/n03445777/n03445777_13918.JPEG golf_ball train \n", "3 val/n03445777/n03445777_6882.JPEG golf_ball val \n", "4 train/n02102040/n02102040_3837.JPEG English_springer train \n", "\n", " error_code_to is_valid_to \n", "0 VALID True \n", "1 VALID True \n", "2 VALID True \n", "3 VALID True \n", "4 VALID True " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fd.similarity().head(5)" ] }, { "cell_type": "markdown", "id": "95d21e6d-a951-48dd-8c4c-894c8ba556fd", "metadata": {}, "source": [ "## Image Clusters" ] }, { "cell_type": "code", "execution_count": 11, "id": "4a6db529-cb1e-4655-af50-d97f3e131319", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000, "output_embedded_package_id": "1Wh1vmG-F-RG0ZYZP1oRgiyqHAtnfsuEk" }, "executionInfo": { "elapsed": 6376, "status": "ok", "timestamp": 1677667352994, "user": { "displayName": "Tom Shani", "userId": "00667426488827942961" }, "user_tz": -120 }, "id": "4a6db529-cb1e-4655-af50-d97f3e131319", "outputId": "adfc3ee1-84c9-4aa6-a0db-09a6a800b566", "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tench\n" ] }, { "name": "stderr", 
"output_type": "stream", "text": [ "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 97.47it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Finished OK. Components are stored as image files fastdup_imagenette/galleries/components_[index].jpg\n", "Stored components visual view in fastdup_imagenette/galleries/components.html\n", "Execution time in seconds 2.3\n" ] }, { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " Components Report\n", "

Showing groups of similar images


\n", " \n", "\n", "\n", "\n", "
\n", "
\n", "
\n", " \n", " \"logo\"\n", " \n", "
\n", " \n", "
\n", "
\n", "
\n", "

Components Report

\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component36
num_images24
mean_distance0.9003
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", "
Label
tench24
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component7332
num_images22
mean_distance0.9011
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", "
Label
golf_ball22
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component143
num_images16
mean_distance0.9003
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", "
Label
tench16
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component6
num_images13
mean_distance0.9023
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", "
Label
tench13
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component10
num_images11
mean_distance0.9065
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", "
Label
tench11
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component4589
num_images11
mean_distance0.9005
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", "
Label
French_horn11
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component900
num_images10
mean_distance0.9018
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", "
Label
English_springer10
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component5491
num_images10
mean_distance0.9001
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", "
Label
garbage_truck10
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component150
num_images10
mean_distance0.9032
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", "
Label
tench10
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component7341
num_images9
mean_distance0.9112
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", "
Label
golf_ball9
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component7355
num_images8
mean_distance0.9057
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", "
Label
golf_ball8
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component5478
num_images8
mean_distance0.9025
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", "
Label
garbage_truck8
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component151
num_images7
mean_distance0.9006
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", "
Label
tench7
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component902
num_images7
mean_distance0.9044
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", "
Label
English_springer7
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component4571
num_images6
mean_distance0.9038
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", "
Label
French_horn6
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component41
num_images6
mean_distance0.9007
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", "
Label
tench6
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component5718
num_images6
mean_distance0.9043
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", "
Label
garbage_truck6
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component917
num_images5
mean_distance0.9037
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", "
Label
English_springer5
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component8448
num_images5
mean_distance0.9004
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", "
Label
parachute5
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component218
num_images5
mean_distance0.9
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", "
Label
tench5
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", " \n", "
\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fd.vis.component_gallery()" ] }, { "cell_type": "markdown", "id": "ca5d4b6e-7ff6-49b8-b487-6ba1573ab104", "metadata": {}, "source": [ "You can also visualize clusters with specific labels using the `slice` parameter. For example let's visualize clusters with the `chain_saw` label" ] }, { "cell_type": "code", "execution_count": 12, "id": "4b38dacf-becc-4631-9aeb-6fe9bd235aa1", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000, "output_embedded_package_id": "1xYIrPsODG8kAMaZOpGeKNRoa4-HjPC-w" }, "executionInfo": { "elapsed": 5130, "status": "ok", "timestamp": 1677667368207, "user": { "displayName": "Tom Shani", "userId": "00667426488827942961" }, "user_tz": -120 }, "id": "4b38dacf-becc-4631-9aeb-6fe9bd235aa1", "outputId": "131d0f11-5627-4beb-b58c-3801e09a3b42", "scrolled": true, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "chain_saw\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 11/11 [00:00<00:00, 313.24it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Finished OK. Components are stored as image files fastdup_imagenette/galleries/components_[index].jpg\n", "Stored components visual view in fastdup_imagenette/galleries/components.html\n", "Execution time in seconds 1.4\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] }, { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " Components Report\n", "

, slice: chain_saw


\n", " \n", "\n", "\n", "\n", "
\n", "
\n", "
\n", " \n", " \"logo\"\n", " \n", "
\n", " \n", "
\n", "
\n", "
\n", "

Components Report

\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component2953
num_images3
mean_distance0.9064
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", "
Label
chain_saw3
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component2875
num_images2
mean_distance0.9029
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", "
Label
chain_saw2
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component2891
num_images2
mean_distance0.9208
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", "
Label
chain_saw2
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component2939
num_images2
mean_distance0.9222
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", "
Label
chain_saw2
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component3065
num_images2
mean_distance0.9139
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", "
Label
chain_saw2
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component3068
num_images2
mean_distance0.9198
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", "
Label
chain_saw2
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component3077
num_images2
mean_distance0.9073
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", "
Label
chain_saw2
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component3078
num_images2
mean_distance0.9192
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", "
Label
chain_saw2
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component3153
num_images2
mean_distance0.9355
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", "
Label
chain_saw2
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component3381
num_images2
mean_distance0.9345
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", "
Label
chain_saw2
\n", "
\n", "
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", "\n", " \n", " \n", "\n", " \n", "
Info
component10340
num_images2
mean_distance0.9039
\n", "
\n", "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", "
Label
chain_saw2
\n", "
\n", "
\n", "
\n", " \n", "
\n", "
\n", " \n", "
\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fd.vis.component_gallery(slice='chain_saw')" ] }, { "cell_type": "markdown", "id": "28498d81-d073-4f3d-baa4-732e1df93a34", "metadata": {}, "source": [ "## Connected Components" ] }, { "cell_type": "code", "execution_count": 13, "id": "0346be91-5380-48b9-a8df-074c342efcd3", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "executionInfo": { "elapsed": 1036, "status": "ok", "timestamp": 1677667380699, "user": { "displayName": "Tom Shani", "userId": "00667426488827942961" }, "user_tz": -120 }, "id": "0346be91-5380-48b9-a8df-074c342efcd3", "outputId": "ffa6bd9d-b5b3-4ed5-86e1-c47ca9658667", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
fastdup_idcomponent_idsumcountmean_distancemin_distancemax_distanceimg_filenamelabelspliterror_codeis_valid
77787778733236.673440.00.91680.90110.9328train/n03445777/n03445777_16186.JPEGgolf_balltrainVALIDTrue
79907990733236.673440.00.91680.90110.9328train/n03445777/n03445777_3503.JPEGgolf_balltrainVALIDTrue
6826823636.581540.00.91450.90030.9339train/n01440764/n01440764_6159.JPEGtenchtrainVALIDTrue
954595453636.581540.00.91450.90030.9339val/n01440764/n01440764_12250.JPEGtenchvalVALIDTrue
76517651733236.673440.00.91680.90110.9328train/n03445777/n03445777_11389.JPEGgolf_balltrainVALIDTrue
\n", "
" ], "text/plain": [ " fastdup_id component_id sum count mean_distance min_distance \\\n", "7778 7778 7332 36.6734 40.0 0.9168 0.9011 \n", "7990 7990 7332 36.6734 40.0 0.9168 0.9011 \n", "682 682 36 36.5815 40.0 0.9145 0.9003 \n", "9545 9545 36 36.5815 40.0 0.9145 0.9003 \n", "7651 7651 7332 36.6734 40.0 0.9168 0.9011 \n", "\n", " max_distance img_filename label split \\\n", "7778 0.9328 train/n03445777/n03445777_16186.JPEG golf_ball train \n", "7990 0.9328 train/n03445777/n03445777_3503.JPEG golf_ball train \n", "682 0.9339 train/n01440764/n01440764_6159.JPEG tench train \n", "9545 0.9339 val/n01440764/n01440764_12250.JPEG tench val \n", "7651 0.9328 train/n03445777/n03445777_11389.JPEG golf_ball train \n", "\n", " error_code is_valid \n", "7778 VALID True \n", "7990 VALID True \n", "682 VALID True \n", "9545 VALID True \n", "7651 VALID True " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cc_df, _ = fd.connected_components()\n", "cc_df.sort_values('count', ascending=False).head(5)" ] }, { "cell_type": "markdown", "id": "569cb878", "metadata": {}, "source": [ "We can also get the metadata of an individual image by indexing `fd` with its `fastdup_id`, which is listed in `fd.annotations()`." ] }, { "cell_type": "code", "execution_count": 14, "id": "e80d6817-fed6-4fa4-8714-b01214e0d3f8", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 990, "status": "ok", "timestamp": 1677667384644, "user": { "displayName": "Tom Shani", "userId": "00667426488827942961" }, "user_tz": -120 }, "id": "e80d6817-fed6-4fa4-8714-b01214e0d3f8", "outputId": "4f973aba-572d-4e50-d22d-c5bfc8cf3d2d", "tags": [] }, "outputs": [ { "data": { "text/plain": [ "{'img_filename': 'train/n01440764/n01440764_17789.JPEG',\n", " 'label': 'tench',\n", " 'split': 'train',\n", " 'fastdup_id': 349,\n", " 'error_code': 'VALID',\n", " 'is_valid': True}" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ 
"fd[349]" ] } ], "metadata": { "colab": { "provenance": [] }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.11" } }, "nbformat": 4, "nbformat_minor": 5 }