{
"cells": [
{
"cell_type": "markdown",
"id": "SwSYWR4vzk_e",
"metadata": {
"id": "SwSYWR4vzk_e",
"tags": []
},
"source": [
"# Analysing Image Classification Dataset"
]
},
{
"cell_type": "markdown",
"id": "bbed0117-e8d1-4df6-b8b7-7bcce10b8655",
"metadata": {
"tags": []
},
"source": [
"## Installation & Setting Up"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "506e82b4-a1c2-4262-a326-d0924bb018b6",
"metadata": {
"id": "506e82b4-a1c2-4262-a326-d0924bb018b6"
},
"outputs": [],
"source": [
"!pip install pip -U\n",
"!pip install fastdup"
]
},
{
"cell_type": "markdown",
"id": "8a79fb1b-b089-4d4d-8fa8-3e2b2ef7f886",
"metadata": {
"id": "8a79fb1b-b089-4d4d-8fa8-3e2b2ef7f886",
"tags": []
},
"source": [
"## Download Imagenette Dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "be5b7ca5-34f5-4a0f-b081-2e78be6a425a",
"metadata": {},
"outputs": [],
"source": [
"# -nc (no-clobber) skips the download when the archive already exists, so re-running\n",
"# the cell does not leave behind duplicate imagenette2-160.tgz.1, .2, ... files.\n",
"!wget -nc https://s3.amazonaws.com/fast-ai-imageclas/imagenette2-160.tgz\n",
"!tar -xf imagenette2-160.tgz"
]
},
{
"cell_type": "markdown",
"id": "f01586fe-db75-4154-aa15-9ea2709c9461",
"metadata": {
"id": "f01586fe-db75-4154-aa15-9ea2709c9461"
},
"source": [
"## Load and Format Annotations"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "ff90fe31-7c39-46c5-8c58-3ae349fbcc91",
"metadata": {
"executionInfo": {
"elapsed": 949,
"status": "ok",
"timestamp": 1677666765166,
"user": {
"displayName": "Tom Shani",
"userId": "00667426488827942961"
},
"user_tz": -120
},
"id": "ff90fe31-7c39-46c5-8c58-3ae349fbcc91",
"tags": []
},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "21d2474d-3fa5-4148-a0f1-ea8d55d63b85",
"metadata": {
"executionInfo": {
"elapsed": 2,
"status": "ok",
"timestamp": 1677666768281,
"user": {
"displayName": "Tom Shani",
"userId": "00667426488827942961"
},
"user_tz": -120
},
"id": "21d2474d-3fa5-4148-a0f1-ea8d55d63b85",
"tags": []
},
"outputs": [],
"source": [
"data_dir = 'imagenette2-160/'\n",
"csv_path = 'imagenette2-160/noisy_imagenette.csv'"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "2cb91ccb-9cb6-42ba-9489-96182eccc583",
"metadata": {
"executionInfo": {
"elapsed": 2,
"status": "ok",
"timestamp": 1677666769859,
"user": {
"displayName": "Tom Shani",
"userId": "00667426488827942961"
},
"user_tz": -120
},
"id": "2cb91ccb-9cb6-42ba-9489-96182eccc583",
"tags": []
},
"outputs": [],
"source": [
"label_map = {\n",
" 'n02979186': 'cassette_player', \n",
" 'n03417042': 'garbage_truck', \n",
" 'n01440764': 'tench', \n",
" 'n02102040': 'English_springer', \n",
" 'n03028079': 'church',\n",
" 'n03888257': 'parachute', \n",
" 'n03394916': 'French_horn', \n",
" 'n03000684': 'chain_saw', \n",
" 'n03445777': 'golf_ball', \n",
" 'n03425413': 'gas_pump'\n",
"}"
]
},
{
"cell_type": "markdown",
"id": "8aba34e1",
"metadata": {},
"source": [
"Load the annotations provided with the dataset."
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "e2e90600-b02d-4a2a-a348-7b67157f9129",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 143
},
"executionInfo": {
"elapsed": 2,
"status": "ok",
"timestamp": 1677666769859,
"user": {
"displayName": "Tom Shani",
"userId": "00667426488827942961"
},
"user_tz": -120
},
"id": "e2e90600-b02d-4a2a-a348-7b67157f9129",
"outputId": "f9f72c0d-f613-4aac-d29c-3646b2301dcb",
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" path | \n",
" noisy_labels_0 | \n",
" noisy_labels_1 | \n",
" noisy_labels_5 | \n",
" noisy_labels_25 | \n",
" noisy_labels_50 | \n",
" is_valid | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" train/n02979186/n02979186_9036.JPEG | \n",
" n02979186 | \n",
" n02979186 | \n",
" n02979186 | \n",
" n02979186 | \n",
" n02979186 | \n",
" False | \n",
"
\n",
" \n",
" 1 | \n",
" train/n02979186/n02979186_11957.JPEG | \n",
" n02979186 | \n",
" n02979186 | \n",
" n02979186 | \n",
" n02979186 | \n",
" n03000684 | \n",
" False | \n",
"
\n",
" \n",
" 2 | \n",
" train/n02979186/n02979186_9715.JPEG | \n",
" n02979186 | \n",
" n02979186 | \n",
" n02979186 | \n",
" n03417042 | \n",
" n03000684 | \n",
" False | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" path noisy_labels_0 noisy_labels_1 \n",
"0 train/n02979186/n02979186_9036.JPEG n02979186 n02979186 \\\n",
"1 train/n02979186/n02979186_11957.JPEG n02979186 n02979186 \n",
"2 train/n02979186/n02979186_9715.JPEG n02979186 n02979186 \n",
"\n",
" noisy_labels_5 noisy_labels_25 noisy_labels_50 is_valid \n",
"0 n02979186 n02979186 n02979186 False \n",
"1 n02979186 n02979186 n03000684 False \n",
"2 n02979186 n03417042 n03000684 False "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_annot = pd.read_csv(csv_path)\n",
"df_annot.head(3)"
]
},
{
"cell_type": "markdown",
"id": "dfc957bf",
"metadata": {},
"source": [
"Transform the annotations into the format fastdup expects (`filename`, `label` and `split` columns)."
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "473185d1-89f5-4746-b87b-f2b3ef7c445b",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 424
},
"executionInfo": {
"elapsed": 1012,
"status": "ok",
"timestamp": 1677666771201,
"user": {
"displayName": "Tom Shani",
"userId": "00667426488827942961"
},
"user_tz": -120
},
"id": "473185d1-89f5-4746-b87b-f2b3ef7c445b",
"outputId": "c09c986d-bcef-4545-8ceb-ee5196b40ee6",
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" filename | \n",
" label | \n",
" split | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" imagenette2-160/train/n02979186/n02979186_9036... | \n",
" cassette_player | \n",
" imagenette2-160 | \n",
"
\n",
" \n",
" 1 | \n",
" imagenette2-160/train/n02979186/n02979186_1195... | \n",
" cassette_player | \n",
" imagenette2-160 | \n",
"
\n",
" \n",
" 2 | \n",
" imagenette2-160/train/n02979186/n02979186_9715... | \n",
" cassette_player | \n",
" imagenette2-160 | \n",
"
\n",
" \n",
" 3 | \n",
" imagenette2-160/train/n02979186/n02979186_2173... | \n",
" cassette_player | \n",
" imagenette2-160 | \n",
"
\n",
" \n",
" 4 | \n",
" imagenette2-160/train/n02979186/ILSVRC2012_val... | \n",
" cassette_player | \n",
" imagenette2-160 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 13389 | \n",
" imagenette2-160/val/n03425413/n03425413_17521.... | \n",
" gas_pump | \n",
" imagenette2-160 | \n",
"
\n",
" \n",
" 13390 | \n",
" imagenette2-160/val/n03425413/n03425413_20711.... | \n",
" gas_pump | \n",
" imagenette2-160 | \n",
"
\n",
" \n",
" 13391 | \n",
" imagenette2-160/val/n03425413/n03425413_19050.... | \n",
" gas_pump | \n",
" imagenette2-160 | \n",
"
\n",
" \n",
" 13392 | \n",
" imagenette2-160/val/n03425413/n03425413_13831.... | \n",
" gas_pump | \n",
" imagenette2-160 | \n",
"
\n",
" \n",
" 13393 | \n",
" imagenette2-160/val/n03425413/n03425413_1242.JPEG | \n",
" gas_pump | \n",
" imagenette2-160 | \n",
"
\n",
" \n",
"
\n",
"
13394 rows × 3 columns
\n",
"
"
],
"text/plain": [
" filename label \n",
"0 imagenette2-160/train/n02979186/n02979186_9036... cassette_player \\\n",
"1 imagenette2-160/train/n02979186/n02979186_1195... cassette_player \n",
"2 imagenette2-160/train/n02979186/n02979186_9715... cassette_player \n",
"3 imagenette2-160/train/n02979186/n02979186_2173... cassette_player \n",
"4 imagenette2-160/train/n02979186/ILSVRC2012_val... cassette_player \n",
"... ... ... \n",
"13389 imagenette2-160/val/n03425413/n03425413_17521.... gas_pump \n",
"13390 imagenette2-160/val/n03425413/n03425413_20711.... gas_pump \n",
"13391 imagenette2-160/val/n03425413/n03425413_19050.... gas_pump \n",
"13392 imagenette2-160/val/n03425413/n03425413_13831.... gas_pump \n",
"13393 imagenette2-160/val/n03425413/n03425413_1242.JPEG gas_pump \n",
"\n",
" split \n",
"0 imagenette2-160 \n",
"1 imagenette2-160 \n",
"2 imagenette2-160 \n",
"3 imagenette2-160 \n",
"4 imagenette2-160 \n",
"... ... \n",
"13389 imagenette2-160 \n",
"13390 imagenette2-160 \n",
"13391 imagenette2-160 \n",
"13392 imagenette2-160 \n",
"13393 imagenette2-160 \n",
"\n",
"[13394 rows x 3 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# take relevant columns\n",
"df_annot = df_annot[['path', 'noisy_labels_0']]\n",
"\n",
"# rename columns to fastdup's column names\n",
"df_annot = df_annot.rename({'noisy_labels_0': 'label', 'path': 'filename'}, axis='columns')\n",
"\n",
"# append datadir\n",
"df_annot['filename'] = df_annot['filename'].apply(lambda x: data_dir + x)\n",
"\n",
"# create split column\n",
"df_annot['split'] = df_annot['filename'].apply(lambda x: x.split(\"/\")[0])\n",
"\n",
"# map label ids to regular labels\n",
"df_annot['label'] = df_annot['label'].map(label_map)\n",
"\n",
"# show formated annotations\n",
"df_annot"
]
},
{
"cell_type": "markdown",
"id": "0c648ed1-5016-4230-9873-546eb510b764",
"metadata": {
"id": "0c648ed1-5016-4230-9873-546eb510b764"
},
"source": [
"## Import & Run fastdup\n",
"\n",
"In this example we run fastdup by providing the annotations."
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "7f69d8b2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'0.922'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import fastdup\n",
"fastdup.__version__"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "92a6e2f9-e60c-44c0-b48a-f7413f7594ae",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"FastDup Software, (C) copyright 2022 Dr. Amir Alush and Dr. Danny Bickson.\n",
"2023-03-20 17:57:26 [INFO] Going to loop over dir imagenette2-160\n",
"2023-03-20 17:57:26 [INFO] Found total 13394 images to run on\n",
"2023-03-20 17:57:54 [INFO] Found total 13394 images to run onimated: 0 Minutes 0 Features\n",
"2023-03-20 17:57:55 [INFO] 1657) Finished write_index() NN model\n",
"2023-03-20 17:57:55 [INFO] Stored nn model index file fastdup_imagenette/nnf.index\n",
"2023-03-20 17:57:56 [INFO] Total time took 30624 ms\n",
"2023-03-20 17:57:56 [INFO] Found a total of 0 fully identical images (d>0.990), which are 0.00 %\n",
"2023-03-20 17:57:56 [INFO] Found a total of 0 nearly identical images(d>0.980), which are 0.00 %\n",
"2023-03-20 17:57:56 [INFO] Found a total of 16741 above threshold images (d>0.800), which are 41.66 %\n",
"2023-03-20 17:57:56 [INFO] Found a total of 1339 outlier images (d<0.050), which are 3.33 %\n",
"2023-03-20 17:57:56 [INFO] Min distance found 0.470 max distance 0.969\n",
"2023-03-20 17:57:56 [INFO] Running connected components for ccthreshold 0.900000 \n",
".0\n",
" ########################################################################################\n",
"\n",
"Dataset Analysis Summary: \n",
"\n",
" Dataset contains 13394 images\n",
" Valid images are 100.00% (13,394) of the data, invalid are 0.00% (0) of the data\n",
" Similarity: 2.73% (366) belong to 20 similarity clusters (components).\n",
" 97.27% (13,028) images do not belong to any similarity cluster.\n",
" Largest cluster has 40 (0.30%) images.\n",
" For a detailed analysis, use `.connected_components()`\n",
"(similarity threshold used is 0.8, connected component threshold used is 0.9).\n",
"\n",
" Outliers: 6.21% (832) of images are possible outliers, and fall in the bottom 5.00% of similarity values.\n",
" For a detailed list of outliers, use `.outliers()`.\n"
]
}
],
"source": [
"work_dir = 'fastdup_imagenette'\n",
"\n",
"fd = fastdup.create(work_dir=work_dir, input_dir=data_dir) \n",
"fd.run(annotations=df_annot, ccthreshold=0.9, threshold=0.8)"
]
},
{
"cell_type": "markdown",
"id": "62e35a12-fadd-4b3f-bcab-69e6e67862a4",
"metadata": {},
"source": [
"## Outliers\n",
"\n",
"Visualize outliers from the dataset."
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "b39ec702-3ea1-4afe-a948-f026ba8fcb47",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"executionInfo": {
"elapsed": 2658,
"status": "ok",
"timestamp": 1677667336302,
"user": {
"displayName": "Tom Shani",
"userId": "00667426488827942961"
},
"user_tz": -120
},
"id": "b39ec702-3ea1-4afe-a948-f026ba8fcb47",
"outputId": "caa992d2-5267-408c-b44a-3a4a66e1ab5f",
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 29767.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Stored outliers visual view in fastdup_imagenette/galleries/outliers.html\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"text/html": [
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" Outliers Report\n",
" Showing image outliers, one per row
\n",
" \n",
"\n",
"\n",
"\n",
" \n",
" \n",
" \n",
" \n",
" \n",
"
\n",
"
\n",
"
Outliers Report
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" Distance | \n",
" 0.469904 | \n",
"
\n",
"\n",
" Path | \n",
" val/n03417042/n03417042_29412.JPEG | \n",
"
\n",
"\n",
" label | \n",
" garbage_truck | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" Distance | \n",
" 0.476124 | \n",
"
\n",
"\n",
" Path | \n",
" train/n02979186/n02979186_3967.JPEG | \n",
"
\n",
"\n",
" label | \n",
" cassette_player | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" Distance | \n",
" 0.47929 | \n",
"
\n",
"\n",
" Path | \n",
" val/n03417042/n03417042_91.JPEG | \n",
"
\n",
"\n",
" label | \n",
" garbage_truck | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" Distance | \n",
" 0.48977 | \n",
"
\n",
"\n",
" Path | \n",
" val/n03417042/n03417042_7422.JPEG | \n",
"
\n",
"\n",
" label | \n",
" garbage_truck | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" Distance | \n",
" 0.505358 | \n",
"
\n",
"\n",
" Path | \n",
" train/n03417042/n03417042_15485.JPEG | \n",
"
\n",
"\n",
" label | \n",
" garbage_truck | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" Distance | \n",
" 0.510293 | \n",
"
\n",
"\n",
" Path | \n",
" train/n03417042/n03417042_19447.JPEG | \n",
"
\n",
"\n",
" label | \n",
" garbage_truck | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" Distance | \n",
" 0.514679 | \n",
"
\n",
"\n",
" Path | \n",
" train/n03445777/n03445777_5218.JPEG | \n",
"
\n",
"\n",
" label | \n",
" golf_ball | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" Distance | \n",
" 0.515321 | \n",
"
\n",
"\n",
" Path | \n",
" val/n03417042/n03417042_27581.JPEG | \n",
"
\n",
"\n",
" label | \n",
" garbage_truck | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" Distance | \n",
" 0.536679 | \n",
"
\n",
"\n",
" Path | \n",
" train/n03417042/n03417042_24856.JPEG | \n",
"
\n",
"\n",
" label | \n",
" garbage_truck | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" Distance | \n",
" 0.541046 | \n",
"
\n",
"\n",
" Path | \n",
" train/n03417042/n03417042_15198.JPEG | \n",
"
\n",
"\n",
" label | \n",
" garbage_truck | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" Distance | \n",
" 0.544796 | \n",
"
\n",
"\n",
" Path | \n",
" train/n03888257/n03888257_34639.JPEG | \n",
"
\n",
"\n",
" label | \n",
" parachute | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" Distance | \n",
" 0.548765 | \n",
"
\n",
"\n",
" Path | \n",
" val/n03417042/n03417042_6081.JPEG | \n",
"
\n",
"\n",
" label | \n",
" garbage_truck | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" Distance | \n",
" 0.555266 | \n",
"
\n",
"\n",
" Path | \n",
" train/n03445777/n03445777_3254.JPEG | \n",
"
\n",
"\n",
" label | \n",
" golf_ball | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" Distance | \n",
" 0.569853 | \n",
"
\n",
"\n",
" Path | \n",
" train/n03445777/n03445777_13576.JPEG | \n",
"
\n",
"\n",
" label | \n",
" golf_ball | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" Distance | \n",
" 0.579928 | \n",
"
\n",
"\n",
" Path | \n",
" val/n02102040/n02102040_7670.JPEG | \n",
"
\n",
"\n",
" label | \n",
" English_springer | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" Distance | \n",
" 0.583889 | \n",
"
\n",
"\n",
" Path | \n",
" val/n03445777/n03445777_5932.JPEG | \n",
"
\n",
"\n",
" label | \n",
" golf_ball | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" Distance | \n",
" 0.590159 | \n",
"
\n",
"\n",
" Path | \n",
" train/n03888257/n03888257_79145.JPEG | \n",
"
\n",
"\n",
" label | \n",
" parachute | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" Distance | \n",
" 0.607759 | \n",
"
\n",
"\n",
" Path | \n",
" train/n03394916/n03394916_37544.JPEG | \n",
"
\n",
"\n",
" label | \n",
" French_horn | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" Distance | \n",
" 0.608525 | \n",
"
\n",
"\n",
" Path | \n",
" train/n03394916/n03394916_33663.JPEG | \n",
"
\n",
"\n",
" label | \n",
" French_horn | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" Distance | \n",
" 0.609526 | \n",
"
\n",
"\n",
" Path | \n",
" train/n03888257/n03888257_7793.JPEG | \n",
"
\n",
"\n",
" label | \n",
" parachute | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" "
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fd.vis.outliers_gallery()"
]
},
{
"cell_type": "markdown",
"id": "67378b58",
"metadata": {},
"source": [
"Show the outlier image data as a table."
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "aa1c0e5d-6038-491b-8a91-1d76a87590d4",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 270
},
"executionInfo": {
"elapsed": 429,
"status": "ok",
"timestamp": 1677667331251,
"user": {
"displayName": "Tom Shani",
"userId": "00667426488827942961"
},
"user_tz": -120
},
"id": "aa1c0e5d-6038-491b-8a91-1d76a87590d4",
"outputId": "b38332f8-7e4e-45de-f7d3-828a52757ec2",
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" index | \n",
" outlier | \n",
" nearest | \n",
" distance | \n",
" img_filename_outlier | \n",
" label_outlier | \n",
" split_outlier | \n",
" error_code_outlier | \n",
" is_valid_outlier | \n",
" img_filename_nearest | \n",
" label_nearest | \n",
" split_nearest | \n",
" error_code_nearest | \n",
" is_valid_nearest | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1338 | \n",
" 12009 | \n",
" 1757 | \n",
" 0.469904 | \n",
" val/n03417042/n03417042_29412.JPEG | \n",
" garbage_truck | \n",
" val | \n",
" VALID | \n",
" True | \n",
" train/n02102040/n02102040_7256.JPEG | \n",
" English_springer | \n",
" train | \n",
" VALID | \n",
" True | \n",
"
\n",
" \n",
" 1 | \n",
" 1336 | \n",
" 2664 | \n",
" 9763 | \n",
" 0.476124 | \n",
" train/n02979186/n02979186_3967.JPEG | \n",
" cassette_player | \n",
" train | \n",
" VALID | \n",
" True | \n",
" val/n01440764/n01440764_710.JPEG | \n",
" tench | \n",
" val | \n",
" VALID | \n",
" True | \n",
"
\n",
" \n",
" 2 | \n",
" 1335 | \n",
" 12172 | \n",
" 1817 | \n",
" 0.479290 | \n",
" val/n03417042/n03417042_91.JPEG | \n",
" garbage_truck | \n",
" val | \n",
" VALID | \n",
" True | \n",
" train/n02102040/n02102040_7868.JPEG | \n",
" English_springer | \n",
" train | \n",
" VALID | \n",
" True | \n",
"
\n",
" \n",
" 3 | \n",
" 1332 | \n",
" 12131 | \n",
" 1522 | \n",
" 0.489770 | \n",
" val/n03417042/n03417042_7422.JPEG | \n",
" garbage_truck | \n",
" val | \n",
" VALID | \n",
" True | \n",
" train/n02102040/n02102040_4884.JPEG | \n",
" English_springer | \n",
" train | \n",
" VALID | \n",
" True | \n",
"
\n",
" \n",
" 4 | \n",
" 1330 | \n",
" 5898 | \n",
" 1392 | \n",
" 0.505358 | \n",
" train/n03417042/n03417042_15485.JPEG | \n",
" garbage_truck | \n",
" train | \n",
" VALID | \n",
" True | \n",
" train/n02102040/n02102040_3719.JPEG | \n",
" English_springer | \n",
" train | \n",
" VALID | \n",
" True | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" index outlier nearest distance img_filename_outlier \\\n",
"0 1338 12009 1757 0.469904 val/n03417042/n03417042_29412.JPEG \n",
"1 1336 2664 9763 0.476124 train/n02979186/n02979186_3967.JPEG \n",
"2 1335 12172 1817 0.479290 val/n03417042/n03417042_91.JPEG \n",
"3 1332 12131 1522 0.489770 val/n03417042/n03417042_7422.JPEG \n",
"4 1330 5898 1392 0.505358 train/n03417042/n03417042_15485.JPEG \n",
"\n",
" label_outlier split_outlier error_code_outlier is_valid_outlier \\\n",
"0 garbage_truck val VALID True \n",
"1 cassette_player train VALID True \n",
"2 garbage_truck val VALID True \n",
"3 garbage_truck val VALID True \n",
"4 garbage_truck train VALID True \n",
"\n",
" img_filename_nearest label_nearest split_nearest \\\n",
"0 train/n02102040/n02102040_7256.JPEG English_springer train \n",
"1 val/n01440764/n01440764_710.JPEG tench val \n",
"2 train/n02102040/n02102040_7868.JPEG English_springer train \n",
"3 train/n02102040/n02102040_4884.JPEG English_springer train \n",
"4 train/n02102040/n02102040_3719.JPEG English_springer train \n",
"\n",
" error_code_nearest is_valid_nearest \n",
"0 VALID True \n",
"1 VALID True \n",
"2 VALID True \n",
"3 VALID True \n",
"4 VALID True "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fd.outliers().head(5)"
]
},
{
"cell_type": "markdown",
"id": "bc16596d-899a-45eb-87ca-1d2b96a6ad96",
"metadata": {},
"source": [
"## Comparing Labels of Similar Images\n",
"Find possible mislabels by comparing a query image to other images in the dataset."
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "4d7cf1b9-c6c0-4b90-b7bb-59ca7bdbdcd7",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 180.17it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Stored similar images visual view in fastdup_imagenette/galleries/similarity.html\n"
]
},
{
"data": {
"text/html": [
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" Similarity Report\n",
" \n",
" \n",
"\n",
"\n",
"\n",
" \n",
" \n",
" \n",
" \n",
" \n",
"
\n",
"
\n",
"
Similarity Report
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info From | \n",
"
\n",
"\n",
" label | \n",
" French_horn | \n",
"
\n",
"\n",
" from | \n",
" /train/n03394916/n03394916_44127.JPEG | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info To | \n",
"
\n",
"\n",
" 0.968786 | \n",
" /val/n03394916/n03394916_30631.JPEG | \n",
" French_horn | \n",
"
\n",
"\n",
" 0.918324 | \n",
" /train/n03394916/n03394916_36016.JPEG | \n",
" French_horn | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tQuery Image | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tSimilar | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info From | \n",
"
\n",
"\n",
" label | \n",
" French_horn | \n",
"
\n",
"\n",
" from | \n",
" /val/n03394916/n03394916_30631.JPEG | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info To | \n",
"
\n",
"\n",
" 0.968786 | \n",
" /train/n03394916/n03394916_44127.JPEG | \n",
" French_horn | \n",
"
\n",
"\n",
" 0.903754 | \n",
" /train/n03394916/n03394916_29969.JPEG | \n",
" French_horn | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tQuery Image | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tSimilar | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info From | \n",
"
\n",
"\n",
" label | \n",
" golf_ball | \n",
"
\n",
"\n",
" from | \n",
" /val/n03445777/n03445777_6882.JPEG | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info To | \n",
"
\n",
"\n",
" 0.962459 | \n",
" /train/n03445777/n03445777_13918.JPEG | \n",
" golf_ball | \n",
"
\n",
"\n",
" 0.918005 | \n",
" /val/n03445777/n03445777_5912.JPEG | \n",
" golf_ball | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tQuery Image | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tSimilar | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info From | \n",
"
\n",
"\n",
" label | \n",
" golf_ball | \n",
"
\n",
"\n",
" from | \n",
" /train/n03445777/n03445777_13918.JPEG | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info To | \n",
"
\n",
"\n",
" 0.962459 | \n",
" /val/n03445777/n03445777_6882.JPEG | \n",
" golf_ball | \n",
"
\n",
"\n",
" 0.91704 | \n",
" /val/n03445777/n03445777_8820.JPEG | \n",
" golf_ball | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tQuery Image | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tSimilar | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info From | \n",
"
\n",
"\n",
" label | \n",
" English_springer | \n",
"
\n",
"\n",
" from | \n",
" /train/n02102040/n02102040_1564.JPEG | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info To | \n",
"
\n",
"\n",
" 0.953837 | \n",
" /train/n02102040/n02102040_3837.JPEG | \n",
" English_springer | \n",
"
\n",
"\n",
" 0.908732 | \n",
" /train/n02102040/n02102040_3586.JPEG | \n",
" English_springer | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tQuery Image | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tSimilar | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info From | \n",
"
\n",
"\n",
" label | \n",
" English_springer | \n",
"
\n",
"\n",
" from | \n",
" /train/n02102040/n02102040_3837.JPEG | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info To | \n",
"
\n",
"\n",
" 0.953837 | \n",
" /train/n02102040/n02102040_1564.JPEG | \n",
" English_springer | \n",
"
\n",
"\n",
" 0.893944 | \n",
" /train/n02102040/n02102040_3027.JPEG | \n",
" English_springer | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tQuery Image | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tSimilar | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info From | \n",
"
\n",
"\n",
" label | \n",
" tench | \n",
"
\n",
"\n",
" from | \n",
" /train/n01440764/n01440764_7457.JPEG | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info To | \n",
"
\n",
"\n",
" 0.953413 | \n",
" /train/n01440764/n01440764_11339.JPEG | \n",
" tench | \n",
"
\n",
"\n",
" 0.918778 | \n",
" /train/n01440764/n01440764_9315.JPEG | \n",
" tench | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tQuery Image | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tSimilar | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info From | \n",
"
\n",
"\n",
" label | \n",
" tench | \n",
"
\n",
"\n",
" from | \n",
" /train/n01440764/n01440764_11339.JPEG | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info To | \n",
"
\n",
"\n",
" 0.953413 | \n",
" /train/n01440764/n01440764_7457.JPEG | \n",
" tench | \n",
"
\n",
"\n",
" 0.889166 | \n",
" /train/n01440764/n01440764_12279.JPEG | \n",
" tench | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tQuery Image | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tSimilar | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info From | \n",
"
\n",
"\n",
" label | \n",
" garbage_truck | \n",
"
\n",
"\n",
" from | \n",
" /train/n03417042/n03417042_1578.JPEG | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info To | \n",
"
\n",
"\n",
" 0.952239 | \n",
" /train/n03417042/n03417042_12906.JPEG | \n",
" garbage_truck | \n",
"
\n",
"\n",
" 0.837864 | \n",
" /val/n03417042/n03417042_9610.JPEG | \n",
" garbage_truck | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tQuery Image | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tSimilar | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info From | \n",
"
\n",
"\n",
" label | \n",
" garbage_truck | \n",
"
\n",
"\n",
" from | \n",
" /train/n03417042/n03417042_12906.JPEG | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info To | \n",
"
\n",
"\n",
" 0.952239 | \n",
" /train/n03417042/n03417042_1578.JPEG | \n",
" garbage_truck | \n",
"
\n",
"\n",
" 0.828749 | \n",
" /train/n03417042/n03417042_27686.JPEG | \n",
" garbage_truck | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tQuery Image | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tSimilar | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info From | \n",
"
\n",
"\n",
" label | \n",
" French_horn | \n",
"
\n",
"\n",
" from | \n",
" /val/n03394916/n03394916_6830.JPEG | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info To | \n",
"
\n",
"\n",
" 0.951679 | \n",
" /val/n03394916/n03394916_21092.JPEG | \n",
" French_horn | \n",
"
\n",
"\n",
" 0.893079 | \n",
" /train/n03394916/n03394916_35469.JPEG | \n",
" French_horn | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tQuery Image | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tSimilar | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info From | \n",
"
\n",
"\n",
" label | \n",
" French_horn | \n",
"
\n",
"\n",
" from | \n",
" /val/n03394916/n03394916_21092.JPEG | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info To | \n",
"
\n",
"\n",
" 0.951679 | \n",
" /val/n03394916/n03394916_6830.JPEG | \n",
" French_horn | \n",
"
\n",
"\n",
" 0.865771 | \n",
" /train/n03394916/n03394916_35469.JPEG | \n",
" French_horn | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tQuery Image | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tSimilar | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info From | \n",
"
\n",
"\n",
" label | \n",
" parachute | \n",
"
\n",
"\n",
" from | \n",
" /train/n03888257/n03888257_21027.JPEG | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info To | \n",
"
\n",
"\n",
" 0.950477 | \n",
" /val/n03888257/n03888257_11210.JPEG | \n",
" parachute | \n",
"
\n",
"\n",
" 0.92043 | \n",
" /val/n03888257/n03888257_12491.JPEG | \n",
" parachute | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tQuery Image | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tSimilar | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info From | \n",
"
\n",
"\n",
" label | \n",
" parachute | \n",
"
\n",
"\n",
" from | \n",
" /val/n03888257/n03888257_11210.JPEG | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info To | \n",
"
\n",
"\n",
" 0.950477 | \n",
" /train/n03888257/n03888257_21027.JPEG | \n",
" parachute | \n",
"
\n",
"\n",
" 0.865155 | \n",
" /val/n03888257/n03888257_12491.JPEG | \n",
" parachute | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tQuery Image | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tSimilar | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info From | \n",
"
\n",
"\n",
" label | \n",
" English_springer | \n",
"
\n",
"\n",
" from | \n",
" /train/n02102040/n02102040_6313.JPEG | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info To | \n",
"
\n",
"\n",
" 0.950173 | \n",
" /train/n02102040/n02102040_3767.JPEG | \n",
" English_springer | \n",
"
\n",
"\n",
" 0.947323 | \n",
" /val/n02102040/n02102040_350.JPEG | \n",
" English_springer | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tQuery Image | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tSimilar | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info From | \n",
"
\n",
"\n",
" label | \n",
" English_springer | \n",
"
\n",
"\n",
" from | \n",
" /train/n02102040/n02102040_3767.JPEG | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info To | \n",
"
\n",
"\n",
" 0.950173 | \n",
" /train/n02102040/n02102040_6313.JPEG | \n",
" English_springer | \n",
"
\n",
"\n",
" 0.914056 | \n",
" /val/n02102040/n02102040_350.JPEG | \n",
" English_springer | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tQuery Image | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tSimilar | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info From | \n",
"
\n",
"\n",
" label | \n",
" English_springer | \n",
"
\n",
"\n",
" from | \n",
" /train/n02102040/ILSVRC2012_val_00032959.JPEG | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info To | \n",
"
\n",
"\n",
" 0.949877 | \n",
" /val/n02102040/n02102040_662.JPEG | \n",
" English_springer | \n",
"
\n",
"\n",
" 0.933115 | \n",
" /train/n02102040/n02102040_3114.JPEG | \n",
" English_springer | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tQuery Image | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tSimilar | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info From | \n",
"
\n",
"\n",
" label | \n",
" English_springer | \n",
"
\n",
"\n",
" from | \n",
" /val/n02102040/n02102040_662.JPEG | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info To | \n",
"
\n",
"\n",
" 0.949877 | \n",
" /train/n02102040/ILSVRC2012_val_00032959.JPEG | \n",
" English_springer | \n",
"
\n",
"\n",
" 0.927345 | \n",
" /val/n02102040/n02102040_3502.JPEG | \n",
" English_springer | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tQuery Image | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tSimilar | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info From | \n",
"
\n",
"\n",
" label | \n",
" English_springer | \n",
"
\n",
"\n",
" from | \n",
" /train/n02102040/n02102040_3114.JPEG | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info To | \n",
"
\n",
"\n",
" 0.949252 | \n",
" /train/n02102040/n02102040_1306.JPEG | \n",
" English_springer | \n",
"
\n",
"\n",
" 0.941953 | \n",
" /train/n02102040/n02102040_1055.JPEG | \n",
" English_springer | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tQuery Image | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tSimilar | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info From | \n",
"
\n",
"\n",
" label | \n",
" English_springer | \n",
"
\n",
"\n",
" from | \n",
" /train/n02102040/n02102040_1306.JPEG | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info To | \n",
"
\n",
"\n",
" 0.949252 | \n",
" /train/n02102040/n02102040_3114.JPEG | \n",
" English_springer | \n",
"
\n",
"\n",
" 0.936799 | \n",
" /train/n02102040/n02102040_876.JPEG | \n",
" English_springer | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tQuery Image | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\tSimilar | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t\t\t\t | \n",
"\t\t\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t\t\t\n",
"\t\t\t\t\t\t\t
\n",
"\t\t\t\t\t\t
\n",
"\t\t\t\t\t
\n",
"
\n",
" \n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" "
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" from | \n",
" to | \n",
" label | \n",
" label2 | \n",
" distance | \n",
"
\n",
" \n",
" \n",
" \n",
" 3630 | \n",
" imagenette2-160/train/n03394916/n03394916_44127.JPEG | \n",
" [imagenette2-160/val/n03394916/n03394916_30631.JPEG, imagenette2-160/train/n03394916/n03394916_36016.JPEG] | \n",
" [French_horn, French_horn] | \n",
" [French_horn, French_horn] | \n",
" [0.968786, 0.918324] | \n",
"
\n",
" \n",
" 7819 | \n",
" imagenette2-160/val/n03394916/n03394916_30631.JPEG | \n",
" [imagenette2-160/train/n03394916/n03394916_44127.JPEG, imagenette2-160/train/n03394916/n03394916_29969.JPEG] | \n",
" [French_horn, French_horn] | \n",
" [French_horn, French_horn] | \n",
" [0.968786, 0.903754] | \n",
"
\n",
" \n",
" 8751 | \n",
" imagenette2-160/val/n03445777/n03445777_6882.JPEG | \n",
" [imagenette2-160/train/n03445777/n03445777_13918.JPEG, imagenette2-160/val/n03445777/n03445777_5912.JPEG] | \n",
" [golf_ball, golf_ball] | \n",
" [golf_ball, golf_ball] | \n",
" [0.962459, 0.918005] | \n",
"
\n",
" \n",
" 5358 | \n",
" imagenette2-160/train/n03445777/n03445777_13918.JPEG | \n",
" [imagenette2-160/val/n03445777/n03445777_6882.JPEG, imagenette2-160/val/n03445777/n03445777_8820.JPEG] | \n",
" [golf_ball, golf_ball] | \n",
" [golf_ball, golf_ball] | \n",
" [0.962459, 0.91704] | \n",
"
\n",
" \n",
" 896 | \n",
" imagenette2-160/train/n02102040/n02102040_1564.JPEG | \n",
" [imagenette2-160/train/n02102040/n02102040_3837.JPEG, imagenette2-160/train/n02102040/n02102040_3586.JPEG] | \n",
" [English_springer, English_springer] | \n",
" [English_springer, English_springer] | \n",
" [0.953837, 0.908732] | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 5911 | \n",
" imagenette2-160/train/n03888257/n03888257_12816.JPEG | \n",
" [imagenette2-160/train/n03888257/n03888257_38633.JPEG] | \n",
" [parachute] | \n",
" [parachute] | \n",
" [0.800073] | \n",
"
\n",
" \n",
" 6219 | \n",
" imagenette2-160/train/n03888257/n03888257_38633.JPEG | \n",
" [imagenette2-160/train/n03888257/n03888257_12816.JPEG] | \n",
" [parachute] | \n",
" [parachute] | \n",
" [0.800073] | \n",
"
\n",
" \n",
" 4320 | \n",
" imagenette2-160/train/n03417042/n03417042_3236.JPEG | \n",
" [imagenette2-160/train/n03417042/n03417042_12297.JPEG] | \n",
" [garbage_truck] | \n",
" [garbage_truck] | \n",
" [0.800024] | \n",
"
\n",
" \n",
" 3429 | \n",
" imagenette2-160/train/n03394916/n03394916_32478.JPEG | \n",
" [imagenette2-160/train/n03394916/n03394916_35573.JPEG] | \n",
" [French_horn] | \n",
" [French_horn] | \n",
" [0.800012] | \n",
"
\n",
" \n",
" 7497 | \n",
" imagenette2-160/val/n03028079/n03028079_13002.JPEG | \n",
" [imagenette2-160/train/n03028079/n03028079_3839.JPEG] | \n",
" [church] | \n",
" [church] | \n",
" [0.800002] | \n",
"
\n",
" \n",
"
\n",
"
9057 rows × 5 columns
\n",
"
"
],
"text/plain": [
" from \\\n",
"3630 imagenette2-160/train/n03394916/n03394916_44127.JPEG \n",
"7819 imagenette2-160/val/n03394916/n03394916_30631.JPEG \n",
"8751 imagenette2-160/val/n03445777/n03445777_6882.JPEG \n",
"5358 imagenette2-160/train/n03445777/n03445777_13918.JPEG \n",
"896 imagenette2-160/train/n02102040/n02102040_1564.JPEG \n",
"... ... \n",
"5911 imagenette2-160/train/n03888257/n03888257_12816.JPEG \n",
"6219 imagenette2-160/train/n03888257/n03888257_38633.JPEG \n",
"4320 imagenette2-160/train/n03417042/n03417042_3236.JPEG \n",
"3429 imagenette2-160/train/n03394916/n03394916_32478.JPEG \n",
"7497 imagenette2-160/val/n03028079/n03028079_13002.JPEG \n",
"\n",
" to \\\n",
"3630 [imagenette2-160/val/n03394916/n03394916_30631.JPEG, imagenette2-160/train/n03394916/n03394916_36016.JPEG] \n",
"7819 [imagenette2-160/train/n03394916/n03394916_44127.JPEG, imagenette2-160/train/n03394916/n03394916_29969.JPEG] \n",
"8751 [imagenette2-160/train/n03445777/n03445777_13918.JPEG, imagenette2-160/val/n03445777/n03445777_5912.JPEG] \n",
"5358 [imagenette2-160/val/n03445777/n03445777_6882.JPEG, imagenette2-160/val/n03445777/n03445777_8820.JPEG] \n",
"896 [imagenette2-160/train/n02102040/n02102040_3837.JPEG, imagenette2-160/train/n02102040/n02102040_3586.JPEG] \n",
"... ... \n",
"5911 [imagenette2-160/train/n03888257/n03888257_38633.JPEG] \n",
"6219 [imagenette2-160/train/n03888257/n03888257_12816.JPEG] \n",
"4320 [imagenette2-160/train/n03417042/n03417042_12297.JPEG] \n",
"3429 [imagenette2-160/train/n03394916/n03394916_35573.JPEG] \n",
"7497 [imagenette2-160/train/n03028079/n03028079_3839.JPEG] \n",
"\n",
" label \\\n",
"3630 [French_horn, French_horn] \n",
"7819 [French_horn, French_horn] \n",
"8751 [golf_ball, golf_ball] \n",
"5358 [golf_ball, golf_ball] \n",
"896 [English_springer, English_springer] \n",
"... ... \n",
"5911 [parachute] \n",
"6219 [parachute] \n",
"4320 [garbage_truck] \n",
"3429 [French_horn] \n",
"7497 [church] \n",
"\n",
" label2 distance \n",
"3630 [French_horn, French_horn] [0.968786, 0.918324] \n",
"7819 [French_horn, French_horn] [0.968786, 0.903754] \n",
"8751 [golf_ball, golf_ball] [0.962459, 0.918005] \n",
"5358 [golf_ball, golf_ball] [0.962459, 0.91704] \n",
"896 [English_springer, English_springer] [0.953837, 0.908732] \n",
"... ... ... \n",
"5911 [parachute] [0.800073] \n",
"6219 [parachute] [0.800073] \n",
"4320 [garbage_truck] [0.800024] \n",
"3429 [French_horn] [0.800012] \n",
"7497 [church] [0.800002] \n",
"\n",
"[9057 rows x 5 columns]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fd.vis.similarity_gallery() "
]
},
{
"cell_type": "markdown",
"id": "c2c393be-2b42-4814-8688-03d2be9e8998",
"metadata": {},
"source": [
"## Similar Image Pairs\n",
"\n",
"Find similar image pairs within and across the train and validation subfolders. Pairs may include train-train, train-val, val-train, and val-val."
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "9e065403-582b-4f94-855b-33fd8f4826a1",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 343.69it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Stored similarity visual view in fastdup_imagenette/galleries/duplicates.html\n"
]
},
{
"data": {
"text/html": [
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" Duplicates Report\n",
" \n",
" \n",
"\n",
"\n",
"\n",
" \n",
" \n",
" \n",
" \n",
" \n",
"
\n",
"
\n",
"
Duplicates Report
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" Distance | \n",
" 0.968786 | \n",
"
\n",
"\n",
" From | \n",
" French_horn | \n",
"
\n",
"\n",
" To | \n",
" French_horn | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" Distance | \n",
" 0.962459 | \n",
"
\n",
"\n",
" From | \n",
" golf_ball | \n",
"
\n",
"\n",
" To | \n",
" golf_ball | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" Distance | \n",
" 0.953837 | \n",
"
\n",
"\n",
" From | \n",
" English_springer | \n",
"
\n",
"\n",
" To | \n",
" English_springer | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" Distance | \n",
" 0.953413 | \n",
"
\n",
"\n",
" From | \n",
" tench | \n",
"
\n",
"\n",
" To | \n",
" tench | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" Distance | \n",
" 0.952239 | \n",
"
\n",
"\n",
" From | \n",
" garbage_truck | \n",
"
\n",
"\n",
" To | \n",
" garbage_truck | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" Distance | \n",
" 0.951679 | \n",
"
\n",
"\n",
" From | \n",
" French_horn | \n",
"
\n",
"\n",
" To | \n",
" French_horn | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" Distance | \n",
" 0.950477 | \n",
"
\n",
"\n",
" From | \n",
" parachute | \n",
"
\n",
"\n",
" To | \n",
" parachute | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" Distance | \n",
" 0.950173 | \n",
"
\n",
"\n",
" From | \n",
" English_springer | \n",
"
\n",
"\n",
" To | \n",
" English_springer | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" Distance | \n",
" 0.949877 | \n",
"
\n",
"\n",
" From | \n",
" English_springer | \n",
"
\n",
"\n",
" To | \n",
" English_springer | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" Distance | \n",
" 0.949252 | \n",
"
\n",
"\n",
" From | \n",
" English_springer | \n",
"
\n",
"\n",
" To | \n",
" English_springer | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" "
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fd.vis.duplicates_gallery()"
]
},
{
"cell_type": "markdown",
"id": "e10989e1",
"metadata": {},
"source": [
"Show similar image pairs."
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "3ea590e9-d221-4202-b03b-e5fef4487c89",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 270
},
"executionInfo": {
"elapsed": 499,
"status": "ok",
"timestamp": 1677667342908,
"user": {
"displayName": "Tom Shani",
"userId": "00667426488827942961"
},
"user_tz": -120
},
"id": "3ea590e9-d221-4202-b03b-e5fef4487c89",
"outputId": "3c5f4cc0-0ba5-42a0-e01b-f165e9cf655c",
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" from | \n",
" to | \n",
" distance | \n",
" img_filename_from | \n",
" label_from | \n",
" split_from | \n",
" error_code_from | \n",
" is_valid_from | \n",
" img_filename_to | \n",
" label_to | \n",
" split_to | \n",
" error_code_to | \n",
" is_valid_to | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 11521 | \n",
" 5390 | \n",
" 0.968786 | \n",
" val/n03394916/n03394916_30631.JPEG | \n",
" French_horn | \n",
" val | \n",
" VALID | \n",
" True | \n",
" train/n03394916/n03394916_44127.JPEG | \n",
" French_horn | \n",
" train | \n",
" VALID | \n",
" True | \n",
"
\n",
" \n",
" 1 | \n",
" 5390 | \n",
" 11521 | \n",
" 0.968786 | \n",
" train/n03394916/n03394916_44127.JPEG | \n",
" French_horn | \n",
" train | \n",
" VALID | \n",
" True | \n",
" val/n03394916/n03394916_30631.JPEG | \n",
" French_horn | \n",
" val | \n",
" VALID | \n",
" True | \n",
"
\n",
" \n",
" 2 | \n",
" 12914 | \n",
" 7715 | \n",
" 0.962459 | \n",
" val/n03445777/n03445777_6882.JPEG | \n",
" golf_ball | \n",
" val | \n",
" VALID | \n",
" True | \n",
" train/n03445777/n03445777_13918.JPEG | \n",
" golf_ball | \n",
" train | \n",
" VALID | \n",
" True | \n",
"
\n",
" \n",
" 3 | \n",
" 7715 | \n",
" 12914 | \n",
" 0.962459 | \n",
" train/n03445777/n03445777_13918.JPEG | \n",
" golf_ball | \n",
" train | \n",
" VALID | \n",
" True | \n",
" val/n03445777/n03445777_6882.JPEG | \n",
" golf_ball | \n",
" val | \n",
" VALID | \n",
" True | \n",
"
\n",
" \n",
" 4 | \n",
" 1117 | \n",
" 1404 | \n",
" 0.953837 | \n",
" train/n02102040/n02102040_1564.JPEG | \n",
" English_springer | \n",
" train | \n",
" VALID | \n",
" True | \n",
" train/n02102040/n02102040_3837.JPEG | \n",
" English_springer | \n",
" train | \n",
" VALID | \n",
" True | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" from to distance img_filename_from \\\n",
"0 11521 5390 0.968786 val/n03394916/n03394916_30631.JPEG \n",
"1 5390 11521 0.968786 train/n03394916/n03394916_44127.JPEG \n",
"2 12914 7715 0.962459 val/n03445777/n03445777_6882.JPEG \n",
"3 7715 12914 0.962459 train/n03445777/n03445777_13918.JPEG \n",
"4 1117 1404 0.953837 train/n02102040/n02102040_1564.JPEG \n",
"\n",
" label_from split_from error_code_from is_valid_from \\\n",
"0 French_horn val VALID True \n",
"1 French_horn train VALID True \n",
"2 golf_ball val VALID True \n",
"3 golf_ball train VALID True \n",
"4 English_springer train VALID True \n",
"\n",
" img_filename_to label_to split_to \\\n",
"0 train/n03394916/n03394916_44127.JPEG French_horn train \n",
"1 val/n03394916/n03394916_30631.JPEG French_horn val \n",
"2 train/n03445777/n03445777_13918.JPEG golf_ball train \n",
"3 val/n03445777/n03445777_6882.JPEG golf_ball val \n",
"4 train/n02102040/n02102040_3837.JPEG English_springer train \n",
"\n",
" error_code_to is_valid_to \n",
"0 VALID True \n",
"1 VALID True \n",
"2 VALID True \n",
"3 VALID True \n",
"4 VALID True "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fd.similarity().head(5)"
]
},
{
"cell_type": "markdown",
"id": "95d21e6d-a951-48dd-8c4c-894c8ba556fd",
"metadata": {},
"source": [
"## Image Clusters"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "4a6db529-cb1e-4655-af50-d97f3e131319",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000,
"output_embedded_package_id": "1Wh1vmG-F-RG0ZYZP1oRgiyqHAtnfsuEk"
},
"executionInfo": {
"elapsed": 6376,
"status": "ok",
"timestamp": 1677667352994,
"user": {
"displayName": "Tom Shani",
"userId": "00667426488827942961"
},
"user_tz": -120
},
"id": "4a6db529-cb1e-4655-af50-d97f3e131319",
"outputId": "adfc3ee1-84c9-4aa6-a0db-09a6a800b566",
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tench\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 97.47it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Finished OK. Components are stored as image files fastdup_imagenette/galleries/components_[index].jpg\n",
"Stored components visual view in fastdup_imagenette/galleries/components.html\n",
"Execution time in seconds 2.3\n"
]
},
{
"data": {
"text/html": [
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" Components Report\n",
" Showing groups of similar images
\n",
" \n",
"\n",
"\n",
"\n",
" \n",
" \n",
" \n",
" \n",
" \n",
"
\n",
"
\n",
"
Components Report
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 36 | \n",
"
\n",
"\n",
" num_images | \n",
" 24 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9003 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" tench | \n",
" 24 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 7332 | \n",
"
\n",
"\n",
" num_images | \n",
" 22 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9011 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" golf_ball | \n",
" 22 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 143 | \n",
"
\n",
"\n",
" num_images | \n",
" 16 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9003 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" tench | \n",
" 16 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 6 | \n",
"
\n",
"\n",
" num_images | \n",
" 13 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9023 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" tench | \n",
" 13 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 10 | \n",
"
\n",
"\n",
" num_images | \n",
" 11 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9065 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" tench | \n",
" 11 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 4589 | \n",
"
\n",
"\n",
" num_images | \n",
" 11 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9005 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" French_horn | \n",
" 11 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 900 | \n",
"
\n",
"\n",
" num_images | \n",
" 10 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9018 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" English_springer | \n",
" 10 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 5491 | \n",
"
\n",
"\n",
" num_images | \n",
" 10 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9001 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" garbage_truck | \n",
" 10 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 150 | \n",
"
\n",
"\n",
" num_images | \n",
" 10 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9032 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" tench | \n",
" 10 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 7341 | \n",
"
\n",
"\n",
" num_images | \n",
" 9 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9112 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" golf_ball | \n",
" 9 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 7355 | \n",
"
\n",
"\n",
" num_images | \n",
" 8 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9057 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" golf_ball | \n",
" 8 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 5478 | \n",
"
\n",
"\n",
" num_images | \n",
" 8 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9025 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" garbage_truck | \n",
" 8 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 151 | \n",
"
\n",
"\n",
" num_images | \n",
" 7 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9006 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" tench | \n",
" 7 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 902 | \n",
"
\n",
"\n",
" num_images | \n",
" 7 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9044 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" English_springer | \n",
" 7 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 4571 | \n",
"
\n",
"\n",
" num_images | \n",
" 6 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9038 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" French_horn | \n",
" 6 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 41 | \n",
"
\n",
"\n",
" num_images | \n",
" 6 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9007 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" tench | \n",
" 6 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 5718 | \n",
"
\n",
"\n",
" num_images | \n",
" 6 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9043 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" garbage_truck | \n",
" 6 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 917 | \n",
"
\n",
"\n",
" num_images | \n",
" 5 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9037 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" English_springer | \n",
" 5 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 8448 | \n",
"
\n",
"\n",
" num_images | \n",
" 5 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9004 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" parachute | \n",
" 5 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 218 | \n",
"
\n",
"\n",
" num_images | \n",
" 5 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" tench | \n",
" 5 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" "
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fd.vis.component_gallery()"
]
},
{
"cell_type": "markdown",
"id": "ca5d4b6e-7ff6-49b8-b487-6ba1573ab104",
"metadata": {},
"source": [
"You can also visualize clusters with specific labels using the `slice` parameter. For example let's visualize clusters with the `chain_saw` label"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "4b38dacf-becc-4631-9aeb-6fe9bd235aa1",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000,
"output_embedded_package_id": "1xYIrPsODG8kAMaZOpGeKNRoa4-HjPC-w"
},
"executionInfo": {
"elapsed": 5130,
"status": "ok",
"timestamp": 1677667368207,
"user": {
"displayName": "Tom Shani",
"userId": "00667426488827942961"
},
"user_tz": -120
},
"id": "4b38dacf-becc-4631-9aeb-6fe9bd235aa1",
"outputId": "131d0f11-5627-4beb-b58c-3801e09a3b42",
"scrolled": true,
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"chain_saw\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 11/11 [00:00<00:00, 313.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Finished OK. Components are stored as image files fastdup_imagenette/galleries/components_[index].jpg\n",
"Stored components visual view in fastdup_imagenette/galleries/components.html\n",
"Execution time in seconds 1.4\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"text/html": [
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" Components Report\n",
" , slice: chain_saw
\n",
" \n",
"\n",
"\n",
"\n",
" \n",
" \n",
" \n",
" \n",
" \n",
"
\n",
"
\n",
"
Components Report
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 2953 | \n",
"
\n",
"\n",
" num_images | \n",
" 3 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9064 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" chain_saw | \n",
" 3 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 2875 | \n",
"
\n",
"\n",
" num_images | \n",
" 2 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9029 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" chain_saw | \n",
" 2 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 2891 | \n",
"
\n",
"\n",
" num_images | \n",
" 2 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9208 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" chain_saw | \n",
" 2 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 2939 | \n",
"
\n",
"\n",
" num_images | \n",
" 2 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9222 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" chain_saw | \n",
" 2 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 3065 | \n",
"
\n",
"\n",
" num_images | \n",
" 2 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9139 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" chain_saw | \n",
" 2 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 3068 | \n",
"
\n",
"\n",
" num_images | \n",
" 2 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9198 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" chain_saw | \n",
" 2 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 3077 | \n",
"
\n",
"\n",
" num_images | \n",
" 2 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9073 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" chain_saw | \n",
" 2 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 3078 | \n",
"
\n",
"\n",
" num_images | \n",
" 2 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9192 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" chain_saw | \n",
" 2 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 3153 | \n",
"
\n",
"\n",
" num_images | \n",
" 2 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9355 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" chain_saw | \n",
" 2 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 3381 | \n",
"
\n",
"\n",
" num_images | \n",
" 2 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9345 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" chain_saw | \n",
" 2 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 10340 | \n",
"
\n",
"\n",
" num_images | \n",
" 2 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9039 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" chain_saw | \n",
" 2 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" "
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fd.vis.component_gallery(slice='chain_saw')"
]
},
{
"cell_type": "markdown",
"id": "28498d81-d073-4f3d-baa4-732e1df93a34",
"metadata": {},
"source": [
"## Connected Components"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "0346be91-5380-48b9-a8df-074c342efcd3",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"executionInfo": {
"elapsed": 1036,
"status": "ok",
"timestamp": 1677667380699,
"user": {
"displayName": "Tom Shani",
"userId": "00667426488827942961"
},
"user_tz": -120
},
"id": "0346be91-5380-48b9-a8df-074c342efcd3",
"outputId": "ffa6bd9d-b5b3-4ed5-86e1-c47ca9658667",
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" fastdup_id | \n",
" component_id | \n",
" sum | \n",
" count | \n",
" mean_distance | \n",
" min_distance | \n",
" max_distance | \n",
" img_filename | \n",
" label | \n",
" split | \n",
" error_code | \n",
" is_valid | \n",
"
\n",
" \n",
" \n",
" \n",
" 7778 | \n",
" 7778 | \n",
" 7332 | \n",
" 36.6734 | \n",
" 40.0 | \n",
" 0.9168 | \n",
" 0.9011 | \n",
" 0.9328 | \n",
" train/n03445777/n03445777_16186.JPEG | \n",
" golf_ball | \n",
" train | \n",
" VALID | \n",
" True | \n",
"
\n",
" \n",
" 7990 | \n",
" 7990 | \n",
" 7332 | \n",
" 36.6734 | \n",
" 40.0 | \n",
" 0.9168 | \n",
" 0.9011 | \n",
" 0.9328 | \n",
" train/n03445777/n03445777_3503.JPEG | \n",
" golf_ball | \n",
" train | \n",
" VALID | \n",
" True | \n",
"
\n",
" \n",
" 682 | \n",
" 682 | \n",
" 36 | \n",
" 36.5815 | \n",
" 40.0 | \n",
" 0.9145 | \n",
" 0.9003 | \n",
" 0.9339 | \n",
" train/n01440764/n01440764_6159.JPEG | \n",
" tench | \n",
" train | \n",
" VALID | \n",
" True | \n",
"
\n",
" \n",
" 9545 | \n",
" 9545 | \n",
" 36 | \n",
" 36.5815 | \n",
" 40.0 | \n",
" 0.9145 | \n",
" 0.9003 | \n",
" 0.9339 | \n",
" val/n01440764/n01440764_12250.JPEG | \n",
" tench | \n",
" val | \n",
" VALID | \n",
" True | \n",
"
\n",
" \n",
" 7651 | \n",
" 7651 | \n",
" 7332 | \n",
" 36.6734 | \n",
" 40.0 | \n",
" 0.9168 | \n",
" 0.9011 | \n",
" 0.9328 | \n",
" train/n03445777/n03445777_11389.JPEG | \n",
" golf_ball | \n",
" train | \n",
" VALID | \n",
" True | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" fastdup_id component_id sum count mean_distance min_distance \\\n",
"7778 7778 7332 36.6734 40.0 0.9168 0.9011 \n",
"7990 7990 7332 36.6734 40.0 0.9168 0.9011 \n",
"682 682 36 36.5815 40.0 0.9145 0.9003 \n",
"9545 9545 36 36.5815 40.0 0.9145 0.9003 \n",
"7651 7651 7332 36.6734 40.0 0.9168 0.9011 \n",
"\n",
" max_distance img_filename label split \\\n",
"7778 0.9328 train/n03445777/n03445777_16186.JPEG golf_ball train \n",
"7990 0.9328 train/n03445777/n03445777_3503.JPEG golf_ball train \n",
"682 0.9339 train/n01440764/n01440764_6159.JPEG tench train \n",
"9545 0.9339 val/n01440764/n01440764_12250.JPEG tench val \n",
"7651 0.9328 train/n03445777/n03445777_11389.JPEG golf_ball train \n",
"\n",
" error_code is_valid \n",
"7778 VALID True \n",
"7990 VALID True \n",
"682 VALID True \n",
"9545 VALID True \n",
"7651 VALID True "
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cc_df, _ = fd.connected_components()\n",
"cc_df.sort_values('count', ascending=False).head(5)"
]
},
{
"cell_type": "markdown",
"id": "569cb878",
"metadata": {},
"source": [
"We can also get metadata for individual images using their `fastdup_id` available in `fd.annotations()`"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "e80d6817-fed6-4fa4-8714-b01214e0d3f8",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"executionInfo": {
"elapsed": 990,
"status": "ok",
"timestamp": 1677667384644,
"user": {
"displayName": "Tom Shani",
"userId": "00667426488827942961"
},
"user_tz": -120
},
"id": "e80d6817-fed6-4fa4-8714-b01214e0d3f8",
"outputId": "4f973aba-572d-4e50-d22d-c5bfc8cf3d2d",
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"{'img_filename': 'train/n01440764/n01440764_17789.JPEG',\n",
" 'label': 'tench',\n",
" 'split': 'train',\n",
" 'fastdup_id': 349,\n",
" 'error_code': 'VALID',\n",
" 'is_valid': True}"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fd[349]"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}