{
"cells": [
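{
"attachments": {},
"cell_type": "markdown",
"id": "8ecd1bd3-0374-4aa0-b14e-0a9a556a5bdc",
"metadata": {},
"source": [
"# Investigating BLIP model performance with fastdup\n",
"\n",
"First, generate a caption (label) for every image with BLIP. These captions are later compared within the clusters of visually similar images that fastdup finds."
]
},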
{
"cell_type": "code",
"execution_count": 28,
"id": "4ecd8ff3-eb15-41ba-b18d-12c5693b138f",
"metadata": {},
"outputs": [],
"source": [
"from tqdm import tqdm\n",
"from transformers import BlipProcessor, BlipForConditionalGeneration\n",
"from PIL import Image\n",
"import cv2\n",
"import numpy as np \n",
"processor = BlipProcessor.from_pretrained(\"Salesforce/blip-image-captioning-large\")\n",
"model = BlipForConditionalGeneration.from_pretrained(\"Salesforce/blip-image-captioning-large\")\n",
" \n",
"def generate_blip_labels(filenames, kwargs):\n",
"    \"\"\"Caption a batch of image files with the BLIP model loaded above.\n",
"\n",
"    Args:\n",
"        filenames: paths of the images to caption.\n",
"        kwargs: unused; kept so the call signature stays unchanged.\n",
"\n",
"    Returns:\n",
"        A list with one entry per input path, in input order: the decoded\n",
"        caption, or None for an image that could not be opened or captioned.\n",
"    \"\"\"\n",
"    filenames = list(filenames)\n",
"    preds = [None] * len(filenames)\n",
"    images = []\n",
"    positions = []\n",
"    for pos, image_path in enumerate(filenames):\n",
"        try:\n",
"            # Pillow already yields RGB channel order, so no OpenCV BGR->RGB swap is\n",
"            # applied; convert('RGB') also maps grayscale/palette/RGBA files to 3 channels.\n",
"            with Image.open(image_path) as i_image:\n",
"                images.append(i_image.convert('RGB'))\n",
"            positions.append(pos)\n",
"        except Exception as e:\n",
"            # Image.open raises on unreadable files; skip that file only\n",
"            print('Non image' + str(image_path), e)\n",
"    if not images:\n",
"        return preds\n",
"    try:\n",
"        inputs = processor(images, return_tensors=\"pt\")\n",
"        out = model.generate(**inputs)\n",
"        # Write each caption back to the slot of the file it came from\n",
"        for pos, tokens in zip(positions, out):\n",
"            preds[pos] = processor.decode(tokens, skip_special_tokens=True)\n",
"    except Exception as e:\n",
"        # Best effort: report the failure and leave this batch's captions as None\n",
"        print(e)\n",
"        #fastdup_capture_exception(\"Auto caption image blip\", e)\n",
"    return preds\n"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "158e396a-e4ab-48ff-8bca-fce09cd2ce4c",
"metadata": {},
"outputs": [],
"source": [
"files=!find laion_10K/ -name '*.jpg'"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "19577552-d7ba-4716-bc35-ac4bcc2ebc8a",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████████████████████████████████████████████████████████████| 1000/1000 [2:57:09<00:00, 10.63s/it]\n"
]
}
],
"source": [
"from tqdm import tqdm\n",
"images = []\n",
"out = []\n",
"for i in tqdm(range(len(files)//10)):\n",
" curfiles = files[i*10:i*10+10]\n",
" curout = generate_blip_labels(curfiles, {})\n",
" images.extend(curfiles)\n",
" out.extend(curout)\n",
"import pandas as pd\n",
"import fastdup\n",
"df = pd.DataFrame({'from':images, 'to':images, 'label':out})\n",
"df.to_csv('all_labels')\n"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "12887512-04f2-4c53-8f26-32a986fa0493",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" from | \n",
" to | \n",
" label | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" laion_10K//001025133.jpg | \n",
" laion_10K//001025133.jpg | \n",
" a close up of a coat on a mannequin dummy | \n",
"
\n",
" \n",
" 1 | \n",
" laion_10K//000843093.jpg | \n",
" laion_10K//000843093.jpg | \n",
" a close up of a dress on a mannequin mannequin | \n",
"
\n",
" \n",
" 2 | \n",
" laion_10K//000157698.jpg | \n",
" laion_10K//000157698.jpg | \n",
" a close up of a soccer game with a large crowd watching | \n",
"
\n",
" \n",
" 3 | \n",
" laion_10K//000927273.jpg | \n",
" laion_10K//000927273.jpg | \n",
" asthma and allergies laura b both extension associate, environmental health education alabama cooperative extension system auburn | \n",
"
\n",
" \n",
" 4 | \n",
" laion_10K//000499672.jpg | \n",
" laion_10K//000499672.jpg | \n",
" a close up of a dress on a mannequin mannequin mannequin manne | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" from ... label\n",
"0 laion_10K//001025133.jpg ... a close up of a coat on a mannequin dummy\n",
"1 laion_10K//000843093.jpg ... a close up of a dress on a mannequin mannequin\n",
"2 laion_10K//000157698.jpg ... a close up of a soccer game with a large crowd watching\n",
"3 laion_10K//000927273.jpg ... asthma and allergies laura b both extension associate, environmental health education alabama cooperative extension system auburn\n",
"4 laion_10K//000499672.jpg ... a close up of a dress on a mannequin mannequin mannequin manne\n",
"\n",
"[5 rows x 3 columns]"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first rows: image path ('from'/'to') and its BLIP caption ('label').\n",
"df.head()"
]
},
{
"cell_type": "markdown",
"id": "e9bc7e71-8ebe-4109-ad92-bf57d9d4d769",
"metadata": {},
"source": [
"# Run fastdup to cluster images"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "b9a77582-f186-4aa9-b14e-26f97a5dc646",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"FastDup Software, (C) copyright 2022 Dr. Amir Alush and Dr. Danny Bickson.\n",
"2023-04-14 16:50:21 [INFO] Going to loop over dir laion_10k\n",
"2023-04-14 16:50:21 [INFO] Found total 10000 images to run on, 10000 train, 0 test, name list 10000, counter 10000 \n",
"2023-04-14 16:50:33 [INFO] Found total 10000 images to run on\n",
"2023-04-14 16:50:35 [INFO] 1597) Finished write_index() NN model\n",
"2023-04-14 16:50:35 [INFO] Stored nn model index file laion_10k_out/nnf.index\n",
"2023-04-14 16:50:35 [INFO] Total time took 14468 ms\n",
"2023-04-14 16:50:35 [INFO] Found a total of 45 fully identical images (d>0.990), which are 0.15 %\n",
"2023-04-14 16:50:35 [INFO] Found a total of 2 nearly identical images(d>0.980), which are 0.01 %\n",
"2023-04-14 16:50:35 [INFO] Found a total of 525 above threshold images (d>0.900), which are 1.75 %\n",
"2023-04-14 16:50:35 [INFO] Found a total of 1000 outlier images (d<0.050), which are 3.33 %\n",
"2023-04-14 16:50:35 [INFO] Min distance found 0.468 max distance 1.000\n",
"2023-04-14 16:50:35 [INFO] Running connected components for ccthreshold 0.960000 \n",
".0\n",
" ########################################################################################\n",
"\n",
"Dataset Analysis Summary: \n",
"\n",
" Dataset contains 10000 images\n",
" Valid images are 100.00% (10,000) of the data, invalid are 0.00% (0) of the data\n",
" Similarity: 0.25% (25) belong to 6 similarity clusters (components).\n",
" 99.75% (9,975) images do not belong to any similarity cluster.\n",
" Largest cluster has 10 (0.10%) images.\n",
" For a detailed analysis, use `.connected_components()`\n",
"(similarity threshold used is 0.9, connected component threshold used is 0.96).\n",
"\n",
" Outliers: 6.59% (659) of images are possible outliers, and fall in the bottom 5.00% of similarity values.\n",
" For a detailed list of outliers, use `.outliers()`.\n"
]
}
],
"source": [
"import fastdup\n",
"# Point fastdup at the image folder; its artifacts (e.g. the nn index) are written under work_dir.\n",
"fd = fastdup.create(input_dir='laion_10k', work_dir='laion_10k_out')\n",
"# NOTE(review): overwrite=True presumably replaces results of an earlier run in work_dir - confirm in fastdup docs.\n",
"fd.run(overwrite=True)\n"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "a1771e81-7aba-4843-b723-e59aaeb34ac1",
"metadata": {},
"outputs": [],
"source": [
"comps = fastdup.find_top_components('laion_10k_out')\n"
]
},
{
"cell_type": "code",
"execution_count": 52,
"id": "b6629a00-88a3-4f8b-b540-166755b49f0f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" from | \n",
" to | \n",
" label | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" laion_10K/001025133.jpg | \n",
" laion_10K//001025133.jpg | \n",
" a close up of a coat on a mannequin dummy | \n",
"
\n",
" \n",
" 1 | \n",
" laion_10K/000843093.jpg | \n",
" laion_10K//000843093.jpg | \n",
" a close up of a dress on a mannequin mannequin | \n",
"
\n",
" \n",
" 2 | \n",
" laion_10K/000157698.jpg | \n",
" laion_10K//000157698.jpg | \n",
" a close up of a soccer game with a large crowd watching | \n",
"
\n",
" \n",
" 3 | \n",
" laion_10K/000927273.jpg | \n",
" laion_10K//000927273.jpg | \n",
" asthma and allergies laura b both extension associate, environmental health education alabama cooperative extension system auburn | \n",
"
\n",
" \n",
" 4 | \n",
" laion_10K/000499672.jpg | \n",
" laion_10K//000499672.jpg | \n",
" a close up of a dress on a mannequin mannequin mannequin manne | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" from ... label\n",
"0 laion_10K/001025133.jpg ... a close up of a coat on a mannequin dummy\n",
"1 laion_10K/000843093.jpg ... a close up of a dress on a mannequin mannequin\n",
"2 laion_10K/000157698.jpg ... a close up of a soccer game with a large crowd watching\n",
"3 laion_10K/000927273.jpg ... asthma and allergies laura b both extension associate, environmental health education alabama cooperative extension system auburn\n",
"4 laion_10K/000499672.jpg ... a close up of a dress on a mannequin mannequin mannequin manne\n",
"\n",
"[5 rows x 3 columns]"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['from'] = df['from'].apply(lambda x: x.replace('//', '/'))\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "b643b8d3-52db-4724-ac07-036647076364",
"metadata": {},
"outputs": [],
"source": [
"# Map each normalized image path ('from') to its BLIP caption.\n",
"# Built from `df` itself: `df2` is not defined anywhere in this notebook.\n",
"label_dict = pd.Series(df['label'].values, index=df['from']).to_dict()"
]
},
{
"cell_type": "code",
"execution_count": 65,
"id": "41056171-43d8-4efb-8183-f99fb5019ff5",
"metadata": {},
"outputs": [],
"source": [
"def _captions_for(paths):\n",
"    \"\"\"Return the BLIP caption for each path of a component, or 'N/A' when none is known.\"\"\"\n",
"    captions = []\n",
"    for path in paths:\n",
"        # fastdup paths start with 'laion_10k' while label_dict keys use 'laion_10K'\n",
"        key = path.replace('laion_10k', 'laion_10K')\n",
"        captions.append(label_dict.get(key, 'N/A'))\n",
"    return captions\n",
"\n",
"comps['label'] = comps['files'].map(_captions_for)"
]
},
{
"cell_type": "code",
"execution_count": 66,
"id": "0c051fa5-4bd9-4255-af99-eec383162177",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" component_id | \n",
" files | \n",
" files_ids | \n",
" distance | \n",
" len | \n",
" label | \n",
"
\n",
" \n",
" \n",
" \n",
" 1364 | \n",
" 1364 | \n",
" [laion_10k/000150342.jpg, laion_10k/000295438.jpg, laion_10k/000917354.jpg, laion_10k/001109068.jpg, laion_10k/001120690.jpg] | \n",
" [1364, 2672, 8082, 9813, 9922] | \n",
" 0.9607 | \n",
" 5 | \n",
" [a resume template for a medical assistant, a resume template for a job in a computer engineering department, a resume template for a job in a computer science department, a document with a black and white image of a man in a suit, a resume template for a job in a computer science department] | \n",
"
\n",
" \n",
" 1608 | \n",
" 1608 | \n",
" [laion_10k/000174292.jpg, laion_10k/000177292.jpg, laion_10k/000251959.jpg, laion_10k/000685365.jpg, laion_10k/000713476.jpg] | \n",
" [1608, 1630, 2314, 6096, 6343] | \n",
" 0.9601 | \n",
" 5 | \n",
" [a white paper with a black and red text that says result of cash flow, a slide from a presentation on the effects of the production of a product, a white screen with a red and black text that says advantages and limitationss of ctr, a white paper with a black and white image of a cell phone, a white paper with a black and white image of a man in a hat] | \n",
"
\n",
" \n",
" 401 | \n",
" 401 | \n",
" [laion_10k/000043483.jpg, laion_10k/000713727.jpg, laion_10k/000866598.jpg] | \n",
" [401, 6346, 7684] | \n",
" 0.9623 | \n",
" 3 | \n",
" [a black t - shirt with the letter n in the center, a black polo shirt with a blue and white design on the chest, a black t - shirt with the words quarannted with somebody soon on it] | \n",
"
\n",
" \n",
" 659 | \n",
" 659 | \n",
" [laion_10k/000072839.jpg, laion_10k/000418741.jpg, laion_10k/000693449.jpg] | \n",
" [659, 3740, 6169] | \n",
" 1.0000 | \n",
" 3 | \n",
" [a white and blue wall hanging with a clock on it, a white and blue wall hanging with a clock on it, a white and blue wall hanging with a clock on it] | \n",
"
\n",
" \n",
" 887 | \n",
" 887 | \n",
" [laion_10k/000098275.jpg, laion_10k/000338557.jpg, laion_10k/000886819.jpg] | \n",
" [887, 3056, 7826] | \n",
" 0.9660 | \n",
" 3 | \n",
" [wittg - who is this guy?, ncdc - national climate data center, qfe - quarterly ceo forecast] | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" component_id ... label\n",
"1364 1364 ... [a resume template for a medical assistant, a resume template for a job in a computer engineering department, a resume template for a job in a computer science department, a document with a black and white image of a man in a suit, a resume template for a job in a computer science department]\n",
"1608 1608 ... [a white paper with a black and red text that says result of cash flow, a slide from a presentation on the effects of the production of a product, a white screen with a red and black text that says advantages and limitationss of ctr, a white paper with a black and white image of a cell phone, a white paper with a black and white image of a man in a hat]\n",
"401 401 ... [a black t - shirt with the letter n in the center, a black polo shirt with a blue and white design on the chest, a black t - shirt with the words quarannted with somebody soon on it]\n",
"659 659 ... [a white and blue wall hanging with a clock on it, a white and blue wall hanging with a clock on it, a white and blue wall hanging with a clock on it]\n",
"887 887 ... [wittg - who is this guy?, ncdc - national climate data center, qfe - quarterly ceo forecast]\n",
"\n",
"[5 rows x 6 columns]"
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the components together with the list of captions attached to each one.\n",
"comps.head()"
]
},
{
"cell_type": "code",
"execution_count": 79,
"id": "28efa9ae-479b-4945-9b34-e59d0034fa41",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|███████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 297.36it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Finished OK. Components are stored as image files laion_10k_out/galleries/components_[index].jpg\n",
"Stored components visual view in laion_10k_out/galleries/components.html\n",
"Execution time in seconds 0.1\n"
]
},
{
"data": {
"text/html": [
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" Components Report\n",
" \n",
" \n",
"\n",
"\n",
"\n",
" \n",
" \n",
" \n",
" \n",
" \n",
"
\n",
"
\n",
"
Components Report
Showing groups of similar images
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 1364 | \n",
"
\n",
"\n",
" num_images | \n",
" 5 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9607 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" a resume template for a job in a computer science department | \n",
" 2 | \n",
"
\n",
"\n",
" a document with a black and white image of a man in a suit | \n",
" 1 | \n",
"
\n",
"\n",
" a resume template for a job in a computer engineering department | \n",
" 1 | \n",
"
\n",
"\n",
" a resume template for a medical assistant | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 1608 | \n",
"
\n",
"\n",
" num_images | \n",
" 5 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9601 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" a slide from a presentation on the effects of the production of a product | \n",
" 1 | \n",
"
\n",
"\n",
" a white paper with a black and red text that says result of cash flow | \n",
" 1 | \n",
"
\n",
"\n",
" a white paper with a black and white image of a cell phone | \n",
" 1 | \n",
"
\n",
"\n",
" a white paper with a black and white image of a man in a hat | \n",
" 1 | \n",
"
\n",
"\n",
" a white screen with a red and black text that says advantages and limitationss of ctr | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 401 | \n",
"
\n",
"\n",
" num_images | \n",
" 3 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9623 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" a black polo shirt with a blue and white design on the chest | \n",
" 1 | \n",
"
\n",
"\n",
" a black t - shirt with the letter n in the center | \n",
" 1 | \n",
"
\n",
"\n",
" a black t - shirt with the words quarannted with somebody soon on it | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 659 | \n",
"
\n",
"\n",
" num_images | \n",
" 3 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 1.0 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" a white and blue wall hanging with a clock on it | \n",
" 3 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 887 | \n",
"
\n",
"\n",
" num_images | \n",
" 3 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.966 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" ncdc - national climate data center | \n",
" 1 | \n",
"
\n",
"\n",
" qfe - quarterly ceo forecast | \n",
" 1 | \n",
"
\n",
"\n",
" wittg - who is this guy? | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 948 | \n",
"
\n",
"\n",
" num_images | \n",
" 3 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 1.0 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" the north face men's t shirt size medium s | \n",
" 3 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 55 | \n",
"
\n",
"\n",
" num_images | \n",
" 2 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9773 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" 2018 ski - doo renegade x 850 e - tec es in massapeb, pennsylvania | \n",
" 1 | \n",
"
\n",
"\n",
" a black snowmobiler parked on a white surface | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 2956 | \n",
"
\n",
"\n",
" num_images | \n",
" 2 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9683 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" a table with a chart of different types of forecast | \n",
" 1 | \n",
"
\n",
"\n",
" a table with a chart of the different stocks and options | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 4500 | \n",
"
\n",
"\n",
" num_images | \n",
" 2 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9622 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" a red t - shirt with the words you are here on it | \n",
" 1 | \n",
"
\n",
"\n",
" a women's v - neck shirt with the words headsburg california on it | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 4410 | \n",
"
\n",
"\n",
" num_images | \n",
" 2 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9655 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" a close up of a pot with a handle and a strainer | \n",
" 2 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 3964 | \n",
"
\n",
"\n",
" num_images | \n",
" 2 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 1.0 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" whirk electric double oven with built in microwave | \n",
" 2 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 3388 | \n",
"
\n",
"\n",
" num_images | \n",
" 2 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 1.0 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" article submission 7000 directorys | \n",
" 2 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 3230 | \n",
"
\n",
"\n",
" num_images | \n",
" 2 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9648 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" a black and white photo of a shoe with a caption on it | \n",
" 1 | \n",
"
\n",
"\n",
" a blue ribbon is the symbol of a cancer survivor apron | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 1706 | \n",
"
\n",
"\n",
" num_images | \n",
" 2 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9892 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" a black ski - doo snowmobile parked on a white surface | \n",
" 2 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 2343 | \n",
"
\n",
"\n",
" num_images | \n",
" 2 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 1.0 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" there is a man riding a surfboard on the beach | \n",
" 2 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 2120 | \n",
"
\n",
"\n",
" num_images | \n",
" 2 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9799 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" a picture of a white board with a green background and a questionnaire | \n",
" 1 | \n",
"
\n",
"\n",
" a white board with a green background and a green and white sign | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 84 | \n",
"
\n",
"\n",
" num_images | \n",
" 2 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9747 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" fairview shores, fl real estate house value index | \n",
" 1 | \n",
"
\n",
"\n",
" phoenixville, pa real estate house value index trend | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 1003 | \n",
"
\n",
"\n",
" num_images | \n",
" 2 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9733 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" a resume template for a job that is a good example of a | \n",
" 1 | \n",
"
\n",
"\n",
" a resume template for an investment assistant | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 613 | \n",
"
\n",
"\n",
" num_images | \n",
" 2 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9775 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" 1080 w lakeview drive irving, tx 7507 - photo 2 | \n",
" 1 | \n",
"
\n",
"\n",
" a large house with a lot of windows and a lot of grass | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Info | \n",
"
\n",
"\n",
" component | \n",
" 6496 | \n",
"
\n",
"\n",
" num_images | \n",
" 2 | \n",
"
\n",
"\n",
" mean_distance | \n",
" 0.9625 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
" \n",
" Label | \n",
"
\n",
"\n",
" a paper with a questionnaire for a research paper | \n",
" 1 | \n",
"
\n",
"\n",
" a resume template for a job in a computer engineering department | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n",
" \n",
"
\n",
" \n",
" \n",
" \n",
" \n",
" "
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from IPython.display import HTML\n",
"fd.vis.component_gallery(external_df=comps, label_col='baby') "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5d112168-07d4-488b-9587-1c088a7f0476",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}