{"cells":[{"cell_type":"markdown","source":["# Cleaning and preparing an image dataset using fastdup V1.0"],"metadata":{"id":"LCLC7GEmzriP"},"id":"LCLC7GEmzriP"},{"cell_type":"code","execution_count":null,"id":"58a77ca9-6df1-4ac1-b041-fdc85ad59ddb","metadata":{"id":"58a77ca9-6df1-4ac1-b041-fdc85ad59ddb"},"outputs":[],"source":["# download fastdup\n","!pip install pip -U\n","!pip install fastdup\n","!pip install pandas\n","!pip install matplotlib\n","!pip install wurlitzer\n","%load_ext wurlitzer"]},{"cell_type":"code","execution_count":2,"id":"6a49b5eb","metadata":{"id":"6a49b5eb","executionInfo":{"status":"ok","timestamp":1677668109538,"user_tz":-120,"elapsed":2034,"user":{"displayName":"Tom Shani","userId":"00667426488827942961"}}},"outputs":[],"source":["import fastdup\n","import pandas as pd"]},{"cell_type":"markdown","id":"ff4dfa80-d1e4-46d1-ae10-e8715c16bb07","metadata":{"id":"ff4dfa80-d1e4-46d1-ae10-e8715c16bb07"},"source":["# Download food-101 Dataset"]},{"cell_type":"code","execution_count":1,"id":"fddb8af6","metadata":{"id":"fddb8af6","outputId":"562c7636-b0bb-4bdb-c282-b229ffeaaf85","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1677653732682,"user_tz":-120,"elapsed":349477,"user":{"displayName":"Tom Shani","userId":"00667426488827942961"}}},"outputs":[{"output_type":"stream","name":"stdout","text":["--2023-03-01 06:49:43-- http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz\n","Resolving data.vision.ee.ethz.ch (data.vision.ee.ethz.ch)... 129.132.52.178, 2001:67c:10ec:36c2::178\n","Connecting to data.vision.ee.ethz.ch (data.vision.ee.ethz.ch)|129.132.52.178|:80... connected.\n","HTTP request sent, awaiting response... 302 Found\n","Location: https://data.vision.ee.ethz.ch/cvl/food-101.tar.gz [following]\n","--2023-03-01 06:49:43-- https://data.vision.ee.ethz.ch/cvl/food-101.tar.gz\n","Connecting to data.vision.ee.ethz.ch (data.vision.ee.ethz.ch)|129.132.52.178|:443... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 4996278331 (4.7G) [application/x-gzip]\n","Saving to: ‘food-101.tar.gz’\n","\n","food-101.tar.gz 100%[===================>] 4.65G 18.6MB/s in 4m 22s \n","\n","2023-03-01 06:54:06 (18.2 MB/s) - ‘food-101.tar.gz’ saved [4996278331/4996278331]\n","\n"]}],"source":["!wget http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz\n","!tar -xf food-101.tar.gz"]},{"cell_type":"markdown","id":"7e2e70a3","metadata":{"tags":[],"id":"7e2e70a3"},"source":["# Run fastdup"]},{"cell_type":"code","execution_count":3,"id":"b0108828-f2ee-435a-8ddb-2344ddcfbd4d","metadata":{"id":"b0108828-f2ee-435a-8ddb-2344ddcfbd4d","executionInfo":{"status":"ok","timestamp":1677668113537,"user_tz":-120,"elapsed":443,"user":{"displayName":"Tom Shani","userId":"00667426488827942961"}}},"outputs":[],"source":["images_dir = 'food-101/images/'\n","work_dir = 'fastdup_food101'"]},{"cell_type":"code","execution_count":4,"id":"2f7632e1","metadata":{"scrolled":true,"tags":[],"id":"2f7632e1","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1677670755290,"user_tz":-120,"elapsed":2639768,"user":{"displayName":"Tom Shani","userId":"00667426488827942961"}},"outputId":"f614f0d7-1606-41fa-8a8f-9f935dbe8256"},"outputs":[{"output_type":"stream","name":"stdout","text":["FastDup Software, (C) copyright 2022 Dr. Amir Alush and Dr. Danny Bickson.\n","2023-03-01 10:55:15 [INFO] Going to loop over dir food-101/images\n","2023-03-01 10:55:22 [INFO] Found total 101000 images to run on\n","2023-03-01 11:26:06 [INFO] Found total 101000 images to run on\n","2023-03-01 11:36:56 [INFO] 649444) Finished write_index() NN model\n","2023-03-01 11:36:56 [INFO] Stored nn model index file fastdup_food101/nnf.index\n","2023-03-01 11:39:04 [INFO] Total time took 2622423 ms\n","2023-03-01 11:39:04 [INFO] Found a total of 170 fully identical images (d>0.990), which are 0.06 %\n","2023-03-01 11:39:04 [INFO] Found a total of 88 nearly identical images(d>0.980), which are 0.03 %\n","2023-03-01 11:39:04 [INFO] Found a total of 5236 above threshold images (d>0.900), which are 1.73 %\n","2023-03-01 11:39:04 [INFO] Found a total of 10100 outlier images (d<0.050), which are 3.33 %\n","2023-03-01 11:39:04 [INFO] Min distance found 0.379 max distance 1.000\n","2023-03-01 11:39:04 [INFO] Running connected components for ccthreshold 0.960000 \n",".0\n"," ########################################################################################\n","\n","Dataset Analysis Summary: \n","\n"," Dataset contains 101000 images\n"," Valid images are 100.00% (101,000) of the data, invalid are 0.00% (0) of the data\n"," Similarity: 0.23% (228) belong to 3 similarity clusters (components).\n"," 99.77% (100,772) images do not belong to any similarity cluster.\n"," Largest cluster has 6 (0.01%) images.\n"," For a detailed analysis, use `.connected_components()`\n","(similarity threshold used is 0.9, connected component threshold used is 0.96).\n","\n"," Outliers: 5.97% (6,028) of images are possible outliers, and fall in the bottom 5.00% of similarity values.\n"," For a detailed list of outliers, use `.outliers(data=True)`.\n"]}],"source":["fd = fastdup.create(work_dir=work_dir, input_dir=images_dir)\n","fd.run()"]},{"cell_type":"markdown","id":"d4ddd8be","metadata":{"tags":[],"id":"d4ddd8be"},"source":["# Find duplicates"]},{"cell_type":"code","execution_count":5,"id":"8aaa2a06","metadata":{"scrolled":true,"id":"8aaa2a06","colab":{"base_uri":"https://localhost:8080/","height":1000,"output_embedded_package_id":"1tgMsme5DD-GtqDFvk-AqD924HaQ_-V9k"},"executionInfo":{"status":"ok","timestamp":1677671278321,"user_tz":-120,"elapsed":16870,"user":{"displayName":"Tom Shani","userId":"00667426488827942961"}},"outputId":"ec92d6b4-a937-4736-afa6-f9f5e8cdc423"},"outputs":[{"output_type":"display_data","data":{"text/plain":"Output hidden; open in https://colab.research.google.com to view."},"metadata":{}}],"source":["# visualize clusters of duplicate images\n","fd.vis.component_gallery(max_width=800)"]},{"cell_type":"code","execution_count":6,"id":"e266e5fe","metadata":{"id":"e266e5fe","outputId":"f839fac3-5412-45ba-df5d-abfe027dd842","colab":{"base_uri":"https://localhost:8080/","height":206},"executionInfo":{"status":"ok","timestamp":1677671288349,"user_tz":-120,"elapsed":444,"user":{"displayName":"Tom Shani","userId":"00667426488827942961"}}},"outputs":[{"output_type":"execute_result","data":{"text/plain":[" fastdup_id component_id sum count mean_distance min_distance \\\n","19859 19859 19811 5.8648 6.0 0.9775 0.9721 \n","21874 21874 21803 6.0000 6.0 1.0000 1.0000 \n","21854 21854 21803 6.0000 6.0 1.0000 1.0000 \n","19862 19862 19811 5.8648 6.0 0.9775 0.9721 \n","19861 19861 19811 5.8648 6.0 0.9775 0.9721 \n","\n"," max_distance img_filename error_code is_valid \n","19859 0.9856 chicken_quesadilla/535057.jpg VALID True \n","21874 1.0000 chocolate_cake/55122.jpg VALID True \n","21854 1.0000 chocolate_cake/49494.jpg VALID True \n","19862 0.9856 chicken_quesadilla/535546.jpg VALID True \n","19861 0.9856 chicken_quesadilla/535532.jpg VALID True "],"text/html":["\n","
\n","
\n","
\n","\n","
\n"," \n"," \n"," \n"," fastdup_id \n"," component_id \n"," sum \n"," count \n"," mean_distance \n"," min_distance \n"," max_distance \n"," img_filename \n"," error_code \n"," is_valid \n"," \n"," \n"," \n"," \n"," 19859 \n"," 19859 \n"," 19811 \n"," 5.8648 \n"," 6.0 \n"," 0.9775 \n"," 0.9721 \n"," 0.9856 \n"," chicken_quesadilla/535057.jpg \n"," VALID \n"," True \n"," \n"," \n"," 21874 \n"," 21874 \n"," 21803 \n"," 6.0000 \n"," 6.0 \n"," 1.0000 \n"," 1.0000 \n"," 1.0000 \n"," chocolate_cake/55122.jpg \n"," VALID \n"," True \n"," \n"," \n"," 21854 \n"," 21854 \n"," 21803 \n"," 6.0000 \n"," 6.0 \n"," 1.0000 \n"," 1.0000 \n"," 1.0000 \n"," chocolate_cake/49494.jpg \n"," VALID \n"," True \n"," \n"," \n"," 19862 \n"," 19862 \n"," 19811 \n"," 5.8648 \n"," 6.0 \n"," 0.9775 \n"," 0.9721 \n"," 0.9856 \n"," chicken_quesadilla/535546.jpg \n"," VALID \n"," True \n"," \n"," \n"," 19861 \n"," 19861 \n"," 19811 \n"," 5.8648 \n"," 6.0 \n"," 0.9775 \n"," 0.9721 \n"," 0.9856 \n"," chicken_quesadilla/535532.jpg \n"," VALID \n"," True \n"," \n"," \n","
\n","
\n","
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":6}],"source":["# get a list of clusters with duplicate images using connected components\n","cc_df, _ = fd.connected_components()\n","cc_df[cc_df['count'] > 0.0].sort_values(by=['count'], ascending=False).head()"]},{"cell_type":"markdown","id":"36976bdb-9875-441e-a041-5e1e1304d99b","metadata":{"id":"36976bdb-9875-441e-a041-5e1e1304d99b"},"source":["## Connected Components Grouping"]},{"cell_type":"code","execution_count":7,"id":"168bc163","metadata":{"id":"168bc163","executionInfo":{"status":"ok","timestamp":1677671291446,"user_tz":-120,"elapsed":425,"user":{"displayName":"Tom Shani","userId":"00667426488827942961"}}},"outputs":[],"source":["# a function to group connected components\n","def get_clusters(df, sort_by='count', min_count=2, ascending=False):\n"," # columns to aggregate\n"," agg_dict = {'img_filename': list, 'mean_distance': max, 'count': len}\n","\n"," if 'label' in df.columns:\n"," agg_dict['label'] = list\n"," \n"," # filter by count\n"," df = df[df['count'] >= min_count]\n"," \n"," # group and aggregate columns\n"," grouped_df = df.groupby('component_id').agg(agg_dict)\n"," \n"," # sort\n"," grouped_df = grouped_df.sort_values(by=[sort_by], ascending=ascending)\n"," return grouped_df"]},{"cell_type":"code","execution_count":8,"id":"2d0ebd16","metadata":{"id":"2d0ebd16","outputId":"b3b9f360-6cd8-4bb5-8590-af7c556c88fb","colab":{"base_uri":"https://localhost:8080/","height":175},"executionInfo":{"status":"ok","timestamp":1677671292998,"user_tz":-120,"elapsed":8,"user":{"displayName":"Tom Shani","userId":"00667426488827942961"}}},"outputs":[{"output_type":"execute_result","data":{"text/plain":[" img_filename \\\n","component_id \n","26465 [crab_cakes/2780617.jpg, crab_cakes/2780621.jpg, crab_cakes/2780623.jpg] \n","21803 [chocolate_cake/49494.jpg, chocolate_cake/51717.jpg, chocolate_cake/55122.jpg] \n","35796 [escargots/637185.jpg, escargots/637187.jpg, escargots/637188.jpg] \n","\n"," mean_distance count \n","component_id \n","26465 0.9759 3 \n","21803 1.0000 3 \n","35796 0.9798 3 "],"text/html":["\n"," \n","
\n","
\n","\n","
\n"," \n"," \n"," \n"," img_filename \n"," mean_distance \n"," count \n"," \n"," \n"," component_id \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 26465 \n"," [crab_cakes/2780617.jpg, crab_cakes/2780621.jpg, crab_cakes/2780623.jpg] \n"," 0.9759 \n"," 3 \n"," \n"," \n"," 21803 \n"," [chocolate_cake/49494.jpg, chocolate_cake/51717.jpg, chocolate_cake/55122.jpg] \n"," 1.0000 \n"," 3 \n"," \n"," \n"," 35796 \n"," [escargots/637185.jpg, escargots/637187.jpg, escargots/637188.jpg] \n"," 0.9798 \n"," 3 \n"," \n"," \n","
\n","
\n","
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":8}],"source":["# get clusters using connected components\n","cluster_df = get_clusters(cc_df)\n","cluster_df.head(3)"]},{"cell_type":"code","execution_count":9,"id":"a1a9ec14","metadata":{"id":"a1a9ec14","outputId":"77cd7624-0eae-4567-fc59-eb49287d6cb4","colab":{"base_uri":"https://localhost:8080/","height":175},"executionInfo":{"status":"ok","timestamp":1677671296722,"user_tz":-120,"elapsed":831,"user":{"displayName":"Tom Shani","userId":"00667426488827942961"}}},"outputs":[{"output_type":"execute_result","data":{"text/plain":[" img_filename \\\n","component_id \n","131 [apple_pie/1461580.jpg, apple_pie/1469191.jpg] \n","41128 [french_onion_soup/1741871.jpg, french_onion_soup/1741875.jpg] \n","57729 [hummus/620711.jpg, hummus/622847.jpg] \n","\n"," mean_distance count \n","component_id \n","131 1.0 2 \n","41128 1.0 2 \n","57729 1.0 2 "],"text/html":["\n"," \n","
\n","
\n","\n","
\n"," \n"," \n"," \n"," img_filename \n"," mean_distance \n"," count \n"," \n"," \n"," component_id \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 131 \n"," [apple_pie/1461580.jpg, apple_pie/1469191.jpg] \n"," 1.0 \n"," 2 \n"," \n"," \n"," 41128 \n"," [french_onion_soup/1741871.jpg, french_onion_soup/1741875.jpg] \n"," 1.0 \n"," 2 \n"," \n"," \n"," 57729 \n"," [hummus/620711.jpg, hummus/622847.jpg] \n"," 1.0 \n"," 2 \n"," \n"," \n","
\n","
\n","
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":9}],"source":["# get clusters sorted differently\n","get_clusters(cc_df, sort_by='mean_distance').head(3)"]},{"cell_type":"code","execution_count":10,"id":"ed5b9ad3-e428-45f4-a74b-a697e3711a9a","metadata":{"id":"ed5b9ad3-e428-45f4-a74b-a697e3711a9a","executionInfo":{"status":"ok","timestamp":1677671300533,"user_tz":-120,"elapsed":359,"user":{"displayName":"Tom Shani","userId":"00667426488827942961"}}},"outputs":[],"source":["# It's a good start, but as we can see there are not that many duplicates here, and the data may contain more.\n","# Let's lower the threshold a bit and re-evaluate the duplicates case."]},{"cell_type":"markdown","id":"6b9899fc","metadata":{"id":"6b9899fc"},"source":["## Re-run with lower threshold\n","Now we have more clusters containing more of the images, and we are able to remove highly similar images with higher recall."]},{"cell_type":"code","execution_count":11,"id":"f0337c2d","metadata":{"id":"f0337c2d","outputId":"af0e6cc8-4919-4bce-cfc9-da9658c4cf68","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1677673944455,"user_tz":-120,"elapsed":2641287,"user":{"displayName":"Tom Shani","userId":"00667426488827942961"}}},"outputs":[{"output_type":"stream","name":"stdout","text":["FastDup Software, (C) copyright 2022 Dr. Amir Alush and Dr. Danny Bickson.\n","2023-03-01 11:48:22 [INFO] Going to loop over dir food-101/images\n","2023-03-01 11:48:24 [INFO] Found total 101000 images to run on\n","2023-03-01 12:19:09 [INFO] Found total 101000 images to run on\n","2023-03-01 12:29:58 [INFO] 648922) Finished write_index() NN model\n","2023-03-01 12:29:58 [INFO] Stored nn model index file fastdup_food101/nnf.index\n","2023-03-01 12:32:14 [INFO] Total time took 2630145 ms\n","2023-03-01 12:32:14 [INFO] Found a total of 170 fully identical images (d>0.990), which are 0.06 %\n","2023-03-01 12:32:14 [INFO] Found a total of 88 nearly identical images(d>0.980), which are 0.03 %\n","2023-03-01 12:32:14 [INFO] Found a total of 5236 above threshold images (d>0.900), which are 1.73 %\n","2023-03-01 12:32:14 [INFO] Found a total of 10100 outlier images (d<0.050), which are 3.33 %\n","2023-03-01 12:32:14 [INFO] Min distance found 0.379 max distance 1.000\n","2023-03-01 12:32:14 [INFO] Running connected components for ccthreshold 0.900000 \n",".0\n"," ########################################################################################\n","\n","Dataset Analysis Summary: \n","\n"," Dataset contains 101000 images\n"," Valid images are 100.00% (101,000) of the data, invalid are 0.00% (0) of the data\n"," Similarity: 1.70% (1,718) belong to 30 similarity clusters (components).\n"," 98.30% (99,282) images do not belong to any similarity cluster.\n"," Largest cluster has 79 (0.08%) images.\n"," For a detailed analysis, use `.connected_components()`\n","(similarity threshold used is 0.9, connected component threshold used is 0.9).\n","\n"," Outliers: 5.97% (6,029) of images are possible outliers, and fall in the bottom 5.00% of similarity values.\n"," For a detailed list of outliers, use `.outliers(data=True)`.\n","\n"," ########################################################################################\n","\n","Dataset Analysis Summary: \n","\n"," Dataset contains 101000 images\n"," Valid images are 100.00% (101,000) of the data, invalid are 0.00% (0) of the data\n"," Similarity: 1.70% (1,718) belong to 30 similarity clusters (components).\n"," 98.30% (99,282) images do not belong to any similarity cluster.\n"," Largest cluster has 79 (0.08%) images.\n"," For a detailed analysis, use `.connected_components()`\n","(similarity threshold used is 0.9, connected component threshold used is 0.9).\n","\n"," Outliers: 5.97% (6,029) of images are possible outliers, and fall in the bottom 5.00% of similarity values.\n"," For a detailed list of outliers, use `.outliers(data=True)`.\n"]},{"output_type":"execute_result","data":{"text/plain":["['Dataset contains 101000 images',\n"," 'Valid images are 100.00% (101,000) of the data, invalid are 0.00% (0) of the data',\n"," 'Similarity: 1.70% (1,718) belong to 30 similarity clusters (components).',\n"," '98.30% (99,282) images do not belong to any similarity cluster.',\n"," 'Largest cluster has 79 (0.08%) images.',\n"," 'For a detailed analysis, use `.connected_components()`\\n(similarity threshold used is 0.9, connected component threshold used is 0.9).\\n',\n"," 'Outliers: 5.97% (6,029) of images are possible outliers, and fall in the bottom 5.00% of similarity values.',\n"," 'For a detailed list of outliers, use `.outliers(data=True)`.']"]},"metadata":{},"execution_count":11}],"source":["# run with ccthreshold = 0.9 compared to default 0.96\n","fd.run(ccthreshold=0.9, overwrite=True)\n","fd.summary()"]},{"cell_type":"code","execution_count":12,"id":"38e38a88","metadata":{"id":"38e38a88","outputId":"df67f615-6956-4935-99ac-920a6537d906","colab":{"base_uri":"https://localhost:8080/","height":206},"executionInfo":{"status":"ok","timestamp":1677675399104,"user_tz":-120,"elapsed":889,"user":{"displayName":"Tom Shani","userId":"00667426488827942961"}}},"outputs":[{"output_type":"execute_result","data":{"text/plain":[" fastdup_id component_id sum count mean_distance min_distance \\\n","25103 25103 24810 73.0287 79.0 0.9244 0.9004 \n","40109 40109 24810 73.0287 79.0 0.9244 0.9004 \n","40824 40824 24810 73.0287 79.0 0.9244 0.9004 \n","40807 40807 24810 73.0287 79.0 0.9244 0.9004 \n","40787 40787 24810 73.0287 79.0 0.9244 0.9004 \n","\n"," max_distance img_filename error_code is_valid \n","25103 0.9467 club_sandwich/1318118.jpg VALID True \n","40109 0.9467 french_fries/147628.jpg VALID True \n","40824 0.9467 french_fries/3907871.jpg VALID True \n","40807 0.9467 french_fries/3832957.jpg VALID True \n","40787 0.9467 french_fries/3746805.jpg VALID True "],"text/html":["\n"," \n","
\n","
\n","\n","
\n"," \n"," \n"," \n"," fastdup_id \n"," component_id \n"," sum \n"," count \n"," mean_distance \n"," min_distance \n"," max_distance \n"," img_filename \n"," error_code \n"," is_valid \n"," \n"," \n"," \n"," \n"," 25103 \n"," 25103 \n"," 24810 \n"," 73.0287 \n"," 79.0 \n"," 0.9244 \n"," 0.9004 \n"," 0.9467 \n"," club_sandwich/1318118.jpg \n"," VALID \n"," True \n"," \n"," \n"," 40109 \n"," 40109 \n"," 24810 \n"," 73.0287 \n"," 79.0 \n"," 0.9244 \n"," 0.9004 \n"," 0.9467 \n"," french_fries/147628.jpg \n"," VALID \n"," True \n"," \n"," \n"," 40824 \n"," 40824 \n"," 24810 \n"," 73.0287 \n"," 79.0 \n"," 0.9244 \n"," 0.9004 \n"," 0.9467 \n"," french_fries/3907871.jpg \n"," VALID \n"," True \n"," \n"," \n"," 40807 \n"," 40807 \n"," 24810 \n"," 73.0287 \n"," 79.0 \n"," 0.9244 \n"," 0.9004 \n"," 0.9467 \n"," french_fries/3832957.jpg \n"," VALID \n"," True \n"," \n"," \n"," 40787 \n"," 40787 \n"," 24810 \n"," 73.0287 \n"," 79.0 \n"," 0.9244 \n"," 0.9004 \n"," 0.9467 \n"," french_fries/3746805.jpg \n"," VALID \n"," True \n"," \n"," \n","
\n","
\n","
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":12}],"source":["# Now we see a much larger amount of images clustered together\n","cc90_df, _ = fd.connected_components()\n","cc90_df[cc90_df['count'] > 0.0].sort_values(by=['count'], ascending=False).head()"]},{"cell_type":"code","execution_count":13,"id":"bc129034","metadata":{"scrolled":true,"id":"bc129034","colab":{"base_uri":"https://localhost:8080/","height":1000,"output_embedded_package_id":"1tLYb9BG9aAoRmPD2Pm99LKnTZWjFU_n4"},"executionInfo":{"status":"ok","timestamp":1677675425276,"user_tz":-120,"elapsed":23407,"user":{"displayName":"Tom Shani","userId":"00667426488827942961"}},"outputId":"0c75e1cc-0586-431d-f11a-2f373e1ae922"},"outputs":[{"output_type":"display_data","data":{"text/plain":"Output hidden; open in https://colab.research.google.com to view."},"metadata":{}}],"source":["# let's see the new clusters\n","fd.vis.component_gallery(max_width=800)"]},{"cell_type":"markdown","id":"e06ab2b4","metadata":{"id":"e06ab2b4"},"source":["## Get a list of duplicates to remove"]},{"cell_type":"code","execution_count":14,"id":"717e6151","metadata":{"scrolled":true,"id":"717e6151","outputId":"f350e7e8-d0b8-4f01-8de0-200feef33ce8","colab":{"base_uri":"https://localhost:8080/","height":479},"executionInfo":{"status":"ok","timestamp":1677675458050,"user_tz":-120,"elapsed":384,"user":{"displayName":"Tom Shani","userId":"00667426488827942961"}}},"outputs":[{"output_type":"execute_result","data":{"text/plain":[" img_filename \\\n","component_id \n","24810 [club_sandwich/1297247.jpg, club_sandwich/1318118.jpg, club_sandwich/1886101.jpg, club_sandwich/2778614.jpg, club_sandwich/3106065.jpg, club_sandwich/588478.jpg, french_fries/1099260.jpg, french_fries/1295274.jpg, french_fries/1361604.jpg, french_fries/1384733.jpg, french_fries/147628.jpg, french_fries/1610240.jpg, french_fries/1692353.jpg, french_fries/1700344.jpg, french_fries/1712331.jpg, french_fries/1740113.jpg, french_fries/1810352.jpg, french_fries/1969264.jpg, french_fries/2073415.jpg, french_fries/2246387.jpg, french_fries/2348229.jpg, french_fries/2369999.jpg, french_fries/2700217.jpg, french_fries/2761796.jpg, french_fries/2885926.jpg, french_fries/2936284.jpg, french_fries/3030853.jpg, french_fries/3069835.jpg, french_fries/3359887.jpg, french_fries/3405511.jpg, french_fries/3423618.jpg, french_fries/3499831.jpg, french_fries/3669402.jpg, french_fries/3673168.jpg, french_fries/3697215.jpg, french_fries/3746805.jpg, french_fries/3832957.jpg, french_fries/3907871.jpg, french_fries/467106.jpg, french_fries/889641.jpg] \n","18229 [chicken_curry/2394967.jpg, chicken_curry/2701143.jpg, chicken_curry/882723.jpg, hot_and_sour_soup/1151861.jpg, hot_and_sour_soup/1167380.jpg, hot_and_sour_soup/1400511.jpg, hot_and_sour_soup/1617113.jpg, hot_and_sour_soup/1670529.jpg, hot_and_sour_soup/2041812.jpg, hot_and_sour_soup/2367229.jpg, hot_and_sour_soup/2377494.jpg, hot_and_sour_soup/2520927.jpg, hot_and_sour_soup/3086202.jpg, hot_and_sour_soup/3113531.jpg, hot_and_sour_soup/3286625.jpg, hot_and_sour_soup/3428336.jpg, hot_and_sour_soup/3452669.jpg, hot_and_sour_soup/3552976.jpg, hot_and_sour_soup/3567487.jpg, hot_and_sour_soup/3568665.jpg, hot_and_sour_soup/3601021.jpg, hot_and_sour_soup/3706507.jpg, hot_and_sour_soup/387487.jpg, hot_and_sour_soup/478316.jpg, hot_and_sour_soup/564763.jpg, lobster_bisque/1346617.jpg, lobster_bisque/1826587.jpg, lobster_bisque/2917736.jpg, lobster_bisque/3282626.jpg, lobster_bisque/3319694.jpg, lobster_bisque/3358721.jpg, lobster_bisque/3414592.jpg, lobster_bisque/3466502.jpg] \n","26394 [crab_cakes/3467918.jpg, pad_thai/1709738.jpg, pad_thai/3059603.jpg, spaghetti_bolognese/3565695.jpg, spaghetti_carbonara/1117183.jpg, spaghetti_carbonara/1390373.jpg, spaghetti_carbonara/1559267.jpg, spaghetti_carbonara/1668631.jpg, spaghetti_carbonara/1739526.jpg, spaghetti_carbonara/1891700.jpg, spaghetti_carbonara/190173.jpg, spaghetti_carbonara/1936669.jpg, spaghetti_carbonara/1940255.jpg, spaghetti_carbonara/2228065.jpg, spaghetti_carbonara/2281641.jpg, spaghetti_carbonara/2567706.jpg, spaghetti_carbonara/2774715.jpg, spaghetti_carbonara/2796656.jpg, spaghetti_carbonara/2835081.jpg, spaghetti_carbonara/2967972.jpg, spaghetti_carbonara/2980079.jpg, spaghetti_carbonara/3045854.jpg, spaghetti_carbonara/3377897.jpg, spaghetti_carbonara/3581296.jpg, spaghetti_carbonara/3708340.jpg, spaghetti_carbonara/3908531.jpg, spaghetti_carbonara/560793.jpg, spaghetti_carbonara/733714.jpg, spaghetti_carbonara/755025.jpg] \n","\n"," mean_distance count \n","component_id \n","24810 0.9244 40 \n","18229 0.9250 33 \n","26394 0.9279 29 "],"text/html":["\n"," \n","
\n","
\n","\n","
\n"," \n"," \n"," \n"," img_filename \n"," mean_distance \n"," count \n"," \n"," \n"," component_id \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 24810 \n"," [club_sandwich/1297247.jpg, club_sandwich/1318118.jpg, club_sandwich/1886101.jpg, club_sandwich/2778614.jpg, club_sandwich/3106065.jpg, club_sandwich/588478.jpg, french_fries/1099260.jpg, french_fries/1295274.jpg, french_fries/1361604.jpg, french_fries/1384733.jpg, french_fries/147628.jpg, french_fries/1610240.jpg, french_fries/1692353.jpg, french_fries/1700344.jpg, french_fries/1712331.jpg, french_fries/1740113.jpg, french_fries/1810352.jpg, french_fries/1969264.jpg, french_fries/2073415.jpg, french_fries/2246387.jpg, french_fries/2348229.jpg, french_fries/2369999.jpg, french_fries/2700217.jpg, french_fries/2761796.jpg, french_fries/2885926.jpg, french_fries/2936284.jpg, french_fries/3030853.jpg, french_fries/3069835.jpg, french_fries/3359887.jpg, french_fries/3405511.jpg, french_fries/3423618.jpg, french_fries/3499831.jpg, french_fries/3669402.jpg, french_fries/3673168.jpg, french_fries/3697215.jpg, french_fries/3746805.jpg, french_fries/3832957.jpg, french_fries/3907871.jpg, french_fries/467106.jpg, french_fries/889641.jpg] \n"," 0.9244 \n"," 40 \n"," \n"," \n"," 18229 \n"," [chicken_curry/2394967.jpg, chicken_curry/2701143.jpg, chicken_curry/882723.jpg, hot_and_sour_soup/1151861.jpg, hot_and_sour_soup/1167380.jpg, hot_and_sour_soup/1400511.jpg, hot_and_sour_soup/1617113.jpg, hot_and_sour_soup/1670529.jpg, hot_and_sour_soup/2041812.jpg, hot_and_sour_soup/2367229.jpg, hot_and_sour_soup/2377494.jpg, hot_and_sour_soup/2520927.jpg, hot_and_sour_soup/3086202.jpg, hot_and_sour_soup/3113531.jpg, hot_and_sour_soup/3286625.jpg, hot_and_sour_soup/3428336.jpg, hot_and_sour_soup/3452669.jpg, hot_and_sour_soup/3552976.jpg, hot_and_sour_soup/3567487.jpg, hot_and_sour_soup/3568665.jpg, hot_and_sour_soup/3601021.jpg, hot_and_sour_soup/3706507.jpg, hot_and_sour_soup/387487.jpg, hot_and_sour_soup/478316.jpg, hot_and_sour_soup/564763.jpg, lobster_bisque/1346617.jpg, lobster_bisque/1826587.jpg, lobster_bisque/2917736.jpg, lobster_bisque/3282626.jpg, lobster_bisque/3319694.jpg, lobster_bisque/3358721.jpg, lobster_bisque/3414592.jpg, lobster_bisque/3466502.jpg] \n"," 0.9250 \n"," 33 \n"," \n"," \n"," 26394 \n"," [crab_cakes/3467918.jpg, pad_thai/1709738.jpg, pad_thai/3059603.jpg, spaghetti_bolognese/3565695.jpg, spaghetti_carbonara/1117183.jpg, spaghetti_carbonara/1390373.jpg, spaghetti_carbonara/1559267.jpg, spaghetti_carbonara/1668631.jpg, spaghetti_carbonara/1739526.jpg, spaghetti_carbonara/1891700.jpg, spaghetti_carbonara/190173.jpg, spaghetti_carbonara/1936669.jpg, spaghetti_carbonara/1940255.jpg, spaghetti_carbonara/2228065.jpg, spaghetti_carbonara/2281641.jpg, spaghetti_carbonara/2567706.jpg, spaghetti_carbonara/2774715.jpg, spaghetti_carbonara/2796656.jpg, spaghetti_carbonara/2835081.jpg, spaghetti_carbonara/2967972.jpg, spaghetti_carbonara/2980079.jpg, spaghetti_carbonara/3045854.jpg, spaghetti_carbonara/3377897.jpg, spaghetti_carbonara/3581296.jpg, spaghetti_carbonara/3708340.jpg, spaghetti_carbonara/3908531.jpg, spaghetti_carbonara/560793.jpg, spaghetti_carbonara/733714.jpg, spaghetti_carbonara/755025.jpg] \n"," 0.9279 \n"," 29 \n"," \n"," \n","
\n","
\n","
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":14}],"source":["# List the largest clusters. These include non identical but larger clusters.\n","clusters_df = get_clusters(cc90_df)\n","clusters_df.head(3)"]},{"cell_type":"code","execution_count":15,"id":"17f95c09","metadata":{"id":"17f95c09","outputId":"17ed0185-3deb-4cca-e5e4-5ab9c621abab","colab":{"base_uri":"https://localhost:8080/","height":175},"executionInfo":{"status":"ok","timestamp":1677675461904,"user_tz":-120,"elapsed":454,"user":{"displayName":"Tom Shani","userId":"00667426488827942961"}}},"outputs":[{"output_type":"execute_result","data":{"text/plain":[" img_filename mean_distance \\\n","component_id \n","131 [apple_pie/1461580.jpg, apple_pie/1469191.jpg] 1.0 \n","31820 [dumplings/167000.jpg, dumplings/180290.jpg] 1.0 \n","48041 [greek_salad/857079.jpg, greek_salad/862426.jpg] 1.0 \n","\n"," count \n","component_id \n","131 2 \n","31820 2 \n","48041 2 "],"text/html":["\n"," \n","
\n","
\n","\n","
\n"," \n"," \n"," \n"," img_filename \n"," mean_distance \n"," count \n"," \n"," \n"," component_id \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 131 \n"," [apple_pie/1461580.jpg, apple_pie/1469191.jpg] \n"," 1.0 \n"," 2 \n"," \n"," \n"," 31820 \n"," [dumplings/167000.jpg, dumplings/180290.jpg] \n"," 1.0 \n"," 2 \n"," \n"," \n"," 48041 \n"," [greek_salad/857079.jpg, greek_salad/862426.jpg] \n"," 1.0 \n"," 2 \n"," \n"," \n","
\n","
\n","
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":15}],"source":["# List the clusters with the highest similarity. These are identical but smaller clusters.\n","get_clusters(cc90_df, sort_by='mean_distance').head(3)"]},{"cell_type":"code","execution_count":16,"id":"71561227","metadata":{"id":"71561227","outputId":"e58215b1-9c33-4153-8c2b-59567273ee51","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1677675470152,"user_tz":-120,"elapsed":257,"user":{"displayName":"Tom Shani","userId":"00667426488827942961"}}},"outputs":[{"output_type":"stream","name":"stdout","text":["Found 2153 highly similar images to discard\n"]}],"source":["# First sample from each cluster that is kept\n","cluster_images_to_keep = []\n","cluster_images_to_discard = []\n","\n","for cluster_file_list in clusters_df.img_filename:\n"," # keep first file, discard rest\n"," keep = cluster_file_list[0]\n"," discard = cluster_file_list[1:]\n"," \n"," cluster_images_to_keep.append(keep)\n"," cluster_images_to_discard.extend(discard)\n","\n","cluster_images_to_discard = set(cluster_images_to_discard)\n","print(f\"Found {len(cluster_images_to_discard)} highly similar images to discard\")"]},{"cell_type":"markdown","id":"97277406","metadata":{"id":"97277406"},"source":["# Find outliers"]},{"cell_type":"code","execution_count":17,"id":"9cb2d187","metadata":{"id":"9cb2d187","outputId":"7d6fb883-06e1-4a7c-c9ad-5897ba66a452","colab":{"base_uri":"https://localhost:8080/","height":206},"executionInfo":{"status":"ok","timestamp":1677675523356,"user_tz":-120,"elapsed":616,"user":{"displayName":"Tom Shani","userId":"00667426488827942961"}}},"outputs":[{"output_type":"execute_result","data":{"text/plain":[" index outlier nearest distance img_filename_outlier \\\n","0 10099 9797 96131 0.379365 breakfast_burrito/462294.jpg \n","1 10098 63325 38290 0.429241 macarons/2117640.jpg \n","2 10095 96131 83667 0.515785 tacos/1505262.jpg \n","3 10094 89017 25421 0.528563 shrimp_and_grits/1047420.jpg \n","4 10093 95577 22575 0.546918 sushi/3100962.jpg \n","\n"," error_code_outlier is_valid_outlier img_filename_nearest \\\n","0 VALID True tacos/1505262.jpg \n","1 VALID True fish_and_chips/2079080.jpg \n","2 VALID True red_velvet_cake/3143813.jpg \n","3 VALID True club_sandwich/2465517.jpg \n","4 VALID True chocolate_mousse/303176.jpg \n","\n"," error_code_nearest is_valid_nearest \n","0 VALID True \n","1 VALID True \n","2 VALID True \n","3 VALID True \n","4 VALID True "],"text/html":["\n"," \n","
\n","
\n","\n","
\n"," \n"," \n"," \n"," index \n"," outlier \n"," nearest \n"," distance \n"," img_filename_outlier \n"," error_code_outlier \n"," is_valid_outlier \n"," img_filename_nearest \n"," error_code_nearest \n"," is_valid_nearest \n"," \n"," \n"," \n"," \n"," 0 \n"," 10099 \n"," 9797 \n"," 96131 \n"," 0.379365 \n"," breakfast_burrito/462294.jpg \n"," VALID \n"," True \n"," tacos/1505262.jpg \n"," VALID \n"," True \n"," \n"," \n"," 1 \n"," 10098 \n"," 63325 \n"," 38290 \n"," 0.429241 \n"," macarons/2117640.jpg \n"," VALID \n"," True \n"," fish_and_chips/2079080.jpg \n"," VALID \n"," True \n"," \n"," \n"," 2 \n"," 10095 \n"," 96131 \n"," 83667 \n"," 0.515785 \n"," tacos/1505262.jpg \n"," VALID \n"," True \n"," red_velvet_cake/3143813.jpg \n"," VALID \n"," True \n"," \n"," \n"," 3 \n"," 10094 \n"," 89017 \n"," 25421 \n"," 0.528563 \n"," shrimp_and_grits/1047420.jpg \n"," VALID \n"," True \n"," club_sandwich/2465517.jpg \n"," VALID \n"," True \n"," \n"," \n"," 4 \n"," 10093 \n"," 95577 \n"," 22575 \n"," 0.546918 \n"," sushi/3100962.jpg \n"," VALID \n"," True \n"," chocolate_mousse/303176.jpg \n"," VALID \n"," True \n"," \n"," \n","
\n","
\n","
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":17}],"source":["# show furthest outliers\n","fd.outliers().head(5)"]},{"cell_type":"code","execution_count":18,"id":"c65a330c","metadata":{"scrolled":true,"id":"c65a330c","colab":{"base_uri":"https://localhost:8080/","height":1000},"executionInfo":{"status":"ok","timestamp":1677675529008,"user_tz":-120,"elapsed":2075,"user":{"displayName":"Tom Shani","userId":"00667426488827942961"}},"outputId":"43384e75-1e3b-4dee-a23b-9ef9a318769a"},"outputs":[{"output_type":"stream","name":"stderr","text":["100%|██████████| 20/20 [00:00<00:00, 7099.36it/s]"]},{"output_type":"stream","name":"stdout","text":["fastdup_food101/galleries/outliers_20230301125846/food-101_images_breakfast_burrito_462294_9797.0.jpg.jpg\n","fastdup_food101/galleries/outliers_20230301125846/food-101_images_macarons_2117640_63325.0.jpg.jpg\n","fastdup_food101/galleries/outliers_20230301125846/food-101_images_tacos_1505262_96131.0.jpg.jpg\n","fastdup_food101/galleries/outliers_20230301125846/food-101_images_shrimp_and_grits_1047420_89017.0.jpg.jpg\n","fastdup_food101/galleries/outliers_20230301125846/food-101_images_sushi_3100962_95577.0.jpg.jpg\n","fastdup_food101/galleries/outliers_20230301125846/food-101_images_pho_2399877_75357.0.jpg.jpg\n","fastdup_food101/galleries/outliers_20230301125846/food-101_images_pho_1840846_75236.0.jpg.jpg\n","fastdup_food101/galleries/outliers_20230301125846/food-101_images_chocolate_cake_2518457_21410.0.jpg.jpg\n","fastdup_food101/galleries/outliers_20230301125846/food-101_images_red_velvet_cake_2894652_83590.0.jpg.jpg\n","fastdup_food101/galleries/outliers_20230301125846/food-101_images_macarons_3785921_63762.0.jpg.jpg\n","fastdup_food101/galleries/outliers_20230301125846/food-101_images_waffles_720603_100900.0.jpg.jpg\n"]},{"output_type":"stream","name":"stderr","text":["\n"]},{"output_type":"stream","name":"stdout","text":["fastdup_food101/galleries/outliers_20230301125846/food-101_images_pad_thai_2614597_70494.0.jpg.jpg\n","fastdup_food101/galleries/outliers_20230301125846/food-101_images_prime_rib_587532_79858.0.jpg.jpg\n","fastdup_food101/galleries/outliers_20230301125846/food-101_images_macarons_2591602_63463.0.jpg.jpg\n","fastdup_food101/galleries/outliers_20230301125846/food-101_images_tacos_1091159_96024.0.jpg.jpg\n","fastdup_food101/galleries/outliers_20230301125846/food-101_images_hamburger_1608876_53171.0.jpg.jpg\n","fastdup_food101/galleries/outliers_20230301125846/food-101_images_frozen_yogurt_3577020_45688.0.jpg.jpg\n","fastdup_food101/galleries/outliers_20230301125846/food-101_images_peking_duck_388951_74793.0.jpg.jpg\n","fastdup_food101/galleries/outliers_20230301125846/food-101_images_steak_2788759_93480.0.jpg.jpg\n","fastdup_food101/galleries/outliers_20230301125846/food-101_images_ice_cream_1837798_58194.0.jpg.jpg\n","Stored outliers visual view in fastdup_food101/galleries/outliers_20230301125846/outliers.html\n"]},{"output_type":"display_data","data":{"text/plain":[""],"text/html":[" \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," Outliers Report \n"," \n"," \n","\n","\n","\n"," \n"," \n"," \n"," \n"," \n","
\n","
\n","
Outliers Report
\n","
\n","
\n","
\n"," \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," Distance \n"," 0.379365 \n"," \n","\n"," Path \n"," breakfast_burrito/462294.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," Distance \n"," 0.429241 \n"," \n","\n"," Path \n"," macarons/2117640.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," Distance \n"," 0.515785 \n"," \n","\n"," Path \n"," tacos/1505262.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," Distance \n"," 0.528563 \n"," \n","\n"," Path \n"," shrimp_and_grits/1047420.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," Distance \n"," 0.546918 \n"," \n","\n"," Path \n"," sushi/3100962.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," Distance \n"," 0.573438 \n"," \n","\n"," Path \n"," pho/2399877.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," Distance \n"," 0.574433 \n"," \n","\n"," Path \n"," pho/1840846.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," Distance \n"," 0.576987 \n"," \n","\n"," Path \n"," chocolate_cake/2518457.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," Distance \n"," 0.583183 \n"," \n","\n"," Path \n"," red_velvet_cake/2894652.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," Distance \n"," 0.585142 \n"," \n","\n"," Path \n"," macarons/3785921.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," Distance \n"," 0.591061 \n"," \n","\n"," Path \n"," waffles/720603.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," Distance \n"," 0.592497 \n"," \n","\n"," Path \n"," pad_thai/2614597.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," Distance \n"," 0.594438 \n"," \n","\n"," Path \n"," prime_rib/587532.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," Distance \n"," 0.594464 \n"," \n","\n"," Path \n"," macarons/2591602.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," Distance \n"," 0.596057 \n"," \n","\n"," Path \n"," tacos/1091159.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," Distance \n"," 0.596191 \n"," \n","\n"," Path \n"," hamburger/1608876.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," Distance \n"," 0.597233 \n"," \n","\n"," Path \n"," frozen_yogurt/3577020.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," Distance \n"," 0.601192 \n"," \n","\n"," Path \n"," peking_duck/388951.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," Distance \n"," 0.605568 \n"," \n","\n"," Path \n"," steak/2788759.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," Distance \n"," 0.610535 \n"," \n","\n"," Path \n"," ice_cream/1837798.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n"," \n","
\n"," \n"," \n"," \n"," \n"," "]},"metadata":{}}],"source":["# visualize outliers\n","fd.vis.outliers_gallery()"]},{"cell_type":"markdown","id":"3a9268dc","metadata":{"id":"3a9268dc"},"source":["# Remove broken images\n","Using fastdup we are able to recover a list of files that are corrupted or that could not be loaded from various reasons. The reason is listed for each image. We will fetch them, and add them to our list of images to remove. Food-101 is meticoulsly curated, so just for the sake of demonstration we've added one empty image."]},{"cell_type":"code","execution_count":19,"id":"09080622","metadata":{"id":"09080622","outputId":"bab9ce79-8bf1-42c7-ccc5-b383b5c6787d","colab":{"base_uri":"https://localhost:8080/","height":49},"executionInfo":{"status":"ok","timestamp":1677675538250,"user_tz":-120,"elapsed":479,"user":{"displayName":"Tom Shani","userId":"00667426488827942961"}}},"outputs":[{"output_type":"execute_result","data":{"text/plain":["Empty DataFrame\n","Columns: [img_filename, fastdup_id, error_code, is_valid]\n","Index: []"],"text/html":["\n"," \n","
\n","
\n","\n","
\n"," \n"," \n"," \n"," img_filename \n"," fastdup_id \n"," error_code \n"," is_valid \n"," \n"," \n"," \n"," \n","
\n","
\n","
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":19}],"source":["inv = fd.invalid_instances()\n","inv"]},{"cell_type":"code","execution_count":20,"id":"58aa55c1","metadata":{"id":"58aa55c1","outputId":"2266f36a-cef3-4002-fc9e-9ae15d152200","colab":{"base_uri":"https://localhost:8080/","height":81},"executionInfo":{"status":"ok","timestamp":1677675540080,"user_tz":-120,"elapsed":405,"user":{"displayName":"Tom Shani","userId":"00667426488827942961"}}},"outputs":[{"output_type":"execute_result","data":{"text/plain":[" img_filename fastdup_id error_code is_valid\n","0 apple_pie/broken_image.jpg 101001 ERROR_ZERO_SIZE_FILE False"],"text/html":["\n"," \n","
\n","
\n","\n","
\n"," \n"," \n"," \n"," img_filename \n"," fastdup_id \n"," error_code \n"," is_valid \n"," \n"," \n"," \n"," \n"," 0 \n"," apple_pie/broken_image.jpg \n"," 101001 \n"," ERROR_ZERO_SIZE_FILE \n"," False \n"," \n"," \n","
\n","
\n","
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":20}],"source":["inv = pd.DataFrame([['apple_pie/broken_image.jpg', 101001, 'ERROR_ZERO_SIZE_FILE', False]], columns=[inv.columns])\n","inv"]},{"cell_type":"markdown","id":"0d1b48c6","metadata":{"id":"0d1b48c6"},"source":["# Find dark, bright and blurry images"]},{"cell_type":"code","execution_count":21,"id":"56758be3","metadata":{"id":"56758be3","outputId":"b3b9e91b-13e7-4fd7-d58f-e5774b32cc0d","colab":{"base_uri":"https://localhost:8080/","height":206},"executionInfo":{"status":"ok","timestamp":1677675543477,"user_tz":-120,"elapsed":566,"user":{"displayName":"Tom Shani","userId":"00667426488827942961"}}},"outputs":[{"output_type":"execute_result","data":{"text/plain":[" fastdup_id img_w img_h unique blur mean min max stdv \\\n","0 0 308 512 0 2538.7280 113.0935 0.0 255.0 53.7116 \n","1 1 512 512 0 334.7137 131.6409 4.0 255.0 55.6296 \n","2 2 384 512 0 728.2955 117.8109 0.0 255.0 29.5684 \n","3 3 512 512 0 1013.1310 95.8764 0.0 255.0 67.6263 \n","4 4 512 512 0 2478.7454 63.2681 0.0 255.0 68.2799 \n","\n"," file_size contrast img_filename error_code is_valid \n","0 42196 1.0000 apple_pie/1005649.jpg VALID True \n","1 40760 0.9691 apple_pie/1011328.jpg VALID True \n","2 33835 1.0000 apple_pie/101251.jpg VALID True \n","3 48051 1.0000 apple_pie/1014775.jpg VALID True \n","4 59055 1.0000 apple_pie/1026328.jpg VALID True "],"text/html":["\n"," \n","
\n","
\n","\n","
\n"," \n"," \n"," \n"," fastdup_id \n"," img_w \n"," img_h \n"," unique \n"," blur \n"," mean \n"," min \n"," max \n"," stdv \n"," file_size \n"," contrast \n"," img_filename \n"," error_code \n"," is_valid \n"," \n"," \n"," \n"," \n"," 0 \n"," 0 \n"," 308 \n"," 512 \n"," 0 \n"," 2538.7280 \n"," 113.0935 \n"," 0.0 \n"," 255.0 \n"," 53.7116 \n"," 42196 \n"," 1.0000 \n"," apple_pie/1005649.jpg \n"," VALID \n"," True \n"," \n"," \n"," 1 \n"," 1 \n"," 512 \n"," 512 \n"," 0 \n"," 334.7137 \n"," 131.6409 \n"," 4.0 \n"," 255.0 \n"," 55.6296 \n"," 40760 \n"," 0.9691 \n"," apple_pie/1011328.jpg \n"," VALID \n"," True \n"," \n"," \n"," 2 \n"," 2 \n"," 384 \n"," 512 \n"," 0 \n"," 728.2955 \n"," 117.8109 \n"," 0.0 \n"," 255.0 \n"," 29.5684 \n"," 33835 \n"," 1.0000 \n"," apple_pie/101251.jpg \n"," VALID \n"," True \n"," \n"," \n"," 3 \n"," 3 \n"," 512 \n"," 512 \n"," 0 \n"," 1013.1310 \n"," 95.8764 \n"," 0.0 \n"," 255.0 \n"," 67.6263 \n"," 48051 \n"," 1.0000 \n"," apple_pie/1014775.jpg \n"," VALID \n"," True \n"," \n"," \n"," 4 \n"," 4 \n"," 512 \n"," 512 \n"," 0 \n"," 2478.7454 \n"," 63.2681 \n"," 0.0 \n"," 255.0 \n"," 68.2799 \n"," 59055 \n"," 1.0000 \n"," apple_pie/1026328.jpg \n"," VALID \n"," True \n"," \n"," \n","
\n","
\n","
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","\n"," \n","
\n","
\n"," "]},"metadata":{},"execution_count":21}],"source":["# show image statistics\n","fd.img_stats().head(5)"]},{"cell_type":"code","execution_count":22,"id":"893381be","metadata":{"id":"893381be","colab":{"base_uri":"https://localhost:8080/","height":1000},"executionInfo":{"status":"ok","timestamp":1677675579962,"user_tz":-120,"elapsed":34396,"user":{"displayName":"Tom Shani","userId":"00667426488827942961"}},"outputId":"8b140c87-7012-4263-8427-cb1ad2788c03"},"outputs":[{"output_type":"stream","name":"stderr","text":["100%|██████████| 25/25 [00:00<00:00, 115.30it/s]\n"]},{"output_type":"stream","name":"stdout","text":["Stored blur visual view in fastdup_food101/galleries/stats_20230301125905/blur.html\n"]},{"output_type":"display_data","data":{"text/plain":[""],"text/html":[" \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," blur Image Report \n"," \n"," \n","\n","\n","\n"," \n"," \n"," \n"," \n"," \n","
\n","
\n","
blur Image Report
\n","
\n","
\n","
\n"," \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," blur \n"," 9.0875 \n"," \n","\n"," filename \n"," food-101/images/breakfast_burrito/462294.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," blur \n"," 21.3909 \n"," \n","\n"," filename \n"," food-101/images/bread_pudding/444890.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," blur \n"," 22.5841 \n"," \n","\n"," filename \n"," food-101/images/miso_soup/3215987.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," blur \n"," 26.4281 \n"," \n","\n"," filename \n"," food-101/images/clam_chowder/908590.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," blur \n"," 28.9824 \n"," \n","\n"," filename \n"," food-101/images/hot_dog/3050169.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," blur \n"," 31.5239 \n"," \n","\n"," filename \n"," food-101/images/dumplings/2174768.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," blur \n"," 34.5124 \n"," \n","\n"," filename \n"," food-101/images/clam_chowder/2250407.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," blur \n"," 34.7576 \n"," \n","\n"," filename \n"," food-101/images/strawberry_shortcake/3363461.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," blur \n"," 36.0844 \n"," \n","\n"," filename \n"," food-101/images/tacos/1505262.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," blur \n"," 36.7776 \n"," \n","\n"," filename \n"," food-101/images/spring_rolls/406134.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," blur \n"," 37.5194 \n"," \n","\n"," filename \n"," food-101/images/hummus/3707400.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," blur \n"," 37.8985 \n"," \n","\n"," filename \n"," food-101/images/carrot_cake/345630.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," blur \n"," 38.884 \n"," \n","\n"," filename \n"," food-101/images/baklava/3877397.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," blur \n"," 39.3872 \n"," \n","\n"," filename \n"," food-101/images/gyoza/2712704.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," blur \n"," 39.6907 \n"," \n","\n"," filename \n"," food-101/images/macarons/2117640.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," blur \n"," 39.9276 \n"," \n","\n"," filename \n"," food-101/images/miso_soup/2757717.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," blur \n"," 40.1626 \n"," \n","\n"," filename \n"," food-101/images/lobster_bisque/353321.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," blur \n"," 40.897 \n"," \n","\n"," filename \n"," food-101/images/french_fries/172042.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," blur \n"," 41.1794 \n"," \n","\n"," filename \n"," food-101/images/gnocchi/3167362.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," blur \n"," 41.3781 \n"," \n","\n"," filename \n"," food-101/images/baklava/1413667.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," blur \n"," 42.7645 \n"," \n","\n"," filename \n"," food-101/images/chocolate_mousse/1653769.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," blur \n"," 43.3361 \n"," \n","\n"," filename \n"," food-101/images/hot_dog/917699.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," blur \n"," 45.0252 \n"," \n","\n"," filename \n"," food-101/images/pizza/2412970.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," blur \n"," 45.5609 \n"," \n","\n"," filename \n"," food-101/images/baklava/3681797.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," blur \n"," 45.9959 \n"," \n","\n"," filename \n"," food-101/images/beignets/726875.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n"," \n","
\n"," \n"," \n"," \n"," \n"," "]},"metadata":{}},{"output_type":"display_data","data":{"text/plain":[""],"image/png":"iVBORw0KGgoAAAANSUhEUgAAAaMAAAENCAYAAACigwpqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAAsTAAALEwEAmpwYAAAj90lEQVR4nO3df5wddX3v8dc7kQBbDG4IlEKBANEmCKgQWqXtA4UiBEQshQLV22p7jbS10HovNWAiEfUSvOWHl9xbSPu4BbWIEFHkR0gJXjAWqSRokZIgAgmBiE1gkxg2EEg+94/vnGQyObtnTnLOmbO77+fjcR67M/OZ73zO5mQ/OzPf73cUEZiZmVVpVNUJmJmZuRiZmVnlXIzMzKxyLkZmZlY5FyMzM6uci5GZmVXOxcisy0laLun36qx/r6Tnq8jJrNVcjMzMrHIuRmYjkKQ3VZ2DWZ6LkdnQcJykJyT1SfonSXsUAySFpIm55RslfSH7/r2Snpf0aUkvAv/UwdzNGvJfR2ZDw4eBU4BXgDuBGcDCJtvYHxgHHIL/ELUu4w+k2dAwJyJWRsTLwBeB83eijS3AZRHxWkRsbG16ZrvGxchsaFiZ+34FcMBOtLE6Il5tUT5mLeViZDY0HJT7/mBgVZ2YfqAnt7x/Ybun6Leu5WJkNjT8paRflzQO+AzwjToxPwb+SNJoSacCJ3QyQbNd4WJkNjTcDPwL8AzwNPCFOjEXAWcAa0kdHr7dodzMdpn8cD0zM6uaz4zMzKxyLkZmZlY5FyMzM6uci5GZmVXO0wE1abwUE449tuo0zMyGlCVLlqyJiH0H2u7edE2aIsVi/8zMzJoiaUlETBlouy/TmZlZ5VyMzMysci5GZmZWORcjMzOrnItRs9yTzsys5VyMzMysci5GZmZWORejZi1dWnUGZmbDjotRs/r7q87AzGzY8XRALTRh+t1bv18++/QKMzEzG1p8ZmRmZpVzMTIzs8q5GJmZWeVcjMzMrHIuRs0aP77qDMzMhh0Xo2YdckjVGZiZDTsuRmZmVrm2FiNJEyXdIOkxSZslPVDY/l5JMcBrQS7uowPEXFBoT5IulbRS0kZJ35P0zjp5HSHpfkn9klZJulzS6FJvyoNezcxart2DXt8OnAY8DOxWZ/ujwHsK6w4GvgHMrxN/IrAxt/xMYft0YCZwMbAM+BSwUNKREfEigKReYCHwBHAmcDhwFakwz2j4jjwdkJlZy7W7GN0ZEXcASJoHbHf3PyLWkwrVVpJ+F9gC3FqnvUciYkO9A0nag1SMroiIOdm6HwDLgU+yrdBcAOwJnJUd/z5JY4FZkr6UrTMzsw5q62W6iNiyE7udDzwYEaua3O94YCy5IhYRrwB3AlNzcVOBBYWicwupQJ2wE/mamdku6qoODJLeBrwL+PoAIU9LekPSk5I+Udg2CdgMPFVYvzTblo9blg+IiOeA/kKcmZl1SLdNlHoe8DrwzcL6n5PuBf0QGJ3FXS+pJyKuyWJ6gQ0Rsbmwbx/QI2lMRGzK4tbWOXZftm0HkqYB0wD8nFczs9brxmL0LxHxcn5lRCwAFuRWzc/uEc2Q9OWdvBxYWkTMBeYCTJGinccyMxuJuuYynaR3AJMZ+BJd0TxgHDAhW+4D9qrTRbsX6M/Oimpxe9dprzfbZmZmHdY1xYh0VrQRuKNkfBS+LiNdwptYiCveI1pG4d6QpIOAnkJcfZMnl0zPzMzK6rZidOdAXbfrOBtYA6zIlh8C1gPn1AIk9QBnsP2YpfnAKZLenFt3LqkQPtjwqD09JdMzM7Oy2nrPKCsGp2WLBwJjJZ2dLd8TEf1Z3LtJl9v+ZoB2vknqvPAY6ezn3Ox1Ye1+UUS8Kmk2MFNSH9sGvY4Crss1dz1wIXC7pCuBw4BZwNUeY2RmVo12d2DYD7itsK62fChpQCqks6J11J91AeBJ4E+BgwCRZk/444j4aiFuNqn4XALsAywGTo6IX9QCIqJP0knAHNIYpLXANaSC1NiKFY1jzMysKYpw57BmTJFi8QA/swnT7976/fLZp3cqJTOzridpSURMGWh7t3XtHlLyxcfMzHZeN3VgMDOzEcrFyMzMKudiZGZmlXMxMjOzyrkYNcuDXs3MWs7FqFmeDsjMrOVcjMzMrHIuRmZmVjkPem3WkiWlwjwbg5lZeT4zMjOzyrkYmZlZ5VyMzMysci5GZmZWORcjMzOrXFuLkaSJkm6Q9JikzZIeqBOzXFIUXi/WiTtC0v2S+iWtknS5pNGFGEm6VNJKSRslfU/SO3emLTMz65x2d+1+O+mx4w8Duw0SdzPbPxp8U36jpF5gIekJr2cChwNXkYrpjFzodGAmcDHbHju+UNKREfFik23Vd/DBDUPMzKw57S5Gd0bEHQCS5gHjB4j7eUQ8PEg7FwB7AmdFxHrgPkljgVmSvhQR6yXtQSpGV0TEnOyYPyA92vyTbCs0Ddsa9B3tu2/DN21mZs1p62W6iNjSoqamAgsKheIWUlE5IVs+HhgL3Jo7/ivAndn+zbRlZmYd1C0dGP5M0iZJ6yTNk3RIYfsk0mW3rSLiOaA/21aL2Qw8Vdh3aS6mbFsDW726YYiZmTWnG6YDuoN0T+l5YDJwGbBI0lERsS6L6QXW1tm3L9tWi9kQEZvrxPRIGhMRm0q2tR1J04BpAMeWe09mZtaEyotRRFyUW1wk6SHgx8DHgGuryKkoIuYCcwGmSFFxOmZmw063XKbbKiIeB54Ejsmt7gP2rhPem22rxexVp4t2L9CfnRWVbcvMzDqo64pRJrJXzTIK93MkHQT0sO3+zzJgNDCx0FbxHlGZtszMrIO6rhhJOpJULPLPapgPnCLpzbl15wIbgQez5YeA9cA5ubZ6gDOy/Ztpy8zMOqit94yyYnBatnggMFbS2dnyPcD7gI8AdwGrSEVoBvAccGOuqeuBC4HbJV0JHAbMAq6uddGOiFclzQZmSupj26DXUWw/oLZhW2Zm1lnt7sCwH3BbYV1t+VBgZRZzLfAW4CXgXuDSfGGIiD5JJwFzSOOG1gLXkIpI3mxS8bkE2AdYDJwcEb/YibbMzKxD2lqMImI5oAZhJ5Vs6wngxAYxAXwxe+1SWwM61p27zcxarevuGZmZ2cjjYmRmZpVzMWrW0qVVZ2BmNuy4GDWrv7/qDMzMhh0XIzMzq5yLkZmZVc7FyMzMKudiZGZmlXMxMjOzyrkYNWv8+KozMDMbdlyMmnVI8YnoZma2q1yMzMysci5GzfKgVzOzlnMxapanAzIzazkXIzMzq1xbi5GkiZJukPSYpM2SHihs/zVJ/1PSv0vaIGmlpJskHVCIe6+kqPOaXeeYH5f0lKRXJS3JHqRXjDlQ0rck/VLSGklzsqfSmplZBdr9pNe3kx47/jCwW53txwK/D/wj8G/Ar5KeuPqQpCMjYkMh/sPAM7nlF/IbJZ1Peqz4LOD7wMeAuyQdFxGPZzG7AQuATcB5pCfMXp19/chOvUszM9sl7S5Gd0bEHQCS5gHFQTrfByZFxBu1FZIeBZ4E/gC4qRD/WK2oDGAWcFNEfD5r60HgXcB0thWas4HJwMSIeDaLex24RdLnIuKppt+lmZntkrZepouILQ22r80XomzdT4F+4ID6e9Un6TDgbcCthePfBkzNhU4FHqkVosy3SWdKpzZzTDMza42u68Ag6WigB/hpnc3fze49LZc0Q9Lo3LZJ2ddlhX2WAuMk7ZuL2y4mIjYBT+faMDOzDmr3ZbqmSBoFfBl4CvhObtM6YDawiHQG8wHgc8C+wEVZTG/2dW2h2b7c9tXZ12JMLa63znokTQOmARy+//4l342ZmZXVVcUIuAJ4D3BCRLxeWxkRPwJ+lItbKOk14FOSPh8Ra9qZVETMBeYCTJkyJdp5LDOzkahripGkvwAuBs6PiH8rscs84G+Bo4Hvsu0MaG+2P/Opne305b7uXae9XuDfm8u6nAnT795uefns09txGDOzIasr7hlJ+gPgOuBvI+IbJXeLwtfafaDifZ9JwMsRsToXt12MpDHAYex4v2lHK1aUTM/MzMqqvBhJei/wz8B1EfF3Tex6NvAG8BhARDxD6vRwTq7tUdny/Nx+84HjJOWn3/4gsDtwb8OjrmnrFUEzsxGprZfpslkNTssWDwTGSjo7W74HOITUrXoZ8A1J787tvjoins7a+XtS54NHSB0YTgM+CVwbES/l9pkFfE3ScuBfgT8B3gr8US5mHvAZ4HZJM0mX7K4BbvYYIzOzapQqRpKOioif7ET7+5HG+eTVlg8FfotUDN4BPFSIuwn4aPb9UuC/An8DjAF+Bvw3Us+7rSLi65L2Aj4NzAT+A/hAfqBsRLwu6VRgDmlM0mvALaT7VWZmVgFFNO4cJmkR6TLWjcA/R8S6NufVtaZIsTj7mRU7JpTlDgxmNtJIWhIRUwbaXuqeUUT8LmleuIOAJZJulnRyi3I0M7MRrnQHhux+ygzSJbATgP8laZmks9qVnJmZjQylipGkoyVdQ7p3cyJwRkRMzr6/po35dZ8eP2nCzKzVyvamu470mIdLI2JjbWVErJI0oy2ZdavJk6vOwMxs2ClbjE4HNkbEZtg6fmePiOiPiK+2LTszMxsRyt4zWgjsmVvuydaZmZntsrLFaI/8U1ez70fmzZMlS6rOwMxs2ClbjF6RdExtQdKxwMZB4s3MzEore8/or4HbJK0CBOwPnNuupMzMbGQpVYwi4hFJk4DfyFY9mX/ekJmZ2a5oZqLU44AJ2T7HSCIivtKWrMzMbEQpO1HqV4HDgR8Dm7PVAbgYmZnZLit7ZjQFOCLKzKpqZmbWpLK96R4ndVqwgw+uOgMzs2GnbDEaDzwhaYGk79RejXaSNFHSDZIek7RZ0gN1YiTpUkkrJW2U9D1J76wTd4Sk+yX1S1ol6XJJo9vV1oD23bdUmJmZlVf2Mt2snWz/7aSnsj4M7DZAzHTSg/AuJj3x9VPAQklHRsSLAJJ6STM+PAGcSbp/dRWpmM5oU1tmZtYhZbt2PyjpEOCtEbEwe5x4mTOJOyPiDgBJ80hnWFtJ2oNUQK6IiDnZuh8Ay0mPFa8VhwtI0xGdFRHrgfskjQVmSfpSRKxvZVuDvqPVq0u8bTMza0bZR0h8HJgH3JCtOhD4dqP9ImJLg5DjgbGkx3/X9nkFuBOYmoubCiwoFIpbSEXlhDa0NbDnnmsYYmZmzSl7z+gvgd8G1sPWB+3t14LjTyJ1FX+qsH5pti0ftywfEBHPAf25uFa2ZWZmHVS2GL0WEZtqC5LeRBpntKt6gQ21R1Pk9AE9ksbk4tbW2b8v29bqtrYjaZqkxZIWD/JezMxsJ5UtRg9KuhTYU9LJwG2ky18jQkTMjYgpETGl6lzMzIajssVoOrAa+AnwCeAeWtPzrA/Yq0636l6gP3c21gfsXWf/3mxbq9syM7MOKtubbgvwD9mrlZaReuVNBJ7MrS/e11lG4X6OpINIz1RalotpVVtmZtZBZXvTPSvpmeKrBcd/iNQp4pzcsXqAM4D5ubj5wCmS3pxbdy7pmUoPtqEtMzProGbmpqvZg/QLf1yjnbJicFq2eCAwVtLZ2fI9EdEvaTYwU1If2waqjgKuyzV1PXAhcLukK4HDSANxr6510Y6IV1vV1qCOPbZhiJmZNafsZbqXCquulbQE+GyDXfcjdXbIqy0fShqQOptUMC4B9gEWAydHxC9yx++TdBIwh9RxYi1wDTvODNHKtszMrEPKPkLimNziKNKZUsN9I2I56cmwg8UE8MXsNVjcE8CJnWrLzMw6p+xluqty379BOqP5w5ZnMxQsXVp1BmZmw07Zy3Tva3ciQ0Z/f9UZmJkNO2Uv031qsO0RcXVr0jEzs5Gomd50xwG1ZxidAfyQHeeBsxImTL976/fLZ59eYSZmZt2hbDH6deCYiPglgKRZwN0R8ZF2JWZmZiNH2emAfhXYlFvelK0zMzPbZWXPjL4C/FDSt7LlDwE3tSUjMzMbccr2pvuipPnA72arPhYRP2pfWl1s/PjGMWZm1pSyl+kgTSS6PiK+DDwv6dA25dTdDjmk6gzMzIadshOlXgZ8mjTNDsBuwNfalZSZmY0sZc+Mfh/4IPAKQESsAt486B7DlQe9mpm1XNlitCmb9y0AJP1K+1Lqcp4OyMys5coWo1sl3QC8RdLHgYW0/kF7ZmY2QjXsTSdJwDdIT0ddD/wG8NmIuK/NuZmZ2QhR5jEQIemeiDgKcAEyM7OWK3uZ7lFJx7UjAUkPSIoBXu/JYpbX2fZinbaOkHS/pH5JqyRdLml0IUaSLpW0UtJGSd+T9M52vDczMyun7AwMvwV8RNJyUo86kU6ajm5BDn8BjC2suxx4F/BIbt3NbP/48Pz0REjqJd3LegI4Ezic9BymUcCMXOh0YCZwMdseTb5Q0pERsUOBMzOz9hu0GEk6OCKeA05pVwLZU1fzxxxDmiX8GxHxRm7TzyPi4UGaugDYEzgrItYD90kaC8yS9KWIWC9pD1IxuiIi5mTH+wHpYYGfZPuiZWZmHdLoMt23ASJiBXB1RKzIv9qU06lAL/D1JvebCizIClHNLaQCdUK2fDzpLOzWWkBEvALcme3f2OTJTaZlZmaNNCpGyn1/WDsTyTkPeB5YVFj/Z5I2SVonaZ6k4rw8k0iX3bbKzur6s221mM3s+BympbmYwfX0lAozM7PyGt0zigG+bwtJPaSZHm7IBtnW3AE8TCpSk4HLgEWSjoqIdVlML7C2TrN92bZazIaI2FwnpkfSmIjYVNiGpGnANICDDz54Z96amZkNolExeoek9aQzpD2z72FbB4Zix4NddQbwKxQu0UXERbnFRZIeAn4MfAy4tsU57CAi5gJzAabsu2/bi7KZ2UgzaDGKiNGDbW+D84CfRcTiwYIi4nFJTwLH5Fb3AXvXCe/NttVi9pI0unB21Av01zsr2sGaNQ1DzMysOc08QqKtJO1N6kRQtuPC1rnyMsso3PeRdBDp0RfLcjGjgYmFtna432RmZp3TNcWINDP47pQoRpKOJBWQJbnV84FTJOVnEz8X2Ag8mC0/RJrS6JxcWz2ky4PzdyV5MzPbeWUHvXbCecC/R8R202JLOh34CHAXsIpUhGYAzwE35kKvBy4Ebpd0Jan33yxSl/T1ABHxqqTZwExJfWwb9DqK7QfUmplZB3VFMZI0HjiJNDNC0UpgP1JHhbcALwH3ApfmxxRFRJ+kk4A5pHFDa4FrSAUpbzap+FwC7AMsBk6OiF+06v2YmVlzuqIYRcQa0tNj6217jFSoyrTzBHBig5gAvpi9zMysC3TTPaOhwYNezcxazsWoWZ4OyMys5VyMzMysci5GZmZWua7owDCkLFnSOKYJE6bfvfX75bNPb2nbZmZDhc+MzMysci5GZmZWORcjMzOrnO8Z7YT8fR4zM9t1PjMyM7PKuRiZmVnlXIya9MLYfatOwcxs2HExatLLPfUeJmtmZrvCxcjMzCpXeTGS9FFJUed1QS5Gki6VtFLSRknfk/TOOm0dIel+Sf2SVkm6XNLoQkyptgYyrn/drrxdMzOro5u6dp9IekR4zTO576eTHrx3MduezrpQ0pER8SKApF5gIfAEcCZwOHAVqeDOaKatwRy4fjVrdurtmZnZQLqpGD0SERuKKyXtQSogV0TEnGzdD4DlwCfZVmguAPYEzsqeAHufpLHALElfioj1TbRlZmYdVPlluhKOB8YCt9ZWRMQrpEeLT83FTQUW5B9FDtxCKlAnNNmWmZl1UDcVo6clvSHpSUmfyK2fBGwGnirEL8225eOW5QMi4jmgPxdXti0zM+ugbrhM93PSPZwfAqOB84DrJfVExDVAL7AhIjYX9usDeiSNiYhNWdzaOu33Zdtooi0zM+ugyotRRCwAFuRWzc/u7cyQ9OWK0tqOpGnANIBjK87FzGw46qbLdHnzgHHABNJZy17FLtqks5z+3JlMH1BvRGpvtq0WU6at7UTE3IiYEhFTmn4nZmbWULcWo8h9XUa6fDexEFO8R7SMwn0fSQcBPbm4sm0N6Cf7F3c1M7Nd1a3F6GxgDbACeAhYD5xT2yipBzgDmJ/bZz5wiqQ359adSxq79GC2XLYtMzProMrvGUn6JqnzwmOks5Zzs9eFEbEFeFXSbGCmpD62DVQdBVyXa+p64ELgdklXAocBs4Cra929I6JsW2Zm1kGVFyPgSeBPgYMAkWZQ+OOI+GouZjapYFwC7AMsBk6OiF/UAiKiT9JJwBzSuKG1wDWkgkQzbQ1m4pqVvNLc+zMzswYUEY2jbKspUqz59F1taXv57NPb0q6ZWdUkLRmsE1g3nBlZpvg4cxcnMxspurUDg5mZjSAuRmZmVjkXIzMzq5yLkZmZVc7FqEkv7zm26hTMzIYdF6MmvbD3flWnYGY27LgYmZlZ5VyMmrTn669VnYKZ2bDjYtSkiS+trDoFM7Nhx8XIzMwq52JkZmaVczEyM7PKeaLULpafONWTpprZcOYzIzMzq1zlxUjSOZK+I+kFSRskLZF0fiHmAUlR57VHIe5ASd+S9EtJayTNyR4rXjzmxyU9JenV7Hgntft9mpnZwLrhMt2ngGeBvwHWAKcBN0saHxH5R4H/P+DSwr5bB/1I2g1YAGwCzgPeAlydff1ILu580iPKZwHfBz4G3CXpuIh4vFGyP9vnIN7SzLszM7OGuqEYnRERa3LL35V0AKlI5YvRyxHx8CDtnA1MBiZGxLMAkl4HbpH0uYh4KoubBdwUEZ/PYh4E3gVMJ1e0BrJxt91djMzMWqzyy3SFQlTzI+CAJpuaCjxSK0SZb5POlE4FkHQY8Dbg1tzxtwC3ZfubmVkFKi9GA3gP8NPCuvdL6s9eCyQdXdg+CViWXxERm4Cns23kvm4XBywFxknat1FiB677zzL5m5lZE7quGGWdCT4EXJVb/SBwEXAKMA04GFgkaUIuphdYW6fJvmwbua/FuL7C9mJO0yQtlrR43Mb1Zd6GmZk1oauKUVZcbgbuiIgba+sj4rKI+KeIWBQRXwPeBwTw153IKyLmRsSUiJjSieOZmY00XVOMJI0D5gMrgA8PFhsRLwL/ChyTW90H7F0nvJdtZz61r8W43sJ2MzProG7oTUc2FuguYAzwgYjoL7FbZK+aZWy7J1RrdwxwGKkrdy2GLG5FLnQSqbfe6uaz7wzPxmBmw1nlZ0aS3kTqzfZW4NSIaNhDQNL+wO8AS3Kr5wPHSTokt+6DwO7AvQAR8QypY8Q5ubZGZcvzd+2dmJnZzuqGM6P/QxroehGwj6R9ctt+BPwGcAWpYK0gdV64BNgCXJuLnQd8Brhd0kzSpbhrgJtzY4wgjTP6mqTlpEt9f0IqhH/U4vdlZmYldUMxen/29ct1th0KvASIVJD2AX4JPAB8KCKeqwVGxOuSTgXmkMYRvQbcAlycbzAivi5pL+DTwEzgP0iXBhvOvgCw8U27l35jZmZWTuXFKCImlAg7rWRbz5O6hTeK+wfgH8q0WfSz8Qfxazuzo5mZDajye0ZmZmYuRmZmVjkXoyYd9eLPqk7BzGzYqfyekTUvP+YIPO7IzIY+nxmZmVnlXIzMzKxyLkZmZlY5FyMzM6ucOzAMA55E1cyGOp8ZNemFsQ0fBmtmZk1yMWrSyz31HplkZma7wpfphhlfsjOzochnRk0a17+u6hTMzIYdnxk16cD1q1lTdRIleaYGMxsqfGZkZmaVG5FnRpKOAK4D3gOsBf4R+FxEbK4yr3bz/SQz61YjrhhJ6gUWAk8AZwKHA1eRzhJnVJhaR7kwmVk3GXHFCLgA2BM4KyLWA/dJGgvMkvSlbN2I4ntLZla1kViMpgILCkXnFuBK4ATgzkqy6iLF4jQQFy0za5WRWIwmAd/Nr4iI5yT1Z9tGfDEqq2zRKsvFzWzkGonFqJfUaaGoL9u2A0nTgGnZ4gau/MCT7UmtLcbD0OiNriu3fjtkcs4ZajkPtXzBOXdCO/M9ZLCNI7EYNS0i5gJzq85jZ0haHBFTqs6jGc65/YZavuCcO6HKfEfiOKM+oN4Ec73ZNjMz67CRWIyWke4NbSXpIKAn22ZmZh02EovRfOAUSW/OrTsX2Ag8WE1KbTUULy865/YbavmCc+6EyvJVRFR17Epkg16fAB4ndec+DLgauDYiRsygVzOzbjLiihFsnQ5oDttPBzRruE8HZGbWrUZkMTIzs+4yEu8ZDRmSPiop6rwuyMVI0qWSVkraKOl7kt5Zp60jJN0vqV/SKkmXSxpdiCnVVi5+oqQbJD0mabOkB+rEdDy/wdoqmfPyOj/zF6vIWdI5kr4j6QVJGyQtkXR+nf0/LukpSa9mMSfViTlQ0rck/VLSGklzJPW0sq0y+Up6oM7PNyTt0el8s21nS3pI0ktZG09KmiFpTKv+HQsxrfgcl8m5az7Hxbi6IsKvLn0BHwUCeB/w7txrv1zMJaTOF58Efg+4hzRobf9cTC+wijRB7Mmk+fleAb5QOF7DtgrxZwIrgduApcADdWI6ml+jtkrmvBz458LP/JhCTEdyBn4A3Az8IXAi8HfZZ+KvcvufD2wGZmafla9kxzwyF7Mb6T7po8DpwIeBXwBfK+S7S22VzPcB0iwo7y681Ol8s+2fyH7Wv5+18emsjTld/Dkuk/NyuuRzXOr3XdW/cP0a5B9nWzHaa4DtewDrgM/m1v0KsDr/Acg+SH3A2Ny6vwX6a+vKtlU4/qjc9/Mo/GKvIr9GbTXKOVu/HPi7Bv82ncp5Qp1j3ww8m1t+Evi/+X8X4CfkfnGz7Zf2obl1fwhsAd7awrZ+s0S+DwDzGvx8O5XvWwc4/hdJ95PVqs9eq/9PDJZzF36O6+acf/ky3dB2POkX7K21FRHxCml+vam5uIEmh92TNDlsM22R276lC/MbtK0SOZfVqZyPqnPsHwEHAEg6DHhb4RhbSGd+xWM8EhHP5tZ9G9gEnNrCtn5rsHyb0Kl8Tx3g+C8BtUteXfc5LpFzWVXnvJWL0dDwtKQ3suvCn8itn0T6i++pQvxSth/YO4nCgN6IeI70F8ukXEyZtppRRX5l2irjzyRtkrRO0jxJxXm1qsz5PcBPc/tS3D87xjhJ+w5yjE3A04V8W9XWQPnWvD+7r9AvaYGkowvbO56v0j26Hkm/A1wI/H2kP++79nM8SM413fw53o7nputuPyddC/8hMBo4D7heUk9EXEO6RrshduyS3gf0SBqT/acrMzls2baaUUV+TU+EW8cdwMPA88Bk4DJgkaSjImJdLp+O55zdnP8Q8Ke5Y1Bn/77c9tVN5NuqtgbKF9Lg8puAn5Emz/wM6ef7johYnjtWp/N9Bdg9+/4rwMW5drr1czxQztDFn+N6XIy6WEQsABbkVs1X6nE0Q9KXK0pr2IuIi3KLiyQ9BPwY+BhwbRU5AUiaQLr/ckdE3FhVHmUNlG9EXJYLWyRpIekv6r/OXlU5njQt2G8CnyWNRfyLCvMpY8Ccu/VzPBBfpht65gHjgAmkvzj2qtN1shfoz53J9NF4ctiybTWjivzKtNWUiHicdKP8mNzqjuYsaRxpKqsVpN5g+Tyos39vYXvZfFvS1iD57iAiXgT+lZ37+bYk3yyPRyPi+xFxNemS159LOpwu/hwPkvMOuuFzPBgXo6Encl+XkS7fTSzEFK/dlpkctmxbzagivzJt7Yxg28++ozkrjYe5i3Rz+gMR0V/Yl+L+2fLLEbF6kGOMIU2Hlc93l9tqkO9Ayvx825LvAPk8mn09lKHzOc7nPJDKPseD5AS4GA1FZ5P6968AHgLWA+fUNma/CM4g/VVaU2Zy2LJtNaOK/Fo+Ea6kI0n/yZZUkPP3Sb3D3gqcGhH/mc8tIp4hdQ7IH2NUtlw8xnGFG9gfJN1vuLeFbd03WL71SNof+B12/Pl2It97B0jrt7OvzzJ0Psf5nHdQ8ee48f+9Rn2//aruBXyTNJhtKvAB4KvsOIDwElJvlb8ETgLuJhWrX83F9JI6Q9xHGrA2DdhA/YFtg7ZViO8hFcezSYMd/yO33FNFfo3aapQzaVDk10mXlt4H/DnwAvAM24+f6EjOpFmUg3QJ5t2F1+7Z/rVxNDOynG9k4IGfS4DTsn1eZOBBpDvVVqN8gaOzn8FHs/b/hPRX88vAwZ3ON9t+L/DfSf/P3g98Lvv531LV/7NGbTXKmS77HJf6fVf1L1y/BvnHgf9Busbbn/0HWwL8l0KMSL2Rns9iFgHvqtPWEaRR7xuzD8zngdE701YufgLbTvuLrwlV5TdYW41yJv2yvJ/UC+t10i+tG4EDqsiZNHBx0J9xtv/HSb3TXiNdrjmpzjF+nTS+ZgNpTMr/JvujoRC30201yhc4kDR6/+ekcT4vkf7omlRFvtm2z5OK1QZSb7BHgb8Cdqvy/9lgbTXKmS77HJf5feeJUs3MrHK+Z2RmZpVzMTIzs8q5GJmZWeVcjMzMrHIuRmZmVjkXIzMzq5yLkZmZVc7FyMzMKvf/AShvD+cko+USAAAAAElFTkSuQmCC\n"},"metadata":{"needs_background":"light"}}],"source":["# visualize blurry images\n","fd.vis.stats_gallery(metric='blur')"]},{"cell_type":"code","execution_count":23,"id":"150d2106","metadata":{"id":"150d2106","colab":{"base_uri":"https://localhost:8080/","height":1000},"executionInfo":{"status":"ok","timestamp":1677675631824,"user_tz":-120,"elapsed":32160,"user":{"displayName":"Tom Shani","userId":"00667426488827942961"}},"outputId":"cab8b0b7-dd11-40c4-f767-22d656254bc6"},"outputs":[{"output_type":"stream","name":"stderr","text":["100%|██████████| 25/25 [00:00<00:00, 62.25it/s]\n"]},{"output_type":"stream","name":"stdout","text":["Stored mean visual view in fastdup_food101/galleries/stats_20230301125959/mean.html\n"]},{"output_type":"display_data","data":{"text/plain":[""],"text/html":[" \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," mean Image Report \n"," \n"," \n","\n","\n","\n"," \n"," \n"," \n"," \n"," \n","
\n","
\n","
mean Image Report
\n","
\n","
\n","
\n"," \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 2.0994 \n"," \n","\n"," filename \n"," food-101/images/breakfast_burrito/462294.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 8.9502 \n"," \n","\n"," filename \n"," food-101/images/spring_rolls/182658.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 11.9448 \n"," \n","\n"," filename \n"," food-101/images/ramen/1222396.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 12.5025 \n"," \n","\n"," filename \n"," food-101/images/samosa/987023.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 12.873 \n"," \n","\n"," filename \n"," food-101/images/oysters/933713.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 12.8861 \n"," \n","\n"," filename \n"," food-101/images/lasagna/1675979.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 13.0013 \n"," \n","\n"," filename \n"," food-101/images/pulled_pork_sandwich/555512.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 13.7173 \n"," \n","\n"," filename \n"," food-101/images/scallops/3314913.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 13.8949 \n"," \n","\n"," filename \n"," food-101/images/sashimi/2160399.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 14.2346 \n"," \n","\n"," filename \n"," food-101/images/bibimbap/1229620.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 14.4314 \n"," \n","\n"," filename \n"," food-101/images/crab_cakes/3694057.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 15.0424 \n"," \n","\n"," filename \n"," food-101/images/filet_mignon/3030737.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 15.2937 \n"," \n","\n"," filename \n"," food-101/images/pizza/3803596.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 15.7813 \n"," \n","\n"," filename \n"," food-101/images/sashimi/241368.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 15.8949 \n"," \n","\n"," filename \n"," food-101/images/beet_salad/2975894.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 15.9107 \n"," \n","\n"," filename \n"," food-101/images/beef_tartare/2559298.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 15.9927 \n"," \n","\n"," filename \n"," food-101/images/oysters/35376.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 16.188 \n"," \n","\n"," filename \n"," food-101/images/bread_pudding/444890.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 16.2864 \n"," \n","\n"," filename \n"," food-101/images/fish_and_chips/3471132.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 16.8724 \n"," \n","\n"," filename \n"," food-101/images/gnocchi/2624421.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 17.0322 \n"," \n","\n"," filename \n"," food-101/images/mussels/1525733.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 17.1672 \n"," \n","\n"," filename \n"," food-101/images/guacamole/485942.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 17.8383 \n"," \n","\n"," filename \n"," food-101/images/grilled_cheese_sandwich/3141489.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 17.8733 \n"," \n","\n"," filename \n"," food-101/images/cheese_plate/3119696.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 18.0098 \n"," \n","\n"," filename \n"," food-101/images/pho/123824.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n"," \n","
\n"," \n"," \n"," \n"," \n"," "]},"metadata":{}},{"output_type":"display_data","data":{"text/plain":[""],"image/png":"iVBORw0KGgoAAAANSUhEUgAAAZgAAAENCAYAAAAykHOlAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAAsTAAALEwEAmpwYAAAiSklEQVR4nO3de5gddZ3n8fcn0RBaCDRJHFYkhAgaEWc0aWZXdB5GeBCCIC6SwQs7im4SVh0YcdkJmGjAcQjscHHJOElwZ0B5MshNkUvIEpCIIgtpcBEhyADhIhcJdgihA4Hw3T9+dZJK5aS7GrrO6T7n83qeek6fqm/9zu8UTb79q9+lFBGYmZkNthHNroCZmbUmJxgzM6uEE4yZmVXCCcbMzCrhBGNmZpVwgjEzs0o4wZiZWSWcYMzMrBJOMGZmVgknGLMGkLRa0qmS7pX0kqT/LelPJC2V9KKk5ZI6s9j/JOl2SWsl/T9Jf5kr5wRJD2TnPCJpVu7YX0p6UtLXJf1B0tOSTmj8tzVLnGDMGudTwKHAu4GjgKXA6cB40v+LJ0naA7ge+HtgN+C/A1dJGp+V8QfgSGAMcAJwvqQpuc/YHdgF2AP4EvBPtcRl1mhOMGaNc2FEPBsRvwduA/5vRNwTES8DPwY+CBwP3BARN0TE6xFxE7ASOAIgIq6PiIcjWQH8H+Avcp/xKnBmRLwaETcA64H3NO4rmm3hBGPWOM/mft5Q5/1OwF7A9Oz22FpJa4GPAP8BQNI0SXdI+mN27AhgXK6c5yPitdz73qxcs4Z7S7MrYGZbeQL4YUTMKB6QtANwFfDXwDUR8aqknwBqbBXNynELxmxouRQ4StJhkkZKGp113r8TGAXsADwHvCZpGvCxZlbWrC9OMGZDSEQ8ARxN6vx/jtSiORUYEREvAicBlwM9wGeBnzapqmb9kh84ZmZmVXALxszMKuEEY2ZmlXCCMTOzSjjBmJlZJTwPJjNu3LiYOHHi4Bfc3Z1ep04d/LLNzJqsu7t7TUSMr3fMCSYzceJEVq5cOfgFK5sDV0XZZmZNJumx7R3zLTIzM6uEE4yZmVXCCcbMzCrhBGNmZpVwJ3/VvBSPmbUpt2DMzKwSTjBmZlYJJ5iqTZ3qSZZm1pbcB1O1u+9udg3MzJrCCcasiSbOvn7zz6vnf7yJNTEbfL5FZmZmlXCCMTOzSlR2i0zSscApwHuAtwGPAT8EzomIjVnMamCvwqnPRsTuhbL2Ay4EPgSsBb4PnBERm3IxAk4D/hswDrgLOCkifj3IX81sQPK3wczaSZV9MGOBW4D/SUoKfw7MA3YHvpqLW0JKHjUb84VI6gSWA/cDRwPvAs4ltb7m5EJnA3OBU4FVpOS2XNL+EfHMIH0nMzMrqbIEExGLCrt+JmkM8BVJfxOxeYr70xFxRx9FnQjsCBwTEeuAm7Jy5kk6JyLWSRpNSjBnRcQCAEm/AlaTktmc+kU3wIwZTftoM7NmanQfzPPAqAGeMw1YliWXmstISeeg7P2BwBjg8lpARLwEXJud3zyLF6fNzKzNVJ5gJI2U1CHpI8BJwD/nWi8AX5K0UdILkq6UVOyTmUy65bVZRDwO9GbHajGbgIcK5z6QizEb0ibOvn7zZtYKGjEP5iVgh+znH5D6SGquAe4AngTeC3wLuE3S+yPihSymk9SHU9STHavFrM93+udiOiSNqg0syJM0E5gJMGHChAF+rZL8yGQza1ONSDAHAh2kTv5vAguALwNExMm5uNsk3Q78GjgBuKDqikXEYmAxQFdXVzXLHnd11T6skuJtaHqzrZDi+Z6EacNR5QkmImprpfxC0hrgEknnRsTDdWLvk/QgMCW3uwfYpU7RndmxWsxOkkYWWjGdQG+91ouZmVWr0Z38tWSzdx8xkW01qyj0o0jak9QqWpWLGQnsUyhrm/4bMzNrjEYnmA9nr4/WOyhpf1JS6M7tXgocJmnn3L7jgA3Aiuz97cA6YHqurA7gqOx8MzNrsCpn8t9ImiD5W9IIrw8DXwd+FBEPS/o4cDxwHfAUKbHMAR4HLs4VtZA0+uxqSWcDk0gTNs+rDV2OiJclzQfmSuphy0TLEWw9idPMzBqkyj6Yu4AvABOB14BHSEu5LMyOPwG8ndSZvytpjsyNwOn5OS8R0SPpENLggGtJI8rOJyWZvPmkhHIaaRWBlcChEfHs4H4ts21VPbTYqy7bcFTlTP65pKVbtnf8XuCQkmXdDxzcT0wA38k2MzNrMj8PpmorVza7BmZmTeEEUzVPsDSzNuXnwZiZWSWcYKo2c2bazMzajBNM1S66KG1mZm3GfTBmw4yHLNtw4QRj9gZ5WX2zvvkWmZmZVcIJxszMKuEEY2ZmlXAfTNWmTOk/xuwN8oPJbChzgqlad3f/MWZmLcgJxmwAPHLMrDz3wZiZWSWcYKompc3MrM04wZiZWSUqSzCSjpV0u6TnJb0s6UFJcySNysVI0umSnpC0QdLPJX2gTln7SbpZUq+kpySdKWlkIaZUWWatbOLs6zdvZs1WZQtmLHAL8F+BacC/AN8AzsvFzCY99fJs4ChgPbBc0u61AEmdwHIggKOBM4GvA2cUPq/fsszMrHGqfGTyosKun0kaA3xF0t8AO5CSwlkRsQBA0q+A1cBXgTnZeScCOwLHRMQ64KasnHmSzomIdZJGlyzLzMwapNF9MM8DtVtkBwJjgMtrByPiJeBaUounZhqwLEsuNZeRks5BAyzLzMwapPIEI2mkpA5JHwFOAv45IgKYDGwCHiqc8kB2rGYysCofEBGPA725uLJlmZlZgzRiouVLpNthAD8ATs1+7gTWR8SmQnwP0CFpVERszOLW1im3Jzs2kLK2ImkmMBNgwoQJA/pSpS0q3im04cSd5WZvXCMSzIFAB/DnwDeBBcCXG/C5/YqIxcBigK6urqjkQ/y4ZDNrU5UnmIi4O/vxF5LWAJdIOpfUuthJ0shCy6MT6M21OHqAXeoU3Zkdq8WUKcvMzBqk0Z38tWSzN6lfZSSwTyGm2OeyikI/iqQ9Sa2iVbmYMmU13uLFaTMzazONTjAfzl4fBW4H1gHTawcldZDmsCzNnbMUOEzSzrl9xwEbgBXZ+7JlNd6sWWkzM2szld0ik3QjaYLkb0kjvD5MmiD5o4h4OIuZD8yV1ENqaZxCSnoX5opaSBp9drWks4FJwDzgvNrQ5Yh4uWRZZm0jP0DBz4mxZqiyD+Yu4AvAROA14BHgNFLCqJlPSgKnkWb+rwQOjYhnawER0SPpENLggGtJI8rOJyUZBlKWmZk1TpUz+eeSlm7pKyaA72RbX3H3AwcPRllmZtYYXk3ZzMwq4SdamrWB4oRR98lYI7gFY2ZmlXALpmpRzQIBZmZDnVswZmZWCScYMzOrhG+RVW3q1PTa3d3celhpXkHZbHA4wVTt7rv7jzEza0G+RWZmZpVwC8asDXmdMmsEt2DMzKwSTjBmZlYJJxgzM6uE+2CqNmNGs2tgZtYUTjBV8+OSzaxNVXaLTNJ0ST+V9HtJ6yV1S/pMIeZWSVFnG12I20PSjyW9KGmNpAXZI5GLnzlD0kOSXs4+75Cqvp+ZmfWtyhbMKcCjwNeANcARwBJJ4yIi/xjjnwGnF859pfaDpLcCy4CNwKeBXYHzstfjc3GfIT0tcx7wC+AE4DpJB0TEfYP4vQamNoO/NqPfzKxNVJlgjoqINbn3t0h6Bynx5BPMHyPijj7KORZ4L7BPRDwKIOlV4DJJZ0TEQ1ncPOCSiPh2FrMC+CAwm1wiariurvTqVZXNrM1UdouskFxq7gHeMcCipgF31ZJL5iekFs3hAJImAe8GLs99/uvAFdn5ZmbWYI0epvwh4HeFfR+T1JttyyT9aeH4ZGBVfkdEbAQezo6Re90qDngA2E3S+DdfdTMzG4iGjSLLOtw/CXwxt3sFcAnw78BewDeA2yT9WUSszmI6gbV1iuzJjpF7Lcb15I4/V6dOM4GZABMmTCj7VawFeQVls8HXkAQjaSKwBLgmIi6u7Y+Ib+XCbpO0nNQK+dtsq1RELAYWA3R1dbmTxNpSMbl6bTIbLJXfIpO0G7AUeAz4XF+xEfEM8EtgSm53D7BLnfBOtrRQaq/FuM7CcTMza5BKE0w2V+U6YBRwZET0ljgtsq1mFVv6WGrljgImsaXPpfa6VVz2/o8Rsc3tMTMzq1aVEy3fQhrFtS9weET8ocQ5uwMfAfKPf1wKHCBpr9y+TwA7ADcCRMQjpMED03NljcjeL31z3+RNWrkybWZmbabKPpjvkSZXngyMlTQ2d+we4D3AWaQk9BgwATgNeB24IBd7Janz/2pJc0m3wc4HluTmwECaB3OppNWk22yfJyW3zw7y9xoYT7A0szZVZYL5WPb63TrH9gaeB0RKMmOBF4FbgU9GxOO1wIh4VdLhwALSPJdXgMuAU/MFRsS/SdoJ+DtgLvBb0m255s3iNzNrY5UlmIiYWCLsiJJlPUka4txf3EXARWXKbJiZM9OrF700szbj58FU7aKL0mZm1macYMzMrBKlEoyk91ddETMzay1lWzDfk3SnpC9Lqjfp0czMbCulOvkj4i8k7UtaR6xb0p3Av0bETZXWzqwiXnvMrHql+2CyOSdzSMOADwL+l6RVko6pqnJmZjZ8lWrBZEvonwB8HLiJ9DCxu7MHiP0KuLq6Kg5zU6b0H2Nm1oLKzoO5EPg+cHpEbKjtjIinJM2ppGatoru7/xizISR/+9ArK9ubUTbBfBzYEBGbYPM6X6MjojciflhZ7czMbNgq2wezHNgx974j22dmZlZX2QQzOiLW195kP3dUU6UWI6XNzKzNlE0wL0na3FstaSqwoY94MzNrc2X7YP4WuELSU6QVkHcHjquqUmZmNvyVnWh5l6TJpGe4ADwYEa9WVy0zMxvuBrJc/wHAxOycKZKIiB9UUiszMxv2yi52+UPgH0mPMz4g27r6OWe6pJ9K+r2k9ZK6JX2mTtwMSQ9JejmLOaROzB6SfizpRUlrJC2QtM0ggzJlmZlZY5RtwXQB+0VEDKDsU4BHga8Ba0gPF1siaVxEXAiQJZyFpMcd/4K0WsB1kg6oPYlS0luBZcBG4NPArsB52evxtQ8rU5aZmTVO2QRzH6lj/+kBlH1URKzJvb8lW1rmFNLKAJCSwSUR8W0ASSuADwKz2ZI8jgXeC+wTEY9mca8Cl0k6I1sjrWxZjbdoUdM+2sysmcommHHA/dkqyq/UdkbEJ7Z3QiG51NwDfApA0iTg3cDJuXNel3RFfh8wDbirllwyPyG1aA4HHhpAWY1Xe2SymVmbKZtg5g3S530I+F328+TsdVUh5gFgN0njI+K5LO7+fEBEbJT0cK6MsmVZG/MS/QPndcnszSg7THmFpL2AfSNiedbBPnIgH5R1uH+S9EwZgM7sdW0htCd3/LnstRhTi+vMxZYpq1inmcBMgAkTJvRZ/zds8eL06paMmbWZsqPIZgBXArUOhT1It6lKkTQRWAJcExEXD6iGFYqIxRHRFRFd48ePr+ZDZs1Km5lZmym7VMxXgA8D62Dzw8feXuZESbsBS4HHgM/lDtVaF8VHMHcWjvfUianF9RRi+yvLzMwapGyCeSUiNtbeSHoL0O+Q5exW2nXAKODIiOjNHa71l0wunDYZ+GOuz2RVMUbSKGBSroyyZZmZWYOUTTArJJ0O7CjpUOAK4Nq+TsiS0BXAvsDhEfGH/PGIeITU4T89d86I7P3SXOhS4ICsD6jmE8AOwI0DLMvMzBqk7Ciy2cCXgN8As4AbSE+47Mv3SJMrTwbGShqbO3ZPRLxCGp12qaTVwC+Bz5MS0mdzsVcC3wCuljSXdBvsfGBJbg4MJcsyM7MGKTuK7HXgomwr62PZ63frHNsbWB0R/yZpJ+DvgLnAb0m30jbPvI+IVyUdDiwALifNw7kMOLVQx37LMjOzximVYCQ9Sp0+l4iYtL1zImJimbIjot/EFRFPkoY4v+myzMysMQayFlnNaFLfxm6DX50WNKDl28yGruJEVU+8tP6U6uSPiOdz2+8j4gLAv11mZrZdZW+RTcm9HUFq0QzkWTJmTeHlYcyap2ySODf382vAauCvBr02rWjq1PTa3d3cepiZNVjZUWQfrboiLevuu5tdAzOzpih7i+yUvo5HxHmDUx0zM2sVAxlFdgDw0+z9UcCdwEPbPcPMzNpa2QTzTmBKRLwIIGkecH1ENO9JkWZmNqSVXYvsT0hPkKzZmO0zMzOrq2wL5gfAnZJ+nL3/JHBJJTUyM7OWUHYU2XckLQX+Itt1QkTcU121WsiMGc2ugVkl/Dhl689AJkt2AOsi4l8ljZe0d0Q8WlXFWkbtkclmZm2m7COTv0Vapfi0bNdbgUurqpSZmQ1/ZTv5/zPpIV8vAUTEU8DOVVWqpXR3exa/mbWlsrfINkZESAoASW+rsE6tpStbiNqrKptZmynbgrlc0iJgV0kzgOX4uStmZtaHfhOMJAE/Ij26+CrgPcA3I+LCEufuI2mRpHslbZJ0a52Y1ZKisD1TJ24/STdL6pX0lKQzJY0s1lXS6ZKekLRB0s8lfaC/eprZmzNx9vWbN7Oafm+RZbfGboiI9wM3DbD89wFHAHeQBgZszxIgn7DykzqR1ElqNd0PHA28i7TC8whgTi50NulxyacCq4BTgOWS9o+IbZKWmZlVp2wfzN2SDoiIuwZY/rURcQ2ApCuBcduJezoi7uijnBOBHYFjImIdcJOkMcA8SedExDpJo0kJ5qyIWJB95q9Ijxb4KlsnImtR/gvabOgo2wfzH4E7JD2c3e76jaR7+zspIl5/c9XbbBqwLEsuNZeRks5B2fsDgTHA5bnPfwm4NjvfzMwaqM8EI2lC9uNhwCTgYNJKykdmr4PlS5I2SnpB0pWS9iocn0y65bVZRDwO9GbHajGb2HaF5wdyMWZm1iD93SL7CWkV5cckXRURn6qgDteQ+mieBN4LfAu4TdL7I+KFLKYTWFvn3J7sWC1mfURsqhPTIWlURBT7dmYCMwEmTJhAJVaurKZcM7Mhrr8Eo9zPk6qoQEScnHt7m6TbgV8DJwAXVPGZuc9eDCwG6OrqqmaiSu2RyWZmbaa/PpjYzs+ViYj7gAeBKbndPcAudcI7s2O1mJ2KQ5ezmN5i68XMzKrVXwvmzyStI7Vkdsx+JnsfETGmonoFWye0VRT6USTtSVqAc1UuZiSwDylB1WzTf9NQM2emVy96aWZtps8WTESMjIgxEbFzRLwl+7n2vpLkIml/UlLIL+C1FDhMUn79s+OADcCK7P3twDpgeq6sDtJghKVV1LWUiy5Km5lZmxnIcv0Dlv0Df0T2dg9gjKRjs/c3AB8FjgeuA54iJZY5wOPAxbmiFgInAVdLOpvUHzQPOK82dDkiXpY0H5grqYctEy1HsPUkTmsxnvsytBT/e/hZMe2r0gQDvB24orCv9n5v4Iks5gJgV+B54Ebg9Pycl4jokXQIsIA0r2UtcD4pyeTNJyWU04CxwErg0Ih4dpC+j5mZlVRpgomI1Ww9Eq2eQ0qWdT9pHk5fMQF8J9vMzKyJys7kNzMzGxAnGDMzq0TVfTA2ZUr/MWZmLcgJpmp+XLKZtSnfIjMzs0o4wZiZWSV8i6xqykZpR0OWcmsLnlhpNjy4BWNmZpVwgjEzs0r4FpmZVSp/S9PrkrUXt2DMzKwSTjBmZlYJJxgzM6uE+2CqtmhRs2tgZtYUCs/PAKCrqytWrlzZ7GrYdnjuS+txh39rkNQdEV31jlV6i0zSPpIWSbpX0iZJt9aJkaTTJT0haYOkn0v6QJ24/STdLKlX0lOSzpQ08o2UZWZm1au6D+Z9pEcmPwj8bjsxs4G5wNnAUcB6YLmk3WsBkjqB5UAARwNnAl8HzhhoWQ23eHHazMzaTNUJ5tqI2DMipgO/LR6UNJqUFM6KiAURsRyYTkokX82FngjsCBwTETdFxEJScjlF0pgBltVYs2alzcyszVSaYCLi9X5CDgTGAJfnznkJuBaYloubBiyLiHW5fZeRks5BAyzLzMwaoNnDlCcDm4CHCvsfyI7l41blAyLicaA3F1e2LDMza4BmJ5hOYH1EbCrs7wE6JI3Kxa2tc35PdmwgZW0maaaklZJWPvfcc2/0O5iZWR3NTjBNFRGLI6IrIrrGjx/f7OqYmbWUZk+07AF2kjSy0PLoBHojYmMubpc653dmxwZSlg0TnvtiNrw1O8GsAkYC+5CGMtcU+1xWUehHkbQn0JGLK1uWmQ0BxT8gPPGy9TT7FtntwDrScGIAJHWQ5rAszcUtBQ6TtHNu33HABmDFAMtqrAg/zdLM2lKlLZjsH/gjsrd7AGMkHZu9vyEieiXNB+ZK6iG1NE4hJb4Lc0UtBE4CrpZ0NjAJmAecVxu6HBEvlyzLzMwaoOpbZG8Hrijsq73fG1gNzCclgdOAscBK4NCIeLZ2QkT0SDoEWECa17IWOJ+UZPL6LcvMzBqj0gQTEasB9RMTwHeyra+4+4GDB6Oshpo6Nb12dze3HmZmDdbsTv7Wd/fdza6BmVlTOMGY2ZCQH1XmEWWtodmjyMzMrEW5BWNDhidWmrUWt2DMzKwSTjBmZlYJ3yKr2owZza6B2bDjDv/W4ARTNT8u2czalG+RmZlZJdyCqVptBn9tRr9txSPHzFqXE0zVurrSq1dUNrM241tkZmZWCScYMzOrhBOMmZlVwn0w1nDu2DdrD01vwUj6gqSos52Yi5Gk0yU9IWmDpJ9L+kCdsvaTdLOkXklPSTpT0siGfiEzG1QTZ1+/1WbDx1BqwRwMbMi9fyT382xgLnAqWx6FvFzS/hHxDICkTmA5cD9wNPAu4FxSEp1Tee1tu/yPgll7GkoJ5q6IWF/cKWk0KcGcFRELsn2/Ij1u+atsSR4nAjsCx0TEOuAmSWOAeZLOyfY13sqVTflYM7Nma/otshIOBMYAl9d2RMRLwLXAtFzcNGBZIZFcRko6BzWgnvVNnepJlmbWloZSgnlY0muSHpQ0K7d/MrAJeKgQ/0B2LB+3Kh8QEY8DvYU4MzNrgKFwi+xpUv/KncBI4NPAQkkdEXE+0Amsj4hNhfN6gA5JoyJiYxa3tk75PdmxbUiaCcwEmDBhwiB8lTpmzkyvXvTSzNpM0xNMRCwDluV2Lc36XeZI+m7Fn70YWAzQ1dVVzVouF12UXp1gzAaFl/IfPobSLbK8K4HdgImkFshOdYYbdwK9WeuFLG6XOmV1ZsfMzKyBmt6C2Y7Iva4i3TrbB3gwF1Psc1lFoa9F0p5ARyHOGsBDk81sqLZgjgXWAI8BtwPrgOm1g5I6gKOApblzlgKHSdo5t+840tyaFVVX2MzMttb0Foykq0gd/PeSWirHZdtJEfE68LKk+cBcST1smWg5ArgwV9RC4CTgaklnA5OAecB5TZsD02bcajGzvKYnGNJtry8CewIizcT/64j4YS5mPimhnAaMBVYCh0bEs7WAiOiRdAiwgDRHZi1wPinJmJlZgyn8ICwgjSJbWcWs+9oky9qTLVuIWyw2lHhEWXNI6o6IrnrHhkILprW1WGJxUrGhqvi76YTTfEO1k9/MzIY5JxgzM6uEb5FVTUqvw7ivy7fFzOyNcIIxs5bkJWWaz7fIzMysEm7BmFnLc2umOZxgDPD/gGY2+JxgbBvu1DezweA+GDMzq4RbMFVbtKipH+/WiNnWPOO/cZxgqlZ7ZLKZWZtxgjGztuYBLtVxgqna4sXptYEtGd8WM7OhwAmmarNmpdcKE4wTipkNRU4wZmYZ3y4bXC2XYCTtR3qU8odIT7X8PnBGRGxqZr0Gg1sqZo3T1/9vTj7ltFSCkdQJLCc9dvlo4F3AuaT5PnOaWDUzayEe6lxOSyUY4ERgR+CYiFgH3CRpDDBP0jnZviHNrRSz4Wd7/9+2e+JptQQzDVhWSCSXAWcDBwHXNqVWBU4iZu2h3W+ztVqCmQzckt8REY9L6s2ODXqC6S9ZrC4ZZ2btZbD/TRiKCavVEkwnqWO/qCc7thVJM4Ha+OH1kh4c7Aqp9sPZRw520Y0wDljT7EoMYb4+ffP16dugXh+dPVglDdhe2zvQaglmQCJiMbC42fUYqiStjIiuZtdjqPL16ZuvT9/a4fq02mrKPcAudfZ3ZsfMzKxBWi3BrCL1tWwmaU+gIztmZmYN0moJZilwmKSdc/uOAzYAK5pTpWHNtw/75uvTN1+fvrX89VFENLsOgyabaHk/cB9paPIk4DzggojwREszswZqqQQDm5eKWcDWS8XMa4WlYszMhpOWSzBmZjY0tFofjA2ApC9IijrbibkYSTpd0hOSNkj6uaQPNLHalZG0j6RFku6VtEnSrXViSl0PSftJullSr6SnJJ0paWQjvkdVSl6f1XV+n56pE9dS10fSdEk/lfR7SesldUv6TJ24GZIekvRyFnNInZg9JP1Y0ouS1khaIKmjMd9kcLX1PBjb7GDSQIiaR3I/zwbmAqeSRuKdAiyXtH9EbPMPxzD3PuAI4A7grduJ6fd6tPCiq2WuD8AS0ormNRvzB1v0+pwCPAp8jTR58ghgiaRxEXEhQJZwFgLzgF8AJwDXSTogIu7LYt4KLCNds08Du5L6kXcFjm/c1xkkEeGtTTfgC0AAO23n+GjgBeCbuX1vA54D/r7Z9a/geozI/XwlcOsbuR7AaaR5V2Ny+/4H0JvfN9y2/q5Ptn818I/9lNNy1wcYV2ffEuDR3PsHgX/JX0/gN8CluX2fATYBe+f2/RXwOrBvs7/nQDffIrO+HAiMAS6v7YiIl0hruk1rVqWqEhGv9xNS9npsb9HVHUmLrg5LJa5PWS13fSKi3pIv9wDvAJA0CXg3W//uvA5cwba/O3dFxKO5fT8htWgOH9xaV88JxgAelvSapAclzcrtn0z6a+qhQvwDFCa0tomy12MyhYm9EfE46S/0drhuX5K0UdILkq6UVFyrql2uz4eA32U/175XccL3A8Buksbn4orXZiPwMMPw2rgPpr09TepPuBMYSbrnu1BSR0ScT1piZ31sO8S7B+iQNCr75W8XZa/HgBZdbTHXkPpongTeC3wLuE3S+yPihSym5a9P1nn/SeCL2a7a91pbCO3JHX+OFrs2TjBtLCKWkToUa5ZKGg3MkfTdJlXLhrGIODn39jZJtwO/JnVoX9CMOjWapImk/pdrIuLi5tamuXyLzIquBHYDJpL+atqpzvDRTqC3zVovUP56eNHVTKTRUQ8CU3K7W/b6SNqNtGTVY8Dncodq36v4vTsLx1vq2jjBWFHkXleRbp3tU4jZ5j5xmyh7Pbzo6taCLb9X0KLXJ5urch0wCjgyInpzh2vfq9iPMhn4Y0Q8l4srXptRpGWvht21cYKxomNJ4/gfA24H1gHTawez/4mOIv2V1m7KXg8vupqRtD/pH8zu3O6Wuz6S3kIaEbYvcHhE/CF/PCIeIXX45393RmTvi787BxQGRnwC2AG4sZraV8d9MG1M0lWkDv57SX+ZH5dtJ2VDKF+WNB+YK6mHLRMLR7D1RLqWkCWLI7K3ewBjJB2bvb8hInpLXo+FwEnA1ZJqi67OA84rDM0dVvq7PsBHSZMBrwOeIiWWOcDjwMW5olrx+nyPdG1OBsZKGps7dk9EvEL6jpdKWg38Evg8KSF9Nhd7JfAN0rWZS7pddj6wJCKKoxeHvmZPxPHWvA34B9L98V7SX4/dwH8pxIj0C/9kFnMb8MFm172i6zGRLbdzitvEgVwPYD/glizmaeDbwMhmf8cqrw/wp8DNpNFQrwLPkBLLO1r9+pAmmPb5u5PFzQD+HXgFuBs4pE5Z7yTNfVkPPA/8E9DR7O/4RjYvdmlmZpVwH4yZmVXCCcbMzCrhBGNmZpVwgjEzs0o4wZiZWSWcYMzMrBJOMGZmVgknGDMzq8T/B2JUN8elojjbAAAAAElFTkSuQmCC\n"},"metadata":{"needs_background":"light"}}],"source":["# visualize dark images\n","fd.vis.stats_gallery(metric='dark')"]},{"cell_type":"code","execution_count":24,"id":"3b40978e","metadata":{"id":"3b40978e","colab":{"base_uri":"https://localhost:8080/","height":1000},"executionInfo":{"status":"ok","timestamp":1677675695919,"user_tz":-120,"elapsed":31370,"user":{"displayName":"Tom Shani","userId":"00667426488827942961"}},"outputId":"30f0121c-594d-4e8a-e9fa-d49e3b412496"},"outputs":[{"output_type":"stream","name":"stderr","text":["100%|██████████| 25/25 [00:00<00:00, 77.49it/s]\n"]},{"output_type":"stream","name":"stdout","text":["Stored mean visual view in fastdup_food101/galleries/stats_20230301130104/mean.html\n"]},{"output_type":"display_data","data":{"text/plain":[""],"text/html":[" \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," mean Image Report \n"," \n"," \n","\n","\n","\n"," \n"," \n"," \n"," \n"," \n","
\n","
\n","
mean Image Report
\n","
\n","
\n","
\n"," \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 234.6172 \n"," \n","\n"," filename \n"," food-101/images/foie_gras/71445.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 233.1629 \n"," \n","\n"," filename \n"," food-101/images/foie_gras/3267247.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 231.8362 \n"," \n","\n"," filename \n"," food-101/images/risotto/71446.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 230.4804 \n"," \n","\n"," filename \n"," food-101/images/crab_cakes/445057.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 227.8502 \n"," \n","\n"," filename \n"," food-101/images/beef_carpaccio/3169022.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 227.2737 \n"," \n","\n"," filename \n"," food-101/images/foie_gras/71461.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 227.0895 \n"," \n","\n"," filename \n"," food-101/images/foie_gras/583722.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 226.0271 \n"," \n","\n"," filename \n"," food-101/images/bruschetta/2275519.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 225.7063 \n"," \n","\n"," filename \n"," food-101/images/hamburger/1585333.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 223.8659 \n"," \n","\n"," filename \n"," food-101/images/ice_cream/3214424.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 222.3561 \n"," \n","\n"," filename \n"," food-101/images/foie_gras/35694.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 220.7305 \n"," \n","\n"," filename \n"," food-101/images/hamburger/3578279.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 220.7305 \n"," \n","\n"," filename \n"," food-101/images/hamburger/3783650.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 220.2333 \n"," \n","\n"," filename \n"," food-101/images/macarons/2160644.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 219.5529 \n"," \n","\n"," filename \n"," food-101/images/sashimi/1245652.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 217.8185 \n"," \n","\n"," filename \n"," food-101/images/miso_soup/390698.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 216.7453 \n"," \n","\n"," filename \n"," food-101/images/baklava/1542333.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 216.5455 \n"," \n","\n"," filename \n"," food-101/images/omelette/1577972.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 215.9841 \n"," \n","\n"," filename \n"," food-101/images/cannoli/421018.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 215.8299 \n"," \n","\n"," filename \n"," food-101/images/ice_cream/612697.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 215.7077 \n"," \n","\n"," filename \n"," food-101/images/chocolate_cake/274940.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 215.5181 \n"," \n","\n"," filename \n"," food-101/images/beet_salad/3568296.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 214.709 \n"," \n","\n"," filename \n"," food-101/images/ravioli/3725624.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 213.5155 \n"," \n","\n"," filename \n"," food-101/images/red_velvet_cake/1843540.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n","
\n"," \n"," \n"," Info \n"," \n","\n"," mean \n"," 213.2664 \n"," \n","\n"," filename \n"," food-101/images/macarons/1057852.jpg \n"," \n"," \n","
\n","
\n","
\n","
\n"," \n","
\n"," \n"," \n"," \n"," \n"," "]},"metadata":{}},{"output_type":"display_data","data":{"text/plain":[""],"image/png":"iVBORw0KGgoAAAANSUhEUgAAAZgAAAENCAYAAAAykHOlAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAAsTAAALEwEAmpwYAAAiWUlEQVR4nO3de5RdZZnn8e8v0RBKCBRJbEYkhAgaEbs1KXpGtBctLISgiIOk8cK0opOEURtaHKYDJhqwbQLTXBzSdlI43aCsNHJT5BIyBCSiyEAlOIgQpIFwEUSCFUKoQCA888e7T7Kzc6rqFNQ+t/p91trrnLP3s996z6bIU+9+L1sRgZmZ2XAb1egKmJlZe3KCMTOzUjjBmJlZKZxgzMysFE4wZmZWCicYMzMrhROMmZmVwgnGzMxK4QRjZmalcIIxqwNJayWdJuleSS9K+t+S/kTSMkkvSFohqTOL/U+S7pC0XtL/k/SXuXJOlPRAds4jkubkjv2lpCclfU3SHyQ9LenE+n9bs8QJxqx+PgkcDrwTOBpYBpwBTCT9v3iypL2AG4C/B/YA/jtwtaSJWRl/AD4GjANOBC6QNC33M/YEdgP2Ar4I/FMlcZnVmxOMWf1cFBHPRMTvgNuB/xsR90TES8CPgPcDJwA3RsSNEfFaRNwM9ABHAUTEDRHxcCQrgf8D/EXuZ7wCnBURr0TEjcBG4F31+4pm2zjBmNXPM7n3m6p83gXYB5iZ3R5bL2k98CHgPwBImiHpTkl/zI4dBUzIlfNcRLya+9yXlWtWd29qdAXMbDtPAD+IiFnFA5J2Aq4G/hq4NiJekfRjQPWtollt3IIxay6XAUdLOkLSaEljs877twNjgJ2AZ4FXJc0APtLIypoNxAnGrIlExBPAMaTO/2dJLZrTgFER8QJwMnAF0At8BvhJg6pqNij5gWNmZlYGt2DMzKwUTjBmZlYKJxgzMyuFE4yZmZXC82AyEyZMiMmTJze6GmZmQ7NqVXqdPr1BP37VuoiYWO2YE0xm8uTJ9PT0NLoaZmZDo2yebYP+/ZL0WH/HfIvMzMxK4QRjZmalcIIxM7NSOMGYmVkp3MlvZtbKmni5L7dgzMysFE4wZmZWCicYM7NWNn16wyZZDsZ9MGZmrWz16kbXoF9OMGYNNHnuDVvfr1340QbWxGz4+RaZmZmVwgnGzMxKUdotMknHAacC7wLeAjwG/AA4NyI2ZzFrgX0Kpz4TEXsWyjoAuAj4ALAe+B5wZkRsycUIOB34b8AE4G7g5Ij41TB/NbMhyd8GMxtJyuyDGQ/cCvxPUlL4c2ABsCfwlVzcUlLyqNicL0RSJ7ACuB84BngHcB6p9TUvFzoXmA+cBqwhJbcVkg6MiN8P03cyM7MalZZgImJJYddPJY0DvizpbyK2Tj99OiLuHKCok4CdgWMjYgNwc1bOAknnRsQGSWNJCebsiFgEIOmXwFpSMptXvWgzsxY3a1aja9CvevfBPAeMGeI5M4DlWXKpuJyUdA7JPh8MjAOuqARExIvAddn5Zmbtqbs7bU2o9AQjabSkDkkfAk4G/jnXegH4oqTNkp6XdJWkYp/MVNItr60i4nGgLztWidkCPFQ494FcjFlTmzz3hq2bWTuoxzyYF4GdsvffJ/WRVFwL3Ak8Cbwb+CZwu6T3RsTzWUwnqQ+nqDc7VonZmO/0z8V0SBpTGViQJ2k2MBtg0qRJQ/xaZmZNoMGPTB5IPRLMwUAHqZP/G8Ai4EsAEXFKLu52SXcAvwJOBC4su2IR0Q10A3R1dTXvkqTWct5oK6R4vidhWr+6utJrE66qXHqCiYjKOgY/l7QOuFTSeRHxcJXY+yQ9CEzL7e4FdqtSdGd2rBKzi6TRhVZMJ9BXrfViZmblqncnfyXZ7DtATGRbxRoK/SiS9ia1itbkYkYD+xXK2qH/xszM6qPeCeaD2euj1Q5KOpCUFFbldi8DjpC0a27f8cAmYGX2+Q5gAzAzV1YHcHR2vpmZ1VmZM/lvIk2Q/A1phNcHga8BP4yIhyV9FDgBuB54ipRY5gGPA5fkilpMGn12jaRzgCmkCZvnV4YuR8RLkhYC8yX1sm2i5Si2n8RpZmZ1UmYfzN3A54HJwKvAI6SlXBZnx58A3krqzN+dNEfmJuCM/JyXiOiVdBhpcMB1pBFlF5CSTN5CUkI5nbSKQA9weEQ8M7xfy2xHZQ8t9qrL1orKnMk/n7R0S3/H7wUOq7Gs+4FDB4kJ4NvZZmZmDebnwZiZtbKenkbXoF9OMGZmrawJJ1hW+HkwZmZWCicYM7NWNnt22pqQE4yZWSu7+OK0NSH3wZi1GA9ZtlbhBGP2OnlZfbOB+RaZmZmVwgnGzMxK4QRjZmalcB+MWQvzg8mMadMGj2kQJxgzs1a2atXgMQ3iBGM2BB45ZlY798GYmVkpnGDMzFqZlLYm5ARjZmalKC3BSDpO0h2SnpP0kqQHJc2TNCYXI0lnSHpC0iZJP5P0viplHSDpFkl9kp6SdJak0YWYmsoya2eT596wdTNrtDJbMOOBW4H/CswA/gX4OnB+LmYu6amX5wBHAxuBFZL2rARI6gRWAAEcA5wFfA04s/DzBi3LzMzqp8xHJi8p7PqppHHAlyX9DbATKSmcHRGLACT9ElgLfAWYl513ErAzcGxEbABuzspZIOnciNggaWyNZZmZWZ3Uuw/mOaByi+xgYBxwReVgRLwIXEdq8VTMAJZnyaXiclLSOWSIZZmZWZ2UnmAkjZbUIelDwMnAP0dEAFOBLcBDhVMeyI5VTAXW5AMi4nGgLxdXa1lmZlYn9Zho+SLpdhjA94HTsvedwMaI2FKI7wU6JI2JiM1Z3Poq5fZmx4ZS1nYkzQZmA0yaNGlIX8pGBneWW9NbUuyNaB71SDAHAx3AnwPfABYBX6rDzx1URHQD3QBdXV3R4OqYmQ1dkz4uGeqQYCJidfb255LWAZdKOo/UuthF0uhCy6MT6Mu1OHqB3aoU3Zkdq8TUUpaZmdVJvTv5K8lmX1K/ymhgv0JMsc9lDYV+FEl7k1pFa3IxtZRlZtZeurvT1oTqnWA+mL0+CtwBbABmVg5K6iDNYVmWO2cZcISkXXP7jgc2ASuzz7WWZWbWXubMSVsTKu0WmaSbSBMkf0Ma4fVB0gTJH0bEw1nMQmC+pF5SS+NUUtK7KFfUYtLos2sknQNMARYA51eGLkfESzWWZTZi5Aco+Dkx1ghl9sHcDXwemAy8CjwCnE5KGBULSUngdNLM/x7g8Ih4phIQEb2SDiMNDriONKLsAlKSYShlmZlZ/ZQ5k38+aemWgWIC+Ha2DRR3P3DocJRlZmb14dWUzcysFH6ipdkIUJww6j4Zqwe3YMzMrBRuwZiZtbJo3kVI3IIxM7NSOMGYmVkpfIvMrMArKFtLmT49va5a1dh6VOEEY2bWylavHjymQXyLzMzMSuEWjNkI5HXKrB7cgjEzs1I4wZiZWSmcYMzMrBTugzEza2WzZjW6Bv1ygjEza2VN+rhkKPEWmaSZkn4i6XeSNkpaJenThZjbJEWVbWwhbi9JP5L0gqR1khZlj0Qu/sxZkh6S9FL28w4r6/uZmdnAymzBnAo8CnwVWAccBSyVNCEi8o8x/ilwRuHclytvJL0ZWA5sBj4F7A6cn72ekIv7NOlpmQuAnwMnAtdLOigi7hvG72Vm1jwqM/grM/qbSJkJ5uiIWJf7fKukt5ESTz7B/DEi7hygnOOAdwP7RcSjAJJeAS6XdGZEPJTFLQAujYhvZTErgfcDc8klIjOzttLVlV6bcFXl0m6RFZJLxT3A24ZY1Azg7kpyyfyY1KI5EkDSFOCdwBW5n/8acGV2vpmZ1Vm9hyl/APhtYd9HJPVl23JJf1o4PhVYk98REZuBh7Nj5F63iwMeAPaQNPGNV93MzIaibqPIsg73TwBfyO1eCVwK/DuwD/B14HZJfxYRa7OYTmB9lSJ7s2PkXotxvbnjz1ap02xgNsCkSZNq/SrWhryCstnwq0uCkTQZWApcGxGXVPZHxDdzYbdLWkFqhfxttpUqIrqBboCurq7mu4FpVgfF5Oq1yWy4lH6LTNIewDLgMeCzA8VGxO+BXwDTcrt7gd2qhHeyrYVSeS3GdRaOm5lZnZSaYLK5KtcDY4CPRURfDadFtlWsYVsfS6XcMcAUtvW5VF63i8s+/zEidrg9ZmZm5SpzouWbSKO49geOjIg/1HDOnsCHgPyj2ZYBB0naJ7fv48BOwE0AEfEIafDAzFxZo7LPy97YNzEza2I9PWlrQmX2wXyXNLnyFGC8pPG5Y/cA7wLOJiWhx4BJwOnAa8CFudirSJ3/10iaT7oNdgGwNDcHBtI8mMskrSXdZvscKbl9Zpi/l5lZ82jCCZYVZSaYj2Sv36lybF/gOUCkJDMeeAG4DfhERDxeCYyIVyQdCSwizXN5GbgcOC1fYET8m6RdgL8D5gO/Id2W8yx+M7MGKC3BRMTkGsKOqrGsJ0lDnAeLuxi4uJYyzczawuzZ6bUJF73082DMzFrZxRenrQk5wZiZWSlqSjCS3lt2RczMrL3U2oL5rqS7JH1JUrVJj2ZmZtupqZM/Iv5C0v6kdcRWSboL+NeIuLnU2pmVxGuPmZWv5j6YbM7JPNIw4EOA/yVpjaRjy6qcmZm1rppaMNkS+icCHwVuJj1MbHX2ALFfAteUV0UzM+vXtGmDxzRIrfNgLgK+B5wREZsqOyPiKUnzSqmZmTVE/vahV1ZuAatWDR7TILUmmI8CmyJiC2xd52tsRPRFxA9Kq52ZmbWsWvtgVgA75z53ZPvMzMyqqjXBjI2IjZUP2fuOcqpkZmY1k9LWhGpNMC9K2tqTJGk6sGmAeDMzG+Fq7YP5W+BKSU+RVkDeEzi+rEqZmVnrq3Wi5d2SppKe4QLwYES8Ul61zMys1Q1luf6DgMnZOdMkERHfL6VWZmbW8mpd7PIHwD+SHmd8ULZ1DXLOTEk/kfQ7SRslrZL06SpxsyQ9JOmlLOawKjF7SfqRpBckrZO0SNIOgwxqKcvMzOqj1hZMF3BARMQQyj4VeBT4KrCO9HCxpZImRMRFAFnCWUx63PHPSasFXC/poMqTKCW9GVgObAY+BewOnJ+9nlD5YbWUZWZm9VNrgrmP1LH/9BDKPjoi1uU+35otLXMqaWUASMng0oj4FoCklcD7gblsSx7HAe8G9ouIR7O4V4DLJZ2ZrZFWa1lmZu1lyZJG16BftSaYCcD92SrKL1d2RsTH+zuhkFwq7gE+CSBpCvBO4JTcOa9JujK/D5gB3F1JLpkfk1o0RwIPDaEsM7P2UnlkchOqNcEsGKaf9wHgt9n7qdnrmkLMA8AekiZGxLNZ3P35gIjYLOnhXBm1lmUjmJfoHzqvS2ZvRK3DlFdK2gfYPyJWZB3so4fyg7IO90+QnikD0Jm9ri+E9uaOP5u9FmMqcZ252FrKKtZpNjAbYNKkSQPW38ysKXV3p9cmbMnUOopsFnAVULnZtxfpNlVNJE0GlgLXRsQlQ6phiSKiOyK6IqJr4sSJja6OmdnQzZmTtiZU61IxXwY+CGyArQ8fe2stJ0raA1gGPAZ8Nneo0rooPoK5s3C8t0pMJa63EDtYWWZmVie1JpiXI2Jz5YOkNwGDDlnObqVdD4wBPhYRfbnDlf6SqYXTpgJ/zPWZrCnGSBoDTMmVUWtZZmZWJ7UmmJWSzgB2lnQ4cCVw3UAnZEnoSmB/4MiI+EP+eEQ8Qurwn5k7Z1T2eVkudBlwUNYHVPFxYCfgpiGWZWZmdVLrKLK5wBeBXwNzgBtJT7gcyHdJkytPAcZLGp87dk9EvEwanXaZpLXAL4DPkRLSZ3KxVwFfB66RNJ90G+wCYGluDgw1lmVmZnVS6yiy14CLs61WH8lev1Pl2L7A2oj4N0m7AH8HzAd+Q7qVtnXmfUS8IulIYBFwBWkezuXAaYU6DlqWmZnVT00JRtKjVOlziYgp/Z0TEZNrKTsiBk1cEfEkaYjzGy7LzMzqYyhrkVWMJfVt7DH81TGzZlWcqOqJl01iSEtE1ldNnfwR8Vxu+11EXAj4t8vMzPpV6y2yabmPo0gtmqE8S8asIbw8jFnj1Jokzsu9fxVYC/zVsNfGzMyGZvr09LpqVWPrUUWto8g+XHZFzMzsdVi9utE16Fett8hOHeh4RJw/PNUxM7N2MZRRZAcBP8k+Hw3cBTzU7xlmZjai1Zpg3g5Mi4gXACQtAG6ICD8p0szMqqp1LbI/IT1BsmJzts/MzKyqWlsw3wfukvSj7PMngEtLqZGZmbWFWkeRfVvSMuAvsl0nRsQ95VXLzJqdH6fcJGbNanQN+jWUyZIdwIaI+FdJEyXtGxGPllUxMzOrQeWRyU2o1kcmf5O0SvHp2a43A5eVVSkzM2t9tXby/2fSQ75eBIiIp4Bdy6qUmZnVaNWqppzFD7XfItscESEpACS9pcQ6mZlZrbqyxe6bcFXlWlswV0haAuwuaRawAj93xczMBjBogpEk4IekRxdfDbwL+EZEXFTDuftJWiLpXklbJN1WJWatpChsv68Sd4CkWyT1SXpK0lmSRhfrKukMSU9I2iTpZ5LeN1g9zeyNmTz3hq2bWcWgt8iyW2M3RsR7gZuHWP57gKOAO0kDA/qzFMgnrPykTiR1klpN9wPHAO8grfA8CpiXC51LelzyacAa4FRghaQDI2KHpGVmZuWptQ9mtaSDIuLuIZZ/XURcCyDpKmBCP3FPR8SdA5RzErAzcGxEbABuljQOWCDp3IjYIGksKcGcHRGLsp/5S9KjBb7C9onI2pT/gjZrHrX2wfxH4E5JD2e3u34t6d7BToqI195Y9baaASzPkkvF5aSkc0j2+WBgHHBF7ue/CFyXnW9mZnU0YIKRNCl7ewQwBTiUtJLyx7LX4fJFSZslPS/pKkn7FI5PJd3y2ioiHgf6smOVmC3suMLzA7kYMzOrk8Fukf2YtIryY5KujohPllCHa0l9NE8C7wa+Cdwu6b0R8XwW0wmsr3Jub3asErMxIrZUiemQNCYiin07s4HZAJMmTcLMrOX09DS6Bv0aLMEo935KGRWIiFNyH2+XdAfwK+BE4MIyfmbuZ3cD3QBdXV3NN4jczGwwlUcmN6HB+mCin/eliYj7gAeBabndvcBuVcI7s2OVmF2KQ5ezmL5i68XMzMo1WAvmzyRtILVkds7ek32OiBhXUr2C7RPaGgr9KJL2Ji3AuSYXMxrYj5SgKnbovzEzaxuzZ6fXJlz0csAWTESMjohxEbFrRLwpe1/5XEpykXQgKSnkF9dZBhwhKb/+2fHAJmBl9vkOYAMwM1dWB2kwwrIy6mpm1nAXX5y2JjSU5fqHLPsH/qjs417AOEnHZZ9vBD4MnABcDzxFSizzgMeBS3JFLQZOBq6RdA6pP2gBcH5l6HJEvCRpITBfUi/bJlqOYvtJnNZmPPeluRT/e/hZMSNXqQkGeCtwZWFf5fO+wBNZzIXA7sBzwE3AGfk5LxHRK+kwYBFpXst64AJSkslbSEoopwPjgR7g8Ih4Zpi+j5mZ1ajUBBMRa9l+JFo1h9VY1v2keTgDxQTw7WwzM7MGqnUmv5mZ2ZA4wZiZWSnK7oMxM7MyTZs2eEyDOMGYmbWyJn1cMvgWmZmZlcQJxszMSuFbZNZyPLHSLEfZTJBovvV63YIxM7NSOMGYmVkpfIvMzEqVv6XpdclGFrdgzMysFE4wZmZWCicYMzMrhftgzMxa2ZIlja5BvxRNOHa6Ebq6uqKnp6fR1bB+eO5L+3GHf3uQtCoiuqodK/UWmaT9JC2RdK+kLZJuqxIjSWdIekLSJkk/k/S+KnEHSLpFUp+kpySdJWn06ynLzMzKV3YfzHtIj0x+EPhtPzFzgfnAOcDRwEZghaQ9KwGSOoEVQADHAGcBXwPOHGpZZmZtpbs7bU2o7ARzXUTsHREzgd8UD0oaS0oKZ0fEoohYAcwkJZKv5EJPAnYGjo2ImyNiMSm5nCpp3BDLMjNrH3PmpK0JlZpgIuK1QUIOBsYBV+TOeRG4DpiRi5sBLI+IDbl9l5OSziFDLMvMzOqg0cOUpwJbgIcK+x/IjuXj1uQDIuJxoC8XV2tZZmZWB41OMJ3AxojYUtjfC3RIGpOLW1/l/N7s2FDK2krSbEk9knqeffbZ1/sdzMysikYnmIaKiO6I6IqIrokTJza6OmZmbaXREy17gV0kjS60PDqBvojYnIvbrcr5ndmxoZRlLcJzX8xaW6MTzBpgNLAfaShzRbHPZQ2FfhRJewMdubhayzKzJlD8A8ITL9tPo2+R3QFsIA0nBkBSB2kOy7Jc3DLgCEm75vYdD2wCVg6xLDOz9hHRlE+zhJJbMNk/8EdlH/cCxkk6Lvt8Y0T0SVoIzJfUS2ppnEpKfBfliloMnAxcI+kcYAqwADi/MnQ5Il6qsSwzM6uDsm+RvRW4srCv8nlfYC2wkJQETgfGAz3A4RHxTOWEiOiVdBiwiDSvZT1wASnJ5A1alpmZ1UepCSYi1gIaJCaAb2fbQHH3A4cOR1lmZm1j+vT0umpVY+tRRaM7+c3M7I1YvbrRNeiXE4yZNYX8qDKPKGsPjR5FZmZmbcotGGsanlhp1l7cgjEzs1I4wZiZWSl8i8zMmo47/Idg1qxG16BfTjBmZq2sSR+XDL5FZmZmJXELxhrKI8fM3qDKDP7KjP4m4gRjZtbKurrSaxOuqOxbZGZmVgonGDMzK4UTjJmZlcJ9MFZ37tg3Gxka3oKR9HlJUWU7KRcjSWdIekLSJkk/k/S+KmUdIOkWSX2SnpJ0lqTRdf1CZjasJs+9YbvNWkcztWAOBTblPj+Sez8XmA+cxrZHIa+QdGBE/B5AUiewArgfOAZ4B3AeKYnOK7321i//o2A2MjVTgrk7IjYWd0oaS0owZ0fEomzfL0mPW/4K25LHScDOwLERsQG4WdI4YIGkc7N9Zmbtpaen0TXoV8NvkdXgYGAccEVlR0S8CFwHzMjFzQCWFxLJ5aSkc0gd6mlmVn/TpzflJEtorgTzsKRXJT0oaU5u/1RgC/BQIf6B7Fg+bk0+ICIeB/oKcWZmVgfNcIvsaVL/yl3AaOBTwGJJHRFxAdAJbIyILYXzeoEOSWMiYnMWt75K+b3ZsR1Img3MBpg0adIwfBUzszqbPTu9NuGilw1PMBGxHFie27Us63eZJ+k7Jf/sbqAboKurq/nWWTCzHXgp/4KLL06vTZhgmukWWd5VwB7AZFILZJcqw407gb6s9UIWt1uVsjqzY2ZmVkcNb8H0I3Kva0i3zvYDHszFFPtc1lDoa5G0N9BRiLM68NBkM2vWFsxxwDrgMeAOYAMws3JQUgdwNLAsd84y4AhJu+b2HU+aW7Oy7Aqbmdn2Gt6CkXQ1qYP/XlJL5fhsOzkiXgNekrQQmC+pl20TLUcBF+WKWgycDFwj6RxgCrAAON9zYOrDrRYzy2t4giHd9voCsDcg0kz8v46IH+RiFpISyunAeKAHODwinqkERESvpMOARaQ5MuuBC0hJxszM6kzRhA+paYSurq7oaeIZsc3ILRZrJiN2RFllkmXlyZZ1JmlVRHRVO9YMLRhrIU4q1qyKv5sjJuE0KLHUolk7+c3MrMU5wZiZWSl8i8wG5dtiZk1MSq9N2J/uBGNmbclLyjSeb5GZmVkp3IIxs7bn1kxjOMEY4P8BzWz4OcHYDtypb2bDwX0wZmZWCrdg2pxbI2bba7sZ/0uWNLoG/XKCMTNrZZVHJjchJxgzG9E8wKU8TjBtyLfFzEaQ7u702oQtGSeYNuCEYjaCzZmTXp1gzMyal2+XDa+2SzCSDiA9SvkDpKdafg84MyK2NLJew8EtFbP6Gej/Nyef2rRVgpHUCawgPXb5GOAdwHmk+T7zGlg1M2sjbTfUuSRtlWCAk4CdgWMjYgNws6RxwAJJ52b7mppbKWatp7//b0d64mm3BDMDWF5IJJcD5wCHANc1pFYFTiJmI8NIv83WbglmKnBrfkdEPC6pLzs27AnGycLMXo/h+rdj7bCUUo52SzCdpI79ot7s2HYkzQYqY/s2SnqwvKq1pAnAukZXoon5+gzM12dgw3J9tPWNBgor0z79HWi3BDMkEdENdDe6Hs1KUk9EdDW6Hs3K12dgvj4DGwnXp91WU+4FdquyvzM7ZmZmddJuCWYNqa9lK0l7Ax3ZMTMzq5N2SzDLgCMk7ZrbdzywCVjZmCq1NN8+HJivz8B8fQbW9tdHEdHoOgybbKLl/cB9pKHJU4DzgQsjwhMtzczqqK0SDGxdKmYR2y8Vs6AdlooxM2slbZdgzMysObRbH4wNgaTPS4oq20m5GEk6Q9ITkjZJ+pmk9zWw2qWRtJ+kJZLulbRF0m1VYmq6HpIOkHSLpD5JT0k6S9LoenyPstR4fdZW+X36fZW4tro+kmZK+omk30naKGmVpE9XiZsl6SFJL2Uxh1WJ2UvSjyS9IGmdpEWSOurzTYbXiJ4HY1sdShoIUfFI7v1cYD5wGmkk3qnACkkHRsQO/3C0uPcARwF3Am/uJ2bQ69HGi67Wcn0AlpJWNK/YnD/YptfnVOBR4KukyZNHAUslTYiIiwCyhLMYWAD8HDgRuF7SQRFxXxbzZmA56Zp9Ctid1I+8O3BC/b7OMIkIbyN0Az4PBLBLP8fHAs8D38jtewvwLPD3ja5/CddjVO79VcBtr+d6AKeT5l2Ny+37H0Bffl+rbYNdn2z/WuAfBymn7a4PMKHKvqXAo7nPDwL/kr+ewK+By3L7Pg1sAfbN7fsr4DVg/0Z/z6FuvkVmAzkYGAdcUdkRES+S1nSb0ahKlSUiXhskpNbr0d+iqzuTFl1tSTVcn1q13fWJiGpLvtwDvA1A0hTgnWz/u/MacCU7/u7cHRGP5vb9mNSiOXJ4a10+JxgDeFjSq5IelDQnt38q6a+phwrxD1CY0DpC1Ho9plKY2BsRj5P+Qh8J1+2LkjZLel7SVZKKa1WNlOvzAeC32fvK9ypO+H4A2EPSxFxc8dpsBh6mBa+N+2BGtqdJ/Ql3AaNJ93wXS+qIiAtIS+xsjB2HePcCHZLGZL/8I0Wt12NIi662mWtJfTRPAu8GvgncLum9EfF8FtP21yfrvP8E8IVsV+V7rS+E9uaOP0ubXRsnmBEsIpaTOhQrlkkaC8yT9J0GVctaWESckvt4u6Q7gF+ROrQvbESd6k3SZFL/y7URcUlja9NYvkVmRVcBewCTSX817VJl+Ggn0DfCWi9Q+/XwoquZSKOjHgSm5Xa37fWRtAdpyarHgM/mDlW+V/F7dxaOt9W1cYKxosi9riHdOtuvELPDfeIRotbr4UVXtxds+72CNr0+2VyV64ExwMcioi93uPK9iv0oU4E/RsSzubjitRlDWvaq5a6NE4wVHUcax/8YcAewAZhZOZj9T3Q06a+0kabW6+FFVzOSDiT9g7kqt7vtro+kN5FGhO0PHBkRf8gfj4hHSB3++d+dUdnn4u/OQYWBER8HdgJuKqf25XEfzAgm6WpSB/+9pL/Mj8+2k7MhlC9JWgjMl9TLtomFo9h+Il1byJLFUdnHvYBxko7LPt8YEX01Xo/FwMnANZIqi64uAM4vDM1tKYNdH+DDpMmA1wNPkRLLPOBx4JJcUe14fb5LujanAOMljc8duyciXiZ9x8skrQV+AXyOlJA+k4u9Cvg66drMJ90uuwBYGhHF0YvNr9ETcbw1bgP+gXR/vI/01+Mq4L8UYkT6hX8yi7kdeH+j617S9ZjMtts5xW3yUK4HcABwaxbzNPAtYHSjv2OZ1wf4U+AW0mioV4DfkxLL29r9+pAmmA74u5PFzQL+HXgZWA0cVqWst5PmvmwEngP+Ceho9Hd8PZsXuzQzs1K4D8bMzErhBGNmZqVwgjEzs1I4wZiZWSmcYMzMrBROMGZmVgonGDMzK4UTjJmZleL/A7HsM2qXQ/V2AAAAAElFTkSuQmCC\n"},"metadata":{"needs_background":"light"}}],"source":["# visualize bright images\n","fd.vis.stats_gallery(metric='bright')"]},{"cell_type":"markdown","id":"d2f376c8","metadata":{"id":"d2f376c8"},"source":["# Wrap up and collect file list"]},{"cell_type":"code","execution_count":25,"id":"f40c1b53","metadata":{"id":"f40c1b53","executionInfo":{"status":"ok","timestamp":1677675704901,"user_tz":-120,"elapsed":711,"user":{"displayName":"Tom Shani","userId":"00667426488827942961"}}},"outputs":[],"source":["# let's collect all files to remove\n","stats_df = fd.img_stats()\n","outlier_df = fd.outliers()\n","\n","# for outliers, we'll take images which their closest similarity is lower than 0.68\n","outliers_filtered = outlier_df[outlier_df.distance < 0.68].img_filename_outlier.tolist()\n","\n","images_to_remove = dict(\n"," duplicates = list(cluster_images_to_discard), # the duplicates threshold is 0.9, the same ccthreshold we ran fastdup with\n"," outliers_to_discard = outliers_filtered,\n"," invalid_images = fd.invalid_instances(),\n"," blurry_images = stats_df[stats_df['blur'] < 50], # we'll choose blur threshold as 50\n"," dark_images = stats_df[stats_df['mean'] < 13], # darkness threshold as 13\n"," bright_images = stats_df[stats_df['mean'] > 220.5], # and brightness threshold as 220.5\n",")"]},{"cell_type":"code","execution_count":26,"id":"b5e24a96","metadata":{"id":"b5e24a96","outputId":"a63f32ff-47c5-4eac-a3df-9181838b496e","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1677675707307,"user_tz":-120,"elapsed":284,"user":{"displayName":"Tom Shani","userId":"00667426488827942961"}}},"outputs":[{"output_type":"stream","name":"stdout","text":["duplicates: 2153\n","outliers_to_discard: 174\n","invalid_images: 0\n","blurry_images: 30\n","dark_images: 6\n","bright_images: 13\n","Total - 2368 unique images\n"]}],"source":["# we'll create a set of unique images to remove\n","images_to_discard = []\n","\n","for key, data_slice in images_to_remove.items():\n"," ext = data_slice if isinstance(data_slice, list) else data_slice['img_filename'].tolist()\n"," print(f\"{key}: {len(data_slice)}\")\n"," images_to_discard.extend(ext)\n","print(f\"Total - {len(set(images_to_discard))} unique images\")"]},{"cell_type":"markdown","id":"17ceb0d6","metadata":{"id":"17ceb0d6"},"source":["# Summary \n","Even under restrictive thresholds, we have found 2,368 images that should not be included in our labeling or training using this data."]},{"cell_type":"code","source":[],"metadata":{"id":"Y1shK2sIQ-oL"},"id":"Y1shK2sIQ-oL","execution_count":null,"outputs":[]}],"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.16"},"vscode":{"interpreter":{"hash":"5b6e8fba36db23bc4d54e0302cd75fdd75c29d9edcbab68d6cfc74e7e4b30305"}},"colab":{"provenance":[]}},"nbformat":4,"nbformat_minor":5}