{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "\n", "\n", " \n", " \n", " \n", " \n", "
\n", " \n", " \n", " Try in Google Colab\n", " \n", " \n", " \n", " \n", " Share via nbviewer\n", " \n", " \n", " \n", " \n", " View on GitHub\n", " \n", " \n", " \n", " \n", " Download notebook\n", " \n", "
\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from os import path\n", "import glob\n", "import json\n", "\n", "import numpy as np\n", "import imagesize\n", "\n", "import fiftyone as fo\n", "\n", "# set to download path\n", "image_defect_root = '/PATH TO ARMBENCH DATASET GOES HERE'\n", "\n", "# maximum number of groups to load (set to None for entire dataset)\n", "max_groups = 100\n", "\n", "data_root = path.join(image_defect_root,'data')\n", "train_csv = path.join(image_defect_root,'train.csv')\n", "test_csv = path.join(image_defect_root,'test.csv')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "def readlines(f):\n", " with open(f,'r') as fh:\n", " lines = [line.strip() for line in fh]\n", " return lines\n", "\n", "\n", "def load_json(f):\n", " with open(f,'r') as fh:\n", " j = json.load(fh)\n", " return j\n", "\n", "\n", "def parse_data_dir(data_dir):\n", " \"\"\"Parse one data directory (group)\n", " \n", " Args:\n", " data_dir: full path to a single data folder, eg /data/\n", "\n", " Returns:\n", " id, \n", " \"\"\"\n", " \n", " id = path.basename(data_dir)\n", " jpg_pat = path.join(data_dir,'*.jpg')\n", " ims = sorted(glob.glob(jpg_pat))\n", " jsons = [path.splitext(x)[0]+'.json' for x in ims]\n", " jsons = [load_json(x) for x in jsons]\n", " \n", " imsbase = [path.basename(x) for x in ims]\n", " imskey = [path.splitext(x)[0] for x in imsbase]\n", " json_files = [path.join(data_dir,x+'.json') for x in imskey] \n", " jsons = [load_json(x) for x in json_files]\n", " \n", " for im, json in zip(ims,jsons): \n", " imbase = path.basename(im)\n", " imkey = path.splitext(imbase)[0]\n", " assert json['id']==imbase or json['id']==imkey\n", " assert imkey.startswith(id + '_')\n", " slice = imkey[len(id)+1:]\n", " \n", " imw,imh = imagesize.get(im) \n", " new_info = {\n", " 'filepath': im,\n", " 'imw': imw,\n", " 'imh': imh,\n", " 'slice': slice,\n", " }\n", " json.update(new_info)\n", " \n", " return id, jsons\n", "\n", "\n", "def parse_all_data_dirs():\n", " \"\"\"Parse all data folders, up to max_groups\n", " \n", " Returns:\n", " list of (id,jsons)\n", " \"\"\"\n", " \n", " data_dirs = sorted(glob.glob(path.join(data_root,'*')))\n", " data_dirs = data_dirs[:max_groups]\n", " data_dir_infos = [parse_data_dir(x) for x in data_dirs]\n", " return data_dir_infos" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "train_set = set(readlines(train_csv))\n", "test_set = set(readlines(test_csv))\n", "data_dir_infos = parse_all_data_dirs()\n", "\n", "dataset = fo.Dataset('ARMBench-Image-Defect-Detection')\n", "dataset.persistent = True\n", "\n", "samples_all = []\n", " \n", "for id, grp_info in data_dir_infos:\n", "\n", " group = fo.Group() \n", " \n", " for info in grp_info:\n", " if id in train_set:\n", " tags = ['train']\n", " elif id in test_set:\n", " tags = ['test']\n", " else:\n", " tags = []\n", " \n", " if info['label']:\n", " tags.append(info['label'])\n", " if info['sublabel']:\n", " tags.append(info['sublabel'])\n", " \n", " sample = fo.Sample(filepath=info['filepath'], \n", " tags=tags,\n", " group=group.element(info['slice']))\n", " \n", " imw = info['imw']\n", " imh = info['imh']\n", " poly_pts = info['polygon']\n", " if poly_pts:\n", " poly_pts = np.array(poly_pts,dtype=np.float64)\n", " poly_pts[:,0] /= imw\n", " poly_pts[:,1] /= imh\n", " polyline = fo.Polyline(points=[poly_pts.tolist()],filled=True)\n", " detections = fo.Polylines(polylines=[polyline]).to_detections(frame_size=(imw,imh))\n", " sample['object'] = detections\n", "\n", " samples_all.append(sample)\n", " \n", "dataset.add_samples(samples_all)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "session = fo.launch_app(dataset)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import fiftyone.brain as fob\n", "from fiftyone import ViewField as F" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dataset = dataset.select_group_slices('4') \\\n", " .filter_labels('object',F()) \\\n", " .clone(name='ArmBench-Image-Defect-Slice4',persistent=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "labels = {\n", " 'book': ['book_jacket','open_book_jacket','open_book'],\n", " 'open_box': ['open_box'],\n", " 'partial_box':['partial_box'],\n", " 'crush_box':['crush_box'],\n", " 'bag': ['empty_bag','torn_bag'],\n", " 'multi_pick': ['multi_pick'],\n", " 'nominal': ['nominal'],\n", "}\n", "\n", "# set default defect label; this is overwritten for most samples\n", "dataset.set_values('object.detections.label',[['other_defect']]*len(dataset))\n", "\n", "for ty,tags in labels.items():\n", " view = dataset.match_tags(tags)\n", " view.set_values('object.detections.label',[[ty]]*len(view))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "fob.compute_visualization(dataset,\n", " patches_field='object',\n", " embeddings='clip_embeddings',\n", " brain_key='object_clip',\n", " model='clip-vit-base32-torch')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "view_defects = dataset.match_tags('nominal',bool=False) \n", "\n", "fob.compute_visualization(view_defects,\n", " patches_field='object',\n", " embeddings='clip_embeddings',\n", " brain_key='dets_clip')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "session = fo.launch_app(view_defects)" ] } ], "metadata": { "language_info": { "name": "python" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }