{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<!-- Autogenerated by `scripts/make_examples.py` -->\n",
    "<table align=\"left\">\n",
    "    <td>\n",
    "        <a target=\"_blank\" href=\"https://colab.research.google.com/github/voxel51/fiftyone-examples/blob/master/examples/armbench_defect_detection.ipynb\">\n",
    "            <img src=\"https://user-images.githubusercontent.com/25985824/104791629-6e618700-5769-11eb-857f-d176b37d2496.png\" height=\"32\" width=\"32\">\n",
    "            Try in Google Colab\n",
    "        </a>\n",
    "    </td>\n",
    "    <td>\n",
    "        <a target=\"_blank\" href=\"https://nbviewer.jupyter.org/github/voxel51/fiftyone-examples/blob/master/examples/armbench_defect_detection.ipynb\">\n",
    "            <img src=\"https://user-images.githubusercontent.com/25985824/104791634-6efa1d80-5769-11eb-8a4c-71d6cb53ccf0.png\" height=\"32\" width=\"32\">\n",
    "            Share via nbviewer\n",
    "        </a>\n",
    "    </td>\n",
    "    <td>\n",
    "        <a target=\"_blank\" href=\"https://github.com/voxel51/fiftyone-examples/blob/master/examples/armbench_defect_detection.ipynb\">\n",
    "            <img src=\"https://user-images.githubusercontent.com/25985824/104791633-6efa1d80-5769-11eb-8ee3-4b2123fe4b66.png\" height=\"32\" width=\"32\">\n",
    "            View on GitHub\n",
    "        </a>\n",
    "    </td>\n",
    "    <td>\n",
    "        <a href=\"https://github.com/voxel51/fiftyone-examples/raw/master/examples/armbench_defect_detection.ipynb\" download>\n",
    "            <img src=\"https://user-images.githubusercontent.com/25985824/104792428-60f9cc00-576c-11eb-95a4-5709d803023a.png\" height=\"32\" width=\"32\">\n",
    "            Download notebook\n",
    "        </a>\n",
    "    </td>\n",
    "</table>\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from os import path\n",
    "import glob\n",
    "import json\n",
    "\n",
    "import numpy as np\n",
    "import imagesize\n",
    "\n",
    "import fiftyone as fo\n",
    "\n",
    "# set to download path\n",
    "image_defect_root = '/PATH TO ARMBENCH DATASET GOES HERE'\n",
    "\n",
    "# maximum number of groups to load (set to None for entire dataset)\n",
    "max_groups = 100\n",
    "\n",
    "data_root = path.join(image_defect_root,'data')\n",
    "train_csv = path.join(image_defect_root,'train.csv')\n",
    "test_csv = path.join(image_defect_root,'test.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "def readlines(f):\n",
    "    with open(f,'r') as fh:\n",
    "        lines = [line.strip() for line in fh]\n",
    "    return lines\n",
    "\n",
    "\n",
    "def load_json(f):\n",
    "    with open(f,'r') as fh:\n",
    "        j = json.load(fh)\n",
    "    return j\n",
    "\n",
    "\n",
    "def parse_data_dir(data_dir):\n",
    "    \"\"\"Parse one data directory (group)\n",
    "    \n",
    "    Args:\n",
    "        data_dir: full path to a single data folder, eg <image_defect_root>/data/<id>\n",
    "\n",
    "    Returns:\n",
    "        id, <list of dict>\n",
    "    \"\"\"\n",
    "    \n",
    "    id = path.basename(data_dir)\n",
    "    jpg_pat = path.join(data_dir,'*.jpg')\n",
    "    ims = sorted(glob.glob(jpg_pat))\n",
    "    jsons = [path.splitext(x)[0]+'.json' for x in ims]\n",
    "    jsons = [load_json(x) for x in jsons]\n",
    "    \n",
    "    imsbase = [path.basename(x) for x in ims]\n",
    "    imskey = [path.splitext(x)[0] for x in imsbase]\n",
    "    json_files = [path.join(data_dir,x+'.json') for x in imskey]    \n",
    "    jsons = [load_json(x) for x in json_files]\n",
    "    \n",
    "    for im, json in zip(ims,jsons): \n",
    "        imbase = path.basename(im)\n",
    "        imkey = path.splitext(imbase)[0]\n",
    "        assert json['id']==imbase or json['id']==imkey\n",
    "        assert imkey.startswith(id + '_')\n",
    "        slice = imkey[len(id)+1:]\n",
    "        \n",
    "        imw,imh = imagesize.get(im) \n",
    "        new_info = {\n",
    "            'filepath': im,\n",
    "            'imw': imw,\n",
    "            'imh': imh,\n",
    "            'slice': slice,\n",
    "        }\n",
    "        json.update(new_info)\n",
    "        \n",
    "    return id, jsons\n",
    "\n",
    "\n",
    "def parse_all_data_dirs():\n",
    "    \"\"\"Parse all data folders, up to max_groups\n",
    "    \n",
    "    Returns:\n",
    "        list of (id,jsons)\n",
    "    \"\"\"\n",
    "    \n",
    "    data_dirs = sorted(glob.glob(path.join(data_root,'*')))\n",
    "    data_dirs = data_dirs[:max_groups]\n",
    "    data_dir_infos = [parse_data_dir(x) for x in data_dirs]\n",
    "    return data_dir_infos"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_set = set(readlines(train_csv))\n",
    "test_set = set(readlines(test_csv))\n",
    "data_dir_infos = parse_all_data_dirs()\n",
    "\n",
    "dataset = fo.Dataset('ARMBench-Image-Defect-Detection')\n",
    "dataset.persistent = True\n",
    "\n",
    "samples_all = []\n",
    "    \n",
    "for id, grp_info in data_dir_infos:\n",
    "\n",
    "    group = fo.Group()        \n",
    "        \n",
    "    for info in grp_info:\n",
    "        if id in train_set:\n",
    "            tags = ['train']\n",
    "        elif id in test_set:\n",
    "            tags = ['test']\n",
    "        else:\n",
    "            tags = []\n",
    "            \n",
    "        if info['label']:\n",
    "            tags.append(info['label'])\n",
    "        if info['sublabel']:\n",
    "            tags.append(info['sublabel'])\n",
    "                \n",
    "        sample = fo.Sample(filepath=info['filepath'], \n",
    "                           tags=tags,\n",
    "                           group=group.element(info['slice']))\n",
    "            \n",
    "        imw = info['imw']\n",
    "        imh = info['imh']\n",
    "        poly_pts = info['polygon']\n",
    "        if poly_pts:\n",
    "            poly_pts = np.array(poly_pts,dtype=np.float64)\n",
    "            poly_pts[:,0] /= imw\n",
    "            poly_pts[:,1] /= imh\n",
    "            polyline = fo.Polyline(points=[poly_pts.tolist()],filled=True)\n",
    "            detections = fo.Polylines(polylines=[polyline]).to_detections(frame_size=(imw,imh))\n",
    "            sample['object'] = detections\n",
    "\n",
    "        samples_all.append(sample)\n",
    "        \n",
    "dataset.add_samples(samples_all)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "session = fo.launch_app(dataset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import fiftyone.brain as fob\n",
    "from fiftyone import ViewField as F"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = dataset.select_group_slices('4') \\\n",
    "                 .filter_labels('object',F()) \\\n",
    "                 .clone(name='ArmBench-Image-Defect-Slice4',persistent=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "labels = {\n",
    "    'book': ['book_jacket','open_book_jacket','open_book'],\n",
    "    'open_box': ['open_box'],\n",
    "    'partial_box':['partial_box'],\n",
    "    'crush_box':['crush_box'],\n",
    "    'bag': ['empty_bag','torn_bag'],\n",
    "    'multi_pick': ['multi_pick'],\n",
    "    'nominal': ['nominal'],\n",
    "}\n",
    "\n",
    "# set default defect label; this is overwritten for most samples\n",
    "dataset.set_values('object.detections.label',[['other_defect']]*len(dataset))\n",
    "\n",
    "for ty,tags in labels.items():\n",
    "    view = dataset.match_tags(tags)\n",
    "    view.set_values('object.detections.label',[[ty]]*len(view))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fob.compute_visualization(dataset,\n",
    "                          patches_field='object',\n",
    "                          embeddings='clip_embeddings',\n",
    "                          brain_key='object_clip',\n",
    "                          model='clip-vit-base32-torch')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "view_defects = dataset.match_tags('nominal',bool=False) \n",
    "\n",
    "fob.compute_visualization(view_defects,\n",
    "                          patches_field='object',\n",
    "                          embeddings='clip_embeddings',\n",
    "                          brain_key='dets_clip')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "session = fo.launch_app(view_defects)"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}