{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import glob\n", "import skimage.io as io\n", "import os.path\n", "import tensorflow as tf" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "def fimg_to_fmask(img_path):\n", " # convert an image file path into a corresponding mask file path \n", " dirname, basename = os.path.split(img_path)\n", " maskname = basename.replace(\".tif\", \"_mask.tif\")\n", " return os.path.join(dirname, maskname)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "number of image segmentation pairs: 0\n" ] } ], "source": [ "origin_images_subset = [img for img in glob.glob(\"train_subset/*.tif\") if 'mask' not in img]\n", "paired_images_subset = [(img, fimg_to_fmask(img)) for img in origin_images_subset]\n", "print(\"number of image segmentation pairs: \", len(paired_images_subset))" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "number of image segmentation pairs: 0\n" ] } ], "source": [ "origin_images_full = [img for img in glob.glob(\"train/*.tif\") if 'mask' not in img]\n", "paired_images_full = [(img, fimg_to_fmask(img)) for img in origin_images_full]\n", "print(\"number of image segmentation pairs: \", len(paired_images_full))" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "ename": "FileNotFoundError", "evalue": "[Errno 2] No such file or directory: 'train_subset/1_1.tif'", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mget_ipython\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_line_magic\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'matplotlib'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'inline'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;31m# check an image instance\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mimg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mimread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'train_subset/1_1.tif'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0mmask\u001b[0m \u001b[0;34m=\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mimread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'train_subset/1_1_mask.tif'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mimg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda/lib/python3.5/site-packages/skimage/io/_io.py\u001b[0m in \u001b[0;36mimread\u001b[0;34m(fname, as_grey, plugin, flatten, **plugin_args)\u001b[0m\n\u001b[1;32m 59\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 60\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mfile_or_url_context\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfname\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mfname\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 61\u001b[0;31m \u001b[0mimg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcall_plugin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'imread'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mplugin\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mplugin\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mplugin_args\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 62\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 63\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mimg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'ndim'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda/lib/python3.5/site-packages/skimage/io/manage_plugins.py\u001b[0m in \u001b[0;36mcall_plugin\u001b[0;34m(kind, *args, **kwargs)\u001b[0m\n\u001b[1;32m 209\u001b[0m (plugin, kind))\n\u001b[1;32m 210\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 211\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 212\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 213\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda/lib/python3.5/site-packages/skimage/io/_plugins/tifffile_plugin.py\u001b[0m in \u001b[0;36mimread\u001b[0;34m(fname, dtype, **kwargs)\u001b[0m\n\u001b[1;32m 27\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m'img_num'\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 28\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'key'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'img_num'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 29\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'rb'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 30\u001b[0m \u001b[0mtif\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTiffFile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mtif\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'train_subset/1_1.tif'" ] } ], "source": [ "%matplotlib inline\n", "# check an image instance\n", "img = io.imread('train_subset/1_1.tif')\n", "mask =io.imread('train_subset/1_1_mask.tif')\n", "print(type(img))\n", "print(img.shape)\n", "io.imshow(img)\n", "io.show()\n", "io.imshow(mask)\n", "io.show()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Using TensorFlow backend.\n" ] } ], "source": [ "from keras.preprocessing.image import ImageDataGenerator\n", "import numpy as np\n", "\n", "def image_augmentation(img, save_dir):\n", " datagen = ImageDataGenerator(\n", " rotation_range=180,\n", " horizontal_flip=True,\n", " fill_mode='nearest')\n", " img = np.expand_dims(img, 0)\n", " img = np.expand_dims(img, -1)\n", " \n", " i = 0\n", " for batch in datagen.flow(img, batch_size=1,\n", " save_to_dir=save_dir, save_prefix=\"mask\", save_format='jpeg'):\n", " i += 1\n", " if i > 10:\n", " break" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def images_split(paired_images, full=False, train=True):\n", " \n", " fcn_img = \"data_fcn\"\n", " fcn_mask = \"data_fcn\"\n", " simple_cnn_img = \"data_simple_cnn\"\n", " simple_cnn_mask = \"data_simple_cnn\"\n", " \n", " if full:\n", " fcn_img += \"_full\"\n", " fcn_mask += \"_full\"\n", " simple_cnn_img += \"_full\"\n", " simple_cnn_mask += \"_full\"\n", " \n", " if train:\n", " fcn_img += \"/train/images/images/\"\n", " fcn_mask += \"/train/masks/masks/\"\n", " simple_cnn_img += \"/train/no_mask/\"\n", " simple_cnn_mask += \"/train/mask/\"\n", " else: \n", " fcn_img += \"/validation/images/images/\"\n", " fcn_mask += \"/validation/masks/masks/\"\n", " simple_cnn_img += \"/validation/no_mask/\"\n", " simple_cnn_mask += \"/validation/mask/\"\n", " \n", " \n", " count_no_mask = 1\n", " count_mask = 1\n", " count_fcn = 1\n", " for raw_img, raw_mask in paired_images:\n", " img = io.imread(raw_img)\n", " mask = io.imread(raw_mask) / 255\n", " for i in range(6):\n", " for j in range(5):\n", " small_img = img[i*70:(i+1)*70, j*116:(j+1)*116]\n", " small_mask = mask[i*70:(i+1)*70, j*116:(j+1)*116]\n", " io.imsave(fcn_img + str(count_fcn) + \".jpg\", small_img / 255)\n", " io.imsave(fcn_mask + str(count_fcn) + \"_mask.jpg\", small_mask)\n", " count_fcn += 1\n", " if np.sum(mask[i*70:(i+1)*70, j*116:(j+1)*116]) >= 400:\n", " io.imsave(simple_cnn_mask + str(count_mask) + \"_mask.jpg\", small_img)\n", " count_mask += 1\n", " else:\n", " io.imsave(simple_cnn_img + str(count_no_mask) + \".jpg\", small_img)\n", " count_no_mask += 1\n", " print(\"Finished splitting and saving images and segmentations\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "images_split(paired_images_subset[:480], full=False, train=True)\n", "images_split(paired_images_subset[480:], full=False, train=False)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "images with no mask: 13982\n", "images with mask: 10275\n", "mask to no mask ratio 0.7348734086682878\n" ] } ], "source": [ "imgs_no_mask = [img for img in glob.glob(\"data_simple_cnn/train/no_mask/*\")]\n", "imgs_mask = [img for img in glob.glob(\"data_simple_cnn/train/mask/*\")]\n", "\n", "print(\"images with no mask: \", len(imgs_no_mask))\n", "print(\"images with mask: \", len(imgs_mask))\n", "print(\"mask to no mask ratio\", float(len(imgs_mask))/len(imgs_no_mask))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true, "scrolled": true }, "outputs": [], "source": [ "# 5635\n", "images_split(paired_images_full[:5000], full=True, train=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "images_split(paired_images_full[5000:], full=True, train=False)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "images with no mask: 323227\n", "images with mask: 350497\n", "mask to no mask ratio 1.0843679519347085\n" ] } ], "source": [ "imgs_no_mask = [img for img in glob.glob(\"data_simple_cnn_full/train/no_mask/*\")]\n", "imgs_mask = [img for img in glob.glob(\"data_simple_cnn_full/train/mask/*\")]\n", "\n", "print(\"images with no mask: \", len(imgs_no_mask))\n", "print(\"images with mask: \", len(imgs_mask))\n", "print(\"mask to no mask ratio\", float(len(imgs_mask))/len(imgs_no_mask))" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from keras.preprocessing.image import ImageDataGenerator\n", "import numpy as np\n", "\n", "def image_augmentation(img, save_dir, save_prefix):\n", " datagen = ImageDataGenerator(\n", " rotation_range=180,\n", " horizontal_flip=True,\n", " fill_mode='nearest')\n", " img = np.expand_dims(img, 0)\n", " img = np.expand_dims(img, -1)\n", " \n", " i = 0\n", " for batch in datagen.flow(img, batch_size=1,\n", " save_to_dir=save_dir, save_prefix=save_prefix, save_format='jpg'):\n", " i += 1\n", " if i > 5:\n", " break" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### original image classification" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "number of image segmentation pairs: 5635\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/charlioxumykj/miniconda3/envs/dlnd/lib/python3.6/site-packages/skimage/external/tifffile/tifffile.py:2611: RuntimeWarning: py_decodelzw encountered unexpected end of stream\n", " strip = decompress(strip)\n" ] } ], "source": [ "def fimg_to_fmask(img_path):\n", " # convert an image file path into a corresponding mask file path \n", " dirname, basename = os.path.split(img_path)\n", " maskname = basename.replace(\".tif\", \"_mask.tif\")\n", " return os.path.join(dirname, maskname)\n", "\n", "origin_images_full = [img for img in glob.glob(\"train/*.tif\") if 'mask' not in img]\n", "paired_images_full = [(img, fimg_to_fmask(img)) for img in origin_images_full]\n", "print(\"number of image segmentation pairs: \", len(paired_images_full))\n", "\n", "no_masked = \"data_simple_cnn/test/no_mask/\"\n", "masked = \"data_simple_cnn/test/mask/\"\n", " \n", "count = 1\n", "for raw_img, raw_mask in paired_images_full:\n", " img = io.imread(raw_img)\n", " mask = io.imread(raw_mask)\n", " if np.sum(mask[:,:]) > 0:\n", " io.imsave(masked + str(count) + \"_mask.jpg\", img)\n", " else:\n", " io.imsave(no_masked + str(count) + \".jpg\", img)\n", " count += 1" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### augment train/mask" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import random\n", "\n", "for count in range((len(imgs_no_mask) - len(imgs_mask))//5):\n", " n = len(imgs_mask)\n", " i = random.randint(1, n-1)\n", " small_img = io.imread(imgs_mask[i])\n", " image_augmentation(small_img, \"data_simple_cnn_full/train/mask/\", \"aug_\"+str(count))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### check image numbers" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "images with no mask: 323227\n", "images with mask: 350497\n", "mask to no mask ratio 1.0843679519347085\n" ] } ], "source": [ "imgs_no_mask = [img for img in glob.glob(\"data_simple_cnn_full/train/no_mask/*\")]\n", "imgs_mask = [img for img in glob.glob(\"data_simple_cnn_full/train/mask/*\")]\n", "\n", "print(\"images with no mask: \", len(imgs_no_mask))\n", "print(\"images with mask: \", len(imgs_mask))\n", "print(\"mask to no mask ratio\", float(len(imgs_mask))/len(imgs_no_mask))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### augment both train/mask and train/no_mask" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import random\n", "\n", "for count in range(300000//5):\n", " i = random.randint(1, len(imgs_mask)-1)\n", " small_img = io.imread(imgs_mask[i])\n", " image_augmentation(small_img, \"data_simple_cnn_full/train/mask/\", \"even_more_aug_\"+str(count))\n", " j = random.randint(1, len(imgs_no_mask)-1)\n", " small_img = io.imread(imgs_no_mask[j])\n", " image_augmentation(small_img, \"data_simple_cnn_full/train/no_mask/\", \"even_more_aug_\"+str(count))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### check image numbers" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "images with no mask: 683136\n", "images with mask: 710408\n", "mask to no mask ratio 1.0399217725313847\n" ] } ], "source": [ "imgs_no_mask = [img for img in glob.glob(\"data_simple_cnn_full/train/no_mask/*\")]\n", "imgs_mask = [img for img in glob.glob(\"data_simple_cnn_full/train/mask/*\")]\n", "\n", "print(\"images with no mask: \", len(imgs_no_mask))\n", "print(\"images with mask: \", len(imgs_mask))\n", "print(\"mask to no mask ratio\", float(len(imgs_mask))/len(imgs_no_mask))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### check validation image numbers" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "images with no mask: 43955\n", "images with mask: 47440\n", "mask to no mask ratio 1.0792856330337846\n" ] } ], "source": [ "imgs_no_mask_val = [img for img in glob.glob(\"data_simple_cnn_full/validation/no_mask/*\")]\n", "imgs_mask_val = [img for img in glob.glob(\"data_simple_cnn_full/validation/mask/*\")]\n", "\n", "print(\"images with no mask: \", len(imgs_no_mask_val))\n", "print(\"images with mask: \", len(imgs_mask_val))\n", "print(\"mask to no mask ratio\", float(len(imgs_mask_val))/len(imgs_no_mask_val))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### augment validation/mask" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "for count in range((len(imgs_no_mask_val) - len(imgs_mask_val))//5):\n", " n = len(imgs_mask_val)\n", " i = random.randint(1, n-1)\n", " small_img = io.imread(imgs_mask_val[i])\n", " image_augmentation(small_img, \"data_simple_cnn_full/validation/mask/\", \"aug_\"+str(count))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### check validation image numbers" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "images with no mask: 43955\n", "images with mask: 47440\n", "mask to no mask ratio 1.0792856330337846\n" ] } ], "source": [ "imgs_no_mask_val = [img for img in glob.glob(\"data_simple_cnn_full/validation/no_mask/*\")]\n", "imgs_mask_val = [img for img in glob.glob(\"data_simple_cnn_full/validation/mask/*\")]\n", "\n", "print(\"images with no mask: \", len(imgs_no_mask_val))\n", "print(\"images with mask: \", len(imgs_mask_val))\n", "print(\"mask to no mask ratio\", float(len(imgs_mask_val))/len(imgs_no_mask_val))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### augment both validation/mask and validation/no_mask" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": true }, "outputs": [], "source": [ "for count in range(60000//5):\n", " i = random.randint(1, len(imgs_mask_val)-1)\n", " small_img = io.imread(imgs_mask_val[i])\n", " image_augmentation(small_img, \"data_simple_cnn_full/validation/mask/\", \"more_aug_\"+str(count))\n", " j = random.randint(1, len(imgs_no_mask_val)-1)\n", " small_img = io.imread(imgs_no_mask_val[j])\n", " image_augmentation(small_img, \"data_simple_cnn_full/validation/no_mask/\", \"more_aug_\"+str(count))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### check image numbers" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "images with no mask: 115932\n", "images with mask: 119411\n", "mask to no mask ratio 1.030008970775972\n" ] } ], "source": [ "imgs_no_mask_val = [img for img in glob.glob(\"data_simple_cnn_full/validation/no_mask/*\")]\n", "imgs_mask_val = [img for img in glob.glob(\"data_simple_cnn_full/validation/mask/*\")]\n", "\n", "print(\"images with no mask: \", len(imgs_no_mask_val))\n", "print(\"images with mask: \", len(imgs_mask_val))\n", "print(\"mask to no mask ratio\", float(len(imgs_mask_val))/len(imgs_no_mask_val))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Augment data_fcn and data_fcn_full for FCN model training" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## augment images and corresponding masks if defected" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def defected(mask):\n", " return np.sum(mask[:,:]) >= 400\n", "\n", "train_masks = [ma for ma in glob.glob(\"data_fcn_full/train/masks/masks/*\")]\n", "defected_masks = [defected(io.imread(ma)) for ma in train_masks]\n", "print(\"No. of train masks: \", len(train_masks))\n", "print(\"No. of defected masks: \", sum(defected_masks))\n", "print(\"defected percentage: \", float(sum(defected_masks))/len(train_masks))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def image_augmentation(img, save_dir, save_prefix, seed):\n", " datagen = ImageDataGenerator(rotation_range=180, horizontal_flip=True, fill_mode='nearest')\n", " img = np.expand_dims(img, 0)\n", " img = np.expand_dims(img, -1)\n", " i = 0\n", " for batch in datagen.flow(img, batch_size=1, save_to_dir=save_dir, \n", " save_prefix=save_prefix, save_format='jpg', seed=seed):\n", " i += 1\n", " if i > 5:\n", " break" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "307371\n" ] } ], "source": [ "def fimg_to_fmask(img_path, mask_folder):\n", " dirname, basename = os.path.split(img_path)\n", " maskname = basename.replace(\".jpg\", \"_mask.jpg\")\n", " return os.path.join(mask_folder, maskname)\n", "\n", "img_folder = \"data_fcn_full/train/images/images/\"\n", "mask_folder = \"data_fcn_full/train/masks/masks/\"\n", "\n", "fcn_train_imgs = [img for img in glob.glob(img_folder+\"*\")]\n", "fcn_train_pairs = [(img, fimg_to_fmask(img, mask_folder)) for img in fcn_train_imgs]\n", "\n", "n = len(fcn_train_pairs)\n", "print(n)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### augmentation process" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import random\n", "count = 1\n", "for _ in range(n):\n", " i = random.randint(1, n-1)\n", " seed = random.randint(1, 10000001)\n", " small_mask = io.imread(fcn_train_pairs[i][1])\n", " if defected(small_mask):\n", " image_augmentation(small_mask, mask_folder, \"mask_aug_defect_\"+str(count), seed)\n", " small_img = io.imread(fcn_train_pairs[i][0])\n", " image_augmentation(small_img, img_folder, \"aug_defect_\"+str(count), seed)\n", " count += 1\n", " if count > 1000:\n", " break" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### check numbers" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "train_masks = [ma for ma in glob.glob(\"data_fcn_full/train/masks/masks/*\")]\n", "defected_masks = [defected(io.imread(ma)) for ma in train_masks]\n", "print(\"No. of train masks: \", len(train_masks))\n", "print(\"No. of defected masks: \", sum(defected_masks))\n", "print(\"defected percentage: \", float(sum(defected_masks))/len(train_masks))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### augment validation data" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "19050\n" ] } ], "source": [ "img_folder = \"data_fcn_full/validation/images/images/\"\n", "mask_folder = \"data_fcn_full/validation/masks/masks/\"\n", "\n", "def fimg_to_fmask(img_path, mask_folder):\n", " dirname, basename = os.path.split(img_path)\n", " maskname = basename.replace(\".jpg\", \"_mask.jpg\")\n", " return os.path.join(mask_folder, maskname)\n", "\n", "fcn_val_imgs = [img for img in glob.glob(img_folder+\"*\")]\n", "fcn_val_pairs = [(img, fimg_to_fmask(img, mask_folder)) for img in fcn_val_imgs]\n", "\n", "n = len(fcn_val_pairs)\n", "print(n)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import random\n", "count = 1\n", "\n", "def defected(mask):\n", " return np.sum(mask[:,:]) >= 400\n", "\n", "def image_augmentation(img, save_dir, save_prefix, seed):\n", " datagen = ImageDataGenerator(rotation_range=180, horizontal_flip=True, fill_mode='nearest')\n", " img = np.expand_dims(img, 0)\n", " img = np.expand_dims(img, -1)\n", " i = 0\n", " for batch in datagen.flow(img, batch_size=1, save_to_dir=save_dir, \n", " save_prefix=save_prefix, save_format='jpg', seed=seed):\n", " i += 1\n", " if i > 5:\n", " break\n", "\n", "for _ in range(n):\n", " i = random.randint(1, n-1)\n", " seed = random.randint(1, 10000001)\n", " small_mask = io.imread(fcn_val_pairs[i][1])\n", " if defected(small_mask):\n", " image_augmentation(small_mask, mask_folder, \"mask_aug_defect_\"+str(count), seed)\n", " small_img = io.imread(fcn_val_pairs[i][0])\n", " image_augmentation(small_img, img_folder, \"aug_defect_\"+str(count), seed)\n", " count += 1\n", " if count > 2000:\n", " break" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "No. of validation masks: 32264\n", "No. of defected masks: 13025\n", "defected percentage: 0.40370071906769156\n" ] } ], "source": [ "val_masks = [ma for ma in glob.glob(\"data_fcn_full/validation/masks/masks/*\")]\n", "defected_masks = [defected(io.imread(ma)) for ma in val_masks]\n", "print(\"No. of validation masks: \", len(val_masks))\n", "print(\"No. of defected masks: \", sum(defected_masks))\n", "print(\"defected percentage: \", float(sum(defected_masks))/len(val_masks))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## augment both train/masks and train/no_masks to match fcn parameter numbers" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "307377\n", "('data_fcn_full/train/images/images/aug_defect_2026_0_2482.jpg', 'data_fcn_full/train/masks/masks/mask_aug_defect_2026_0_2482.jpg')\n", "307377\n", "307377\n" ] } ], "source": [ "img_folder = \"data_fcn_full/train/images/images/\"\n", "mask_folder = \"data_fcn_full/train/masks/masks/\"\n", "\n", "\n", "def fimg_to_fmask(img_path, mask_folder):\n", " dirname, basename = os.path.split(img_path)\n", " if basename[0].isalpha():\n", " maskname = \"mask_\"+basename\n", " else:\n", " maskname = basename.replace(\".jpg\", \"_mask.jpg\")\n", " return os.path.join(mask_folder, maskname)\n", "\n", "fcn_train_imgs = [img for img in glob.glob(img_folder+\"*\")]\n", "fcn_train_masks = [mask for mask in glob.glob(mask_folder+\"*\")]\n", "fcn_train_pairs = [(img, fimg_to_fmask(img, mask_folder)) for img in fcn_train_imgs]\n", "\n", "n = len(fcn_train_pairs)\n", "print(n)\n", "print(fcn_train_pairs[3458])\n", "print(len(fcn_train_imgs))\n", "print(len(fcn_train_masks))" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import random\n", "count = 1\n", "for _ in range(n):\n", " i = random.randint(1, n-1)\n", " seed = random.randint(1, 10000001)\n", " small_mask = io.imread(fcn_train_pairs[i][1])\n", " image_augmentation(small_mask, mask_folder, \"mask_aug_defect_\"+str(count), seed)\n", " small_img = io.imread(fcn_train_pairs[i][0])\n", " image_augmentation(small_img, img_folder, \"aug_defect_\"+str(count), seed)\n", " count += 1\n", " if count > 100000:\n", " break" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "No. of train masks: 1506512\n", "No. of defected masks: 709056\n", "defected percentage: 0.47066070499272494\n" ] } ], "source": [ "train_masks = [ma for ma in glob.glob(\"data_fcn_full/train/masks/masks/*\")]\n", "defected_masks = [defected(io.imread(ma)) for ma in train_masks]\n", "print(\"No. of train masks: \", len(train_masks))\n", "print(\"No. of defected masks: \", sum(defected_masks))\n", "print(\"defected percentage: \", float(sum(defected_masks))/len(train_masks))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## augment both validation/masks and validation/no_masks" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "32264\n" ] } ], "source": [ "img_folder = \"data_fcn_full/validation/images/images/\"\n", "mask_folder = \"data_fcn_full/validation/masks/masks/\"\n", "\n", "fcn_val_imgs = [img for img in glob.glob(img_folder+\"*\")]\n", "fcn_val_pairs = [(img, fimg_to_fmask(img, mask_folder)) for img in fcn_val_imgs]\n", "\n", "n = len(fcn_val_pairs)\n", "print(n)" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import random\n", "count = 1\n", "for _ in range(n):\n", " i = random.randint(1, n-1)\n", " seed = random.randint(1, 10000001)\n", " small_mask = io.imread(fcn_val_pairs[i][1])\n", " image_augmentation(small_mask, mask_folder, \"mask_aug_defect_\"+str(count), seed)\n", " small_img = io.imread(fcn_val_pairs[i][0])\n", " image_augmentation(small_img, img_folder, \"aug_defect_\"+str(count), seed)\n", " count += 1\n", " if count > 10000:\n", " break" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "No. of validation masks: 152184\n", "No. of defected masks: 58921\n", "defected percentage: 0.3871694790516743\n" ] } ], "source": [ "val_masks = [ma for ma in glob.glob(\"data_fcn_full/validation/masks/masks/*\")]\n", "defected_masks = [defected(io.imread(ma)) for ma in val_masks]\n", "print(\"No. of validation masks: \", len(val_masks))\n", "print(\"No. of defected masks: \", sum(defected_masks))\n", "print(\"defected percentage: \", float(sum(defected_masks))/len(val_masks))" ] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python [default]", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.4" }, "widgets": { "state": {}, "version": "1.1.0" } }, "nbformat": 4, "nbformat_minor": 2 }