{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "import caffe\n",
    "\n",
    "from lib import run_net\n",
    "from lib import score_util\n",
    "\n",
    "from datasets.youtube import youtube\n",
    "from datasets.pascal_voc import pascal"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Configure Caffe and load net"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "caffe.set_device(0)\n",
    "caffe.set_mode_gpu()\n",
    "\n",
    "net = caffe.Net('../nets/stage-voc-fcn8s.prototxt',\n",
    "                '../nets/voc-fcn8s-heavy.caffemodel',\n",
    "                caffe.TEST)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Dataset details"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "YT = youtube('/x/youtube/')\n",
    "PV = pascal('/x/PASCAL/VOC2011')\n",
    "\n",
    "n_cl = len(YT.classes)\n",
    "inputs = YT.load_dataset()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Set base clock/subsampling rate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "CR = 10 # subsample amount -- we used only every 10 frames for paper"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Evaluation helpers shared by the oracle and pipeline runs below"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# (PASCAL index, YouTube index) pair for every YouTube class, looked up once\n",
    "# here instead of once per class on every frame.\n",
    "PV_TO_YT = [(PV.classes.index(c), YT.classes.index(c)) for c in YT.classes]\n",
    "\n",
    "\n",
    "def to_youtube_classes(out):\n",
    "    \"\"\"Re-index a net output from PASCAL class ids to YouTube class ids.\n",
    "\n",
    "    Returns a uint8 array of the same shape as `out`. Entries of `out` that\n",
    "    do not match the PASCAL index of any class in `YT.classes` are left at 0.\n",
    "    \"\"\"\n",
    "    out_yt = np.zeros(out.shape, dtype=np.uint8)\n",
    "    for pv_idx, yt_idx in PV_TO_YT:\n",
    "        out_yt[out == pv_idx] = yt_idx\n",
    "    return out_yt\n",
    "\n",
    "\n",
    "def evaluate(predict):\n",
    "    \"\"\"Accumulate the (n_cl, n_cl) score histogram of `predict` over `inputs`.\n",
    "\n",
    "    `predict(class_, vid, shot, f)` must return the net output for frame `f`.\n",
    "\n",
    "    Label frames with f < 2*CR+1 are skipped for every method so that all\n",
    "    methods are scored on the same frames: the 3-stage pipeline also loads\n",
    "    frames f-CR and f-2*CR, and this guard keeps f-2*CR >= 1\n",
    "    (presumably frames are numbered from 1 -- TODO confirm).\n",
    "    \"\"\"\n",
    "    hist = np.zeros((n_cl, n_cl))\n",
    "    for (class_, vid, shot) in inputs:\n",
    "        for f in YT.list_label_frames(class_, vid, shot):\n",
    "            if f < 2*CR+1:\n",
    "                continue\n",
    "            out_yt = to_youtube_classes(predict(class_, vid, shot, f))\n",
    "\n",
    "            label = YT.load_label(class_, vid, shot, f)\n",
    "            label = YT.make_label(label, class_)\n",
    "            hist += score_util.fast_hist(label.flatten(), out_yt.flatten(), n_cl)\n",
    "    return hist\n",
    "\n",
    "\n",
    "def report(title, hist):\n",
    "    \"\"\"Print `title`, then the four scores from `score_util.get_scores(hist)` scaled by 100.\"\"\"\n",
    "    acc, cl_acc, mean_iu, fw_iu = score_util.get_scores(hist)\n",
    "    print title\n",
    "    print 'acc\\t\\t cl acc\\t\\t mIU\\t\\t fwIU'\n",
    "    print '{:f}\\t {:f}\\t {:f}\\t {:f}\\t'.format(100*acc, 100*cl_acc, 100*mean_iu, 100*fw_iu)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Oracle per frame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Oracle: Per frame\n",
      "acc\t\t cl acc\t\t mIU\t\t fwIU\n",
      "95.255650\t 82.298180\t 69.999789\t 91.424557\t\n"
     ]
    }
   ],
   "source": [
    "def oracle_per_frame(class_, vid, shot, f):\n",
    "    \"\"\"Run the net with `run_net.segrun` on frame `f` itself.\"\"\"\n",
    "    im = YT.load_frame(class_, vid, shot, f)\n",
    "    return run_net.segrun(net, YT.preprocess(im))\n",
    "\n",
    "\n",
    "hist_perframe = evaluate(oracle_per_frame)\n",
    "report('Oracle: Per frame', hist_perframe)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Pipeline 2-stage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Pipeline 2-stage on subsample 10:\n",
      "acc\t\t cl acc\t\t mIU\t\t fwIU\n",
      "93.925326\t 76.476584\t 64.002186\t 89.178644\t\n"
     ]
    }
   ],
   "source": [
    "def pipeline_2stage(class_, vid, shot, f):\n",
    "    \"\"\"Full run on frame f-CR, then a 2-stage pipeline forward on frame f.\"\"\"\n",
    "    # Assume the frame CR old is the last full run; its output is not scored\n",
    "    im = YT.load_frame(class_, vid, shot, f-CR)\n",
    "    run_net.segrun(net, YT.preprocess(im))\n",
    "\n",
    "    # Run current frame through 2-stage pipeline\n",
    "    im = YT.load_frame(class_, vid, shot, f)\n",
    "    return run_net.pipeline_2stage_forward(net, YT.preprocess(im))\n",
    "\n",
    "\n",
    "hist_2stage = evaluate(pipeline_2stage)\n",
    "report('Pipeline 2-stage on subsample {}:'.format(CR), hist_2stage)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Pipeline 3-stage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Pipeline 3-stage on subsample 10:\n",
      "acc\t\t cl acc\t\t mIU\t\t fwIU\n",
      "92.591616\t 70.971741\t 58.125009\t 87.030027\t\n"
     ]
    }
   ],
   "source": [
    "def pipeline_3stage(class_, vid, shot, f):\n",
    "    \"\"\"Full run on frame f-2*CR, then 3-stage pipeline forwards on f-CR and f.\"\"\"\n",
    "    # Push frames 2*CR and CR old through pipeline; their outputs are not scored\n",
    "    im = YT.load_frame(class_, vid, shot, f-2*CR)\n",
    "    run_net.segrun(net, YT.preprocess(im))\n",
    "    im = YT.load_frame(class_, vid, shot, f-CR)\n",
    "    run_net.pipeline_3stage_forward(net, YT.preprocess(im))\n",
    "\n",
    "    # Run current frame through pipeline\n",
    "    im = YT.load_frame(class_, vid, shot, f)\n",
    "    return run_net.pipeline_3stage_forward(net, YT.preprocess(im))\n",
    "\n",
    "\n",
    "hist_3stage = evaluate(pipeline_3stage)\n",
    "report('Pipeline 3-stage on subsample {}:'.format(CR), hist_3stage)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.11+"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}