In [1]:
import numpy as np

import caffe

from lib import run_net
from lib import score_util

from datasets.youtube import youtube
from datasets.pascal_voc import pascal

Configure Caffe and load net

In [2]:
# Run Caffe on GPU device 0.
caffe.set_device(0)
caffe.set_mode_gpu()

# Load the network in TEST phase from the prototxt definition and the
# caffemodel weights. `net` is shared by every evaluation cell below.
net = caffe.Net('../nets/stage-voc-fcn8s.prototxt',
 '../nets/voc-fcn8s-heavy.caffemodel',
 caffe.TEST)

Dataset details

In [3]:
# Dataset wrappers. The root directories are hard-coded, machine-specific
# paths; adjust them to the local dataset locations before running.
YT = youtube('/x/youtube/')
PV = pascal('/x/PASCAL/VOC2011')

# Number of YouTube classes; the evaluation cells use it as the side length
# of the confusion matrix.
n_cl = len(YT.classes)
# Iterated as (class_, vid, shot) tuples by the evaluation cells.
inputs = YT.load_dataset()

Set base clock/subsampling rate

In [4]:
# Subsampling rate in frames: the pipeline cells treat frames f-CR and f-2*CR
# as the previous pipeline steps for frame f. Only every 10th frame was used
# for the paper.
CR = 10

# Oracle per frame

In [5]:
# Oracle baseline: run the full network on every evaluated labelled frame and
# accumulate an (n_cl x n_cl) confusion matrix over the YouTube classes.
hist_perframe = np.zeros((n_cl, n_cl))

# (PASCAL index, YouTube index) for each YouTube class. These lookups do not
# depend on the frame, so they are computed once instead of once per frame.
pv_to_yt = [(PV.classes.index(c), YT.classes.index(c)) for c in YT.classes]

for (class_, vid, shot) in inputs:
    for f in YT.list_label_frames(class_, vid, shot):
        # Skip frames with index below 2*CR+1 so the oracle is scored on the
        # same frames as the pipeline cells, which also load frames f-CR and
        # f-2*CR (the guard keeps f-2*CR >= 1).
        if f < 2*CR+1:
            continue
        im = YT.load_frame(class_, vid, shot, f)

        out = run_net.segrun(net, YT.preprocess(im))

        # Relabel the prediction from PASCAL class indices to YouTube class
        # indices; any pixel whose class is not a YouTube class stays 0.
        out_yt = np.zeros(out.shape, dtype=np.uint8)
        for pv_idx, yt_idx in pv_to_yt:
            out_yt[out == pv_idx] = yt_idx

        label = YT.load_label(class_, vid, shot, f)
        label = YT.make_label(label, class_)
        hist_perframe += score_util.fast_hist(label.flatten(), out_yt.flatten(), n_cl)

acc, cl_acc, mean_iu, fw_iu = score_util.get_scores(hist_perframe)
# Single-argument print(...) gives identical output on Python 2 and Python 3.
print('Oracle: Per frame')
print('acc\t\t cl acc\t\t mIU\t\t fwIU')
print('{:f}\t {:f}\t {:f}\t {:f}\t'.format(100*acc, 100*cl_acc, 100*mean_iu, 100*fw_iu))

Oracle: Per frame
acc		 cl acc		 mIU		 fwIU
95.255650	 82.298180	 69.999789	 91.424557	


# Pipeline 2-stage

In [6]:
# Two-stage pipeline evaluation: for each evaluated labelled frame, run a full
# pass on the frame CR earlier, then run the current frame through the 2-stage
# pipeline and accumulate an (n_cl x n_cl) confusion matrix.
hist = np.zeros((n_cl, n_cl))

# (PASCAL index, YouTube index) for each YouTube class. These lookups do not
# depend on the frame, so they are computed once instead of once per frame.
pv_to_yt = [(PV.classes.index(c), YT.classes.index(c)) for c in YT.classes]

for (class_, vid, shot) in inputs:
    for f in YT.list_label_frames(class_, vid, shot):
        # Skip frames with index below 2*CR+1. This cell only needs frame
        # f-CR, but the same guard is used in every evaluation cell so all of
        # them are scored on the same set of frames.
        if f < 2*CR+1:
            continue

        # Assume the last full run was on the frame CR steps earlier.
        # NOTE(review): presumably segrun leaves state in `net` that the
        # pipeline forward reuses -- confirm in lib/run_net.
        im = YT.load_frame(class_, vid, shot, (f-CR))
        _ = run_net.segrun(net, YT.preprocess(im))

        # Run the current frame through the 2-stage pipeline.
        im = YT.load_frame(class_, vid, shot, f)
        out = run_net.pipeline_2stage_forward(net, YT.preprocess(im))

        # Relabel the prediction from PASCAL class indices to YouTube class
        # indices; any pixel whose class is not a YouTube class stays 0.
        out_yt = np.zeros(out.shape, dtype=np.uint8)
        for pv_idx, yt_idx in pv_to_yt:
            out_yt[out == pv_idx] = yt_idx

        label = YT.load_label(class_, vid, shot, f)
        label = YT.make_label(label, class_)
        hist += score_util.fast_hist(label.flatten(), out_yt.flatten(), n_cl)

acc, cl_acc, mean_iu, fw_iu = score_util.get_scores(hist)
# Single-argument print(...) gives identical output on Python 2 and Python 3.
print('Pipeline 2-stage on subsample {}:'.format(CR))
print('acc\t\t cl acc\t\t mIU\t\t fwIU')
print('{:f}\t {:f}\t {:f}\t {:f}\t'.format(100*acc, 100*cl_acc,100*mean_iu, 100*fw_iu))

Pipeline 2-stage on subsample 10:
acc		 cl acc		 mIU		 fwIU
93.925326	 76.476584	 64.002186	 89.178644	


# Pipeline 3-stage

In [7]:
# Three-stage pipeline evaluation: for each evaluated labelled frame, push the
# frames 2*CR and CR earlier through the net, then run the current frame
# through the 3-stage pipeline and accumulate an (n_cl x n_cl) confusion matrix.
hist = np.zeros((n_cl, n_cl))

# (PASCAL index, YouTube index) for each YouTube class. These lookups do not
# depend on the frame, so they are computed once instead of once per frame.
pv_to_yt = [(PV.classes.index(c), YT.classes.index(c)) for c in YT.classes]

for (class_, vid, shot) in inputs:
    for f in YT.list_label_frames(class_, vid, shot):
        # Skip frames with index below 2*CR+1: the oldest frame loaded below
        # is f-2*CR, and the guard keeps that index >= 1.
        if f < 2*CR+1:
            continue

        # Push the frames 2*CR and CR old through the pipeline: a full run on
        # the older frame, then a 3-stage pipeline step on the newer one.
        # NOTE(review): presumably these calls leave state in `net` that the
        # final forward reuses -- confirm in lib/run_net.
        im = YT.load_frame(class_, vid, shot, (f-2*CR))
        _ = run_net.segrun(net, YT.preprocess(im))
        im = YT.load_frame(class_, vid, shot, f-CR)
        _ = run_net.pipeline_3stage_forward(net, YT.preprocess(im))

        # Run the current frame through the pipeline.
        im = YT.load_frame(class_, vid, shot, f)
        out = run_net.pipeline_3stage_forward(net, YT.preprocess(im))

        # Relabel the prediction from PASCAL class indices to YouTube class
        # indices; any pixel whose class is not a YouTube class stays 0.
        out_yt = np.zeros(out.shape, dtype=np.uint8)
        for pv_idx, yt_idx in pv_to_yt:
            out_yt[out == pv_idx] = yt_idx

        label = YT.load_label(class_, vid, shot, f)
        label = YT.make_label(label, class_)
        hist += score_util.fast_hist(label.flatten(), out_yt.flatten(), n_cl)

acc, cl_acc, mean_iu, fw_iu = score_util.get_scores(hist)
# Single-argument print(...) gives identical output on Python 2 and Python 3.
print('Pipeline 3-stage on subsample {}:'.format(CR))
print('acc\t\t cl acc\t\t mIU\t\t fwIU')
print('{:f}\t {:f}\t {:f}\t {:f}\t'.format(100*acc, 100*cl_acc,100*mean_iu, 100*fw_iu))

Pipeline 3-stage on subsample 10:
acc		 cl acc		 mIU		 fwIU
92.591616	 70.971741	 58.125009	 87.030027	
