## CIFAR 10

In [1]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2

In [2]:
from fastai.conv_learner import *
PATH = Path("/home/ubuntu/data/cifar10/")
os.makedirs(PATH,exist_ok=True)

In [3]:
from torchvision import transforms, datasets

In [4]:
torch.cuda.set_device(0)

In [5]:
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
stats = (np.array([ 0.4914 ,  0.48216,  0.44653]), np.array([ 0.24703,  0.24349,  0.26159]))

In [6]:
bs=256

In [7]:
num_workers = num_cpus()

In [7]:
num_workers = 16

In [32]:
traindir = str(PATH/'train')
valdir = str(PATH/'test')
tfms = [transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))]

train_dataset = datasets.ImageFolder(
    traindir,
    transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
    ] + tfms))

train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=bs, shuffle=True, num_workers=num_workers, pin_memory=True)

val_dataset = datasets.ImageFolder(valdir, transforms.Compose(tfms))

val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=bs*2, shuffle=False, num_workers=num_workers, pin_memory=True)

In [33]:
data = ModelData(PATH, train_loader, val_loader)
data.sz=32

In [26]:
class ConvLayer(nn.Module):
    def __init__(self, ni, nf, ks=3, stride=1):
        super().__init__()
        self.conv = nn.Conv2d(ni, nf, kernel_size=ks, bias=False, stride=stride, padding=ks//2)
        self.bn = nn.BatchNorm2d(nf, momentum=0.01)
        self.relu = nn.LeakyReLU(negative_slope=0.1, inplace=True)
        
    def forward(self, x): return self.relu(self.bn(self.conv(x)))

class ResLayer(nn.Module):
    def __init__(self, ni, pr_drop=0):
        super().__init__()
        self.pr_drop = pr_drop
        self.conv1=ConvLayer(ni, ni//2, ks=1)
        self.conv2=ConvLayer(ni//2, ni, ks=3)
        
    def forward(self, x): 
        drop = self.training and (random.random() < self.pr_drop)
        return (0 if drop else self.conv2(self.conv1(x))) + x

class Darknet(nn.Module):
    def make_group_layer(self, ch_in, num_blocks, widen, stride=1, pr_drop=0):
        return [ConvLayer(ch_in,ch_in*widen,stride=stride)
               ] + [(ResLayer(ch_in*widen, pr_drop=pr_drop)) for i in range(num_blocks)]

    def __init__(self, num_blocks, num_classes, start_nf=32, widen=2, pr_drop=0):
        super().__init__()
        nf = start_nf
        layers = [ConvLayer(3, nf, ks=3, stride=1)]
        for i,nb in enumerate(num_blocks):
            layers += self.make_group_layer(nf, nb, widen, stride=2-(i==1), pr_drop=pr_drop )
            nf *= widen; widen=2
        layers += [nn.AdaptiveAvgPool2d(1), Flatten(), nn.Linear(nf, num_classes)]
        self.layers = nn.Sequential(*layers)
    
    def do_pr(self, m, pr):
        if hasattr(m,'pr_drop'): m.pr_drop=pr
    def set_pr_drop(self, pr): self.apply(lambda m: self.do_pr(m, pr))
    def forward(self, x): return self.layers(x)

In [34]:
from models.wideresnet import WideResNet

In [35]:
m = WideResNet(depth=22, num_classes=10, widen_factor=6, dropRate=0.)

In [36]:
m = nn.DataParallel(m, [0,1,2,3])

In [37]:
lr = 1.3

In [21]:
# m = Darknet([1, 2, 4, 6, 3], num_classes=10, start_nf=32)#, pr_drop=0.5)
# m = Darknet([1, 2, 2, 2, 2], num_classes=10, start_nf=32)#, pr_drop=0.5)
# m = Darknet([2, 2, 2, 2], num_classes=10, start_nf=64)

In [82]:
# from torch.nn.init import kaiming_normal

# def init_bn_bias(m):
#     if isinstance(m, nn.Conv2d): kaiming_normal(m.weight.data, 0.1)
#     elif isinstance(m, nn.BatchNorm2d): m.bias.data.zero_()
#     elif isinstance(m, nn.Linear): m.bias.data.zero_()

# m.apply(init_bn_bias);

In [38]:
learn = ConvLearner.from_model_data(m, data)
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
wd=1e-4

In [39]:
learn.half()

In [157]:
learn.model.set_pr_drop(0)

In [40]:
# DP: m = WideResNet(depth=22, num_classes=10, widen_factor=6, dropRate=0.)
learn.fit(lr/10, 1, wds=wd, cycle_len=1, use_clr_beta=(100, 1, 0.9, 0.8))

HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

                                                            

  x = Variable(T(x), volatile=volatile, requires_grad=requires_grad)


epoch      trn_loss   val_loss   accuracy   
    0      1.238358   1.026308   0.6298    



[1.0263078125, 0.6297999995231628]

In [16]:
%time learn.fit(lr, 1, wds=wd, cycle_len=30, use_clr_beta=(20, 20, 0.95, 0.85))

A Jupyter Widget

epoch      trn_loss   val_loss   accuracy                   
    0      1.057594   1.163891   0.6072    
    1      0.791428   0.859953   0.7078                      
    2      0.642778   0.884299   0.7168                      
    3      0.586491   0.85431    0.7149                      
    4      0.530411   0.724244   0.7607                      
    5      0.492245   0.718871   0.764                       
    6      0.465657   0.546274   0.8112                      
    7      0.437823   0.536493   0.8182                      
    8      0.440909   0.69369    0.7729                      
    9      0.408925   1.115436   0.7126                      
    10     0.401172   0.902935   0.733                       
    11     0.397317   0.690258   0.7921                      
    12     0.376588   0.514558   0.8287                      
    13     0.366199   0.442919   0.8527                      
    14     0.345316   0.796473   0.7753                      
    15     0.333985   0.405

[array([0.20742]), 0.9427]

In [15]:
# DP: m = WideResNet(depth=22, num_classes=10, widen_factor=6, dropRate=0.)
learn.fit(lr/10, 1, wds=wd, cycle_len=1, use_clr_beta=(100, 1, 0.9, 0.8))

A Jupyter Widget

epoch      trn_loss   val_loss   accuracy                   
    0      1.294204   1.058497   0.6254    



[array([1.0585]), 0.6254]

In [16]:
%time learn.fit(lr, 1, wds=wd, cycle_len=30, use_clr_beta=(20, 20, 0.95, 0.85))

A Jupyter Widget

epoch      trn_loss   val_loss   accuracy                   
    0      1.057594   1.163891   0.6072    
    1      0.791428   0.859953   0.7078                      
    2      0.642778   0.884299   0.7168                      
    3      0.586491   0.85431    0.7149                      
    4      0.530411   0.724244   0.7607                      
    5      0.492245   0.718871   0.764                       
    6      0.465657   0.546274   0.8112                      
    7      0.437823   0.536493   0.8182                      
    8      0.440909   0.69369    0.7729                      
    9      0.408925   1.115436   0.7126                      
    10     0.401172   0.902935   0.733                       
    11     0.397317   0.690258   0.7921                      
    12     0.376588   0.514558   0.8287                      
    13     0.366199   0.442919   0.8527                      
    14     0.345316   0.796473   0.7753                      
    15     0.333985   0.405

[array([0.20742]), 0.9427]

In [18]:
learn.fit(lr/10, 1, wds=wd, cycle_len=1, use_clr_beta=(100, 1, 0.9, 0.8))

A Jupyter Widget

epoch      trn_loss   val_loss   accuracy                   
    0      1.290646   1.08506    0.6043    



[array([1.08506]), 0.6043]

In [19]:
%time learn.fit(lr, 1, wds=wd, cycle_len=40, use_clr_beta=(10, 15, 0.95, 0.85))

A Jupyter Widget

epoch      trn_loss   val_loss   accuracy                   
    0      1.084963   1.085174   0.6082    
    1      0.825962   1.001847   0.6643                      
    2      0.702493   0.930496   0.6788                      
    3      0.593327   0.76002    0.7503                      
    4      0.543732   0.654882   0.7788                      
    5      0.503807   0.954524   0.7058                      
    6      0.460451   0.520503   0.8216                      
    7      0.429511   0.73571    0.7748                      
    8      0.419969   0.555502   0.8179                      
    9      0.415771   0.593286   0.8055                      
    10     0.40077    0.642513   0.8029                      
    11     0.385437   0.965159   0.7204                      
    12     0.383747   0.691304   0.783                       
    13     0.35558    0.705928   0.774                       
    14     0.355323   0.631229   0.8075                      
    15     0.354718   0.796

[array([0.20191]), 0.9448]

In [22]:
learn.fit(lr/10, 1, wds=wd, cycle_len=1, use_clr_beta=(100, 1, 0.9, 0.8))

A Jupyter Widget

epoch      trn_loss   val_loss   accuracy                   
    0      1.272734   1.096014   0.6024    



[array([1.09601]), 0.6024]

In [23]:
%time learn.fit(1., 1, wds=wd, cycle_len=30, use_clr_beta=(10, 25, 0.95, 0.85))

A Jupyter Widget

epoch      trn_loss   val_loss   accuracy                   
    0      1.05031    1.058887   0.6364    
    1      0.784812   1.010626   0.6695                      
    2      0.662347   0.962157   0.6972                      
    3      0.575188   0.63956    0.7755                      
    4      0.516394   0.830657   0.7348                      
    5      0.465992   0.802316   0.7552                      
    6      0.437692   0.666769   0.7819                      
    7      0.421664   0.699891   0.7839                      
    8      0.409448   0.716692   0.7687                      
    9      0.378664   0.57033    0.8045                      
    10     0.378573   0.713038   0.7787                      
    11     0.358377   0.843966   0.7419                      
    12     0.328598   0.72601    0.7865                      
    13     0.32008    0.702174   0.7723                      
    14     0.299527   0.478337   0.8429                      
    15     0.292695   0.692

[array([0.22854]), 0.9416]

In [15]:
%time learn.fit(lr, 1, wds=wd, cycle_len=40, use_clr_beta=(100, 15, 0.95, 0.85))

A Jupyter Widget

epoch      trn_loss   val_loss   accuracy                   
    0      1.428835   1.412945   0.5194    
    1      0.995178   2.106482   0.4173                      
    2      0.783325   1.311001   0.6189                      
    3      0.650752   0.948611   0.6966                      
    4      0.572025   0.611907   0.7906                      
    5      0.540067   0.853141   0.7339                      
    6      0.479793   0.552199   0.8167                      
    7      0.457727   0.951997   0.7204                      
    8      0.429621   0.825224   0.7421                      
    9      0.412101   1.17335    0.6826                      
    10     0.4009     0.808628   0.7315                      
    11     0.383221   0.60638    0.804                       
    12     0.36901    0.647075   0.7874                      
    13     0.368189   0.741168   0.7825                      
    14     0.366653   0.653589   0.7973                      
    15     0.355023   0.711

[array([0.20728]), 0.9427]

In [51]:
# darknet 2222 lr 1.3 65 cl
%time learn.fit(lr, 1, wds=wd, cycle_len=65, use_clr_beta=(30, 20, 0.95, 0.85))

A Jupyter Widget

epoch      trn_loss   val_loss   accuracy                   
    0      1.533084   1.725181   0.4627    
    1      1.224625   1.241789   0.5727                     
    2      0.995259   1.005162   0.6476                      
    3      0.865179   0.949657   0.6642                      
    4      0.756122   0.854677   0.6968                      
    5      0.692097   1.110497   0.6578                      
    6      0.635014   0.805524   0.7227                      
    7      0.588518   0.759759   0.7334                      
    8      0.567764   0.868924   0.7131                      
    9      0.547826   0.700656   0.7645                      
    10     0.524676   1.005113   0.6889                      
    11     0.50653    0.721323   0.7645                      
    12     0.493718   1.125408   0.6608                      
    13     0.479434   0.756994   0.7639                      
    14     0.475674   0.73913    0.7589                      
    15     0.464452   0.6123

[array([0.23373]), 0.9422]