# Teachable machine

**Create a deep model for your own task (webcam/other set of images)**

* **Set Data Pipeline** - Dataset, DataLoader, Transforms
* **Build Neural Network** - Network Module, Loss
* **Train Model** - Optimizer, Babysitting Learning
* **Transfer Learning** - Feature extractor, Fine-tuning

"Teach a machine using your camera" - [Experiment][teachable-experiment] / [YouTube Presentation][teachable-youtube]


 


[teachable-youtube]:https://youtu.be/3BhkeY974Rg
[teachable-experiment]:https://teachablemachine.withgoogle.com/

## Import libraries

In [None]:
import torch
print("Torch version:", torch.__version__)

import torchvision
print("Torchvision version:", torchvision.__version__)

import numpy as np
print("Numpy version:", np.__version__)

import matplotlib
print("Matplotlib version:", matplotlib.__version__)

import PIL
print("PIL version:", PIL.__version__)

import IPython
print("IPython version:", IPython.__version__)

import cv2
print('OpenCV version:', cv2.__version__)

In [None]:
# Setup Matplotlib
%matplotlib inline
#%config InlineBackend.figure_format = 'retina' # If you have a retina screen
import matplotlib.pyplot as plt

## Create Data Set

In [None]:
from IPython import display
import os, time

# Path to write images
img_path = os.path.join('images', 'normal')
prefix = 'session1'

# Make sure the target folder exists before capturing: cv2.imwrite() reports
# a failed write only through its return value, so a missing folder would
# otherwise go unnoticed
os.makedirs(img_path, exist_ok=True)

# Connect to webcam (reuse the handle if a previous run left one behind)
if 'webcam' not in locals() or webcam is None:
    webcam = cv2.VideoCapture(0)

try:
    # Try to read from the webcam
    webcam_found, _ = webcam.read()

    if webcam_found:
        # How many photos to save
        n_images = int(input("Number of photos: "))

        # Create figure to display webcam
        fig = plt.figure()
        axis = fig.gca()

        # Collect images
        live_in = 3        # countdown ticks shown before the first photo
        image_taken = 0
        frame_lost = False

        while image_taken < n_images:
            # Take a picture with the webcam
            frame_ok, image = webcam.read()
            if not frame_ok:
                # read() hands back no usable frame when the grab fails;
                # stop instead of crashing inside cv2.resize()
                frame_lost = True
                break

            # Process it
            image = cv2.resize(image, (250, 250))  # Reduce size
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # To RGB

            # Plot it
            axis.cla()
            axis.imshow(image_rgb)

            if live_in == 0:
                # We are live!
                image_taken += 1
                axis.set_title('Click ! ({}/{})'.format(image_taken, n_images))

                # Save the image (the BGR frame, which is what imwrite expects)
                path = os.path.join(img_path, '{}-{}.png'.format(prefix, image_taken))
                if not cv2.imwrite(path, image):
                    raise IOError('Could not write image to {}'.format(path))

                # Time before taking the next picture
                sleep_time = 0.2

            else:
                # We are not live
                axis.set_title("We're live in .. {}".format(live_in))
                sleep_time = 1
                live_in -= 1

            display.clear_output(wait=True)
            display.display(fig)

            # Sleep
            time.sleep(sleep_time)

        # Clear output
        display.clear_output()

        if frame_lost:
            # Printed after clear_output() so the message stays visible
            print('Lost the webcam feed after {} photo(s).'.format(image_taken))

    else:
        print('Cannot read from webcam, do you have one connected?')

except KeyboardInterrupt:
    # Clear output
    display.clear_output()

finally:
    # Disconnect webcam: release the device explicitly, then drop the name so
    # that the next run of this cell opens a fresh connection
    webcam.release()
    del webcam

## Set Data Pipeline

In [None]:
from torchvision import transforms

# Preprocessing pipelines
# Channel-wise standardisation with the statistics used by the pretrained
# PyTorch models
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Training: take a random 224x224 crop of each image
train_transform = transforms.Compose(
    [transforms.RandomCrop(size=224), transforms.ToTensor(), normalize]
)

# Validation: rescale the whole image to 224x224, no randomness
valid_transform = transforms.Compose(
    [transforms.Resize(size=(224, 224)), transforms.ToTensor(), normalize]
)

In [None]:
# Two views over the same 'images' folder: they list the same files and only
# differ in the preprocessing applied when a sample is loaded
trainset = torchvision.datasets.ImageFolder(root='images', transform=train_transform)
validset = torchvision.datasets.ImageFolder(root='images', transform=valid_transform)

# Class names as reported by the data set
classes = trainset.classes
n_classes = len(classes)
print('Classes:', classes)

In [None]:
from torch.utils.data.sampler import SubsetRandomSampler

# Define train/validation sets
n_images = len(trainset)   # number of images in our data set
if n_images < 2:
    raise ValueError(
        'Need at least 2 images to build a train/validation split, found {}'.format(n_images)
    )

idx = np.arange(n_images)  # idx: 0 .. (n_images - 1)
np.random.shuffle(idx)     # shuffle

# Create train/validation samplers
valid_size = 100
if valid_size >= n_images:
    # Holding out 100 images would leave nothing to train on: fall back to
    # roughly 20% of the data (at least one image) for validation
    valid_size = max(1, n_images // 5)
    print('Only {} images available, using {} of them for validation'.format(
        n_images, valid_size))

# The last `valid_size` shuffled indices are held out, the rest is for training
train_sampler = SubsetRandomSampler(idx[:-valid_size])
valid_sampler = SubsetRandomSampler(idx[-valid_size:])

print('Train set:', len(train_sampler))
print('Validation set:', len(valid_sampler))

In [None]:
# Batch the samples: each loader draws 4 images at a time from its own
# data set view, in the order decided by its sampler
train_loader = torch.utils.data.DataLoader(
    dataset=trainset, batch_size=4, sampler=train_sampler)
valid_loader = torch.utils.data.DataLoader(
    dataset=validset, batch_size=4, sampler=valid_sampler)

In [None]:
# Peek at the first training batch
train_iter = iter(train_loader)
images, labels = next(train_iter)

# Class name of every image in the batch
print('Classes:', ', '.join([classes[i] for i in labels]))

# Tile the batch into one image; normalize=True rescales the values for display
grid = torchvision.utils.make_grid(images, normalize=True)

# Move the channel axis last, which is the layout imshow() expects
plt.imshow(np.transpose(grid.numpy(), (1, 2, 0)))
plt.show()

## Build Neural Network - Transfer learning

**Can we reuse what's been learned on other tasks?** - Source [cs231n][cs231-transfer]

> In practice, very few people train an entire Convolutional Network from scratch (with random initialization), because it is relatively rare to have a dataset of sufficient size. Instead, it is common to pretrain a ConvNet on a very large dataset (e.g. ImageNet, which contains 1.2 million images with 1000 categories), and then use the ConvNet either as an initialization or a fixed feature extractor for the task of interest.

**Transfer Learning Scenarios**

* Pretrained network as a **Feature Extractor**
* Adjust weights - **Fine-tuning**

[cs231-transfer]:http://cs231n.github.io/transfer-learning/

In [None]:
def resnet_freezed():
    """Build a pretrained ResNet-18 used as a fixed feature extractor.

    Every pretrained parameter has `requires_grad` switched off; the final
    `fc` layer is then replaced by a new linear layer with one output per
    entry of the module-level `classes` list.

    Returns:
        The modified torchvision ResNet-18 model.
    """
    # Pretrained network
    backbone = torchvision.models.resnet18(pretrained=True)

    # Freeze everything that came with the pretrained weights
    for weight in backbone.parameters():
        weight.requires_grad = False

    # New classification head, created after the freeze so it stays trainable
    n_features = backbone.fc.in_features
    backbone.fc = torch.nn.Linear(n_features, len(classes))

    return backbone

# Show the architecture as the cell output
resnet_freezed()

## Train Model

In [None]:
from collections import defaultdict

# Network to train: the frozen ResNet with a new classification layer
model = resnet_freezed()

# Loss function, and an SGD optimizer that is handed the parameters of the
# `fc` layer only
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.fc.parameters(), lr=0.01)

# Backprop step
def compute_loss(output, target):
    """Return the loss of a batch of predictions.

    Args:
        output: tensor of scores produced by the model for the batch.
        target: class indices of the batch (tensor or sequence of ints).

    Returns:
        The value computed by the module-level `criterion`, as a tensor that
        can be backpropagated.
    """
    # Convert the labels to int64 on the same device as the scores; the
    # deprecated Variable wrapper is not needed
    labels = torch.as_tensor(target, dtype=torch.long, device=output.device)
    return criterion(output, labels)

def backpropagation(output, target):
    """Run one optimisation step and return the loss of the batch.

    Args:
        output: scores produced by the model for the batch.
        target: class indices of the batch.

    Returns:
        The loss as a plain Python float, ready to be logged or plotted.
    """
    optimizer.zero_grad()                # Clear the gradients
    loss = compute_loss(output, target)  # Compute loss
    loss.backward()                      # Backpropagation
    optimizer.step()                     # Let the optimizer adjust our model
    # item() extracts the number itself, so the logged history does not hold
    # on to tensors
    return loss.item()

# Helper function
def get_accuracy(output, y):
    """Return the fraction of samples whose top-scoring class equals the label.

    Args:
        output: 2-D tensor of scores, one row per sample and one column per
            class.
        y: expected class indices (tensor or sequence of ints), one per row
            of `output`.

    Returns:
        The accuracy over the batch as a Python float between 0 and 1.
    """
    predictions = torch.argmax(output, dim=1)  # Max activation
    # Compare with torch itself rather than passing tensors through numpy
    targets = torch.as_tensor(y, device=predictions.device)
    return (predictions == targets).float().mean().item()
 
# Figure with two panels side by side: loss on the left, accuracy on the right
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(8, 3))

def plot_learning():
    """Redraw the learning curves from the module-level `stats` history.

    Reads the globals `epoch`, `batch`, `train_loader` and `stats`, redraws
    both panels of `fig` and replaces the current notebook output with the
    refreshed figure.
    """
    # Progress header
    fig.suptitle('Epoch {}, batch {:,}/{:,}'.format(epoch, batch, len(train_loader)))

    # Panel titles: once a validation pass exists, show the mean of the last
    # (up to) 10 validation values next to the name
    if len(stats['val_t']) > 0:
        loss_title = 'Loss, val: {:.3f}'.format(np.mean(stats['val_loss'][-10:]))
        acc_title = 'Accuracy, val: {:.3f}'.format(np.mean(stats['val_acc'][-10:]))
    else:
        loss_title = 'Loss'
        acc_title = 'Accuracy'

    # (axis, title, train series, validation series, fixed y-range or None)
    panels = (
        (ax1, loss_title, 'train_loss', 'val_loss', None),
        (ax2, acc_title, 'train_acc', 'val_acc', (0, 1)),
    )
    for axis, title, train_key, valid_key, y_range in panels:
        axis.cla()
        axis.set_title(title)
        axis.plot(stats['train_t'], stats[train_key], label='train')
        axis.plot(stats['val_t'], stats[valid_key], label='valid')
        if y_range is not None:
            axis.set_ylim(*y_range)
        axis.legend()

    # Jupyter trick: swap the previous output for the updated figure
    IPython.display.clear_output(wait=True)
    IPython.display.display(fig)
 
# Collect loss / accuracy values
stats = defaultdict(list)
t = 0            # Sample counter, advanced by the batch size after every batch
print_step = 10  # Refresh rate: redraw whenever t is a multiple of this

# Train Network
epoch = 1
do_training = True

while do_training:
    # Set Model in "training" mode
    model.train()

    # Train by small batches of data
    for batch, (batch_X, batch_y) in enumerate(train_loader, 1):
        # Forward pass & backpropagation
        output = model(batch_X)
        loss = backpropagation(output, batch_y)

        # Log "train" stats as plain numbers
        stats['train_loss'].append(float(loss))
        stats['train_acc'].append(get_accuracy(output, batch_y))
        stats['train_t'].append(t)

        if t % print_step == 0:
            # Plot learning
            plot_learning()

        # Update t
        t += train_loader.batch_size

    # Set model in "validation" mode
    model.eval()

    # Log "validation" stats; no gradients are needed here, so autograd is
    # switched off for the whole pass
    loss_vals, acc_vals = [], []
    with torch.no_grad():
        for X, y in valid_loader:
            output = model(X)
            loss_vals.append(float(compute_loss(output, y)))
            acc_vals.append(get_accuracy(output, y))

    stats['val_loss'].append(np.mean(loss_vals))
    stats['val_acc'].append(np.mean(acc_vals))
    stats['val_t'].append(t)

    # Plot learning
    plot_learning()

    # Should we continue? Ask again instead of crashing on a non-numeric answer
    while True:
        answer = input('Continue training? 1 (yes) or 0 (no): ').strip()
        try:
            do_training = bool(int(answer))
        except ValueError:
            print('Please answer with 1 or 0.')
        else:
            break
    epoch += 1

# Clear output
IPython.display.clear_output(wait=True)

## Fine-tuning - Smaller learning rate

In [None]:
def resnet():
    """Build a pretrained ResNet-18 with a new classification layer.

    Nothing is frozen here: the pretrained parameters are left as loaded,
    and only the final `fc` layer is swapped for a new linear layer with one
    output per entry of the module-level `classes` list.

    Returns:
        The modified torchvision ResNet-18 model.
    """
    # Pretrained model
    network = torchvision.models.resnet18(pretrained=True)

    # Replace the classification layer
    n_features = network.fc.in_features
    network.fc = torch.nn.Linear(n_features, len(classes))

    return network

## Save Model

In [None]:
# Save the model and class names
state = {
    'model': model,      # the whole model object, not only its weights
    'classes': classes
}

# torch.save() does not create missing folders, so make sure 'data' exists
os.makedirs('data', exist_ok=True)
torch.save(state, os.path.join('data', 'webcam-model.p'))

---

# Live Test!

In [None]:
# Load libraries
import torch
print("Torch version:", torch.__version__)

import torchvision
print("Torchvision version:", torchvision.__version__)

import matplotlib
print("Matplotlib version:", matplotlib.__version__)

import numpy as np
print("Numpy version:", np.__version__)

import cv2
print('OpenCV version:', cv2.__version__)

In [None]:
# Setup Matplotlib
%matplotlib inline
#%config InlineBackend.figure_format = 'retina' # If you have a retina screen
import matplotlib.pyplot as plt

In [None]:
import os

# Restore the saved state: a dictionary holding the model object and the
# class names. torch.load() unpickles the file, so only open checkpoints
# that you created yourself.
state = torch.load(os.path.join('data', 'webcam-model.p'))
model, classes = state['model'], state['classes']
print('Classes:', classes)

In [None]:
from torchvision import transforms

# Preprocessing applied to each webcam frame before it is given to the model
image_transform = transforms.Compose([
    # Webcam frames arrive as arrays: convert them to PIL images first
    transforms.ToPILImage(),
    # Rescale to 224x224
    transforms.Resize(size=(224, 224)),
    # PIL image -> tensor
    transforms.ToTensor(),
    # Channel-wise standardisation, values for PyTorch models
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

In [None]:
# We will need some tools from PyTorch
from torch.autograd import Variable
import torch.nn as nn

# Tools to display webcam feed
from IPython import display
import time

# Connect to webcam (reuse the handle if a previous run left one behind)
if 'webcam' not in locals() or webcam is None:
    webcam = cv2.VideoCapture(0)

try:
    # Try to read from the webcam
    webcam_found, _ = webcam.read()

    if webcam_found:
        # Create figure: class probabilities on the left, camera image on the right
        fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(6, 2))

        # Set network in "evaluation" mode
        model.eval()

        frame_lost = False

        # Pure inference: switch autograd off for the whole loop
        with torch.no_grad():
            for i in range(100):
                # Take a picture with the webcam
                frame_ok, image = webcam.read()
                if not frame_ok:
                    # read() hands back no usable frame when the grab fails;
                    # stop instead of crashing inside cv2.resize()
                    frame_lost = True
                    break

                # Process it
                image = cv2.resize(image, (250, 250))  # Reduce size
                image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # To RGB
                image_pytorch = image_transform(image_rgb)

                # Classify image: add a batch dimension, then turn the scores
                # of that single image into probabilities
                output = model(image_pytorch[None, :])
                probs = nn.functional.softmax(output, dim=1)[0].numpy()

                # Plot the image
                ax1.cla()
                ax1.barh(np.arange(len(classes)), probs, height=0.5, tick_label=classes)
                ax1.set_xlim(0, 1)
                ax2.cla()
                ax2.imshow(image_rgb, aspect='auto')

                # Jupyter trick
                display.clear_output(wait=True)
                display.display(fig)

                # Rest a bit for CPU
                time.sleep(0.2)

        # Clear output
        display.clear_output()

        if frame_lost:
            # Printed after clear_output() so the message stays visible
            print('Lost the webcam feed, stopping the live test.')

    else:
        print('Cannot read from webcam, do you have one connected?')

except KeyboardInterrupt:
    # Clear output
    display.clear_output()

finally:
    # Disconnect webcam: release the device explicitly, then drop the name so
    # that the next run of this cell opens a fresh connection
    webcam.release()
    del webcam