In [1]:
import torch
import torchvision

torch_model = torch.hub.load('pytorch/vision:v0.6.0', 'mobilenet_v2', pretrained=True)
torch_model.eval();

Using cache found in /Users/sseibert/.cache/torch/hub/pytorch_vision_v0.6.0


In [2]:
from torchvision import transforms
full_model = torch.nn.Sequential(
 torch_model,
 torch.nn.Softmax(dim=0),
)

In [3]:
# Trace with random data
example_input = torch.rand(1, 3, 224, 224)
traced_model = torch.jit.trace(full_model, example_input)

In [4]:
# Download class labels (from a separate file)
import urllib
label_url = 'https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt'
class_labels = urllib.request.urlopen(label_url).read().decode("utf-8").splitlines()

class_labels = class_labels[1:] # remove the first class which is background
assert len(class_labels) == 1000

In [5]:
import coremltools as ct
# Convert to Core ML using the Unified Conversion API
model = ct.convert(
 traced_model,
 inputs=[ct.ImageType(name="image", shape=example_input.shape,
 color_layout="RGB",
 bias=[-1, -1, -1], 
 scale=1/127.0)],
 classifier_config = ct.ClassifierConfig(class_labels) # provide only if step 4 was performed
)

Converting Frontend ==> MIL Ops: 100%|█████████▉| 386/387 [00:00<00:00, 1501.18 ops/s]
Running MIL optimization passes: 100%|██████████| 17/17 [00:00<00:00, 129.66 passes/s]
Translating MIL ==> MLModel Ops: 100%|██████████| 708/708 [00:00<00:00, 1953.60 ops/s]


In [6]:
# Save model
model.save("MobileNet_v2.mlmodel")

In [7]:
import PIL
import numpy as np

def load_image(path, resize_to=None):
 # resize_to: (Width, Height)
 img = PIL.Image.open(path)
 if resize_to is not None:
 img = img.resize(resize_to, PIL.Image.ANTIALIAS)
 img_np = np.array(img).astype(np.float32)
 return img_np, img

# Translated PyTorch Model

In [8]:
_, img = load_image('Aalto_table.jpeg', resize_to=(224,224))
out = model.predict({'image': img})
out['classLabel'], sorted(out['649'].items(), key=lambda x: -x[1])[:10]

('tench',
 [('Afghan hound', 1.0),
 ('African chameleon', 1.0),
 ('African crocodile', 1.0),
 ('African elephant', 1.0),
 ('African grey', 1.0),
 ('African hunting dog', 1.0),
 ('Airedale', 1.0),
 ('American Staffordshire terrier', 1.0),
 ('American alligator', 1.0),
 ('American black bear', 1.0)])

# Reference Model

MobileNet.mlmodel taken from https://github.com/sivu22/CoreMLCompare/tree/master/CoreMLCompare

In [9]:
ref = ct.models.MLModel('MobileNet.mlmodel')

In [10]:
_, img = load_image('Aalto_table.jpeg', resize_to=(224,224))
ref_out = ref.predict({'image': img})
ref_out['classLabel'], sorted(ref_out['classLabelProbs'].items(), key=lambda x: -x[1])[:10]

('bannister, banister, balustrade, balusters, handrail',
 [('bannister, banister, balustrade, balusters, handrail', 0.7715724110603333),
 ('shoji', 0.054597772657871246),
 ('four-poster', 0.029684294015169144),
 ('parallel bars, bars', 0.028104523196816444),
 ('dining table, board', 0.024609141051769257),
 ('sliding door', 0.01916561648249626),
 ('fire screen, fireguard', 0.01576521061360836),
 ('studio couch, day bed', 0.00406468752771616),
 ('pedestal, plinth, footstall', 0.004033055622130632),
 ('prayer rug, prayer mat', 0.003056224901229143)])