# Download data

In [None]:
# Download the Cats-vs-Dogs archive.
# --fail: stop with an error on an HTTP failure instead of saving the error
#         page as dataset.zip (which would only surface later as a confusing
#         unzip error).
# --location: follow redirects, should the download URL be redirected.
!curl --fail --location https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_5340.zip --output dataset.zip

# Extract the archive (the cells below read the images from PetImages/).
# -q: do not print one line per extracted file.
# -n: never overwrite existing files, so re-running this cell does not stop
#     on unzip's interactive "replace?" prompt.
!unzip -q -n dataset.zip

## Split training/validation/testing subsets

In [None]:
# One folder per subset (train / validation / test) and per class (Cat / Dog).
# -p creates missing parents and does not fail if a folder already exists.
!mkdir -p train/Cat train/Dog validation/Cat validation/Dog test/Cat test/Dog

Then, using the Keras image utilities, we load the images, resize them to 200 × 200 pixels, and save the results in the folders created above. We take a total of 1000 images per class: 600 for training, 200 for validation and 200 for testing.

In [None]:
# importing libraries
from tensorflow.keras.utils import load_img, save_img
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from os import listdir
from tensorflow import keras

# Load the dogs vs cats dataset, resize every picture to 200px x 200px and
# copy the first `files_per_class` readable pictures of each class into the
# train/ (first 600), validation/ (next 200) and test/ (remaining) folders.
classes = ['Cat', 'Dog']
files_per_class = 1000
train_limit = 600       # pictures number 0..599 of a class go to train/
validation_limit = 800  # pictures number 600..799 go to validation/, the rest to test/

for classe in classes:
    source_dir = 'PetImages/' + classe
    saved = 0  # number of pictures of this class written so far
    # NOTE: the listing order is used as-is; it alone decides which picture
    # lands in which subset.
    for file in listdir(source_dir):
        if not file.endswith(".jpg"):
            continue
        print(file)
        try:
            photo = load_img(source_dir + '/' + file, target_size=(200, 200))
        except OSError as err:
            # An unreadable picture must not abort the whole split: skip it
            # without counting it, so each subset keeps its intended size.
            print("Skipping unreadable image " + file + ": " + str(err))
            continue

        # Pick the destination subset from the running per-class counter.
        if saved < train_limit:
            subset = 'train'
        elif saved < validation_limit:
            subset = 'validation'
        else:
            subset = 'test'
        save_img(subset + '/' + classe + '/' + file, photo)

        saved += 1
        if saved == files_per_class:
            break

# Training & evaluating the model

In [None]:
def get_accuracy_for_batch_size(opt, batch_size):
    """Train a small CNN with the given optimizer and batch size.

    The model is trained for 5 epochs on the pictures in ``train/``,
    validated on ``validation/`` and finally evaluated on ``test/``.

    Args:
        opt: Optimizer forwarded unchanged to ``model.compile`` (the notebook
            passes the names ``"sgd"`` and ``"adam"``).
        batch_size: Batch size used by the train, validation and test
            iterators.

    Returns:
        The accuracy reported by ``model.evaluate`` on the test iterator.
    """
    print("Evaluating batch size " + str(batch_size))

    # Prepare iterators; pixel values are rescaled from [0, 255] to [0, 1].
    datagen = ImageDataGenerator(rescale=1.0/255.0)
    train_it = datagen.flow_from_directory(
        directory='train/',
        class_mode='binary', batch_size=batch_size, target_size=(200, 200))
    val_it = datagen.flow_from_directory(
        directory='validation/',
        class_mode='binary', batch_size=batch_size, target_size=(200, 200))
    test_it = datagen.flow_from_directory(
        'test/',
        class_mode='binary', batch_size=batch_size, target_size=(200, 200))

    # Create model: one convolution + pooling stage followed by a dense
    # classifier with a single sigmoid output (binary Cat/Dog decision).
    model = keras.Sequential()
    model.add(keras.layers.Conv2D(32, (3, 3), activation='relu',
                                  kernel_initializer='he_uniform',
                                  padding='same', input_shape=(200, 200, 3)))
    model.add(keras.layers.MaxPooling2D((2, 2)))
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(128, activation='relu',
                                 kernel_initializer='he_uniform'))
    model.add(keras.layers.Dense(1, activation='sigmoid'))

    # Compile model.
    model.compile(optimizer=opt, loss='binary_crossentropy',
                  metrics=['accuracy'])

    # Train the model. Validation must use the held-out validation iterator
    # (not the training one); validation_steps is derived from val_it as well.
    model.fit(train_it,
              validation_data=val_it,
              steps_per_epoch=train_it.n // train_it.batch_size,
              validation_steps=val_it.n // val_it.batch_size,
              epochs=5, verbose=0)

    # Evaluate model on the whole test set (len(test_it) batches).
    _, acc = model.evaluate(test_it, steps=len(test_it), verbose=0)
    return acc

## Comparing optimizers: SGD vs Adam

In [None]:
# Batch sizes to sweep; one model is trained from scratch per (optimizer, size).
batch_size_array = [16, 32, 64, 128]

# Test accuracy per batch size with SGD.
optimizer = "sgd"
accuracies_sgd = [
    get_accuracy_for_batch_size(optimizer, size) for size in batch_size_array
]

# Test accuracy per batch size with Adam.
optimizer = "adam"
accuracies_adam = [
    get_accuracy_for_batch_size(optimizer, size) for size in batch_size_array
]

In [None]:
import matplotlib.pyplot as plt

# Accuracy as a function of the batch size, one curve per optimizer.
plt.plot(batch_size_array, accuracies_sgd, color='green', label='SGD')
plt.plot(batch_size_array, accuracies_adam, color='red', label='Adam')
plt.xlabel('Batch size')
plt.ylabel('Accuracy')
# Without a legend the labels above are never drawn and the two curves
# cannot be told apart.
plt.legend()
plt.show()

## Effect of data augmentation

In [None]:
# Folders for the augmented training / validation sets, one per class.
# -p creates missing parents and does not fail if a folder already exists.
!mkdir -p train_augmented/Cat train_augmented/Dog validation_augmented/Cat validation_augmented/Dog

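Image preprocessing: each training and validation image is saved twice, once unchanged and once as an augmented copy (upscaled to 250 × 250 pixels, then cropped to its top-left 200 × 200 corner).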

In [None]:
from tensorflow import image
# Load the dogs vs cats dataset, resize every picture to 200x200 and build
# the augmented train/validation folders: each picture is stored as is plus
# an "augmented_" copy (upscaled to 250x250, then cropped to the top-left
# 200x200 corner). Pictures past the first 800 of a class are written to test/.
classes = ['Cat', 'Dog']
files_per_class = 1000
train_limit = 600       # pictures number 0..599 of a class go to train_augmented/
validation_limit = 800  # pictures number 600..799 go to validation_augmented/

for classe in classes:
    source_dir = 'PetImages/' + classe
    saved = 0  # number of source pictures of this class processed so far
    # NOTE: the listing order is used as-is; it alone decides which picture
    # lands in which subset.
    for file in listdir(source_dir):
        if not file.endswith(".jpg"):
            continue
        try:
            photo = load_img(source_dir + '/' + file, target_size=(200, 200))
        except OSError as err:
            # An unreadable picture must not abort the whole run: skip it
            # without counting it, so each subset keeps its intended size.
            print("Skipping unreadable image " + file + ": " + str(err))
            continue

        if saved < validation_limit:
            if saved < train_limit:
                subset = 'train_augmented'
            else:
                subset = 'validation_augmented'
            # Augmented variant: zoom in by resizing to 250x250 and keeping
            # only the top-left 200x200 region.
            photo_cropped = photo.resize((250, 250)).crop((0, 0, 200, 200))
            save_img(subset + '/' + classe + '/' + file, photo)
            save_img(subset + '/' + classe + '/augmented_' + file, photo_cropped)
        else:
            # Test pictures are never augmented.
            save_img('test/' + classe + '/' + file, photo)

        saved += 1
        if saved == files_per_class:
            break

In [None]:
def get_accuracy_for_batch_size_augmented_data(train_folder, validation_folder, batch_size, opt="adam"):
    """Train a small CNN on the given folders and return its test accuracy.

    The model is trained for 5 epochs on ``train_folder``, validated on
    ``validation_folder`` and finally evaluated on ``test/``.

    Args:
        train_folder: Directory with one sub-folder per class, used for
            training.
        validation_folder: Directory with one sub-folder per class, used for
            validation during training.
        batch_size: Batch size used by the train, validation and test
            iterators.
        opt: Optimizer forwarded unchanged to ``model.compile``. Defaults to
            ``"adam"``.

    Returns:
        The accuracy reported by ``model.evaluate`` on the test iterator.
    """
    print("Evaluating batch size " + str(batch_size))

    # Prepare iterators; pixel values are rescaled from [0, 255] to [0, 1].
    datagen = ImageDataGenerator(rescale=1.0/255.0)
    train_it = datagen.flow_from_directory(
        directory=train_folder,
        class_mode='binary', batch_size=batch_size, target_size=(200, 200))
    val_it = datagen.flow_from_directory(
        directory=validation_folder,
        class_mode='binary', batch_size=batch_size, target_size=(200, 200))
    test_it = datagen.flow_from_directory(
        'test/',
        class_mode='binary', batch_size=batch_size, target_size=(200, 200))

    # Create model: one convolution + pooling stage followed by a dense
    # classifier with a single sigmoid output (binary Cat/Dog decision).
    model = keras.Sequential()
    model.add(keras.layers.Conv2D(32, (3, 3), activation='relu',
                                  kernel_initializer='he_uniform',
                                  padding='same', input_shape=(200, 200, 3)))
    model.add(keras.layers.MaxPooling2D((2, 2)))
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(128, activation='relu',
                                 kernel_initializer='he_uniform'))
    model.add(keras.layers.Dense(1, activation='sigmoid'))

    # Compile model.
    model.compile(optimizer=opt, loss='binary_crossentropy',
                  metrics=['accuracy'])

    # Train the model. Validation must use the held-out validation iterator
    # (not the training one); validation_steps is derived from val_it as well.
    model.fit(train_it,
              validation_data=val_it,
              steps_per_epoch=train_it.n // train_it.batch_size,
              validation_steps=val_it.n // val_it.batch_size,
              epochs=5, verbose=0)

    # Evaluate model on the whole test set (len(test_it) batches).
    _, acc = model.evaluate(test_it, steps=len(test_it), verbose=0)
    return acc

## Evaluate the accuracy for different batch sizes

In [None]:
# Batch sizes to sweep; one model is trained from scratch per (dataset, size).
batch_size_array = [16, 32, 64, 128, 256]

# Test accuracy per batch size when training on the original pictures only.
accuracies_standard_data = [
    get_accuracy_for_batch_size_augmented_data("train/", "validation/", size)
    for size in batch_size_array
]

# Test accuracy per batch size when training on original + augmented pictures.
accuracies_augmented_data = [
    get_accuracy_for_batch_size_augmented_data("train_augmented/", "validation_augmented/", size)
    for size in batch_size_array
]

In [None]:
import matplotlib.pyplot as plt

# (accuracies, line colour, legend label) for each curve, in drawing order.
curves = (
    (accuracies_standard_data, 'green', 'Standard data'),
    (accuracies_augmented_data, 'red', 'Augmented data'),
)
for accuracies, line_color, legend_label in curves:
    plt.plot(batch_size_array, accuracies, color=line_color, label=legend_label)

# Anchor the legend outside the axes, to the right of the plot.
plt.legend(bbox_to_anchor=(1.25, 0.8))
plt.show()