{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Copyright (c) 2020 ZZH"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "from matplotlib import pyplot as plt\n",
    "from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, AveragePooling2D, MaxPool2D\n",
    "from tensorflow.keras.layers import GlobalAveragePooling2D, Dropout, Flatten, Dense, DepthwiseConv2D\n",
    "from tensorflow.keras.models import Sequential\n",
    "from tensorflow.keras import Model\n",
    "import os\n",
    "import numpy as np\n",
    "import math"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Let GPU memory grow on demand instead of reserving it all up front\n",
    "gpus = tf.config.experimental.list_physical_devices(device_type='GPU')\n",
    "print(gpus)\n",
    "for gpu in gpus:\n",
    "    tf.config.experimental.set_memory_growth(gpu, True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Training hyper-parameters\n",
    "epochs = 100\n",
    "lr = 0.1\n",
    "batch_size = 128\n",
    "REGULARIZER = 0.0001\n",
    "checkpoint_save_path = './Model/ShuffleNetV2/'\n",
    "log_dir = os.path.join(\"Model\",\"ShuffleNetV2_logs\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Data loading and augmentation\n",
    "cifar10 = tf.keras.datasets.cifar10\n",
    "(x_train,y_train),(x_test,y_test) = cifar10.load_data()\n",
    "x_train = x_train.astype('float32')\n",
    "x_test = x_test.astype('float32')\n",
    "y_train = tf.keras.utils.to_categorical(y_train, num_classes=10)\n",
    "y_test = tf.keras.utils.to_categorical(y_test, num_classes=10)\n",
    "# Per-channel CIFAR-10 statistics, precomputed offline with np.mean() / np.std()\n",
    "mean = [125.307, 122.95, 113.865]\n",
    "std = [62.9932, 62.0887, 66.7048]\n",
    "for i in range(3):\n",
    "    x_train[:,:,:,i] = (x_train[:,:,:,i] - mean[i]) / std[i]\n",
    "    x_test[:,:,:,i] = (x_test[:,:,:,i] - mean[i]) / std[i]\n",
    "\n",
    "DataGenTrain = tf.keras.preprocessing.image.ImageDataGenerator(\n",
    "    rotation_range = 15,\n",
    "    width_shift_range = 0.1,\n",
    "    height_shift_range = 0.1,\n",
    "    horizontal_flip = True,\n",
    "    vertical_flip = False,\n",
    "    shear_range=0.1,\n",
    "    zoom_range = 0.1)\n",
    "DataGenTrain.fit(x_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def scheduler(epoch):\n",
    "    \"\"\"HTD(-6, 3) learning-rate schedule with a 5-epoch linear warm-up.\n",
    "\n",
    "    Epochs 0-4 ramp linearly from 0.02 to 0.1; afterwards the rate decays\n",
    "    along the hyperbolic-tangent curve tanh(start -> end).\n",
    "    \"\"\"\n",
    "    start = -6.0\n",
    "    end = 3.0\n",
    "    if epoch < 5:\n",
    "        # NOTE(review): warm-up constants assume lr == 0.1 — adjust together\n",
    "        return 0.02 * epoch + 0.02\n",
    "    return lr / 2.0 * (1 - math.tanh((end - start) * epoch / epochs + start))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def channel_shuffle(inputs, split, H, W):\n",
    "    \"\"\"ShuffleNet channel shuffle: interleave channels across `split` groups.\n",
    "\n",
    "    `inputs` must be an NHWC tensor whose spatial size is exactly H x W,\n",
    "    otherwise the leading -1 in the reshape silently folds batch samples\n",
    "    together instead of raising an error.\n",
    "    \"\"\"\n",
    "    C = inputs.shape.as_list()[-1]\n",
    "    x = tf.reshape(inputs, [-1, H, W, split, C // split])\n",
    "    x = tf.transpose(x, [0, 1, 2, 4, 3])\n",
    "    outputs = tf.reshape(x, [-1, H, W, C])\n",
    "    return outputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class BNRelu(Model):\n",
    "    \"\"\"BatchNormalization followed by a ReLU activation.\"\"\"\n",
    "    def __init__(self):\n",
    "        super(BNRelu,self).__init__()\n",
    "        self.bn = BatchNormalization(momentum=0.9)\n",
    "        self.relu = Activation('relu')\n",
    "    def call(self,inputs):\n",
    "        x = self.bn(inputs)\n",
    "        outputs = self.relu(x)\n",
    "        return outputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class SEBlock(Model):\n",
    "    \"\"\"Squeeze-and-Excitation block (defined for experiments; currently\n",
    "    disabled inside ShuffleBlock).\"\"\"\n",
    "    def __init__(self,channels):\n",
    "        super(SEBlock,self).__init__()\n",
    "        self.channels = channels\n",
    "        self.p1 = GlobalAveragePooling2D()\n",
    "        self.d1 = Dense(channels//16,activation='relu',kernel_initializer=\"he_normal\",use_bias=False,\n",
    "                        kernel_regularizer=tf.keras.regularizers.l2(REGULARIZER))\n",
    "        self.d2 = Dense(channels,activation='sigmoid',kernel_initializer=\"he_normal\",use_bias=False,\n",
    "                        kernel_regularizer=tf.keras.regularizers.l2(REGULARIZER))\n",
    "        self.m1 = tf.keras.layers.Multiply()\n",
    "    def call(self,inputs):\n",
    "        x = self.p1(inputs)\n",
    "        x = self.d1(x)\n",
    "        y = self.d2(x)\n",
    "        # Reshape the gates to [N,1,1,C] so they broadcast over spatial dims\n",
    "        y = tf.reshape(y, [-1,1,1,self.channels])\n",
    "        outputs = self.m1([inputs,y])\n",
    "        return outputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class ShuffleBlock(Model):\n",
    "    \"\"\"ShuffleNetV2 unit.\n",
    "\n",
    "    channels -- number of OUTPUT channels (each branch emits channels//2)\n",
    "    strides  -- 1: basic unit with channel split; 2: spatial downsampling unit\n",
    "    H, W     -- spatial size of the INPUT feature map\n",
    "    \"\"\"\n",
    "    def __init__(self,channels,strides,H,W):\n",
    "        super(ShuffleBlock,self).__init__()\n",
    "        self.channels = channels\n",
    "        self.strides = strides\n",
    "        # Fix: channel_shuffle reshapes the block OUTPUT, whose spatial size\n",
    "        # is halved by a stride-2 unit, so store the OUTPUT dimensions here.\n",
    "        # (Previously the input dims were used, which silently folded batch\n",
    "        # samples together in the reshape inside channel_shuffle.)\n",
    "        self.H = H // strides\n",
    "        self.W = W // strides\n",
    "        self.c1 = Conv2D(filters=channels//2, kernel_size=1, strides=1, padding='same',use_bias=False,\n",
    "                         kernel_initializer=\"he_normal\",kernel_regularizer=tf.keras.regularizers.l2(REGULARIZER))\n",
    "        self.b1 = BNRelu()\n",
    "        self.c2 = DepthwiseConv2D(kernel_size=3, strides=strides, padding='same', use_bias=False,\n",
    "                         depthwise_initializer=\"he_normal\",depthwise_regularizer=tf.keras.regularizers.l2(REGULARIZER))\n",
    "        self.b2 = BatchNormalization(momentum=0.9)\n",
    "        self.c3 = Conv2D(filters=channels//2, kernel_size=1, strides=1, padding='same', use_bias=False,\n",
    "                         kernel_initializer=\"he_normal\",kernel_regularizer=tf.keras.regularizers.l2(REGULARIZER))\n",
    "        self.b3 = BNRelu()\n",
    "#         self.se = SEBlock(channels=channels)\n",
    "        if self.strides != 1:\n",
    "            # Second branch of the downsampling unit: depthwise + pointwise conv\n",
    "            self.c4 = DepthwiseConv2D(kernel_size=3, strides=strides, padding='same', use_bias=False,\n",
    "                         depthwise_initializer=\"he_normal\",depthwise_regularizer=tf.keras.regularizers.l2(REGULARIZER))\n",
    "            self.b4 = BNRelu()\n",
    "            self.c5 = Conv2D(filters=channels//2, kernel_size=1, strides=1, padding='same',use_bias=False,\n",
    "                         kernel_initializer=\"he_normal\",kernel_regularizer=tf.keras.regularizers.l2(REGULARIZER))\n",
    "            self.b5 = BNRelu()\n",
    "\n",
    "    def call(self,inputs):\n",
    "        if self.strides == 1:\n",
    "            # Basic unit: split channels, transform one half, pass the other through\n",
    "            x1,x2 = tf.split(inputs,2,axis=3)\n",
    "        else:\n",
    "            # Downsampling unit: both branches process the full input\n",
    "            x1 = inputs\n",
    "            x2 = inputs\n",
    "        x = self.c1(x1)\n",
    "        x = self.b1(x)\n",
    "        x = self.c2(x)\n",
    "        x = self.b2(x)\n",
    "        x = self.c3(x)\n",
    "        x = self.b3(x)\n",
    "#         x = self.se(x)\n",
    "        if self.strides != 1:\n",
    "            x2 = self.c4(x2)\n",
    "            x2 = self.b4(x2)\n",
    "            x2 = self.c5(x2)\n",
    "            x2 = self.b5(x2)\n",
    "        y = tf.concat([x,x2],axis=-1)\n",
    "        outputs = channel_shuffle(y,split=2,H=self.H,W=self.W)\n",
    "        return outputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class ShuffleNet(Model):\n",
    "    \"\"\"ShuffleNetV2-style network for 32x32 CIFAR-10 images.\n",
    "\n",
    "    block_list -- number of units per stage, e.g. [4, 8, 4].\n",
    "    \"\"\"\n",
    "    def __init__(self,block_list):\n",
    "        super(ShuffleNet,self).__init__()\n",
    "        self.channels = 116\n",
    "        self.H = 32\n",
    "        self.W = 32\n",
    "        # Fix: input_shape must exclude the batch dimension\n",
    "        self.c1 = Conv2D(filters=24, kernel_size=3, strides=1, padding='same', use_bias=False, input_shape=(32, 32, 3),\n",
    "                         kernel_initializer=\"he_normal\",kernel_regularizer=tf.keras.regularizers.l2(REGULARIZER))\n",
    "        self.b1 = BNRelu()\n",
    "        self.blocks = Sequential()\n",
    "        for i in range(len(block_list)):\n",
    "            # Each stage begins with one stride-2 unit, followed by stride-1 units\n",
    "            self.blocks.add(ShuffleBlock(channels=self.channels,strides=2,H=self.H,W=self.W))\n",
    "            self.H = self.H // 2\n",
    "            self.W = self.W // 2\n",
    "            for _ in range(1,block_list[i]):\n",
    "                self.blocks.add(ShuffleBlock(channels=self.channels,strides=1,H=self.H,W=self.W))\n",
    "            self.channels *= 2\n",
    "        self.c2 = Conv2D(filters=1024, kernel_size=1, strides=1, padding='same', use_bias=False,\n",
    "                         kernel_initializer=\"he_normal\",kernel_regularizer=tf.keras.regularizers.l2(REGULARIZER))\n",
    "        self.p1 = GlobalAveragePooling2D()\n",
    "        self.f1 = Dense(10,activation='softmax',kernel_initializer=\"he_normal\",\n",
    "                        kernel_regularizer=tf.keras.regularizers.l2(REGULARIZER))\n",
    "    def call(self,inputs):\n",
    "        x = self.c1(inputs)\n",
    "        x = self.b1(x)\n",
    "        x = self.blocks(x)\n",
    "        x = self.c2(x)\n",
    "        x = self.p1(x)\n",
    "        y = self.f1(x)\n",
    "        return y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "model = ShuffleNet([4,8,4])\n",
    "\n",
    "model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=lr, momentum=0.9, nesterov=True, clipnorm=2.),\n",
    "              loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1),\n",
    "              metrics=['accuracy'])\n",
    "\n",
    "callbacks = [\n",
    "    tf.keras.callbacks.LearningRateScheduler(scheduler), # learning-rate schedule\n",
    "    #tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', factor=0.1, min_lr=0.0001, patience=10, cooldown=0)\n",
    "    tf.keras.callbacks.ModelCheckpoint( # model checkpointing\n",
    "        filepath = checkpoint_save_path,\n",
    "        save_weights_only = False,\n",
    "        monitor = 'val_accuracy',\n",
    "        save_best_only = True),\n",
    "#     tf.keras.callbacks.EarlyStopping( # early stopping\n",
    "#         monitor = 'val_accuracy',\n",
    "#         patience=15, \n",
    "#         baseline=None),\n",
    "    tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1, write_graph=True, write_images=False) # save the graph\n",
    "]\n",
    "\n",
    "hist = model.fit(DataGenTrain.flow(x_train,y_train,batch_size=batch_size,shuffle=True),\n",
    "                 epochs=epochs,\n",
    "                 validation_data=(x_test,y_test),\n",
    "                 validation_freq=1,\n",
    "                 callbacks=callbacks)\n",
    "\n",
    "model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot training curves\n",
    "%matplotlib inline\n",
    "%config InlineBackend.figure_format = 'svg'\n",
    "plt.style.use({'figure.figsize':(6,4)})\n",
    "\n",
    "plt.plot(hist.history['loss'], label='loss')\n",
    "plt.plot(hist.history['val_loss'], label='val_loss')\n",
    "plt.legend()\n",
    "plt.show()\n",
    "plt.plot(hist.history['val_accuracy'], label='val_accuracy')\n",
    "plt.legend()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# TensorBoard visualization\n",
    "#!tensorboard --logdir=./Model/ShuffleNetV2_logs\n",
    "#http://localhost:6006/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print('best result: {:.2f}% ({}epochs)'.format(100*max(hist.history['val_accuracy']),1+hist.history['val_accuracy'].index(max(hist.history['val_accuracy']))))\n",
    "# best result : 92.41% (97epochs)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}