{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Copyright (c) 2020 ZZH"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "from matplotlib import pyplot as plt\n",
    "from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, AveragePooling2D, MaxPool2D\n",
    "from tensorflow.keras.layers import GlobalAveragePooling2D, Dropout, Flatten, Dense, DepthwiseConv2D\n",
    "from tensorflow.keras.models import Sequential\n",
    "from tensorflow.keras import Model\n",
    "import os\n",
    "import numpy as np\n",
    "import math"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Let GPU memory grow on demand instead of reserving it all up front\n",
    "gpus = tf.config.experimental.list_physical_devices(device_type='GPU')\n",
    "print(gpus)\n",
    "for gpu in gpus:\n",
    "    tf.config.experimental.set_memory_growth(gpu, True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Training hyper-parameters\n",
    "epochs = 100\n",
    "lr = 0.1\n",
    "batch_size = 128\n",
    "REGULARIZER = 0.0001\n",
    "checkpoint_save_path = './Model/ShuffleNetV2/'\n",
    "log_dir = os.path.join(\"Model\",\"ShuffleNetV2_logs\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Data loading and augmentation\n",
    "cifar10 = tf.keras.datasets.cifar10\n",
    "(x_train,y_train),(x_test,y_test) = cifar10.load_data()\n",
    "x_train = x_train.astype('float32')\n",
    "x_test = x_test.astype('float32')\n",
    "y_train = tf.keras.utils.to_categorical(y_train, num_classes=10)\n",
    "y_test = tf.keras.utils.to_categorical(y_test, num_classes=10)\n",
    "# Per-channel CIFAR-10 statistics, precomputed offline with np.mean() / np.std()\n",
    "mean = [125.307, 122.95, 113.865]\n",
    "std = [62.9932, 62.0887, 66.7048]\n",
    "for i in range(3):\n",
    "    x_train[:,:,:,i] = (x_train[:,:,:,i] - mean[i]) / std[i]\n",
    "    x_test[:,:,:,i] = (x_test[:,:,:,i] - mean[i]) / std[i]\n",
    "\n",
    "DataGenTrain = tf.keras.preprocessing.image.ImageDataGenerator(\n",
    "    rotation_range = 15,\n",
    "    width_shift_range = 0.1,\n",
    "    height_shift_range = 0.1,\n",
    "    horizontal_flip = True,\n",
    "    vertical_flip = False,\n",
    "    shear_range=0.1,\n",
    "    zoom_range = 0.1)\n",
    "DataGenTrain.fit(x_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def scheduler(epoch):\n",
    "    \"\"\"HTD(-6, 3) learning-rate schedule with a 5-epoch linear warm-up.\n",
    "\n",
    "    Epochs 0-4 ramp linearly from 0.02 to 0.1; afterwards the rate decays\n",
    "    along the hyperbolic-tangent curve tanh(start -> end).\n",
    "    \"\"\"\n",
    "    start = -6.0\n",
    "    end = 3.0\n",
    "    if epoch < 5:\n",
    "        # NOTE(review): warm-up constants assume lr == 0.1 — adjust together\n",
    "        return 0.02 * epoch + 0.02\n",
    "    return lr / 2.0 * (1 - math.tanh((end - start) * epoch / epochs + start))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def channel_shuffle(inputs, split, H, W):\n",
    "    \"\"\"ShuffleNet channel shuffle: interleave channels across `split` groups.\n",
    "\n",
    "    `inputs` must be an NHWC tensor whose spatial size is exactly H x W,\n",
    "    otherwise the leading -1 in the reshape silently folds batch samples\n",
    "    together instead of raising an error.\n",
    "    \"\"\"\n",
    "    C = inputs.shape.as_list()[-1]\n",
    "    x = tf.reshape(inputs, [-1, H, W, split, C // split])\n",
    "    x = tf.transpose(x, [0, 1, 2, 4, 3])\n",
    "    outputs = tf.reshape(x, [-1, H, W, C])\n",
    "    return outputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class BNRelu(Model):\n",
    "    \"\"\"BatchNormalization followed by a ReLU activation.\"\"\"\n",
    "    def __init__(self):\n",
    "        super(BNRelu,self).__init__()\n",
    "        self.bn = BatchNormalization(momentum=0.9)\n",
    "        self.relu = Activation('relu')\n",
    "    def call(self,inputs):\n",
    "        x = self.bn(inputs)\n",
    "        outputs = self.relu(x)\n",
    "        return outputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class SEBlock(Model):\n",
    "    \"\"\"Squeeze-and-Excitation block (defined for experiments; currently\n",
    "    disabled inside ShuffleBlock).\"\"\"\n",
    "    def __init__(self,channels):\n",
    "        super(SEBlock,self).__init__()\n",
    "        self.channels = channels\n",
    "        self.p1 = GlobalAveragePooling2D()\n",
    "        self.d1 = Dense(channels//16,activation='relu',kernel_initializer=\"he_normal\",use_bias=False,\n",
    "                        kernel_regularizer=tf.keras.regularizers.l2(REGULARIZER))\n",
    "        self.d2 = Dense(channels,activation='sigmoid',kernel_initializer=\"he_normal\",use_bias=False,\n",
    "                        kernel_regularizer=tf.keras.regularizers.l2(REGULARIZER))\n",
    "        self.m1 = tf.keras.layers.Multiply()\n",
    "    def call(self,inputs):\n",
    "        x = self.p1(inputs)\n",
    "        x = self.d1(x)\n",
    "        y = self.d2(x)\n",
    "        # Reshape the gates to [N,1,1,C] so they broadcast over spatial dims\n",
    "        y = tf.reshape(y, [-1,1,1,self.channels])\n",
    "        outputs = self.m1([inputs,y])\n",
    "        return outputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class ShuffleBlock(Model):\n",
    "    \"\"\"ShuffleNetV2 unit.\n",
    "\n",
    "    channels -- number of OUTPUT channels (each branch emits channels//2)\n",
    "    strides  -- 1: basic unit with channel split; 2: spatial downsampling unit\n",
    "    H, W     -- spatial size of the INPUT feature map\n",
    "    \"\"\"\n",
    "    def __init__(self,channels,strides,H,W):\n",
    "        super(ShuffleBlock,self).__init__()\n",
    "        self.channels = channels\n",
    "        self.strides = strides\n",
    "        # Fix: channel_shuffle reshapes the block OUTPUT, whose spatial size\n",
    "        # is halved by a stride-2 unit, so store the OUTPUT dimensions here.\n",
    "        # (Previously the input dims were used, which silently folded batch\n",
    "        # samples together in the reshape inside channel_shuffle.)\n",
    "        self.H = H // strides\n",
    "        self.W = W // strides\n",
    "        self.c1 = Conv2D(filters=channels//2, kernel_size=1, strides=1, padding='same',use_bias=False,\n",
    "                         kernel_initializer=\"he_normal\",kernel_regularizer=tf.keras.regularizers.l2(REGULARIZER))\n",
    "        self.b1 = BNRelu()\n",
    "        self.c2 = DepthwiseConv2D(kernel_size=3, strides=strides, padding='same', use_bias=False,\n",
    "                         depthwise_initializer=\"he_normal\",depthwise_regularizer=tf.keras.regularizers.l2(REGULARIZER))\n",
    "        self.b2 = BatchNormalization(momentum=0.9)\n",
    "        self.c3 = Conv2D(filters=channels//2, kernel_size=1, strides=1, padding='same', use_bias=False,\n",
    "                         kernel_initializer=\"he_normal\",kernel_regularizer=tf.keras.regularizers.l2(REGULARIZER))\n",
    "        self.b3 = BNRelu()\n",
    "#         self.se = SEBlock(channels=channels)\n",
    "        if self.strides != 1:\n",
    "            # Second branch of the downsampling unit: depthwise + pointwise conv\n",
    "            self.c4 = DepthwiseConv2D(kernel_size=3, strides=strides, padding='same', use_bias=False,\n",
    "                         depthwise_initializer=\"he_normal\",depthwise_regularizer=tf.keras.regularizers.l2(REGULARIZER))\n",
    "            self.b4 = BNRelu()\n",
    "            self.c5 = Conv2D(filters=channels//2, kernel_size=1, strides=1, padding='same',use_bias=False,\n",
    "                         kernel_initializer=\"he_normal\",kernel_regularizer=tf.keras.regularizers.l2(REGULARIZER))\n",
    "            self.b5 = BNRelu()\n",
    "\n",
    "    def call(self,inputs):\n",
    "        if self.strides == 1:\n",
    "            # Basic unit: split channels, transform one half, pass the other through\n",
    "            x1,x2 = tf.split(inputs,2,axis=3)\n",
    "        else:\n",
    "            # Downsampling unit: both branches process the full input\n",
    "            x1 = inputs\n",
    "            x2 = inputs\n",
    "        x = self.c1(x1)\n",
    "        x = self.b1(x)\n",
    "        x = self.c2(x)\n",
    "        x = self.b2(x)\n",
    "        x = self.c3(x)\n",
    "        x = self.b3(x)\n",
    "#         x = self.se(x)\n",
    "        if self.strides != 1:\n",
    "            x2 = self.c4(x2)\n",
    "            x2 = self.b4(x2)\n",
    "            x2 = self.c5(x2)\n",
    "            x2 = self.b5(x2)\n",
    "        y = tf.concat([x,x2],axis=-1)\n",
    "        outputs = channel_shuffle(y,split=2,H=self.H,W=self.W)\n",
    "        return outputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class ShuffleNet(Model):\n",
    "    \"\"\"ShuffleNetV2-style network for 32x32 CIFAR-10 images.\n",
    "\n",
    "    block_list -- number of units per stage, e.g. [4, 8, 4].\n",
    "    \"\"\"\n",
    "    def __init__(self,block_list):\n",
    "        super(ShuffleNet,self).__init__()\n",
    "        self.channels = 116\n",
    "        self.H = 32\n",
    "        self.W = 32\n",
    "        # Fix: input_shape must exclude the batch dimension\n",
    "        self.c1 = Conv2D(filters=24, kernel_size=3, strides=1, padding='same', use_bias=False, input_shape=(32, 32, 3),\n",
    "                         kernel_initializer=\"he_normal\",kernel_regularizer=tf.keras.regularizers.l2(REGULARIZER))\n",
    "        self.b1 = BNRelu()\n",
    "        self.blocks = Sequential()\n",
    "        for i in range(len(block_list)):\n",
    "            # Each stage begins with one stride-2 unit, followed by stride-1 units\n",
    "            self.blocks.add(ShuffleBlock(channels=self.channels,strides=2,H=self.H,W=self.W))\n",
    "            self.H = self.H // 2\n",
    "            self.W = self.W // 2\n",
    "            for _ in range(1,block_list[i]):\n",
    "                self.blocks.add(ShuffleBlock(channels=self.channels,strides=1,H=self.H,W=self.W))\n",
    "            self.channels *= 2\n",
    "        self.c2 = Conv2D(filters=1024, kernel_size=1, strides=1, padding='same', use_bias=False,\n",
    "                         kernel_initializer=\"he_normal\",kernel_regularizer=tf.keras.regularizers.l2(REGULARIZER))\n",
    "        self.p1 = GlobalAveragePooling2D()\n",
    "        self.f1 = Dense(10,activation='softmax',kernel_initializer=\"he_normal\",\n",
    "                        kernel_regularizer=tf.keras.regularizers.l2(REGULARIZER))\n",
    "    def call(self,inputs):\n",
    "        x = self.c1(inputs)\n",
    "        x = self.b1(x)\n",
    "        x = self.blocks(x)\n",
    "        x = self.c2(x)\n",
    "        x = self.p1(x)\n",
    "        y = self.f1(x)\n",
    "        return y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "model = ShuffleNet([4,8,4])\n",
    "\n",
    "model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=lr, momentum=0.9, nesterov=True, clipnorm=2.),\n",
    "              loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1),\n",
    "              metrics=['accuracy'])\n",
    "\n",
    "callbacks = [\n",
    "    tf.keras.callbacks.LearningRateScheduler(scheduler), # learning-rate schedule\n",
    "    #tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', factor=0.1, min_lr=0.0001, patience=10, cooldown=0)\n",
    "    tf.keras.callbacks.ModelCheckpoint( # model checkpointing\n",
    "        filepath = checkpoint_save_path,\n",
    "        save_weights_only = False,\n",
    "        monitor = 'val_accuracy',\n",
    "        save_best_only = True),\n",
    "#     tf.keras.callbacks.EarlyStopping( # early stopping\n",
    "#         monitor = 'val_accuracy',\n",
    "#         patience=15, \n",
    "#         baseline=None),\n",
    "    tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1, write_graph=True, write_images=False) # save the graph\n",
    "]\n",
    "\n",
    "hist = model.fit(DataGenTrain.flow(x_train,y_train,batch_size=batch_size,shuffle=True),\n",
    "                 epochs=epochs,\n",
    "                 validation_data=(x_test,y_test),\n",
    "                 validation_freq=1,\n",
    "                 callbacks=callbacks)\n",
    "\n",
    "model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot training curves\n",
    "%matplotlib inline\n",
    "%config InlineBackend.figure_format = 'svg'\n",
    "plt.style.use({'figure.figsize':(6,4)})\n",
    "\n",
    "plt.plot(hist.history['loss'], label='loss')\n",
    "plt.plot(hist.history['val_loss'], label='val_loss')\n",
    "plt.legend()\n",
    "plt.show()\n",
    "plt.plot(hist.history['val_accuracy'], label='val_accuracy')\n",
    "plt.legend()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# TensorBoard visualization\n",
    "#!tensorboard --logdir=./Model/ShuffleNetV2_logs\n",
    "#http://localhost:6006/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print('best result: {:.2f}% ({}epochs)'.format(100*max(hist.history['val_accuracy']),1+hist.history['val_accuracy'].index(max(hist.history['val_accuracy']))))\n",
    "# best result : 92.41% (97epochs)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}