{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<center><div style=\"direction:rtl;font-family:B Lotus, B Nazanin, Tahoma\">به نام خدا</div></center>\n",
    "<img src=\"./logo.png\" alt=\"class.vision\" style=\"width: 200px;\"/>\n",
    "<h1><center><div style=\"direction:rtl;font-family:B Lotus, B Nazanin, Tahoma\">طبقه بندی ویدیو با شبکه‌های بازگشتی - استخراج ویژگی</div></center></h1>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## <div style=\"direction:rtl;text-align:right;font-family:B Lotus, B Nazanin, Tahoma\">مجموعه داده</div>\n",
    "\n",
    "\n",
    "<div style=\"direction:rtl;text-align:right;font-family:Tahoma\">\n",
    "قبلاً 6 کلاس از دیتاست UCF-101 را به عنوان نمونه انتخاب و فریم‌های ویدیوهای متعلق به این 6 کلاس از این مجموعه داده را استخراج کرده‌ایم و اطلاعات هر ویدیو نظیر اسم، کلاس و تعداد فریم را در یک فایل متنی قرار داده‌ایم.\n",
    "<br/>\n",
    "    \n",
    "این 6 کلاس که برای این آموزش آماده شده است را از اینجا دانلود کنید: \n",
    "</div>\n",
    "\n",
    "http://dataset.class.vision/rnn/RNN-Video-6action.zip\n",
    "\n",
    "<br/>\n",
    "<div style=\"direction:rtl;text-align:right;font-family:Tahoma\">\n",
    "    همچنین\n",
    "    دیتاست اصلی شامل 101 کلاس مختلف را می‌توانید از لینک زیر دانلود کنید:\n",
    "</div>\n",
    "\n",
    "<strong>UCF-101</strong>\n",
    "[https://www.crcv.ucf.edu/data/UCF101.php](https://www.crcv.ucf.edu/data/UCF101.php)\n",
    "\n",
    "\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "from keras.preprocessing import image\n",
    "from keras.applications.inception_v3 import InceptionV3, preprocess_input\n",
    "from keras.models import Model, load_model\n",
    "from keras.layers import Input\n",
    "import numpy as np\n",
    "import os.path\n",
    "from tqdm import tqdm\n",
    "import csv\n",
    "import random\n",
    "import glob\n",
    "import os.path\n",
    "import sys\n",
    "import operator\n",
    "import threading\n",
    "from keras.utils import to_categorical\n",
    "from keras.preprocessing.image import img_to_array, load_img"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of frames sampled per video; also the minimum frame count a video\n",
    "# needs in order to be kept by the data_clean filter.\n",
    "seq_length= 40\n",
    "# Videos with more frames than this are dropped by the data_clean filter.\n",
    "max_frames = 300\n",
    "# NOTE(review): not referenced by any other cell visible in this notebook;\n",
    "# model_predict loads frames at (299, 299). Confirm whether this is still needed.\n",
    "image_shape=(224, 224, 3)\n",
    "# Dataset root (machine-specific absolute path). The 'train' folder and the\n",
    "# per-video frame folders are resolved relative to it.\n",
    "base_path = \"D:/dataset/RNN-Video\"\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the per-video metadata rows. The file lives under base_path, so the\n",
    "# dataset location only has to be changed in the config cell.\n",
    "# newline='' is what the csv module expects for files it reads.\n",
    "with open(os.path.join(base_path, 'data_file_5class.csv'), 'r', newline='') as fin:\n",
    "    reader = csv.reader(fin)\n",
    "    data = list(reader)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['CricketBowling',\n",
       " 'CricketShot',\n",
       " 'FieldHockeyPenalty',\n",
       " 'HandstandPushups',\n",
       " 'HandstandWalking',\n",
       " 'SoccerPenalty']"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_path = os.path.join(base_path, 'train')\n",
    "# Class names are the sub-directories of train/. Plain files that happen to\n",
    "# sit in that folder are skipped so they are not mistaken for classes.\n",
    "classes = sorted(\n",
    "    entry for entry in os.listdir(train_path)\n",
    "    if os.path.isdir(os.path.join(train_path, entry))\n",
    ")\n",
    "classes"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<div style=\"direction:rtl;text-align:right;font-family:Tahoma\">\n",
    "    در اینجا آن ویدیوهایی که حداقل 40 فریم و حداکثر 300 فریم دارند را لود می‌کنیم.\n",
    "</div>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the rows whose frame count (column 3) lies within\n",
    "# [seq_length, max_frames], both bounds inclusive.\n",
    "data_clean = [\n",
    "    row for row in data\n",
    "    if seq_length <= int(row[3]) <= max_frames\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "439"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of videos that passed the frame-count filter.\n",
    "len(data_clean)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_n_sample_from_video(sample, seq_length):\n",
    "    \"\"\"Pick `seq_length` frame paths from the extracted frames of one video.\n",
    "\n",
    "    Args:\n",
    "        sample: metadata row; sample[0] and sample[1] are joined onto the\n",
    "            global `base_path` to form the frame directory, and sample[2] is\n",
    "            the filename prefix of the frames.\n",
    "        seq_length: number of frame paths to return (must be >= 1).\n",
    "\n",
    "    Returns:\n",
    "        A list of exactly `seq_length` paths, in sorted filename order.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if `seq_length` is < 1 or fewer than `seq_length` frames\n",
    "            match on disk.\n",
    "    \"\"\"\n",
    "    if seq_length < 1:\n",
    "        raise ValueError('seq_length must be >= 1, got {}'.format(seq_length))\n",
    "\n",
    "    path = os.path.join(base_path, sample[0], sample[1])\n",
    "    filename = sample[2]\n",
    "    pattern = os.path.join(path, filename + '*jpg')\n",
    "    images = sorted(glob.glob(pattern))\n",
    "\n",
    "    # With too few frames the stride below would be 0 and range() would fail\n",
    "    # with an opaque 'arg 3 must not be zero'; report the real problem instead.\n",
    "    if len(images) < seq_length:\n",
    "        raise ValueError(\n",
    "            'expected at least {} frames matching {!r}, found {}'.format(\n",
    "                seq_length, pattern, len(images)))\n",
    "\n",
    "    # Take every `skip`-th frame. For example, to get a list of size 5 from a\n",
    "    # list of size 25, keep every 5th element of the original list.\n",
    "    skip = len(images) // seq_length\n",
    "    output = images[::skip]\n",
    "\n",
    "    # The stride can yield more than seq_length frames; keep the first ones.\n",
    "    # NOTE: this means the tail of a video may go unsampled (e.g. 151 frames\n",
    "    # with seq_length=40 gives skip=3, so only frames 0..117 are used).\n",
    "    return output[:seq_length]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['train', 'HandstandWalking', 'v_HandstandWalking_g24_c06', '151']"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Peek at one metadata row. The columns appear to be\n",
    "# [split, class, video name, frame count] (TODO confirm against the CSV).\n",
    "data_clean[3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "40"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Sanity check: the sampler should return seq_length frames for a kept video.\n",
    "len(get_n_sample_from_video(data_clean[3], seq_length))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the full InceptionV3 network (top included) with ImageNet weights.\n",
    "base_model = InceptionV3(include_top=True, weights='imagenet')\n",
    "\n",
    "# The feature extractor reuses the base model's input and stops at the\n",
    "# layer named 'avg_pool'.\n",
    "model = Model(\n",
    "    inputs=base_model.input,\n",
    "    outputs=base_model.get_layer('avg_pool').output,\n",
    ")\n",
    "\n",
    "def model_predict(image_path):\n",
    "    \"\"\"Return the output of `model` for the single image at `image_path`.\n",
    "\n",
    "    The image is loaded at 299x299, converted to an array, given a leading\n",
    "    batch axis of size 1 and run through `preprocess_input` before the\n",
    "    prediction. The batch axis is stripped from the returned result.\n",
    "    \"\"\"\n",
    "    pixels = image.img_to_array(\n",
    "        image.load_img(image_path, target_size=(299, 299))\n",
    "    )\n",
    "    batch = preprocess_input(np.expand_dims(pixels, axis=0))\n",
    "\n",
    "    # Single-image batch in, so take the only row of the prediction.\n",
    "    return model.predict(batch)[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|████████████████████████████████████████████████████████████████████████████████| 439/439 [18:31<00:00,  2.85s/it]\n"
     ]
    }
   ],
   "source": [
    "os.makedirs('sequences', exist_ok=True)\n",
    "for video in tqdm(data_clean):\n",
    "\n",
    "    # Output file stem for this video; np.save appends '.npy' on its own.\n",
    "    path = os.path.join(\n",
    "        'sequences',\n",
    "        video[2] + '-' + str(seq_length) + '-features',\n",
    "    )\n",
    "\n",
    "    # Only do the work when the feature file is not on disk yet.\n",
    "    if not os.path.isfile(path + '.npy'):\n",
    "        # One feature vector per sampled frame, in frame order.\n",
    "        sequence = [\n",
    "            model_predict(frame)\n",
    "            for frame in get_n_sample_from_video(video, seq_length)\n",
    "        ]\n",
    "\n",
    "        # Persist the whole sequence for this video.\n",
    "        np.save(path, sequence)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<div class=\"alert alert-block alert-info\">\n",
    "<div style=\"direction:rtl;text-align:right;font-family:B Lotus, B Nazanin, Tahoma\"> دوره پیشرفته یادگیری عمیق<br>علیرضا اخوان پور<br>  آبان و آذر 1399<br>\n",
    "</div>\n",
    "<a href=\"http://class.vision\">Class.Vision</a> - <a href=\"http://AkhavanPour.ir\">AkhavanPour.ir</a> - <a href=\"https://github.com/Alireza-Akhavan/\">GitHub</a>\n",
    "\n",
    "</div>"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "tensorflow",
   "language": "python",
   "name": "tensorflow"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}