{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<center><div style=\"direction:rtl;font-family:B Lotus, B Nazanin, Tahoma\">به نام خدا</div></center>\n",
    "<img src=\"./logo.png\" alt=\"class.vision\" style=\"width: 200px;\"/>\n",
    "<h1><center><div style=\"direction:rtl;font-family:B Lotus, B Nazanin, Tahoma\">تخمین قیمت ارزهای دیجیتال </div></center></h1>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\n",
    "## <div style=\"direction:rtl;text-align:right;font-family:B Lotus, B Nazanin, Tahoma\">مجموعه داده</div>\n",
    "\n",
    "<div style=\"direction:rtl;text-align:right;font-family:Tahoma\">مجموعه داده را می‌توانید از مسیر زیر دانلود کنید</div>\n",
    "\n",
    "http://dataset.class.vision/rnn/crypto_data.zip"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "         time        low       high       open      close      volume\n",
      "0  1528968660  96.580002  96.589996  96.589996  96.580002    9.647200\n",
      "1  1528968720  96.449997  96.669998  96.589996  96.660004  314.387024\n",
      "2  1528968780  96.470001  96.570000  96.570000  96.570000   77.129799\n",
      "3  1528968840  96.449997  96.570000  96.570000  96.500000    7.216067\n",
      "4  1528968900  96.279999  96.540001  96.500000  96.389999  524.539978\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_csv(\"crypto_data/LTC-USD.csv\", names=['time', 'low', 'high', 'open', 'close', 'volume'])\n",
    "\n",
    "print(df.head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "BTC-USD\n",
      "LTC-USD\n",
      "BCH-USD\n",
      "ETH-USD\n",
      "            BTC-USD_close  BTC-USD_volume  LTC-USD_close  LTC-USD_volume  \\\n",
      "time                                                                       \n",
      "1528968720    6487.379883        7.706374      96.660004      314.387024   \n",
      "1528968780    6479.410156        3.088252      96.570000       77.129799   \n",
      "1528968840    6479.410156        1.404100      96.500000        7.216067   \n",
      "1528968900    6479.979980        0.753000      96.389999      524.539978   \n",
      "1528968960    6480.000000        1.490900      96.519997       16.991997   \n",
      "\n",
      "            BCH-USD_close  BCH-USD_volume  ETH-USD_close  ETH-USD_volume  \n",
      "time                                                                      \n",
      "1528968720     870.859985       26.856577      486.01001       26.019083  \n",
      "1528968780     870.099976        1.124300      486.00000        8.449400  \n",
      "1528968840     870.789978        1.749862      485.75000       26.994646  \n",
      "1528968900     870.000000        1.680500      486.00000       77.355759  \n",
      "1528968960     869.989990        1.669014      486.00000        7.503300  \n"
     ]
    }
   ],
   "source": [
    "main_df = pd.DataFrame() # begin empty\n",
    "\n",
    "ratios = [\"BTC-USD\", \"LTC-USD\", \"BCH-USD\", \"ETH-USD\"]  # the 4 ratios we want to consider\n",
    "for ratio in ratios:  # begin iteration\n",
    "    print(ratio)\n",
    "    dataset = f'crypto_data/{ratio}.csv'  # get the full path to the file.\n",
    "    df = pd.read_csv(dataset, names=['time', 'low', 'high', 'open', 'close', 'volume'])  # read in specific file\n",
    "\n",
    "    # rename volume and close to include the ticker so we can still which close/volume is which:\n",
    "    df.rename(columns={\"close\": f\"{ratio}_close\", \"volume\": f\"{ratio}_volume\"}, inplace=True)\n",
    "\n",
    "    df.set_index(\"time\", inplace=True)  # set time as index so we can join them on this shared time\n",
    "    df = df[[f\"{ratio}_close\", f\"{ratio}_volume\"]]  # ignore the other columns besides price and volume\n",
    "\n",
    "    if len(main_df)==0:  # if the dataframe is empty\n",
    "        main_df = df  # then it's just the current df\n",
    "    else:  # otherwise, join this data to the main one\n",
    "        main_df = main_df.join(df)\n",
    "\n",
    "main_df.fillna(method=\"ffill\", inplace=True)  # if there are gaps in data, use previously known values\n",
    "main_df.dropna(inplace=True)\n",
    "print(main_df.head())  # how did we do??"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "SEQ_LEN = 60  # how long of a preceeding sequence to collect for RNN\n",
    "FUTURE_PERIOD_PREDICT = 3  # how far into the future are we trying to predict?\n",
    "RATIO_TO_PREDICT = \"LTC-USD\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "main_df['future'] = main_df[f'{RATIO_TO_PREDICT}_close'].shift(-FUTURE_PERIOD_PREDICT)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>BTC-USD_close</th>\n",
       "      <th>BTC-USD_volume</th>\n",
       "      <th>LTC-USD_close</th>\n",
       "      <th>LTC-USD_volume</th>\n",
       "      <th>BCH-USD_close</th>\n",
       "      <th>BCH-USD_volume</th>\n",
       "      <th>ETH-USD_close</th>\n",
       "      <th>ETH-USD_volume</th>\n",
       "      <th>future</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>time</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1528968720</th>\n",
       "      <td>6487.379883</td>\n",
       "      <td>7.706374</td>\n",
       "      <td>96.660004</td>\n",
       "      <td>314.387024</td>\n",
       "      <td>870.859985</td>\n",
       "      <td>26.856577</td>\n",
       "      <td>486.01001</td>\n",
       "      <td>26.019083</td>\n",
       "      <td>96.389999</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1528968780</th>\n",
       "      <td>6479.410156</td>\n",
       "      <td>3.088252</td>\n",
       "      <td>96.570000</td>\n",
       "      <td>77.129799</td>\n",
       "      <td>870.099976</td>\n",
       "      <td>1.124300</td>\n",
       "      <td>486.00000</td>\n",
       "      <td>8.449400</td>\n",
       "      <td>96.519997</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1528968840</th>\n",
       "      <td>6479.410156</td>\n",
       "      <td>1.404100</td>\n",
       "      <td>96.500000</td>\n",
       "      <td>7.216067</td>\n",
       "      <td>870.789978</td>\n",
       "      <td>1.749862</td>\n",
       "      <td>485.75000</td>\n",
       "      <td>26.994646</td>\n",
       "      <td>96.440002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1528968900</th>\n",
       "      <td>6479.979980</td>\n",
       "      <td>0.753000</td>\n",
       "      <td>96.389999</td>\n",
       "      <td>524.539978</td>\n",
       "      <td>870.000000</td>\n",
       "      <td>1.680500</td>\n",
       "      <td>486.00000</td>\n",
       "      <td>77.355759</td>\n",
       "      <td>96.470001</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1528968960</th>\n",
       "      <td>6480.000000</td>\n",
       "      <td>1.490900</td>\n",
       "      <td>96.519997</td>\n",
       "      <td>16.991997</td>\n",
       "      <td>869.989990</td>\n",
       "      <td>1.669014</td>\n",
       "      <td>486.00000</td>\n",
       "      <td>7.503300</td>\n",
       "      <td>96.400002</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            BTC-USD_close  BTC-USD_volume  LTC-USD_close  LTC-USD_volume  \\\n",
       "time                                                                       \n",
       "1528968720    6487.379883        7.706374      96.660004      314.387024   \n",
       "1528968780    6479.410156        3.088252      96.570000       77.129799   \n",
       "1528968840    6479.410156        1.404100      96.500000        7.216067   \n",
       "1528968900    6479.979980        0.753000      96.389999      524.539978   \n",
       "1528968960    6480.000000        1.490900      96.519997       16.991997   \n",
       "\n",
       "            BCH-USD_close  BCH-USD_volume  ETH-USD_close  ETH-USD_volume  \\\n",
       "time                                                                       \n",
       "1528968720     870.859985       26.856577      486.01001       26.019083   \n",
       "1528968780     870.099976        1.124300      486.00000        8.449400   \n",
       "1528968840     870.789978        1.749862      485.75000       26.994646   \n",
       "1528968900     870.000000        1.680500      486.00000       77.355759   \n",
       "1528968960     869.989990        1.669014      486.00000        7.503300   \n",
       "\n",
       "               future  \n",
       "time                   \n",
       "1528968720  96.389999  \n",
       "1528968780  96.519997  \n",
       "1528968840  96.440002  \n",
       "1528968900  96.470001  \n",
       "1528968960  96.400002  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "main_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def classify(current, future):\n",
    "    if float(future) > float(current):\n",
    "        return 1\n",
    "    else:\n",
    "        return 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "main_df['target'] = list(map(classify, main_df[f'{RATIO_TO_PREDICT}_close'], main_df['future']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>BTC-USD_close</th>\n",
       "      <th>BTC-USD_volume</th>\n",
       "      <th>LTC-USD_close</th>\n",
       "      <th>LTC-USD_volume</th>\n",
       "      <th>BCH-USD_close</th>\n",
       "      <th>BCH-USD_volume</th>\n",
       "      <th>ETH-USD_close</th>\n",
       "      <th>ETH-USD_volume</th>\n",
       "      <th>future</th>\n",
       "      <th>target</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>time</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1528968720</th>\n",
       "      <td>6487.379883</td>\n",
       "      <td>7.706374</td>\n",
       "      <td>96.660004</td>\n",
       "      <td>314.387024</td>\n",
       "      <td>870.859985</td>\n",
       "      <td>26.856577</td>\n",
       "      <td>486.01001</td>\n",
       "      <td>26.019083</td>\n",
       "      <td>96.389999</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1528968780</th>\n",
       "      <td>6479.410156</td>\n",
       "      <td>3.088252</td>\n",
       "      <td>96.570000</td>\n",
       "      <td>77.129799</td>\n",
       "      <td>870.099976</td>\n",
       "      <td>1.124300</td>\n",
       "      <td>486.00000</td>\n",
       "      <td>8.449400</td>\n",
       "      <td>96.519997</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1528968840</th>\n",
       "      <td>6479.410156</td>\n",
       "      <td>1.404100</td>\n",
       "      <td>96.500000</td>\n",
       "      <td>7.216067</td>\n",
       "      <td>870.789978</td>\n",
       "      <td>1.749862</td>\n",
       "      <td>485.75000</td>\n",
       "      <td>26.994646</td>\n",
       "      <td>96.440002</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1528968900</th>\n",
       "      <td>6479.979980</td>\n",
       "      <td>0.753000</td>\n",
       "      <td>96.389999</td>\n",
       "      <td>524.539978</td>\n",
       "      <td>870.000000</td>\n",
       "      <td>1.680500</td>\n",
       "      <td>486.00000</td>\n",
       "      <td>77.355759</td>\n",
       "      <td>96.470001</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1528968960</th>\n",
       "      <td>6480.000000</td>\n",
       "      <td>1.490900</td>\n",
       "      <td>96.519997</td>\n",
       "      <td>16.991997</td>\n",
       "      <td>869.989990</td>\n",
       "      <td>1.669014</td>\n",
       "      <td>486.00000</td>\n",
       "      <td>7.503300</td>\n",
       "      <td>96.400002</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            BTC-USD_close  BTC-USD_volume  LTC-USD_close  LTC-USD_volume  \\\n",
       "time                                                                       \n",
       "1528968720    6487.379883        7.706374      96.660004      314.387024   \n",
       "1528968780    6479.410156        3.088252      96.570000       77.129799   \n",
       "1528968840    6479.410156        1.404100      96.500000        7.216067   \n",
       "1528968900    6479.979980        0.753000      96.389999      524.539978   \n",
       "1528968960    6480.000000        1.490900      96.519997       16.991997   \n",
       "\n",
       "            BCH-USD_close  BCH-USD_volume  ETH-USD_close  ETH-USD_volume  \\\n",
       "time                                                                       \n",
       "1528968720     870.859985       26.856577      486.01001       26.019083   \n",
       "1528968780     870.099976        1.124300      486.00000        8.449400   \n",
       "1528968840     870.789978        1.749862      485.75000       26.994646   \n",
       "1528968900     870.000000        1.680500      486.00000       77.355759   \n",
       "1528968960     869.989990        1.669014      486.00000        7.503300   \n",
       "\n",
       "               future  target  \n",
       "time                           \n",
       "1528968720  96.389999       0  \n",
       "1528968780  96.519997       0  \n",
       "1528968840  96.440002       0  \n",
       "1528968900  96.470001       1  \n",
       "1528968960  96.400002       0  "
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "main_df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## <div style=\"direction:rtl;text-align:right;font-family:B Lotus, B Nazanin, Tahoma\">جدا کردن دیتای آموزش و ارزیابی</div>\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "times = sorted(main_df.index.values)  # get the times\n",
    "last_5pct = sorted(main_df.index.values)[-int(0.05*len(times))]  # get the last 5% of the times\n",
    "\n",
    "validation_main_df = main_df[(main_df.index >= last_5pct)]  # make the validation data where the index is in the last 5%\n",
    "main_df = main_df[(main_df.index < last_5pct)]  # now the main_df is all the data up to the last 5%"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Next, we need to **balance** and **normalize** this data. \n",
    "\n",
    "By **balance**, we want to make sure the classes have equal amounts when training, so our model doesn't just always predict one class.\n",
    "\n",
    "One way to counteract this is to use class weights, which allows you to weight loss higher for lesser-frequent classifications. That said, I've never personally seen this really be comparable to a real balanced dataset.\n",
    "\n",
    "We also need to take our data and make sequences from it.\n",
    "\n",
    "So...we've got some work to do! We'll start by making a function that will process the dataframes, so we can just do something like:\n",
    "\n",
    "    train_x, train_y = preprocess_df(main_df) \n",
    "    validation_x, validation_y = preprocess_df(validation_main_df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's start by **removing the future column** (the actual target is called literally target and only needed the future column temporarily to create it). \n",
    "\n",
    "Then, we need to **scale our data**:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn import preprocessing  # pip install sklearn ... if you don't have it!\n",
    "\n",
    "def preprocess_df(df):\n",
    "    df = df.drop(\"future\", 1)  # don't need this anymore.\n",
    "\n",
    "    for col in df.columns:  # go through all of the columns\n",
    "        if col != \"target\":  # normalize all ... except for the target itself!\n",
    "            df[col] = df[col].pct_change()  # pct change \"normalizes\" the different currencies (each crypto coin has vastly diff values, we're really more interested in the other coin's movements)\n",
    "            df.dropna(inplace=True)  # remove the nas created by pct_change\n",
    "            df[col] = preprocessing.scale(df[col].values)  # scale between 0 and 1.\n",
    "\n",
    "    df.dropna(inplace=True)  # cleanup again... jic. Those nasty NaNs love to creep in."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Alright, we've normalized and scaled the data! \n",
    "\n",
    "Next up, **we need to create our actual sequences**. \n",
    "\n",
    "To do this:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from collections import deque\n",
    "import random\n",
    "\n",
    "\n",
    "sequential_data = []  # this is a list that will CONTAIN the sequences\n",
    "prev_days = deque(maxlen=SEQ_LEN)  # These will be our actual sequences. They are made with deque, which keeps the maximum length by popping out older values as new ones come in\n",
    "\n",
    "for i in df.values:  # iterate over the values\n",
    "    prev_days.append([n for n in i[:-1]])  # store all but the target\n",
    "    if len(prev_days) == SEQ_LEN:  # make sure we have 60 sequences!\n",
    "        sequential_data.append([np.array(prev_days), i[-1]])  # append those bad boys!\n",
    "\n",
    "random.shuffle(sequential_data)  # shuffle for good measure."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## <div style=\"direction:rtl;text-align:right;font-family:B Lotus, B Nazanin, Tahoma\">منبع:</div>\n",
    "\n",
    "https://becominghuman.ai/recurrent-neural-networks-rnn-deep-learning-w-python-tensorflow-keras-p-7-c21bc374d4dc"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<div class=\"alert alert-block alert-info\">\n",
    "<div style=\"direction:rtl;text-align:right;font-family:B Lotus, B Nazanin, Tahoma\"> دوره پیشرفته یادگیری عمیق<br>علیرضا اخوان پور<br>  آبان و آذر 1399<br>\n",
    "</div>\n",
    "<a href=\"http://class.vision\">Class.Vision</a> - <a href=\"http://AkhavanPour.ir\">AkhavanPour.ir</a> - <a href=\"https://github.com/Alireza-Akhavan/\">GitHub</a>\n",
    "\n",
    "</div>"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "tf2-GPU",
   "language": "python",
   "name": "tf2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}