{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Tracking memory leaks / memory fragmentation\n", "\n", "This notebook is for testing that the peak memory consumption is efficient and doesn't necessarily require more GPU RAM than needed.\n", "\n", "The detection comes from reading the output of [IPyExperimentsPytorch](https://github.com/stas00/ipyexperiments/) per-cell reports and `fastai.callbacks.mem.PeakMemMetric` metric." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%reload_ext autoreload\n", "%autoreload 2\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from fastai.vision import *\n", "from fastai.utils.mem import *\n", "from fastai.callbacks.mem import *" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from pathlib import Path\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#! 
pip install ipyexperiments\n", "from ipyexperiments import IPyExperimentsPytorch\n", "from ipyexperiments.utils.mem import *" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", "assert str(device) == 'cuda:0', f\"we want GPU, got {device}\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from IPython.display import Markdown, display\n", "# display a bold, colored alert message inline (color: any CSS color name)\n", "def alert(string, color='red'):\n", " display(Markdown(f\"<span style='color:{color}'>**{string}**</span>\"))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Prep dataset" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "*** Experiment started with the Pytorch backend\n", "Device: ID 0, GeForce GTX 1070 Ti (8119 RAM)\n", "\n", "\n", "*** Current state:\n", "RAM: Used Free Total Util\n", "CPU: 2275 18518 31588 MB 7.20% \n", "GPU: 503 7616 8119 MB 6.19% \n", "\n", "\n", "・ RAM: △Consumed △Peaked Used Total | Exec time 0:00:00.000\n", "・ CPU: 0 0 2275 MB |\n", "・ GPU: 0 0 503 MB |\n" ] } ], "source": [ "exp1 = IPyExperimentsPytorch()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "・ RAM: △Consumed △Peaked Used Total | Exec time 0:00:00.003\n", "・ CPU: 0 1 2277 MB |\n", "・ GPU: 0 0 503 MB |\n" ] } ], "source": [ "path = untar_data(URLs.MNIST)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "ImageDataBunch;\n", "\n", "Train: LabelList\n", "y: CategoryList (60000 items)\n", "[Category 4, Category 4, Category 4, Category 4, Category 4]...\n", "Path: /home/stas/.fastai/data/mnist_png\n", "x: ImageItemList (60000 items)\n", "[Image (1, 28, 28), Image (1, 28, 28), Image (1, 28, 28), Image (1, 28, 28), Image (1, 28, 28)]...\n", "Path: 
/home/stas/.fastai/data/mnist_png;\n", "\n", "Valid: LabelList\n", "y: CategoryList (10000 items)\n", "[Category 4, Category 4, Category 4, Category 4, Category 4]...\n", "Path: /home/stas/.fastai/data/mnist_png\n", "x: ImageItemList (10000 items)\n", "[Image (1, 28, 28), Image (1, 28, 28), Image (1, 28, 28), Image (1, 28, 28), Image (1, 28, 28)]...\n", "Path: /home/stas/.fastai/data/mnist_png;\n", "\n", "Test: None" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" }, { "name": "stdout", "output_type": "stream", "text": [ "・ RAM: △Consumed △Peaked Used Total | Exec time 0:00:00.837\n", "・ CPU: 34 3 2359 MB |\n", "・ GPU: 0 0 503 MB |\n" ] } ], "source": [ "# setup\n", "defaults.cmap='binary'\n", "\n", "tfms = ([*rand_pad(padding=3, size=28, mode='zeros')], [])\n", "num_workers=0\n", "#bs=512\n", "bs=128\n", "data = (ImageItemList.from_folder(path, convert_mode='L')\n", " .split_by_folder(train='training', valid='testing')\n", " .label_from_folder()\n", " .transform(tfms)\n", " .databunch(bs=bs, num_workers=num_workers)\n", " .normalize(imagenet_stats)\n", " ) \n", "data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Train and Validate\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "・ RAM: △Consumed △Peaked Used Total | Exec time 0:00:00.000\n", "・ CPU: 0 0 2359 MB |\n", "・ GPU: 0 0 503 MB |\n" ] } ], "source": [ "#arch=\"resnet34\"\n", "arch=\"resnet50\"\n", "model = getattr(models, arch) # models.resnetXX" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "・ RAM: △Consumed △Peaked Used Total | Exec time 0:00:01.360\n", "・ CPU: 0 0 2518 MB |\n", "・ GPU: 106 0 609 MB |\n" ] } ], "source": [ "learn = create_cnn(data, model, metrics=[accuracy], callback_fns=PeakMemMetric)\n", "#learn.opt_func\n", "#learn.opt_func = partial(optim.SGD, 
momentum=0.9)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "・ RAM: △Consumed △Peaked Used Total | Exec time 0:00:02.562\n", "・ CPU: 0 0 2519 MB |\n", "・ GPU: 7210 0 7819 MB |\n" ] } ], "source": [ "# must leave at least the size of the 2nd epoch peak\n", "# with resnet50\n", "# - with bs=128 it's about 300MB\n", "# - with bs=512 it's about 900MB\n", "x=gpu_mem_leave_free_mbs(300)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "Total time: 01:32
| epoch | \n", "train_loss | \n", "valid_loss | \n", "accuracy | \n", "cpu used | \n", "peak | \n", "gpu used | \n", "peak | \n", "
|---|---|---|---|---|---|---|---|
| 1 | \n", "0.128634 | \n", "0.064438 | \n", "0.981300 | \n", "17 | \n", "17 | \n", "46 | \n", "294 | \n", "
| 2 | \n", "0.047700 | \n", "0.023343 | \n", "0.991700 | \n", "5 | \n", "6 | \n", "0 | \n", "226 | \n", "