{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Axis transposition benchmarking\n",
    "This notebook compares performance of different implementations of transposing axes.\n",
    "\n",
    "**Note:** benchmarking results vary heavily depending on image size, kernel size, used operations, parameters and used hardware. Use this notebook to adapt it to your use-case scenario and benchmark on your target hardware. If you have different scenarios or use-cases, you are very welcome to submit your notebook as pull-request!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<NVIDIA GeForce RTX 3050 Ti Laptop GPU on Platform: NVIDIA CUDA (1 refs)>"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pyclesperanto_prototype as cle\n",
    "import numpy as np\n",
    "import time\n",
    "import cupy as cp\n",
    "\n",
    "# to measure kernel execution duration properly, we need to set this flag. It will slow down exection of workflows a bit though\n",
    "cle.set_wait_for_kernel_finish(True)\n",
    "\n",
    "# selet a GPU with the following in the name. This will fallback to any other GPU if none with this name is found\n",
    "cle.select_device('RTX')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# test data\n",
    "import numpy as np\n",
    "\n",
    "test_image = np.random.random([100, 512, 1024])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## clEsperanto"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "pyclesperanto transpose duration: 0.06201291084289551\n",
      "pyclesperanto transpose duration: 0.04100918769836426\n",
      "pyclesperanto transpose duration: 0.040008544921875\n",
      "pyclesperanto transpose duration: 0.040008544921875\n",
      "pyclesperanto transpose duration: 0.0400090217590332\n",
      "pyclesperanto transpose duration: 0.04129624366760254\n",
      "pyclesperanto transpose duration: 0.042009592056274414\n",
      "pyclesperanto transpose duration: 0.04128861427307129\n",
      "pyclesperanto transpose duration: 0.04102063179016113\n",
      "pyclesperanto transpose duration: 0.04099869728088379\n",
      "(1024, 512, 100)\n"
     ]
    }
   ],
   "source": [
    "# transpose with pyclesperanto\n",
    "result_image = None\n",
    "\n",
    "test_image_gpu = cle.push_zyx(test_image)\n",
    "\n",
    "for i in range(0, 10):\n",
    "    start_time = time.time()\n",
    "    result_image = cle.transpose_xz(test_image_gpu, result_image)\n",
    "    print(\"pyclesperanto transpose duration: \" + str(time.time() - start_time))\n",
    "print(result_image.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## cupy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cupy transpose duration: 0.06301379203796387\n",
      "cupy transpose duration: 0.0009999275207519531\n",
      "cupy transpose duration: 0.0\n",
      "cupy transpose duration: 0.0\n",
      "cupy transpose duration: 0.0\n",
      "cupy transpose duration: 0.0\n",
      "cupy transpose duration: 0.0\n",
      "cupy transpose duration: 0.0\n",
      "cupy transpose duration: 0.0\n",
      "cupy transpose duration: 0.0\n",
      "(1024, 512, 100)\n"
     ]
    }
   ],
   "source": [
    "# transpose with numpy\n",
    "result_image = None\n",
    "cu_test_image = cp.asarray(test_image)\n",
    "\n",
    "for i in range(0, 10):\n",
    "    start_time = time.time()\n",
    "    result_image = cp.transpose(cu_test_image, (2, 1, 0))\n",
    "    cp.cuda.stream.get_current_stream().synchronize() # we need to wait here to measure time properly\n",
    "    print(\"cupy transpose duration: \" + str(time.time() - start_time))\n",
    "print(result_image.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## numpy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "numpy transpose duration: 0.0\n",
      "numpy transpose duration: 0.0\n",
      "numpy transpose duration: 0.0\n",
      "numpy transpose duration: 0.0\n",
      "numpy transpose duration: 0.0\n",
      "numpy transpose duration: 0.0\n",
      "numpy transpose duration: 0.0\n",
      "numpy transpose duration: 0.0\n",
      "numpy transpose duration: 0.0\n",
      "numpy transpose duration: 0.0\n",
      "(1024, 512, 100)\n"
     ]
    }
   ],
   "source": [
    "# transpose with numpy\n",
    "result_image = None\n",
    "\n",
    "for i in range(0, 10):\n",
    "    start_time = time.time()\n",
    "    result_image = np.transpose(test_image, (2, 1, 0))\n",
    "    print(\"numpy transpose duration: \" + str(time.time() - start_time))\n",
    "print(result_image.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}