{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "import boto3\n",
    "from pprint import pprint\n",
    "from bokeh.io import output_notebook, show\n",
    "from bokeh.plotting import figure\n",
    "import ipywidgets as widgets\n",
    "import math"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "BUCKET_NAME = 'modin-jenkins-result'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Connect to s3\n",
    "s3 = boto3.resource('s3')\n",
    "client = boto3.client('s3')\n",
    "\n",
    "# Get list of folders in Bucket\n",
    "response = client.list_objects(\n",
    "    Bucket=BUCKET_NAME,\n",
    ")\n",
    "\n",
    "# Filter for folders containing perf data for commits merged into master\n",
    "# use commented out function eventually, just use this for now:\n",
    "master_commit_keys = [a['Key'] for a in response['Contents'] if \"-perf\" in a['Key']]\n",
    "#master_commit_keys = [a['Key'] for a in response['Contents'] if \"-perf-COMMIT\" in a['Key']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Fetch the Perf Data from S3 Bucket\n",
    "perf_data = []\n",
    "for key in master_commit_keys:\n",
    "    response = client.get_object(\n",
    "        Bucket=BUCKET_NAME,\n",
    "        Key=key\n",
    "    )\n",
    "    file_str = response['Body'].read()\n",
    "    # Convert the Data to JSON Object before storing\n",
    "    file_json = json.loads(file_str)\n",
    "    perf_data.append(file_json)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def json_parser(data):\n",
    "    \"\"\"\n",
    "    This function parses the a single performance json file.\n",
    "    Args:\n",
    "        data: json file corresponding to perf data for one commit\n",
    "\n",
    "    Returns:\n",
    "        commit_hash: hash of the commit for this file\n",
    "        commit_order: order of the commit from earliest to latest\n",
    "        test_data: parsed performance data for each test run in the suite\n",
    "    \"\"\"\n",
    "    commit_hash = commit_data[\"commit_info\"][\"id\"]\n",
    "    #commit_order = commit_data[\"commit_info\"][\"commit_number\"]\n",
    "    test_data = {}\n",
    "    for test in commit_data[\"benchmarks\"]:\n",
    "        name = test[\"name\"][5:]\n",
    "        test_data[name] = test[\"stats\"][\"mean\"]\n",
    "    #return commit_hash, commit_order, test_data\n",
    "    return commit_hash, test_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "all_commits_data = {}\n",
    "commit_orders = {}\n",
    "i = 0\n",
    "\n",
    "# Loops through all the Perf Data files and gets the parsed data\n",
    "for commit_data in perf_data:\n",
    "    # use commented one later and get rid of the i lines\n",
    "    # commit_hash, commit_order, data = json_parser(commit_data)\n",
    "    commit_hash, data = json_parser(commit_data)\n",
    "    all_commits_data[commit_hash] = data\n",
    "    commit_orders[i] = commit_hash\n",
    "    i += 1\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def plot_function_perf(data, func_name):\n",
    "    commits = [commit_orders[i] for i in range(len(commit_orders))]\n",
    "    perf = [data[commit_hash][func_name] for commit_hash in commits]\n",
    "    commits = dict(enumerate([a[:7] for a in commits]))\n",
    "    commits_n = [i for i in range(len(commits))]\n",
    "    search_commit = {val:key for (key, val) in commits.items()}\n",
    "    p = figure(plot_width=800, plot_height=400, x_axis_label=\"commit hash\", \n",
    "               y_axis_label=\"seconds\", title=func_name + \" performance\", \n",
    "               x_range=(search_commit[s.value],search_commit[e.value]))\n",
    "    p.line(commits_n, perf, line_width=2)\n",
    "    p.xaxis.ticker = commits_n\n",
    "    p.xaxis.major_label_overrides = commits\n",
    "    p.xaxis.major_label_orientation = math.pi/2\n",
    "    show(p)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "style = {'description_width': 'initial'}\n",
    "d = widgets.Dropdown(\n",
    "    options=list((list(all_commits_data.values())[0]).keys()),\n",
    "    value='fillna',\n",
    "    description='Test',\n",
    "    disabled=False,\n",
    "    style=style\n",
    ")\n",
    "s = widgets.Text(\n",
    "    disabled=False,\n",
    "    style=style\n",
    ")\n",
    "e = widgets.Text(\n",
    "    disabled=False,\n",
    "    style=style\n",
    ")\n",
    "commits = [commit_orders[i] for i in range(len(commit_orders))]\n",
    "commits = dict(enumerate([a[:7] for a in commits]))\n",
    "start_hash = commits[0]\n",
    "end_hash = commits[len(commits) - 1]\n",
    "s.description=\"Start Hash (Default = \" + start_hash + \")\"\n",
    "e.description=\"End Hash (Default = \" + end_hash + \")\"\n",
    "s.value = start_hash\n",
    "e.value = end_hash\n",
    "output_notebook()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "display(d)\n",
    "display(s)\n",
    "display(e)\n",
    "plot_function_perf(all_commits_data, d.value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#Please Ignore Below This Cell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "json_dir = os.getcwd() + \"/modin/.benchmarks/Darwin-CPython-3.6-64bit/\"\n",
    "master_hash = \"ae9f397109620cf00243169654f2f4bec7809b72\"\n",
    "\n",
    "data = []\n",
    "commit_order = {}\n",
    "master_data = []\n",
    "for filename in os.listdir(json_dir):\n",
    "    if filename.endswith(\".json\"):\n",
    "        order = int(filename.split(\"_\")[0])\n",
    "        commit = filename.split(\"_\")[1].split(\"_\")[0]\n",
    "        if commit == master_hash:\n",
    "            with open(json_dir + filename) as f:\n",
    "                master_data.append(json.load(f))\n",
    "        else:\n",
    "            commit_order[order] = commit\n",
    "            with open(json_dir + filename) as f:\n",
    "                data.append(json.load(f))\n",
    "            \n",
    "#commit_order = OrderedDict(sorted(commit_order.items(), key=lambda t: t))\n",
    "commit_order\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def json_parser(data):\n",
    "    parsed_data = {}\n",
    "    for commit_data in data:\n",
    "        commit_hash = commit_data[\"commit_info\"][\"id\"]\n",
    "        test_data = {}\n",
    "        for test in commit_data[\"benchmarks\"]:\n",
    "            name = test[\"name\"][5:]\n",
    "            test_data[name] = test[\"stats\"][\"mean\"]\n",
    "        parsed_data[commit_hash] = test_data\n",
    "    return parsed_data\n",
    "        \n",
    "\n",
    "pprint(json_parser(master_data))\n",
    "        \n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def plot_function_perf(other_data, func_name):\n",
    "    commits = [commit_order[i] for i in range(1,len(commit_order)+1)]\n",
    "    perf = [other_data[commit_hash][func_name] for commit_hash in commits]\n",
    "    commits = [a[:7] for a in commits]\n",
    "    commits = commits[::-1]\n",
    "    perf = perf[::-1]\n",
    "    plt.plot(commits, perf)\n",
    "    plt.ylabel('seconds')\n",
    "    plt.xlabel('commit hash')\n",
    "    plt.title(func_name + ' performance')\n",
    "    plt.show()\n",
    "\n",
    "def plot_against_master(other_data, master_data, hash_to_compare, func_name):\n",
    "    #commits = [a for a in commit_order]\n",
    "    #perf = [other_data[commit_hash][func_name] for commit_hash in commits]\n",
    "    #commits = [a[:7] for a in commits]\n",
    "    #commits = commits[::-1]\n",
    "    #perf = perf[::-1]\n",
    "    commits = [hash_to_compare[:7], \"master\"]\n",
    "    compare_perf = other_data[hash_to_compare][func_name]\n",
    "    master_perf = master_data[master_hash][func_name]\n",
    "    perf = [compare_perf, master_perf]\n",
    "    #x = [\"master\"]\n",
    "    #x.append(commits[0])\n",
    "    #y = [master_data[master_hash][func_name]]\n",
    "    #y.append(perf[0])\n",
    "    print(commits)\n",
    "    print(perf)\n",
    "    plt.plot(commits, perf)\n",
    "    plt.ylabel('seconds')\n",
    "    plt.xlabel('commit hash')\n",
    "    plt.title(func_name + ' performance')\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#RUN\n",
    "other = json_parser(data)\n",
    "plot_function_perf(other, 'read_csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "hash_to_compare = 'a368735324669914efcd9020ac3c8ffffab9b641'\n",
    "master = json_parser(master_data)\n",
    "plot_against_master(other, master, hash_to_compare, 'read_csv')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}