{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import json\n", "import os\n", "import boto3\n", "from pprint import pprint\n", "from bokeh.io import output_notebook, show\n", "from bokeh.plotting import figure\n", "import ipywidgets as widgets\n", "import math" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "BUCKET_NAME = 'modin-jenkins-result'" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Connect to s3\n", "s3 = boto3.resource('s3')\n", "client = boto3.client('s3')\n", "\n", "# Get list of folders in Bucket\n", "response = client.list_objects(\n", " Bucket=BUCKET_NAME,\n", ")\n", "\n", "# Filter for folders containing perf data for commits merged into master\n", "# use commented out function eventually, just use this for now:\n", "master_commit_keys = [a['Key'] for a in response['Contents'] if \"-perf\" in a['Key']]\n", "#master_commit_keys = [a['Key'] for a in response['Contents'] if \"-perf-COMMIT\" in a['Key']]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Fetch the Perf Data from S3 Bucket\n", "perf_data = []\n", "for key in master_commit_keys:\n", " response = client.get_object(\n", " Bucket=BUCKET_NAME,\n", " Key=key\n", " )\n", " file_str = response['Body'].read()\n", " # Convert the Data to JSON Object before storing\n", " file_json = json.loads(file_str)\n", " perf_data.append(file_json)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def json_parser(data):\n", " \"\"\"\n", " This function parses the a single performance json file.\n", " Args:\n", " data: json file corresponding to perf data for one commit\n", "\n", " Returns:\n", " commit_hash: hash of the commit for this file\n", " commit_order: order of the commit from earliest to latest\n", " test_data: parsed performance data for each test run in the suite\n", " \"\"\"\n", " commit_hash = commit_data[\"commit_info\"][\"id\"]\n", " #commit_order = commit_data[\"commit_info\"][\"commit_number\"]\n", " test_data = {}\n", " for test in commit_data[\"benchmarks\"]:\n", " name = test[\"name\"][5:]\n", " test_data[name] = test[\"stats\"][\"mean\"]\n", " #return commit_hash, commit_order, test_data\n", " return commit_hash, test_data" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true, "scrolled": false }, "outputs": [], "source": [ "all_commits_data = {}\n", "commit_orders = {}\n", "i = 0\n", "\n", "# Loops through all the Perf Data files and gets the parsed data\n", "for commit_data in perf_data:\n", " # use commented one later and get rid of the i lines\n", " # commit_hash, commit_order, data = json_parser(commit_data)\n", " commit_hash, data = json_parser(commit_data)\n", " all_commits_data[commit_hash] = data\n", " commit_orders[i] = commit_hash\n", " i += 1\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def plot_function_perf(data, func_name):\n", " commits = [commit_orders[i] for i in range(len(commit_orders))]\n", " perf = [data[commit_hash][func_name] for commit_hash in commits]\n", " commits = dict(enumerate([a[:7] for a in commits]))\n", " commits_n = [i for i in range(len(commits))]\n", " search_commit = {val:key for (key, val) in commits.items()}\n", " p = figure(plot_width=800, plot_height=400, x_axis_label=\"commit hash\", \n", " y_axis_label=\"seconds\", title=func_name + \" performance\", \n", " x_range=(search_commit[s.value],search_commit[e.value]))\n", " p.line(commits_n, perf, line_width=2)\n", " p.xaxis.ticker = commits_n\n", " p.xaxis.major_label_overrides = commits\n", " p.xaxis.major_label_orientation = math.pi/2\n", " show(p)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "style = {'description_width': 'initial'}\n", "d = widgets.Dropdown(\n", " options=list((list(all_commits_data.values())[0]).keys()),\n", " value='fillna',\n", " description='Test',\n", " disabled=False,\n", " style=style\n", ")\n", "s = widgets.Text(\n", " disabled=False,\n", " style=style\n", ")\n", "e = widgets.Text(\n", " disabled=False,\n", " style=style\n", ")\n", "commits = [commit_orders[i] for i in range(len(commit_orders))]\n", "commits = dict(enumerate([a[:7] for a in commits]))\n", "start_hash = commits[0]\n", "end_hash = commits[len(commits) - 1]\n", "s.description=\"Start Hash (Default = \" + start_hash + \")\"\n", "e.description=\"End Hash (Default = \" + end_hash + \")\"\n", "s.value = start_hash\n", "e.value = end_hash\n", "output_notebook()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "display(d)\n", "display(s)\n", "display(e)\n", "plot_function_perf(all_commits_data, d.value)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#Please Ignore Below This Cell" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "json_dir = os.getcwd() + \"/modin/.benchmarks/Darwin-CPython-3.6-64bit/\"\n", "master_hash = \"ae9f397109620cf00243169654f2f4bec7809b72\"\n", "\n", "data = []\n", "commit_order = {}\n", "master_data = []\n", "for filename in os.listdir(json_dir):\n", " if filename.endswith(\".json\"):\n", " order = int(filename.split(\"_\")[0])\n", " commit = filename.split(\"_\")[1].split(\"_\")[0]\n", " if commit == master_hash:\n", " with open(json_dir + filename) as f:\n", " master_data.append(json.load(f))\n", " else:\n", " commit_order[order] = commit\n", " with open(json_dir + filename) as f:\n", " data.append(json.load(f))\n", " \n", "#commit_order = OrderedDict(sorted(commit_order.items(), key=lambda t: t))\n", "commit_order\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def json_parser(data):\n", " parsed_data = {}\n", " for commit_data in data:\n", " commit_hash = commit_data[\"commit_info\"][\"id\"]\n", " test_data = {}\n", " for test in commit_data[\"benchmarks\"]:\n", " name = test[\"name\"][5:]\n", " test_data[name] = test[\"stats\"][\"mean\"]\n", " parsed_data[commit_hash] = test_data\n", " return parsed_data\n", " \n", "\n", "pprint(json_parser(master_data))\n", " \n", " " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def plot_function_perf(other_data, func_name):\n", " commits = [commit_order[i] for i in range(1,len(commit_order)+1)]\n", " perf = [other_data[commit_hash][func_name] for commit_hash in commits]\n", " commits = [a[:7] for a in commits]\n", " commits = commits[::-1]\n", " perf = perf[::-1]\n", " plt.plot(commits, perf)\n", " plt.ylabel('seconds')\n", " plt.xlabel('commit hash')\n", " plt.title(func_name + ' performance')\n", " plt.show()\n", "\n", "def plot_against_master(other_data, master_data, hash_to_compare, func_name):\n", " #commits = [a for a in commit_order]\n", " #perf = [other_data[commit_hash][func_name] for commit_hash in commits]\n", " #commits = [a[:7] for a in commits]\n", " #commits = commits[::-1]\n", " #perf = perf[::-1]\n", " commits = [hash_to_compare[:7], \"master\"]\n", " compare_perf = other_data[hash_to_compare][func_name]\n", " master_perf = master_data[master_hash][func_name]\n", " perf = [compare_perf, master_perf]\n", " #x = [\"master\"]\n", " #x.append(commits[0])\n", " #y = [master_data[master_hash][func_name]]\n", " #y.append(perf[0])\n", " print(commits)\n", " print(perf)\n", " plt.plot(commits, perf)\n", " plt.ylabel('seconds')\n", " plt.xlabel('commit hash')\n", " plt.title(func_name + ' performance')\n", " plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#RUN\n", "other = json_parser(data)\n", "plot_function_perf(other, 'read_csv')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "hash_to_compare = 'a368735324669914efcd9020ac3c8ffffab9b641'\n", "master = json_parser(master_data)\n", "plot_against_master(other, master, hash_to_compare, 'read_csv')" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.3" } }, "nbformat": 4, "nbformat_minor": 2 }