# In[9]:
# Load the metapath records from disk. Later cells read the 'abbreviation' key
# and the three *_complexity keys from each record.
path = 'data/all-features/metapaths.json'
# Decode explicitly as UTF-8 so the result does not depend on the platform's
# locale encoding (the default for open() in text mode).
with open(path, encoding='utf-8') as fp:
    metapaths = json.load(fp)
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
metapathnonzeroseconds_per_queryaurocauroc_permuteddelta_aurocpval_auroclength
0CbGaD0.3120.01450.7150.5800.135000.0000032
1CbGdD0.1490.01360.5120.515-0.003320.9210002
\n", "
# In[10]:
# Per-metapath table read from a tab-separated file; the preview below shows
# columns such as metapath, seconds_per_query, auroc and length.
auroc_df = pandas.read_table('data/all-features/auroc.tsv')
auroc_df.head(2)

# In[11]:
# Complexity estimates to compare against the measured query runtime.
cols = ['sequential_complexity', 'optimal_join_complexity', 'midpoint_join_complexity']

# One row per metapath record: its abbreviation followed by the selected
# complexity values, in the order given by `cols`.
rows = [[item['abbreviation']] + [item[col] for col in cols] for item in metapaths]
metapath_complexity_df = pandas.DataFrame(rows, columns=['metapath'] + cols)

# Join on the metapath abbreviation explicitly. Without `on`, merge joins on
# every column name the two frames share, so the key would silently change if
# either input gained an overlapping column.
complexity_df = auroc_df.merge(metapath_complexity_df, on='metapath')

# Runtime on a log10 scale; this is the y-variable of the plots further down.
complexity_df['log10_seconds_per_query'] = numpy.log10(complexity_df['seconds_per_query'])
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
metapathnonzeroseconds_per_queryaurocauroc_permuteddelta_aurocpval_auroclengthsequential_complexityoptimal_join_complexitymidpoint_join_complexitylog10_seconds_per_query
0CbGaD0.3120.01450.7150.5800.135000.00000320.6204780.7137660.876638-1.838632
1CbGdD0.1490.01360.5120.515-0.003320.92100021.2067370.9661030.966103-1.866461
\n", "
# In[12]:
# Show the first two rows of complexity_df as a quick visual check of its columns.
complexity_df.head(2)
# In[13]:
# Scatter of log10 query runtime against sequential_complexity with a LOWESS
# trend line, rendered through mpld3 so each point shows its metapath on hover.
x_col = 'sequential_complexity'
y_col = 'log10_seconds_per_query'

# regplot drops rows with missing x/y by default (dropna=True). Filter them here
# first so the tooltip labels stay aligned one-to-one with the plotted points.
plot_df = complexity_df.dropna(subset=[x_col, y_col])

fig = matplotlib.pyplot.figure(figsize=(10, 7))
# x and y are passed by keyword: newer seaborn releases no longer accept them
# positionally. ci=None is the documented way to disable the confidence band.
ax = seaborn.regplot(
    x=x_col,
    y=y_col,
    data=plot_df,
    lowess=True,
    scatter_kws={'alpha': 0.5},
    line_kws={'color': 'black'},
    ci=None,
)
# The scatter points are the first collection regplot adds to the axes.
points = ax.collections[0]
labels = plot_df.metapath.tolist()
tooltip = mpld3.plugins.PointLabelTooltip(points, labels)
mpld3.plugins.connect(fig, tooltip)
mpld3.display(fig)
# In[14]:
# Scatter of log10 query runtime against optimal_join_complexity with a LOWESS
# trend line, rendered through mpld3 so each point shows its metapath on hover.
x_col = 'optimal_join_complexity'
y_col = 'log10_seconds_per_query'

# regplot drops rows with missing x/y by default (dropna=True). Filter them here
# first so the tooltip labels stay aligned one-to-one with the plotted points.
plot_df = complexity_df.dropna(subset=[x_col, y_col])

fig = matplotlib.pyplot.figure(figsize=(10, 7))
# x and y are passed by keyword: newer seaborn releases no longer accept them
# positionally. ci=None is the documented way to disable the confidence band.
ax = seaborn.regplot(
    x=x_col,
    y=y_col,
    data=plot_df,
    lowess=True,
    scatter_kws={'alpha': 0.5},
    line_kws={'color': 'black'},
    ci=None,
)
# The scatter points are the first collection regplot adds to the axes.
points = ax.collections[0]
labels = plot_df.metapath.tolist()
tooltip = mpld3.plugins.PointLabelTooltip(points, labels)
mpld3.plugins.connect(fig, tooltip)
mpld3.display(fig)
# In[15]:
# Scatter of log10 query runtime against midpoint_join_complexity with a LOWESS
# trend line, rendered through mpld3 so each point shows its metapath on hover.
x_col = 'midpoint_join_complexity'
y_col = 'log10_seconds_per_query'

# regplot drops rows with missing x/y by default (dropna=True). Filter them here
# first so the tooltip labels stay aligned one-to-one with the plotted points.
plot_df = complexity_df.dropna(subset=[x_col, y_col])

fig = matplotlib.pyplot.figure(figsize=(10, 7))
# x and y are passed by keyword: newer seaborn releases no longer accept them
# positionally. ci=None is the documented way to disable the confidence band.
ax = seaborn.regplot(
    x=x_col,
    y=y_col,
    data=plot_df,
    lowess=True,
    scatter_kws={'alpha': 0.5},
    line_kws={'color': 'black'},
    ci=None,
)
# The scatter points are the first collection regplot adds to the axes.
points = ax.collections[0]
labels = plot_df.metapath.tolist()
tooltip = mpld3.plugins.PointLabelTooltip(points, labels)
mpld3.plugins.connect(fig, tooltip)
mpld3.display(fig)