{ "metadata": { "name": "" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Setting up tests for profiling" ] }, { "cell_type": "code", "collapsed": false, "input": [ "from random import randint\n", "from time import time\n", "\n", "import numpy as np\n", "from astropy.table import Table, BST, FastRBT, SortedArray\n", "from astropy.table.sorted_array import _searchsorted\n", "from astropy.time import Time\n", "\n", "N = 100000  # number of rows in every benchmark table\n", "\n", "\n", "class IndexProfiling:\n", "    \"\"\"Timing harness for one Table index engine.\n", "\n", "    Each time_* method performs one operation so it can be passed to\n", "    get_time. With engine=None no index is added and lookups scan the\n", "    column linearly instead.\n", "    \"\"\"\n", "\n", "    def __init__(self, engine):\n", "        # initialize N rows with shuffled integer elements\n", "        idx = np.arange(N)\n", "        np.random.shuffle(idx)\n", "        self.t = Table([idx])\n", "        self.engine = engine\n", "        # floor division keeps the row position an int on Python 3 too\n", "        self.val = self.t['col0'][N // 2]\n", "\n", "    def time_init(self):\n", "        \"\"\"Build the index on 'col0'; a no-op when engine is None.\"\"\"\n", "        if self.engine is not None:\n", "            self.t.add_index('col0', engine=self.engine)\n", "\n", "    def time_group(self):\n", "        \"\"\"Group the table by 'col0'.\"\"\"\n", "        self.t.group_by('col0')\n", "\n", "    def time_loc(self):\n", "        \"\"\"Look up self.val, via the index when there is one.\"\"\"\n", "        if self.engine is not None:\n", "            self.t.loc[self.val]\n", "        else:  # linear search\n", "            for val in self.t['col0']:\n", "                if val == self.val:\n", "                    break\n", "\n", "    def time_loc_range(self):\n", "        \"\"\"Select the values from N/4 to 3N/4, inclusive.\"\"\"\n", "        low, high = N // 4, 3 * N // 4\n", "        if self.engine is not None:\n", "            self.t.loc[low:high]\n", "        else:\n", "            range_vals = []\n", "            for val in self.t['col0']:\n", "                if low <= val <= high:\n", "                    range_vals.append(val)\n", "\n", "    def time_add_row(self):\n", "        \"\"\"Append one row holding a random integer.\"\"\"\n", "        self.t.add_row((randint(0, N * 10),))\n", "\n", "    def time_modify(self):\n", "        \"\"\"Overwrite the first element of 'col0' with a random integer.\"\"\"\n", "        self.t['col0'][0] = randint(0, N * 10)\n", "\n", "\n", "def get_time(func):\n", "    \"\"\"Return the wall-clock seconds taken by one call to func().\"\"\"\n", "    start = time()\n", "    func()\n", "    return time() - start" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Run tests for FastRBT, SortedArray, and a non-indexed Table" ] }, { "cell_type": "code", "collapsed": false, "input": [ "implementations = ['None', 'FastRBT', 'SortedArray']\n", "methods = 
['init', 'group', 'loc', 'loc_range', 'add_row', 'modify']\n", "times = {}   # (impl, method) -> seconds in the default index mode\n", "times2 = {}  # (impl, method) -> seconds in 'discard_on_copy' index mode\n", "\n", "for impl in implementations:\n", "    # impl is a name; eval maps it to None or to an engine class imported above\n", "    profile = IndexProfiling(eval(impl))\n", "    for method in methods:\n", "        func = getattr(profile, 'time_{0}'.format(method))\n", "        times[(impl, method)] = get_time(func)\n", "        # same call again in 'discard_on_copy' index mode\n", "        with profile.t.index_mode('discard_on_copy'):\n", "            times2[(impl, method)] = get_time(func)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "for method in methods:\n", "    print('\\n' + method + '\\n**********')\n", "    for impl in implementations:\n", "        t = times[(impl, method)]\n", "        msg = '{0}: {1}'.format(impl, t)\n", "        # percentage of the un-indexed 'None' baseline; 'init' is skipped\n", "        # because that baseline does no work there and can time as 0.0\n", "        if method != 'init':\n", "            msg += ' ({0:.4}%)'.format(t / times[('None', method)] * 100)\n", "        print(msg)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\n", "init\n", "**********\n", "None: 9.53674316406e-07\n", "FastRBT: 1.51089882851\n", "SortedArray: 0.0071108341217\n", "\n", "group\n", "**********\n", "None: 0.0517690181732 (100.0%)\n", "FastRBT: 0.0276219844818 (53.36%)\n", "SortedArray: 0.00372791290283 (7.201%)\n", "\n", "loc\n", "**********\n", "None: 0.00727581977844 (100.0%)\n", "FastRBT: 6.48498535156e-05 (0.8913%)\n", "SortedArray: 0.000172138214111 (2.366%)\n", "\n", "loc_range\n", "**********\n", "None: 0.0341680049896 (100.0%)\n", "FastRBT: 1.58422899246 (4.637e+03%)\n", "SortedArray: 0.00239109992981 (6.998%)\n", "\n", "add_row\n", "**********\n", "None: 0.000385046005249 (100.0%)\n", "FastRBT: 0.00041389465332 (107.5%)\n", "SortedArray: 0.0014979839325 (389.0%)\n", "\n", "modify\n", "**********\n", "None: 1.12056732178e-05 (100.0%)\n", "FastRBT: 8.9168548584e-05 (795.7%)\n", "SortedArray: 0.00265407562256 (2.369e+04%)\n" ] } ], "prompt_number": 3 }, { "cell_type": "heading", 
"level": 2, "metadata": {}, "source": [ "Profiling without index copies" ] }, { "cell_type": "code", "collapsed": false, "input": [ "for method in methods:\n", "    print('\\n' + method + '\\n**********')\n", "    for impl in implementations:\n", "        t = times2[(impl, method)]\n", "        msg = '{0}: {1}'.format(impl, t)\n", "        # percentage of the un-indexed 'None' baseline; 'init' is skipped\n", "        # because that baseline does no work there and can time as 0.0\n", "        if method != 'init':\n", "            msg += ' ({0:.4}%)'.format(t / times2[('None', method)] * 100)\n", "        print(msg)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\n", "init\n", "**********\n", "None: 0.0\n", "FastRBT: 1.52415585518\n", "SortedArray: 0.00721597671509\n", "\n", "group\n", "**********\n", "None: 0.0459520816803 (100.0%)\n", "FastRBT: 0.0275840759277 (60.03%)\n", "SortedArray: 0.00373697280884 (8.132%)\n", "\n", "loc\n", "**********\n", "None: 0.00726699829102 (100.0%)\n", "FastRBT: 2.38418579102e-05 (0.3281%)\n", "SortedArray: 0.000133037567139 (1.831%)\n", "\n", "loc_range\n", "**********\n", "None: 0.0328350067139 (100.0%)\n", "FastRBT: 0.0731410980225 (222.8%)\n", "SortedArray: 0.000694036483765 (2.114%)\n", "\n", "add_row\n", "**********\n", "None: 0.000391960144043 (100.0%)\n", "FastRBT: 0.000457048416138 (116.6%)\n", "SortedArray: 0.00168395042419 (429.6%)\n", "\n", "modify\n", "**********\n", "None: 6.91413879395e-06 (100.0%)\n", "FastRBT: 5.98430633545e-05 (865.5%)\n", "SortedArray: 0.00289297103882 (4.184e+04%)\n" ] } ], "prompt_number": 4 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Test mixin performance" ] }, { "cell_type": "code", "collapsed": false, "input": [ "t1 = Table([[randint(0, N * 2) * 1000. 
/ N for i in range(N)]])\n", "t2 = Table([Time(t1['col0'], format='mjd')])\n", "print('Index setup\\n**********')\n", "print('Regular columns: {0}'.format(get_time(lambda: t1.add_index('col0'))))\n", "print('Time columns: {0}'.format(get_time(lambda: t2.add_index('col0'))))\n", "\n", "# floor division keeps the row positions ints on Python 3 as well\n", "val = t1['col0'][N // 2]\n", "tval = t2['col0'][N // 2]\n", "low = t1.iloc[N // 4]['col0']\n", "high = t1.iloc[3 * N // 4]['col0']\n", "tlow = t2.iloc[N // 4]['col0']\n", "thigh = t2.iloc[3 * N // 4]['col0']\n", "\n", "print('\\nValue search\\n************')\n", "print('Regular column: {0}'.format(get_time(lambda: t1.loc[val])))\n", "print('Time column: {0}'.format(get_time(lambda: t2.loc[tval])))\n", "print('\\nRange search\\n************')\n", "print('Regular column: {0}'.format(get_time(lambda: t1.loc[low:high])))\n", "print('Time column: {0}'.format(get_time(lambda: t2.loc[tlow:thigh])))\n", "\n", "print('\\nsearchsorted\\n***********')\n", "print('Regular column: {0}'.format(\n", "    get_time(lambda: np.searchsorted(t1['col0'], val))))\n", "print('Time column: {0}'.format(\n", "    get_time(lambda: _searchsorted(t2['col0'], tval))))" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stderr", "text": [ "WARNING: ErfaWarning: ERFA function \"utctai\" yielded 100000 of \"dubious year (Note 3)\" [astropy._erfa.core]\n", "WARNING:astropy:ErfaWarning: ERFA function \"utctai\" yielded 100000 of \"dubious year (Note 3)\"\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "Index setup\n", "**********\n", "Regular columns: 0.00827789306641\n", "Time columns: 0.0804588794708" ] }, { "output_type": "stream", "stream": "stderr", "text": [ "WARNING: ErfaWarning: ERFA function \"utctai\" yielded 50000 of \"dubious year (Note 3)\" [astropy._erfa.core]\n", "WARNING:astropy:ErfaWarning: ERFA function \"utctai\" yielded 50000 of \"dubious year (Note 3)\"\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "\n", "Value search\n", 
"************\n", "Regular column: 0.000174045562744\n", "Time column: 0.0105710029602\n", "\n", "Range search\n", "************\n", "Regular column: 0.00186204910278\n", "Time column: 0.0573830604553" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "\n", "searchsorted\n", "***********\n", "Regular column: 1.19209289551e-05\n", "Time column: 0.0693500041962" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n" ] } ], "prompt_number": 5 } ], "metadata": {} } ] }