{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# idioms\n",
    "\n",
    "Timing comparisons of equivalent pandas idioms: a Python-level `apply` versus its vectorized counterpart.\n",
    "\n",
    "Summary of the stored run:\n",
    "\n",
    "- group-wise standardization, 10,000 groups: `apply` 2.68 s vs `transform` 635 ms\n",
    "- group-wise standardization, 100 groups: `apply` 310 ms vs `transform` 267 ms\n",
    "- conditional masking: `apply` 43.7 ms vs `where` 2.1 ms\n",
    "\n",
    "The stored timings were recorded under the Python 2.7 kernel named in this notebook's metadata; re-run all cells to refresh them for your environment."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "# Seed NumPy's global RNG so create_frame() builds the same frames on every\n",
    "# Restart & Run All.\n",
    "np.random.seed(42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# group creator\n",
    "def create_frame(n, n_groups):\n",
    "    \"\"\"Build a random benchmark frame with ``n`` rows.\n",
    "\n",
    "    Columns:\n",
    "      name   -- integer group label drawn from [0, n_groups)\n",
    "      stamp  -- ``n`` consecutive millisecond timestamps from 2001-01-01, in random order\n",
    "      value  -- random integer drawn from [0, n)\n",
    "      value2 -- standard-normal float\n",
    "    \"\"\"\n",
    "    stamps = pd.date_range('20010101', periods=n, freq='ms')\n",
    "    # Index objects are immutable, so take a permuted copy rather than shuffling\n",
    "    # the index's backing array in place through ``.values`` (which would also\n",
    "    # leave the index advertising a regular ``freq`` it no longer has).\n",
    "    shuffled = stamps[np.random.permutation(n)]\n",
    "    return pd.DataFrame({'name': np.random.randint(0, n_groups, size=n),\n",
    "                         'stamp': shuffled,\n",
    "                         'value': np.random.randint(0, n, size=n),\n",
    "                         'value2': np.random.randn(n)})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# unwrapping groupby\n",
    "\n",
    "Standardize `value2` within each `name` group two ways: a lambda run once per group through `apply`, versus two vectorized `transform` calls combined with ordinary Series arithmetic."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df = create_frame(1000000, 10000)\n",
    "\n",
    "def f_apply(df):\n",
    "    \"\"\"Z-score ``value2`` within each ``name`` group via a per-group lambda.\"\"\"\n",
    "    # group_keys=False keeps the result on the frame's own index, in original\n",
    "    # row order. Newer pandas otherwise prepends the group key and returns the\n",
    "    # rows in group order, which breaks the positional comparison with\n",
    "    # f_unwrap().\n",
    "    grouped = df.groupby('name', group_keys=False)['value2']\n",
    "    return grouped.apply(lambda x: (x - x.mean()) / x.std())\n",
    "\n",
    "def f_unwrap(df):\n",
    "    \"\"\"Z-score ``value2`` within each ``name`` group using vectorized transforms.\"\"\"\n",
    "    g = df.groupby('name')['value2']\n",
    "    v = df['value2']\n",
    "    # transform() broadcasts each group's statistic back onto the original rows.\n",
    "    return (v - g.transform('mean')) / g.transform('std')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.allclose(f_apply(df),f_unwrap(df))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 loops, best of 3: 2.68 s per loop\n"
     ]
    }
   ],
   "source": [
    "%timeit f_apply(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 loops, best of 3: 635 ms per loop\n"
     ]
    }
   ],
   "source": [
    "%timeit f_unwrap(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Same row count, but only 100 groups.\n",
    "df = create_frame(1000000, 100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 loops, best of 3: 310 ms per loop\n"
     ]
    }
   ],
   "source": [
    "%timeit f_apply(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 loops, best of 3: 267 ms per loop\n"
     ]
    }
   ],
   "source": [
    "%timeit f_unwrap(df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# if then\n",
    "\n",
    "Replace every `value` below a threshold with NaN two ways: an element-wise `apply` with a conditional lambda, versus a single vectorized `Series.where`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df = create_frame(100000, 1000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def result_apply(df, threshold=30000):\n",
    "    \"\"\"Return ``value`` with entries below ``threshold`` replaced by NaN, element by element.\"\"\"\n",
    "    return df['value'].apply(lambda x: np.nan if x < threshold else x)\n",
    "\n",
    "def result_where(df, threshold=30000):\n",
    "    \"\"\"Return ``value`` with entries below ``threshold`` replaced by NaN, vectorized.\"\"\"\n",
    "    values = df['value']\n",
    "    # where() keeps entries whose condition is True and fills the rest with NaN.\n",
    "    return values.where(values >= threshold)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result_apply(df).equals(result_where(df))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10 loops, best of 3: 43.7 ms per loop\n"
     ]
    }
   ],
   "source": [
    "%timeit result_apply(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "100 loops, best of 3: 2.1 ms per loop\n"
     ]
    }
   ],
   "source": [
    "%timeit result_where(df)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}