{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# How to make pandas go slow" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from itertools import permutations\n", "\n", "import numpy as np\n", "import pandas as pd\n", "from pandas import Series" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Use the fully qualified option name: set_option treats its key as a pattern\n", "# and rejects a short one such as 'max_rows' if it matches several options.\n", "pd.set_option('display.max_rows', 12)\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "np.random.seed(0)  # fixed seed so every run benchmarks the same frame\n", "df = pd.DataFrame(np.random.randn(10000, 2), columns=list('AB'))" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Each function returns column A plus 1; they differ only in how they get there.\n", "# The explicit Python loops are deliberate: they are what is being timed.\n", "\n", "def f_baseline(df):\n", "    \"\"\"Vectorised reference: add 1 to column A in a single operation.\"\"\"\n", "    return df['A'] + 1\n", "\n", "\n", "def f_apply(df):\n", "    \"\"\"Add 1 to column A by calling a Python lambda once per row (apply, axis=1).\"\"\"\n", "    return df.apply(lambda row: row['A'] + 1, axis=1)\n", "\n", "\n", "def f_itertuples(df):\n", "    \"\"\"Add 1 to column A while looping over df.itertuples().\"\"\"\n", "    values = []\n", "    for row in df.itertuples():\n", "        # Position 0 of each tuple is the index label, so column A is at 1.\n", "        values.append(row[1] + 1)\n", "    # Reuse df's index so the result equals f_baseline for any index.\n", "    return Series(values, index=df.index, name='A')\n", "\n", "\n", "def f_iterrows(df):\n", "    \"\"\"Add 1 to column A while looping over df.iterrows().\"\"\"\n", "    values = []\n", "    for _, row in df.iterrows():\n", "        values.append(row['A'] + 1)\n", "    # Reuse df's index so the result equals f_baseline for any index.\n", "    return Series(values, index=df.index, name='A')\n", "\n", "\n", "def f_index(df):\n", "    \"\"\"Increment a copy of column A one element at a time through .iloc.\"\"\"\n", "    s = df['A'].copy()  # copy so the caller's frame is left untouched\n", "    for i in range(len(s)):\n", "        s.iloc[i] += 1\n", "    return s\n", "\n", "\n", "def f_index2(df):\n", "    \"\"\"Read the first column (A in this notebook) one scalar at a time via df.iloc[i, 0].\"\"\"\n", "    values = []\n", "    for i in range(len(df)):\n", "        values.append(df.iloc[i, 0] + 1)\n", "    # Reuse df's index so the result equals f_baseline for any index.\n", "    return Series(values, index=df.index, name='A')" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [], "source": [ "results = [f_baseline(df), f_itertuples(df), f_apply(df), f_iterrows(df),\n", "           f_index(df), f_index2(df)]" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Sanity check before timing: every implementation must agree with every other.\n", "all(a.equals(b) for a, b in permutations(results, 2))" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "100 loops, best of 1: 69.6 µs per 
loop\n" ] } ], "source": [ "%timeit -n 100 -r 1 f_baseline(df)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "100 loops, best of 3: 6.06 ms per loop\n" ] } ], "source": [ "%timeit f_itertuples(df)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "10 loops, best of 3: 96.6 ms per loop\n" ] } ], "source": [ "%timeit f_apply(df)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1 loops, best of 3: 227 ms per loop\n" ] } ], "source": [ "%timeit f_iterrows(df)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1 loops, best of 3: 728 ms per loop\n" ] } ], "source": [ "%timeit f_index(df)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1 loops, best of 3: 926 ms per loop\n" ] } ], "source": [ "%timeit f_index2(df)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAZsAAAD7CAYAAAC8GzkWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFClJREFUeJzt3XuwpVV95vHvAw0MFwGtpLxwa4KXkgS0xbQOl3hiLIRo\nk+hoKUZHgyNWmBTMCErUcWhjVWKwkoBOdMYhkEQicTQZItEJoMPRprW60b5yDSAyooKMkQimJIC/\n+WOvM2yOfTvde519+uzvp+rUXu/a77vetVbBfvq97HenqpAkqac9xt0BSdLiZ9hIkrozbCRJ3Rk2\nkqTuDBtJUneGjSSpuyXj7sB8SOL93ZK0E6oqo2hnIsIGRjdhu7skK6tq5bj7sRA4F49zLh7nXDxu\nlP9Q9zSaJKk7w0aS1J1hM3mmx92BBWR63B1YQKbH3YEFZHrcHViMMgnPRktSXrORpLkZ5WenRzaS\npO4MG0lSd4aNJKm7ifmejV/slLSY7G7XoScmbMCskbRY7FY5A3gaTZI0DwwbSVJ3ho0kqbtuYZNk\n9RzXn0py1U7u6x1JbkqyMckXkhy+M+1IkvroFjZVdUKvtrdgHXBcVT0P+Axw4TzuW5K0HT2PbB5q\nr1NJppN8OsktSS4fWueUVvd14FVD9fsnuTTJmiTrkpzW6i9K8r5WfnmSLwFU1XRV/bhtvgY4tNe4\nJElz1/PW5+F7jZ8PHA18F1id5HgGRyMfB365qu5M8qmhbd4LfLGqzkhyMLAmybXAu4EbklwPXAyc\nuoX9vhX4fJcRSZJ2ynx9z2ZtVX0HIMkG4Ejgn4G7qurOts7lwJmtfDKwIsl5bXkf4PCqui3J24BV\nwDlVddfwTpK8EXgB8B9/ugsrh8pT7U+SNCPJFJ0+HOcrbB4eKj/W9jv7W5azv6X06qq6fQttHQvc\nDxzyhI2TlwHvAX6pqh756c1WzqnDkjRpqmqaoZ9YSHLBqNoe163PBdwKLE3yc63u9KH3rwbOnllI\nsqy9HgG8A1gGnJpk+dD7/xVYUVX/t3/3JUlz0TNsaivlQUXVwwxOm32u3SBw39B6HwD2SrIpyY3A\n+1v9JcC5VXUvg2szlyTZh8HdZ/sDn0myPsmVXUYkSdopE/PjaT4bTdLikXl5EKc/niZJ2q0YNpKk\n7gwbSVJ3ho0kqTvDRpLU3QT9Uufu98t2krRYTEzY7G6/1y1Ji4mn0SRJ3Rk2kqTuDBtJUneGjSSp\nO8NGktSdYSNJ6s6wkSR1Z9hIkrozbCRJ3Rk2kqTuDBtJUneGjSSpO8NGktSdYSNJ6s6wkSR1Z9hI\nkrqbmB9PS1Lj7oOk/vyhxIVpYsIGzBpp8TNnFipPo0mSujNsJEndGTaSpO7mNWySrJ7j+lNJrurV\nH0nS/JjXsKmqE+Zzf5KkhWG+j2weaq9TSaaTfDrJLUkuH1rnlFb3deBVQ/X7J7k0yZok65Kc1uov\nSvK+Vn55ki/N55gkSds337c+D99//HzgaOC7wOokxwPrgI8Dv1xVdyb51NA27wW+WFVnJDkYWJPk\nWuDdwA1JrgcuBk6dp7FIknbQOL9ns7aqvgOQZANwJPDPwF1VdWdb53LgzFY+GViR5Ly2vA9weFXd\nluRtwCrgnKq6a8u7WzlUnmp/kqQZSabo9OE4zrB5eKj8GIO+zP7m5exvaL26qm7fQlvHAvcDh2x9\ndyvn3EFJmiRVNQ1MzywnuWBUbS+kW58LuBVYmuTnWt3pQ+9fDZw9s5BkWXs9AngHsAw4Ncny+emu\nJGlHzXfY1FbKg4qqhxmcNvtcu0HgvqH1PgDslWRTkhuB97f6S4Bzq+pe4K3AJUn27jUASdLcpWrx\nPzNs8BDOxT9OSfFBnCOUpEY1nwvpNJokaZEybCRJ3Rk2kqTuDBtJUneGjSSpuwn6pU5vUJGkcZmY\nsPF2SEkaH0+jSZK6M2wkSd0ZNpKk7gwbSVJ3ho0kqTvDRpLUn
WEjSerOsJEkdWfYSJK6M2wkSd0Z\nNpKk7gwbSVJ3ho0kqTvDRpLUnWEjSerOsJEkdTcxP56WpMbdh92FPzQnadQmJmzArNkx5oyk0fM0\nmiSpO8NGktSdYSNJ6m6XwibJ6vZ6RJLTR9MlSdJis0thU1UntOKRwBvmsm2SJdtaliQtHrt6ZPNQ\nK34QOCnJ+iTnJNkjyYeSrE2yMcmZbf2pJKuS/C1wU5KXDC3fmGSfJJcl2ZRkXZKptt3fJTmmldcn\neV8r/26Sf5fk6Um+3N7bnOTEXRmXJGm0dvVoYuZ+4vOB86pqBUALlweqanmSfYDrk1zT1l0G/HxV\n3d3CZHj5XOCxqjo2yXOAa5I8G1jFIMzuBh4Bjm9tnQi8HTgd+Puq+r0kAfbfxXFJkkZoVKeuZn85\n42TgmCSvacsHAs8EHgXWVtXdQ+sOL58AfBigqm5r4TITNmcDdwGfA16WZF/gyKq6PckNwKVJ9gKu\nrKqNIxqXJGkEel4n+e2quna4oh3J/GjWerOXZwdXATcALwS+AVwL/AxwJvA1gKpaleQk4JXAnyX5\no6r6xBObWTlUnmp/kqQZ7TN6qkfbowqbB4EnDS1fDZyV5LqqerSdCrtnB9pZBfwGcF3b5nDgtqp6\nJMk9wGuB9wM/C/whcCFAksOBb1fVJe203TJgG2EjSZqtqqaB6ZnlJBeMqu1RXbPZCDyWZANwGYNT\nYUuBde0ayveAV7X1a9b2w8sfBT6WZBODU25vrqpH2ntfBl5aVQ8nuR54BoNwgkESvzPJIwyC79/u\n4rgkSSOUqsX/zLDBQzgX/zhHIz6IUxIw+Owc1eeBTxCQJHVn2EiSujNsJEndGTaSpO4MG0lSdxP0\n8EtvsJKkcZmYsPF2XkkaH0+jSZK6M2wkSd0ZNpKk7gwbSVJ3ho0kqTvDRpLUnWEjSerOsJEkdWfY\nSJK6M2wkSd0ZNpKk7gwbSVJ3ho0kqTvDRpLUnWEjSerOsJEkdWfYSJK6m5hf6kxS49y/vxQqaZJN\nTNjAOLPGnJE02TyNJknqzrCRJHW3W4ZNkrck+ci4+yFJ2jG7Zdgw3gswkqQ5mvewSfI/k3wtyY1J\n3tbqHkryR63uC0l+ptVPJ7koyfokm5P84kwz7f0DknwjyZK2fGBb3nO+xyVJ2rpxHNmcUVUvBH4R\nODvJU4D9gBuq6heALwEXtHUL2LeqlgFnAZcON1RVDwHTwCta1euBv66qx7qPQpK0w8YRNuck2QB8\nFTgUeBbwE+BT7f3LgROH1r8CoKpWAQcmOWhWe5cAv9nKbwEu69NtSdLOmtfv2SSZAn4FeHFV/TjJ\ndcC/mnl76HVb12R+MrxQVV9JsrS1vWdV3bzlzVYOlafanyRpRvscnerR9nx/qfNA4ActaJ4LvLjV\n7wG8hsHRzRuAVa0+wOuA6SQnAg9U1YPJT31J8i+AvwR+d+u7XjmaEUjSIlVV0wwuTQCQ5IKtrjxH\n830a7e+BJUluBn6Pwak0gB8By5NsZpCqM6FRwI+TrAM+Crx1qH746OeTwJNpp9wkSQtLqsZ/F3GS\nB6vqSVuovw44t6rWbWf71wArqurNW3m/xv24Gp+NJml3k6RG9dm1UJ6NttNJ0L7c+XLgV0fXHUnS\nKC2II5vePLKRpLkb5ZHN7voEAUnSbsSwkSR1Z9hIkrpbKDcIzAMvmUjSuExM2HiBXpLGx9NokqTu\nDBtJUneGjSSpO8NGktSdYSNJ6s6wkSR1Z9hIkrozbCRJ3Rk2kqTuDBtJUneGjSSpO8NGktSdYSNJ\n6s6wkSR1Z9hIkrozbCRJ3Rk2kqTuJiZsklSSGnc/JGkSTUzYgDkjSeMyQWEjSRoXw0aS1J1hI0nq\nbpthk2R1ez0iyelzbTzJQUl+a2c719pYmeTcXWlDkjRe2wybqjqhFY8E3jCXhpMsAZ4MnLVzXXu8\nG7u4vSRpzLZ3ZPNQK34QO
CnJ+iTnJNkjyYeSrE2yMcmZbf2pJKuS/C1wE/D7wFFtuwuTvCTJVUPt\n/5ckb27lbyb5gySbkqxJctQW+nNUkv+V5GtJvpzkOa3+tUk2J9mQ5EsjmRlJ0sgs2c77M0cV5wPn\nVdUKgBYuD1TV8iT7ANcnuaatuwz4+aq6O8kRwC9U1bK23dQW2q+h8gNVdWySNwEXAStm9ePjwNur\n6o4kLwI+CvwK8D7g5Kr6bpID5zIBkqT+thc2MzJr+WTgmCSvacsHAs8EHgXWVtXdW9lue65or38F\n/PETOpDsDxwPfDr5/83u3V5XA3+e5H8Af7PlplfOtLMSmK6q6Tn2TZIWtXZAMNWj7R0Nmy357aq6\ndriidfRH29jmUZ546m7fbaw7+1rNHsAPZo6SnrBi1W8lWQ68Avh6kuOq6h+fuNZK4P1U1cpt7FOS\nJlb7R/j0zHKSC0bV9o7e+vwg8KSh5auBs9pNACR5dpL9dmC7u4Gjk+yd5GDgpbPWf93Q61daOUCq\n6kHgrpmjqQwc28pHVdXaqroAuB84dAfHJUmaBzt6zWYj8FiSDcBlwIeBpcC6DM5pfQ94FU+8BkNV\nfT/J6iSbgc9X1fntVNeNwF3Auln7e3KSjcCPgZlbrYfb/A3gY0n+E7AXg9Num4ALkzyLQTB9oao2\nzWEOJEmdpWph3Fmc5C5gC6e/RtJ2DfIqVNVcryNJ0kRKUqP6zFxITxBYGKknSRq5BXNk05NHNpI0\nd4v1yEaStEgZNpKk7iYobDx7Jknjsitf6tyteK1GksZngo5sJEnjYthIkrozbCRJ3Rk2kqTuDBtJ\nUneGjSSpO8NGktSdYSNJ6s6wkSR1Z9hIkrozbCRJ3Rk2kqTuDBtJUneGjSSpO8NGktSdYSNJ6s6w\nkSR1Z9hIkrozbCRJ3Rk2kqTuDBtJUneGjSSpu50KmyRLk2wedWda21NJrmrlFUnO77EfSdL8WTLu\nDmxLVV0FXDXufkiSds2unEZbkuTyJDcn+XSSfZP85yRrk2xO8t9mVkxydpKbkmxMckWr2z/JpUnW\nJFmX5LTZO0jyliQfaeU/S3JxktVJ7kzyb4bWe2fb78YkK3dhTJKkDnYlbJ4D/ElVHQ38EDgL+EhV\nLa+qY4B9k7yyrXs+8Pyqeh7w9lb3XuCLVfUi4KXAh5Lst519Pq2qTgBeCXwQIMnJwDOrajmwDDgu\nyUm7MC5J0ojtymm0b1XVV1v5cuBs4JtJ3gXsCzwFuBH4O2AT8MkkVwJXtm1OBlYkOa8t7wMcto39\n1cy2VXVLkqcOtXNykvVteX/gmcCq4Y1nHfFMV9X0jg9Vkha/JFPAVI+2dyVsaqictvwnwHFV9e0k\nFzAIHYBXAL8ErADem+SYVv/qqrp9uNEkT9/GPv9l1j5n/H5VfXybna1aua33JWnStX+ET88st8/x\nkdiV02iHJ3lxK78BuL6Vv5/kAOC1QCUJcHgbxO8ABwEHAFczOBoCIMmynezH1cAZSfZv7RyS5Gd3\nsi1JUgc7e2RTwG3Av09yKXAT8DHgyQxOnd0LrGnr7gl8IslBDI5GLq6qf0ryAeCiJJsYhN43gNOG\n2p95HT6C+qlyVV2b5LnAVwe5xoPAG4H7d3JskqQRS1Vtf63dXJKqqmx/TUnSjFF+dvoEAUlSd4aN\nJKk7w0aS1J1hI0nqzrCRJHVn2EiSujNsJEndGTaSpO4MG0lSd4aNJKk7w2bCtEeIC+dimHPxOOei\nD8Nm8kyNuwMLyNS4O7CATI27AwvI1Lg7sBgZNpKk7gwbSVJ3E/MTA+PugyTtjkb1EwMTETaSpPHy\nNJokqTvDRpLU3aIPmySnJLk1ye1Jzh93f3pLcliS65LclOTGJGe3+qckuTbJPyS5JsnBQ9u8u83P\nrUlOHl/vRy/JnknWJ7mqLU/kPAAkOTjJZ5LckuTmJC+axPlo47opyeYkn0yyz6TMQ5JLk9y
XZPNQ\n3ZzHnuS4Nn+3J7l4h3ZeVYv2D9gTuANYCuwFbACeO+5+dR7z04Dnt/IBwG3Ac4ELgXe1+vOBD7by\n0W1e9mrzdAewx7jHMcL5eAfwl8Bn2/JEzkMb458DZ7TyEuCgSZuPNpZvAPu05U8Bb56UeQBOApYB\nm4fq5jL2mev8a4Hlrfx54JTt7XuxH9ksB+6oqm9W1SPAXwG/NuY+dVVV91bVhlZ+CLgFOAQ4jcGH\nDe3111v514ArquqRqvomg/+gls9rpztJcijwq8AlwMwdNRM3DwBJDgJOqqpLAarq0ar6JyZvPn4I\nPALsl2QJsB/wHSZkHqpqFfCDWdVzGfuLkjwdeFJVrW3r/cXQNlu12MPmEOBbQ8v3tLqJkGQpg3/F\nrAGeWlX3tbfuA57ays9gMC8zFtMc/THwTuAnQ3WTOA8ARwL3J7ksybok/z3J/kzYfFTVPwJ/CPwf\nBiHzQFVdy4TNwyxzHfvs+m+zA3Oy2MNmYu/rTnIA8NfAOVX14PB7NTj23dbc7PbzluSVwPeqaj2P\nH9U8wSTMw5AlwAuAj1bVC4AfAb8zvMIkzEeSo4D/wOC00DOAA5K8cXidSZiHrdmBse+0xR423wYO\nG1o+jCcm8qKUZC8GQfOJqrqyVd+X5Gnt/acD32v1s+fo0Fa3uzseOC3JXcAVwEuTfILJm4cZ9wD3\nVNUNbfkzDMLn3gmbjxcCX6mq71fVo8DfAP+ayZuHYXP5f+KeVn/orPrtzsliD5uvAc9KsjTJ3sDr\ngM+OuU9dJQnwp8DNVXXR0FufZXAhlPZ65VD965PsneRI4FkMLv7t1qrqPVV1WFUdCbwe+N9V9SYm\nbB5mVNW9wLeSPLtVvQy4CbiKyZqPW4EXJ9m3/b/yMuBmJm8ehs3p/4n239IP292MAd40tM3Wjfvu\niHm4++JUBndk3QG8e9z9mYfxnsjgGsUGYH37OwV4CvAF4B+Aa4CDh7Z5T5ufW4GXj3sMHebkJTx+\nN9okz8PzgBuAjQz+RX/QJM4H8C4GQbuZwQXxvSZlHhgc5X8H+BcG17N/c2fGDhzX5u8O4MM7sm8f\nVyNJ6m6xn0aTJC0Aho0kqTvDRpLUnWEjSerOsJEkdWfYSJK6M2wkSd0ZNpKk7v4fDTB0VZHhb4gA\nAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "s = Series([66.1/1000,5.68,93.7,223,747,921],['baseline','itertuples','apply','iterrows','index','index2'])\n", "s.plot(kind='barh')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.10" } }, "nbformat": 4, "nbformat_minor": 0 }