{ "metadata": { "name": "Subtable totals test" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "code", "collapsed": false, "input": [ "# Set up data\n", "N = 10000\n", "data = pd.DataFrame({\n", " 'Weight': 200*numpy.random.rand(N),\n", " 'Length': 40*numpy.random.rand(N),\n", " 'Count': numpy.random.randint(0, 100, N),\n", " 'Year': numpy.random.randint(1995, 2012, N),\n", " 'Width': 100*numpy.random.rand(N),\n", " 'Height': 30*numpy.random.rand(N),\n", " 'City': numpy.random.randint(0, 3, N),\n", " 'Cars': numpy.random.randint(0, 6, N),\n", "})\n", "city = 'Bangalore Chennai Delhi Mumbai'.split()\n", "cars = 'Mercedes Premier Jaguar BMW Maruti Audi Porsche MM'.split()\n", "data['City'] = data['City'].apply(lambda v: city[v])\n", "data['Cars'] = data['Cars'].apply(lambda v: cars[v])\n", "data['Weight Series'] = data['Weight']" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 53 }, { "cell_type": "code", "collapsed": false, "input": [ "# Arguments\n", "calc = {\n", " 'Density': lambda v: v['Weight'] / (v['Length'] * v['Width'] * v['Height']),\n", " 'Height %': lambda v: v['Height'],\n", " 'Aspect ratio': lambda v: 1,\n", "}\n", "\n", "format = {\n", " 'City' : lambda x: '{:s}'.format(str(x)),\n", " 'Count': lambda x: '{:,.0f}'.format(float(x)),\n", " 'Year' : lambda x: '{:d}'.format(int(x)),\n", " 'Weight' : lambda x: '{:,.1f} kg'.format(x),\n", " 'Length' : lambda x: '{:,.1f} cm'.format(x),\n", " 'Width' : lambda x: '{:,.1f} cm'.format(x),\n", " 'Height' : lambda x: '{:,.1f} cm'.format(x),\n", " 'Height %': lambda x, data: '{:.0f}%'.format(100. * x / data['Height'].max()),\n", " 'Aspect ratio': lambda x, row: '{:,.1f}'.format(row['Length'] / row['Width']),\n", " 'Weight Series': lambda x: '{:.1f} kg'.format(x) if numpy.isscalar(x) else SVG('sparkline.svg', width=100, height=30, data=x[:20]) ,\n", "}\n", "\n", "headers = {\n", " 'Count': '#',\n", " 'Weight': 'Weight (kg)',\n", "}\n", "\n", "agg = {\n", " 'City': lambda v: v.iget(0) + ', ...',\n", " 'Count': numpy.sum,\n", " 'Cars': 'count', # lambda v: v.iget(0) + ', ...',\n", " 'Year': numpy.mean,\n", " 'Weight': numpy.mean,\n", " 'Length': numpy.mean,\n", " 'Width': numpy.mean,\n", " 'Height': numpy.mean,\n", " 'Weight Series': lambda v: tuple(v),\n", "}\n", "\n", "kwargs = {'data':data, 'format':format, 'agg': agg, 'headers': headers, 'classes':True}" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 54 }, { "cell_type": "code", "collapsed": false, "input": [ "# Specific configuration\n", "groupby = ['City', 'Cars']" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 55 }, { "cell_type": "code", "collapsed": false, "input": [ "groups = data.groupby(groupby)\n", "by = {k: v for k, v in agg.iteritems() if k not in groupby}" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 56 }, { "cell_type": "code", "collapsed": false, "input": [ "total = len(groupby) - 1\n", "subgroupby = groupby[:total]\n", "subgroups = data.groupby(subgroupby)\n", "subby = {k: v for k, v in agg.iteritems() if k not in subgroupby}\n", "subdata = subgroups.sum() if not subby else subgroups.agg(subby)\n", "\n", "data = groups.sum() if not by else groups.agg(by)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 57 }, { "cell_type": "code", "collapsed": false, "input": [ "if calc is not None:\n", " for new_column, fn in calc.iteritems():\n", " data[new_column] = fn(data)\n", " if subdata is not None:\n", " subdata[new_column] = fn(subdata)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 61 }, { "cell_type": "code", "collapsed": false, "input": [ "indices = {False: [], None: groupby, True: data.index.names}.get(None, None)\n", "index_col = {col: i for i, col in enumerate(data.index.names)}\n", "index_pos = {col: index_col[col] for col in indices}" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 64 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }