{
 "metadata": {
  "name": "",
  "signature": "sha256:838b0fcd380ebb2a228a1b4b6dc09328c9ad34259298d27f0bef7396e0ce28e9"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "# Per-`geoid` summary statistics of `dist_roadbed` (taxi 2013)\n",
      "\n",
      "Reads `output/all/taxi_2013.csv`, runs `describe()` on `dist_roadbed` within each `geoid`, and writes the mean, median (50%), standard deviation and count per `geoid` to separate CSV files plus one merged table."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import os\n",
      "\n",
      "import numpy as np\n",
      "import pandas as pd\n",
      "\n",
      "pd.options.display.max_columns = 5200\n",
      "pd.options.display.max_rows = 5200\n",
      "\n",
      "# Data root. Earlier revisions also assigned '/Users/danielmsheehan/Desktop/data/'\n",
      "# and '/Users/danielmsheehan/Dropbox/data/' to pj, but only the last assignment\n",
      "# ever took effect, so that value is kept as the default.\n",
      "# Set the TAXI_DATA_DIR environment variable to run against another copy of the data.\n",
      "pj = os.environ.get('TAXI_DATA_DIR', '/Volumes/Hotel/Dropbox/data/')\n",
      "out_dir = os.path.join(pj, 'output', 'all')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# geoid is read as text so the identifiers are never treated as numbers.\n",
      "df = pd.read_csv(os.path.join(out_dir, 'taxi_2013.csv'), dtype={'geoid': object})\n",
      "\n",
      "dfg = df.groupby('geoid')\n",
      "dfg_desc = dfg['dist_roadbed'].describe()\n",
      "\n",
      "describe_path = os.path.join(out_dir, 'taxi_2013_describe.csv')\n",
      "\n",
      "# pandas < 0.20 returns a Series indexed by (geoid, stat); pandas >= 0.20 returns\n",
      "# a DataFrame with one column per statistic. Either way the file is written as\n",
      "# headerless geoid,stat,value rows, which is the layout the next cell reads.\n",
      "if isinstance(dfg_desc, pd.DataFrame):\n",
      "    dfg_desc = dfg_desc.reset_index().melt(id_vars='geoid', var_name='stat', value_name='value')\n",
      "    dfg_desc.to_csv(describe_path, header=False, index=False)\n",
      "else:\n",
      "    dfg_desc.to_csv(describe_path, header=False)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# The file has no header row, so the column names must be supplied here:\n",
      "# without `names`, the columns are labelled 0/1/2 while parsing and the\n",
      "# dtype entry for 'geoid' is silently ignored (geoid is then parsed as int64).\n",
      "df_desc = pd.read_csv(\n",
      "    os.path.join(out_dir, 'taxi_2013_describe.csv'),\n",
      "    header=None,\n",
      "    names=['geoid', 'stat', 'value'],\n",
      "    dtype={'geoid': object},\n",
      ")\n",
      "\n",
      "\n",
      "def stat_table(desc, stat, column):\n",
      "    \"\"\"Return a new geoid/`column` frame holding the rows of `desc` whose stat equals `stat`.\"\"\"\n",
      "    rows = desc.loc[desc['stat'] == stat, ['geoid', 'value']]\n",
      "    return rows.rename(columns={'value': column})\n",
      "\n",
      "\n",
      "df_avg = stat_table(df_desc, 'mean', 'avgbrdist')\n",
      "df_med = stat_table(df_desc, '50%', 'medbrdist')\n",
      "# std is NaN for groups with a single row; store 0 instead (inf is zeroed as well).\n",
      "df_std = stat_table(df_desc, 'std', 'stdbrdist').fillna(0).replace(np.inf, 0)\n",
      "df_cnt = stat_table(df_desc, 'count', 'count')\n",
      "# Counts come back from the CSV as floats; assign() builds a new frame rather\n",
      "# than writing into a filtered slice.\n",
      "df_cnt = df_cnt.assign(count=df_cnt['count'].astype(int))\n",
      "\n",
      "df_all = (\n",
      "    df_avg\n",
      "    .merge(df_med, on='geoid', how='left')\n",
      "    .merge(df_std, on='geoid', how='left')\n",
      "    .merge(df_cnt, on='geoid', how='left')\n",
      ")\n",
      "\n",
      "# describe() yields one row per statistic per group, so the merged table\n",
      "# must have exactly one row per geoid.\n",
      "assert df_all['geoid'].is_unique, 'duplicate geoid rows in merged statistics'\n",
      "\n",
      "tables = [\n",
      "    ('stats', df_all),\n",
      "    ('avg', df_avg),\n",
      "    ('med', df_med),\n",
      "    ('std', df_std),\n",
      "    ('cnt', df_cnt),\n",
      "]\n",
      "for suffix, table in tables:\n",
      "    table.to_csv(os.path.join(out_dir, 'taxi_2013_describe_{0}.csv'.format(suffix)), index=False)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Groups with the most rows first.\n",
      "df_cnt.sort_values('count', ascending=False).head(10)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Total number of rows summarised; only the count column is summed,\n",
      "# because adding up geoid identifiers is meaningless.\n",
      "df_cnt['count'].sum()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# -f skips the interactive confirmation so Run All does not stall here.\n",
      "%reset -f"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}