{
 "metadata": {
  "name": "",
  "signature": "sha256:30d4e31075d813d052bdbb660414bf5a36cada4ac44613ae9f18cae1b6ca798e"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import pandas as pd\n",
      "\n",
      "# Raise pandas' display limits so wide/long frames are not cut off early.\n",
      "pd.options.display.max_columns = 5200\n",
      "pd.options.display.max_rows = 5200\n",
      "\n",
      "# Insert your BigQuery Project ID Here\n",
      "# Can be found in the Google web console\n",
      "#projectid = \"dazzling-will-91618\"\n",
      "\n",
      "#df = pd.read_gbq('SELECT * FROM dazzling-will-91618:taxi_all.nycb2010_stats LIMIT 200', project_id = projectid)\n",
      "\n",
      "#http://stackoverflow.com/questions/18267749/importerror-no-module-named-apiclient-discovery\n",
      "#sudo pip install --upgrade google-api-python-client\n",
      "\n",
      "#data_frame = pd.read_gbq('SELECT * FROM test_dataset.test_table LIMIT 200',index_col='index_c_name',, project_id = projectid)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 2
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import pandas as pd\n",
      "\n",
      "# Input CSVs: the taxi stats table (dfs) and the building table (dfb).\n",
      "#dfmIN = '/Volumes/Hotel/Dropbox/data/output/all/taxi_2013.csv'\n",
      "dfsIN = '/Volumes/Hotel/Dropbox/data/output/all/nycb2010_taxi_2013_stats_bldg_cnt.csv'\n",
      "dfbIN = '/Volumes/Hotel/Dropbox/data/input/building/bldg_dist_height.csv'\n",
      "\n",
      "# geoid is read as text here so the merge key is a string on this side.\n",
      "dfs = pd.read_csv(dfsIN, dtype={'geoid': object})\n",
      "\n",
      "# Lower-case the building table's column names, then keep only the merge key\n",
      "# and the bulk-density column; missing values become 0.\n",
      "dfb = pd.read_csv(dfbIN).rename(columns=lambda x: x.lower())\n",
      "dfb = dfb[['geoid', 'building_block_int_dis_tbl_bulkdens']]\n",
      "dfb = dfb.fillna(0)\n",
      "# NOTE(review): this geoid is cast to str only after pandas inferred its dtype.\n",
      "# If it was inferred as float the strings would end in '.0' and never match\n",
      "# dfs.geoid -- confirm against the input file.\n",
      "dfb['geoid'] = dfb['geoid'].astype(str)\n",
      "# Single-argument call form: prints the same on Python 2 and is valid Python 3.\n",
      "print(dfb.dtypes)\n",
      "\n",
      "# Keep rows with 50000 <= areasqft <= 300000 that have at least one building.\n",
      "dfs = dfs[(dfs.areasqft >= 50000) & (dfs.areasqft <= 300000)]\n",
      "dfs = dfs[(dfs.countbldg >= 1)]\n",
      "\n",
      "# Left merge on geoid. Rows with no match get NaN bulk density, which fails\n",
      "# the >= 3 comparison below, so they are dropped by the filter as well.\n",
      "df = dfs.merge(dfb, on='geoid', how='left')\n",
      "df = df[(df.building_block_int_dis_tbl_bulkdens >= 3)]\n",
      "\n",
      "df"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "geoid object\n",
"building_block_int_dis_tbl_bulkdens float64\n", "dtype: object\n" ] }, { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
geoidavgbrdistmedbrdiststdbrdistcountareasqftcountbldgbuilding_block_int_dis_tbl_bulkdens
03608501120100010.0000000.0000000.00000006807.30NaN0.000000
13608502260000010.0000000.0000000.000000045247.20NaN0.000000
23608500070010000.0000000.0000000.000000058960.1010.037041
33608500090010002.6151340.0000006.13332730244590.00281.660016
436085001100100011.7672360.00000023.26619831314611.00842.380148
536085001700100011.6651430.00000030.00973310293775.00491.366823
63608500200110009.1327770.00000024.63599615111006.00191.222633
736085002700100013.0780750.00000023.0667728150407.00443.716140
83608500290010004.8209500.49950312.47447810373418.00901.586655
93608500360010009.4559270.00000014.16511211244605.00281.142436
103608500390010001.2300960.0000004.22463329469846.00611.710747
1136085004000100015.6276510.00000034.9444905141297.00191.042037
123608500470010009.1137710.00000013.433418431058270.00930.566017
1336085005900100034.4896290.000000103.354739714824710.0070.012687
1436085006400100012.5783830.00000024.24751627200785.00201.063275
153608500740010008.8743040.00000015.4942237139084.00361.875482
163608500750010000.0000000.0000000.0000003123561.00321.739414
173608500770010002.5847880.0000004.9747505205771.00322.054480
183608500960110003.7836010.0000007.81714030640395.00961.226833
193608501050010000.4147950.0000001.43689212288531.00311.422864
203608501120110006.7556720.00000017.91898012196112.00211.340195
213608501120210000.0000000.0000000.000000392737.80121.092448
223608501140110000.0000000.0000000.000000266684.10121.083342
2336085011402100054.20265654.20265610.0591572183061.00431.538521
2436085012100100016.7502730.00000024.6199935170251.00431.644445
253608501220010006.4921261.49084411.0859284139819.00241.239573
263608501250010007.7926710.00000013.1680256215178.00511.971548
2736085012804100032.0960270.02680155.767375472184280.00881.767975
283608501280510000.5877460.0000001.09779812635446.001181.299699
293608501280610000.0000000.0000000.0000000281051.00481.138468
...........................
386863604706440010034.7486470.00000014.29309556126338.00281.424973
386873604706420010006.8435420.00000015.789983256401457.00923.244198
3868836047064400100414.7781270.00000034.9675021516403.10NaN0.000000
386893604706440020046.5998470.00000018.42772883197433.00652.601140
386903604710100010023.2072780.0000007.56295355202878.00652.243298
3869136047101000100311.0446330.00000020.86784757202716.00563.036255
386923604710100020024.4590660.00000013.04967548202659.00721.803503
386933604710100020032.9332920.0000007.81372426109458.00311.710231
386943608101710010036.9391770.00000019.4507702633119689.00110.133345
386953608101710010047.0809890.00000022.4796806735369285.0099.983437
386963608100550010065.2375180.00000016.4230845749436.48NaN0.000000
386973608100550010075.9295130.00000019.269368252970482.6012.416705
3869836081094500202220.5448910.00000043.2360245813553480.00842.260479
3869936081094500202312.4768210.00000020.42306235118509.00201.532482
387003608109450020247.7185150.00000020.48859963192455.00631.750790
387013608502260020200.0000000.0000000.0000000591.06NaN0.000000
387023608502260030009.0740530.00000020.077495302356680.002310.823473
387033608502260030011.0900070.0000002.4373305126572.00352.327165
387043608502390020052.5575760.0000005.1151524178216.00321.420231
387053608502390020061.3327990.0000002.590392681905.40161.400480
387063608502390020021.4667040.0000002.5186697259159.00621.695666
387073608502390020030.0000000.0000000.0000003141550.00241.222984
3870836085023900200718.4573920.00000028.7042967273797.00531.362336
387093600502740200010.0000000.0000000.000000066837.40NaN0.000000
387103608502480030010.0000000.0000000.0000000633172.0021.220499
387113608502480030020.0000000.0000000.0000000857302.0070.047088
3871236085022600401725.3720983.20114343.2402816484725.00541.346176
3871336085022600402725.6171307.88163739.44197991755900.00410.531735
387143606102750010067.0363600.00000019.46652813393888.00NaN0.000000
387153606102650030026.2989100.00000018.609811169657478.40NaN0.000000
\n", "

38716 rows \u00d7 8 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 5, "text": [ " geoid avgbrdist medbrdist stdbrdist count areasqft \\\n", "0 360850112010001 0.000000 0.000000 0.000000 0 6807.30 \n", "1 360850226000001 0.000000 0.000000 0.000000 0 45247.20 \n", "2 360850007001000 0.000000 0.000000 0.000000 0 58960.10 \n", "3 360850009001000 2.615134 0.000000 6.133327 30 244590.00 \n", "4 360850011001000 11.767236 0.000000 23.266198 31 314611.00 \n", "5 360850017001000 11.665143 0.000000 30.009733 10 293775.00 \n", "6 360850020011000 9.132777 0.000000 24.635996 15 111006.00 \n", "7 360850027001000 13.078075 0.000000 23.066772 8 150407.00 \n", "8 360850029001000 4.820950 0.499503 12.474478 10 373418.00 \n", "9 360850036001000 9.455927 0.000000 14.165112 11 244605.00 \n", "10 360850039001000 1.230096 0.000000 4.224633 29 469846.00 \n", "11 360850040001000 15.627651 0.000000 34.944490 5 141297.00 \n", "12 360850047001000 9.113771 0.000000 13.433418 43 1058270.00 \n", "13 360850059001000 34.489629 0.000000 103.354739 71 4824710.00 \n", "14 360850064001000 12.578383 0.000000 24.247516 27 200785.00 \n", "15 360850074001000 8.874304 0.000000 15.494223 7 139084.00 \n", "16 360850075001000 0.000000 0.000000 0.000000 3 123561.00 \n", "17 360850077001000 2.584788 0.000000 4.974750 5 205771.00 \n", "18 360850096011000 3.783601 0.000000 7.817140 30 640395.00 \n", "19 360850105001000 0.414795 0.000000 1.436892 12 288531.00 \n", "20 360850112011000 6.755672 0.000000 17.918980 12 196112.00 \n", "21 360850112021000 0.000000 0.000000 0.000000 3 92737.80 \n", "22 360850114011000 0.000000 0.000000 0.000000 2 66684.10 \n", "23 360850114021000 54.202656 54.202656 10.059157 2 183061.00 \n", "24 360850121001000 16.750273 0.000000 24.619993 5 170251.00 \n", "25 360850122001000 6.492126 1.490844 11.085928 4 139819.00 \n", "26 360850125001000 7.792671 0.000000 13.168025 6 215178.00 \n", "27 360850128041000 32.096027 0.026801 55.767375 47 2184280.00 \n", "28 360850128051000 0.587746 0.000000 
1.097798 12 635446.00 \n", "29 360850128061000 0.000000 0.000000 0.000000 0 281051.00 \n", "... ... ... ... ... ... ... \n", "38686 360470644001003 4.748647 0.000000 14.293095 56 126338.00 \n", "38687 360470642001000 6.843542 0.000000 15.789983 256 401457.00 \n", "38688 360470644001004 14.778127 0.000000 34.967502 15 16403.10 \n", "38689 360470644002004 6.599847 0.000000 18.427728 83 197433.00 \n", "38690 360471010001002 3.207278 0.000000 7.562953 55 202878.00 \n", "38691 360471010001003 11.044633 0.000000 20.867847 57 202716.00 \n", "38692 360471010002002 4.459066 0.000000 13.049675 48 202659.00 \n", "38693 360471010002003 2.933292 0.000000 7.813724 26 109458.00 \n", "38694 360810171001003 6.939177 0.000000 19.450770 2633 119689.00 \n", "38695 360810171001004 7.080989 0.000000 22.479680 6735 369285.00 \n", "38696 360810055001006 5.237518 0.000000 16.423084 574 9436.48 \n", "38697 360810055001007 5.929513 0.000000 19.269368 2529 70482.60 \n", "38698 360810945002022 20.544891 0.000000 43.236024 581 3553480.00 \n", "38699 360810945002023 12.476821 0.000000 20.423062 35 118509.00 \n", "38700 360810945002024 7.718515 0.000000 20.488599 63 192455.00 \n", "38701 360850226002020 0.000000 0.000000 0.000000 0 591.06 \n", "38702 360850226003000 9.074053 0.000000 20.077495 30 2356680.00 \n", "38703 360850226003001 1.090007 0.000000 2.437330 5 126572.00 \n", "38704 360850239002005 2.557576 0.000000 5.115152 4 178216.00 \n", "38705 360850239002006 1.332799 0.000000 2.590392 6 81905.40 \n", "38706 360850239002002 1.466704 0.000000 2.518669 7 259159.00 \n", "38707 360850239002003 0.000000 0.000000 0.000000 3 141550.00 \n", "38708 360850239002007 18.457392 0.000000 28.704296 7 273797.00 \n", "38709 360050274020001 0.000000 0.000000 0.000000 0 66837.40 \n", "38710 360850248003001 0.000000 0.000000 0.000000 0 633172.00 \n", "38711 360850248003002 0.000000 0.000000 0.000000 0 857302.00 \n", "38712 360850226004017 25.372098 3.201143 43.240281 6 484725.00 \n", "38713 360850226004027 
25.617130 7.881637 39.441979 9 1755900.00 \n", "38714 360610275001006 7.036360 0.000000 19.466528 133 93888.00 \n", "38715 360610265003002 6.298910 0.000000 18.609811 1696 57478.40 \n", "\n", " countbldg building_block_int_dis_tbl_bulkdens \n", "0 NaN 0.000000 \n", "1 NaN 0.000000 \n", "2 1 0.037041 \n", "3 28 1.660016 \n", "4 84 2.380148 \n", "5 49 1.366823 \n", "6 19 1.222633 \n", "7 44 3.716140 \n", "8 90 1.586655 \n", "9 28 1.142436 \n", "10 61 1.710747 \n", "11 19 1.042037 \n", "12 93 0.566017 \n", "13 7 0.012687 \n", "14 20 1.063275 \n", "15 36 1.875482 \n", "16 32 1.739414 \n", "17 32 2.054480 \n", "18 96 1.226833 \n", "19 31 1.422864 \n", "20 21 1.340195 \n", "21 12 1.092448 \n", "22 12 1.083342 \n", "23 43 1.538521 \n", "24 43 1.644445 \n", "25 24 1.239573 \n", "26 51 1.971548 \n", "27 88 1.767975 \n", "28 118 1.299699 \n", "29 48 1.138468 \n", "... ... ... \n", "38686 28 1.424973 \n", "38687 92 3.244198 \n", "38688 NaN 0.000000 \n", "38689 65 2.601140 \n", "38690 65 2.243298 \n", "38691 56 3.036255 \n", "38692 72 1.803503 \n", "38693 31 1.710231 \n", "38694 1 10.133345 \n", "38695 9 9.983437 \n", "38696 NaN 0.000000 \n", "38697 1 2.416705 \n", "38698 84 2.260479 \n", "38699 20 1.532482 \n", "38700 63 1.750790 \n", "38701 NaN 0.000000 \n", "38702 231 0.823473 \n", "38703 35 2.327165 \n", "38704 32 1.420231 \n", "38705 16 1.400480 \n", "38706 62 1.695666 \n", "38707 24 1.222984 \n", "38708 53 1.362336 \n", "38709 NaN 0.000000 \n", "38710 2 1.220499 \n", "38711 7 0.047088 \n", "38712 54 1.346176 \n", "38713 41 0.531735 \n", "38714 NaN 0.000000 \n", "38715 NaN 0.000000 \n", "\n", "[38716 rows x 8 columns]" ] } ], "prompt_number": 5 },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import os\n",
      "\n",
      "# Directory holding the stats CSV read here and the filtered CSV written by the\n",
      "# last cell; defined once so both paths can be retargeted together.\n",
      "DATA_DIR = '/Users/danielmsheehan/Dropbox/data/output/all'\n",
      "\n",
      "# Row filters: keep rows with pctcbbldg above MIN_PCTCBBLDG and count above MIN_COUNT.\n",
      "MIN_PCTCBBLDG = 0.333\n",
      "MIN_COUNT = 1000\n",
      "\n",
      "df = pd.read_csv(os.path.join(DATA_DIR, 'nycb2010_taxi_2013_stats_bldg_cnt_pctcbbldg.csv'),\n",
      "                 dtype={'geoid': object})\n",
      "\n",
      "# Earlier thresholds tried:\n",
      "#dfs[(dfs.areasqft >= 50000)] pctcbbldg > 0.20 AND count > 1000 \n",
      "df = df[(df.pctcbbldg > MIN_PCTCBBLDG)]\n",
      "\n",
      "# 'count' is selected with brackets because df.count is the DataFrame.count method.\n",
      "df = df[(df['count'].astype(float) > MIN_COUNT)]\n",
      "\n",
      "# Non-null values per column after filtering.\n",
      "df.count()\n",
      "#df.head(50)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 31,
       "text": [
        "geoid 4611\n",
        "avgbrdist 4611\n",
        "medbrdist 4611\n",
        "stdbrdist 4611\n",
        "count 4611\n",
        "areasqft 4611\n",
        "countbldg 4611\n",
        "bldgareaft 4611\n",
        "pctcbbldg 4611\n",
        "dist_bldg_hght 4611\n",
        "dtype: int64"
       ]
      }
     ],
     "prompt_number": 31
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# #%matplotlib inline\n",
      "\n",
      "# #from http://stackoverflow.com/questions/7714677/r-scatterplot-with-too-many-points\n",
      "# import numpy as np\n",
      "# import matplotlib.pyplot as plt\n",
      "\n",
      "# # N = 10000\n",
      "# # mean = [0, 0]\n",
      "# # cov = [[2, 2], [0, 2]]\n",
      "# # x,y = np.random.multivariate_normal(mean, cov, N).T\n",
      "\n",
      "# x = df.dist_bldg_hght\n",
      "# y = df.avgbrdist\n",
      "\n",
      "# plt.xlabel('building_block_int_dis_tbl_bulkdens')\n",
      "# plt.ylabel('avgbrdist')\n",
      "\n",
      "# plt.scatter(x, y, s=70, alpha=0.03)\n",
      "# plt.ylim((0, 30))\n",
      "# plt.xlim((0, 40))\n",
      "# plt.show()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 117
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from scipy.stats import linregress\n",
      "\n",
      "# Assign the regression inputs in this cell so it runs on a fresh kernel; before,\n",
      "# x and y were only assigned further down (and in the commented-out cell above,\n",
      "# whose column choice is reused here).\n",
      "x = df.dist_bldg_hght\n",
      "y = df.avgbrdist\n",
      "\n",
      "# x and y are arrays or lists; the result is\n",
      "# (slope, intercept, r_value, p_value, std_err).\n",
      "linregress(x, y)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 140,
       "text": [
        "(0.15933978469092655,\n",
        " 7.0073655131505124,\n",
        " 0.67502521913795477,\n",
        " 5.4965139892681322e-302,\n",
        " 0.0036561371976772286)"
       ]
      }
     ],
     "prompt_number": 140
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import math\n",
      "\n",
      "# Scale by 0.3048 (metres per foot); assumes avgbrdist is in feet -- TODO confirm.\n",
      "df['avgbrdist_meters'] = df['avgbrdist'] * 0.3048\n",
      "\n",
      "#http://code.hammerpig.com/log-transformations-in-python.html\n",
      "z = math.exp(10)\n",
      "\n",
      "# Single-argument call form: prints the same on Python 2 and is valid Python 3.\n",
      "print(z)\n",
      "\n",
      "# NOTE(review): despite the column name this is not a log transform -- it multiplies\n",
      "# avgbrdist by the constant e**10. Left unchanged because the column is written to\n",
      "# the exported CSV and the plot limits below assume this scale; confirm what\n",
      "# transform was intended.\n",
      "df['avgbrdist_logtran'] = df['avgbrdist'] * z"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "22026.4657948\n"
       ]
      }
     ],
     "prompt_number": 32
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import matplotlib.pyplot as plt\n",
      "import numpy as np\n",
      "\n",
      "# From here on x/y refer to the rescaled column.\n",
      "x = df.dist_bldg_hght\n",
      "y = df.avgbrdist_logtran\n",
      "\n",
      "fig, ax = plt.subplots()\n",
      "\n",
      "ax.set_xlabel('dist_bldg_hght')\n",
      "# Label now names the column that is actually plotted (it used to read 'avgbrdist').\n",
      "ax.set_ylabel('avgbrdist_logtran')\n",
      "\n",
      "# Large, nearly transparent markers so overlapping points build up darker areas.\n",
      "ax.scatter(x, y, s=70, alpha=0.03)\n",
      "\n",
      "# plt.ylim((0, 30))\n",
      "# plt.xlim((0, 40))\n",
      "ax.set_ylim((0, 1000000))\n",
      "ax.set_xlim((0, 160))\n",
      "\n",
      "# Degree-1 least-squares fit, drawn as a red line over the scatter.\n",
      "fit = np.polyfit(x, y, deg=1)\n",
      "ax.plot(x, fit[0] * x + fit[1], color='red')\n",
      "\n",
      "plt.show()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 35
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import numpy as np\n",
      "\n",
      "# Correlation coefficient of x and y: the off-diagonal entry of the 2x2 matrix.\n",
      "np.corrcoef(x, y)[0, 1]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 36,
       "text": [
        "0.6482169495740886"
       ]
      }
     ],
     "prompt_number": 36
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from scipy import stats\n",
      "\n",
      "# Linear regression of y on x; r_value is used by the next cell.\n",
      "slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 37
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "#http://docs.scipy.org/doc/scipy-0.15.1/reference/generated/scipy.stats.linregress.html\n",
      "# Built as one string so this prints what the Python 2 print statement printed\n",
      "# and is also valid Python 3.\n",
      "print('r-squared: %s' % (r_value ** 2))\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "r-squared: 0.420185213715\n"
       ]
      }
     ],
     "prompt_number": 38
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Write the filtered frame (including the columns added above) next to the input CSV.\n",
      "df.to_csv(os.path.join(DATA_DIR, 'nycb2010_taxi_2013_stats_bldg_cnt_pctcbbldg_LIMIT_third_bldg_1000_pts.csv'), index=False)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 39
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}