{
 "metadata": {
  "name": ""
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "An example of machine learning techniques -- principal component analysis and k-means clustering -- for visual analysis of high-dimensional disaster recovery data\n",
      "=================================================================================================\n",
      "**Data is simulated using ResilUS ([Miles and Chang, 2011](http://www.ingentaconnect.com/content/cagis/cagis/2011/00000038/00000001/art00003)).** \n",
      "*Coded by* [*Scott Miles*](http://resilscience.com/?page_id=2)"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Import some cool stuff for doing the analysis: [Scikit Learn](http://scikit-learn.org/stable/), [Matplotlib](http://matplotlib.org), and [Pandas](http://pandas.pydata.org).\n"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": true,
     "input": [
      "import os # used to read the Plotly credentials from the environment\n",
      "import warnings\n",
      "\n",
      "from sklearn import decomposition # allows you to do PCA\n",
      "from sklearn import cluster # allows you to do k means clustering\n",
      "from matplotlib import cm, colors # helps with creating a legend\n",
      "import pandas as pd # awesome data handling\n",
      "import plotly # imported under its own name; `py` is bound to the client object in the credentials cell\n",
      "\n",
      "warnings.filterwarnings('ignore')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 45
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Set display options for pandas for easier viewing of tables"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Use the `pd` alias bound in the imports cell (the bare name `pandas` is never imported).\n",
      "# 'display.height' and 'display.mpl_style' are intentionally not set: both options have been\n",
      "# removed from pandas (height is superseded by max_rows; mpl_style was only being switched off).\n",
      "pd.set_option('display.max_rows', 500)\n",
      "pd.set_option('display.max_columns', 500)\n",
      "pd.set_option('display.width', 500)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 27
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Plotly credentials are read from the environment so that no secret is stored in the notebook.\n",
      "# Set PLOTLY_API_KEY (and optionally PLOTLY_USERNAME) before starting the kernel;\n",
      "# a missing key fails fast with a KeyError instead of an opaque authentication error.\n",
      "username = os.environ.get('PLOTLY_USERNAME', 'geomando')\n",
      "api_key = os.environ['PLOTLY_API_KEY']\n",
      "py = plotly.plotly(username=username, key=api_key)"
],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 47
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Read data for PCA and pull out columns and rows for labeling later"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Load the recovery data table, then keep its column and row labels for annotating plots later\n",
      "rec_data = pd.read_csv('sample_data.csv')\n",
      "column_labels = rec_data.columns\n",
      "row_labels = rec_data.index.astype('int')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 28
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Print out recovery data. Notice how pandas permits pretty printing of tables"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "rec_data"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [ "
\n", " | % No Mitigation | \n", "% High Income | \n", "% Low Income | \n", "% MFR | \n", "% SFR | \n", "% Owner | \n", "% Renter | \n", "% Damaged | \n", "% Low Damage | \n", "% Medium Damage | \n", "% High Damage | \n", "% MFR Damaged | \n", "% Injured | \n", "Health Recovery Wks | \n", "# Residents Left | \n", "% Residents Left | \n", "Household Debt Wk 13 | \n", "Household Debt Wk 208 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "88.902861 | \n", "14.397988 | \n", "85.602012 | \n", "29.424709 | \n", "70.575291 | \n", "48.066646 | \n", "51.933354 | \n", "0.125747 | \n", "25.000000 | \n", "50.000000 | \n", "25.000000 | \n", "0.427350 | \n", "100.000000 | \n", "4 | \n", "64 | \n", "0.1981 | \n", "0.0011 | \n", "0.0007 | \n", "
1 | \n", "80.258398 | \n", "6.201550 | \n", "93.798450 | \n", "56.227390 | \n", "43.772610 | \n", "21.705426 | \n", "78.294574 | \n", "0.671835 | \n", "7.692308 | \n", "61.538462 | \n", "30.769231 | \n", "1.011029 | \n", "92.307692 | \n", "1 | \n", "62 | \n", "0.3116 | \n", "0.0099 | \n", "0.0040 | \n", "
2 | \n", "94.282311 | \n", "5.479452 | \n", "94.520548 | \n", "34.901727 | \n", "65.098273 | \n", "34.633711 | \n", "65.366289 | \n", "0.268017 | \n", "0.000000 | \n", "88.888889 | \n", "11.111111 | \n", "0.597270 | \n", "100.000000 | \n", "1 | \n", "78 | \n", "0.2415 | \n", "0.0009 | \n", "0.0006 | \n", "
3 | \n", "86.457342 | \n", "12.781478 | \n", "87.218522 | \n", "51.538218 | \n", "48.461782 | \n", "41.801459 | \n", "58.198541 | \n", "1.998097 | \n", "53.968254 | \n", "38.095238 | \n", "7.936508 | \n", "3.753846 | \n", "96.825397 | \n", "120 | \n", "69 | \n", "0.2270 | \n", "0.0347 | \n", "0.0327 | \n", "
4 | \n", "86.523929 | \n", "9.319899 | \n", "90.680101 | \n", "45.541562 | \n", "54.458438 | \n", "37.002519 | \n", "62.997481 | \n", "0.377834 | \n", "13.333333 | \n", "40.000000 | \n", "46.666667 | \n", "0.829646 | \n", "100.000000 | \n", "105 | \n", "91 | \n", "0.2258 | \n", "0.0111 | \n", "0.0063 | \n", "
5 | \n", "67.575758 | \n", "17.595960 | \n", "82.404040 | \n", "79.878788 | \n", "20.121212 | \n", "22.222222 | \n", "77.777778 | \n", "1.757576 | \n", "36.781609 | \n", "44.827586 | \n", "18.390805 | \n", "2.124431 | \n", "88.505747 | \n", "120 | \n", "130 | \n", "0.2808 | \n", "0.0152 | \n", "0.0098 | \n", "
6 | \n", "69.792439 | \n", "17.550037 | \n", "82.449963 | \n", "73.202372 | \n", "26.797628 | \n", "24.369904 | \n", "75.630096 | \n", "1.593773 | \n", "41.860465 | \n", "38.372093 | \n", "19.767442 | \n", "2.101266 | \n", "94.186047 | \n", "120 | \n", "145 | \n", "0.2599 | \n", "0.0162 | \n", "0.0090 | \n", "
7 | \n", "72.354740 | \n", "9.816514 | \n", "90.183486 | \n", "71.376147 | \n", "28.623853 | \n", "19.143731 | \n", "80.856269 | \n", "0.550459 | \n", "22.222222 | \n", "61.111111 | \n", "16.666667 | \n", "0.771208 | \n", "88.888889 | \n", "8 | \n", "100 | \n", "0.3226 | \n", "0.0112 | \n", "0.0052 | \n", "
8 | \n", "75.730230 | \n", "49.465685 | \n", "50.534315 | \n", "37.117074 | \n", "62.882926 | \n", "63.452862 | \n", "36.547138 | \n", "1.733555 | \n", "38.356164 | \n", "43.835616 | \n", "17.808219 | \n", "4.094690 | \n", "78.082192 | \n", "12 | \n", "84 | \n", "0.1795 | \n", "0.0220 | \n", "0.0117 | \n", "
9 | \n", "76.138681 | \n", "17.165194 | \n", "82.834806 | \n", "61.080897 | \n", "38.919103 | \n", "33.752549 | \n", "66.247451 | \n", "6.084296 | \n", "50.279330 | \n", "35.754190 | \n", "13.966480 | \n", "9.015025 | \n", "93.854749 | \n", "27 | \n", "81 | \n", "0.2765 | \n", "0.0340 | \n", "0.0112 | \n", "
10 | \n", "87.480438 | \n", "19.092332 | \n", "80.907668 | \n", "20.031299 | \n", "79.968701 | \n", "64.241002 | \n", "35.758998 | \n", "11.189358 | \n", "60.839161 | \n", "27.272727 | \n", "11.888112 | \n", "19.531250 | \n", "90.209790 | \n", "31 | \n", "28 | \n", "0.1972 | \n", "0.0913 | \n", "0.0327 | \n", "
11 | \n", "93.720336 | \n", "12.340270 | \n", "87.659730 | \n", "9.602045 | \n", "90.397955 | \n", "36.692223 | \n", "63.307777 | \n", "7.192406 | \n", "51.269036 | \n", "37.055838 | \n", "11.675127 | \n", "16.730038 | \n", "92.893401 | \n", "120 | \n", "66 | \n", "0.2661 | \n", "0.0397 | \n", "0.0255 | \n", "
12 | \n", "95.712695 | \n", "12.193764 | \n", "87.806236 | \n", "6.737194 | \n", "93.262806 | \n", "53.229399 | \n", "46.770601 | \n", "15.367483 | \n", "60.144928 | \n", "24.637681 | \n", "15.217391 | \n", "23.966942 | \n", "90.217391 | \n", "120 | \n", "40 | \n", "0.2299 | \n", "0.0486 | \n", "0.0344 | \n", "
13 | \n", "95.726496 | \n", "14.574899 | \n", "85.425101 | \n", "3.463788 | \n", "96.536212 | \n", "63.607737 | \n", "36.392263 | \n", "0.314890 | \n", "0.000000 | \n", "71.428571 | \n", "28.571429 | \n", "2.597403 | \n", "100.000000 | \n", "120 | \n", "60 | \n", "0.2469 | \n", "0.0160 | \n", "0.0084 | \n", "
14 | \n", "95.519542 | \n", "21.401335 | \n", "78.598665 | \n", "2.621544 | \n", "97.378456 | \n", "76.739752 | \n", "23.260248 | \n", "26.358437 | \n", "66.184448 | \n", "21.518987 | \n", "12.296564 | \n", "43.636364 | \n", "82.640145 | \n", "120 | \n", "42 | \n", "0.1963 | \n", "0.1018 | \n", "0.0353 | \n", "
15 | \n", "94.163424 | \n", "25.052380 | \n", "74.947620 | \n", "4.160431 | \n", "95.839569 | \n", "67.015864 | \n", "32.984136 | \n", "3.621670 | \n", "59.504132 | \n", "29.752066 | \n", "10.743802 | \n", "9.352518 | \n", "86.776860 | \n", "53 | \n", "76 | \n", "0.2190 | \n", "0.0526 | \n", "0.0354 | \n", "
16 | \n", "94.011229 | \n", "28.820961 | \n", "71.179039 | \n", "4.179663 | \n", "95.820337 | \n", "69.931379 | \n", "30.068621 | \n", "15.127885 | \n", "61.443299 | \n", "25.360825 | \n", "13.195876 | \n", "24.626866 | \n", "79.587629 | \n", "120 | \n", "66 | \n", "0.1935 | \n", "0.0560 | \n", "0.0291 | \n", "
17 | \n", "93.702178 | \n", "21.542084 | \n", "78.457916 | \n", "5.002943 | \n", "94.997057 | \n", "62.831077 | \n", "37.168923 | \n", "3.766922 | \n", "50.000000 | \n", "35.156250 | \n", "14.843750 | \n", "7.058824 | \n", "87.500000 | \n", "120 | \n", "76 | \n", "0.2190 | \n", "0.0508 | \n", "0.0284 | \n", "
18 | \n", "90.293542 | \n", "24.422701 | \n", "75.577299 | \n", "12.054795 | \n", "87.945205 | \n", "40.273973 | \n", "59.726027 | \n", "1.487280 | \n", "57.894737 | \n", "23.684211 | \n", "18.421053 | \n", "6.493506 | \n", "84.210526 | \n", "5 | \n", "47 | \n", "0.1888 | \n", "0.0252 | \n", "0.0164 | \n", "
19 | \n", "93.202694 | \n", "17.677077 | \n", "82.322923 | \n", "9.471321 | \n", "90.528679 | \n", "40.640947 | \n", "59.359053 | \n", "0.796081 | \n", "30.769231 | \n", "56.410256 | \n", "12.820513 | \n", "4.094828 | \n", "94.871795 | \n", "120 | \n", "141 | \n", "0.2640 | \n", "0.0233 | \n", "0.0102 | \n", "
20 | \n", "96.365173 | \n", "11.355311 | \n", "88.644689 | \n", "7.213300 | \n", "92.786700 | \n", "43.927867 | \n", "56.072133 | \n", "0.028177 | \n", "0.000000 | \n", "0.000000 | \n", "100.000000 | \n", "0.390625 | \n", "100.000000 | \n", "1 | \n", "94 | \n", "0.2500 | \n", "0.0005 | \n", "0.0005 | \n", "