{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# NumPy Data Access Using ArcPy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[(1.30711491471, 1449840, 37.0) (0.949885742909, 1209, 38.3)\n",
      " (0.814953582949, 35153, 41.4) (0.763664715408, 203807, 42.9)\n",
      " (0.867384795299, 40645, 48.1) (0.813593366955, 18817, 44.3)\n",
      " (1.5038884687, 952810, 32.1) (0.612516024774, 27440, 50.0)\n",
      " (1.23401339141, 157162, 36.3) (0.758988619134, 801288, 47.3)\n",
      " (0.666868957024, 26416, 46.4) (0.791296201062, 126462, 44.3)\n",
      " (0.625993159441, 142410, 56.9) (0.838241159573, 17955, 38.3)\n",
      " (0.709664321446, 663803, 48.3) (0.558059878736, 129835, 50.9)\n",
      " (0.795906059028, 58538, 49.6) (0.59516742156, 33719, 43.3)\n",
      " (0.98591007865, 9538191, 38.0) (0.626400277293, 123587, 58.4)\n",
      " (2.30772253595, 247520, 20.9) (0.763503906916, 17101, 44.5)\n",
      " (0.856496344376, 86407, 43.1) (0.659142469682, 212258, 49.3)\n",
      " (0.696038086886, 9406, 40.1) (0.906145315496, 12921, 31.1)\n",
      " (1.04151492927, 402990, 37.5) (1.28209385709, 124565, 36.6)\n",
      " (1.06427625094, 92520, 39.0) (1.2656840848, 2854513, 29.5)\n",
      " (1.25503460751, 251012, 36.9) (0.871632187308, 20754, 39.4)\n",
      " (0.809361084377, 1558985, 41.6) (0.970339838629, 1229940, 34.2)\n",
      " (0.97698382771, 53781, 55.9) (0.746540559206, 1718037, 43.3)\n",
      " (1.11463689739, 2827366, 34.7) (1.87815228123, 777885, 38.2)\n",
      " (0.829782920213, 567885, 50.6) (0.94607588002, 247839, 40.2)\n",
      " (2.00222842151, 707820, 28.5) (1.0993818875, 399990, 28.7)\n",
      " (1.83427209221, 1684947, 31.0) (1.35152165122, 255835, 36.9)\n",
      " (0.83764148855, 163615, 41.1) (0.788376107359, 3574, 41.7)\n",
      " (0.771636055401, 44218, 43.7) (0.93624192685, 396974, 37.3)\n",
      " (1.24377672351, 460421, 36.4) (0.8013076198, 449471, 49.6)\n",
      " (0.844111222501, 79192, 42.7) (0.64214819647, 56170, 44.2)\n",
      " (0.676074973366, 12980, 43.3) (0.662256627258, 368627, 55.8)\n",
      " (0.800930337009, 54658, 41.2) (1.13167786774, 756506, 36.2)\n",
      " (0.894016777176, 169835, 38.4) (0.644656909946, 60372, 43.4)]\n"
     ]
    }
   ],
   "source": [
    "import arcpy as ARCPY\n",
    "import arcpy.da as DA\n",
    "# Feature class to read, given as a relative path.\n",
    "inputFC = r'../data/CA_Polygons.shp'\n",
    "\n",
    "# Attribute fields to pull out of the feature class's table.\n",
    "fieldNames = ['PCR2000', 'POP2000', 'PERCNOHS']\n",
    "\n",
    "# Each printed record carries the requested fields in the order listed above.\n",
    "tab = DA.TableToNumPyArray(inputFC, fieldNames)\n",
    "print(tab)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# SSDataObject\n",
    "1. Environment Settings (Except Extent)\n",
    "2. Bad Records\n",
    "3. Error/Warning Messages\n",
    "4. Localization\n",
    "5. **Feature Accounting**\n",
    "   * Cursors and DataAccess are not guaranteed to read attributes in order.\n",
    "\n",
    "   * Keeps track of the shapes and their attributes so that one can create output features without post-joins.\n",
    "\n",
    "   * Unique ID works with Spatial Weights Formats in ArcGIS, PySAL, R, Matlab, GeoDa, etc."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ 1.30711491  0.94988574  0.81495358  0.76366472  0.8673848   0.81359337\n",
      "  1.50388847  0.61251602  1.23401339  0.75898862  0.66686896  0.7912962\n",
      "  0.62599316  0.83824116  0.70966432  0.55805988  0.79590606  0.59516742\n",
      "  0.98591008  0.62640028  2.30772254  0.76350391  0.85649634  0.65914247\n",
      "  0.69603809  0.90614532  1.04151493  1.28209386  1.06427625  1.26568408\n",
      "  1.25503461  0.87163219  0.80936108  0.97033984  0.97698383  0.74654056\n",
      "  1.1146369   1.87815228  0.82978292  0.94607588  2.00222842  1.09938189\n",
      "  1.83427209  1.35152165  0.83764149  0.78837611  0.77163606  0.93624193\n",
      "  1.24377672  0.80130762  0.84411122  0.6421482   0.67607497  0.66225663\n",
      "  0.80093034  1.13167787  0.89401678  0.64465691]\n"
     ]
    }
   ],
   "source": [
    "import SSDataObject as SSDO\n",
    "# Wrap the feature class in an SSDataObject, then load the analysis fields into it.\n",
    "ssdo = SSDO.SSDataObject(inputFC)\n",
    "\n",
    "# NOTE(review): the first argument is presumably the unique (master) ID field -- confirm against SSDataObject.obtainData.\n",
    "ssdo.obtainData(\"MYID\", fieldNames)\n",
    "\n",
    "# Every loaded field is reachable by name; its values sit on the .data attribute.\n",
    "print(ssdo.fields['PCR2000'].data)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Using PANDAS to get that R Feel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "      PCR2000  PERCNOHS  POP2000\n",
      "158  1.307115      37.0  1449840\n",
      "159  0.949886      38.3     1209\n",
      "160  0.814954      41.4    35153\n",
      "161  0.763665      42.9   203807\n",
      "162  0.867385      48.1    40645\n",
      "163  0.813593      44.3    18817\n",
      "164  1.503888      32.1   952810\n",
      "165  0.612516      50.0    27440\n",
      "166  1.234013      36.3   157162\n",
      "167  0.758989      47.3   801288\n",
      "168  0.666869      46.4    26416\n",
      "169  0.791296      44.3   126462\n",
      "170  0.625993      56.9   142410\n",
      "171  0.838241      38.3    17955\n",
      "172  0.709664      48.3   663803\n",
      "173  0.558060      50.9   129835\n",
      "174  0.795906      49.6    58538\n",
      "175  0.595167      43.3    33719\n",
      "176  0.985910      38.0  9538191\n",
      "177  0.626400      58.4   123587\n",
      "178  2.307723      20.9   247520\n",
      "179  0.763504      44.5    17101\n",
      "180  0.856496      43.1    86407\n",
      "181  0.659142      49.3   212258\n",
      "182  0.696038      40.1     9406\n",
      "183  0.906145      31.1    12921\n",
      "184  1.041515      37.5   402990\n",
      "185  1.282094      36.6   124565\n",
      "186  1.064276      39.0    92520\n",
      "187  1.265684      29.5  2854513\n",
      "188  1.255035      36.9   251012\n",
      "189  0.871632      39.4    20754\n",
      "190  0.809361      41.6  1558985\n",
      "191  0.970340      34.2  1229940\n",
      "192  0.976984      55.9    53781\n",
      "193  0.746541      43.3  1718037\n",
      "194  1.114637      34.7  2827366\n",
      "195  1.878152      38.2   777885\n",
      "196  0.829783      50.6   567885\n",
      "197  0.946076      40.2   247839\n",
      "198  2.002228      28.5   707820\n",
      "199  1.099382      28.7   399990\n",
      "200  1.834272      31.0  1684947\n",
      "201  1.351522      36.9   255835\n",
      "202  0.837641      41.1   163615\n",
      "203  0.788376      41.7     3574\n",
      "204  0.771636      43.7    44218\n",
      "205  0.936242      37.3   396974\n",
      "206  1.243777      36.4   460421\n",
      "207  0.801308      49.6   449471\n",
      "208  0.844111      42.7    79192\n",
      "209  0.642148      44.2    56170\n",
      "210  0.676075      43.3    12980\n",
      "211  0.662257      55.8   368627\n",
      "212  0.800930      41.2    54658\n",
      "213  1.131678      36.2   756506\n",
      "214  0.894017      38.4   169835\n",
      "215  0.644657      43.4    60372\n"
     ]
    }
   ],
   "source": [
    "import pandas as PANDAS\n",
    "# Ask the SSDataObject for the fields it loaded, returned as a data frame.\n",
    "df = ssdo.getDataFrame()\n",
    "\n",
    "print(df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Advanced Analysis [SciPy Example - KMeans]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[4 3 3 3 2 3 4 2 4 2 2 3 1 3 2 2 2 3 5 1 0 3 3 2 3 4 4 4 3 4 4 3 3 4 1 2 4\n",
      " 0 2 3 0 4 0 4 3 3 3 3 4 2 3 3 3 1 3 4 3 3]\n"
     ]
    }
   ],
   "source": [
    "import numpy as NUM\n",
    "import scipy.cluster.vq as CLUST\n",
    "import arcgisscripting as ARC\n",
    "# k-means starts from randomly chosen centroids; seed NumPy's global random state\n",
    "# so that Restart & Run All gives the same groups each time.\n",
    "# NOTE(review): recent SciPy releases also accept an explicit seed argument -- confirm\n",
    "# which mechanism the installed version honours.\n",
    "NUM.random.seed(12345)\n",
    "\n",
    "# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;\n",
    "# .values yields the same 2-D array and is available in every pandas release.\n",
    "X = df.values\n",
    "\n",
    "# Scale each column to unit variance so no single field dominates the distances.\n",
    "whiteData = CLUST.whiten(X)\n",
    "centers, distortion = CLUST.kmeans(whiteData, 6)\n",
    "\n",
    "# NOTE(review): ARC._ss is a private ArcGIS module; CLUST.vq(whiteData, centers)[0]\n",
    "# looks like the public SciPy equivalent -- confirm before swapping it in.\n",
    "groups = ARC._ss.closest_centroid(whiteData, centers)\n",
    "print(groups)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Max-P Regions Using PySAL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import pysal as PYSAL\n",
    "import WeightsUtilities as WU\n",
    "import SSUtilities as UTILS\n",
    "\n",
    "def swm2pysal(swmfile):\n",
    "    \"\"\"Convert an ArcGIS spatial weights matrix (.swm) file into a PySAL W.\n",
    "\n",
    "    Args:\n",
    "        swmfile: Path to the .swm file, handed to WU.SWMReader.\n",
    "\n",
    "    Returns:\n",
    "        A PYSAL.W built from the neighbor IDs and weights read from the file,\n",
    "        with its ids in ascending order. Entries that report zero neighbors\n",
    "        are left out of the result.\n",
    "    \"\"\"\n",
    "    neighbors = {}\n",
    "    weights = {}\n",
    "    swm = WU.SWMReader(swmfile)\n",
    "    try:\n",
    "        # The reader reports how many entries the file holds; read exactly that many.\n",
    "        for _ in UTILS.ssRange(swm.numObs):\n",
    "            masterID, nn, nhs, whs, sumUnstandard = swm.swm.readEntry()\n",
    "            if nn != 0:\n",
    "                neighbors[masterID] = nhs\n",
    "                weights[masterID] = whs\n",
    "    finally:\n",
    "        # Close the reader even if reading an entry raises.\n",
    "        swm.close()\n",
    "    ids = sorted(neighbors)\n",
    "    return PYSAL.W(neighbors, weights, ids)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(0, [0, 38, 33, 6])\n",
      "(1, [44, 24, 46, 31, 51, 45, 10, 52, 22, 16, 7, 11, 27, 5, 48, 56, 57, 47, 30, 20, 17, 28, 50, 8, 3, 1, 54, 2, 21])\n",
      "(2, [40, 37, 43, 42, 26])\n",
      "(3, [13, 35, 29, 25])\n",
      "(4, [41, 55, 18])\n",
      "(5, [39, 14, 53, 15, 34, 9, 23, 19, 49, 4])\n",
      "(6, [12, 32, 36])\n"
     ]
    }
   ],
   "source": [
    "swmFile = r\"C:\\Data\\Conferences\\esri_stat_summit_16\\zzQueen.swm\"\n",
    "w = swm2pysal(swmFile)\n",
    "\n",
    "# Max-P is randomized; seed NumPy's global random state so re-runs give the same regions.\n",
    "# NOTE(review): PySAL's own Maxp example also seeds Python's random module -- confirm whether that is needed here.\n",
    "NUM.random.seed(12345)\n",
    "\n",
    "# Pick the columns by name instead of by position: the data frame orders its columns\n",
    "# PCR2000, PERCNOHS, POP2000, which is not the order of fieldNames, so positional\n",
    "# slices are easy to misread and break silently if the column order ever changes.\n",
    "attrs = df[['PCR2000', 'PERCNOHS']].values\n",
    "pop = df['POP2000'].values.astype(float)\n",
    "\n",
    "# Minimum total POP2000 that every region has to reach.\n",
    "minRegionPop = 3000000.\n",
    "maxp = PYSAL.region.Maxp(w, attrs, minRegionPop, floor_variable = pop)\n",
    "\n",
    "# Start every observation at -1 so anything Max-P leaves unassigned is recognizable\n",
    "# (NUM.empty would leave arbitrary values in those slots).\n",
    "maxpGroups = NUM.full((ssdo.numObs,), -1, dtype = int)\n",
    "for regionID, masterIDs in enumerate(maxp.regions):\n",
    "    # Regions list master IDs; translate them to row positions before assigning.\n",
    "    orderIDs = [ssdo.master2Order[i] for i in masterIDs]\n",
    "    maxpGroups[orderIDs] = regionID\n",
    "    print((regionID, orderIDs))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# SKATER for Comparison"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1 4 4 4 5 4 1 4 4 5 4 4 5 4 5 5 4 4 0 5 2 5 4 5 4 4 5 4 4 3 4 4 5 4 5 5 3\n",
      " 1 5 5 1 5 1 1 4 4 4 4 4 5 4 4 4 5 4 5 4 4]\n"
     ]
    }
   ],
   "source": [
    "import Partition as PART\n",
    "# Partition the same fields into 6 groups, taking spatial weights from the .swm file.\n",
    "skater = PART.Partition(\n",
    "    ssdo,\n",
    "    fieldNames,\n",
    "    spaceConcept = \"GET_SPATIAL_WEIGHTS_FROM_FILE\",\n",
    "    weightsFile = swmFile,\n",
    "    kPartitions = 6\n",
    ")\n",
    "print(skater.partition)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Let a re-run replace an output feature class that already exists.\n",
    "ARCPY.env.overwriteOutput = True\n",
    "outputFC = r'C:\\Data\\Conferences\\esri_stat_summit_16\\PYDemo\\PYDemo.gdb\\cluster_output'\n",
    "\n",
    "# One candidate output field per method; the k-means and Max-P labels are shifted up by one.\n",
    "outK = SSDO.CandidateField('KMEANS', 'LONG', groups + 1)\n",
    "outMax = SSDO.CandidateField('MAXP', 'LONG', maxpGroups + 1)\n",
    "outSKATER = SSDO.CandidateField('SKATER', 'LONG', skater.partitionOutput)\n",
    "outFields = {\n",
    "    'KMEANS': outK,\n",
    "    'MAXP': outMax,\n",
    "    'SKATER': outSKATER\n",
    "}\n",
    "\n",
    "# Carry the analysis fields plus NEW_NAME over to the output feature class.\n",
    "appendFields = list(fieldNames)\n",
    "appendFields.append(\"NEW_NAME\")\n",
    "ssdo.output2NewFC(outputFC, outFields, appendFields = appendFields)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.4"
  },
  "widgets": {
   "state": {},
   "version": "1.1.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}