{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# NumPy Data Access Using ArcPy " ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[(1.30711491471, 1449840, 37.0) (0.949885742909, 1209, 38.3)\n", " (0.814953582949, 35153, 41.4) (0.763664715408, 203807, 42.9)\n", " (0.867384795299, 40645, 48.1) (0.813593366955, 18817, 44.3)\n", " (1.5038884687, 952810, 32.1) (0.612516024774, 27440, 50.0)\n", " (1.23401339141, 157162, 36.3) (0.758988619134, 801288, 47.3)\n", " (0.666868957024, 26416, 46.4) (0.791296201062, 126462, 44.3)\n", " (0.625993159441, 142410, 56.9) (0.838241159573, 17955, 38.3)\n", " (0.709664321446, 663803, 48.3) (0.558059878736, 129835, 50.9)\n", " (0.795906059028, 58538, 49.6) (0.59516742156, 33719, 43.3)\n", " (0.98591007865, 9538191, 38.0) (0.626400277293, 123587, 58.4)\n", " (2.30772253595, 247520, 20.9) (0.763503906916, 17101, 44.5)\n", " (0.856496344376, 86407, 43.1) (0.659142469682, 212258, 49.3)\n", " (0.696038086886, 9406, 40.1) (0.906145315496, 12921, 31.1)\n", " (1.04151492927, 402990, 37.5) (1.28209385709, 124565, 36.6)\n", " (1.06427625094, 92520, 39.0) (1.2656840848, 2854513, 29.5)\n", " (1.25503460751, 251012, 36.9) (0.871632187308, 20754, 39.4)\n", " (0.809361084377, 1558985, 41.6) (0.970339838629, 1229940, 34.2)\n", " (0.97698382771, 53781, 55.9) (0.746540559206, 1718037, 43.3)\n", " (1.11463689739, 2827366, 34.7) (1.87815228123, 777885, 38.2)\n", " (0.829782920213, 567885, 50.6) (0.94607588002, 247839, 40.2)\n", " (2.00222842151, 707820, 28.5) (1.0993818875, 399990, 28.7)\n", " (1.83427209221, 1684947, 31.0) (1.35152165122, 255835, 36.9)\n", " (0.83764148855, 163615, 41.1) (0.788376107359, 3574, 41.7)\n", " (0.771636055401, 44218, 43.7) (0.93624192685, 396974, 37.3)\n", " (1.24377672351, 460421, 36.4) (0.8013076198, 449471, 49.6)\n", " (0.844111222501, 79192, 42.7) (0.64214819647, 56170, 44.2)\n", " 
(0.676074973366, 12980, 43.3) (0.662256627258, 368627, 55.8)\n", " (0.800930337009, 54658, 41.2) (1.13167786774, 756506, 36.2)\n", " (0.894016777176, 169835, 38.4) (0.644656909946, 60372, 43.4)]\n" ] } ], "source": [ "import arcpy as ARCPY\n", "import arcpy.da as DA\n", "inputFC = r'../data/CA_Polygons.shp'\n", "fieldNames = ['PCR2000', 'POP2000', 'PERCNOHS']\n", "tab = DA.TableToNumPyArray(inputFC, fieldNames)\n", "print(tab)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# SSDataObject\n", "1. Environment Settings (Except Extent)\n", "2. Bad Records\n", "3. Error/Warning Messages\n", "4. Localization\n", "5. **Feature Accounting**\n", "    * Cursors and DataAccess are not guaranteed to read attributes in order.\n", "\n", "    * Keeps track of the shapes and their attributes so that one can create output features without post-joins.\n", "\n", "    * Unique ID works with Spatial Weights Formats in ArcGIS, PySAL, R, MATLAB, GeoDa, etc." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[ 1.30711491 0.94988574 0.81495358 0.76366472 0.8673848 0.81359337\n", " 1.50388847 0.61251602 1.23401339 0.75898862 0.66686896 0.7912962\n", " 0.62599316 0.83824116 0.70966432 0.55805988 0.79590606 0.59516742\n", " 0.98591008 0.62640028 2.30772254 0.76350391 0.85649634 0.65914247\n", " 0.69603809 0.90614532 1.04151493 1.28209386 1.06427625 1.26568408\n", " 1.25503461 0.87163219 0.80936108 0.97033984 0.97698383 0.74654056\n", " 1.1146369 1.87815228 0.82978292 0.94607588 2.00222842 1.09938189\n", " 1.83427209 1.35152165 0.83764149 0.78837611 0.77163606 0.93624193\n", " 1.24377672 0.80130762 0.84411122 0.6421482 0.67607497 0.66225663\n", " 0.80093034 1.13167787 0.89401678 0.64465691]\n" ] } ], "source": [ "import SSDataObject as SSDO\n", "ssdo = SSDO.SSDataObject(inputFC)\n", "ssdo.obtainData(\"MYID\", fieldNames)\n", "print(ssdo.fields['PCR2000'].data)" ] }, { "cell_type":
"markdown", "metadata": {}, "source": [ "# Using PANDAS to get that R Feel" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " PCR2000 PERCNOHS POP2000\n", "158 1.307115 37.0 1449840\n", "159 0.949886 38.3 1209\n", "160 0.814954 41.4 35153\n", "161 0.763665 42.9 203807\n", "162 0.867385 48.1 40645\n", "163 0.813593 44.3 18817\n", "164 1.503888 32.1 952810\n", "165 0.612516 50.0 27440\n", "166 1.234013 36.3 157162\n", "167 0.758989 47.3 801288\n", "168 0.666869 46.4 26416\n", "169 0.791296 44.3 126462\n", "170 0.625993 56.9 142410\n", "171 0.838241 38.3 17955\n", "172 0.709664 48.3 663803\n", "173 0.558060 50.9 129835\n", "174 0.795906 49.6 58538\n", "175 0.595167 43.3 33719\n", "176 0.985910 38.0 9538191\n", "177 0.626400 58.4 123587\n", "178 2.307723 20.9 247520\n", "179 0.763504 44.5 17101\n", "180 0.856496 43.1 86407\n", "181 0.659142 49.3 212258\n", "182 0.696038 40.1 9406\n", "183 0.906145 31.1 12921\n", "184 1.041515 37.5 402990\n", "185 1.282094 36.6 124565\n", "186 1.064276 39.0 92520\n", "187 1.265684 29.5 2854513\n", "188 1.255035 36.9 251012\n", "189 0.871632 39.4 20754\n", "190 0.809361 41.6 1558985\n", "191 0.970340 34.2 1229940\n", "192 0.976984 55.9 53781\n", "193 0.746541 43.3 1718037\n", "194 1.114637 34.7 2827366\n", "195 1.878152 38.2 777885\n", "196 0.829783 50.6 567885\n", "197 0.946076 40.2 247839\n", "198 2.002228 28.5 707820\n", "199 1.099382 28.7 399990\n", "200 1.834272 31.0 1684947\n", "201 1.351522 36.9 255835\n", "202 0.837641 41.1 163615\n", "203 0.788376 41.7 3574\n", "204 0.771636 43.7 44218\n", "205 0.936242 37.3 396974\n", "206 1.243777 36.4 460421\n", "207 0.801308 49.6 449471\n", "208 0.844111 42.7 79192\n", "209 0.642148 44.2 56170\n", "210 0.676075 43.3 12980\n", "211 0.662257 55.8 368627\n", "212 0.800930 41.2 54658\n", "213 1.131678 36.2 756506\n", "214 0.894017 38.4 169835\n", "215 0.644657 43.4 60372\n" ] } ], "source": 
[
 "import pandas as PANDAS\n",
 "df = ssdo.getDataFrame()\n",
 "print(df)"
] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "# Advanced Analysis [SciPy Example - KMeans]" ] },
{ "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false },
 "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
  "[4 3 3 3 2 3 4 2 4 2 2 3 1 3 2 2 2 3 5 1 0 3 3 2 3 4 4 4 3 4 4 3 3 4 1 2 4\n",
  " 0 2 3 0 4 0 4 3 3 3 3 4 2 3 3 3 1 3 4 3 3]\n"
 ] } ],
 "source": [
  "import numpy as NUM\n",
  "import scipy.cluster.vq as CLUST\n",
  "import arcgisscripting as ARC\n",
  "\n",
  "# kmeans starts from randomly chosen centroids. Seeding NumPy's global RNG\n",
  "# first is intended to make reruns repeatable.\n",
  "# NOTE(review): confirm this holds for the installed SciPy version; the\n",
  "# stored output below predates the seed.\n",
  "randomSeed = 42\n",
  "NUM.random.seed(randomSeed)\n",
  "\n",
  "# DataFrame.as_matrix() was removed in pandas 1.0; .values returns the same\n",
  "# 2-D array and is available on both old and new pandas.\n",
  "X = df.values\n",
  "whiteData = CLUST.whiten(X)\n",
  "centers, distortion = CLUST.kmeans(whiteData, 6)\n",
  "groups = ARC._ss.closest_centroid(whiteData, centers)\n",
  "print(groups)"
 ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "# Max-P Regions Using PySAL" ] },
{ "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false },
 "outputs": [],
 "source": [
  "import pysal as PYSAL\n",
  "import WeightsUtilities as WU\n",
  "import SSUtilities as UTILS\n",
  "\n",
  "def swm2pysal(swmfile):\n",
  "    \"\"\"Build a PySAL weights object (PYSAL.W) from a spatial weights (.swm) file.\n",
  "\n",
  "    Reads swm.numObs entries through WU.SWMReader. Every entry whose `nn`\n",
  "    field is non-zero contributes its `nhs` and `whs` values, keyed by the\n",
  "    entry's master ID. Entries with nn == 0 (presumably features without\n",
  "    neighbors) are skipped and therefore absent from the returned W.\n",
  "\n",
  "    Parameters\n",
  "    ----------\n",
  "    swmfile : path of the spatial weights file to read.\n",
  "\n",
  "    Returns\n",
  "    -------\n",
  "    PYSAL.W built from the collected neighbors/weights, with the master IDs\n",
  "    in ascending order as its id list.\n",
  "    \"\"\"\n",
  "    neighbors = {}\n",
  "    weights = {}\n",
  "    swm = WU.SWMReader(swmfile)\n",
  "    try:\n",
  "        for _ in UTILS.ssRange(swm.numObs):\n",
  "            # The fifth field of each entry is not needed here.\n",
  "            masterID, nn, nhs, whs, _sumUnstandard = swm.swm.readEntry()\n",
  "            if nn != 0:\n",
  "                neighbors[masterID] = nhs\n",
  "                weights[masterID] = whs\n",
  "    finally:\n",
  "        # Close the reader even when reading an entry raises.\n",
  "        swm.close()\n",
  "    return PYSAL.W(neighbors, weights, sorted(neighbors))"
 ] },
{ "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false },
 "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
  "(0, [0, 38, 33, 6])\n",
  "(1, [44, 24, 46, 31, 51, 45, 10, 52, 22, 16, 7, 11, 27, 5, 48, 56, 57, 47, 30, 20, 17, 28, 50, 8, 3, 1, 54, 2, 21])\n",
  "(2, [40, 37, 43, 42, 26])\n",
  "(3, [13, 35, 29, 25])\n",
  "(4, [41, 55, 18])\n",
  "(5, [39, 14, 53, 15, 34, 9, 23, 19, 49, 4])\n",
  "(6, [12, 32, 36])\n"
 ] } ],
 "source": [
  "# NOTE(review): absolute, machine-specific path; repoint it to your local\n",
  "# copy of the spatial weights file before running.\n",
  "swmFile = r\"C:\\Data\\Conferences\\esri_stat_summit_16\\zzQueen.swm\"\n",
  "w = swm2pysal(swmFile)\n",
  "\n",
  "# Re-seed so this cell gives the same regions when it is rerun on its own.\n",
  "# NOTE(review): assumes Maxp draws from NumPy's global RNG; confirm.\n",
  "NUM.random.seed(randomSeed)\n",
  "\n",
  "# X follows the DataFrame column order printed earlier (PCR2000, PERCNOHS,\n",
  "# POP2000): columns 0-1 are the attributes, column 2 (POP2000) is the floor\n",
  "# variable and 3000000. is the floor passed to Maxp.\n",
  "maxp = PYSAL.region.Maxp(w, X[:,0:2], 3000000., floor_variable = X[:,2])\n",
  "\n",
  "# Start every feature at -1 instead of uninitialized memory: a feature that\n",
  "# appears in no region (e.g. one skipped by swm2pysal) then stays -1 here and\n",
  "# becomes 0 once 1 is added for output.\n",
  "maxpGroups = NUM.full((ssdo.numObs,), -1, int)\n",
  "for regionID, masterIDs in enumerate(maxp.regions):\n",
  "    # Translate master IDs to row positions via ssdo.master2Order.\n",
  "    orderIDs = [ssdo.master2Order[i] for i in masterIDs]\n",
  "    maxpGroups[orderIDs] = regionID\n",
  "    print((regionID, orderIDs))"
 ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "# SKATER for Comparison" ] },
{ "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false },
 "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
  "[1 4 4 4 5 4 1 4 4 5 4 4 5 4 5 5 4 4 0 5 2 5 4 5 4 4 5 4 4 3 4 4 5 4 5 5 3\n",
  " 1 5 5 1 5 1 1 4 4 4 4 4 5 4 4 4 5 4 5 4 4]\n"
 ] } ],
 "source": [
  "import Partition as PART\n",
  "skater = PART.Partition(ssdo, fieldNames, spaceConcept = \"GET_SPATIAL_WEIGHTS_FROM_FILE\",\n",
  "                        weightsFile = swmFile, kPartitions = 6)\n",
  "print(skater.partition)"
 ] },
{ "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false },
 "outputs": [],
 "source": [
  "ARCPY.env.overwriteOutput = True\n",
  "# NOTE(review): absolute, machine-specific output location; adjust before running.\n",
  "outputFC = r'C:\\Data\\Conferences\\esri_stat_summit_16\\PYDemo\\PYDemo.gdb\\cluster_output'\n",
  "# groups and maxpGroups hold zero-based labels; 1 is added before they are written.\n",
  "outK = SSDO.CandidateField('KMEANS', 'LONG', groups + 1)\n",
  "outMax = SSDO.CandidateField('MAXP', 'LONG', maxpGroups + 1)\n",
  "outSKATER = SSDO.CandidateField('SKATER', 'LONG', skater.partitionOutput)\n",
  "outFields = {'KMEANS': outK, 'MAXP': outMax, 'SKATER': outSKATER}\n",
  "appendFields = fieldNames + [\"NEW_NAME\"]\n",
  "ssdo.output2NewFC(outputFC, outFields, appendFields = appendFields)"
 ] }
],
"metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name":
"python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.4.4" }, "widgets": { "state": {}, "version": "1.1.2" } }, "nbformat": 4, "nbformat_minor": 0 }