{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# NumPy Data Access Using ArcPy " ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[(1.30711491471, 1449840, 37.0) (0.949885742909, 1209, 38.3)\n", " (0.814953582949, 35153, 41.4) (0.763664715408, 203807, 42.9)\n", " (0.867384795299, 40645, 48.1) (0.813593366955, 18817, 44.3)\n", " (1.5038884687, 952810, 32.1) (0.612516024774, 27440, 50.0)\n", " (1.23401339141, 157162, 36.3) (0.758988619134, 801288, 47.3)\n", " (0.666868957024, 26416, 46.4) (0.791296201062, 126462, 44.3)\n", " (0.625993159441, 142410, 56.9) (0.838241159573, 17955, 38.3)\n", " (0.709664321446, 663803, 48.3) (0.558059878736, 129835, 50.9)\n", " (0.795906059028, 58538, 49.6) (0.59516742156, 33719, 43.3)\n", " (0.98591007865, 9538191, 38.0) (0.626400277293, 123587, 58.4)\n", " (2.30772253595, 247520, 20.9) (0.763503906916, 17101, 44.5)\n", " (0.856496344376, 86407, 43.1) (0.659142469682, 212258, 49.3)\n", " (0.696038086886, 9406, 40.1) (0.906145315496, 12921, 31.1)\n", " (1.04151492927, 402990, 37.5) (1.28209385709, 124565, 36.6)\n", " (1.06427625094, 92520, 39.0) (1.2656840848, 2854513, 29.5)\n", " (1.25503460751, 251012, 36.9) (0.871632187308, 20754, 39.4)\n", " (0.809361084377, 1558985, 41.6) (0.970339838629, 1229940, 34.2)\n", " (0.97698382771, 53781, 55.9) (0.746540559206, 1718037, 43.3)\n", " (1.11463689739, 2827366, 34.7) (1.87815228123, 777885, 38.2)\n", " (0.829782920213, 567885, 50.6) (0.94607588002, 247839, 40.2)\n", " (2.00222842151, 707820, 28.5) (1.0993818875, 399990, 28.7)\n", " (1.83427209221, 1684947, 31.0) (1.35152165122, 255835, 36.9)\n", " (0.83764148855, 163615, 41.1) (0.788376107359, 3574, 41.7)\n", " (0.771636055401, 44218, 43.7) (0.93624192685, 396974, 37.3)\n", " (1.24377672351, 460421, 36.4) (0.8013076198, 449471, 49.6)\n", " (0.844111222501, 79192, 42.7) (0.64214819647, 56170, 44.2)\n", " (0.676074973366, 12980, 43.3) (0.662256627258, 368627, 55.8)\n", " (0.800930337009, 54658, 41.2) (1.13167786774, 756506, 36.2)\n", " (0.894016777176, 169835, 38.4) (0.644656909946, 60372, 43.4)]\n" ] } ], "source": [ "import arcpy as ARCPY\n", "import arcpy.da as DA\n", "inputFC = r'../data/CA_Polygons.shp'\n", "fieldNames = ['PCR2000', 'POP2000', 'PERCNOHS']\n", "tab = DA.TableToNumPyArray(inputFC, fieldNames)\n", "print(tab)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# SSDataObject\n", "1. Environment Settings (Except Extent)\n", "2. Bad Records\n", "3. Error/Warning Messages\n", "4. Localization\n", "5. **Feature Accounting**\n", " * Cursors and DataAccess are not assured to read attributes in order.\n", "\n", " * Keeps track of the shapes and their attributes so that one can create output features w/o post-joins.\n", "\n", " * Unique ID works with Spatial Weights Formats in ArcGIS, PySAL, R, Matlab, GeoDa etc.." ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[1513043 1162 38026 220000 45485 21475 1052605 28634 181151\n", " 933168 28112 134762 175136 18519 842813 153183 64753 34811\n", " 9826773 151468 252789 18267 87812 256754 9695 14224 416366\n", " 136840 98765 3017598 350233 19949 2202978 1422094 55520 2042027\n", " 3105115 805340 687661 270112 719604 424630 1786267 262880 177308\n", " 3229 44897 414125 484712 515420 94859 63637 13768 443509\n", " 55221 825445 201105 72364]\n" ] } ], "source": [ "import SSDataObject as SSDO\n", "import os as OS\n", "inputFC = r'../data/CA_Polygons.shp'\n", "fullFC = OS.path.abspath(inputFC)\n", "fullPath, fcName = OS.path.split(fullFC)\n", "ssdo = SSDO.SSDataObject(inputFC)\n", "uniqueIDField = \"MYID\"\n", "fieldNames = ['PCR2010', 'POP2010', 'PERCNOHS']\n", "ssdo.obtainData(uniqueIDField, fieldNames)\n", "ssdo = SSDO.SSDataObject(inputFC)\n", "ssdo.obtainData(\"MYID\", fieldNames)\n", "print(ssdo.fields['POP2010'].data)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Using PANDAS to get that R Feel" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " PCR2010 PERCNOHS POP2010\n", "158 1.206422 37.0 1513043\n", "159 1.079837 38.3 1162\n", "160 0.886305 41.4 38026\n", "161 0.816018 42.9 220000\n", "162 0.877746 48.1 45485\n", "163 1.130213 44.3 21475\n", "164 1.391533 32.1 1052605\n", "165 0.675817 50.0 28634\n", "166 1.209810 36.3 181151\n", "167 0.775344 47.3 933168\n", "168 0.832085 46.4 28112\n", "169 0.818885 44.3 134762\n", "170 0.685975 56.9 175136\n", "171 0.931374 38.3 18519\n", "172 0.742849 48.3 842813\n", "173 0.666717 50.9 153183\n", "174 0.816463 49.6 64753\n", "175 0.692517 43.3 34811\n", "176 1.048479 38.0 9826773\n", "177 0.646304 58.4 151468\n", "178 2.080740 20.9 252789\n", "179 0.839421 44.5 18267\n", "180 0.890045 43.1 87812\n", "181 0.685651 49.3 256754\n", "182 0.849918 40.1 9695\n", "183 0.984652 31.1 14224\n", "184 1.022456 37.5 416366\n", "185 1.249204 36.6 136840\n", "186 1.081794 39.0 98765\n", "187 1.250990 29.5 3017598\n", "188 1.179444 36.9 350233\n", "189 0.973480 39.4 19949\n", "190 0.733145 41.6 2202978\n", "191 0.945829 34.2 1422094\n", "192 0.889421 55.9 55520\n", "193 0.742851 43.3 2042027\n", "194 1.144717 34.7 3105115\n", "195 1.760952 38.2 805340\n", "196 0.771014 50.6 687661\n", "197 0.978288 40.2 270112\n", "198 1.705111 28.5 719604\n", "199 1.100690 28.7 424630\n", "200 1.455588 31.0 1786267\n", "201 1.189412 36.9 262880\n", "202 0.879299 41.1 177308\n", "203 0.833698 41.7 3229\n", "204 0.842544 43.7 44897\n", "205 0.951722 37.3 414125\n", "206 1.108553 36.4 484712\n", "207 0.782673 49.6 515420\n", "208 0.837630 42.7 94859\n", "209 0.664463 44.2 63637\n", "210 0.739619 43.3 13768\n", "211 0.703468 55.8 443509\n", "212 0.897840 41.2 55221\n", "213 1.120266 36.2 825445\n", "214 0.915843 38.4 201105\n", "215 0.747482 43.4 72364\n" ] } ], "source": [ "import pandas as PANDAS\n", "df = ssdo.getDataFrame()\n", "print(df)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Advanced Analysis [SciPy Example - KMeans]" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[1 1 2 2 2 1 3 0 1 0 2 2 4 1 0 4 0 2 5 4 3 2 2 0 2 1 1 1 1 3 1 1 0 1 4 0 3\n", " 3 0 1 3 1 3 1 2 2 2 1 1 0 2 2 2 4 2 1 1 2]\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "C:\\ArcGIS\\bin\\Python\\envs\\arcgispro-py3\\lib\\site-packages\\ipykernel\\__main__.py:7: DeprecationWarning: PyArray_FromDims: use PyArray_SimpleNew.\n", "C:\\ArcGIS\\bin\\Python\\envs\\arcgispro-py3\\lib\\site-packages\\ipykernel\\__main__.py:7: DeprecationWarning: PyArray_FromDimsAndDataAndDescr: use PyArray_NewFromDescr.\n" ] } ], "source": [ "import numpy as NUM\n", "import scipy.cluster.vq as CLUST\n", "import arcgisscripting as ARC\n", "X = df.as_matrix()\n", "whiteData = CLUST.whiten(X)\n", "centers, distortion = CLUST.kmeans(whiteData, 6)\n", "groups = ARC._ss.closest_centroid(whiteData, centers)\n", "print(groups)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Max-P Regions Using PySAL" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 5, 2, 1, 1, 2, 2, 4, 1, 0, 2, 2,\n", " 1, 2, 2, 4, 0, 2, 7, 2, 2, 5, 0, 1, 5, 6, 3, 2, 4, 3, 4, 3, 4, 2, 2,\n", " 2, 0, 0, 1, 0, 2, 2, 1, 2, 4, 0, 2])" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pysal as PYSAL\n", "import pysal2ArcGIS as PYSAL_UTILS\n", "swmFile = OS.path.join(fullPath, \"rook_bin.swm\")\n", "w = PYSAL_UTILS.swm2Weights(ssdo, swmFile)\n", "maxp = PYSAL.region.Maxp(w, X[:,0:2], 3000000., floor_variable = X[:,2])\n", "maxpGroups = NUM.empty((ssdo.numObs,), int)\n", "for regionID, orderIDs in enumerate(maxp.regions):\n", " maxpGroups[orderIDs] = regionID\n", "maxpGroups" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# SKATER for Comparison" ] }, { "cell_type": "code", "execution_count": 42, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[1 4 4 4 5 4 1 4 4 5 4 4 5 4 5 5 4 4 0 5 2 5 4 5 4 4 5 4 4 3 4 4 5 4 5 5 3\n", " 1 5 5 1 5 1 5 4 4 4 4 4 5 4 4 4 5 4 5 4 4]\n" ] } ], "source": [ "import Partition as PART\n", "skater = PART.Partition(ssdo, fieldNames, spaceConcept = \"GET_SPATIAL_WEIGHTS_FROM_FILE\",\n", " weightsFile = swmFile, kPartitions = 6)\n", "print(skater.partition)" ] }, { "cell_type": "code", "execution_count": 43, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ARCPY.env.overwriteOutput = True\n", "outputFC = r'../data/cluster_output.shp'\n", "outK = SSDO.CandidateField('KMEANS', 'LONG', groups + 1)\n", "outMax = SSDO.CandidateField('MAXP', 'LONG', maxpGroups + 1)\n", "outSKATER = SSDO.CandidateField('SKATER', 'LONG', skater.partitionOutput)\n", "outFields = {'KMEANS': outK, 'MAXP': outMax, 'SKATER': outSKATER}\n", "appendFields = fieldNames + [\"NEW_NAME\"]\n", "ssdo.output2NewFC(outputFC, outFields, appendFields = appendFields)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.4.4" }, "widgets": { "state": {}, "version": "1.1.2" } }, "nbformat": 4, "nbformat_minor": 0 }