{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "#### Read SWUDS data workbook\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#import modules\n", "import sys, os, xlrd\n", "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#Filenames\n", "dataDir = '../../Data'\n", "scratchDir = '../../Scratch'\n", "xlFN = dataDir + os.sep + '/LouisianaWaterUse_Distribute.xlsx'\n", "crosswalkFN = dataDir + os.sep + 'naics02tosic87.xls'\n", "crosswalkFN = dataDir + os.sep + '2017_NAICS_to_ISIC_4.xlsx'\n", "##https://www.census.gov/eos/www/naics/concordances/concordances.html\n", "outFN = dataDir + os.sep + \"LASsummary.xlsx\"\n", "outFNAll = dataDir + os.sep + \"LAData.xlsx\"" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#Read the xlsx file in as a dataframe\n", "dfRaw = pd.read_excel(xlFN,sheetname='data1')\n", "dfCrosswalk = pd.read_excel(crosswalkFN)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index([u'Part of NAICS US', u'2017\\nNAICS\\nUS ', u'2017 NAICS US TITLE',\n", " u'Part of ISIC', u'ISIC 4.0', u'ISIC Revision 4.0 Title',\n", " u'Notes: link content based on NAICS definition, entire NAICS industry if blank'],\n", " dtype='object')" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfCrosswalk.columns" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#Remove facility columns\n", "dfRaw.drop(dfRaw.columns[3:5],axis=1,inplace=True)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index([u'County Code', u'SIC1', u'Description', u'Water Use Type', u'MP ID',\n", " u'MP Type', u'MP Name', u'Source Water Type', u'Aquifer Code',\n", " u'Aquifer Name2', u'Aquifer Group ID', u'Aquifer Group Name', u'HUC-8',\n", " u'HUC-12', u'Latitude', u'Longitude', u'strMPLatitudeDD',\n", " u'strMPLongitudeDD', u'Year', u'Annual Amt. (MGD)', u'Part of NAICS US',\n", " u'2017\\nNAICS\\nUS ', u'2017 NAICS US TITLE', u'Part of ISIC',\n", " u'ISIC 4.0', u'ISIC Revision 4.0 Title',\n", " u'Notes: link content based on NAICS definition, entire NAICS industry if blank'],\n", " dtype='object')" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Join the NAICS codes to the data frame\n", "dfAll = pd.merge(dfRaw,dfCrosswalk,how='left',left_on='SIC1',right_on='ISIC 4.0')\n", "dfAll.columns" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#Select only 2000, 2005, and 2010 records and list the number of data for each column\n", "dfSelect = dfAll[(dfAll.Year == 2000) | (dfAll.Year == 2005) | (dfAll.Year == 2010)]\n", "dfSelect.to_excel(outFNAll,index=False)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#Pivot on year, listing usage (Annual Amt. MGD) by Source Water Type and SCI1 Description\n", "#pvt = dfSelect.pivot_table(columns='Year',index=('NAICSTEXT','Description'),values='Annual Amt. (MGD)',aggfunc='sum')\n", "pvt = dfSelect.pivot_table(columns='Year',index=('2017\\nNAICS\\nUS ','2017 NAICS US TITLE','ISIC 4.0','Description'),values='Annual Amt. (MGD)',aggfunc='sum')\n", "pvt.to_excel(outFN)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.12" } }, "nbformat": 4, "nbformat_minor": 2 }