{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Read SWUDS data workbook\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#import modules\n",
    "import sys, os, xlrd\n",
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#Filenames\n",
    "dataDir = '../../Data'\n",
    "scratchDir = '../../Scratch'\n",
    "xlFN = dataDir + os.sep + '/LouisianaWaterUse_Distribute.xlsx'\n",
    "crosswalkFN = dataDir + os.sep + 'naics02tosic87.xls'\n",
    "crosswalkFN = dataDir + os.sep + '2017_NAICS_to_ISIC_4.xlsx'\n",
    "##https://www.census.gov/eos/www/naics/concordances/concordances.html\n",
    "outFN = dataDir + os.sep + \"LASsummary.xlsx\"\n",
    "outFNAll = dataDir + os.sep + \"LAData.xlsx\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#Read the xlsx file in as a dataframe\n",
    "dfRaw = pd.read_excel(xlFN,sheetname='data1')\n",
    "dfCrosswalk = pd.read_excel(crosswalkFN)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index([u'Part of NAICS US', u'2017\\nNAICS\\nUS  ', u'2017 NAICS US TITLE',\n",
       "       u'Part of ISIC', u'ISIC 4.0', u'ISIC Revision 4.0 Title',\n",
       "       u'Notes:  link content based on NAICS definition, entire NAICS industry if blank'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dfCrosswalk.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#Remove facility columns\n",
    "dfRaw.drop(dfRaw.columns[3:5],axis=1,inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index([u'County Code', u'SIC1', u'Description', u'Water Use Type', u'MP ID',\n",
       "       u'MP Type', u'MP Name', u'Source Water Type', u'Aquifer Code',\n",
       "       u'Aquifer Name2', u'Aquifer Group ID', u'Aquifer Group Name', u'HUC-8',\n",
       "       u'HUC-12', u'Latitude', u'Longitude', u'strMPLatitudeDD',\n",
       "       u'strMPLongitudeDD', u'Year', u'Annual Amt. (MGD)', u'Part of NAICS US',\n",
       "       u'2017\\nNAICS\\nUS  ', u'2017 NAICS US TITLE', u'Part of ISIC',\n",
       "       u'ISIC 4.0', u'ISIC Revision 4.0 Title',\n",
       "       u'Notes:  link content based on NAICS definition, entire NAICS industry if blank'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Join the NAICS codes to the data frame\n",
    "dfAll = pd.merge(dfRaw,dfCrosswalk,how='left',left_on='SIC1',right_on='ISIC 4.0')\n",
    "dfAll.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#Select only 2000, 2005, and 2010 records and list the number of data for each column\n",
    "dfSelect = dfAll[(dfAll.Year == 2000) | (dfAll.Year == 2005) | (dfAll.Year == 2010)]\n",
    "dfSelect.to_excel(outFNAll,index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#Pivot on year, listing usage (Annual Amt. MGD) by Source Water  Type and SCI1 Description\n",
    "#pvt = dfSelect.pivot_table(columns='Year',index=('NAICSTEXT','Description'),values='Annual Amt. (MGD)',aggfunc='sum')\n",
    "pvt = dfSelect.pivot_table(columns='Year',index=('2017\\nNAICS\\nUS  ','2017 NAICS US TITLE','ISIC 4.0','Description'),values='Annual Amt. (MGD)',aggfunc='sum')\n",
    "pvt.to_excel(outFN)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}