{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Downloads State Water Use data for 2000/2005/2010 and creates a formatted Physical Water Use Supply table. \n", "\n", "Sample URL (Louisiana)\n", "https://waterdata.usgs.gov/la/nwis/water_use?format=rdb&rdb_compression=value&wu_area=County&wu_year=2000%2C2005%2C2010&wu_county=ALL&wu_category=ALL&wu_county_nms=--ALL%2BCounties--&wu_category_nms=--ALL%2BCategories--\n", "\n", "Workflow\n", "* Construct the url and download the data into a pandas data frame\n", "* Melt/gather the usage columns into row values under the column name 'Group'\n", "* Remove rows with no usage data (identified by not having \"Mgal\" in the 'Group' name)\n", "*" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#Import modules\n", "import sys, os, urllib\n", "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#Specify the state and year to process\n", "state = 'la' #Louisiana\n", "year = 2010" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#Set the output file location to the Data/State data folder\n", "outFolder = '../../Data/Statedata/'\n", "outFN = outFolder + os.sep + '{0}_{1}.csv'.format(state,year)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#Set the data URL path and parameters and construct the url\n", "path = 'https://waterdata.usgs.gov/{}/nwis/water_use?'.format(state)\n", "values = {'format':'rdb',\n", " 'rdb_compression':'value',\n", " 'wu_area':'County',\n", " 'wu_year': year,\n", " 'wu_county':'ALL',\n", " 'wu_county_nms':'--ALL+Counties--',\n", " 'wu_category_nms':'--ALL+Categories--'\n", " }\n", "url = path + urllib.urlencode(values)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#Pull data in using the URL and remove the 2nd row of headers\n", "dfRaw = pd.read_table(url,comment='#',header=[0,1],na_values='-')\n", "dfRaw.columns = dfRaw.columns.droplevel(level=1)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
state_cdstate_namecounty_cdcounty_nmyearTotal Population total population of area, in thousandsPublic Supply population served by groundwater, in thousandsPublic Supply population served by surface water, in thousandsPublic Supply total population served, in thousandsPublic Supply self-supplied groundwater withdrawals, fresh, in Mgal/d...Hydroelectric Power total offstream surface-water withdrawals in Mgal/dHydroelectric Power power generated by instream use, in gigawatt-hoursHydroelectric Power power generated by offstream use, in gigawatt-hoursHydroelectric Power total power generated, in gigawatt-hoursHydroelectric Power number of instream facilitiesHydroelectric Power number of offstream facilitiesHydroelectric Power total number of facilitiesWastewater Treatment returns by public wastewater facilities, in Mgal/dWastewater Treatment number of public wastewater facilitiesWastewater Treatment reclaimed wastewater released by wastewater facilities, in Mgal/d
022Louisiana1Acadia Parish201061.77344.6660.00044.6665.82...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
122Louisiana3Allen Parish201025.76422.5730.00022.5734.27...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
222Louisiana5Ascension Parish2010107.21529.51027.09456.6043.02...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
322Louisiana7Assumption Parish201023.4210.00023.04123.0410.00...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
422Louisiana9Avoyelles Parish201042.07339.7560.00039.7563.85...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "

5 rows × 281 columns

\n", "
" ], "text/plain": [ " state_cd state_name county_cd county_nm year \\\n", "0 22 Louisiana 1 Acadia Parish 2010 \n", "1 22 Louisiana 3 Allen Parish 2010 \n", "2 22 Louisiana 5 Ascension Parish 2010 \n", "3 22 Louisiana 7 Assumption Parish 2010 \n", "4 22 Louisiana 9 Avoyelles Parish 2010 \n", "\n", " Total Population total population of area, in thousands \\\n", "0 61.773 \n", "1 25.764 \n", "2 107.215 \n", "3 23.421 \n", "4 42.073 \n", "\n", " Public Supply population served by groundwater, in thousands \\\n", "0 44.666 \n", "1 22.573 \n", "2 29.510 \n", "3 0.000 \n", "4 39.756 \n", "\n", " Public Supply population served by surface water, in thousands \\\n", "0 0.000 \n", "1 0.000 \n", "2 27.094 \n", "3 23.041 \n", "4 0.000 \n", "\n", " Public Supply total population served, in thousands \\\n", "0 44.666 \n", "1 22.573 \n", "2 56.604 \n", "3 23.041 \n", "4 39.756 \n", "\n", " Public Supply self-supplied groundwater withdrawals, fresh, in Mgal/d \\\n", "0 5.82 \n", "1 4.27 \n", "2 3.02 \n", "3 0.00 \n", "4 3.85 \n", "\n", " ... \\\n", "0 ... \n", "1 ... \n", "2 ... \n", "3 ... \n", "4 ... \n", "\n", " Hydroelectric Power total offstream surface-water withdrawals in Mgal/d \\\n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "\n", " Hydroelectric Power power generated by instream use, in gigawatt-hours \\\n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "\n", " Hydroelectric Power power generated by offstream use, in gigawatt-hours \\\n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "\n", " Hydroelectric Power total power generated, in gigawatt-hours \\\n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "\n", " Hydroelectric Power number of instream facilities \\\n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "\n", " Hydroelectric Power number of offstream facilities \\\n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "\n", " Hydroelectric Power total number of facilities \\\n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "\n", " Wastewater Treatment returns by public wastewater facilities, in Mgal/d \\\n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "\n", " Wastewater Treatment number of public wastewater facilities \\\n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "\n", " Wastewater Treatment reclaimed wastewater released by wastewater facilities, in Mgal/d \n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "\n", "[5 rows x 281 columns]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Read locally, for debugging, and drop the 2nd row of headers\n", "#dfRaw = pd.read_table('../../Data/Proprietary/LA.txt',comment='#',header=[0,1],na_values='-')\n", "#dfRaw.columns = dfRaw.columns.droplevel(level=1)\n", "dfRaw.head()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(17664, 7)" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Tidy the data\n", "rowHeadings = ['county_cd', 'county_nm', 'state_cd', 'state_name', 'year']\n", "dfTidy = pd.melt(dfRaw,id_vars=rowHeadings,value_name='MGal',var_name='Group')\n", "dfTidy.shape" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(15040, 7)" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Remove rows that don't have volume data (i.e. keep only columns with 'Mgal' in the name)\n", "dfTidy = dfTidy[dfTidy['Group'].str.contains('Mgal')]\n", "dfTidy.shape" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "50620.130000000005" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Change the type of the MGal column to float\n", "dfTidy['MGal'] = dfTidy.MGal.astype(np.float)\n", "dfTidy['MGal'].sum()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
county_cdcounty_nmstate_cdstate_nameyearGroupMGal
2561Acadia Parish22Louisiana2010Public Supply self-supplied groundwater withdr...5.82
2573Allen Parish22Louisiana2010Public Supply self-supplied groundwater withdr...4.27
2585Ascension Parish22Louisiana2010Public Supply self-supplied groundwater withdr...3.02
2597Assumption Parish22Louisiana2010Public Supply self-supplied groundwater withdr...0.00
2609Avoyelles Parish22Louisiana2010Public Supply self-supplied groundwater withdr...3.85
\n", "
" ], "text/plain": [ " county_cd county_nm state_cd state_name year \\\n", "256 1 Acadia Parish 22 Louisiana 2010 \n", "257 3 Allen Parish 22 Louisiana 2010 \n", "258 5 Ascension Parish 22 Louisiana 2010 \n", "259 7 Assumption Parish 22 Louisiana 2010 \n", "260 9 Avoyelles Parish 22 Louisiana 2010 \n", "\n", " Group MGal \n", "256 Public Supply self-supplied groundwater withdr... 5.82 \n", "257 Public Supply self-supplied groundwater withdr... 4.27 \n", "258 Public Supply self-supplied groundwater withdr... 3.02 \n", "259 Public Supply self-supplied groundwater withdr... 0.00 \n", "260 Public Supply self-supplied groundwater withdr... 3.85 " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfTidy.head()" ] }, { "cell_type": "code", "execution_count": 64, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#Create lists\n", "\n", "##Note: need to keep Livestock above its subcategories, otherwise it will overwrite\n", "# values in the output UseGroup field\n", "useClasses = [\"Aquaculture\",\n", " \"Commercial\",\n", " \"Domestic\",\n", " \"Fossil-fuel Thermoelectric Power\",\n", " \"Geothermal Thermoelectric Power\",\n", " \"Hydroelectric Power\",\n", " \"Industrial\",\n", " \"Irrigation, Crop\",\n", " \"Irrigation, Golf Courses\",\n", " \"Irrigation, Total\",\n", " \"Livestock\",\n", " \"Livestock \\(Animal Specialties\\)\",\n", " \"Livestock \\(Stock\\)\",\n", " \"Mining\",\n", " \"Nuclear Thermoelectric Power\",\n", " \"Public Supply\",\n", " \"Thermoelectric Power \\(Closed-loop cooling\\)\",\n", " \"Thermoelectric Power \\(Once-through cooling\\)\",\n", " \"Total Thermoelectric Power\",\n", " \"Wastewater Treatment\"\n", " ]\n", "srcClasses = [\"consumptive use\",\n", " \"self-supplied\",\n", " \"deliveries from public supply\",\n", " \"reclaimed wastewater\",\n", " \"conveyance loss\",\n", " \"instream water use\",\n", " \"offstream surface-water withdrawals\",\n", " \"surface water self-supplied offstream withdrawals,\"\n", " \"deliveries to commercial,\",\n", " \"deliveries to domestic\",\n", " \"deliveries to thermoelectric\",\n", " \"public use and losses\",\n", " \"reclaimed wastewater\" \n", " ]\n", "srcTypes = ['fresh',\n", " 'saline',\n", " 'surface water withdrawals',\n", " 'groundwater withdrawals',\n", " ]" ] }, { "cell_type": "code", "execution_count": 75, "metadata": {}, "outputs": [], "source": [ "#Create and populate UseClass field\n", "dfTidy['UseClass'] = '-'\n", "#Loop thru use classes and assign as new column\n", "for useClass in useClasses:\n", " #Remote the backslash needed in selecting records\n", " fixClass = useClass.replace(\"\\\\\",\"\")\n", " #Update rows where the Group field contains the use class\n", " dfTidy.ix[dfTidy.Group.str.contains(useClass),'UseClass'] = fixClass\n", "#dfTidy.UseClass.unique().tolist()" ] }, { "cell_type": "code", "execution_count": 112, "metadata": {}, "outputs": [], "source": [ "#Create a field of everythihng but the UseClass\n", "dfTidy['Foo'] = [e.replace(k,'') for e,k in zip(dfTidy.Group,dfTidy.UseClass)]\n", "vals = pd.DataFrame(dfTidy['Foo'].unique())\n", "vals.to_csv(\"etc.csv\")\n" ] }, { "cell_type": "code", "execution_count": 83, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "256 NaN\n", "257 NaN\n", "258 NaN\n", "259 NaN\n", "260 NaN\n", "Name: Foo, dtype: object" ] }, "execution_count": 83, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Subtract useclass from group\n", "dfTidy['Foo'] = '-'\n", "def substrX(grpVal,useVal):\n", " return \"foo\"\n", " #return grpVal.replace(useVal,\"\")\n", "dfTidy['Foo'] = dfTidy.apply(lambda row: substrX(\"AB\",\"B\"))#.str.replace(x['UseClass'],\"\"))\n", "#for index,rows in dfTidy.iterrows():\n", "dfTidy['Foo'].head()" ] }, { "cell_type": "code", "execution_count": 73, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['self-supplied',\n", " 'deliveries to domestic',\n", " 'deliveries to commercial,',\n", " '-',\n", " 'deliveries to thermoelectric',\n", " 'public use and losses',\n", " 'reclaimed wastewater',\n", " 'deliveries from public supply',\n", " 'consumptive use',\n", " 'conveyance loss']" ] }, "execution_count": 73, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Create and populate SrcClass field\n", "dfTidy['SrcClass'] = '-'\n", "#Loop thru use classes and assign as new column\n", "for srcClass in srcClasses:\n", " #Update rows where the Group field contains the use class\n", " dfTidy.ix[dfTidy.Group.str.contains(srcClass),'SrcClass'] = srcClass\n", "dfTidy.SrcClass.unique().tolist()" ] }, { "cell_type": "code", "execution_count": 74, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['Public Supply deliveries to industrial, in Mgal/d',\n", " 'Public Supply total deliveries, in Mgal/d',\n", " 'Hydroelectric Power instream water use, in Mgal/d',\n", " 'Hydroelectric Power offstream surface-water withdrawals, fresh, in Mgal/d',\n", " 'Hydroelectric Power total offstream surface-water withdrawals in Mgal/d',\n", " 'Wastewater Treatment returns by public wastewater facilities, in Mgal/d'], dtype=object)" ] }, "execution_count": 74, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#CHECK\n", "dfCheck = dfTidy[dfTidy['SrcClass'] == '-']\n", "#dfCheck2 = dfTidy[dfTidy.Group.str.contains(str('Thermoelectric Power \\(Once-through cooling\\)'))]\n", "dfCheck.Group.unique()\n" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#Summarize \n", "dfState = dfTidy.groupby(['Group'])['MGal'].sum()\n", "dfState.to_csv(outFN)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.13" } }, "nbformat": 4, "nbformat_minor": 2 }