{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Downloads State Water Use data for 2000/2005/2010 and creates a formatted Physical Water Use Supply table. \n", "\n", "Sample URL (Louisiana)\n", "https://waterdata.usgs.gov/la/nwis/water_use?format=rdb&rdb_compression=value&wu_area=County&wu_year=2000%2C2005%2C2010&wu_county=ALL&wu_category=ALL&wu_county_nms=--ALL%2BCounties--&wu_category_nms=--ALL%2BCategories--\n", "\n", "Workflow\n", "* Construct the url and download the data into a pandas data frame\n", "* Melt/gather the usage columns into row values under the column name 'Group'\n", "* Remove rows with no usage data (identified by not having \"Mgal\" in the 'Group' name)\n", "*" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#Import modules\n", "import sys, os, urllib\n", "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#Specify the state and year to process\n", "state = 'la' #Louisiana\n", "year = 2010" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#Set the output file location to the Data/State data folder\n", "outFolder = '../../Data/Statedata/'\n", "outFN = outFolder + os.sep + '{0}_{1}.csv'.format(state,year)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#Set the data URL path and parameters and construct the url\n", "path = 'https://waterdata.usgs.gov/{}/nwis/water_use?'.format(state)\n", "values = {'format':'rdb',\n", " 'rdb_compression':'value',\n", " 'wu_area':'County',\n", " 'wu_year': year,\n", " 'wu_county':'ALL',\n", " 'wu_county_nms':'--ALL+Counties--',\n", " 'wu_category_nms':'--ALL+Categories--'\n", " }\n", "url = path + urllib.urlencode(values)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#Pull data in using the URL and remove the 2nd row of headers\n", "dfRaw = pd.read_table(url,comment='#',header=[0,1],na_values='-')\n", "dfRaw.columns = dfRaw.columns.droplevel(level=1)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | state_cd | \n", "state_name | \n", "county_cd | \n", "county_nm | \n", "year | \n", "Total Population total population of area, in thousands | \n", "Public Supply population served by groundwater, in thousands | \n", "Public Supply population served by surface water, in thousands | \n", "Public Supply total population served, in thousands | \n", "Public Supply self-supplied groundwater withdrawals, fresh, in Mgal/d | \n", "... | \n", "Hydroelectric Power total offstream surface-water withdrawals in Mgal/d | \n", "Hydroelectric Power power generated by instream use, in gigawatt-hours | \n", "Hydroelectric Power power generated by offstream use, in gigawatt-hours | \n", "Hydroelectric Power total power generated, in gigawatt-hours | \n", "Hydroelectric Power number of instream facilities | \n", "Hydroelectric Power number of offstream facilities | \n", "Hydroelectric Power total number of facilities | \n", "Wastewater Treatment returns by public wastewater facilities, in Mgal/d | \n", "Wastewater Treatment number of public wastewater facilities | \n", "Wastewater Treatment reclaimed wastewater released by wastewater facilities, in Mgal/d | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "22 | \n", "Louisiana | \n", "1 | \n", "Acadia Parish | \n", "2010 | \n", "61.773 | \n", "44.666 | \n", "0.000 | \n", "44.666 | \n", "5.82 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
1 | \n", "22 | \n", "Louisiana | \n", "3 | \n", "Allen Parish | \n", "2010 | \n", "25.764 | \n", "22.573 | \n", "0.000 | \n", "22.573 | \n", "4.27 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
2 | \n", "22 | \n", "Louisiana | \n", "5 | \n", "Ascension Parish | \n", "2010 | \n", "107.215 | \n", "29.510 | \n", "27.094 | \n", "56.604 | \n", "3.02 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
3 | \n", "22 | \n", "Louisiana | \n", "7 | \n", "Assumption Parish | \n", "2010 | \n", "23.421 | \n", "0.000 | \n", "23.041 | \n", "23.041 | \n", "0.00 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
4 | \n", "22 | \n", "Louisiana | \n", "9 | \n", "Avoyelles Parish | \n", "2010 | \n", "42.073 | \n", "39.756 | \n", "0.000 | \n", "39.756 | \n", "3.85 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
5 rows × 281 columns
\n", "\n", " | county_cd | \n", "county_nm | \n", "state_cd | \n", "state_name | \n", "year | \n", "Group | \n", "MGal | \n", "
---|---|---|---|---|---|---|---|
256 | \n", "1 | \n", "Acadia Parish | \n", "22 | \n", "Louisiana | \n", "2010 | \n", "Public Supply self-supplied groundwater withdr... | \n", "5.82 | \n", "
257 | \n", "3 | \n", "Allen Parish | \n", "22 | \n", "Louisiana | \n", "2010 | \n", "Public Supply self-supplied groundwater withdr... | \n", "4.27 | \n", "
258 | \n", "5 | \n", "Ascension Parish | \n", "22 | \n", "Louisiana | \n", "2010 | \n", "Public Supply self-supplied groundwater withdr... | \n", "3.02 | \n", "
259 | \n", "7 | \n", "Assumption Parish | \n", "22 | \n", "Louisiana | \n", "2010 | \n", "Public Supply self-supplied groundwater withdr... | \n", "0.00 | \n", "
260 | \n", "9 | \n", "Avoyelles Parish | \n", "22 | \n", "Louisiana | \n", "2010 | \n", "Public Supply self-supplied groundwater withdr... | \n", "3.85 | \n", "